#!perl use strict; use Data::Dumper; use Carp; # # This is a SAS Component # =head1 region_to_alleles Example: region_to_alleles [-d 10000] < input > output Here the region will be 20kb centered on the input positions. The standard input should be a tab-separated table (i.e., each line is a tab-separated set of fields). Normally, the last field in each line would contain the identifer. If another column contains the identifier use -c N where N is the column (from 1) that contains the identifier. This is a pipe command. The input is taken from the standard input, and the output is to the standard output. =head2 Documentation for underlying call This script is a wrapper for the CDMI-API call region_to_alleles. It is documented as follows: =over 4 =item Parameter and return types =begin html <pre> $region_of_dna is a region_of_dna $return is a reference to a list where each element is a reference to a list containing 2 items: 0: an allele 1: an int region_of_dna is a reference to a list containing 4 items: 0: a contig 1: a begin 2: a strand 3: a length contig is a string begin is an int strand is a string length is an int allele is a string </pre> =end html =begin text $region_of_dna is a region_of_dna $return is a reference to a list where each element is a reference to a list containing 2 items: 0: an allele 1: an int region_of_dna is a reference to a list containing 4 items: 0: a contig 1: a begin 2: a strand 3: a length contig is a string begin is an int strand is a string length is an int allele is a string =end text =back =head2 Command-Line Options =over 4 =item -c Column This is used only if the column containing the identifier is not the last column. =item -i InputFile [ use InputFile, rather than stdin ] =back =head2 Output Format The standard output is a tab-delimited file. It consists of the input file with extra columns added. Input lines that cannot be extended are written to stderr. =cut use SeedUtils; my $usage = "usage: region_to_alleles [-c column] < input > output"; use Bio::KBase::CDMI::CDMIClient; use Bio::KBase::Utilities::ScriptThing; my $d = 5000; my $column; my $input_file; my $kbO = Bio::KBase::CDMI::CDMIClient->new_for_script('c=i' => \$column, 'd=i' => \$d, 'i=s' => \$input_file); if (! $kbO) { print STDERR $usage; exit } my $ih; if ($input_file) { open $ih, "<", $input_file or die "Cannot open input file $input_file: $!"; } else { $ih = \*STDIN; } while (my @tuples = Bio::KBase::Utilities::ScriptThing::GetBatch($ih, undef, $column)) { foreach my $tuple (@tuples) { print STDERR &Dumper($tuple); my($position,$line) = @$tuple; my($contig,$pos,$strand,$len); if ($position =~ /^(\S+)\_(\d+)$/) { ($contig,$pos,$strand,$len) = ($1,$2,'+',1); } elsif ($position =~ /^(\S+)\_(\d+)\+1$/) { ($contig,$pos,$strand,$len) = ($1,$2,'+',1); } my $beg = $pos - $d; if ($beg < 1) { $beg = 1 } my $ln = 2 * $d; my $tuples = $kbO->region_to_alleles([$contig,$beg,$strand,$ln]); foreach my $tuple (@$tuples) { print join("\t",($line,$tuple->[0])),"\n"; } } }