#!perl
use strict;
use warnings;
use Data::Dumper;
use Carp;

#
# This is a SAS Component
#

=head1 alleles_to_bp_locs

This command can be used to find the contig and position corresponding to
each of a set of alleles.  The command takes in a table with a designated
column containing allele IDs.  It adds a column containing the location
corresponding to the allele.

Example:

    alleles_to_bp_locs [arguments] < input > output

The standard input should be a tab-separated table (i.e., each line
is a tab-separated set of fields).  Normally, the last field in each
line would contain the identifier. If another column contains the identifier
use

    -c N

where N is the column (from 1) that contains the identifier.

This is a pipe command. The input is taken from the standard input, and the
output is to the standard output.

=head2 Documentation for underlying call

This script is a wrapper for the CDMI-API call alleles_to_bp_locs. It is documented as follows:

=over 4

=item Parameter and return types

=begin html

<pre>
$alleles is an alleles
$return is a reference to a hash where the key is an allele and the value is a bp_loc
alleles is a reference to a list where each element is an allele
allele is a string
bp_loc is a reference to a list containing 2 items:
    0: a contig
    1: an int
contig is a string

</pre>

=end html

=begin text

$alleles is an alleles
$return is a reference to a hash where the key is an allele and the value is a bp_loc
alleles is a reference to a list where each element is an allele
allele is a string
bp_loc is a reference to a list containing 2 items:
    0: a contig
    1: an int
contig is a string

=end text

=back

=head2 Command-Line Options

=over 4

=item -c Column

This is used only if the column containing the identifier is not the last column.

=item -i InputFile    [ use InputFile, rather than stdin ]

=back

=head2 Output Format

The standard output is a tab-delimited file. It consists of the input
file with an extra column added.  The added column has the form
C<contig_position+1>: the contig and the integer returned by the
underlying call, joined by an underscore and followed by the literal
text C<+1>.

Input lines that cannot be extended are written to stderr.

=cut

use SeedUtils;

# The usage text lists both options the script registers below (-c and -i)
# and ends with a newline so it does not run into the shell prompt.
my $usage = "usage: alleles_to_bp_locs [-c column] [-i input_file] < input > output\n";

use Bio::KBase::CDMI::CDMIClient;
use Bio::KBase::Utilities::ScriptThing;

my $column;        # value of -c: column (from 1) holding the allele id
my $input_file;    # value of -i: read from this file instead of stdin

# new_for_script is handed the option specs and the variables to fill in;
# a false return is treated as a failure to set up the client.
my $kbO = Bio::KBase::CDMI::CDMIClient->new_for_script(
    'c=i' => \$column,
    'i=s' => \$input_file,
);
if (! $kbO) {
    print STDERR $usage;
    exit 1;    # non-zero status so callers can detect the failure
}

# Select the input source: the named file if -i was given, otherwise stdin.
my $ih;
if ($input_file) {
    open $ih, "<", $input_file or die "Cannot open input file $input_file: $!";
}
else {
    $ih = \*STDIN;
}

# Process the input one batch at a time.  Each tuple returned by GetBatch is
# unpacked below as [ id, original_line ].
while (my @tuples = Bio::KBase::Utilities::ScriptThing::GetBatch($ih, undef, $column)) {
    my @ids    = map { $_->[0] } @tuples;
    my $loc_of = $kbO->alleles_to_bp_locs(\@ids);

    for my $tuple (@tuples) {
        my ($id, $line) = @$tuple;
        my $loc = $loc_of->{$id};

        if (defined($loc) && ref($loc) eq 'ARRAY') {
            # A bp_loc is a two-element list: [ contig, position ].
            my ($contig, $bp) = @$loc;
            print join("\t", $line, "${contig}_${bp}+1"), "\n";
        }
        else {
            # No usable location for this id (missing, or not a list):
            # the line cannot be extended, so report it on stderr as the
            # documentation promises instead of dropping it silently.
            print STDERR $line, "\n";
        }
    }
}

# Only close a handle this script opened itself; leave STDIN alone.
close $ih if $input_file;