#!perl
use strict;
use warnings;
use Data::Dumper;
use Carp;

#
# This is a SAS Component
#

=head1 text_search

text_search performs a search against a full-text index maintained
for the CDMI. The parameter "input" is the text string to be searched for.
The parameter "entities" defines the entities to be searched. If the list
is empty, all indexed entities will be searched. The "start" and "count"
parameters limit the results to "count" hits starting at "start".

Example:

    text_search [-start N] [-count N] [-entity name ...] search-string > output

The search string is taken from the command line: all non-option
arguments are joined with single spaces and passed as the "input"
parameter. This script does not itself read the standard input; the
results are written to the standard output.

=head2 Documentation for underlying call

This script is a wrapper for the CDMI-API call text_search. The script
invokes it as follows:

    $return = $obj->text_search($input, $start, $count, $entities)

=over 4

=item Parameter and return types

The types below describe how this script supplies the parameters and
consumes the result; consult the CDMI-API specification for the
authoritative definitions.

=begin html

<pre>
$input is a string
$start is an int
$count is an int
$entities is a reference to a list where each element is a string
$return is a reference to a hash where the key is an entity name and the value is a reference to a list where each element is a hit
hit is a reference to a list containing 2 items:
	0: a weight
	1: a reference to a hash where the key is a field name and the value is the field value
</pre>

=end html

=begin text

$input is a string
$start is an int
$count is an int
$entities is a reference to a list where each element is a string
$return is a reference to a hash where the key is an entity name and the value is a reference to a list where each element is a hit
hit is a reference to a list containing 2 items:
	0: a weight
	1: a reference to a hash where the key is a field name and the value is the field value

=end text

=back

=head2 Command-Line Options

=over 4

=item -start N

Offset of the first hit to return. Defaults to 0.

=item -count N

Maximum number of hits to return. Defaults to 100.

=item -entity name

Name of an entity to search. May be given more than once; when it is
omitted an empty list is passed to the underlying call.

=back

Any further options are handled by
Bio::KBase::CDMI::CDMIClient->new_for_script and are not described here.

=head2 Output Format

The standard output is tab-delimited. For each entity in the result, a
header line is written first: a "#" followed by the field names of the
first hit, sorted. Each hit then produces one line holding the entity
name followed by the hit's field values in header order. Entities are
written in sorted order. The hit weight is not printed.

If no search string is given, or the client cannot be created, the usage
message is written to stderr and the script exits with status 1.

=cut

my $usage = "usage: text_search [-start N] [-count N] [-entity name -entity name ..] search-string\n";

use Bio::KBase::CDMI::CDMIClient;
use Bio::KBase::Utilities::ScriptThing;

# Defaults for the paging options; overridden by -start / -count.
my $start = 0;
my $count = 100;

# Filled by repeated -entity options; stays empty when none are given.
my @entities;

# new_for_script receives the option specs and returns the client object.
# NOTE(review): presumably it also consumes the options from @ARGV so that
# only the search words remain -- confirm against CDMIClient.
my $kbO = Bio::KBase::CDMI::CDMIClient->new_for_script(
    'start=s'  => \$start,
    'count=s'  => \$count,
    'entity=s' => \@entities,
);

# A missing search string or a missing client is a usage error; exit
# non-zero so that calling shells and pipelines can detect it.
if (@ARGV == 0 || !$kbO) {
    print STDERR $usage;
    exit 1;
}

# All remaining arguments together form the search string.
my $string = join(" ", @ARGV);

my $res = $kbO->text_search($string, $start, $count, \@entities);

# Sort the entity names so the output order is reproducible between runs
# (plain hash order is randomized by perl).
for my $entity (sort keys %$res) {
    my @hdr;
    for my $hit (@{ $res->{$entity} }) {
        # Each hit is [weight, field-hash]; only the fields are printed.
        my $data = $hit->[1];

        # The header comes from the first hit that has any fields. While
        # @hdr is still empty (a hit without fields) it is derived again
        # from the next hit.
        if (!@hdr) {
            @hdr = sort keys %$data;
            print join("\t", "#", @hdr), "\n";
        }

        # A hit lacking one of the header fields yields an empty column
        # rather than an "uninitialized value" warning.
        my @values = map { defined $_ ? $_ : '' } @{$data}{@hdr};
        print join("\t", $entity, @values), "\n";
    }
}