#!perl
use
strict;
use
Data::Dumper;
use
Carp;
#
# This is a SAS Component
#
=head1 query_entity_ContigSequence
Query the entity ContigSequence.
ContigSequences are strings of DNA. Contigs have an associated
genome, but ContigSequences do not. We can think of random samples of DNA as a set of ContigSequences.
There are no length constraints imposed on ContigSequences -- they can be either
very short or very long. The basic unit of data that is moved to/from the database
is the ContigChunk, from which ContigSequences are formed. The key
of a ContigSequence is the sequence's MD5 identifier.
Example:
query_entity_ContigSequence -a
=head2 Related entities
The ContigSequence entity has the following relationship links:
=over 4
=item HasSection ContigChunk
=item IsSequenceOf Contig
=back
=head2 Command-Line Options
=over 4
=item -is field,value
Limit the results to entities where the given field has the given value.
=item -like field,value
Limit the results to entities where the given field is LIKE (in the SQL sense) the given value.
=item -op operator,field,value
Limit the results to entities where the given field is related to the given value based on the given operator.
The operators supported are as follows. We provide text based alternatives to the comparison
operators so that extra quoting is not required to keep the command-line shell from
confusing them with shell I/O redirection operators.
=over 4
=item < or lt
=item > or gt
=item <= or le
=item >= or ge
=item =
=item LIKE
=back
=item -a
Return all fields.
=item -h
Display a list of the fields available for use.
=item -fields field-list
Choose a set of fields to return. Field-list is a comma-separated list of
strings. The following fields are available:
=over 4
=item length
=back
=back
=head2 Output Format
The standard output is a tab-delimited file containing a column
for each requested field.
=cut
use
Getopt::Long;
# Fields that can be requested for output with -a or -fields.
my @all_fields = ('length');

# Lookup table of every name accepted in a field list or filter: the output
# fields above plus 'id' (presumably the entity key -- confirm against the
# service).
my %all_fields;
$all_fields{$_} = 1 for (@all_fields, 'id');

# Synopsis printed when the command line is rejected.
my $usage = "usage: query_entity_ContigSequence [-is field,value] [-like field,value] [-op operator,field,value] [-show-fields] [-a | -f field list] > entity.data";
# Targets for the command-line options. References to these variables are
# handed to the client constructor, which is expected to fill them in.
# NOTE(review): the lexical $a hides the package variable that sort
# comparators use. Nothing in this script sorts, so it is harmless today,
# but keep it in mind before adding a sort block.
my $a;              # -a: return every field
my $f;              # -fields: comma-separated list of fields to return
my @fields;         # resolved list of fields to print
my $show_fields;    # -h / -show-fields: list the available fields and exit
my @query_is;       # raw "field,value" arguments given to -is
my @query_like;     # raw "field,value" arguments given to -like
my @query_op;       # raw "operator,field,value" arguments given to -op

# Maps the operator spelling accepted by -op (looked up in lower case) to the
# comparison operator placed in the query. The word forms exist so that users
# need not quote '<' and '>' for the shell. '=' is included because the
# documentation lists it among the supported operators.
my %op_map = (
    '>'    => '>',
    'gt'   => '>',
    '<'    => '<',
    'lt'   => '<',
    '>='   => '>=',
    'ge'   => '>=',
    '<='   => '<=',
    'le'   => '<=',
    '='    => '=',
    'like' => 'LIKE',
);
# Load the client class before calling its constructor. Nothing else in this
# file loads it, and require is a no-op when the module is already loaded.
require Bio::KBase::CDMI::CDMIClient;

# Create the client object used to run the query. The arguments pair option
# specifications with references to the variables that receive their values.
# NOTE(review): the specs look like Getopt::Long syntax and the constructor
# presumably parses @ARGV with them -- confirm in CDMIClient.
my $geO = Bio::KBase::CDMI::CDMIClient->new_get_entity_for_script(
    "a"           => \$a,
    "show-fields" => \$show_fields,
    "h"           => \$show_fields,    # -h is an alias for -show-fields
    "is=s"        => \@query_is,
    "like=s"      => \@query_like,
    "op=s"        => \@query_op,
    "fields=s"    => \$f,
);
# -h / -show-fields: list the selectable fields on STDERR and stop with a
# success status.
if ($show_fields) {
    print STDERR "Available fields: @all_fields\n";
    exit 0;
}

# Reject anything still left in @ARGV, and the contradictory combination of
# -a (all fields) with an explicit -fields list.
if (@ARGV != 0 || ($a && $f)) {
    print STDERR $usage, "\n";
    exit 1;
}
# Decide which fields to print: every known field for -a, or the validated
# subset named by -fields. With neither option @fields is left untouched.
if ($a) {
    @fields = @all_fields;
}
elsif ($f) {
    my @requested = split(",", $f);

    # Anything not present in the lookup table is reported, not ignored.
    my @err = grep { !$all_fields{$_} } @requested;
    push(@fields, grep { $all_fields{$_} } @requested);

    if (@err) {
        # The message is prefixed with this script's own name.
        print STDERR "query_entity_ContigSequence: unknown fields @err. Valid fields are: @all_fields\n";
        exit 1;
    }
}
# Filter triples [field, operator, value] that are passed to the query call.
my @qry;

# -is and -like take the same "field,value" argument and differ only in the
# comparison they produce, so both are driven from one table. All -is
# filters are processed before any -like filter.
for my $rule (['=', \@query_is], ['LIKE', \@query_like]) {
    my ($cmp, $args) = @$rule;
    for my $arg (@$args) {
        # Split on the first comma only, so the value may contain commas.
        my ($name, $wanted) = split(/,/, $arg, 2);
        die "$name is not a valid field\n" unless $all_fields{$name};
        push @qry, [$name, $cmp, $wanted];
    }
}

# -op takes "operator,field,value"; the operator is looked up in lower case
# in %op_map. The field is validated before the operator.
for my $arg (@query_op) {
    my ($op_name, $name, $wanted) = split(/,/, $arg, 3);
    die "$name is not a valid field\n" unless $all_fields{$name};
    my $cmp = $op_map{lc($op_name)}
        or die "$op_name is not a valid operator\n";
    push @qry, [$name, $cmp, $wanted];
}
# Run the query with the collected filters and the selected fields. The
# result is used as a hash reference whose values are hash references
# indexed by field name.
my $h = $geO->query_entity_ContigSequence(\@qry, \@fields);

# Emit one tab-delimited line per result: the hash key first, then each
# requested field in the order given by @fields. A field whose value is an
# array reference is flattened into a comma-separated string.
while (my ($k, $v) = each %$h) {
    my @columns = map { ref($_) eq 'ARRAY' ? join(",", @$_) : $_ } @$v{@fields};
    print join("\t", $k, @columns), "\n";
}