er_scripts/get_entity_ContigChunk


            
              1
2
3
4
5
6
7
8
9
10
—
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
              #!perl
use strict;
use Data::Dumper;
use Bio::KBase::Utilities::ScriptThing;
use Carp;
#
# This is a SAS Component
#
=head1 get_entity_ContigChunk
ContigChunks are strings of DNA thought of as being a string in a 4-character alphabet
with an associated ID.  We allow a broader alphabet that includes U (for RNA) and
the standard ambiguity characters.
The notion of ContigChunk was introduced to avoid transferring/manipulating
huge contigs to access small substrings.  A ContigSequence is formed by
concatenating a set of one or more ContigChunks.  Thus, ContigChunks are the
basic units moved from the database to memory.  Their existence should be
hidden from users in most circumstances (users are expected to request
substrings of ContigSequences, and the KBase software locates the appropriate
ContigChunks).
Example:
    get_entity_ContigChunk -a < ids > table.with.fields.added
would read in a file of ids and add a column for each field in the entity.
The standard input should be a tab-separated table (i.e., each line
is a tab-separated set of fields).  Normally, the last field in each
line would contain the id. If some other column contains the id,
use
    -c N
where N is the column (from 1) that contains the id.
This is a pipe command. The input is taken from the standard input, and the
output is to the standard output.
=head2 Related entities
The ContigChunk entity has the following relationship links:
=over 4
     
=item IsSectionOf ContigSequence
=back
=head2 Command-Line Options
=over 4
=item -c Column
Use the specified column to define the id of the entity to retrieve.
=item -h
Display a list of the fields available for use.
=item -fields field-list
Choose a set of fields to return. Field-list is a comma-separated list of 
strings. The following fields are available:
=over 4
=item sequence
=back    
=back
=head2 Output Format
The standard output is a tab-delimited file. It consists of the input
file with an extra column added for each requested field.  Input lines that cannot
be extended are written to stderr.  
=cut
use Bio::KBase::CDMI::CDMIClient;
use Getopt::Long;
# Fields of the ContigChunk entity that this script is able to return.
my @all_fields = ( 'sequence' );
# Lookup table used to validate the names supplied with -fields.
my %all_fields = map { $_ => 1 } @all_fields;
my $usage = "usage: get_entity_ContigChunk [-h] [-c column] [-a | -f field list] < ids > extended.by.a.column(s)";
my $column;         # -c: column (counted from 1) holding the id
my $all;            # -a: return every available field
                    # (not named $a, which would mask sort's special variable)
my $f;              # -fields: comma-separated list of field names
my $i = "-";        # -i: input file name; "-" selects standard input
my @fields;         # the fields that will actually be requested
my $show_fields;    # -h / -show-fields: list the available fields and exit

# NOTE(review): the option specs look like Getopt::Long specs, so the
# constructor presumably parses @ARGV itself -- confirm in CDMIClient.
my $geO = Bio::KBase::CDMI::CDMIClient->new_get_entity_for_script('c=i'         => \$column,
                                                                  "a"           => \$all,
                                                                  "h"           => \$show_fields,
                                                                  "show-fields" => \$show_fields,
                                                                  "fields=s"    => \$f,
                                                                  'i=s'         => \$i);
if ($show_fields)
{
    print STDERR "Available fields: @all_fields\n";
    exit 0;
}

# -a and -fields are mutually exclusive.  The usage text has no trailing
# newline of its own, so one is added here to keep the terminal tidy.
if ($all && $f)
{
    print STDERR "$usage\n";
    exit 1;
}

if ($all)
{
    @fields = @all_fields;
}
elsif ($f)
{
    # Split the requested names into known and unknown ones so that every
    # bad name can be reported in a single message.
    my @err;
    for my $field (split(",", $f))
    {
        if ($all_fields{$field})
        {
            push(@fields, $field);
        }
        else
        {
            push(@err, $field);
        }
    }
    if (@err)
    {
        print STDERR "get_entity_ContigChunk: unknown fields @err. Valid fields are: @all_fields\n";
        exit 1;
    }
}
else
{
    # Neither -a nor -fields was given: there is nothing to retrieve.
    print STDERR "$usage\n";
    exit 1;
}
# Choose the input stream: "-" stands for standard input, any other value
# is taken as the name of a file to open for reading.
my $ih;
if ($i ne '-') {
    open($ih, "<", $i) or die "Cannot open input file $i: $!\n";
}
else {
    $ih = \*STDIN;
}
# Work through the input one batch at a time.  Each batch entry is an
# array ref whose first two elements are the id and the original input line.
while (my @batch = Bio::KBase::Utilities::ScriptThing::GetBatch($ih, undef, $column)) {
    my @ids = map { $_->[0] } @batch;

    # One lookup per batch; the result is a hash ref keyed by id.
    my $entity_for = $geO->get_entity_ContigChunk(\@ids, \@fields);

    for my $entry (@batch) {
        my ($id, $line) = @$entry;
        my $record = $entity_for->{$id};

        # Ids with no matching entity: echo the input line to stderr.
        unless (defined $record) {
            print STDERR "$line\n";
            next;
        }

        # One output column per requested field; array-valued fields are
        # flattened into a comma-separated string.
        my @values = map {
            my $val = $record->{$_};
            ref($val) eq 'ARRAY' ? join(",", @$val) : $val;
        } @fields;

        print "$line\t", join("\t", @values), "\n";
    }
}
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)