Bio/DB/Das/BioSQL.pm - metacpan.org


            
              —
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
—
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
—
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
—
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
—
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
—
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
—
370
371
372
373
374
375
376
377
378
379
380
381
—
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
              
=head1 NAME
Bio::DB::Das::BioSQL - DAS-style access to a BioSQL database
=head1 SYNOPSIS
 # Open up a feature database
 $db = Bio::DB::Das::BioSQL->new(
         driver    => 'mysql',
         dbname    => 'biosql',
         biodbname => 'test',
         host      => 'swiss',
         user      => 'lstein',
         pass      => undef,
         port      => undef,
         namespace => 'namespace',
         version   => version_number );
  # segments are Bio::Das::SegmentI - compliant objects
  @segments = $db->segment(-name  => 'NT_29921.4',
                           -start => 1,
                           -end   => 1000000);
  # fetch a list of features
  @features = $db->features(-segment=>$segment, -type=>['type1','type2','type3']);
  $stream   = $db->get_seq_stream(-type=>['type1','type2','type3']);
  # each feature is a Bio::SeqFeatureI-compliant object
  while (my $feature = $stream->next_seq) {
    # do something ...
  }
  # get all feature types
  @types   = $db->types;
  # count types
  %types   = $db->types(-enumerate=>1);
  @feature = $db->get_feature_by_name($class=>$name);
  @feature = $db->get_feature_by_target($target_name);
  @feature = $db->get_feature_by_attribute($att1=>$value1,$att2=>$value2);
  $feature = $db->get_feature_by_id($id);
  $error = $db->error;
=head1 DESCRIPTION
Bio::DB::Das::BioSQL is a simplified alternative interface to sequence
annotation databases used by the distributed annotation system (see
L<Bio::Das>). In this scheme, the genome is represented as a series of
features, a subset of which are named.  Named features can be used as
reference points for retrieving "segments" (see
L<Bio::DB::Das::Segment>), and these can, in turn, be used as the
basis for exploring the genome further.
In addition to a name, each feature has a "class", which is
essentially a namespace qualifier and a "type", which describes what
type of feature it is.  Das uses the GO consortium's ontology of
feature types, and so the type is actually an object of class
Bio::Das::FeatureTypeI (see L<Bio::Das::FeatureTypeI>). Bio::DB::Das::BioSQL 
provides methods for interrogating the database for the types it contains
and the counts of each type.
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists.  Your participation is much appreciated.
  bioperl-l@bio.perl.org
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track of
the bugs and their resolution.  Bug reports can be submitted via email
or the web:
  bioperl-bugs@bio.perl.org
  http://bio.perl.org/bioperl-bugs/
=head1 AUTHORS
Lincoln Stein, Vsevolod (Simon) Ilyushchenko, Brian Osborne
Email lstein@cshl.edu, simonf@cshl.edu
=head1 APPENDIX
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _
=cut
package Bio::DB::Das::BioSQL;
use strict;
use Bio::DB::Das::BioSQL::BioDatabaseAdaptor;
use Bio::DB::Das::BioSQL::Segment;
use Bio::DB::Das::BioSQL::Iterator;
use Bio::Root::Root;
use Bio::DasI;
# Package globals: module version and inheritance list.
use vars qw($VERSION @ISA);
# Class names of the collaborating modules.  They are exposed through the
# _segclass(), _adaptorclass() and _iteratorclass() accessors further down,
# and the rest of this module reaches them through those accessors.
use constant SEG_CLASS      => 'Bio::DB::Das::BioSQL::Segment';
use constant ADAPTOR_CLASS  => 'Bio::DB::Das::BioSQL::BioDatabaseAdaptor';
use constant ITERATOR_CLASS => 'Bio::DB::Das::BioSQL::Iterator';
$VERSION = 0.05;
# Bio::Root::Root is listed first, so its methods are found before Bio::DasI's.
@ISA     = qw(Bio::Root::Root Bio::DasI);
# Install horrible patch for GBrowse compatibility
# NOTE(review): no patch code is visible in this file; only the module load
# below remains.  Confirm whether this import is still required.
use Bio::SeqFeature::Generic;
=head2 new
 Title   : new
 Usage   : $db    = Bio::DB::Das::BioSQL->new(
            driver    => 'mysql',
            dbname    => 'biosql',
            biodbname => 'swissprot',
            host      => 'localhost',
            user      => 'jimbo',
            pass      => 'supersecret',
            port      => 3306 );
 Function: Open up a Bio::DasI interface to a BioSQL database
 Returns : a new Bio::DB::Das::BioSQL object
 Args    : See L<Bio::DB::Das::BioSQL::BioDatabaseAdaptor>, method new_from_registry().
           The new() method takes the same arguments exactly.
=cut
# create new database accessor object
# takes all the same args as a Bio::DB::BioDB class
# Constructor.
#
# Builds the object through the parent class constructor, then opens the
# BioSQL adaptor with the very same argument list and stores it in the
# biosql() slot.  new_from_registry() may throw; the exception is not
# caught here and propagates to the caller.
#
# Returns the new object.
sub new {
    my ( $class, @args ) = @_;

    my $self = $class->SUPER::new(@args);
    $self->biosql( $self->_adaptorclass->new_from_registry(@args) );

    return $self;
}
=head2 segment
 Title   : segment
 Usage   : $db->segment(@args);
 Function: create a segment object
 Returns : segment object(s)
 Args    : see below
This method generates a Bio::Das::SegmentI object (see
L<Bio::Das::SegmentI>). The segment can be used to find overlapping
features and the raw sequence.
When making the segment() call, you specify the ID of a sequence
landmark (e.g. an accession number, a clone or contig), and a
positional range relative to the landmark.  If no range is specified,
then the entire region spanned by the landmark is used to generate the
segment.
Arguments are -option=E<gt>value pairs as follows:
 -name         ID of the landmark sequence.
 -class        A namespace qualifier.  It is not necessary for the
               database to honor namespace qualifiers, but if it
               does, this is where the qualifier is indicated.
 -version      Version number of the landmark.  It is not necessary for
               the database to honor versions, but if it does, this is
               where the version is indicated.
 -start        Start of the segment relative to landmark.  Positions
               follow standard 1-based sequence rules.  If not specified,
               defaults to the beginning of the landmark.
 -end          End of the segment relative to the landmark.  If not specified,
               defaults to the end of the landmark.
 -absolute
The return value is a list of Bio::Das::SegmentI objects.  If the method
is called in a scalar context and there is no more than one segment
that satisfies the request, then it is allowed to return the segment.
Otherwise, the method must throw a "multiple segment exception".
=cut
# Build segment objects for the landmark named by -name.
#
# Arguments (-option => value pairs, or positionally in this order):
#   -name      accession handed to the adaptor's fetch_Seq_by_accession()
#   -start     forwarded unchanged to the segment constructor
#   -end       forwarded unchanged to the segment constructor
#   -class     accepted for interface compatibility; not used here
#   -version   accepted for interface compatibility; not used here
#   -absolute  forwarded unchanged to the segment constructor
#
# Returns:
#   list context   - one segment per sequence returned by the adaptor
#                    (empty list when nothing matches)
#   scalar context - the single matching segment, or undef when nothing
#                    matches
#
# Throws: a "multiple segment exception" when called in scalar context and
# more than one sequence matches, as the interface documentation requires.
sub segment {
    my $self = shift;

    # CLASS and VERSION stay in the order list so that positional calls keep
    # their meaning; their values are simply discarded.
    my ( $name, $start, $end, undef, undef, $absolute ) =
      $self->_rearrange( [qw(NAME START END CLASS VERSION ABSOLUTE)], @_ );

    my @seq = $self->biosql->fetch_Seq_by_accession($name);
    return unless @seq;

    # wantarray is undef in void context, which must not raise.
    my $scalar_context = defined(wantarray) && !wantarray;
    if ( $scalar_context && @seq > 1 ) {
        my $label = defined $name ? $name : '';
        $self->throw( "multiple segment exception: "
              . scalar(@seq)
              . " segments match '$label'" );
    }

    my @segments = map {
        $self->_segclass->new(
            -bioseq    => $_,
            -dbadaptor => $self,
            -start     => $start,
            -end       => $end,
            -absolute  => $absolute
        )
    } @seq;

    # Without this, scalar context would receive the element count of the
    # map rather than a segment object.
    return wantarray ? @segments : $segments[0];
}
# Look up features (returned as segment objects) by name or by feature id.
#
# Arguments (-option => value pairs, or positionally in this order):
#   -name, -start, -end, -class, -version, -feature_id
# Only -name and -feature_id are consulted; the others are parsed and
# discarded.
#
# When -feature_id is true the lookup is delegated to
# get_feature_by_primary_key().  Otherwise every sequence the adaptor returns
# for the accession is wrapped in a segment object.  Returns nothing when the
# adaptor finds no sequence.
sub get_feature_by_name {
    my ( $self, @args ) = @_;

    my ( $name, undef, undef, undef, undef, $id ) =
      $self->_rearrange( [qw(NAME START END CLASS VERSION FEATURE_ID)],
        @args );

    # A feature id takes precedence over the name lookup.
    if ($id) {
        return $self->get_feature_by_primary_key($id);
    }

    my @bioseqs = $self->biosql->fetch_Seq_by_accession($name);
    return if !@bioseqs;

    my @segments;
    foreach my $bioseq (@bioseqs) {
        push @segments,
          $self->_segclass->new( -bioseq => $bioseq, -dbadaptor => $self );
    }
    return @segments;
}
# Fetch features by primary key.
#
# Asks the underlying database for its "Bio::SeqFeatureI" object adaptor,
# looks the key up there, and passes every hit through
# Bio::DB::Das::BioSQL::Segment->wrap_feature().
#
# Args    : the primary key
# Returns : the wrapped features (empty list when the key is not found)
sub get_feature_by_primary_key {
    my ( $self, $key ) = @_;

    my $feature_adaptor =
      $self->biosql->db->get_object_adaptor("Bio::SeqFeatureI");
    my @found = $feature_adaptor->find_by_primary_key($key);

    return map { Bio::DB::Das::BioSQL::Segment->wrap_feature($_) } @found;
}
# Alias for get_feature_by_primary_key(): forwards every argument unchanged
# and returns whatever that method returns.
sub get_feature_by_primary_id {
    my ( $self, @args ) = @_;
    return $self->get_feature_by_primary_key(@args);
}
=head2 features
 Title   : features
 Usage   : $db->features(@args)
 Function: get all features, possibly filtered by type
 Returns : a list of Bio::SeqFeatureI objects
 Args    : see below
 Status  : public
This routine will retrieve features in the database regardless of
position.  It can be used to return all features, or a subset based on
their type
Arguments are -option=E<gt>value pairs as follows:
  -type       List of feature types to return.  Argument is an array
              of Bio::Das::FeatureTypeI objects or a set of strings
              that can be converted into FeatureTypeI objects.
  -callback   A callback to invoke on each feature.  The subroutine
              will be passed each Bio::SeqFeatureI object in turn.
  -attributes A hash reference containing attributes to match.
  -segment    A segment
The -attributes argument is a hashref containing one or more attributes
to match against:
  -attributes => { Gene => 'abc-1',
                   Note => 'confirmed' }
Attribute matching is simple exact string matching, and multiple
attributes are ANDed together.
If one provides a callback, it will be invoked on each feature in
turn.  If the callback returns a false value, iteration will be
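interrupted.  When a callback is provided, the method returns undef.
NOTE: the current implementation parses -callback and -attributes but
does not act on them; features are filtered by type only.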
=cut
# Return the top-level features of a segment, optionally filtered by type.
#
# Arguments (-option => value pairs, or positionally in this order):
#   -type        a single type string or an array reference of type strings;
#                compared case-insensitively with each feature's method()
#   -callback    parsed but currently ignored
#   -attributes  parsed but currently ignored
#   -segment     object whose top_SeqFeatures() supplies the features
#   -seq_id      parsed but currently ignored
#
# Returns : the matching features.  An empty list is returned when no
#           segment is supplied, instead of dying on an undefined value.
#
# TODO: -callback and -attributes are described in the POD but are not
# implemented here.
sub features {
    my $self = shift;

    # All five names stay in the order list so positional calls keep their
    # meaning; only TYPE and SEGMENT are consumed.
    my ( $type, undef, undef, $segment ) =
      $self->_rearrange( [qw(TYPE CALLBACK ATTRIBUTES SEGMENT SEQ_ID)], @_ );

    # No segment (for example, the landmark lookup found nothing): there is
    # nothing to enumerate.
    return unless defined $segment;

    my @features = $segment->top_SeqFeatures;
    return @features unless $type;

    # Accept either one type string or an array reference of them.
    my %wanted = map { lc($_) => 1 } ( ref $type ? @$type : ($type) );

    return grep { $wanted{ lc $_->method } } @features;
}
=head2 types
 Title   : types
 Usage   : $db->types(@args)
 Function: return list of feature types in database
 Returns : a list of Bio::Das::FeatureTypeI objects
 Args    : see below
This routine returns a list of feature types known to the database. It
is also possible to find out how many times each feature occurs.
Arguments are -option=E<gt>value pairs as follows:
  -enumerate  if true, count the features
The returned value will be a list of Bio::Das::FeatureTypeI objects
(see L<Bio::Das::FeatureTypeI>).
If -enumerate is true, then the function returns a hash (not a hash
reference) in which the keys are the stringified versions of
Bio::Das::FeatureTypeI and the values are the number of times each
feature appears in the database.
NOTE: This currently raises a "not-implemented" exception, as the
BioSQL API does not appear to provide this functionality.
=cut
# List the feature types known to the database.
#
# Args    : -enumerate (parsed, but unused because the method is a stub)
# Returns : never returns normally from this implementation
# Throws  : whatever throw_not_implemented() raises
sub types {
    my ( $self, @args ) = @_;

    # The argument is still parsed so that call syntax is validated the same
    # way as in a real implementation.
    my ($enumerate) = $self->_rearrange( [qw(ENUMERATE)], @args );

    return $self->throw_not_implemented;
}
=head2 search_notes
 Title   : search_notes
 Usage   : $db->search_notes($search_term,$max_results)
 Function: full-text search on features, ENSEMBL-style
 Returns : an array of [$name,$description,$score]
 Args    : see below
This routine performs a full-text search on feature attributes (which
attributes depend on implementation) and returns a list of
[$name,$description,$score], where $name is the feature ID,
$description is a human-readable description such as a locus line, and
$score is the match strength.
THIS METHOD CURRENTLY RETURNS EMPTY BECAUSE I CAN'T GET FETCH_BY_QUERY()
TO WORK.
=cut
=head2 biosql
 Title   : biosql
 Usage   : $biosql  = $db->biosql([$biosql])
 Function: Get/set the underlying Bio::DB::Das::BioSQL::BioDatabaseAdaptor
 Returns : A Bio::DB::Das::BioSQL::BioDatabaseAdaptor
 Args    : A new Bio::DB::Das::BioSQL::BioDatabaseAdaptor (optional)
=cut
# Combined getter/setter for the underlying adaptor object.
#
# Args    : optional new value; when any argument is passed (even undef) it
#           replaces the stored one
# Returns : the value currently stored under the 'biosql' key
sub biosql {
    my ( $self, @new_value ) = @_;

    $self->{biosql} = $new_value[0] if @new_value;

    return $self->{biosql};
}
=head2 _segclass, _adaptorclass, _iteratorclass
Accessor methods to return module names
=cut
# Class name used to build segment objects (the SEG_CLASS constant).
sub _segclass {
    return SEG_CLASS;
}

# Class name of the database adaptor (the ADAPTOR_CLASS constant).
sub _adaptorclass {
    return ADAPTOR_CLASS;
}

# Class name of the feature iterator (the ITERATOR_CLASS constant).
sub _iteratorclass {
    return ITERATOR_CLASS;
}
=head2 get_seq_stream
 Title   : get_seq_stream
 Usage   : my $seqio = $self->get_seq_stream(-type => $types, -seq_id => $id)
 Function: Performs a query and returns an iterator over it
 Returns : a stream returning Bio::DB::Das::BioSQL::Feature objects
 Args    : -type, -seq_id, -start, -end
           Types should be passed as an array reference or one string.
 Use it like this:
 $stream = $db->get_seq_stream(-type => 'exon', -seq_id => 'NC_122444');
 while (my $exon = $stream->next_seq) {
   print $exon->name,"\n";
 }
=cut
# Run a feature query and return an iterator over the result.
#
# Arguments (-option => value pairs, or positionally in this order):
#   -type    a single type string or an array reference of type strings
#   -seq_id  landmark name handed to segment() as -name
#   -start   } forwarded to segment() only when BOTH are true;
#   -end     } otherwise the whole landmark is requested
#
# Returns : an iterator object built by the iterator class from an array
#           reference of features.  When no segment is found for -seq_id the
#           iterator is built over an empty list, rather than letting
#           features() be called with an undefined segment.
sub get_seq_stream {
    my $self = shift;

    my ( $type, $seq_id, $start, $end ) =
      $self->_rearrange( [qw(TYPE SEQ_ID START END)], @_ );

    # A range is only passed on when both ends are supplied.
    my @range = ( $start && $end ) ? ( -start => $start, -end => $end ) : ();
    my ($segment) = $self->segment( -name => $seq_id, @range );

    my @features;
    if ( defined $segment ) {

        # Make $type an array reference if it's not
        $type = [$type] if $type && !ref $type;

        @features = $self->features(
            -type    => $type,
            -segment => $segment,
            -seq_id  => $seq_id
        );
    }

    return $self->_iteratorclass->new( \@features );
}
1;
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)