lib/Bio/DOOP/Sequence.pm


            
              1
2
3
4
5
—
6
7
8
9
10
11
12
13
14
15
16
17
—
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
—
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
—
176
177
178
179
180
181
182
183
184
185
186
—
187
188
189
190
191
192
193
194
195
196
197
—
198
199
200
201
202
203
204
205
206
207
208
—
209
210
211
212
213
214
215
216
217
218
219
—
220
221
222
223
224
225
226
227
228
229
230
—
231
232
233
234
235
236
237
238
239
240
241
—
242
243
244
245
246
247
248
249
250
251
252
—
253
254
255
256
257
258
259
260
261
262
263
—
264
265
266
267
268
269
270
271
272
273
274
—
275
276
277
278
279
280
281
282
283
284
285
—
286
287
288
289
290
291
292
293
294
295
296
—
297
298
299
300
301
302
303
304
305
306
307
308
—
309
310
311
312
313
314
315
316
317
318
319
320
—
321
322
323
324
325
326
327
328
329
330
331
332
333
—
334
335
336
337
338
339
340
341
342
343
344
345
346
—
347
348
349
350
351
352
353
354
355
356
357
358
359
—
360
361
362
363
364
365
366
367
368
369
370
371
—
372
373
374
375
376
377
378
379
380
381
382
383
—
384
385
386
387
388
389
390
391
392
393
394
395
—
396
397
398
399
400
401
402
403
404
405
406
407
408
409
—
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
—
434
435
436
437
438
439
440
441
442
443
444
445
446
447
—
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
—
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
—
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
              package Bio::DOOP::Sequence;
use strict;
use warnings;
use Carp qw(cluck carp verbose);
=head1 NAME
  Bio::DOOP::Sequence - promoter sequence object
=head1 VERSION
Version 0.11
=cut
our $VERSION = '0.11';
=head1 SYNOPSIS

  $seq = Bio::DOOP::Sequence->new($db,"1234");
  print $seq->get_fasta;

=head1 DESCRIPTION
  This object represents a specific promoter in the database.
  You can access the annotation and the sequence through this object.
=head1 AUTHOR
  Tibor Nagy, Godollo, Hungary and Endre Sebestyen, Martonvasar, Hungary
=head1 METHODS
=head2 new

  $seq = Bio::DOOP::Sequence->new($db,"1234");

  Creates a sequence object from the internal sequence primary id.
  The arguments are the following : Bio::DOOP::DBSQL object, sequence_primary_id
  Dies with a descriptive message if no sequence record matches the id.

=cut

sub new {
  my $class = shift;
  my $db    = shift;
  my $id    = shift;

  my $rows = $db->query("SELECT * FROM sequence WHERE sequence_primary_id = $id;");
  return(_new_from_sequence_rows($class, $db, $rows, "sequence_primary_id", $id));
}

=head2 new_from_dbid

  $seq = Bio::DOOP::Sequence->new_from_dbid($db,$sequence_id);

  Creates a sequence object that is looked up through the sequence_id
  column of the sequence table instead of the primary key.
  The arguments are the following : Bio::DOOP::DBSQL object, sequence_id
  Dies with a descriptive message if no sequence record matches the id.

=cut

sub new_from_dbid {
  my $class = shift;
  my $db    = shift;
  my $id    = shift;

  my $rows = $db->query("SELECT * FROM sequence WHERE sequence_id = $id;");
  return(_new_from_sequence_rows($class, $db, $rows, "sequence_id", $id));
}

# Private helper shared by new() and new_from_dbid().
# Takes the invoking class, the database object, the result of the
# "SELECT * FROM sequence ..." query and the column/value used for the lookup
# (only needed for the error message). Builds the object from the first row
# and loads the related annotation, sequence data, taxon and xref records.
sub _new_from_sequence_rows {
  my ($class, $db, $rows, $column, $value) = @_;

  # Without a sequence row there is nothing to build; fail with a readable
  # message instead of an "undefined value as an ARRAY reference" error.
  unless ($rows && @$rows) {
     Carp::croak("No sequence found with $column = ".(defined($value) ? $value : 'undef'));
  }

  # Runs a query and hands back its first row, or undef when nothing matched.
  my $first_row = sub {
     my $found = $db->query(shift);
     return(($found && @$found) ? $found->[0] : undef);
  };

  my $self = { DB => $db };
  @{$self}{qw(PRIMARY FAKE DB_ID LENGTH DATE VERSION ANNOT ORIG DATA TAXON)} = @{ $rows->[0] }[0 .. 9];

  # The annotation, data and taxon records are optional: when the reference is
  # undefined or the referenced row is missing, the fields simply stay unset
  # and the corresponding getters return undef.
  if (defined($self->{ANNOT})) {
     my $annot = $first_row->("SELECT * FROM sequence_annotation WHERE sequence_annotation_primary_id = ".$self->{ANNOT}.";");
     @{$self}{qw(MAINDBID UTR DESC GENENAME)} = @{$annot}[1 .. 4] if ($annot);
  }

  if (defined($self->{DATA})) {
     my $data = $first_row->("SELECT * FROM sequence_data WHERE sequence_data_primary_id =".$self->{DATA}.";");
     @{$self}{qw(FASTA BLAST)} = @{$data}[2, 3] if ($data);
  }

  if (defined($self->{TAXON})) {
     my $taxon = $first_row->("SELECT * FROM taxon_annotation WHERE taxon_primary_id =".$self->{TAXON}.";");
     @{$self}{qw(TAXID TAXNAME TAXCLASS)} = @{$taxon}[1 .. 3] if ($taxon);
  }

  # The xref table is keyed by the primary id, so always use the primary id of
  # the loaded row, never the value the caller searched with (which is a
  # sequence_id in new_from_dbid).
  my %xref;
  my $xref_rows = $db->query("SELECT xref_id,xref_type FROM sequence_xref WHERE sequence_primary_id = ".$self->{PRIMARY}.";");
  for my $row (@{ $xref_rows || [] }) {
     my ($xref_id, $xref_type) = @$row;
     push @{ $xref{$xref_type} }, $xref_id;
  }
  $self->{XREF} = \%xref;

  # Two-argument bless so that subclasses get objects of their own class.
  return(bless($self, ref($class) || $class || __PACKAGE__));
}
=head2 get_id

  $id = $seq->get_id;

  Returns the sequence primary id, i.e. the internal ID of the record in the
  MySQL database.

=cut

sub get_id {
  my ($self) = @_;
  return $self->{PRIMARY};
}
=head2 get_fake_id

  $gi = $seq->get_fake_id;

  Returns the fake GI of the sequence.

=cut

sub get_fake_id {
  my ($self) = @_;
  return $self->{FAKE};
}
=head2 get_db_id

  $db_id = $seq->get_db_id;

  Returns the full sequence ID. This is also the identifier used in the
  header line produced by get_fasta.

=cut

sub get_db_id {
  my ($self) = @_;
  return $self->{DB_ID};
}
=head2 get_length

  $length = $seq->get_length;

  Returns the length of the sequence as stored in the sequence record.

=cut

sub get_length {
  my ($self) = @_;
  return $self->{LENGTH};
}
=head2 get_date

  $date = $seq->get_date;

  Returns the modification date of the MySQL record.

=cut

sub get_date {
  my ($self) = @_;
  return $self->{DATE};
}
=head2 get_ver

  $version = $seq->get_ver;

  Returns the version of the sequence.

=cut

sub get_ver {
  my ($self) = @_;
  return $self->{VERSION};
}
=head2 get_annot_id

  $annot_id = $seq->get_annot_id;

  Returns the sequence annotation primary id, the internal MySQL ID that
  links the sequence to its sequence_annotation record. It is undefined
  when the sequence has no annotation.

=cut

sub get_annot_id {
  my ($self) = @_;
  return $self->{ANNOT};
}
=head2 get_orig_id

  $orig_id = $seq->get_orig_id;

  Returns the value stored in the ORIG slot of the object (the eighth
  column of the sequence record). Earlier documentation marked this method
  as not yet implemented, so the meaning of the value is unconfirmed.

=cut

sub get_orig_id {
  my ($self) = @_;
  return $self->{ORIG};
}
=head2 get_data_id

  $data_id = $seq->get_data_id;

  Returns the sequence data primary id, the internal MySQL ID that links
  the sequence to its sequence_data record.

=cut

sub get_data_id {
  my ($self) = @_;
  return $self->{DATA};
}
=head2 get_taxon_id

  $taxon_id = $seq->get_taxon_id;

  Returns the taxon annotation primary id, the internal MySQL ID that links
  the sequence to its taxon_annotation record. See get_taxid for the NCBI
  taxon ID.

=cut

sub get_taxon_id {
  my ($self) = @_;
  return $self->{TAXON};
}
=head2 get_data_main_db_id

  $main_db_id = $seq->get_data_main_db_id;

  Returns the main database id stored with the sequence annotation (the
  second column of the sequence_annotation record). It is undefined when
  the sequence has no annotation. Which external database the id refers
  to is not visible from this module.

=cut

sub get_data_main_db_id {
  my ($self) = @_;
  return $self->{MAINDBID};
}
=head2 get_utr_length

  $utr_length = $seq->get_utr_length;

  Returns the length of the 5' UTR that is included in the sequence.

=cut

sub get_utr_length {
  my ($self) = @_;
  return $self->{UTR};
}
=head2 get_desc

  print $seq->get_desc,"\n";

  Returns the description of the sequence taken from its annotation.

=cut

sub get_desc {
  my ($self) = @_;
  return $self->{DESC};
}
=head2 get_gene_name

  $gene_name = $seq->get_gene_name;

  Returns the gene name of the promoter. If the gene is
  unknown or not annotated, it is empty.

=cut

sub get_gene_name {
  my ($self) = @_;
  return $self->{GENENAME};
}
=head2 get_fasta

  print $seq->get_fasta;

  Returns the promoter sequence in FASTA format: a header line made of
  ">" and the full sequence ID, followed by the raw sequence on one line
  and a closing newline.

=cut

sub get_fasta {
  my ($self) = @_;
  return ">$self->{DB_ID}\n$self->{FASTA}\n";
}
=head2 get_raw_seq

  $raw = $seq->get_raw_seq;

  Returns the raw sequence without the FASTA header or any other identifier.
  Return type: string

=cut

sub get_raw_seq {
  my ($self) = @_;
  return $self->{FASTA};
}
=head2 get_blast

  print $seq->get_blast;

  Returns the value stored in the BLAST slot of the object (the fourth
  column of the sequence_data record). Earlier documentation marked this
  method as not yet implemented, so the format of the value is unconfirmed.

=cut

sub get_blast {
  my ($self) = @_;
  return $self->{BLAST};
}
=head2 get_taxid

  $taxid = $seq->get_taxid;

  Returns the NCBI taxon ID of the sequence.

=cut

sub get_taxid {
  my ($self) = @_;
  return $self->{TAXID};
}
=head2 get_taxon_name

  print $seq->get_taxon_name;

  Returns the scientific name that belongs to the taxon ID of the sequence.

=cut

sub get_taxon_name {
  my ($self) = @_;
  return $self->{TAXNAME};
}
=head2 get_taxon_class

  print $seq->get_taxon_class;

  Returns the taxonomic class that belongs to the taxon ID of the sequence.
  It is used internally to create monophyletic sets of sequences
  in an orthologous cluster.

=cut

sub get_taxon_class {
  my ($self) = @_;
  return $self->{TAXCLASS};
}
=head2 print_all_xref

  $seq->print_all_xref;

  Prints all the xrefs to other databases to the currently selected
  output handle, one line per xref type: the type name, a colon and the
  space separated IDs of that type.

  Type of xref IDs :

  go_id            : Gene Ontology ID
  ncbi_gene_id     : NCBI gene ID
  ncbi_cds_gi      : NCBI CDS GI
  ncbi_rna_gi      : NCBI RNA GI
  ncbi_cds_prot_id : NCBI CDS protein ID
  ncbi_rna_tr_id   : NCBI RNA transcript ID
  at_no            : At Number

=cut

sub print_all_xref {
  my ($self) = @_;
  # Hash order is used as-is, so the order of the printed lines is not fixed.
  foreach my $type (keys %{ $self->{XREF} }) {
     print "$type: ";
     print "$_ " foreach @{ $self->{XREF}{$type} };
     print "\n";
  }
}
=head2 get_all_xref_keys

  @keys = @{$seq->get_all_xref_keys};

  Returns an arrayref with the names of the xref types that exist for this
  sequence. The order of the names is not defined.

=cut

sub get_all_xref_keys {
  my ($self) = @_;
  return [ keys %{ $self->{XREF} } ];
}
=head2 get_xref_value

  @values = @{$seq->get_xref_value("go_id")};

  Returns the arrayref of the values of the given xref type, or -1 when
  the sequence has no xref of that type. Check for -1 before dereferencing
  the result.

=cut

sub get_xref_value {
  my ($self, $key) = @_;
  # Unknown xref types are reported with the -1 error value.
  return(-1) unless $self->{XREF}{$key};
  return $self->{XREF}{$key};
}
=head2 get_all_seq_features

  @seqfeat = @{$seq->get_all_seq_features};

  Returns an arrayref of Bio::DOOP::SequenceFeature objects for all features
  of this sequence, ordered by feature start, or -1 when the sequence has
  no features. Check for -1 before dereferencing the result.

=cut

sub get_all_seq_features {
  my ($self) = @_;
  my $db = $self->{DB};

  # The features are fetched ordered by their start position because the
  # order matters for correctly drawing the picture of the cluster.
  my $rows = $db->query("SELECT sequence_feature_primary_id FROM sequence_feature WHERE sequence_primary_id =$self->{PRIMARY} ORDER BY feature_start;");

  # No matching row at all is reported with the -1 error value.
  if ($#$rows < 0) {
     return(-1);
  }

  my @features = map { scalar(Bio::DOOP::SequenceFeature->new($db, $_->[0])) } @$rows;
  return \@features;
}
=head2 get_all_subsets

  @subsets = @{$seq->get_all_subsets};

  Returns an arrayref of Bio::DOOP::ClusterSubset objects, one for every
  subset that contains the sequence, or -1 when no subset is found.
  Check for -1 before dereferencing the result.

=cut

sub get_all_subsets {
  my ($self) = @_;
  my $db = $self->{DB};

  my $rows = $db->query("SELECT subset_primary_id FROM subset_xref WHERE sequence_primary_id = $self->{PRIMARY}");

  # No matching row at all is reported with the -1 error value.
  if ($#$rows < 0) {
     return(-1);
  }

  my @subsets = map { Bio::DOOP::ClusterSubset->new($db, $_->[0]) } @$rows;
  return \@subsets;
}
1;
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)