Function: Parses the next alignment of the est2genome result file and
returns the found exons as an array of
Bio::SeqFeature::SimilarityPair objects. Call
this method repeatedly until an empty array is returned to get the
results for all alignments.
The $exon->seq_id() attribute will be set to the identifier of the
respective sequence for both sequences.
The length is accessible via the seqlength()
attribute of $exon->query() and
$exon->est_hit().
Returns : An array (or array reference) of Bio::SeqFeature::SimilarityPair and Bio::SeqFeature::Generic objects
or Bio::SeqFeature::Gene::GeneStructure
Args : flag (1/0): when true, return a Bio::SeqFeature::Gene::GeneStructure;
when false, return the Bio::SeqFeature::SimilarityPair (and Generic) features.
Defaults to 0.
=cut
sub parse_next_gene {
    # Parse the next alignment of the est2genome report read via
    # $self->_readline and return the features found in it.
    #
    # Args    : $return_gene - when true, the work is delegated to
    #           $self->_parse_gene_struct and its result is returned as-is.
    # Returns : the collected features as a list in list context, or as an
    #           array reference in scalar context; a bare return (empty
    #           list / undef) when no feature was found.
    my ($self, $return_gene) = @_;
    return $self->_parse_gene_struct if $return_gene;

    my $seensegment = 0;
    my @features;
    my ($qstrand, $hstrand) = (1, 1);
    my $lasthseqname;

    # A lexical line buffer is used so the caller's $_ is left untouched.
    while ( defined( my $line = $self->_readline ) ) {
        if ( $line =~ /Note Best alignment is between (reversed|forward) est and (reversed|forward) genome, (but|and) splice\s+sites imply\s+(forward gene|REVERSED GENE)/ ) {
            # Save the captures before anything else can run a regex.
            my ($est_direction, $gene_direction) = ($1, $4);

            if ($seensegment) {
                # A header seen after a Segment line belongs to the next
                # alignment: hand it back and return what we have so far.
                $self->_pushback($line);
                return wantarray ? @features : \@features;
            }
            $hstrand = -1 if $est_direction eq 'reversed';
            $qstrand = -1 if $gene_direction eq 'REVERSED GENE';
        }
        elsif ( $line =~ /^Exon/ ) {
            # Second whitespace-separated column is not used.
            my ($name, undef, $score, $qstart, $qend, $qseqname,
                $hstart, $hend, $hseqname) = split ' ', $line;
            $lasthseqname = $hseqname;

            my $query = Bio::SeqFeature::Similarity->new(
                -primary => $name,
                -source  => $self->analysis_method,
                -seq_id  => $qseqname, # FIXME WHEN WE REDO THE GENERIC NAME CHANGE
                -start   => $qstart,
                -end     => $qend,
                -strand  => $qstrand,
                -score   => $score,
                -tag     => {
                    # 'Location' => "$hstart..$hend",
                    'Sequence' => "$hseqname",
                },
            );
            my $hit = Bio::SeqFeature::Similarity->new(
                -primary => 'exon_hit',
                -source  => $self->analysis_method,
                -seq_id  => $hseqname,
                -start   => $hstart,
                -end     => $hend,
                -strand  => $hstrand,
                -score   => $score,
                -tag     => {
                    # 'Location' => "$qstart..$qend",
                    'Sequence' => "$qseqname",
                },
            );
            # Each exon is reported as one query/hit pair.
            push @features, Bio::SeqFeature::SimilarityPair->new(
                -query  => $query,
                -hit    => $hit,
                -source => $self->analysis_method,
            );
        }
        # NOTE(review): pattern assumes intron lines start with a '+', '-'
        # or '?' flag immediately followed by 'Intron' -- confirm against
        # est2genome output.
        elsif ( $line =~ /^([\-\+\?])(Intron)/ ) {
            my $primary = $2;
            my ($score, $qstart, $qend, $qseqname)
                = ( split ' ', $line )[ 2 .. 5 ];

            # Intron lines carry no hit sequence name, so the one from the
            # most recent Exon line is attached instead.
            push @features, Bio::SeqFeature::Generic->new(
                -primary => $primary,
                -source  => $self->analysis_method,
                -start   => $qstart,
                -end     => $qend,
                -strand  => $qstrand,
                -score   => $score,
                -seq_id  => $qseqname,
                -tag     => { 'Sequence' => $lasthseqname },
            );
        }
        elsif ( $line =~ /^Span/ ) {
            # Span lines are recognised but deliberately ignored.
        }
        elsif ( $line =~ /^Segment/ ) {
            $seensegment = 1;
        }
        elsif ( $line =~ /^\s+$/ ) {
            # do nothing
        }
        else {
            $self->warn("unknown line $line\n");
        }
    }

    return unless @features;
    return wantarray ? @features : \@features;
}
sub_parse_gene_struct {
my($self) = @_;
my$seensegment= 0;
my@features;
my($qstrand,$hstrand) = (1,1);
my$lasthseqname;
my$gene= new Bio::SeqFeature::Gene::GeneStructure(-source=> $self->analysis_method);
my$transcript= new Bio::SeqFeature::Gene::Transcript(-source=> $self->analysis_method);
my@suppf;
my@exon;
while( defined($_= $self->_readline) ) {
if( /Note Best alignment is between (reversed|forward) est and (reversed|forward) genome, (but|and) splice\s+sites imply\s+(forward gene|REVERSED GENE)/) {