use
vars
qw(%MAPPING %MODEMAP
$DEFAULT_BLAST_WRITER_CLASS
$MAX_HSP_OVERLAP
$DEFAULT_SIGNIF
$DEFAULT_SCORE
$DEFAULTREPORTTYPE
)
;
# Compile-time setup of the package-wide lookup tables and defaults.
#
#   %MODEMAP  element name -> event type; start_element()/end_element()
#             use the type to build the handler method name
#             ("start_<type>" / "end_<type>").
#   %MAPPING  leaf element name -> slot in the parser's '_values' hash.
#             A plain string is the slot key itself; a one-pair hash ref
#             { OUTER => INNER } means the value is stored at
#             $values->{OUTER}->{INNER}.
BEGIN {
    %MODEMAP = (
        'BlastOutput' => 'result',
        'Iteration'   => 'iteration',
        'Hit'         => 'hit',
        'Hsp'         => 'hsp',
    );

    %MAPPING = (

        # --- HSP level ---------------------------------------------------
        'Hsp_bit-score'   => 'HSP-bits',
        'Hsp_score'       => 'HSP-score',
        'Hsp_evalue'      => 'HSP-evalue',
        'Hsp_pvalue'      => 'HSP-pvalue',
        'Hsp_query-from'  => 'HSP-query_start',
        'Hsp_query-to'    => 'HSP-query_end',
        'Hsp_hit-from'    => 'HSP-hit_start',
        'Hsp_hit-to'      => 'HSP-hit_end',
        'Hsp_positive'    => 'HSP-conserved',
        'Hsp_identity'    => 'HSP-identical',
        'Hsp_gaps'        => 'HSP-hsp_gaps',
        'Hsp_hitgaps'     => 'HSP-hit_gaps',
        'Hsp_querygaps'   => 'HSP-query_gaps',
        'Hsp_qseq'        => 'HSP-query_seq',
        'Hsp_hseq'        => 'HSP-hit_seq',
        'Hsp_midline'     => 'HSP-homology_seq',
        'Hsp_align-len'   => 'HSP-hsp_length',
        'Hsp_query-frame' => 'HSP-query_frame',
        'Hsp_hit-frame'   => 'HSP-hit_frame',
        'Hsp_links'       => 'HSP-links',
        'Hsp_group'       => 'HSP-hsp_group',

        # --- hit level ---------------------------------------------------
        'Hit_id'        => 'HIT-name',
        'Hit_len'       => 'HIT-length',
        'Hit_accession' => 'HIT-accession',
        'Hit_def'       => 'HIT-description',
        'Hit_signif'    => 'HIT-significance',
        'Hit_score'     => 'HIT-score',
        'Hit_bits'      => 'HIT-bits',

        # --- iteration level ---------------------------------------------
        'Iteration_iter-num'  => 'ITERATION-number',
        'Iteration_converged' => 'ITERATION-converged',

        # --- result level ------------------------------------------------
        'BlastOutput_program'             => 'RESULT-algorithm_name',
        'BlastOutput_version'             => 'RESULT-algorithm_version',
        'BlastOutput_query-def'           => 'RESULT-query_name',
        'BlastOutput_query-len'           => 'RESULT-query_length',
        'BlastOutput_query-acc'           => 'RESULT-query_accession',
        'BlastOutput_querydesc'           => 'RESULT-query_description',
        'BlastOutput_db'                  => 'RESULT-database_name',
        'BlastOutput_db-len'              => 'RESULT-database_entries',
        'BlastOutput_db-let'              => 'RESULT-database_letters',
        'BlastOutput_inclusion-threshold' => 'RESULT-inclusion_threshold',

        # --- search parameters (nested under RESULT-parameters) ----------
        'Parameters_matrix'      => { 'RESULT-parameters' => 'matrix' },
        'Parameters_expect'      => { 'RESULT-parameters' => 'expect' },
        'Parameters_include'     => { 'RESULT-parameters' => 'include' },
        'Parameters_sc-match'    => { 'RESULT-parameters' => 'match' },
        'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch' },
        'Parameters_gap-open'    => { 'RESULT-parameters' => 'gapopen' },
        'Parameters_gap-extend'  => { 'RESULT-parameters' => 'gapext' },
        'Parameters_filter'      => { 'RESULT-parameters' => 'filter' },
        'Parameters_allowgaps'   => { 'RESULT-parameters' => 'allowgaps' },
        'Parameters_full_dbpath' => { 'RESULT-parameters' => 'full_dbpath' },

        # --- search statistics (nested under RESULT-statistics) ----------
        'Statistics_db-len'    => { 'RESULT-statistics' => 'dbentries' },
        'Statistics_db-let'    => { 'RESULT-statistics' => 'dbletters' },
        'Statistics_hsp-len'   =>
          { 'RESULT-statistics' => 'effective_hsplength' },
        'Statistics_query-len' => { 'RESULT-statistics' => 'querylength' },
        'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace' },
        'Statistics_eff-spaceused' =>
          { 'RESULT-statistics' => 'effectivespaceused' },
        'Statistics_eff-dblen' =>
          { 'RESULT-statistics' => 'effectivedblength' },
        'Statistics_kappa'        => { 'RESULT-statistics' => 'kappa' },
        'Statistics_lambda'       => { 'RESULT-statistics' => 'lambda' },
        'Statistics_entropy'      => { 'RESULT-statistics' => 'entropy' },
        'Statistics_gapped_kappa' => { 'RESULT-statistics' => 'kappa_gapped' },
        'Statistics_gapped_lambda' =>
          { 'RESULT-statistics' => 'lambda_gapped' },
        'Statistics_gapped_entropy' =>
          { 'RESULT-statistics' => 'entropy_gapped' },
        'Statistics_framewindow' =>
          { 'RESULT-statistics' => 'frameshiftwindow' },
        'Statistics_decay'     => { 'RESULT-statistics' => 'decayconst' },
        'Statistics_hit_to_db' => { 'RESULT-statistics' => 'Hits_to_DB' },
        'Statistics_num_suc_extensions' =>
          { 'RESULT-statistics' => 'num_successful_extensions' },
        'Statistics_DFA_states' => { 'RESULT-statistics' => 'num_dfa_states' },
        'Statistics_DFA_size'   => { 'RESULT-statistics' => 'dfa_size' },
        'Statistics_noprocessors' =>
          { 'RESULT-statistics' => 'no_of_processors' },
        'Statistics_neighbortime' =>
          { 'RESULT-statistics' => 'neighborhood_generate_time' },
        'Statistics_starttime' => { 'RESULT-statistics' => 'start_time' },
        'Statistics_endtime'   => { 'RESULT-statistics' => 'end_time' },
    );

    # Per-frame statistics: one family of keys for each strand/frame
    # combination ("+0" .. "+3", "-0" .. "-3").
    my @frame_fields =
      qw(length efflength E S W T X X_gapped E2 E2_gapped S2);
    my @karlin_terms = qw(lambda kappa entropy);
    my @karlin_kinds = qw(used computed gapped);

    for my $strand ( '+', '-' ) {
        for my $frame ( 0 .. 3 ) {
            my $tag = "$strand$frame";

            for my $field (@frame_fields) {
                $MAPPING{"Statistics_frame${tag}_$field"} =
                  { 'RESULT-statistics' => "Frame${tag}_$field" };
            }

            for my $term (@karlin_terms) {
                for my $kind (@karlin_kinds) {
                    my $suffix = "${tag}_${term}_$kind";
                    $MAPPING{"Statistics_frame$suffix"} =
                      { 'RESULT-statistics' => "Frame$suffix" };
                }
            }
        }
    }

    # Statistics whose element suffix is used unchanged as the slot name.
    my @plain_stats = qw(
      T A X1 X2 X3 S1 S2 X1_bits X2_bits X3_bits
      S1_bits S2_bits num_extensions
      num_successful_extensions
      seqs_better_than_cutoff
      posted_date
      search_cputime total_cputime
      search_actualtime total_actualtime
      no_of_processors ctxfactor
    );
    for my $name (@plain_stats) {
        $MAPPING{"Statistics_$name"} = { 'RESULT-statistics' => $name };
    }

    # Parameters whose element suffix is used unchanged as the slot name.
    my @plain_params = qw(
      span span1 span2 links warnings notes hspsepsmax
      hspsepqmax topcomboN topcomboE postsw cpus wordmask
      filter sort_by_pvalue sort_by_count sort_by_highscore
      sort_by_totalscore sort_by_subjectlength noseqs gi qtype
      qres V B Z Y M N
    );
    for my $name (@plain_params) {
        $MAPPING{"Parameters_$name"} = { 'RESULT-parameters' => $name };
    }

    # Package defaults.
    $DEFAULTREPORTTYPE          = 'BLASTP';
    $MAX_HSP_OVERLAP            = 2;
    $DEFAULT_BLAST_WRITER_CLASS = 'Bio::Search::Writer::HitTableWriter';
}
# Initialise a new parser object.
#
# Runs the superclass initialiser, creates an
# IteratedSearchResultEventBuilder from the same arguments and attaches it
# as the event handler, then applies the optional named arguments below.
#
# Recognised named arguments (extracted with _rearrange):
#   MIN_LENGTH      passed to min_query_length()
#   CHECK_ALL_HITS  passed to check_all_hits()
#   OVERLAP         accepted for positional compatibility; not used here
#   BEST            passed to best_hit_only()
#   REPORT_TYPE     stored directly in $self->{'_reporttype'}
sub _initialize {
    my ( $self, @args ) = @_;
    $self->SUPER::_initialize(@args);

    # Explicit Class->new(...) rather than the indirect-object form
    # "new Class(...)", which is ambiguous to the parser and is disabled
    # outright under newer feature bundles.
    my $handler =
      Bio::SearchIO::IteratedSearchResultEventBuilder->new(@args);
    $self->attach_EventHandler($handler);

    # The third slot (OVERLAP) is deliberately discarded.
    my ( $min_qlen, $check_all, undef, $best, $rpttype ) = $self->_rearrange(
        [qw(MIN_LENGTH CHECK_ALL_HITS OVERLAP BEST REPORT_TYPE)], @args );

    defined $min_qlen  && $self->min_query_length($min_qlen);
    defined $best      && $self->best_hit_only($best);
    defined $check_all && $self->check_all_hits($check_all);
    defined $rpttype   && ( $self->{'_reporttype'} = $rpttype );
}
# Attach an event handler to this parser.
#
# Delegates to the superclass implementation and additionally keeps the
# handler in $self->{'_handler_cache'}, which the other methods in this
# file read directly. Returns nothing.
sub attach_EventHandler {
    my $self    = shift;
    my $handler = shift;

    $self->SUPER::attach_EventHandler($handler);

    # Direct reference used by _will_handle(), max_significance(), etc.
    $self->{'_handler_cache'} = $handler;
    return;
}
# Parse the next report from the input stream and return the result built
# by the attached event handler.
#
# Lines are read with $self->_readline and turned into SAX-like events
# (start_element / element / characters / end_element). Parsing stops at
# the start of the following report (that line is pushed back for the next
# call) or at end of input.
#
# Returns: the value of $self->end_document(), i.e. whatever the handler
# returned for the closing 'BlastOutput' element (undef if none was seen).
#
# Changes relative to the previous revision of this sub:
#   * several patterns had been split over multiple lines without /x, so
#     they contained literal newlines and could never match a line
#     ("CPU time:", "Lambda", "Histogram", "effective HSP length", ...);
#   * the /x pattern for "Time to generate neighborhood" contained literal
#     spaces, which /x ignores; it now uses \s+;
#   * "qype" in the WU-BLAST flag list corrected to "qtype" (the key that
#     %MAPPING declares);
#   * capture groups are copied into lexicals immediately after the match,
#     so a later match or substitution cannot clobber them (previously the
#     optional N column was lost when the e-value started with "e");
#   * the "Features ..." skip loop no longer spins forever at end of input;
#   * the description table is sorted once per section instead of once per
#     line.
sub next_result {
    my ($self) = @_;

    my $flavor = '';    # 'ncbi' or 'wu'; recorded but not consulted below
    $self->{'_seentop'} = 0;
    my ( $reporttype, $seenquery, $reportline );
    my $seeniteration;
    my $incl_threshold = $self->inclusion_threshold;
    my $bl2seq_fix;     # set when the program name had to be inferred
    $self->start_document();
    my @hit_signifs;    # rows of [ evalue, score, id, description ]
    my $gapped_stats = 0;    # set once a "Gapped" line is seen in the footer

    # Protect the caller's $_; every read below goes through $_.
    local $_ = "\n";

    while ( defined( $_ = $self->_readline ) ) {
        next if (/^\s+$/);
        next if (/CPU time:/);
        next if (/^>\s*$/);

        # ---- report header: program name and version --------------------
        if (   /^([T]?BLAST[NPX])\s*(.+)$/i
            || /^(PSITBLASTN)\s+(.+)$/i
            || /^(RPS-BLAST)\s*(.+)$/i
            || /^(MEGABLAST)\s*(.+)$/i
            || /^(P?GENEWISE|HFRAME|SWN|TSWN)\s+(.+)/i )
        {
            my ( $program, $version ) = ( $1, $2 );
            $self->debug(
                "blast.pm: Start of new report: $program $version\n");

            if ( $self->{'_seentop'} ) {

                # A second header: this line belongs to the next report.
                $self->_pushback($_);
                $self->end_element( { 'Name' => 'Hsp' } )
                  if $self->in_element('hsp');
                $self->end_element( { 'Name' => 'Hit' } )
                  if $self->in_element('hit');
                $self->end_element( { 'Name' => 'Iteration' } )
                  if $self->within_element('iteration');
                $self->end_element( { 'Name' => 'BlastOutput' } );
                return $self->end_document();
            }

            $self->_start_blastoutput;
            $reporttype = $program;
            if ( $reporttype =~ /RPS-BLAST/ ) {
                $reporttype .= '(BLASTP)';
            }
            $reportline = $_;    # kept so it can be pushed back later
            $self->element(
                { 'Name' => 'BlastOutput_program', 'Data' => $reporttype } );
            $self->element(
                { 'Name' => 'BlastOutput_version', 'Data' => $version } );
            $self->element(
                {
                    'Name' => 'BlastOutput_inclusion-threshold',
                    'Data' => $incl_threshold
                }
            );
        }

        # ---- PSI-BLAST round marker -------------------------------------
        elsif (/^(Searching|Results from round)/) {
            next unless $1 =~ /Results from round/;
            $self->debug(
                "blast.pm: Possible psi blast iterations found...\n");

            $self->end_element( { 'Name' => 'Hsp' } )
              if $self->in_element('hsp');
            $self->end_element( { 'Name' => 'Hit' } )
              if $self->in_element('hit');
            if ( defined $seeniteration ) {
                $self->end_element( { 'Name' => 'Iteration' } )
                  if $self->within_element('iteration');
            }
            $self->_start_iteration;
            $seeniteration = 1;
        }

        # ---- query definition -------------------------------------------
        elsif (/^Query=\s*(.*)$/) {
            my $q = $1;
            $self->debug("blast.pm: Query= found...$_\n");
            my $size = 0;

            if ( defined $seenquery ) {

                # Second query under one header: finish this result and
                # replay the header line plus this line for the next call.
                $self->_pushback($reportline) if $reportline;
                $self->_pushback($_);
                $self->end_element( { 'Name' => 'Hsp' } )
                  if $self->in_element('hsp');
                $self->end_element( { 'Name' => 'Hit' } )
                  if $self->in_element('hit');
                $self->end_element( { 'Name' => 'Iteration' } )
                  if $self->within_element('iteration');
                if ($bl2seq_fix) {
                    $self->element(
                        {
                            'Name' => 'BlastOutput_program',
                            'Data' => $reporttype
                        }
                    );
                }
                $self->end_element( { 'Name' => 'BlastOutput' } );
                return $self->end_document();
            }

            if ( !defined $reporttype ) {

                # No header line was seen before the query.
                $self->_start_blastoutput;
                if ( defined $seeniteration ) {
                    $self->end_element( { 'Name' => 'Iteration' } )
                      if $self->in_element('iteration');
                }
                $self->_start_iteration;
                $seeniteration = 1;
            }
            $seenquery = $q;

            # The description may continue over several lines; it ends at
            # the "(N letters)" / "Length=N" line or at "Database:".
            $_ = $self->_readline;
            while ( defined($_) ) {
                if (/^Database:/) {
                    $self->_pushback($_);
                    last;
                }
                chomp;
                if (   /\((\-?[\d,]+)\s+letters.*\)/
                    || /^Length=(\-?[\d,]+)/ )
                {
                    $size = $1;
                    $size =~ s/,//g;
                    last;
                }
                else {
                    $q .= " $_";
                    $q =~ s/ +/ /g;
                    $q =~ s/^ | $//g;
                }
                $_ = $self->_readline;
            }
            chomp($q);

            my ( $nm, $desc ) = split( /\s+/, $q, 2 );
            $self->element(
                { 'Name' => 'BlastOutput_query-def', 'Data' => $nm } );
            $self->element(
                { 'Name' => 'BlastOutput_query-len', 'Data' => $size } );
            defined $desc && $desc =~ s/\s+$//;
            $self->element(
                { 'Name' => 'BlastOutput_querydesc', 'Data' => $desc } );

            my ( $acc, $version ) = _get_accession_version($nm);
            $version =
              defined($version) && length($version) ? ".$version" : "";
            $acc = '' unless defined($acc);
            $self->element(
                {
                    'Name' => 'BlastOutput_query-acc',
                    'Data' => "$acc$version"
                }
            );
        }

        # ---- NCBI one-line descriptions ---------------------------------
        elsif (/Sequences producing significant alignments:/) {
            $self->debug("blast.pm: Processing NCBI-BLAST descriptions\n");
            $flavor = 'ncbi';
            if ( !$self->in_element('iteration') ) {
                $self->_start_iteration;
            }

            # True once at least one line has been fully processed; the
            # table is then sorted once after the loop, which yields the
            # same order as re-sorting after every line.
            my $needs_sort = 0;
          DESCLINE:
            while ( defined( $_ = $self->_readline() ) ) {
                if (   /^>/
                    || /^\s+Database:\s+?/
                    || /^Parameters:/
                    || /^\s+Subset/
                    || /^\s*Lambda/
                    || /^\s*Histogram/ )
                {
                    $self->_pushback($_);
                    last DESCLINE;
                }
                elsif (/([\d\.\+\-eE]+)\s+([\d\.\+\-eE]+)(\s+\d+)?\s*$/) {
                    my ( $score, $evalue, $n_col ) = ( $1, $2, $3 );
                    $evalue =~ s/^e/1e/i;    # "e-100" -> "1e-100"
                    my @line = split;
                    pop @line;               # evalue
                    pop @line;               # score
                    pop @line if $n_col;     # optional trailing N column
                    push @hit_signifs,
                      [ $evalue, $score, shift @line, join( ' ', @line ) ];
                }
                elsif (/^CONVERGED/i) {
                    $self->element(
                        { 'Name' => 'Iteration_converged', 'Data' => 1 } );
                }
                $needs_sort = 1;
            }
            @hit_signifs = sort { $a->[0] <=> $b->[0] } @hit_signifs
              if $needs_sort;
        }

        # ---- WU-BLAST one-line descriptions -----------------------------
        elsif (/Sequences producing High-scoring Segment Pairs:/) {
            $self->debug("blast.pm: Processing WU-BLAST descriptions\n");
            $_      = $self->_readline();    # discard the following line
            $flavor = 'wu';
            if ( !$self->in_element('iteration') ) {
                $self->_start_iteration;
            }
            while ( defined( $_ = $self->_readline() ) && !/^\s+$/ ) {
                my @line = split;
                pop @line;                   # last column is dropped
                push @hit_signifs,
                  [ pop @line, pop @line, shift @line, join( ' ', @line ) ];
            }
        }

        # ---- database name and size -------------------------------------
        elsif (/^Database:\s*(.+)$/) {
            my $db = $1;
            $self->debug("blast.pm: Database: $db\n");
            while ( defined( $_ = $self->_readline ) ) {
                if (
                    /^\s+(\-?[\d\,]+|\S+)\s+sequences\;
                      \s+(\-?[\d,]+|\S+)\s+
                      total\s+letters/ox
                  )
                {
                    my ( $s, $l ) = ( $1, $2 );
                    $s =~ s/,//g;
                    $l =~ s/,//g;
                    $self->element(
                        { 'Name' => 'BlastOutput_db-len', 'Data' => $s } );
                    $self->element(
                        { 'Name' => 'BlastOutput_db-let', 'Data' => $l } );
                    last;
                }
                else {

                    # database name continues on this line
                    chomp;
                    $db .= $_;
                }
            }
            $self->element( { 'Name' => 'BlastOutput_db', 'Data' => $db } );
        }

        # ---- feature annotation block: skipped --------------------------
        elsif (/^\sFeatures\s\w+\sthis\spart\sof\ssubject\ssequence:/) {

            # Stop at end of input as well, otherwise this would loop
            # forever on a truncated report.
            while ( defined($_) && $_ !~ /^\sScore\s=/ ) {
                $self->debug("Bypassing features line: $_");
                $_ = $self->_readline;
            }
            $self->_pushback($_) if defined $_;
        }

        # ---- start of a hit ---------------------------------------------
        elsif (/^>\s*(\S+)\s*(.*)?/) {
            my ( $id, $restofline ) = ( $1, $2 );
            chomp;
            $self->debug("blast.pm: Hit: $id\n");

            $self->end_element( { 'Name' => 'Hsp' } )
              if $self->in_element('hsp');
            $self->end_element( { 'Name' => 'Hit' } )
              if $self->in_element('hit');

            # Reports without header/description sections still need the
            # enclosing result and iteration opened.
            if ( !$self->within_element('result') ) {
                $self->_start_blastoutput;
                $self->_start_iteration;
            }
            elsif ( !$self->within_element('iteration') ) {
                $self->_start_iteration;
            }

            $self->start_element( { 'Name' => 'Hit' } );
            $self->debug("Starting a hit: $id $restofline\n");
            $self->element( { 'Name' => 'Hit_id', 'Data' => $id } );
            my ( $acc, $version ) = _get_accession_version($id);
            $self->element( { 'Name' => 'Hit_accession', 'Data' => $acc } );

            # Pair the hit with the next row of the description table.
            my $sig = shift @hit_signifs;
            if ( defined $sig ) {
                $self->element(
                    { 'Name' => 'Hit_signif', 'Data' => $sig->[0] } );
                $self->element(
                    { 'Name' => 'Hit_score', 'Data' => $sig->[1] } );
            }

            # Collect continuation lines of the description up to Length=.
            while ( defined( $_ = $self->_readline() ) ) {
                next if (/^\s+$/);
                chomp;
                if (/Length\s*=\s*([\d,]+)/) {
                    my $l = $1;
                    $l =~ s/\,//g;
                    $self->element( { 'Name' => 'Hit_len', 'Data' => $l } );
                    last;
                }
                else {
                    $restofline .= $_;
                }
            }
            $restofline =~ s/\s+/ /g;
            $self->element( { 'Name' => 'Hit_def', 'Data' => $restofline } );
        }
        elsif (/\s+(Plus|Minus) Strand HSPs:/i) {
            next;
        }

        # ---- HSP header: "bits ..., Log-Length Score" form --------------
        elsif (
            ( $self->in_element('hit') || $self->in_element('hsp') )
            && m/Score\s*=\s*(\S+)\s*bits\s*
                 (?:\((\d+)\))?,
                 \s+Log\-Length\sScore\s*=\s*(\d+)
                /ox
          )
        {
            my ( $bits, $score, $evalue ) = ( $1, $2, $3 );
            $self->end_element( { 'Name' => 'Hsp' } )
              if $self->in_element('hsp');
            $self->start_element( { 'Name' => 'Hsp' } );
            $self->debug("Got paracel genewise HSP score=$bits\n");

            # The third capture is reported as the e-value here, as before.
            $evalue =~ s/^e/1e/i;
            $self->element( { 'Name' => 'Hsp_score', 'Data' => $score } );
            $self->element( { 'Name' => 'Hsp_bit-score', 'Data' => $bits } );
            $self->element( { 'Name' => 'Hsp_evalue', 'Data' => $evalue } );
        }

        # ---- HSP header: "Score, Expect, P" form ------------------------
        elsif (
            ( $self->in_element('hit') || $self->in_element('hsp') )
            && m/Score\s*=\s*([^,\s]+),
                 \s*Expect\s*=\s*([^,\s]+),
                 \s*P(?:\(\S+\))?\s*=\s*([^,\s]+)
                /ox
          )
        {
            my ( $score, $evalue, $pvalue ) = ( $1, $2, $3 );
            $self->end_element( { 'Name' => 'Hsp' } )
              if $self->in_element('hsp');
            $self->start_element( { 'Name' => 'Hsp' } );
            $self->debug("Got paracel hframe HSP score=$score\n");

            $evalue = "1$evalue" if $evalue =~ /^e/;
            $pvalue = "1$pvalue" if $pvalue =~ /^e/;
            $self->element( { 'Name' => 'Hsp_score',  'Data' => $score } );
            $self->element( { 'Name' => 'Hsp_evalue', 'Data' => $evalue } );
            $self->element( { 'Name' => 'Hsp_pvalue', 'Data' => $pvalue } );
        }

        # ---- HSP header: "Score (bits), Expect, [Sum] P [, Group]" ------
        elsif (
            ( $self->in_element('hit') || $self->in_element('hsp') )
            && m/Score\s*=\s*(\S+)\s*
                 \(([\d\.]+)\s*bits\),
                 \s*Expect\s*=\s*([^,\s]+),
                 \s*(?:Sum)?\s*
                 P(?:\(\d+\))?\s*=\s*([^,\s]+)
                 (?:\s*,\s+Group\s*\=\s*(\d+))?
                /ox
          )
        {
            my ( $score, $bits, $evalue, $pvalue, $group ) =
              ( $1, $2, $3, $4, $5 );
            $self->end_element( { 'Name' => 'Hsp' } )
              if $self->in_element('hsp');
            $self->start_element( { 'Name' => 'Hsp' } );

            $evalue =~ s/^e/1e/i;
            $pvalue =~ s/^e/1e/i;
            $self->element( { 'Name' => 'Hsp_score', 'Data' => $score } );
            $self->element( { 'Name' => 'Hsp_bit-score', 'Data' => $bits } );
            $self->element( { 'Name' => 'Hsp_evalue', 'Data' => $evalue } );
            $self->element( { 'Name' => 'Hsp_pvalue', 'Data' => $pvalue } );
            if ( defined $group ) {
                $self->element(
                    { 'Name' => 'Hsp_group', 'Data' => $group } );
            }
        }

        # ---- HSP header: "Score = N bits (M), Expect = E" ---------------
        elsif (
            ( $self->in_element('hit') || $self->in_element('hsp') )
            && m/Score\s*=\s*(\S+)\s*bits\s*
                 (?:\((\d+)\))?,
                 \s*Expect(?:\(\d+\+?\))?\s*=\s*(\S+)
                /ox
          )
        {
            my ( $bits, $score, $evalue ) = ( $1, $2, $3 );
            $self->end_element( { 'Name' => 'Hsp' } )
              if $self->in_element('hsp');
            $evalue =~ s/^e/1e/i;
            $self->start_element( { 'Name' => 'Hsp' } );
            $self->element( { 'Name' => 'Hsp_score', 'Data' => $score } );
            $self->element( { 'Name' => 'Hsp_bit-score', 'Data' => $bits } );
            $self->element( { 'Name' => 'Hsp_evalue', 'Data' => $evalue } );
            $score = '' unless defined $score;    # only for the log line
            $self->debug("Got NCBI HSP score=$score, evalue $evalue\n");
        }

        # ---- Identities / Positives / Gaps ------------------------------
        elsif (
            $self->in_element('hsp')
            && m/Identities\s*=\s*(\d+)\s*\/\s*(\d+)\s*[\d\%\(\)]+\s*
                 (?:,\s*Positives\s*=\s*(\d+)\/(\d+)\s*[\d\%\(\)]+\s*)?
                 (?:\,\s*Gaps\s*=\s*(\d+)\/(\d+))?
                /oxi
          )
        {
            my ( $identical, $aln_len, $positive, $gaps, $gaps_of ) =
              ( $1, $2, $3, $5, $6 );
            $self->element(
                { 'Name' => 'Hsp_identity', 'Data' => $identical } );
            $self->element(
                { 'Name' => 'Hsp_align-len', 'Data' => $aln_len } );

            # Without a Positives field the identity count is reused.
            $self->element(
                {
                    'Name' => 'Hsp_positive',
                    'Data' => defined $positive ? $positive : $identical
                }
            );
            if ( defined $gaps_of ) {
                $self->element( { 'Name' => 'Hsp_gaps', 'Data' => $gaps } );
            }

            # Reset the coordinate trackers filled in by the alignment
            # lines and read back by end_element('Hsp').
            $self->{'_Query'} = { 'begin' => 0, 'end' => 0 };
            $self->{'_Sbjct'} = { 'begin' => 0, 'end' => 0 };

            # A Frame field on the same line is replayed as its own line.
            if (/(Frame\s*=\s*.+)$/) {
                $self->_pushback($1);
            }
        }

        # ---- Strand line ------------------------------------------------
        elsif ( $self->in_element('hsp')
            && /Strand\s*=\s*(Plus|Minus)\s*\/\s*(Plus|Minus)/i )
        {
            # No header seen: a Strand line implies a nucleotide search.
            unless ($reporttype) {
                $self->{'_reporttype'} = $reporttype = 'BLASTN';
                $bl2seq_fix = 1;
            }
            next;
        }
        elsif ( $self->in_element('hsp')
            && /Links\s*=\s*(\S+)/ox )
        {
            $self->element( { 'Name' => 'Hsp_links', 'Data' => $1 } );
        }

        # ---- Frame line -------------------------------------------------
        elsif ( $self->in_element('hsp')
            && /Frame\s*=\s*([\+\-][1-3])\s*(\/\s*([\+\-][1-3]))?/ )
        {
            my ( $frame1, $frame2 ) = ( $1, $2 );

            # No header seen: infer the program from the number of frames.
            unless ( defined $reporttype ) {
                $bl2seq_fix = 1;
                $reporttype = ( $frame1 && $frame2 ) ? 'TBLASTX' : 'BLASTX';
                $self->{'_reporttype'} = $reporttype;
            }

            my ( $queryframe, $hitframe );
            if ( $reporttype eq 'TBLASTX' ) {
                ( $queryframe, $hitframe ) = ( $frame1, $frame2 );
                $hitframe =~ s/\/\s*//g;    # strip the "/ " separator
            }
            elsif ($reporttype eq 'TBLASTN'
                || $reporttype eq 'PSITBLASTN' )
            {
                ( $hitframe, $queryframe ) = ( $frame1, 0 );
            }
            elsif ($reporttype eq 'BLASTX'
                || $reporttype eq 'RPS-BLAST(BLASTP)' )
            {
                ( $queryframe, $hitframe ) = ( $frame1, 0 );

                # RPS-BLAST output carrying a frame is re-labelled.
                if ( $reporttype eq 'RPS-BLAST(BLASTP)' ) {
                    $self->element(
                        {
                            'Name' => 'BlastOutput_program',
                            'Data' => 'RPS-BLAST(BLASTX)'
                        }
                    );
                }
            }
            $self->element(
                { 'Name' => 'Hsp_query-frame', 'Data' => $queryframe } );
            $self->element(
                { 'Name' => 'Hsp_hit-frame', 'Data' => $hitframe } );
        }

        # ---- footer: parameters and statistics --------------------------
        elsif (/^Parameters:/
            || /^\s+Database:\s+?/
            || /^\s+Subset/
            || /^\s*Lambda/
            || /^\s*Histogram/
            || ( $self->in_element('hsp') && /WARNING|NOTE/ ) )
        {
            $self->debug("blast.pm: found parameters section \n");

            $self->end_element( { 'Name' => 'Hsp' } )
              if $self->in_element('hsp');
            $self->end_element( { 'Name' => 'Hit' } )
              if $self->in_element('hit');

            # Description rows that were never matched to an alignment
            # are emitted as hits without HSPs.
            while ( my $sig = shift @hit_signifs ) {
                next unless defined $sig;
                $self->start_element( { 'Name' => 'Hit' } );
                my $id   = $sig->[2];
                my $desc = $sig->[3];
                $self->element( { 'Name' => 'Hit_id', 'Data' => $id } );
                my ( $acc, $version ) = _get_accession_version($id);
                $self->element(
                    { 'Name' => 'Hit_accession', 'Data' => $acc } );
                $self->element(
                    { 'Name' => 'Hit_signif', 'Data' => $sig->[0] } );
                $self->element(
                    { 'Name' => 'Hit_score', 'Data' => $sig->[1] } );
                $self->element( { 'Name' => 'Hit_def', 'Data' => $desc } );
                $self->end_element( { 'Name' => 'Hit' } );
            }
            $self->end_element( { 'Name' => 'Iteration' } )
              if $self->within_element('iteration');

            next if /^\s+Subset/;

            # The line that opened the footer decides which dialect of
            # footer is parsed below.
            my $blast =
              (/^(\s+Database\:)|(\s*Lambda)/) ? 'ncbi' : 'wublast';
            if (/^\s*Histogram/) {
                $blast = 'btk';
            }

            my $last = '';    # previous footer line, for table headers

            # Default; overridden when a "nogaps" line is seen.
            $self->element(
                { 'Name' => 'Parameters_allowgaps', 'Data' => 'yes' } );

            while ( defined( $_ = $self->_readline ) ) {
                if (   /^(PSI)?([T]?BLAST[NPX])\s*(.+)/i
                    || /^MEGABLAST\s*(.+)/i
                    || /^(P?GENEWISE|HFRAME|SWN|TSWN)\s+(.+)/i )
                {
                    # Next report: let the outer loop handle it.
                    $self->_pushback($_);
                    last;
                }
                elsif (/^Query=/) {

                    # Next query under the same header.
                    $self->_pushback($reportline) if $reportline;
                    $self->_pushback($_);
                    $self->end_element( { 'Name' => 'Hsp' } )
                      if $self->in_element('hsp');
                    $self->end_element( { 'Name' => 'Hit' } )
                      if $self->in_element('hit');
                    if ($bl2seq_fix) {
                        $self->element(
                            {
                                'Name' => 'BlastOutput_program',
                                'Data' => $reporttype
                            }
                        );
                    }
                    $self->end_element( { 'Name' => 'BlastOutput' } );
                    return $self->end_document();
                }

                if (   /Number of Sequences:\s+([\d\,]+)/i
                    || /of sequences in database:\s+(\-?[\d,]+)/i )
                {
                    my $c = $1;
                    $c =~ s/\,//g;
                    $self->element(
                        { 'Name' => 'Statistics_db-len', 'Data' => $c } );
                }
                elsif (/letters in database:\s+(\-?[\d,]+)/i) {
                    my $s = $1;
                    $s =~ s/,//g;
                    $self->element(
                        { 'Name' => 'Statistics_db-let', 'Data' => $s } );
                }
                elsif ( $blast eq 'btk' ) {
                    next;
                }
                elsif ( $blast eq 'wublast' ) {
                    if (/E=(\S+)/) {
                        $self->element(
                            { 'Name' => 'Parameters_expect', 'Data' => $1 }
                        );
                    }
                    elsif (/nogaps/) {
                        $self->element(
                            {
                                'Name' => 'Parameters_allowgaps',
                                'Data' => 'no'
                            }
                        );
                    }
                    elsif (/ctxfactor=(\S+)/) {
                        $self->element(
                            {
                                'Name' => 'Statistics_ctxfactor',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (
/(postsw|links|span[12]?|warnings|notes|gi|noseqs|qres|qtype)/
                      )
                    {
                        # Value-less flags are recorded as 'yes'.
                        $self->element(
                            { 'Name' => "Parameters_$1", 'Data' => 'yes' } );
                    }
                    elsif (/(\S+)=(\S+)/) {
                        $self->element(
                            { 'Name' => "Parameters_$1", 'Data' => $2 } );
                    }
                    elsif ( $last =~ /(Frame|Strand)\s+MatID\s+Matrix name/i )
                    {
                        # Matrix table: one row per frame, optionally
                        # followed by a "Q=..,R=.." gapped-parameters row.
                        my $firstgapinfo = 1;
                        my $frame        = undef;
                        while ( defined($_) && !/^\s+$/ ) {
                            s/^\s+//;
                            s/\s+$//;
                            if (   $firstgapinfo
                                && s/Q=(\d+),R=(\d+)\s+//x )
                            {
                                my ( $gap_open, $gap_ext ) = ( $1, $2 );
                                $firstgapinfo = 0;
                                $self->element(
                                    {
                                        'Name' => 'Parameters_gap-open',
                                        'Data' => $gap_open
                                    }
                                );
                                $self->element(
                                    {
                                        'Name' => 'Parameters_gap-extend',
                                        'Data' => $gap_ext
                                    }
                                );
                                my @fields = split;
                                for my $type (
                                    qw(lambda_gapped
                                    kappa_gapped
                                    entropy_gapped)
                                  )
                                {
                                    next if $type eq 'n/a';
                                    if ( !@fields ) {
                                        warn "fields is empty for $type\n";
                                        next;
                                    }
                                    $self->element(
                                        {
                                            'Name' =>
                                              "Statistics_frame$frame\_$type",
                                            'Data' => shift @fields
                                        }
                                    );
                                }
                            }
                            else {
                                my ( $frameo, $matid, $matrix, @fields ) =
                                  split;

                                # The first row also supplies the
                                # report-wide matrix and Karlin values.
                                if ( !defined $frame ) {
                                    $self->element(
                                        {
                                            'Name' => 'Parameters_matrix',
                                            'Data' => $matrix
                                        }
                                    );
                                    $self->element(
                                        {
                                            'Name' => 'Statistics_lambda',
                                            'Data' => $fields[0]
                                        }
                                    );
                                    $self->element(
                                        {
                                            'Name' => 'Statistics_kappa',
                                            'Data' => $fields[1]
                                        }
                                    );
                                    $self->element(
                                        {
                                            'Name' => 'Statistics_entropy',
                                            'Data' => $fields[2]
                                        }
                                    );
                                }
                                $frame = $frameo;
                                my $ii = 0;
                                for my $type (
                                    qw(lambda_used
                                    kappa_used
                                    entropy_used
                                    lambda_computed
                                    kappa_computed
                                    entropy_computed)
                                  )
                                {
                                    my $f = $fields[$ii];
                                    next unless defined $f;

                                    # "same" refers back to the value
                                    # three columns earlier.
                                    if ( $f eq 'same' ) {
                                        $f = $fields[ $ii - 3 ];
                                    }
                                    $ii++;
                                    $self->element(
                                        {
                                            'Name' =>
                                              "Statistics_frame$frame\_$type",
                                            'Data' => $f
                                        }
                                    );
                                }
                            }
                            $_ = $self->_readline;
                        }
                        $last = $_;
                    }
                    elsif ( $last =~ /(Frame|Strand)\s+MatID\s+Length/i ) {

                        # NOTE(review): $frame is never assigned in this
                        # branch, so the element names below are built with
                        # an undefined frame, and the loop condition skips
                        # rows that start with whitespace. Both are kept as
                        # found -- confirm against real WU-BLAST output.
                        my $frame = undef;
                        while ( defined($_) && !/^\s+/ ) {
                            s/^\s+//;
                            s/\s+$//;
                            my @fields = split;
                            if ( @fields <= 3 ) {
                                for my $type (qw(X_gapped E2_gapped S2)) {
                                    last unless @fields;
                                    $self->element(
                                        {
                                            'Name' =>
                                              "Statistics_frame$frame\_$type",
                                            'Data' => shift @fields
                                        }
                                    );
                                }
                            }
                            else {
                                for my $type (
                                    qw(length
                                    efflength
                                    E S W T X E2 S2)
                                  )
                                {
                                    $self->element(
                                        {
                                            'Name' =>
                                              "Statistics_frame$frame\_$type",
                                            'Data' => shift @fields
                                        }
                                    );
                                }
                            }
                            $_ = $self->_readline;
                        }
                        $last = $_;
                    }
                    elsif (/(\S+\s+\S+)\s+DFA:\s+(\S+)\s+\((.+)\)/) {
                        my ( $what, $count, $note ) = ( $1, $2, $3 );
                        if ( $what eq 'states in' ) {
                            $self->element(
                                {
                                    'Name' => 'Statistics_DFA_states',
                                    'Data' => "$count $note"
                                }
                            );
                        }
                        elsif ( $what eq 'size of' ) {
                            $self->element(
                                {
                                    'Name' => 'Statistics_DFA_size',
                                    'Data' => "$count $note"
                                }
                            );
                        }
                    }
                    elsif (
                        m/^\s+Time\s+to\s+generate\s+neighborhood:\s+
                          (\S+\s+\S+\s+\S+)/x
                      )
                    {
                        $self->element(
                            {
                                'Name' => 'Statistics_neighbortime',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (/processors\s+used:\s+(\d+)/) {
                        $self->element(
                            {
                                'Name' => 'Statistics_noprocessors',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (
                        m/^\s+(\S+)\s+cpu\s+time:\s+
                          (\S+\s+\S+\s+\S+)
                          \s+Elapsed:\s+(\S+)/x
                      )
                    {
                        my ( $cputype, $cpu, $elapsed ) =
                          ( lc($1), $2, $3 );
                        $self->element(
                            {
                                'Name' => "Statistics_$cputype\_cputime",
                                'Data' => $cpu
                            }
                        );
                        $self->element(
                            {
                                'Name' => "Statistics_$cputype\_actualtime",
                                'Data' => $elapsed
                            }
                        );
                    }
                    elsif (/^\s+Start:/) {
                        my ( $junk, $start, $stime, $end, $etime ) =
                          split( /\s+(Start|End)\:\s+/, $_ );
                        chomp($stime);
                        $self->element(
                            {
                                'Name' => 'Statistics_starttime',
                                'Data' => $stime
                            }
                        );
                        chomp($etime);
                        $self->element(
                            {
                                'Name' => 'Statistics_endtime',
                                'Data' => $etime
                            }
                        );
                    }
                    elsif (/^\s+Database:\s+(.+)$/) {
                        $self->element(
                            {
                                'Name' => 'Parameters_full_dbpath',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (/^\s+Posted:\s+(.+)/) {
                        my $d = $1;
                        chomp($d);
                        $self->element(
                            {
                                'Name' => 'Statistics_posted_date',
                                'Data' => $d
                            }
                        );
                    }
                }
                elsif ( $blast eq 'ncbi' ) {
                    if (m/^Matrix:\s+(.+)\s*$/oxi) {
                        $self->element(
                            { 'Name' => 'Parameters_matrix', 'Data' => $1 }
                        );
                    }
                    elsif (/^Gapped/) {
                        $gapped_stats = 1;
                    }
                    elsif (/^Lambda/) {

                        # Values are on the line after the column header.
                        $_ = $self->_readline;
                        s/^\s+//;
                        my ( $lambda, $kappa, $entropy ) = split;
                        my $prefix =
                          $gapped_stats
                          ? 'Statistics_gapped_'
                          : 'Statistics_';
                        $self->element(
                            {
                                'Name' => $prefix . 'lambda',
                                'Data' => $lambda
                            }
                        );
                        $self->element(
                            {
                                'Name' => $prefix . 'kappa',
                                'Data' => $kappa
                            }
                        );
                        $self->element(
                            {
                                'Name' => $prefix . 'entropy',
                                'Data' => $entropy
                            }
                        );
                    }
                    elsif (m/effective\s+search\s+space\s+used:\s+(\d+)/ox) {
                        $self->element(
                            {
                                'Name' => 'Statistics_eff-spaceused',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (m/effective\s+search\s+space:\s+(\d+)/ox) {
                        $self->element(
                            {
                                'Name' => 'Statistics_eff-space',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (
                        m/Gap\s+Penalties:\s+Existence:\s+(\d+)\,
                          \s+Extension:\s+(\d+)/ox
                      )
                    {
                        my ( $gap_open, $gap_ext ) = ( $1, $2 );
                        $self->element(
                            {
                                'Name' => 'Parameters_gap-open',
                                'Data' => $gap_open
                            }
                        );
                        $self->element(
                            {
                                'Name' => 'Parameters_gap-extend',
                                'Data' => $gap_ext
                            }
                        );
                    }
                    elsif (/effective\s+HSP\s+length:\s+(\d+)/) {
                        $self->element(
                            { 'Name' => 'Statistics_hsp-len', 'Data' => $1 }
                        );
                    }
                    elsif (/effective\s+length\s+of\s+query:\s+([\d\,]+)/) {
                        my $c = $1;
                        $c =~ s/\,//g;
                        $self->element(
                            {
                                'Name' => 'Statistics_query-len',
                                'Data' => $c
                            }
                        );
                    }
                    elsif (/effective\s+length\s+of\s+database:\s+([\d\,]+)/)
                    {
                        my $c = $1;
                        $c =~ s/\,//g;
                        $self->element(
                            {
                                'Name' => 'Statistics_eff-dblen',
                                'Data' => $c
                            }
                        );
                    }
                    elsif (
/^(T|A|X1|X2|X3|S1|S2):\s+(\d+(\.\d+)?)\s+(?:\(\s*(\d+\.\d+) bits\))?/
                      )
                    {
                        my ( $stat, $value, $bits ) = ( $1, $2, $4 );
                        chomp($value);
                        $self->element(
                            {
                                'Name' => "Statistics_$stat",
                                'Data' => $value
                            }
                        );
                        if ( defined $bits ) {
                            $self->element(
                                {
                                    'Name' => "Statistics_${stat}_bits",
                                    'Data' => $bits
                                }
                            );
                        }
                    }
                    elsif (
                        m/frameshift\s+window\,
                          \s+decay\s+const:\s+(\d+)\,\s+([\.\d]+)/x
                      )
                    {
                        my ( $window, $decay ) = ( $1, $2 );
                        $self->element(
                            {
                                'Name' => 'Statistics_framewindow',
                                'Data' => $window
                            }
                        );
                        $self->element(
                            { 'Name' => 'Statistics_decay', 'Data' => $decay }
                        );
                    }
                    elsif (m/^Number\s+of\s+Hits\s+to\s+DB:\s+(\S+)/ox) {
                        $self->element(
                            {
                                'Name' => 'Statistics_hit_to_db',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (m/^Number\s+of\s+extensions:\s+(\S+)/ox) {
                        $self->element(
                            {
                                'Name' => 'Statistics_num_extensions',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (
                        m/^Number\s+of\s+successful\s+extensions:\s+
                          (\S+)/ox
                      )
                    {
                        $self->element(
                            {
                                'Name' => 'Statistics_num_suc_extensions',
                                'Data' => $1
                            }
                        );
                    }
                    elsif (
                        m/^Number\s+of\s+sequences\s+better\s+than\s+
                          (\S+):\s+(\d+)/ox
                      )
                    {
                        my ( $cutoff, $count ) = ( $1, $2 );
                        $self->element(
                            {
                                'Name' => 'Parameters_expect',
                                'Data' => $cutoff
                            }
                        );
                        $self->element(
                            {
                                'Name' =>
                                  'Statistics_seqs_better_than_cutoff',
                                'Data' => $count
                            }
                        );
                    }
                    elsif (/^\s+Posted\s+date:\s+(.+)/) {
                        my $d = $1;
                        chomp($d);
                        $self->element(
                            {
                                'Name' => 'Statistics_posted_date',
                                'Data' => $d
                            }
                        );
                    }
                    elsif ( !/^\s+$/ ) {

                        # unrecognised footer line: ignored
                    }
                }
                $last = $_;
            }
        }

        # ---- alignment block (Query / midline / Sbjct) ------------------
        elsif ( $self->in_element('hsp') ) {
            $self->debug("blast.pm: Processing HSP\n");
            $self->{'_reporttype'} ||= $DEFAULTREPORTTYPE;

            my %data = ( 'Query' => '', 'Mid' => '', 'Hit' => '' );
            my $len;    # width of the "Query: 123 " prefix

            # Up to three lines per block; $i is also forced to 3 below to
            # leave the loop early.
            for ( my $i = 0 ; defined($_) && $i < 3 ; $i++ ) {
                if (   ( $i == 0 && /^\s+$/ )
                    || /^\s*(?:Lambda|Minus|Plus|Score)/i )
                {
                    # Not an alignment line: this HSP is finished.
                    $self->_pushback($_) if defined $_;
                    $self->end_element( { 'Name' => 'Hsp' } );
                    last;
                }
                chomp;
                if (/^((Query|Sbjct):?\s+(\-?\d+)\s*)(\S+)\s+(\-?\d+)/) {
                    my ( $full, $type, $start, $str, $end ) =
                      ( $1, $2, $3, $4, $5 );
                    if ( $str eq '-' ) {
                        $i = 3 if $type eq 'Sbjct';
                    }
                    else {
                        $data{$type} = $str;
                    }
                    $len = length($full);

                    # Keep the first start seen, always update the end.
                    $self->{"_$type"}->{'begin'} = $start
                      unless $self->{"_$type"}->{'begin'};
                    $self->{"_$type"}->{'end'} = $end;
                }
                else {
                    $self->throw("no data for midline $_")
                      unless ( defined $_ && defined $len );

                    # The midline has no prefix; cut at the same column.
                    $data{'Mid'} = substr( $_, $len );
                }
                $_ = $self->_readline();
            }
            $self->characters(
                { 'Name' => 'Hsp_qseq', 'Data' => $data{'Query'} } );
            $self->characters(
                { 'Name' => 'Hsp_hseq', 'Data' => $data{'Sbjct'} } );
            $self->characters(
                { 'Name' => 'Hsp_midline', 'Data' => $data{'Mid'} } );
        }
        else {

            # line not recognised in the current state: ignored
        }
    }

    # ---- end of input: close whatever is still open ---------------------
    $self->debug("blast.pm: End of BlastOutput\n");
    if ( $self->{'_seentop'} ) {
        $self->end_element( { 'Name' => 'Hsp' } )
          if $self->within_element('hsp');
        $self->end_element( { 'Name' => 'Hit' } )
          if $self->within_element('hit');
        $self->end_element( { 'Name' => 'Iteration' } )
          if $self->within_element('iteration');
        if ($bl2seq_fix) {
            $self->element(
                { 'Name' => 'BlastOutput_program', 'Data' => $reporttype } );
        }
        $self->end_element( { 'Name' => 'BlastOutput' } );
    }
    return $self->end_document();
}
# Open a new 'BlastOutput' element and update the per-result bookkeeping:
# mark that a report top has been seen, bump the result counter and clear
# the cached handler return code.
sub _start_blastoutput {
    my ($self) = @_;

    my %event = ( 'Name' => 'BlastOutput' );
    $self->start_element( \%event );

    $self->{'_seentop'} = 1;
    $self->{'_result_count'}++;
    $self->{'_handler_rc'} = undef;
}
# Open a new 'Iteration' element; returns whatever start_element() returns.
sub _start_iteration {
    my ($self) = @_;
    my %event = ( 'Name' => 'Iteration' );
    return $self->start_element( \%event );
}
# Return the cached event handler if it handles events of type $type,
# otherwise undef.
#
# The handler's will_handle($type) answer is memoised per type in
# $self->{'_will_handle_cache'}; it is asked again only while the cached
# answer is undefined.
sub _will_handle {
    my ( $self, $type ) = @_;

    my $handler = $self->{'_handler_cache'};
    my $answers = ( $self->{'_will_handle_cache'} ||= {} );

    unless ( defined $answers->{$type} ) {
        $answers->{$type} = $handler->will_handle($type);
    }

    return $answers->{$type} ? $handler : undef;
}
# SAX-like "element opened" callback.
#
# $data is a hash ref with 'Name' (element name) and optionally
# 'Attributes'. Only names listed in %MODEMAP are structural; for those:
#   * the handler's start_<type> method is called (if the handler handles
#     that type) and its return value kept in $self->{'_handler_rc'};
#   * the type is pushed onto the front of the open-element stack;
#   * for 'result' the collected values are reset; for any other type the
#     values left over from a previous element of the same type (keys
#     starting with "<TYPE>-", e.g. "HIT-") are discarded.
# Leaf element names (not in %MODEMAP) are ignored here.
sub start_element {
    my ( $self, $data ) = @_;
    my $nm   = $data->{'Name'};
    my $type = $MODEMAP{$nm};

    if ($type) {
        my $handler = $self->_will_handle($type);
        if ($handler) {
            my $func = sprintf( "start_%s", lc $type );
            $self->{'_handler_rc'} = $handler->$func( $data->{'Attributes'} );
        }

        unshift @{ $self->{'_elements'} }, $type;

        if ( $type eq 'result' ) {
            $self->{'_values'} = {};
            $self->{'_result'} = undef;
        }
        elsif ( defined $self->{'_values'} ) {

            # Plain prefix comparison. The previous pattern match had been
            # split across lines without /x, so it contained literal
            # newlines and never matched any key.
            my $prefix = uc($type) . '-';
            my @stale =
              grep { index( $_, $prefix ) == 0 } keys %{ $self->{'_values'} };
            delete @{ $self->{'_values'} }{@stale};
        }
    }
}
# SAX-like "element closed" callback.
#
# $data is a hash ref with at least 'Name'. Depending on the name:
#   * 'BlastOutput_program': derive $self->{'_reporttype'} from the text
#     last seen by characters(), falling back to $DEFAULTREPORTTYPE;
#   * 'Hsp': first flush the accumulated sequence strings and the tracked
#     query/subject coordinates as leaf elements;
#   * a structural name (in %MODEMAP): call the handler's end_<type>
#     method with the report type and collected values, and pop the
#     open-element stack;
#   * a leaf name (in %MAPPING): store the last character data in
#     $self->{'_values'} under the mapped slot.
# Returns the handler's return value for structural elements (undef
# otherwise); for 'result' it is also kept in $self->{'_result'}.
sub end_element {
    my ( $self, $data ) = @_;
    my $nm   = $data->{'Name'};
    my $type = $MODEMAP{$nm};
    my $rc;

    if ( $nm eq 'BlastOutput_program' ) {
        $self->{'_reporttype'} = uc $1
          if $self->{'_last_data'} =~ /(t?blast[npx])/i;
        $self->{'_reporttype'} ||= $DEFAULTREPORTTYPE;
    }

    if ( $nm eq 'Hsp' ) {

        # Sequence strings gathered piecewise by characters().
        for my $field (qw(Hsp_qseq Hsp_midline Hsp_hseq)) {
            $self->element(
                {
                    'Name' => $field,
                    'Data' => $self->{'_last_hspdata'}->{$field}
                }
            );
        }
        $self->{'_last_hspdata'} = {};

        # Coordinates tracked while the alignment lines were read.
        my @coords = (
            [ 'Hsp_query-from', '_Query', 'begin' ],
            [ 'Hsp_query-to',   '_Query', 'end' ],
            [ 'Hsp_hit-from',   '_Sbjct', 'begin' ],
            [ 'Hsp_hit-to',     '_Sbjct', 'end' ],
        );
        for my $coord (@coords) {
            my ( $element, $slot, $edge ) = @{$coord};
            $self->element(
                {
                    'Name' => $element,
                    'Data' => $self->{$slot}->{$edge}
                }
            );
        }
    }

    my $target = $MAPPING{$nm};
    if ($type) {
        my $handler = $self->_will_handle($type);
        if ($handler) {
            my $func = sprintf( "end_%s", lc $type );
            $rc = $handler->$func( $self->{'_reporttype'},
                $self->{'_values'} );
        }
        shift @{ $self->{'_elements'} };
    }
    elsif ($target) {
        if ( ref($target) =~ /hash/i ) {

            # { OUTER => INNER }: store under $values->{OUTER}{INNER}.
            my $outer = ( keys %{$target} )[0];
            $self->{'_values'}->{$outer}->{ $target->{$outer} } =
              $self->{'_last_data'};
        }
        else {
            $self->{'_values'}->{$target} = $self->{'_last_data'};
        }
    }

    $self->{'_last_data'} = '';    # consumed
    if ( defined $type && $type eq 'result' ) {
        $self->{'_result'} = $rc;
    }
    return $rc;
}
# Convenience wrapper for a complete leaf element: fires start_element,
# characters and end_element, in that order, with the same event hash
# ('Name' and 'Data'). Returns what end_element() returns.
sub element {
    my ( $self, $event ) = @_;

    for my $phase (qw(start_element characters)) {
        $self->$phase($event);
    }
    return $self->end_element($event);
}
# SAX-like "character data" callback.
#
# $data is a hash ref with 'Name' and 'Data'.
#   * While the innermost open element is an HSP, data for Hsp_qseq,
#     Hsp_hseq and Hsp_midline is appended to $self->{'_last_hspdata'}
#     (alignments arrive in several chunks).
#   * Any defined, non-whitespace-only data is remembered in
#     $self->{'_last_data'} for the following end_element() call.
sub characters {
    my ( $self, $data ) = @_;
    my $text = $data->{'Data'};

    my $is_hsp_string = $self->in_element('hsp')
      && $data->{'Name'} =~ /^Hsp\_(qseq|hseq|midline)$/;
    if ( $is_hsp_string && defined $text ) {
        $self->{'_last_hspdata'}->{ $data->{'Name'} } .= $text;
    }

    # Undefined or all-whitespace data never overwrites the last value.
    return if !defined $text || $text =~ /^\s+$/;

    $self->{'_last_data'} = $text;
}
# Is an element of type $name open anywhere on the element stack?
#
# Unlike in_element(), which only looks at the innermost element, this
# scans the whole stack in $self->{'_elements'}.
#
# Returns 1 if found, 0 otherwise -- including when $name is undefined or
# the stack is missing or empty.
#
# The previous guard mixed && and || without parentheses, so with a
# defined $name and no '_elements' entry it went on to dereference an
# undefined value; each condition is now tested on its own.
sub within_element {
    my ( $self, $name ) = @_;

    my $stack = $self->{'_elements'};
    return 0 unless defined $name && defined $stack && @{$stack};

    for my $open ( @{$stack} ) {
        return 1 if $open eq $name;
    }
    return 0;
}
# Is the innermost open element of type $name?
#
# Only the front of the stack in $self->{'_elements'} is examined.
# Returns 0 when nothing is open, otherwise the result of the string
# comparison (true or false).
sub in_element {
    my ( $self, $name ) = @_;

    my $innermost = $self->{'_elements'}->[0];
    return 0 unless defined $innermost;

    return ( $innermost eq $name );
}
# Reset the per-document parser state before a report is parsed: no
# result yet, no collected values, no open elements.
sub start_document {
    my $self = shift;

    $self->{'_result'}   = undef;
    $self->{'_values'}   = {};
    $self->{'_lasttype'} = '';
    $self->{'_elements'} = [];
}
# Finish the document: return the result stored by end_element() when the
# 'result' element was closed (undef if there was none). Extra arguments
# are accepted and ignored.
sub end_document {
    my $self = shift;
    return $self->{'_result'};
}
# Write a result object through the configured writer.
#
# If no writer has been set, a warning is issued and an instance of
# $DEFAULT_BLAST_WRITER_CLASS is installed first. The actual writing is
# done by the superclass write_result(), whose return value is passed on.
sub write_result {
    my ( $self, $blast, @args ) = @_;

    unless ( defined $self->writer ) {
        $self->warn("Writer not defined. Using a $DEFAULT_BLAST_WRITER_CLASS");
        my $fallback = $DEFAULT_BLAST_WRITER_CLASS->new();
        $self->writer($fallback);
    }

    return $self->SUPER::write_result( $blast, @args );
}
# Number of reports started so far by this parser (the counter is bumped
# in _start_blastoutput); undef before the first one.
sub result_count {
    my ($self) = @_;
    return $self->{'_result_count'};
}
# Alias for result_count().
sub report_count {
    my $self = shift;
    return $self->result_count;
}
# Get or set the inclusion threshold. Delegates, arguments and all, to
# the inclusion_threshold() method of the object returned by
# $self->_eventHandler.
sub inclusion_threshold {
    my ( $self, @args ) = @_;
    my $handler = $self->_eventHandler;
    return $handler->inclusion_threshold(@args);
}
# Get or set the maximum significance. Delegates, arguments and all, to
# max_significance() on the cached event handler.
sub max_significance {
    my ( $self, @args ) = @_;
    return $self->{'_handler_cache'}->max_significance(@args);
}
# Alias for max_significance().
sub signif {
    my ( $self, @args ) = @_;
    return $self->max_significance(@args);
}
# Get or set the minimum score. Delegates, arguments and all, to
# min_score() on the cached event handler.
#
# This used to call the handler's max_significance(), so setting a
# minimum score silently changed the significance cutoff instead.
# NOTE(review): assumes the attached handler implements min_score() --
# confirm for custom handlers.
sub min_score {
    my ( $self, @args ) = @_;
    return $self->{'_handler_cache'}->min_score(@args);
}
# Get or set the minimum query length.
#
# With an argument, the value must consist of digits only and be greater
# than zero; otherwise $self->throw() is called with a
# Bio::Root::BadParameter description. Setting a value also switches on
# $self->{'_confirm_qlength'}.
#
# Returns the current minimum query length (undef if never set).
sub min_query_length {
    my ( $self, @new ) = @_;

    if (@new) {
        my $min_qlen = $new[0];

        if ( $min_qlen =~ /\D/ or $min_qlen <= 0 ) {
            $self->throw(
                -class => 'Bio::Root::BadParameter',
                -text  => "Invalid minimum query length value: $min_qlen\n"
                  . "Value must be an integer > 0. Value not set.",
                -value => $min_qlen
            );
        }

        $self->{'_confirm_qlength'}  = 1;
        $self->{'_min_query_length'} = $min_qlen;
    }

    return $self->{'_min_query_length'};
}
# Get or set the "best hit only" flag stored in $self->{'_best'}.
# Returns the current value.
sub best_hit_only {
    my ( $self, @new ) = @_;
    $self->{'_best'} = $new[0] if @new;
    return $self->{'_best'};
}
# Get or set the "check all hits" flag stored in $self->{'_check_all'}.
# Returns the current value.
sub check_all_hits {
    my ( $self, @new ) = @_;
    $self->{'_check_all'} = $new[0] if @new;
    return $self->{'_check_all'};
}
# Split a sequence identifier into accession and version.
#
# Plain function (not a method). If it is nevertheless invoked as a method
# on a Bio::SearchIO object, the invocant is skipped and the next argument
# is used as the identifier.
#
#   tag|ACC.VER|...   with tag in gb emb dbj sp pdb bbs ref lcl
#                     -> second field, split at the first "."
#   tag|...|ACC.VER   with tag in pir prf pat gnl
#                     -> third field, split at the first "."
#   anything else     -> the whole identifier as accession, no version
#
# Returns ( $accession, $version ); $version is undef when there is none.
# Returns an empty list when the identifier is undefined.
#
# The first pattern used to have "ref" on a line of its own inside a
# pattern without /x, so it only matched a literal newline-ref-newline and
# RefSeq identifiers fell through to the "anything else" case.
sub _get_accession_version {
    my $id = shift;

    # Tolerate being called as $searchio->_get_accession_version($id).
    if ( ref($id) && $id->isa('Bio::SearchIO') ) {
        $id = shift;
    }
    return unless defined $id;

    my ( $acc, $version );
    if ( $id =~ /(gb|emb|dbj|sp|pdb|bbs|ref|lcl)\|(.*)\|(.*)/ ) {
        ( $acc, $version ) = split /\./, $2;
    }
    elsif ( $id =~ /(pir|prf|pat|gnl)\|(.*)\|(.*)/ ) {
        ( $acc, $version ) = split /\./, $3;
    }
    else {
        $acc = $id;
    }
    return ( $acc, $version );
}
1;