use
vars
qw(%MAPPING %MODEMAP)
;
BEGIN {
%MODEMAP
= (
'HMMER_Output'
=>
'result'
,
'Hit'
=>
'hit'
,
'Hsp'
=>
'hsp'
);
%MAPPING
= (
'Hsp_bit-score'
=>
'HSP-bits'
,
'Hsp_score'
=>
'HSP-score'
,
'Hsp_evalue'
=>
'HSP-evalue'
,
'Hsp_query-from'
=>
'HSP-query_start'
,
'Hsp_query-to'
=>
'HSP-query_end'
,
'Hsp_hit-from'
=>
'HSP-hit_start'
,
'Hsp_hit-to'
=>
'HSP-hit_end'
,
'Hsp_positive'
=>
'HSP-conserved'
,
'Hsp_identity'
=>
'HSP-identical'
,
'Hsp_gaps'
=>
'HSP-hsp_gaps'
,
'Hsp_hitgaps'
=>
'HSP-hit_gaps'
,
'Hsp_querygaps'
=>
'HSP-query_gaps'
,
'Hsp_qseq'
=>
'HSP-query_seq'
,
'Hsp_hseq'
=>
'HSP-hit_seq'
,
'Hsp_midline'
=>
'HSP-homology_seq'
,
'Hsp_align-len'
=>
'HSP-hsp_length'
,
'Hsp_query-frame'
=>
'HSP-query_frame'
,
'Hsp_hit-frame'
=>
'HSP-hit_frame'
,
'Hit_id'
=>
'HIT-name'
,
'Hit_len'
=>
'HIT-length'
,
'Hit_accession'
=>
'HIT-accession'
,
'Hit_desc'
=>
'HIT-description'
,
'Hit_signif'
=>
'HIT-significance'
,
'Hit_score'
=>
'HIT-score'
,
'HMMER_program'
=>
'RESULT-algorithm_name'
,
'HMMER_version'
=>
'RESULT-algorithm_version'
,
'HMMER_query-def'
=>
'RESULT-query_name'
,
'HMMER_query-len'
=>
'RESULT-query_length'
,
'HMMER_query-acc'
=>
'RESULT-query_accession'
,
'HMMER_querydesc'
=>
'RESULT-query_description'
,
'HMMER_hmm'
=>
'RESULT-hmm_name'
,
'HMMER_seqfile'
=>
'RESULT-sequence_file'
,
'HMMER_db'
=>
'RESULT-database_name'
,
);
}
sub
_initialize {
my
(
$self
,
@args
) =
@_
;
$self
->SUPER::_initialize(
@args
);
$self
->{
'_hmmidline'
} =
'HMMER 3.0b placeholder'
;
$self
->{
'_alnreport'
} = 1;
}
sub
next_result{
my
(
$self
) =
@_
;
my
$seentop
= 0;
my
$reporttype
;
my
(
$last
,
@hit_list
,
@hsp_list
,
%hspinfo
,
%hitinfo
,
%domaincounter
);
local
$/ =
"\n"
;
local
$_
;
my
$verbose
=
$self
->verbose;
$self
->start_document();
local
(
$_
);
if
(!
defined
(
$_
=
$self
->_readline) ) {
return
undef
;
}
else
{
$self
->_pushback(
$_
);
}
while
(
defined
(
$_
=
$self
->_readline ) ) {
my
$lineorig
=
$_
;
chomp
;
if
(
$_
=~ m/^\
my
$prog
= $1;
$self
->start_element( {
'Name'
=>
'HMMER_Output'
} );
$self
->{
'_result_count'
}++;
$self
->element(
{
'Name'
=>
'HMMER_program'
,
'Data'
=>
uc
(
$prog
)
}
);
}
elsif
(
$_
=~ m/^\
my
$version
= $1;
my
$versiondate
= $2;
$self
->{
'_hmmidline'
} =
$_
;
$self
->element(
{
'Name'
=>
'HMMER_version'
,
'Data'
=>
$version
}
);
}
elsif
(
$_
=~ /^\
if
(
$self
->{
'_reporttype'
} eq
'HMMSEARCH'
) {
$self
->{
'_hmmfileline'
} =
$lineorig
;
$self
->element(
{
'Name'
=>
'HMMER_hmm'
,
'Data'
=> $1
}
);
}
elsif
(
$self
->{
'_reporttype'
} eq
'HMMSCAN'
) {
$self
->{
'_hmmseqline'
} =
$lineorig
;
$self
->element(
{
'Name'
=>
'HMMER_seqfile'
,
'Data'
=> $1
}
);
}
}
elsif
(
$_
=~ m/^\
$self
->{
'_alnreport'
} = 0;
}
elsif
(
$_
=~ m/^\
if
(
$self
->{
'_reporttype'
} eq
'HMMSEARCH'
) {
$self
->{
'_hmmseqline'
} =
$lineorig
;
$self
->element(
{
'Name'
=>
'HMMER_seqfile'
,
'Data'
=> $1
}
);
}
elsif
(
$self
->{
'_reporttype'
} eq
'HMMSCAN'
) {
$self
->{
'_hmmfileline'
} =
$lineorig
;
$self
->element(
{
'Name'
=>
'HMMER_hmm'
,
'Data'
=> $1
}
);
}
}
elsif
(
$_
=~ s/^Query:\s+// ) {
unless
( s/\s+\[[L|M]\=(\d+)\]$// ){
warn
"Error parsing length for query, offending line $_\n"
;
exit
(0);
}
my
$querylen
= $1;
$self
->element(
{
'Name'
=>
'HMMER_query-len'
,
'Data'
=>
$querylen
}
);
$self
->element(
{
'Name'
=>
'HMMER_query-def'
,
'Data'
=>
$_
}
);
}
elsif
(
$_
=~ s/^Accession:\s+// ){
s/\s+$//;
$self
->element(
{
'Name'
=>
'HMMER_query-acc'
,
'Data'
=>
$_
}
);
}
elsif
(
$_
=~ s/^Description:\s+// ){
s/\s+$//;
$self
->element(
{
'Name'
=>
'HMMER_querydesc'
,
'Data'
=>
$_
}
);
}
elsif
(
defined
$self
->{
'_reporttype'
} &&
(
$self
->{
'_reporttype'
} eq
'HMMSEARCH'
||
$self
->{
'_reporttype'
} eq
'HMMSCAN'
)
){
if
(
$_
=~ m/Scores
for
complete sequence/){
while
(
defined
(
$_
=
$self
->_readline ) ) {
if
(
$_
=~ m/inclusion threshold/ || m/Domain( and alignment)? annotation
for
each
/ ||
m/\[No hits detected/ || m!^//! ){
$self
->_pushback(
$_
);
last
;
}
next
if
( m/\-\-\-/ || m/^\s+E\-value\s+score/ || m/^$/);
my
(
$eval_full
,
$score_full
,
$bias_full
,
$eval_best
,
$score_best
,
$bias_best
,
$exp
,
$n
,
$hitid
,
$desc
,
@hitline
);
@hitline
=
split
(
" "
,
$_
);
$eval_full
=
shift
@hitline
;
$score_full
=
shift
@hitline
;
$bias_full
=
shift
@hitline
;
$eval_best
=
shift
@hitline
;
$score_best
=
shift
@hitline
;
$bias_best
=
shift
@hitline
;
$exp
=
shift
@hitline
;
$n
=
shift
@hitline
;
$hitid
=
shift
@hitline
;
$desc
=
join
" "
,
@hitline
;
if
( !
defined
(
$desc
) ){
$desc
=
""
;
}
push
@hit_list
, [
$hitid
,
$desc
,
$eval_full
,
$score_full
];
$hitinfo
{
$hitid
}=
$#hit_list
;
}
}
elsif
(
$_
=~ m/inclusion threshold/ ){
while
(
defined
(
$_
=
$self
->_readline ) ) {
if
(
$_
=~ m/Domain( and alignment)? annotation
for
each
/ ||
m/Internal pipeline statistics summary/ ){
$self
->_pushback(
$_
);
last
;
}
next
if
(
$_
=~ m/^$/ );
my
(
$eval_full
,
$score_full
,
$bias_full
,
$eval_best
,
$score_best
,
$bias_best
,
$exp
,
$n
,
$hitid
,
$desc
,
@hitline
);
@hitline
=
split
(
" "
,
$_
);
$eval_full
=
shift
@hitline
;
$score_full
=
shift
@hitline
;
$bias_full
=
shift
@hitline
;
$eval_best
=
shift
@hitline
;
$score_best
=
shift
@hitline
;
$bias_best
=
shift
@hitline
;
$exp
=
shift
@hitline
;
$n
=
shift
@hitline
;
$hitid
=
shift
@hitline
;
$desc
=
join
" "
,
@hitline
;
$hitinfo
{
$hitid
} =
"below_inclusion"
;
}
}
elsif
(
$_
=~ m/Domain( and alignment)? annotation
for
each
/){
@hsp_list
= ();
my
$name
;
while
(
defined
(
$_
=
$self
->_readline ) ) {
if
(
$_
=~ m/Internal pipeline statistics/ || m/\[No targets detected/ ){
$self
->_pushback(
$_
);
last
;
}
if
(
$_
=~ m/^\>\>\s(.*?)\s+/ ) {
$name
= $1;
next
if
(
$hitinfo
{
$name
} eq
"below_inclusion"
);
$domaincounter
{
$name
} = 0;
while
(
defined
(
$_
=
$self
->_readline ) ) {
if
(
$_
=~ m/Internal pipeline statistics/ ||
$_
=~ m/^\>\>/ ){
$self
->_pushback(
$_
);
last
;
}
if
(
$_
=~ m/Alignments
for
each
domain/ ) {
$self
->_pushback(
$_
);
last
;
}
if
(
$_
=~ m/^\s+\
$_
=~ m/^\s\-\-\-\s+/ ||
$_
=~ m/^$/ ){
next
;
}
if
(
my
(
$domain_num
,
$score
,
$bias
,
$ceval
,
$ieval
,
$hmmstart
,
$hmmstop
,
$qalistart
,
$qalistop
,
$envstart
,
$envstop
,
$envbound
,
$acc
) =
m!^\s+(\d+)\s\!*\?*\s+
(\S+)\s+(\S+)\s+
(\S+)\s+(\S+)\s+
(\d+)\s+(\d+).+?
(\d+)\s+(\d+).+?
(\d+)\s+(\d+).+?
(\S+)
\s*$!ox
){
my
@vals
= (
$hmmstart
,
$hmmstop
,
$qalistart
,
$qalistop
,
$score
,
$ceval
,
''
,
''
,
''
);
my
$info
=
$hit_list
[
$hitinfo
{
$name
} ];
if
( !
defined
$info
){
$self
->
warn
(
"Incomplete sequence information; can't find $name, hitinfo says $hitinfo{$name}\n"
);
next
;
}
$domaincounter
{
$name
}++;
my
$hsp_key
=
$name
.
"_"
.
$domaincounter
{
$name
};
push
@hsp_list
, [
$name
,
@vals
];
$hspinfo
{
$hsp_key
} =
$#hsp_list
;
}
else
{
print
"missed this line: $_\n"
;
}
}
}
elsif
(
$_
=~ m/Alignments
for
each
domain/ ) {
my
$domain_count
= 0;
my
$count
= 0;
my
$max_count
= 3;
my
$lastdomain
;
my
$hsp
;
my
(
$hline
,
$midline
,
$qline
);
while
(
defined
(
$_
=
$self
->_readline ) ) {
if
(
$_
=~ m/^\>\>/ ||
$_
=~ m/Internal pipeline statistics/){
$self
->_pushback(
$_
);
last
;
}
elsif
(
$hitinfo
{
$name
} eq
"below_inclusion"
||
$_
=~ m/^$/ ) {
next
;
}
elsif
(
$_
=~ /\s\s\=\=\sdomain\s(\d+)\s+/){
my
$domainnum
= $1;
$count
= 0;
my
$key
=
$name
.
"_"
.
$domainnum
;
$hsp
=
$hsp_list
[
$hspinfo
{
$key
} ];
$hline
=
$$hsp
[-3];
$midline
=
$$hsp
[-2];
$qline
=
$$hsp
[-1];
$lastdomain
=
$name
;
}
elsif
(
$_
=~ m/\s+\S+\sCS$/ ){
my
$modeltrack
=
$_
;
$max_count
++;
$count
++;
next
;
}
elsif
(
$count
==
$max_count
- 3 ){
my
@data
=
split
(
" "
,
$_
);
my
$seq
=
$data
[-2];
$hline
.=
$seq
;
$count
++;
next
;
}
elsif
(
$count
==
$max_count
- 2 ){
$_
=~ s/^\s+//;
$_
=~ s/\s+$//;
$midline
.=
$_
;
$count
++;
next
;
}
elsif
(
$count
==
$max_count
- 1 ){
my
@data
=
split
(
" "
,
$_
);
my
$seq
=
$data
[-2];
$qline
.=
$seq
;
$count
++;
next
;
}
elsif
(
$count
==
$max_count
){
my
$pvals
=
$_
;
$count
= 0;
$max_count
= 3;
$$hsp
[-3] =
$hline
;
$$hsp
[-2] =
$midline
;
$$hsp
[-1] =
$qline
;
next
;
}
else
{
print
"missed $_\n"
;
}
}
}
}
}
elsif
( m/Internal pipeline statistics/ || m!^//! ){
if
(
$self
->within_element(
'hit'
) ) {
if
(
$self
->within_element(
'hsp'
) ) {
$self
->end_element( {
'Name'
=>
'Hsp'
} );
}
$self
->end_element( {
'Name'
=>
'Hit'
} );
}
while
(
defined
(
$_
=
$self
->_readline ) ) {
last
if
(
$_
=~ m/^\/\/$/ );
}
while
(
my
$hit
=
shift
@hit_list
) {
my
$hit_name
=
shift
@$hit
;
my
$hit_desc
=
shift
@$hit
;
my
$hit_signif
=
shift
@$hit
;
my
$hit_score
=
shift
@$hit
;
my
$num_domains
=
$domaincounter
{
$hit_name
} || 0;
$self
->start_element( {
'Name'
=>
'Hit'
} );
$self
->element(
{
'Name'
=>
'Hit_id'
,
'Data'
=>
$hit_name
}
);
$self
->element(
{
'Name'
=>
'Hit_desc'
,
'Data'
=>
$hit_desc
}
);
$self
->element(
{
'Name'
=>
'Hit_signif'
,
'Data'
=>
$hit_signif
}
);
$self
->element(
{
'Name'
=>
'Hit_score'
,
'Data'
=>
$hit_score
}
);
for
my
$i
(1..
$num_domains
) {
my
$key
=
$hit_name
.
"_"
.
$i
;
my
$hsp
=
$hsp_list
[
$hspinfo
{
$key
} ];
if
(
defined
$hsp
) {
my
$hsp_name
=
shift
@$hsp
;
$self
->start_element( {
'Name'
=>
'Hsp'
} );
$self
->element( {
'Name'
=>
'Hsp_identity'
,
'Data'
=> 0
} );
$self
->element( {
'Name'
=>
'Hsp_positive'
,
'Data'
=> 0
} );
$self
->element( {
'Name'
=>
'Hsp_hit-from'
,
'Data'
=>
shift
@$hsp
} );
$self
->element( {
'Name'
=>
'Hsp_hit-to'
,
'Data'
=>
shift
@$hsp
} );
$self
->element( {
'Name'
=>
'Hsp_query-from'
,
'Data'
=>
shift
@$hsp
} );
$self
->element( {
'Name'
=>
'Hsp_query-to'
,
'Data'
=>
shift
@$hsp
} );
$self
->element( {
'Name'
=>
'Hsp_score'
,
'Data'
=>
shift
@$hsp
} );
$self
->element( {
'Name'
=>
'Hsp_evalue'
,
'Data'
=>
shift
@$hsp
} );
$self
->element( {
'Name'
=>
'Hsp_hseq'
,
'Data'
=>
shift
@$hsp
} );
$self
->element( {
'Name'
=>
'Hsp_midline'
,
'Data'
=>
shift
@$hsp
} );
$self
->element( {
'Name'
=>
'Hsp_qseq'
,
'Data'
=>
shift
@$hsp
} );
$self
->end_element( {
'Name'
=>
'Hsp'
} );
}
}
$self
->end_element( {
'Name'
=>
'Hit'
} );
}
@hit_list
= ();
%hitinfo
= ();
last
;
}
}
else
{
print
"missed: $_\n"
;
$self
->debug(
$_
);
}
$last
=
$_
;
}
$self
->end_element( {
'Name'
=>
'HMMER_Output'
} );
my
$result
=
$self
->end_document();
return
$result
;
}
sub
start_element{
my
(
$self
,
$data
) =
@_
;
my
$nm
=
$data
->{
'Name'
};
my
$type
=
$MODEMAP
{
$nm
};
if
(
$type
) {
if
(
$self
->_eventHandler->will_handle(
$type
) ) {
my
$func
=
sprintf
(
"start_%s"
,
lc
$type
);
$self
->_eventHandler->
$func
(
$data
->{
'Attributes'
} );
}
unshift
@{
$self
->{
'_elements'
} },
$type
;
}
if
(
defined
$type
&&
$type
eq
'result'
)
{
$self
->{
'_values'
} = {};
$self
->{
'_result'
} =
undef
;
}
}
sub
end_element{
my
(
$self
,
$data
) =
@_
;
my
$nm
=
$data
->{
'Name'
};
my
$type
=
$MODEMAP
{
$nm
};
my
$rc
;
if
(
$nm
eq
'HMMER_program'
) {
if
(
$self
->{
'_last_data'
} =~ /(HMM\S+)/i ) {
$self
->{
'_reporttype'
} =
uc
$1;
}
}
if
(
$nm
eq
'Hsp'
) {
foreach
(
qw(Hsp_qseq Hsp_midline Hsp_hseq)
) {
my
$data
=
$self
->{
'_last_hspdata'
}->{
$_
};
if
(
$data
&&
$_
eq
'Hsp_hseq'
) {
$data
=~ s/\./-/g;
}
$self
->element(
{
'Name'
=>
$_
,
'Data'
=>
$data
}
);
}
$self
->{
'_last_hspdata'
} = {};
}
if
(
$type
) {
if
(
$self
->_eventHandler->will_handle(
$type
) ) {
my
$func
=
sprintf
(
"end_%s"
,
lc
$type
);
$rc
=
$self
->_eventHandler->
$func
(
$self
->{
'_reporttype'
},
$self
->{
'_values'
} );
}
my
$lastelem
=
shift
@{
$self
->{
'_elements'
} };
}
elsif
(
$MAPPING
{
$nm
} ) {
if
(
ref
(
$MAPPING
{
$nm
} ) =~ /hash/i ) {
my
$key
= (
keys
%{
$MAPPING
{
$nm
} } )[0];
$self
->{
'_values'
}->{
$key
}->{
$MAPPING
{
$nm
}->{
$key
} } =
$self
->{
'_last_data'
};
}
else
{
$self
->{
'_values'
}->{
$MAPPING
{
$nm
} } =
$self
->{
'_last_data'
};
}
}
else
{
$self
->debug(
"unknown nm $nm, ignoring\n"
);
}
$self
->{
'_last_data'
} =
''
;
$self
->{
'_result'
} =
$rc
if
(
defined
$type
&&
$type
eq
'result'
);
return
$rc
;
}
sub
element{
my
(
$self
,
$data
) =
@_
;
$self
->start_element(
$data
);
$self
->characters(
$data
);
$self
->end_element(
$data
);
}
sub
characters{
my
(
$self
,
$data
) =
@_
;
if
(
$self
->in_element(
'hsp'
)
&&
$data
->{
'Name'
} =~ /Hsp\_(qseq|hseq|midline)/o
&&
defined
$data
->{
'Data'
} )
{
$self
->{
'_last_hspdata'
}->{
$data
->{
'Name'
} } .=
$data
->{
'Data'
};
}
return
unless
(
defined
$data
->{
'Data'
} &&
$data
->{
'Data'
} !~ /^\s+$/o );
$self
->{
'_last_data'
} =
$data
->{
'Data'
};
}
sub
within_element{
my
(
$self
,
$name
) =
@_
;
return
0
if
( !
defined
$name
|| !
defined
$self
->{
'_elements'
}
||
scalar
@{
$self
->{
'_elements'
} } == 0 );
foreach
( @{
$self
->{
'_elements'
} } ) {
return
1
if
(
$_
eq
$name
);
}
return
0;
}
sub
in_element{
my
(
$self
,
$name
) =
@_
;
return
0
if
!
defined
$self
->{
'_elements'
}->[0];
return
(
$self
->{
'_elements'
}->[0] eq
$name
);
}
sub
start_document{
my
(
$self
) =
@_
;
$self
->{
'_lasttype'
} =
''
;
$self
->{
'_values'
} = {};
$self
->{
'_result'
} =
undef
;
$self
->{
'_elements'
} = [];
}
sub
end_document{
my
(
$self
) =
@_
;
return
$self
->{
'_result'
};
}
sub
result_count{
my
$self
=
shift
;
return
$self
->{
'_result_count'
};
}
1;