my
%MODEMAP
= (
'Result'
=>
'result'
,
'Hit'
=>
'hit'
,
'Hsp'
=>
'hsp'
);
my
%MAPPING
= (
'Hsp_score'
=>
'HSP-score'
,
'Hsp_custom-data'
=>
'HSP-custom_score'
,
'Hsp_query-from'
=>
'HSP-query_start'
,
'Hsp_query-to'
=>
'HSP-query_end'
,
'Hsp_hit-from'
=>
'HSP-hit_start'
,
'Hsp_hit-to'
=>
'HSP-hit_end'
,
'Hsp_hseq'
=>
'HSP-hit_seq'
,
'Hsp_align-len'
=>
'HSP-hsp_length'
,
'Hsp_structure'
=>
'HSP-meta'
,
'Hsp_stranded'
=>
'HSP-stranded'
,
'Hit_id'
=>
'HIT-name'
,
'Hit_accession'
=>
'HIT-accession'
,
'Hit_gi'
=>
'HIT-ncbi_gi'
,
'Hit_def'
=>
'HIT-description'
,
'Hit_score'
=>
'HIT-score'
,
'RNAMotif_program'
=>
'RESULT-algorithm_name'
,
'RNAMotif_version'
=>
'RESULT-algorithm_version'
,
'RNAMotif_query-def'
=>
'RESULT-query_name'
,
'RNAMotif_query-acc'
=>
'RESULT-query_accession'
,
'RNAMotif_querydesc'
=>
'RESULT-query_description'
,
'RNAMotif_db'
=>
'RESULT-database_name'
,
);
my
@VALID_SYMBOLS
=
qw(5-prime 3-prime single-strand unknown)
;
my
%STRUCTURE_SYMBOLS
= (
'5-prime'
=>
'<'
,
'3-prime'
=>
'>'
,
'single-strand'
=>
'.'
,
'unknown'
=>
'?'
);
my
$DEFAULT_VERSION
=
'3.0.3'
;
sub
_initialize {
my
(
$self
,
@args
) =
@_
;
$self
->SUPER::_initialize(
@args
);
my
(
$version
,
$model
,
$database
,
$maxcutoff
,
$mincutoff
,
$seqdistance
,
$accession
,
$symbols
) =
$self
->_rearrange([
qw(VERSION MODEL DATABASE HSP_MAXSCORE
HSP_MINSCORE SEQ_DISTANCE QUERY_ACC SYMBOLS)
],
@args
);
my
$handler
=
$self
->_eventHandler;
$handler
->register_factory(
'result'
,
FAST::Bio::Factory::ObjectFactory->new(
-type
=>
'FAST::Bio::Search::Result::GenericResult'
,
-interface
=>
'FAST::Bio::Search::Result::ResultI'
,
-verbose
=>
$self
->verbose
)
);
$handler
->register_factory(
'hit'
,
FAST::Bio::Factory::ObjectFactory->new(
-type
=>
'FAST::Bio::Search::Hit::ModelHit'
,
-interface
=>
'FAST::Bio::Search::Hit::HitI'
,
-verbose
=>
$self
->verbose
)
);
$handler
->register_factory(
'hsp'
,
FAST::Bio::Factory::ObjectFactory->new(
-type
=>
'FAST::Bio::Search::HSP::ModelHSP'
,
-interface
=>
'FAST::Bio::Search::HSP::HSPI'
,
-verbose
=>
$self
->verbose
)
);
$model
&&
$self
->model(
$model
);
$database
&&
$self
->database(
$database
);
$accession
&&
$self
->query_accession(
$accession
);
$version
||=
$DEFAULT_VERSION
;
$self
->algorithm_version(
$version
);
$self
->throw(
"Cannot define both a minimal and maximal cutoff"
)
if
(
defined
(
$mincutoff
) &&
defined
(
$maxcutoff
));
defined
(
$mincutoff
) &&
$self
->hsp_minscore(
$mincutoff
);
defined
(
$maxcutoff
) &&
$self
->hsp_maxscore(
$maxcutoff
);
$symbols
||= \
%STRUCTURE_SYMBOLS
;
$self
->structure_symbols(
$symbols
);
}
sub
next_result {
my
(
$self
) =
@_
;
my
$seentop
= 0;
local
$/ =
"\n"
;
local
$_
;
my
(
$rm
,
$d
,
$descriptor
,
$file
,
$oktobuild
);
my
(
$hitid
,
$hitdesc
,
$hspid
,
$lastid
,
$lastscore
);
my
$sprintf
;
my
(
$accession
,
$db
,
$model
) =
(
$self
->query_accession,
$self
->database,
$self
->model);
my
$hsp_min
=
$self
->hsp_minscore;
my
$hsp_max
=
$self
->hsp_maxscore;
my
$version
=
$self
->algorithm_version;
my
$laststart
;
my
$verbose
=
$self
->verbose;
$self
->start_document();
PARSER:
while
(
defined
(
my
$line
=
$self
->_readline ) ) {
next
if
$line
=~ m{^\s+$};
if
(
index
(
$line
,
'#RM'
) == 0) {
if
(
index
(
$line
,
'#RM scored'
) == 0 ) {
if
(
$seentop
) {
$self
->_pushback(
$line
);
last
PARSER;
}
$self
->start_element({
'Name'
=>
'Result'
});
$self
->element_hash({
'RNAMotif_program'
=>
'rnamotif'
,
'RNAMotif_version'
=>
$version
,
'RNAMotif_query-acc'
=>
$accession
,
'RNAMotif_db'
=>
$db
});
$seentop
= 1;
}
elsif
(
index
(
$line
,
'#RM descr'
) == 0) {
(
$rm
,
$d
,
$descriptor
) =
split
' '
,
$line
, 3;
chomp
$descriptor
;
$self
->{
'_descriptor'
} =
$descriptor
;
$self
->element(
{
'Name'
=>
'RNAMotif_querydesc'
,
'Data'
=>
$descriptor
}
);
}
elsif
(
index
(
$line
,
'#RM dfile'
) == 0) {
(
$rm
,
$d
,
$file
) =
split
' '
,
$line
, 3;
chomp
$file
;
$self
->element(
{
'Name'
=>
'RNAMotif_query-def'
,
'Data'
=>
$file
}
);
}
else
{
$self
->debug(
"Unrecognized line: $line"
);
}
}
elsif
(
$line
=~ s{^>}{}) {
chomp
$line
;
(
$hitid
,
$hitdesc
) =
split
' '
,
$line
,2;
if
(
$self
->within_element(
'hit'
) && (
$hitid
ne
$lastid
)) {
$self
->element(
{
'Name'
=>
'Hit_score'
,
'Data'
=>
$lastscore
}
)
if
$lastscore
;
$self
->end_element({
'Name'
=>
'Hit'
});
$self
->start_element({
'Name'
=>
'Hit'
});
}
elsif
(!
$self
->within_element(
'hit'
)) {
$self
->start_element({
'Name'
=>
'Hit'
});
}
my
(
$gi
,
$acc
,
$ver
) =
$self
->_get_seq_identifiers(
$hitid
);
$self
->element_hash({
'Hit_id'
=>
$hitid
,
'Hit_gi'
=>
$gi
,
'Hit_accession'
=>
$ver
?
"$acc.$ver"
:
$acc
?
$acc
:
$hitid
,
'Hit_def'
=>
$hitdesc
}
);
$lastid
=
$hitid
;
}
elsif
(
$line
=~ m{^(\S+)\s+(.+?)\s+(\d+)\s+(\d+)\s+(\d+)\s(.*)$}xm) {
chomp
$line
;
my
$hspid
= $1;
my
(
$score
,
$strand
,
$start
,
$length
,
$seq
) = ($2, $3, $4, $5, $6);
$score
*= 1;
unless
(
$hitid
eq
$hspid
) {
$self
->throw(
"IDs do not match!"
);
}
if
(!
defined
(
$sprintf
)) {
if
(
$score
=~ m{[^0-9.-]+}gxms) {
if
(
defined
$hsp_min
||
defined
$hsp_max
) {
$self
->
warn
(
"HSP data likely contains custom score; "
.
"ignoring min/maxscore"
);
}
$sprintf
=
$oktobuild
= 1;
}
else
{
$sprintf
= 0;
}
}
if
(!
$sprintf
) {
if
((
$hsp_min
&&
$score
<=
$hsp_min
)
|| (
$hsp_max
&& (
$score
>=
$hsp_max
)) ) {
$oktobuild
= 0;
}
else
{
$oktobuild
= 1;
if
(
defined
$hsp_min
&&
$score
>
$hsp_min
) {
$lastscore
=
$score
if
!
$lastscore
||
$score
>
$lastscore
;
}
elsif
(
defined
$hsp_max
&&
$score
<
$hsp_max
) {
$lastscore
=
$score
if
!
$lastscore
||
$score
<
$lastscore
;
}
}
}
if
(
$oktobuild
) {
my
$end
;
if
(
$strand
==0 ) {
$end
=
$start
+
$length
-1;
}
else
{
$end
=
$start
-
$length
+ 1;
}
my
(
$rna
,
$meta
) =
$self
->_motif2meta(
$seq
,
$descriptor
);
$self
->start_element({
'Name'
=>
'Hsp'
});
my
$rnalen
=
$rna
=~
tr
{ATGCatgc}{ATGCatgc};
$self
->element_hash({
'Hsp_stranded'
=>
'HIT'
,
'Hsp_hseq'
=>
$rna
,
'Hsp_query-from'
=> 1,
'Hsp_query-to'
=>
length
(
$rna
),
'Hsp_hit-from'
=>
$start
,
'Hsp_hit-to'
=>
$end
,
'Hsp_structure'
=>
$meta
,
'Hsp_align-len'
=>
length
(
$rna
),
'Hsp_score'
=>
$sprintf
?
undef
:
$score
,
'Hsp_custom-data'
=>
$sprintf
?
$score
:
undef
,
});
$self
->end_element({
'Name'
=>
'Hsp'
});
$oktobuild
= 0
if
(!
$sprintf
);
}
}
}
if
(
$self
->within_element(
'hit'
)) {
$self
->element(
{
'Name'
=>
'Hit_score'
,
'Data'
=>
$lastscore
}
)
if
$lastscore
;
$self
->end_element( {
'Name'
=>
'Hit'
} );
}
if
(
$seentop
) {
$self
->end_element( {
'Name'
=>
'Result'
} );
}
return
$self
->end_document();
}
sub
start_element {
my
(
$self
,
$data
) =
@_
;
my
$nm
=
$data
->{
'Name'
};
my
$type
=
$MODEMAP
{
$nm
};
if
(
$type
) {
if
(
$self
->_eventHandler->will_handle(
$type
) ) {
my
$func
=
sprintf
(
"start_%s"
,
lc
$type
);
$self
->_eventHandler->
$func
(
$data
->{
'Attributes'
} );
}
unshift
@{
$self
->{
'_elements'
} },
$type
;
}
if
(
defined
$type
&&
$type
eq
'result'
)
{
$self
->{
'_values'
} = {};
$self
->{
'_result'
} =
undef
;
}
}
sub
end_element {
my
(
$self
,
$data
) =
@_
;
my
$nm
=
$data
->{
'Name'
};
my
$type
=
$MODEMAP
{
$nm
};
my
$rc
;
if
(
$type
) {
if
(
$self
->_eventHandler->will_handle(
$type
) ) {
my
$func
=
sprintf
(
"end_%s"
,
lc
$type
);
$rc
=
$self
->_eventHandler->
$func
(
$self
->{
'_reporttype'
},
$self
->{
'_values'
} );
}
my
$lastelem
=
shift
@{
$self
->{
'_elements'
} };
}
elsif
(
$MAPPING
{
$nm
} ) {
if
(
ref
(
$MAPPING
{
$nm
} ) =~ /hash/i ) {
my
$key
= (
keys
%{
$MAPPING
{
$nm
} } )[0];
$self
->{
'_values'
}->{
$key
}->{
$MAPPING
{
$nm
}->{
$key
} } =
$self
->{
'_last_data'
};
}
else
{
$self
->{
'_values'
}->{
$MAPPING
{
$nm
} } =
$self
->{
'_last_data'
};
}
}
else
{
$self
->debug(
"unknown nm $nm, ignoring\n"
);
}
$self
->{
'_last_data'
} =
''
;
$self
->{
'_result'
} =
$rc
if
(
defined
$type
&&
$type
eq
'result'
);
return
$rc
;
}
sub
element {
my
(
$self
,
$data
) =
@_
;
$self
->characters(
$data
);
$self
->end_element(
$data
);
}
sub
element_hash {
my
(
$self
,
$data
) =
@_
;
$self
->throw(
"Must provide data hash ref"
)
if
!
$data
|| !
ref
(
$data
);
for
my
$nm
(
sort
keys
%{
$data
}) {
next
if
$data
->{
$nm
} &&
$data
->{
$nm
} =~ m{^\s*$}o;
if
(
$MAPPING
{
$nm
} ) {
if
(
ref
(
$MAPPING
{
$nm
} ) =~ /hash/i ) {
my
$key
= (
keys
%{
$MAPPING
{
$nm
} } )[0];
$self
->{
'_values'
}->{
$key
}->{
$MAPPING
{
$nm
}->{
$key
} } =
$data
->{
$nm
};
}
else
{
$self
->{
'_values'
}->{
$MAPPING
{
$nm
} } =
$data
->{
$nm
};
}
}
}
}
sub
characters {
my
(
$self
,
$data
) =
@_
;
return
unless
(
defined
$data
->{
'Data'
} &&
$data
->{
'Data'
} !~ /^\s+$/o );
$self
->{
'_last_data'
} =
$data
->{
'Data'
};
}
sub
within_element {
my
(
$self
,
$name
) =
@_
;
return
0
if
( !
defined
$name
|| !
defined
$self
->{
'_elements'
}
||
scalar
@{
$self
->{
'_elements'
} } == 0 );
foreach
( @{
$self
->{
'_elements'
} } ) {
return
1
if
(
$_
eq
$name
);
}
return
0;
}
sub
in_element {
my
(
$self
,
$name
) =
@_
;
return
0
if
!
defined
$self
->{
'_elements'
}->[0];
return
(
$self
->{
'_elements'
}->[0] eq
$name
);
}
sub
start_document {
my
(
$self
) =
@_
;
$self
->{
'_lasttype'
} =
''
;
$self
->{
'_values'
} = {};
$self
->{
'_result'
} =
undef
;
$self
->{
'_elements'
} = [];
}
sub
end_document {
my
(
$self
) =
@_
;
return
$self
->{
'_result'
};
}
sub
result_count {
my
$self
=
shift
;
return
$self
->{
'_result_count'
};
}
sub
descriptor {
my
$self
=
shift
;
return
$self
->{
'_descriptor'
} =
shift
if
@_
;
return
$self
->{
'_descriptor'
};
}
sub
model {
shift
->descriptor(
@_
) }
sub
database {
my
$self
=
shift
;
return
$self
->{
'_database'
} =
shift
if
@_
;
return
$self
->{
'_database'
};
}
sub
query_accession {
my
$self
=
shift
;
return
$self
->{
'_query_accession'
} =
shift
if
@_
;
return
$self
->{
'_query_accession'
};
}
sub
algorithm_version {
my
$self
=
shift
;
return
$self
->{
'_algorithm'
} =
shift
if
@_
;
return
$self
->{
'_algorithm'
};
}
sub
hsp_minscore {
my
(
$self
,
$score
) =
shift
;
$self
->throw(
'Minscore not set to a number'
)
if
(
$score
&&
$score
!~ m{[0-9.]+});
return
$self
->{
'_hsp_minscore'
} =
shift
if
@_
;
return
$self
->{
'_hsp_minscore'
};
}
sub
hsp_maxscore {
my
(
$self
,
$score
) =
shift
;
$self
->throw(
'Maxscore not set to a number'
)
if
(
$score
&&
$score
!~ m{[0-9.]+});
return
$self
->{
'_hsp_maxscore'
} =
shift
if
@_
;
return
$self
->{
'_hsp_maxscore'
};
}
sub
structure_symbols {
my
(
$self
,
$delim
) =
@_
;
if
(
$delim
) {
if
(
ref
(
$delim
) =~ m{HASH}) {
my
%data
= %{
$delim
};
for
my
$d
(
@VALID_SYMBOLS
) {
if
(
exists
$data
{
$d
} ) {
$self
->{
'_delimiter'
}->{
$d
} =
$data
{
$d
};
}
}
}
else
{
$self
->throw(
"Args to helix_delimiters() should be in a hash reference"
);
}
}
return
$self
->{
'_delimiter'
};
}
sub
_motif2meta {
my
(
$self
,
$str
,
$descriptor
) =
@_
;
my
(
$rna
,
$meta
);
my
@desc_el
=
split
' '
,
$descriptor
;
my
@seq_el
=
split
' '
,
$str
;
my
$symbol
=
$self
->structure_symbols();
if
(
$#desc_el
!=
$#seq_el
) {
$self
->throw(
"Descriptor elements and seq elements do not match"
);
}
while
(
@desc_el
) {
my
$struct
;
my
(
$seq
,
$motif
) = (
shift
@seq_el
,
shift
@desc_el
);
$struct
= (
index
(
$motif
,
'h5'
) == 0) ?
$symbol
->{
'5-prime'
} :
(
index
(
$motif
,
'h3'
) == 0) ?
$symbol
->{
'3-prime'
} :
(
index
(
$motif
,
'ss'
) == 0) ?
$symbol
->{
'single-strand'
} :
(
index
(
$motif
,
'ctx'
)== 0) ?
$symbol
->{
'single-strand'
} :
$symbol
->{
'unknown'
};
$meta
.=
$struct
x (
length
(
$seq
));
$rna
.=
$seq
;
}
return
(
$rna
,
$meta
);
}
1;