use
vars
qw($RSTFILENAME)
;
BEGIN {
$RSTFILENAME
=
'rst'
;
}
sub
new {
my
(
$class
,
@args
) =
@_
;
my
$self
=
$class
->SUPER::new(
@args
);
$self
->_initialize_io(
@args
);
my
(
$dir
) =
$self
->_rearrange([
qw(DIR)
],
@args
);
$self
->{_dir} =
$dir
if
defined
$dir
;
return
$self
;
}
sub
next_result {
my
(
$self
) =
@_
;
my
%data
;
$self
->_parse_rst();
my
$idlookup
;
$self
->_parse_summary
unless
(
$self
->{
'_summary'
} && !
$self
->{
'_summary'
}->{
'multidata'
});
my
$seqtype
=
$self
->{
'_summary'
}->{
'seqtype'
};
if
(
$seqtype
eq
'CODONML'
||
$seqtype
eq
'AAML'
) {
my
$has_model_line
= 0;
while
(
defined
(
$_
=
$self
->_readline)) {
if
(
$seqtype
eq
'CODONML'
&&
m/^pairwise comparison, codon frequencies:/) {
$self
->_pushback(
$_
);
%data
=
$self
->_parse_PairwiseCodon;
last
;
}
elsif
(
$seqtype
eq
'AAML'
&& m/^ML distances of aa seqs\.$/) {
$self
->_pushback(
$_
);
%data
= (
'-AAMLdistmat'
=>
$self
->_parse_aa_dists());
}
elsif
(m/^Model\s+(\d+)/ ||
((!
$has_model_line
&& m/^TREE/) &&
$seqtype
eq
'CODONML'
)) {
$self
->_pushback(
$_
);
my
$model
=
$self
->_parse_NSsitesBatch;
push
@{
$data
{
'-NSsitesresults'
}},
$model
;
$has_model_line
= 1;
}
elsif
( m/
for
each
branch/ ) {
my
%branch_dnds
=
$self
->_parse_branch_dnds;
if
( !
defined
$data
{
'-trees'
} ) {
warn
(
"No trees have been loaded, can't do anything\n"
);
next
;
}
my
(
$tree
) = @{
$data
{
'-trees'
}};
if
( !
$tree
|| !
ref
(
$tree
) ||
!
$tree
->isa(
'Bio::Tree::Tree'
) ) {
warn
(
"no tree object already stored!\n"
);
next
;
}
while
(
my
(
$k
,
$v
) =
each
%branch_dnds
) {
my
@nodes
;
for
my
$id
(
split
(/\.\./,
$k
) ) {
my
@nodes_L
=
map
{
$tree
->find_node(
-id
=>
$_
) } @{
$idlookup
->{
$id
}};
my
$n
=
@nodes_L
< 2 ?
shift
(
@nodes_L
) :
$tree
->get_lca(
@nodes_L
);
if
( !
$n
) {
warn
(
"no node for $n\n"
);
}
unless
(
$n
->is_Leaf &&
$n
->id) {
$n
->id(
$id
);
}
push
@nodes
,
$n
;
}
my
(
$parent
,
$child
) =
@nodes
;
while
(
my
(
$kk
,
$vv
) =
each
%$v
) {
$child
->add_tag_value(
$kk
,
$vv
);
}
}
}
elsif
(m/^TREE/) {
$self
->_pushback(
$_
);
(
$data
{
'-trees'
},
$idlookup
) =
$self
->_parse_Forestry;
}
elsif
(m/Heuristic tree search by stepwise addition$/ ) {
$self
->throw(
-class
=>
'Bio::Root::NotImplemented'
,
-text
=>
"StepwiseAddition not yet implemented!"
);
}
elsif
(m/Heuristic tree search by NNI perturbation$/) {
$self
->throw(
-class
=>
'Bio::Root::NotImplemented'
,
-text
=>
"NNI Perturbation not yet implemented!"
);
}
elsif
(m/^stage 0:/) {
$self
->throw(
-class
=>
'Bio::Root::NotImplemented'
,
-text
=>
"StarDecomposition not yet implemented!"
);
$self
->_pushback(
$_
);
%data
=
$self
->_parse_StarDecomposition;
last
;
}
}
}
elsif
(
$seqtype
eq
'BASEML'
) {
while
(
defined
(
$_
=
$self
->_readline) ) {
if
( /^Distances:/ ) {
$self
->_pushback(
$_
);
my
(
$kappa
,
$alpha
) =
$self
->_parse_nt_dists();
%data
= (
'-kappa_distmat'
=>
$kappa
,
'-alpha_distmat'
=>
$alpha
);
}
elsif
( /^TREE/ ) {
$self
->_pushback(
$_
);
(
$data
{
'-trees'
},
$idlookup
) =
$self
->_parse_Forestry;
}
}
}
elsif
(
$seqtype
eq
'YN00'
) {
while
(
$_
=
$self
->_readline) {
if
( m/^Estimation by the method|\(B\) Yang & Nielsen \(2000\) method/ ) {
$self
->_pushback(
$_
);
%data
=
$self
->_parse_YN_Pairwise;
last
;
}
}
}
if
(
%data
) {
$data
{
'-version'
} =
$self
->{
'_summary'
}->{
'version'
};
$data
{
'-seqs'
} =
$self
->{
'_summary'
}->{
'seqs'
};
$data
{
'-patterns'
} =
$self
->{
'_summary'
}->{
'patterns'
};
$data
{
'-ngmatrix'
} =
$self
->{
'_summary'
}->{
'ngmatrix'
};
$data
{
'-codonpos'
} =
$self
->{
'_summary'
}->{
'codonposition'
};
$data
{
'-codonfreq'
} =
$self
->{
'_summary'
}->{
'codonfreqs'
};
$data
{
'-model'
} =
$self
->{
'_summary'
}->{
'model'
};
$data
{
'-seqfile'
} =
$self
->{
'_summary'
}->{
'seqfile'
};
$data
{
'-aadistmat'
} =
$self
->{
'_summary'
}->{
'aadistmat'
};
$data
{
'-stats'
} =
$self
->{
'_summary'
}->{
'stats'
};
$data
{
'-aafreq'
} =
$self
->{
'_summary'
}->{
'aafreqs'
};
$data
{
'-ntfreq'
} =
$self
->{
'_summary'
}->{
'ntfreqs'
};
$data
{
'-input_params'
} =
$self
->{
'_summary'
}->{
'inputparams'
};
$data
{
'-rst'
} =
$self
->{
'_rst'
}->{
'rctrted_seqs'
};
$data
{
'-rst_persite'
} =
$self
->{
'_rst'
}->{
'persite'
};
$data
{
'-rst_trees'
} =
$self
->{
'_rst'
}->{
'trees'
};
return
Bio::Tools::Phylo::PAML::Result->new(
%data
);
}
else
{
return
;
}
}
sub
_parse_summary {
my
(
$self
) =
@_
;
my
$SEQTYPES
=
qr( (?: (?: CODON | AA | BASE | CODON2AA )
ML ) | YN00 )x;
while
(
$_
=
$self
->_readline) {
if
( m/^(
$SEQTYPES
) \s+
(?: \(in \s+ ([^\)]+?) \s* \) \s* )?
(\S+) \s*
(?: (.+?) )?
\s* $
/ox
) {
@{
$self
->{
'_summary'
}}{
qw(seqtype version seqfile model)
} = ($1,
$2,
$3,
$4);
defined
$self
->{
'_summary'
}->{
'model'
} &&
$self
->{
'_summary'
}->{
'model'
} =~ s/Model:\s+//;
last
;
}
elsif
(m/^Data set \d$/) {
$self
->{
'_summary'
} = {};
$self
->{
'_summary'
}->{
'multidata'
}++;
}
}
unless
(
defined
$self
->{
'_summary'
}->{
'seqtype'
}) {
$self
->throw(
-class
=>
'Bio::Root::NotImplemented'
,
-text
=>
'Unknown format of PAML output did not see seqtype'
);
}
my
$seqtype
=
$self
->{
'_summary'
}->{
'seqtype'
};
$self
->debug(
"seqtype is $seqtype\n"
);
if
(
$seqtype
eq
"CODONML"
) {
$self
->_parse_inputparams();
$self
->_parse_patterns();
$self
->_parse_seqs();
$self
->_parse_codoncts();
$self
->_parse_codon_freqs();
$self
->_parse_distmat();
}
elsif
(
$seqtype
eq
"AAML"
) {
$self
->_parse_inputparams;
$self
->_parse_patterns();
$self
->_parse_seqs();
$self
->_parse_aa_freqs();
$self
->{
'_summary'
}->{
'aadistmat'
} =
$self
->_parse_aa_dists();
}
elsif
(
$seqtype
eq
"CODON2AAML"
) {
$self
->throw(
-class
=>
'Bio::Root::NotImplemented'
,
-text
=>
'CODON2AAML parsing not yet implemented!'
);
}
elsif
(
$seqtype
eq
"BASEML"
) {
$self
->_parse_patterns();
$self
->_parse_seqs();
$self
->_parse_nt_freqs();
}
elsif
(
$seqtype
eq
"YN00"
) {
$self
->_parse_codon_freqs();
$self
->_parse_codoncts();
$self
->_parse_distmat();
}
else
{
$self
->throw(
-class
=>
'Bio::Root::NotImplemented'
,
-text
=>
'Unknown seqtype, not yet implemented!'
,
-value
=>
$seqtype
);
}
}
sub
_parse_inputparams {
my
(
$self
) =
@_
;
while
(
defined
(
$_
=
$self
->_readline ) ) {
if
(/^((?:Codon frequencies)|(?:Site-class models))\s*:\s+(.+)/ ) {
my
(
$param
,
$val
) = ($1,$2);
$self
->{
'_summary'
}->{
'inputparams'
}->{
$param
} =
$val
;
}
elsif
( /^\s+$/ ) {
next
;
}
elsif
( /^ns\s+=\s+/ ) {
$self
->_pushback(
$_
);
last
;
}
}
}
sub
_parse_codon_freqs {
my
(
$self
) =
@_
;
my
(
$okay
,
$done
) = (0,0);
while
(
defined
(
$_
=
$self
->_readline ) ) {
if
( /^Nei|\(A\) Nei/ ) {
$self
->_pushback(
$_
);
last
}
last
if
(
$done
);
next
if
( /^\s+/);
next
unless
(
$okay
|| /^Codon position x base \(3x4\) table\, overall/ );
$okay
= 1;
if
( s/^position\s+(\d+):\s+// ) {
my
$pos
= $1;
s/\s+$//;
my
@bases
=
split
;
foreach
my
$str
(
@bases
) {
my
(
$base
,
$freq
) =
split
(/:/,
$str
,2);
$self
->{
'_summary'
}->{
'codonposition'
}->[
$pos
-1]->{
$base
} =
$freq
;
}
$done
= 1
if
$pos
== 3;
}
}
$done
= 0;
while
(
defined
(
$_
=
$self
->_readline) ) {
if
( /^Nei\s\&\sGojobori|\(A\)\sNei-Gojobori/ ) {
$self
->_pushback(
$_
);
last
}
last
if
(
$done
);
if
( /^Codon frequencies under model,
for
use
in evolver:/ ){
while
(
defined
(
$_
=
$self
->_readline) ) {
last
if
( /^\s+$/ );
s/^\s+//;
s/\s+$//;
push
@{
$self
->{
'_summary'
}->{
'codonfreqs'
}},[
split
];
}
$done
= 1;
}
}
}
sub
_parse_aa_freqs {
my
(
$self
) =
@_
;
my
(
$okay
,
$done
,
$header
) = (0,0,0);
my
(
@bases
);
my
$numseqs
=
scalar
@{
$self
->{
'_summary'
}->{
'seqs'
} || []};
while
(
defined
(
$_
=
$self
->_readline ) ) {
if
( /^TREE/ || /^AA distances/ ) {
$self
->_pushback(
$_
);
last
}
last
if
(
$done
);
next
if
( /^\s+$/ || /^\(Ambiguity/ );
if
( /^Frequencies\./ ) {
$okay
= 1;
}
elsif
( !
$okay
) {
next
;
}
elsif
( !
$header
) {
s/^\s+//;
@bases
=
split
;
$header
= 1;
$self
->{
'_summary'
}->{
'aafreqs'
} = {};
next
;
}
elsif
( /^\
(\d+)\s+
\(\s*([\d\.]+)\s*\%\s*\)/x){
$self
->{
'_summary'
}->{
'stats'
}->{
'constant_sites'
} = $1;
$self
->{
'_summary'
}->{
'stats'
}->{
'constant_sites_percentage'
} = $2;
}
elsif
( /^ln\s+Lmax\s+\(unconstrained\)\s+\=\s+(\S+)/x ) {
$self
->{
'_summary'
}->{
'stats'
}->{
'loglikelihood'
} = $1;
$done
= 1;
}
else
{
my
(
$seqname
,
@freqs
) =
split
;
my
$basect
= 0;
foreach
my
$f
(
@freqs
) {
$self
->{
'_summary'
}->{
'aafreqs'
}->{
$seqname
}->{
$bases
[
$basect
++]} =
$f
;
}
}
}
}
sub
_parse_StarDecomposition {
my
(
$self
) =
@_
;
my
%data
;
return
%data
;
}
sub
_parse_aa_dists {
my
(
$self
) =
@_
;
my
(
$okay
,
$seen
,
$done
) = (0,0,0);
my
(
%matrix
,
@names
,
@values
);
my
$numseqs
=
scalar
@{
$self
->{
'_summary'
}->{
'seqs'
} || []};
my
$type
=
''
;
while
(
defined
(
$_
=
$self
->_readline ) ) {
last
if
$done
;
if
( /^TREE/ ) {
$self
->_pushback(
$_
);
last
; }
if
( /^\s+$/ ) {
last
if
(
$seen
);
next
;
}
if
( /^(AA|ML) distances/ ) {
$okay
= 1;
$type
= $1;
next
;
}
s/\s+$//g;
if
(
$okay
) {
my
(
$seqname
,
@vl
) =
split
;
$seen
= 1;
my
$i
= 0;
if
(
$type
eq
'ML'
&&
!
@names
&&
@vl
) {
push
@names
,
$self
->{
'_summary'
}->{
'seqs'
}->[0]->display_id;
}
for
my
$s
(
@names
) {
last
unless
@vl
;
$matrix
{
$seqname
}->{
$s
} =
$matrix
{
$s
}->{
$seqname
} =
shift
@vl
;
}
push
@names
,
$seqname
;
$matrix
{
$seqname
}->{
$seqname
} = 0;
}
$done
= 1
if
(
scalar
@names
==
$numseqs
);
}
my
%dist
;
my
$i
= 0;
@values
= ();
foreach
my
$lname
(
@names
) {
my
@row
;
my
$j
= 0;
foreach
my
$rname
(
@names
) {
my
$v
=
$matrix
{
$lname
}->{
$rname
};
$v
=
$matrix
{
$rname
}->{
$lname
}
unless
defined
$v
;
push
@row
,
$v
;
$dist
{
$lname
}{
$rname
} = [
$i
,
$j
++];
}
$i
++;
push
@values
, \
@row
;
}
return
new Bio::Matrix::PhylipDist
(
-program
=>
$self
->{
'_summary'
}->{
'seqtype'
},
-matrix
=> \
%dist
,
-names
=> \
@names
,
-values
=> \
@values
);
}
sub
_parse_patterns {
my
(
$self
) =
@_
;
my
(
$patternct
,
@patterns
,
$ns
,
$ls
);
while
(
defined
(
$_
=
$self
->_readline) ) {
if
( /^Codon position/ ) {
$self
->_pushback(
$_
);
last
;
}
elsif
( /^Codon usage/ ) {
$self
->_pushback(
$_
);
last
;
}
elsif
(
$patternct
) {
last
if
( /^\s+$/ );
s/^\s+//;
push
@patterns
,
split
;
}
elsif
( /^ns\s+\=\s*(\d+)\s+ls\s+\=\s*(\d+)/ ) {
(
$ns
,
$ls
) = ($1,$2);
}
elsif
( /^\
$patternct
= $1;
}
else
{
}
}
$self
->{
'_summary'
}->{
'patterns'
} = {
-patterns
=> \
@patterns
,
-ns
=>
$ns
,
-ls
=>
$ls
};
}
sub
_parse_seqs {
my
(
$self
) =
@_
;
my
(
@firstseq
,
@seqs
);
while
(
defined
(
$_
=
$self
->_readline) ) {
if
( /^(TREE|Codon)/ ) {
$self
->_pushback(
$_
);
last
}
last
if
( /^\s+$/ &&
@seqs
> 0 );
next
if
( /^\s+$/ );
next
if
( /^\d+\s+$/ );
my
(
$name
,
$seqstr
) =
split
(/\s+/,
$_
,2);
$seqstr
=~ s/\s+//g;
unless
(
@firstseq
) {
@firstseq
=
split
(//,
$seqstr
);
push
@seqs
, new Bio::PrimarySeq(
-display_id
=>
$name
,
-seq
=>
$seqstr
);
}
else
{
my
$i
= 0;
my
$v
;
while
((
$v
=
index
(
$seqstr
,
'.'
,
$i
)) >=
$i
) {
substr
(
$seqstr
,
$v
,1,
$firstseq
[
$v
]);
$i
=
$v
;
}
$self
->debug(
"adding seq $seqstr\n"
);
push
@seqs
, new Bio::PrimarySeq(
-display_id
=>
$name
,
-seq
=>
$seqstr
);
}
}
$self
->{
'_summary'
}->{
'seqs'
} = \
@seqs
;
1;
}
sub
_parse_codoncts { }
sub
_parse_distmat {
my
(
$self
) =
@_
;
my
@results
;
my
$ver
= 3.14;
while
(
defined
(
$_
=
$self
->_readline) ) {
next
if
/^\s+$/;
if
(/^\(A\)\sNei-Gojobori\s\(1986\)\smethod/) {
$ver
= 3.15;
$_
=
$self
->_readline;
$_
=
$self
->_readline;
$_
=
$self
->_readline;
$_
=
$self
->_readline;
}
last
;
}
return
unless
(/^Nei\s*\&\s
*Gojobori
/);
$self
->_readline
if
(
$ver
> 3.14);
if
(
$self
->{
'_summary'
}->{
'seqtype'
} eq
'CODONML'
) {
$self
->_readline;
$self
->_readline;
$self
->_readline;
}
my
$seqct
= 0;
my
@seqs
;
while
(
defined
(
$_
=
$self
->_readline ) ) {
last
if
( /^\s+$/ &&
exists
$self
->{
'_summary'
}->{
'ngmatrix'
} );
next
if
( /^\s+$/ || /^NOTE:/i );
chomp
;
my
(
$seq
,
$rest
) =
split
(/\s+/,
$_
,2);
$rest
=
''
unless
defined
$rest
;
my
$j
= 0;
if
(
$self
->{
'_summary'
}->{
'seqtype'
} eq
'YN00'
) {
push
@seqs
, Bio::PrimarySeq->new(
-display_id
=>
$seq
);
}
while
(
$rest
&&
$rest
=~
/(\-?\d+(\.\d+)?)\s*\(\-?(\d+(\.\d+)?)\s+(\-?\d+(\.\d+)?)\)/g ) {
$self
->{
'_summary'
}->{
'ngmatrix'
}->[
$j
++]->[
$seqct
] =
{
'omega'
=> $1,
'dN'
=> $3,
'dS'
=> $5 };
}
$seqct
++;
}
if
(
$self
->{
'_summary'
}->{
'seqtype'
} eq
'YN00'
&&
@seqs
){
$self
->{
'_summary'
}->{
'seqs'
} = \
@seqs
;
}
1;
}
sub
_parse_PairwiseCodon {
my
(
$self
) =
@_
;
my
@result
;
my
(
$a
,
$b
,
$log
,
$model
,
$t
,
$kappa
,
$omega
);
while
(
defined
(
$_
=
$self
->_readline) ) {
if
( /^pairwise comparison, codon frequencies\:\s*(\S+)\./) {
$model
= $1;
}
elsif
( /^(\d+)\s+\((\S+)\)\s+\.\.\.\s+(\d+)\s+\((\S+)\)/ ) {
(
$a
,
$b
) = ($1,$3);
}
elsif
( /^lnL\s+\=\s*(\-?\d+(\.\d+)?)/ ) {
$log
= $1;
if
(
defined
(
$_
=
$self
->_readline) ) {
s/^\s+//;
(
$t
,
$kappa
,
$omega
) =
split
;
}
}
elsif
( m/^t\=\s*(\d+(\.\d+)?)\s+
S\=\s*(\d+(\.\d+)?)\s+
N\=\s*(\d+(\.\d+)?)\s+
dN\/dS\=\s*(\d+(\.\d+)?)\s+
dN\=\s*(\d+(\.\d+)?)\s+
dS\=\s*(\d+(\.\d+)?)/ox ) {
$result
[
$b
-1]->[
$a
-1] = {
'lnL'
=>
$log
,
't'
=>
defined
$t
&&
length
(
$t
) ?
$t
: $1,
'S'
=> $3,
'N'
=> $5,
'kappa'
=>
$kappa
,
'omega'
=>
defined
$omega
&&
length
(
$omega
) ?
$omega
: $7,
'dN'
=> $9,
'dS'
=> $11 };
}
elsif
( /^\s+$/ ) {
next
;
}
elsif
( /^\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)/ ) {
}
else
{
$self
->debug(
"unknown line: $_"
);
}
}
return
(
-mlmatrix
=> \
@result
);
}
sub
_parse_YN_Pairwise {
my
(
$self
) =
@_
;
my
@result
;
while
(
defined
(
$_
=
$self
->_readline) ) {
last
if
( /^seq\.\s+seq\./);
}
while
(
defined
(
$_
=
$self
->_readline) ) {
if
( m/^\s+(\d+)\s+
(\d+)\s+
(\d+(\.\d+))\s+
(\d+(\.\d+))\s+
(\d+(\.\d+))\s+
(\d+(\.\d+))\s+
(\d+(\.\d+))\s+
\-??(\d+(\.\d+))\s+
\+\-\s+
\-??(\d+(\.\d+))\s+
\-??(\d+(\.\d+))\s+
\+\-\s+
\-??(\d+(\.\d+))\s+
/ox
) {
$result
[$2-1]->[$1-1] = {
'S'
=> $3,
'N'
=> $5,
't'
=> $7,
'kappa'
=> $9,
'omega'
=> $11,
'dN'
=> $13,
'dN_SE'
=> $15,
'dS'
=> $17,
'dS_SE'
=> $19,
};
}
elsif
( /^\s+$/ ) {
next
;
}
elsif
( /^\(C\) LWL85, LPB93 & LWLm methods/) {
$self
->_pushback(
$_
);
last
;
}
}
return
(
-mlmatrix
=> \
@result
);
}
sub
_parse_Forestry {
my
(
$self
) =
@_
;
my
(
$instancecount
,
$num_param
,
$loglikelihood
,
$score
,
$done
,
$treelength
) = (0,0,0,0,0,0);
my
$okay
= 0;
my
(
@ids
,
%match
,
@branches
,
@trees
);
while
(
defined
(
$_
=
$self
->_readline) ) {
last
if
$done
;
if
( s/^TREE\s+\
(
$score
) = (s/MP\s+score\:\s+(\S+)\s+$// );
@ids
= /(\d+)[\,\)]/g;
}
elsif
( /^Node\s+\&/ || /^\s+N37/ || /^(CODONML|AAML|YN00|BASEML)/ ||
/^\*\*/ || /^Detailed output identifying parameters/) {
$self
->_pushback(
$_
);
$done
= 1;
last
;
}
elsif
( /^tree\s+
length
\s+\=\s+(\S+)/ ) {
$treelength
= $1;
}
elsif
( /^\s
*lnL
\(.+np\:\s*(\d+)\)\:\s+(\S+)/ ) {
(
$num_param
,
$loglikelihood
) = ($1,$2);
}
elsif
( /^\(/) {
s/([\,:])\s+/$1/g;
my
$treestr
= new IO::String(
$_
);
my
$treeio
= new Bio::TreeIO(
-fh
=>
$treestr
,
-format
=>
'newick'
);
my
$tree
=
$treeio
->next_tree;
if
(
$tree
) {
$tree
->score(
$loglikelihood
);
$tree
->id(
"num_param:$num_param"
);
if
(
$okay
> 0 ) {
if
( !
%match
&&
@ids
) {
my
$i
= 0;
for
my
$m
( /([^():,]+):/g ) {
$match
{
shift
@ids
} = [
$m
];
}
my
%grp
;
while
(
my
$br
=
shift
@branches
) {
my
(
$parent
,
$child
) =
@$br
;
if
(
$match
{
$child
} ) {
push
@{
$match
{
$parent
}}, @{
$match
{
$child
}};
}
else
{
push
@branches
,
$br
;
}
}
if
(
$self
->verbose > 1 ) {
for
my
$k
(
sort
{
$a
<=>
$b
}
keys
%match
) {
$self
->debug(
"$k -> "
,
join
(
","
,@{
$match
{
$k
}}),
"\n"
);
}
}
}
push
@trees
,
$tree
;
}
}
$okay
++;
}
elsif
( /^\s*\d+\.\.\d+/ ) {
push
@branches
,
map
{ [
split
(/\.\./,
$_
)] }
split
;
}
}
return
\
@trees
,\
%match
;
}
sub
_parse_NSsitesBatch {
my
$self
=
shift
;
my
(
%data
,
$idlookup
);
my
(
$okay
,
$done
) =(0,0);
while
(
defined
(
$_
=
$self
->_readline) ) {
last
if
$done
;
next
if
/^\s+$/;
next
unless
(
$okay
|| /^Model\s+\d+/ || /^TREE/);
if
( /^Model\s+(\d+)/ ) {
if
(
$okay
) {
$self
->_pushback(
$_
);
$done
= 1;
}
else
{
chomp
;
$data
{
'-model_num'
} = $1;
(
$data
{
'-model_description'
}) = ( /\:\s+(.+)/ );
$okay
= 1;
}
}
elsif
( /^Time used\:\s+(\S+)/ ) {
$data
{
'-time_used'
} = $1;
$done
= 1;
}
elsif
( /^kappa\s+\(ts\/tv\)\s+\=\s+(\S+)/ ) {
$data
{
'-kappa'
} = $1;
}
elsif
( /^TREE/ ) {
$self
->_pushback(
$_
);
(
$data
{
'-trees'
},
$idlookup
) =
$self
->_parse_Forestry;
if
(
defined
$data
{
'-trees'
} &&
scalar
@{
$data
{
'-trees'
}} ) {
$data
{
'-likelihood'
}=
$data
{
'-trees'
}->[0]->score;
}
$okay
= 1;
}
elsif
( /^omega\s+\(dn\/ds\)\s+\=\s+(\S+)/i ) {
my
@p
= (
q/1.00000/
);
my
@w
= $1;
$data
{
'-dnds_site_classes'
} = {
'p'
=> \
@p
,
'w'
=> \
@w
};
$data
{
q/-num_site_classes/
} = 1;
}
elsif
( /^(Naive Empirical Bayes)|(Bayes Empirical Bayes)|(Positively\sselected\ssites)/i ) {
$self
->_pushback(
$_
);
my
(
$sites
,
$neb
,
$beb
) =
$self
->_parse_Pos_selected_sites;
$data
{
'-pos_sites'
} =
$sites
;
$data
{
'-neb_sites'
} =
$neb
;
$data
{
'-beb_sites'
} =
$beb
;
}
elsif
( /^dN/i ) {
if
( /K\=(\d+)/ ) {
$data
{
'-num_site_classes'
} = $1;
while
(
$_
=
$self
->_readline) {
unless
(
$_
=~ /^\s+$/) {
$self
->_pushback(
$_
);
last
;
}
}
if
( /^site class/ ) {
$self
->_readline;
my
@p
=
$self
->_readline =~ /(\d+\.\d{5})/g;
my
@b_w
=
$self
->_readline =~ /(\d+\.\d{5})/g;
my
@f_w
=
$self
->_readline =~ /(\d+\.\d{5})/g;
my
@w
;
foreach
my
$i
(0..
$#b_w
) {
push
@w
, {
q/background/
=>
$b_w
[
$i
],
q/foreground/
=>
$f_w
[
$i
] };
}
$data
{
'-dnds_site_classes'
} = {
q/p/
=> \
@p
,
q/w/
=> \
@w
};
}
else
{
my
@p
=
$self
->_readline =~ /(\d+\.\d{5})/g;
my
@w
=
$self
->_readline =~ /(\d+\.\d{5})/g;
$data
{
'-dnds_site_classes'
} = {
'p'
=> \
@p
,
'w'
=> \
@w
};
}
}
elsif
( /
for
each
branch/ ) {
my
%branch_dnds
=
$self
->_parse_branch_dnds;
if
( !
defined
$data
{
'-trees'
} ) {
warn
(
"No trees have been loaded, can't do anything\n"
);
next
;
}
my
(
$tree
) = @{
$data
{
'-trees'
}};
if
( !
$tree
|| !
ref
(
$tree
) ||
!
$tree
->isa(
'Bio::Tree::Tree'
) ) {
warn
(
"no tree object already stored!\n"
);
next
;
}
while
(
my
(
$k
,
$v
) =
each
%branch_dnds
) {
my
@nodes
;
for
my
$id
(
split
(/\.\./,
$k
) ) {
my
@nodes_L
=
map
{
$tree
->find_node(
-id
=>
$_
) } @{
$idlookup
->{
$id
}};
my
$n
=
@nodes_L
< 2 ?
shift
(
@nodes_L
) :
$tree
->get_lca(
@nodes_L
);
if
( !
$n
) {
$self
->
warn
(
"no node could be found for $id (no lca?)"
);
}
unless
(
$n
->is_Leaf &&
$n
->id) {
$n
->id(
$id
);
}
push
@nodes
,
$n
;
}
my
(
$parent
,
$child
) =
@nodes
;
while
(
my
(
$kk
,
$vv
) =
each
%$v
) {
$child
->add_tag_value(
$kk
,
$vv
);
}
}
}
}
elsif
( /^Parameters in beta:/ ) {
$_
=
$self
->_readline;
if
( /p\=\s+(\S+)\s+
q\=\
s+(\S+)/ ) {
$data
{
'-shape_params'
} = {
'shape'
=>
'beta'
,
'p'
=> $1,
'q'
=> $2 };
}
else
{
$self
->
warn
(
"unparseable beta parameters: $_"
);
}
}
elsif
( /^Parameters in beta\
&w
\>1:/ ) {
$_
=
$self
->_readline;
my
(
$p0
,
$p
,
$q
,
$p1
,
$w
);
if
( /p0\=\s+(\S+)\s+p\=\s+(\S+)\s+q\=\s+(\S+)/ ) {
$p0
= $1;
$p
= $2;
$q
= $3;
}
else
{
$self
->
warn
(
"unparseable beta parameters: $_"
);
}
$_
=
$self
->_readline;
if
( /\(p1\=\s+(\S+)\)\s+w\=\s*(\S+)/ ) {
$p1
= $1;
$w
= $2;
$data
{
'-shape_params'
} = {
'shape'
=>
'beta'
,
'p0'
=>
$p0
,
'p'
=>
$p
,
'q'
=>
$q
,
'p1'
=>
$p1
,
'w'
=>
$w
};
}
else
{
$self
->
warn
(
"unparseable beta parameters: $_"
);
}
}
elsif
( /^alpha\s+\(gamma\)\s+\=\s+(\S+)/ ) {
my
$gamma
= $1;
$_
=
$self
->_readline;
my
(
@r
,
@f
);
if
( s/^r\s+\(\s*\d+\)\:\s+// ) {
@r
=
split
;
}
$_
=
$self
->_readline;
if
( s/^f\s*\:\s+// ) {
@f
=
split
;
}
$data
{
'-shape_params'
} = {
'shape'
=>
'alpha'
,
'gamma'
=>
$gamma
,
'r'
=> \
@r
,
'f'
=> \
@f
};
}
}
return
new Bio::Tools::Phylo::PAML::ModelResult(
%data
);
}
sub
_parse_Pos_selected_sites {
my
$self
=
shift
;
my
$okay
= 0;
my
(
%sites
) = (
'default'
=> [],
'neb'
=> [],
'beb'
=> []);
my
$type
=
'default'
;
while
(
defined
(
$_
=
$self
->_readline) ) {
next
if
( /^\s+$/ || /^\s+Pr\(w\>1\)/ );
if
( /^Time used/ || /^TREE/) {
$self
->_pushback(
$_
);
last
;
}
if
( /^Naive Empirical Bayes/i ) {
$type
=
'neb'
;
}
elsif
( /^Bayes Empirical Bayes/i ) {
$type
=
'beb'
;
}
elsif
( /^Positively selected sites/ ) {
$okay
= 1;
}
elsif
(
$okay
&& /^\s+(\d+)\s+(\S+)\s+(\-?\d+(?:\.\d+)?)(\**)\s+(\-?\d+(?:\.\d+)?)\s+\+\-\s+(\-?\d+(?:\.\d+)?)/ ) {
my
$signif
= $4;
$signif
=
''
unless
defined
$signif
;
push
@{
$sites
{
$type
}}, [$1,$2,$3,
$signif
,$5,$6];
}
elsif
(
$okay
&& /^\s+(\d+)\s+(\S+)\s+(\-?\d*(?:.\d+))(\**)\s+(\-?\d+(?:\.\d+)?)/ ) {
my
$signif
= $4;
$signif
=
''
unless
defined
$signif
;
push
@{
$sites
{
$type
}}, [$1,$2,$3,
$signif
,$5];
}
elsif
(
$okay
&& /^\s+(\d+)\s+(\S)\s+([\d\.\-\+]+)(\**)/ ) {
my
$signif
= $4;
$signif
=
''
unless
defined
$signif
;
push
@{
$sites
{
$type
}}, [$1,$2,$3,
$signif
];
}
}
return
(
$sites
{
'default'
},
$sites
{
'neb'
},
$sites
{
'beb'
});
}
sub
_parse_branch_dnds {
my
$self
=
shift
;
my
(
$okay
) = (0);
my
%branch_dnds
;
my
@header
;
while
(
defined
(
$_
=
$self
->_readline ) ) {
next
if
( /^\s+$/);
next
unless
(
$okay
|| /^\s+branch\s+t/);
if
( /^\s+branch\s+(.+)/ ) {
s/^\s+//;
@header
=
split
(/\s+/,
$_
);
$okay
= 1;
}
elsif
( /^\s*(\d+\.\.\d+)/ ) {
my
$branch
= $1;
s/^\s+//;
my
$i
=0;
$branch_dnds
{
$branch
} = {
map
{
$header
[
$i
++] =>
$_
}
split
};
}
else
{
$self
->_pushback(
$_
);
last
;
}
}
return
%branch_dnds
;
}
sub
_parse_nt_freqs {
my
(
$self
) =
@_
;
my
(
$okay
,
$done
,
$header
) = (0,0,0);
my
(
@bases
);
my
$numseqs
=
scalar
@{
$self
->{
'_summary'
}->{
'seqs'
} || []};
while
(
defined
(
$_
=
$self
->_readline ) ) {
if
( /^TREE/ || /^Distances/ ) {
$self
->_pushback(
$_
);
last
}
last
if
(
$done
);
next
if
( /^\s+$/ || /^\(Ambiguity/ );
if
( /^Frequencies\./ ) {
$okay
= 1;
}
elsif
( !
$okay
) {
next
;
}
elsif
( !
$header
) {
s/^\s+//;
@bases
=
split
;
$header
= 1;
$self
->{
'_summary'
}->{
'ntfreqs'
} = {};
next
;
}
elsif
( /^\
(\d+)\s+
\(\s*([\d\.]+)\s*\%\s*\)/ox){
$self
->{
'_summary'
}->{
'stats'
}->{
'constant_sites'
} = $1;
$self
->{
'_summary'
}->{
'stats'
}->{
'constant_sites_percentage'
} = $2;
}
elsif
( /^ln\s+Lmax\s+\(unconstrained\)\s+\=\s+(\S+)/ox ) {
$self
->{
'_summary'
}->{
'stats'
}->{
'loglikelihood'
} = $1;
$done
= 1;
}
else
{
my
(
$seqname
,
@freqs
) =
split
;
my
$basect
= 0;
foreach
my
$f
(
@freqs
) {
$self
->{
'_summary'
}->{
'ntfreqs'
}->{
$seqname
}->{
$bases
[
$basect
++]} =
$f
;
}
}
}
}
sub
_parse_nt_dists {
my
(
$self
) =
@_
;
my
(
$okay
,
$seen
,
$done
) = (0,0,0);
my
(
%matrix
,
@names
);
my
$numseqs
=
scalar
@{
$self
->{
'_summary'
}->{
'seqs'
} || []};
my
$type
=
''
;
while
(
defined
(
$_
=
$self
->_readline ) ) {
if
( /^TREE/ ) {
$self
->_pushback(
$_
);
last
; }
last
if
$done
;
next
if
(/^This matrix is not used in later/);
if
( /^\s+$/ ) {
last
if
(
$seen
);
next
;
}
if
( /^Distances:(\S+)\s+\(([^\)]+)\)\s+\(alpha set at (\-?\d+\.\d+)\)/ ) {
$okay
= 1;
$type
= $1;
next
;
}
s/\s+$//g;
if
(
$okay
) {
my
(
$seqname
,
$vl
) =
split
(/\s+/,
$_
,2);
$seen
= 1;
my
$i
= 0;
if
(
defined
$vl
) {
while
(
$vl
=~ /(\-?\d+\.\d+)\s*\(\s*(\-?\d+\.\d+)\s*\)\s*/g ) {
my
(
$kappa
,
$alpha
) = ($1,$2);
$matrix
{
$seqname
}{
$names
[
$i
]} =
$matrix
{
$names
[
$i
]}{
$seqname
} = [
$kappa
,
$alpha
];
$i
++;
}
unless
(
$i
) {
$self
->
warn
(
"no matches for $vl\n"
);
}
}
push
@names
,
$seqname
;
$matrix
{
$seqname
}->{
$seqname
} = [0,0];
}
$done
= 1
if
(
scalar
@names
==
$numseqs
);
}
my
%dist
;
my
$i
= 0;
my
(
@kvalues
,
@avalues
);
foreach
my
$lname
(
@names
) {
my
(
@arow
,
@krow
);
my
$j
= 0;
foreach
my
$rname
(
@names
) {
my
$v
=
$matrix
{
$lname
}{
$rname
};
push
@krow
,
$v
->[0];
push
@arow
,
$v
->[1];
$dist
{
$lname
}{
$rname
} = [
$i
,
$j
++];
}
$i
++;
push
@kvalues
, \
@krow
;
push
@avalues
, \
@arow
;
}
return
(new Bio::Matrix::PhylipDist
(
-program
=>
$self
->{
'_summary'
}->{
'seqtype'
},
-matrix
=> \
%dist
,
-names
=> \
@names
,
-values
=> \
@kvalues
),
new Bio::Matrix::PhylipDist
(
-program
=>
$self
->{
'_summary'
}->{
'seqtype'
},
-matrix
=> \
%dist
,
-names
=> \
@names
,
-values
=> \
@avalues
)
);
}
sub
_parse_rate_parametes {
my
$self
=
shift
;
my
(
%rate_parameters
);
while
(
defined
(
$_
=
$self
->_readline) ) {
if
( /^Rate\s+parameters:\s+/ ) {
s/\s+$//;
$rate_parameters
{
'rate_parameters'
} = [
split
(/\s+/,
$_
)];
}
elsif
(/^Base\s+frequencies:\s+/) {
s/\s+$//;
$rate_parameters
{
'base_frequencies'
} = [
split
(/\s+/,
$_
)];
}
elsif
( m/^Rate\s+matrix\s+Q,\s+Average\s+Ts\/Tv\s+(\([^\)+]+\))?\s*\=\s+
(\-?\d+\.\d+)/x) {
$rate_parameters
{
'average_TsTv'
} = $1;
while
(
defined
(
$_
=
$self
->_readline) ) {
last
if
(/^\s+$/);
if
( /^alpha/ ) {
$self
->_pushback(
$_
);
last
;
}
s/^\s+//;
s/\s+$//;
push
@{
$rate_parameters
{
'rate_matrix_Q'
}}, [
split
];
}
}
elsif
(/^alpha\s+\(gamma,\s+K=\s*(\d+)\s*\)\s*\=\s*(\-?\d+\.\d+)/ ) {
$rate_parameters
{
'K'
} = $1;
$rate_parameters
{
'alpha'
} = $2;
}
elsif
(s/^(r|f):\s+// ) {
my
(
$p
) = $1;
s/\s+$//;
$rate_parameters
{
$p
} = [
split
];
}
}
}
sub
_parse_rst {
my
(
$self
) =
@_
;
return
unless
$self
->{
'_dir'
} && -d
$self
->{
'_dir'
} && -r
$self
->{
'_dir'
};
my
$rstfile
= File::Spec->catfile(
$self
->{
'_dir'
},
$RSTFILENAME
);
return
unless
-e
$rstfile
&& ! -z
$rstfile
;
my
$rstio
= Bio::Root::IO->new(
-file
=>
$rstfile
);
my
(
@firstseq
,
@seqs
,
@trees
,
@per_site_prob
);
my
$count
;
while
(
defined
(
$_
=
$rstio
->_readline ) ) {
if
( /^TREE\s+\
while
(
defined
(
$_
=
$rstio
->_readline) ) {
if
( /tree\s+
with
\s+node\s+labels\s+
for
/) {
my
$tree
= Bio::TreeIO->new(
-noclose
=>1,
-fh
=>
$rstio
->_fh,
-format
=>
'newick'
)->next_tree;
for
my
$n
(
$tree
->get_nodes ) {
my
$id
=
$n
->id;
$id
=~ s/^\s+//;
$id
=~ s/\s+$//;
$n
->id(
$id
);
if
(
defined
(
my
$blen
=
$n
->branch_length) ) {
$blen
=~ s/^\s+//;
$blen
=~ s/\s+$//;
$n
->branch_length(
$blen
);
}
}
push
@trees
,
$tree
;
last
;
}
}
}
elsif
(/^Prob\sof\sbest\scharacter\sat\seach\snode,\slisted\sby\ssite/){
$self
->{
'_rst'
}->{
'persite'
} = [];
while
(
defined
(
$_
=
$rstio
->_readline ) ) {
next
if
(/^Site/ || /^\s+$/ );
if
( s/^\s+(\d+)\s+(\d+)\s+([^:]+)\s+:\s+(.+)// ) {
my
(
$sitenum
,
$freq
,
$extant
,
$ancestral
) = ($1,$2,$3,$4);
my
(
@anc_site
,
@extant_site
);
@anc_site
= {};
@extant_site
= {};
while
(
$extant
=~ s/^([A-Z]{3})\s+\(([A-Z])\)\s+//g ) {
push
@extant_site
, {
'codon'
=>$1,
'aa'
=> $2 };
}
while
(
$ancestral
=~ s/^([A-Z]{3})\s+([A-Z])\s+
(\S+)\s+
\(([A-Z])\s+(\S+)\)\s+//xg
) {
push
@anc_site
, {
'codon'
=> $1,
'aa'
=> $2,
'prob'
=> $3,
'Yang95_aa'
=> $4,
'Yang95_aa_prob'
=> $5};
}
$self
->{
'_rst'
}->{
'persite'
}->[
$sitenum
] = [
@extant_site
,
@anc_site
];
}
elsif
(/^Summary\sof\schanges\salong\sbranches\./ ) {
last
;
}
}
}
elsif
( /^Check\sroot\sfor\sdirections\sof\schange\./ ||
/^Summary\sof\schanges\salong\sbranches\./ ) {
my
(
@branches
,
@branch2node
,
$branch
,
$node
);
my
$tree
=
$trees
[-1];
if
( !
$tree
) {
$self
->
warn
(
"No tree built before parsing Branch changes\n"
);
last
;
}
my
@nodes
= (
map
{
$_
->[0] }
sort
{
$a
->[1] <=>
$b
->[1] }
map
{ [
$_
,
$_
->id =~ /^(\d+)\_?/] }
$tree
->get_nodes);
unshift
@nodes
,
undef
;
while
(
defined
(
$_
=
$rstio
->_readline ) ) {
next
if
/^\s+$/;
if
( m/^List\sof\sextant\sand\sreconstructed\ssequences/ ) {
$rstio
->_pushback(
$_
);
last
;
}
elsif
( /^Branch\s+(\d+):\s+(\d+)\.\.(\d+)\s+/ ) {
my
(
$left
,
$right
);
(
$branch
,
$left
,
$right
) = ($1,$2,$3);
(
$node
) =
$nodes
[
$right
];
if
( !
$node
) {
warn
(
"cannot find $right in $tree ($branch $left..$right)\n"
);
last
;
}
my
(
$n
,
$s
) = (/\(n=\s*(\S+)\s+s=\s*(\S+)\)/);
$node
->add_tag_value(
'n'
,
$n
);
$node
->add_tag_value(
's'
,
$s
);
$branch2node
[
$branch
] =
$right
;
}
elsif
( /^\s+(\d+)\s+([A-Z])\s+(\S+)\s+\-\>\s+([A-Z])\s+(\S+)?/){
my
(
$site
,
$anc
,
$aprob
,
$derived
,
$dprob
)= ($1,$2,$3,$4,$5);
if
( !
$node
) {
$self
->
warn
(
"no branch line was previously parsed!"
);
next
;
}
my
%c
= (
'site'
=>
$site
,
'anc_aa'
=>
$anc
,
'anc_prob'
=>
$aprob
,
'derived_aa'
=>
$derived
,
);
$c
{
'derived_prob'
} =
$dprob
if
defined
$dprob
;
$node
->add_tag_value(
'changes'
,\
%c
);
}
}
}
elsif
( /^Overall\s+accuracy\s+of\s+the\s+(\d+)\s+ancestral\s+sequences:/)
{
my
$line
=
$rstio
->_readline;
$line
=~ s/^\s+//;
$line
=~ s/\s+$//;
my
@overall_site
=
split
(/\s+/,
$line
);
for
( 1..3 ) {
$line
=
$rstio
->_readline;
}
$line
=~ s/^\s+//;
$line
=~ s/\s+$//;
my
@overall_seq
=
split
(/\s+/,
$line
);
if
(
@overall_seq
!=
@overall_site
||
@overall_seq
!=
@seqs
) {
$self
->
warn
(
"out of sync somehow seqs, site scores don't match\n"
);
warn
(
"@seqs @overall_seq @overall_site\n"
);
}
for
(
@seqs
) {
$_
->description(
sprintf
(
"overall_accuracy_site=%s overall_accuracy_seq=%s"
,
shift
@overall_site
,
shift
@overall_seq
));
}
}
elsif
(m/^List of extant and reconstructed sequences/o) {
while
(
defined
(
$_
=
$rstio
->_readline ) ) {
last
if
( /^Overall accuracy of the/ );
last
if
( /^\s+$/ &&
@seqs
> 0 );
next
if
( /^\s+$/ );
next
if
( /^\d+\s+$/ );
if
(
$_
=~ /^node /) {
my
(
$name
,
$num
,
$seqstr
) =
split
(/\s+/,
$_
,3);
$name
.=
$num
;
$seqstr
=~ s/\s+//g;
unless
(
@firstseq
) {
@firstseq
=
split
(//,
$seqstr
);
push
@seqs
, Bio::PrimarySeq->new(
-display_id
=>
$name
,
-seq
=>
$seqstr
);
}
else
{
my
$i
= 0;
my
$v
;
while
((
$v
=
index
(
$seqstr
,
'.'
,
$i
)) >=
$i
) {
substr
(
$seqstr
,
$v
,1,
$firstseq
[
$v
]);
$i
=
$v
;
}
$self
->debug(
"adding seq $seqstr\n"
);
push
@seqs
, Bio::PrimarySeq->new
(
-display_id
=>
$name
,
-seq
=>
$seqstr
);
}
}
}
$self
->{
'_rst'
}->{
'rctrted_seqs'
} = \
@seqs
;
}
else
{
}
}
$self
->{
'_rst'
}->{
'trees'
} = \
@trees
;
return
;
}
1;