use
vars
qw/ @EXPORT_OK /
;
@EXPORT_OK
=
qw/ character_input phylip_pars parse_newick newick_to_svg /
;
sub
character_input {
my
(
$tradition
,
$opts
) =
@_
;
my
$table
=
$tradition
->collation->alignment_table;
if
(
$opts
->{exclude_layer} ) {
my
$newtable
= {
alignment
=> [],
length
=>
$table
->{
length
} };
foreach
my
$row
( @{
$table
->{alignment}} ) {
if
(
$tradition
->has_witness(
$row
->{witness} ) ) {
push
( @{
$newtable
->{alignment}},
$row
);
}
}
$table
=
$newtable
;
}
my
$character_matrix
= _make_character_matrix(
$table
,
$opts
);
my
$input
=
''
;
my
$rows
=
scalar
@{
$character_matrix
};
my
$columns
=
scalar
@{
$character_matrix
->[0]} - 1;
$input
.=
"\t$rows\t$columns\n"
;
foreach
my
$row
( @{
$character_matrix
} ) {
$input
.=
join
(
''
,
@$row
) .
"\n"
;
}
return
$input
;
}
sub
_make_character_matrix {
my
(
$table
,
$opts
) =
@_
;
my
@matrix
=
map
{ [ _normalize_witname(
$_
->{
'witness'
} ) ] }
@{
$table
->{
'alignment'
}};
foreach
my
$token_index
( 0 ..
$table
->{
'length'
} - 1) {
my
@pos_tokens
=
map
{
$_
->{
'tokens'
}->[
$token_index
] }
@{
$table
->{
'alignment'
}};
my
@pos_readings
=
map
{
$_
?
$_
->{
't'
} :
$_
}
@pos_tokens
;
my
@chars
= _convert_characters( \
@pos_readings
,
$opts
);
foreach
my
$idx
( 0 ..
$#matrix
) {
push
( @{
$matrix
[
$idx
]},
$chars
[
$idx
] );
}
}
return
\
@matrix
;
}
sub
_normalize_witname {
my
(
$witname
) =
@_
;
$witname
=~ s/\s+/ /g;
$witname
=~ s/[\[\]\(\)\:;,]//g;
$witname
=
substr
(
$witname
, 0, 10 );
return
sprintf
(
"%-10s"
,
$witname
);
}
sub
_convert_characters {
my
(
$row
,
$opts
) =
@_
;
my
%unique
= (
'__UNDEF__'
=>
'X'
,
'#LACUNA#'
=>
'?'
,
);
my
%equivalent
;
my
%count
;
my
$ctr
= 0;
foreach
my
$rdg
(
@$row
) {
next
unless
$rdg
;
next
if
$rdg
->is_lacuna;
next
if
exists
$unique
{
$rdg
->text};
if
(
ref
(
$opts
->{
'collapse'
} ) eq
'ARRAY'
) {
my
@exclude_types
= @{
$opts
->{
'collapse'
}};
my
@set
=
$rdg
->related_readings(
sub
{
my
$rel
=
shift
;
$rel
->colocated &&
grep
{
$rel
->type eq
$_
}
@exclude_types
} );
push
(
@set
,
$rdg
);
my
$char
=
chr
( 65 +
$ctr
++ );
map
{
$unique
{
$_
->text} =
$char
}
@set
;
$count
{
$rdg
->text} +=
scalar
@set
;
}
else
{
$unique
{
$rdg
->text} =
chr
( 65 +
$ctr
++ );
$count
{
$rdg
->text}++;
}
}
if
(
scalar
(
keys
%unique
) > 8 ) {
foreach
my
$word
(
keys
%count
) {
if
(
$count
{
$word
} == 1 ) {
$unique
{
$word
} =
'?'
;
}
}
}
my
%u
=
reverse
%unique
;
if
(
scalar
(
keys
%u
) > 8 ) {
warn
"Have more than 8 variants on this location; phylip will break"
;
}
my
@chars
=
map
{
$_
?
$unique
{
$_
->text} :
$unique
{
'__UNDEF__'
} }
@$row
;
return
@chars
;
}
sub
phylip_pars {
my
(
$charmatrix
) =
@_
;
my
$phylip_dir
= File::Temp->newdir();
open
( MATRIX,
">$phylip_dir/infile"
) or
die
"Could not write $phylip_dir/infile"
;
print
MATRIX
$charmatrix
;
close
MATRIX;
open
( CMD,
">$phylip_dir/cmdfile"
) or
die
"Could not write $phylip_dir/cmdfile"
;
print
CMD
"Y\n"
;
close
CMD;
my
$program
= File::Which::which(
'pars'
);
unless
(
$program
&& -x
$program
) {
throw(
"Phylip pars not found in path"
);
}
{
local
$CWD
=
$phylip_dir
;
my
@cmd
= (
$program
);
run \
@cmd
,
'<'
,
'cmdfile'
,
'>'
,
'/dev/null'
;
}
my
@outtree
;
if
( -f
"$phylip_dir/outtree"
) {
open
( TREE,
"$phylip_dir/outtree"
) or
die
"Could not open outtree for read"
;
@outtree
= <TREE>;
close
TREE;
}
return
join
(
''
,
@outtree
)
if
@outtree
;
my
@error
;
if
( -f
"$phylip_dir/outfile"
) {
open
( OUTPUT,
"$phylip_dir/outfile"
) or
die
"Could not open output for read"
;
@error
= <OUTPUT>;
close
OUTPUT;
}
else
{
push
(
@error
,
"Neither outtree nor output file was produced!"
);
}
throw(
join
(
''
,
@error
) );
}
sub
parse_newick {
my
$newick
=
shift
;
my
@stemmata
;
my
$forest
= Bio::Phylo::IO->parse(
-format
=>
'newick'
,
-string
=>
$newick
,
);
foreach
my
$tree
( @{
$forest
->get_entities} ) {
my
$stemma
= Text::Tradition::Stemma->new(
graph
=> _graph_from_bio(
$tree
),
is_undirected
=> 1 );
push
(
@stemmata
,
$stemma
);
}
return
\
@stemmata
;
}
sub
_graph_from_bio {
my
$tree
=
shift
;
my
$graph
= Graph->new(
'undirected'
=> 1 );
my
$i
= 0;
my
$classes
= {};
foreach
my
$n
( @{
$tree
->get_terminals} ) {
$classes
->{
$n
->get_name} =
'extant'
;
}
foreach
my
$n
( @{
$tree
->get_internals} ) {
unless
(
defined
$n
->get_name &&
$n
->get_name ne
''
) {
while
(
exists
$classes
->{
$i
} ) {
$i
++;
}
$n
->set_name(
$i
++ );
}
$classes
->{
$n
->get_name} =
'hypothetical'
;
}
_add_tree_children(
$graph
,
$classes
,
undef
, [
$tree
->get_root ]);
return
$graph
;
}
sub
_add_tree_children {
my
(
$graph
,
$classes
,
$parent
,
$tree_children
) =
@_
;
foreach
my
$c
(
@$tree_children
) {
my
$child
=
$c
->get_name;
$graph
->add_vertex(
$child
);
$graph
->set_vertex_attribute(
$child
,
'class'
,
$classes
->{
$child
} );
$graph
->add_path(
$parent
,
$child
)
if
defined
$parent
;
_add_tree_children(
$graph
,
$classes
,
$child
,
$c
->get_children() );
}
}
sub
newick_to_svg {
my
$newick
=
shift
;
my
$program
= File::Which::which(
'figtree'
);
unless
( -x
$program
) {
throw(
"FigTree commandline utility not found in path"
);
}
my
$svg
;
my
$nfile
= File::Temp->new();
print
$nfile
$newick
;
close
$nfile
;
my
@cmd
= (
$program
,
'-graphic'
,
'SVG'
,
$nfile
);
run( \
@cmd
,
">"
, binary(), \
$svg
);
return
decode_utf8(
$svg
);
}
sub
throw {
Text::Tradition::Error->throw(
'ident'
=>
'StemmaUtil error'
,
'message'
=>
$_
[0],
);
}
1;
=head1 LICENSE
This
package
is free software and is provided
"as is"
without express
or implied warranty. You can redistribute it and/or modify it under
the same terms as Perl itself.
=head1 AUTHOR
Tara L Andrews E<lt>aurum
@cpan
.orgE<gt>