use
vars
qw($DEFAULTIDLENGTH $DEFAULTLINELEN $DEFAULTTAGLEN)
;
BEGIN {
$DEFAULTIDLENGTH
= 10;
$DEFAULTLINELEN
= 60;
$DEFAULTTAGLEN
= 10;
}
sub
_initialize {
my
(
$self
,
@args
) =
@_
;
$self
->SUPER::_initialize(
@args
);
my
(
$interleave
,
$linelen
,
$idlinebreak
,
$idlength
,
$flag_SI
,
$tag_length
,
$ws
) =
$self
->_rearrange([
qw(INTERLEAVED
LINE_LENGTH
IDLINEBREAK
IDLENGTH
FLAG_SI
TAG_LENGTH
WRAP_SEQUENTIAL)
],
@args
);
$self
->interleaved(1)
if
(
$interleave
|| !
defined
$interleave
);
$self
->idlength(
$idlength
||
$DEFAULTIDLENGTH
);
$self
->id_linebreak(1)
if
(
$idlinebreak
);
$self
->line_length(
$linelen
)
if
defined
$linelen
&&
$linelen
> 0;
$self
->flag_SI(1)
if
(
$flag_SI
);
$self
->tag_length(
$tag_length
)
if
(
$tag_length
||
$DEFAULTTAGLEN
);
$self
->wrap_sequential(
$ws
? 1 : 0);
1;
}
sub
next_aln {
my
$self
=
shift
;
my
$entry
;
my
(
$seqcount
,
$residuecount
,
%hash
,
$name
,
$str
,
@names
,
$seqname
,
$start
,
$end
,
$count
,
$seq
);
my
$aln
= Bio::SimpleAlign->new(
-source
=>
'phylip'
);
$entry
=
$self
->_readline and
(
$seqcount
,
$residuecount
) =
$entry
=~ /\s*(\d+)\s+(\d+)/;
return
0
unless
$seqcount
and
$residuecount
;
my
$idlen
=
$self
->idlength;
$count
= 0;
my
$iter
= 1;
my
$interleaved
=
$self
->interleaved;
while
(
$entry
=
$self
->_readline) {
last
if
(
$entry
=~ /^\s?$/ &&
$interleaved
);
if
(
$entry
=~ /^\s+(\d+)\s+(\d+)\s*$/) {
$self
->_pushback(
$entry
);
last
;
}
if
(
$entry
=~ /^\s+(.+)$/ ) {
$interleaved
= 0;
$str
= $1;
$str
=~ s/\s//g;
$count
=
scalar
@names
;
$hash
{
$count
} .=
$str
;
}
elsif
(
$entry
=~ /^(.{
$idlen
})\s+(.*)\s$/ ||
$entry
=~ /^(.{
$idlen
})(\S{
$idlen
}\s+.+)\s$/
) {
$name
= $1;
$str
= $2;
$name
=~ s/[\s\/]/_/g;
$name
=~ s/_+$//;
push
@names
,
$name
;
$str
=~ s/\s//g;
$count
=
scalar
@names
;
$hash
{
$count
} =
$str
;
}
elsif
(
$interleaved
) {
if
(
$entry
=~ /^(\S+)\s+(.+)/ ||
$entry
=~ /^(.{
$idlen
})(.*)\s$/ ) {
$name
= $1;
$str
= $2;
$name
=~ s/[\s\/]/_/g;
$name
=~ s/_+$//;
push
@names
,
$name
;
$str
=~ s/\s//g;
$count
=
scalar
@names
;
$hash
{
$count
} =
$str
;
}
else
{
$self
->debug(
"unmatched line: $entry"
);
}
}
$self
->throw(
"Not a valid interleaved PHYLIP file!"
)
if
$count
>
$seqcount
;
}
if
(
$interleaved
) {
$count
= 0;
while
(
$entry
=
$self
->_readline) {
if
(
$entry
=~/\s*\d+\s+\d+/){
$self
->_pushback(
$entry
);
last
;
}
$count
= 0,
next
if
$entry
=~ /^\s$/;
$entry
=~ /\s*(.*)$/ &&
do
{
$str
= $1;
$str
=~ s/\s//g;
$count
++;
$hash
{
$count
} .=
$str
;
};
$self
->throw(
"Not a valid interleaved PHYLIP file! [$count,$seqcount] ($entry)"
)
if
$count
>
$seqcount
;
}
}
return
0
if
scalar
@names
< 1;
$count
= 0;
foreach
$name
(
@names
) {
$count
++;
if
(
$name
=~ /(\S+)\/(\d+)-(\d+)/ ) {
$seqname
= $1;
$start
= $2;
$end
= $3;
}
else
{
$seqname
=
$name
;
$start
= 1;
$str
=
$hash
{
$count
};
$str
=~ s/[^A-Za-z]//g;
$end
=
length
(
$str
);
}
$self
->throw(
"Length of sequence [$seqname] is not [$residuecount] it is "
.CORE::
length
(
$hash
{
$count
}).
"! "
)
unless
CORE::
length
(
$hash
{
$count
}) ==
$residuecount
;
$seq
= new Bio::LocatableSeq(
'-seq'
=>
$hash
{
$count
},
'-id'
=>
$seqname
,
'-start'
=>
$start
,
'-end'
=>
$end
,
);
$aln
->add_seq(
$seq
);
}
return
$aln
;
}
sub
write_aln {
my
(
$self
,
@aln
) =
@_
;
my
$count
= 0;
my
$wrapped
= 0;
my
$maxname
;
my
$width
=
$self
->line_length();
my
(
$length
,
$date
,
$name
,
$seq
,
$miss
,
$pad
,
%hash
,
@arr
,
$tempcount
,
$index
,
$idlength
,
$flag_SI
,
$line_length
,
$tag_length
);
foreach
my
$aln
(
@aln
) {
if
( !
$aln
|| !
$aln
->isa(
'Bio::Align::AlignI'
) ) {
$self
->
warn
(
"Must provide a Bio::Align::AlignI object when calling write_aln"
);
next
;
}
$self
->throw(
"All sequences in the alignment must be the same length"
)
unless
$aln
->is_flush(1) ;
$flag_SI
=
$self
->flag_SI();
$aln
->set_displayname_flat();
$length
=
$aln
->
length
();
if
(
$flag_SI
) {
if
(
$self
->interleaved() ) {
$self
->_print (
sprintf
(
" %s %s I\n"
,
$aln
->no_sequences,
$aln
->
length
));
}
else
{
$self
->_print (
sprintf
(
" %s %s S\n"
,
$aln
->no_sequences,
$aln
->
length
));
}
}
else
{
$self
->_print (
sprintf
(
" %s %s\n"
,
$aln
->no_sequences,
$aln
->
length
));
}
$idlength
=
$self
->idlength();
$line_length
=
$self
->line_length();
$tag_length
=
$self
->tag_length();
foreach
$seq
(
$aln
->each_seq() ) {
$name
=
$aln
->displayname(
$seq
->get_nse);
$name
=
substr
(
$name
, 0,
$idlength
)
if
length
(
$name
) >
$idlength
;
$name
=
sprintf
(
"%-"
.
$idlength
.
"s"
,
$name
);
if
(
$self
->interleaved() ) {
$name
.=
' '
;
}
elsif
(
$self
->id_linebreak) {
$name
.=
"\n"
;
}
my
$seq
=
$seq
->seq();
$seq
=~ s/\./-/g;
$hash
{
$name
} =
$seq
;
push
(
@arr
,
$name
);
}
if
(
$self
->interleaved() ) {
my
$numtags
;
if
(
$tag_length
<=
$line_length
) {
$numtags
= floor(
$line_length
/
$tag_length
);
$line_length
=
$tag_length
*$numtags
;
}
else
{
$numtags
= 1;
}
while
(
$count
<
$length
) {
foreach
$name
(
@arr
) {
my
$dispname
=
$name
;
$dispname
=
''
if
$wrapped
;
$self
->_print (
sprintf
(
"%"
.(
$idlength
+3).
"s"
,
$dispname
));
$tempcount
=
$count
;
$index
= 0;
$self
->debug(
"residue count: $count\n"
)
if
(
$count
%100000 == 0);
while
( (
$tempcount
+
$tag_length
<
$length
) &&
(
$index
<
$numtags
) ) {
$self
->_print (
sprintf
(
"%s "
,
substr
(
$hash
{
$name
},
$tempcount
,
$tag_length
)));
$tempcount
+=
$tag_length
;
$index
++;
}
if
(
$index
<
$numtags
) {
$self
->_print (
sprintf
(
"%s "
,
substr
(
$hash
{
$name
},
$tempcount
)));
$tempcount
+=
$tag_length
;
}
$self
->_print (
"\n"
);
}
$self
->_print (
"\n"
);
$count
=
$tempcount
;
$wrapped
= 1;
}
}
else
{
foreach
$name
(
@arr
) {
my
$dispname
=
$name
;
my
$line
=
sprintf
(
"%s%s\n"
,
$dispname
,
$hash
{
$name
});
if
(
$self
->wrap_sequential ) {
$line
=~ s/(.{1,
$width
})/$1\n/g;
}
$self
->_print (
$line
);
}
}
}
$self
->flush
if
$self
->_flush_on_write &&
defined
$self
->_fh;
return
1;
}
sub
interleaved{
my
(
$self
,
$value
) =
@_
;
my
$previous
=
$self
->{
'_interleaved'
};
if
(
defined
$value
) {
$self
->{
'_interleaved'
} =
$value
;
}
return
$previous
;
}
sub
flag_SI{
my
(
$self
,
$value
) =
@_
;
my
$previous
=
$self
->{
'_flag_SI'
};
if
(
defined
$value
) {
$self
->{
'_flag_SI'
} =
$value
;
}
return
$previous
;
}
sub
idlength {
my
(
$self
,
$value
) =
@_
;
if
(
defined
$value
){
$self
->{
'_idlength'
} =
$value
;
}
return
$self
->{
'_idlength'
};
}
sub
line_length{
my
(
$self
,
$value
) =
@_
;
if
(
defined
$value
) {
$self
->{
'_line_length'
} =
$value
;
}
return
$self
->{
'_line_length'
} ||
$DEFAULTLINELEN
;
}
sub
tag_length{
my
(
$self
,
$value
) =
@_
;
if
(
defined
$value
) {
$self
->{
'_tag_length'
} =
$value
;
}
return
$self
->{
'_tag_length'
} ||
$DEFAULTTAGLEN
;
}
sub
id_linebreak{
my
(
$self
,
$value
) =
@_
;
if
(
defined
$value
) {
$self
->{
'_id_linebreak'
} =
$value
;
}
return
$self
->{
'_id_linebreak'
} || 0;
}
sub
wrap_sequential{
my
(
$self
,
$value
) =
@_
;
if
(
defined
$value
) {
$self
->{
'_wrap_sequential'
} =
$value
;
}
return
$self
->{
'_wrap_sequential'
} || 0;
}
1;