use
vars
qw($LINELENGTH $CLUSTALPRINTVERSION)
;
$LINELENGTH
= 60;
$CLUSTALPRINTVERSION
=
'1.81'
;
sub
_initialize {
my
(
$self
,
@args
) =
@_
;
$self
->SUPER::_initialize(
@args
);
my
(
$percentages
,
$ll
) =
$self
->_rearrange( [
qw(PERCENTAGES LINELENGTH)
],
@args
);
defined
$percentages
&&
$self
->percentages(
$percentages
);
$self
->line_length(
$ll
||
$LINELENGTH
);
}
sub
next_aln {
my
(
$self
) =
@_
;
my
$first_line
;
while
(
$first_line
=
$self
->_readline ) {
last
if
$first_line
!~ /^$/;
}
$self
->_pushback(
$first_line
);
if
(
defined
(
$first_line
=
$self
->_readline )
&&
$first_line
!~ /CLUSTAL/ )
{
$self
->throw(
"trying to parse a file which does not start with a CLUSTAL header"
);
}
my
%alignments
;
my
$aln
= FAST::Bio::SimpleAlign->new(
-source
=>
'clustalw'
,
-verbose
=>
$self
->verbose
);
my
$order
= 0;
my
%order
;
$self
->{_lastline} =
''
;
my
(
$first_block
,
$seen_block
) = (0,0);
while
(
defined
(
$_
=
$self
->_readline ) ) {
next
if
(/^\s+$/ && !
$first_block
);
if
(/^\s$/) {
$seen_block
= 1;
next
;
}
$first_block
= 1;
if
(/CLUSTAL/) {
$self
->_pushback(
$_
);
last
;
}
my
(
$seqname
,
$aln_line
) = (
''
,
''
);
if
(/^\s*(\S+)\s*\/\s*(\d+)-(\d+)\s+(\S+)\s*$/ox) {
(
$seqname
,
$aln_line
) = (
"$1:$2-$3"
, $4 );
}
elsif
(/^\s*(\S+)\s+(\S+)\s*\d*\s*$/ox) {
(
$seqname
,
$aln_line
) = ( $1, $2 );
if
(
$seqname
=~ /^[\*\.\+\:]+$/ ) {
$self
->{_lastline} =
$_
;
next
;
}
}
else
{
$self
->{_lastline} =
$_
;
next
;
}
if
( !
$seen_block
) {
if
(
exists
$order
{
$seqname
}) {
$self
->
warn
(
"Duplicate sequence : $seqname\n"
.
"Can't guarantee alignment quality"
);
}
else
{
$order
{
$seqname
} =
$order
++;
}
}
$alignments
{
$seqname
} .=
$aln_line
;
}
my
(
$sname
,
$start
,
$end
);
foreach
my
$name
(
sort
{
$order
{
$a
} <=>
$order
{
$b
} }
keys
%alignments
) {
if
(
$name
=~ /(\S+):(\d+)-(\d+)/ ) {
(
$sname
,
$start
,
$end
) = ( $1, $2, $3 );
}
else
{
(
$sname
,
$start
) = (
$name
, 1 );
my
$str
=
$alignments
{
$name
};
$str
=~ s/[^A-Za-z]//g;
$end
=
length
(
$str
);
}
my
$seq
= FAST::Bio::LocatableSeq->new
(
'-seq'
=>
$alignments
{
$name
},
'-display_id'
=>
$sname
,
'-start'
=>
$start
,
'-end'
=>
$end
,
'-alphabet'
=>
$self
->alphabet,
);
$aln
->add_seq(
$seq
);
}
return
$aln
if
$aln
->num_sequences;
return
;
}
sub
write_aln {
my
(
$self
,
@aln
) =
@_
;
my
(
$count
,
$length
,
$seq
,
@seq
,
$tempcount
,
$line_len
);
$line_len
=
$self
->line_length ||
$LINELENGTH
;
foreach
my
$aln
(
@aln
) {
if
( !
$aln
|| !
$aln
->isa(
'FAST::Bio::Align::AlignI'
) ) {
$self
->
warn
(
"Must provide a FAST::Bio::Align::AlignI object when calling write_aln"
);
next
;
}
my
$matchline
=
$aln
->match_line;
if
(
$self
->force_displayname_flat ) {
$aln
->set_displayname_flat(1);
}
$self
->_print(
sprintf
(
"CLUSTAL W (%s) multiple sequence alignment\n\n\n"
,
$CLUSTALPRINTVERSION
)
) or
return
;
$length
=
$aln
->
length
();
$count
=
$tempcount
= 0;
@seq
=
$aln
->each_seq();
my
$max
= 22;
foreach
$seq
(
@seq
) {
$max
=
length
(
$aln
->displayname(
$seq
->get_nse() ) )
if
(
length
(
$aln
->displayname(
$seq
->get_nse() ) ) >
$max
);
}
while
(
$count
<
$length
) {
my
(
$linesubstr
,
$first
) = (
''
, 1 );
foreach
$seq
(
@seq
) {
my
(
$substring
);
my
$seqchars
=
$seq
->seq();
SWITCH: {
if
(
length
(
$seqchars
) >= (
$count
+
$line_len
) ) {
$substring
=
substr
(
$seqchars
,
$count
,
$line_len
);
if
(
$first
) {
$linesubstr
=
substr
(
$matchline
,
$count
,
$line_len
);
$first
= 0;
}
last
SWITCH;
}
elsif
(
length
(
$seqchars
) >=
$count
) {
$substring
=
substr
(
$seqchars
,
$count
);
if
(
$first
) {
$linesubstr
=
substr
(
$matchline
,
$count
);
$first
= 0;
}
last
SWITCH;
}
$substring
=
""
;
}
$self
->_print(
sprintf
(
"%-"
.
$max
.
"s %s\n"
,
$aln
->displayname(
$seq
->get_nse() ),
$substring
)
) or
return
;
}
my
$percentages
=
''
;
if
(
$self
->percentages ) {
my
(
$strcpy
) = (
$linesubstr
);
my
$count
= (
$strcpy
=~
tr
/\*// );
$percentages
=
sprintf
(
"\t%d%%"
, 100 * (
$count
/
length
(
$linesubstr
) ) );
}
$self
->_print(
sprintf
(
"%-"
.
$max
.
"s %s%s\n"
,
''
,
$linesubstr
,
$percentages
)
);
$self
->_print(
sprintf
(
"\n\n"
) ) or
return
;
$count
+=
$line_len
;
}
}
$self
->flush
if
$self
->_flush_on_write &&
defined
$self
->_fh;
return
1;
}
sub
percentages {
my
(
$self
,
$value
) =
@_
;
if
(
defined
$value
) {
$self
->{
'_percentages'
} =
$value
;
}
return
$self
->{
'_percentages'
};
}
sub
line_length {
my
(
$self
,
$value
) =
@_
;
if
(
defined
$value
) {
$self
->{
'_line_length'
} =
$value
;
}
return
$self
->{
'_line_length'
};
}
1;