use
vars
qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
@DBA_SWITCHES @DBA_PARAMS @OTHER_SWITCHES %OK_FIELD)
;
@ISA
=
qw(Bio::Root::Root Bio::Tools::Run::WrapperBase)
;
BEGIN {
@DBA_PARAMS
=
qw(MATCHA MATCHB MATCHC MATCHD GAP BLOCKOPEN UMATCH SINGLE
NOMATCHN PARAMS KBYTE DYMEM DYDEBUG ERRORLOG)
;
@OTHER_SWITCHES
=
qw(OUTFILE)
;
@DBA_SWITCHES
=
qw(HELP SILENT QUIET ERROROFFSTD ALIGN LABEL)
;
foreach
my
$attr
(
@DBA_PARAMS
,
@DBA_SWITCHES
,
@OTHER_SWITCHES
) {
$OK_FIELD
{
$attr
}++; }
}
sub
program_name {
return
'dba'
;
}
sub
program_dir {
return
Bio::Root::IO->catfile(
$ENV
{WISEDIR},
"/src/bin"
)
if
$ENV
{WISEDIR};
}
sub
new {
my
(
$class
,
@args
) =
@_
;
my
$self
=
$class
->SUPER::new(
@args
);
my
(
$attr
,
$value
);
while
(
@args
) {
$attr
=
shift
@args
;
$value
=
shift
@args
;
next
if
(
$attr
=~ /^-/ );
if
(
$attr
=~/
'PROGRAM'
/i ) {
$self
->executable(
$value
);
next
;
}
$self
->
$attr
(
$value
);
}
return
$self
;
}
sub
AUTOLOAD {
my
$self
=
shift
;
my
$attr
=
$AUTOLOAD
;
$attr
=~ s/.*:://;
$attr
=
uc
$attr
;
$self
->throw(
"Unallowed parameter: $attr !"
)
unless
$OK_FIELD
{
$attr
};
$self
->{
$attr
} =
shift
if
@_
;
return
$self
->{
$attr
};
}
sub
version {
my
(
$self
) =
@_
;
my
$exe
=
$self
->executable();
return
undef
unless
defined
$exe
;
my
$string
= `
$exe
-- ` ;
$string
=~ /\(([\d.]+)\)/;
return
$1 ||
undef
;
}
sub
align {
my
(
$self
,
$input
) =
@_
;
my
(
$temp
,
$infile1
,
$infile2
,
$seq
);
my
(
$attr
,
$value
,
$switch
);
(
$infile1
,
$infile2
)=
$self
->_setinput(
$input
);
if
(!(
$infile1
&&
$infile2
)) {
$self
->throw(
"Bad input data (sequences need an id ) or less than 2 sequences in $input !"
);}
my
$param_string
=
$self
->_setparams();
my
@hsps
=
$self
->_run(
$infile1
,
$infile2
,
$param_string
);
return
@hsps
;
}
sub
_run {
my
(
$self
,
$infile1
,
$infile2
,
$param_string
) =
@_
;
my
$instring
;
$self
->debug(
"Program "
.
$self
->executable.
"\n"
);
unless
(
$self
->outfile){
my
(
$tfh
,
$outfile
) =
$self
->io->tempfile(
-dir
=>
$self
->tempdir);
close
(
$tfh
);
undef
$tfh
;
$self
->outfile(
$outfile
);
}
my
$outfile
=
$self
->outfile();
my
$commandstring
=
$self
->executable.
" $param_string -pff $infile1 $infile2 > $outfile"
;
$self
->debug(
"dba command = $commandstring"
);
my
$status
=
system
(
$commandstring
);
$self
->throw(
"DBA call ($commandstring) crashed: $? \n"
)
unless
$status
==0;
my
$hsps
=
$self
->_parse_results(
$outfile
);
return
@{
$hsps
};
}
sub
_parse_results {
my
(
$self
,
$outfile
) =
@_
;
$outfile
||
$self
->throw(
"No outfile specified"
);
my
(
$start
,
$end
,
$name
,
$seqname
,
$seq
,
$seqchar
,
$tempname
,
%align
);
my
$count
= 0;
my
@hsps
;
open
(OUT,
$outfile
);
my
(
%query
,
%subject
);
while
(
my
$entry
= <OUT>){
if
(
$entry
=~ /^>(.+)/ ) {
$tempname
= $1;
if
(
defined
$name
) {
if
(
$count
== 0){
my
@parse
=
split
(
"\t"
,
$name
);
$query
{seqname} =
$parse
[0];
$query
{start} =
$parse
[3];
$query
{end} =
$parse
[4];
$query
{score} =
$parse
[5];
$query
{strand} = (
$parse
[6] eq
'+'
) ? 1 : -1;
my
@tags
=
split
(
";"
,
$parse
[8]);
foreach
my
$tag
(
@tags
){
$tag
=~/(\S+)\s+(\S+)/;
$query
{$1} = $2;
}
$query
{seq} =
$seqchar
;
$count
++;
}
elsif
(
$count
== 1){
my
@parse
=
split
(
"\t"
,
$name
);
$subject
{seqname} =
$parse
[0];
$subject
{start} =
$parse
[3];
$subject
{end} =
$parse
[4];
$subject
{score} =
$parse
[5];
$subject
{strand} = (
$parse
[6] eq
'+'
) ? 1:-1;
my
@tags
=
split
(
";"
,
$parse
[8]);
foreach
my
$tag
(
@tags
){
$tag
=~/(\S+)\s+(\S+)/;
$subject
{$1} = $2;
}
$subject
{seq} =
$seqchar
;
my
$xor
=
$query
{seq}^
$subject
{seq};
my
$identical
=
$xor
=~
tr
/\c@/*/;
$xor
=~
tr
/*/ /c;
my
$hsp
= Bio::Search::HSP::GenericHSP->new(
-algorithm
=>
'DBA'
,
-score
=>
$query
{score},
-hsp_length
=>
length
(
$query
{seq}),
-query_gaps
=>
$query
{gaps},
-hit_gaps
=>
$subject
{gaps},
-query_name
=>
$query
{seqname},
-query_start
=>
$query
{start},
-query_end
=>
$query
{end},
-hit_name
=>
$subject
{seqname},
-hit_start
=>
$subject
{start},
-hit_end
=>
$subject
{end},
-hit_length
=>
length
(
$self
->_subject_seq->seq),
-query_length
=>
length
(
$self
->_query_seq->seq),
-query_seq
=>
$query
{seq},
-hit_seq
=>
$subject
{seq},
-conserved
=>
$identical
,
-identical
=>
$identical
,
-homology_seq
=>
$xor
);
push
@hsps
,
$hsp
;
$count
= 0;
}
}
$name
=
$tempname
;
$seqchar
=
""
;
next
;
}
$entry
=~ s/[^A-Za-z\.\-]//g;
$seqchar
.=
$entry
;
}
if
(
$count
== 1){
my
@parse
=
split
(
"\t"
,
$name
);
$subject
{seqname} =
$parse
[1];
$subject
{start} =
$parse
[3];
$subject
{end} =
$parse
[4];
$subject
{score} =
$parse
[5];
$subject
{strand} = (
$parse
[6] eq
'+'
) ? 1:-1;
my
@tags
=
split
(
";"
,
$parse
[8]);
foreach
my
$tag
(
@tags
){
$tag
=~/(\S+)\s+(\S+)/;
$subject
{$1} = $2;
}
$subject
{seq} =
$seqchar
;
my
$xor
=
$query
{seq}^
$subject
{seq};
my
$identical
=
$xor
=~
tr
/\c@/*/;
$xor
=~
tr
/*/ /c;
my
$hsp
= Bio::Search::HSP::GenericHSP->new(
-algorithm
=>
'DBA'
,
-score
=>
$query
{score},
-hsp_length
=>
length
(
$query
{seq}),
-query_gaps
=>
$query
{gaps},
-hit_gaps
=>
$subject
{gaps},
-query_name
=>
$query
{seqname},
-query_start
=>
$query
{start},
-query_end
=>
$query
{end},
-hit_name
=>
$subject
{seqname},
-hit_start
=>
$subject
{start},
-hit_end
=>
$subject
{end},
-hit_length
=>
length
(
$self
->_subject_seq->seq),
-query_length
=>
length
(
$self
->_query_seq->seq),
-query_seq
=>
$query
{seq},
-hit_seq
=>
$subject
{seq},
-conserved
=>
$identical
,
-identical
=>
$identical
,
-homology_seq
=>
$xor
);
push
@hsps
,
$hsp
;
}
return
\
@hsps
;
}
sub
_setinput {
my
(
$self
,
$input
,
$suffix
) =
@_
;
my
(
$infilename
,
$seq
,
$temp
,
$tfh1
,
$tfh2
,
$outfile1
,
$outfile2
);
if
(
ref
(
$input
) ne
"ARRAY"
){
$infilename
=
$input
;
unless
(-e
$input
){
return
0;}
my
$in
= Bio::SeqIO->new(
-file
=>
$infilename
,
'-format'
=>
'Fasta'
);
(
$tfh1
,
$outfile1
) =
$self
->io->tempfile(
-dir
=>
$self
->tempdir);
(
$tfh2
,
$outfile2
) =
$self
->io->tempfile(
-dir
=>
$self
->tempdir);
my
$out1
= Bio::SeqIO->new(
-fh
=>
$tfh1
,
'-format'
=>
'Fasta'
,
'-flush'
=>1);
my
$out2
= Bio::SeqIO->new(
-fh
=>
$tfh2
,
'-format'
=>
'Fasta'
,
'-flush'
=>1);
my
$seq1
=
$in
->next_seq() ||
return
0;
my
$seq2
=
$in
->next_seq() ||
return
0;
$out1
->write_seq(
$seq1
);
$out2
->write_seq(
$seq2
);
$self
->_query_seq(
$seq1
);
$self
->_subject_seq(
$seq2
);
$out1
->
close
();
$out2
->
close
();
close
(
$tfh1
);
close
(
$tfh2
);
undef
$tfh1
;
undef
$tfh2
;
return
$outfile1
,
$outfile2
;
}
else
{
scalar
(@{
$input
}) == 2 ||
$self
->throw(
"dba alignment can only be run on 2 sequences not."
);
if
(
ref
(
$input
->[0]) eq
""
){
my
$in1
= Bio::SeqIO->new(
-file
=>
$input
->[0],
'-format'
=>
'fasta'
);
my
$in2
= Bio::SeqIO->new(
-file
=>
$input
->[1],
'-format'
=>
'fasta'
);
(
$tfh1
,
$outfile1
) =
$self
->io->tempfile(
-dir
=>
$self
->tempdir);
(
$tfh2
,
$outfile2
) =
$self
->io->tempfile(
-dir
=>
$self
->tempdir);
my
$out1
= Bio::SeqIO->new(
-fh
=>
$tfh1
,
'-format'
=>
'fasta'
);
my
$out2
= Bio::SeqIO->new(
-fh
=>
$tfh2
,
'-format'
=>
'fasta'
);
my
$seq1
=
$in1
->next_seq() ||
return
0;
my
$seq2
=
$in2
->next_seq() ||
return
0;
$out1
->write_seq(
$seq1
);
$out2
->write_seq(
$seq2
);
$self
->_query_seq(
$seq1
);
$self
->_subject_seq(
$seq2
);
close
(
$tfh1
);
close
(
$tfh2
);
undef
$tfh1
;
undef
$tfh2
;
return
$outfile1
,
$outfile2
;
}
elsif
(
$input
->[0]->isa(
"Bio::PrimarySeqI"
) &&
$input
->[1]->isa(
"Bio::PrimarySeqI"
)) {
(
$tfh1
,
$outfile1
) =
$self
->io->tempfile(
-dir
=>
$self
->tempdir);
(
$tfh2
,
$outfile2
) =
$self
->io->tempfile(
-dir
=>
$self
->tempdir);
my
$out1
= Bio::SeqIO->new(
-fh
=>
$tfh1
,
'-format'
=>
'fasta'
);
my
$out2
= Bio::SeqIO->new(
-fh
=>
$tfh2
,
'-format'
=>
'fasta'
);
$out1
->write_seq(
$input
->[0]);
$out2
->write_seq(
$input
->[1]);
$self
->_query_seq(
$input
->[0]);
$self
->_subject_seq(
$input
->[1]);
close
(
$tfh1
);
close
(
$tfh2
);
undef
$tfh1
;
undef
$tfh2
;
return
$outfile1
,
$outfile2
;
}
else
{
return
0;
}
}
return
0;
}
sub
_setparams {
my
(
$attr
,
$value
,
$self
);
$self
=
shift
;
my
$param_string
=
""
;
for
$attr
(
@DBA_PARAMS
) {
$value
=
$self
->
$attr
();
next
unless
(
defined
$value
);
my
$attr_key
=
lc
$attr
;
if
(
$attr_key
=~ /match([ABCDabcd])/i){
$attr_key
=
"match"
.
uc
($1);
}
$attr_key
=
' -'
.
$attr_key
;
$param_string
.=
$attr_key
.
' '
.
$value
;
}
for
$attr
(
@DBA_SWITCHES
) {
$value
=
$self
->
$attr
();
next
unless
(
$value
);
my
$attr_key
=
lc
$attr
;
$attr_key
=
' -'
.
$attr_key
;
$param_string
.=
$attr_key
;
}
return
$param_string
;
}
sub
_query_seq {
my
(
$self
,
$seq
) =
@_
;
if
(
defined
$seq
){
$self
->{
'_query_seq'
} =
$seq
;
}
return
$self
->{
'_query_seq'
};
}
sub
_subject_seq {
my
(
$self
,
$seq
) =
@_
;
if
(
defined
$seq
){
$self
->{
'_subject_seq'
} =
$seq
;
}
return
$self
->{
'_subject_seq'
};
}
1;