sub
new {
my
(
$caller
,
@args
) =
@_
;
my
$self
=
$caller
->SUPER::new(
@args
);
$self
->_initialize(
@args
);
return
$self
;
}
sub
add_locus {
return
shift
->add_loci(
@_
) }
sub
add_loci {
my
(
$self
,
@l
) =
@_
;
my
$space
;
if
(
defined
$l
[0] and not
ref
$l
[0]){
$space
= 0 +
shift
@l
;
}
$self
->{
'_loci'
} = []
unless
defined
$self
->{
'_loci'
};
for
my
$locus
(
@l
){
$self
->debug(
"Saving locus ("
.(
$#l
+1).
" loci, cur:"
.($#{
$self
->{
'_loci'
}}+1).
")"
);
$self
->throw(
'Expecting a Bio::Polloc::LocusI object'
,
$locus
)
unless
UNIVERSAL::can(
$locus
,
'isa'
) and
$locus
->isa(
'Bio::Polloc::LocusI'
);
$locus
->genome(
$self
->genomes->[
$space
])
if
defined
$space
;
push
@{
$self
->{
'_loci'
} },
$locus
;
}
}
sub
loci {
my
$self
=
shift
;
$self
->{
'_loci'
} = []
unless
defined
$self
->{
'_loci'
};
return
$self
->{
'_loci'
};
}
sub
structured_loci {
my
$self
=
shift
;
return
unless
defined
$self
->genomes;
my
$struct
= [];
for
my
$locus
(@{
$self
->loci}){
next
unless
defined
$locus
->genome;
my
$space
= 0;
for
my
$genome
(@{
$self
->genomes}){
$struct
->[
$space
] = []
unless
defined
$struct
->[
$space
];
if
(
$genome
->name eq
$locus
->genome->name){
push
@{
$struct
->[
$space
] },
$locus
;
}
$space
++;
}
}
return
$struct
;
}
sub
locus {
my
(
$self
,
$id
) =
@_
;
for
my
$locus
(@{
$self
->loci}){
return
$locus
if
$locus
->id eq
$id
}
return
;
}
sub
name {
my
(
$self
,
$value
) =
@_
;
$self
->{
'_name'
} =
$value
if
defined
$value
;
return
$self
->{
'_name'
};
}
sub
genomes {
my
(
$self
,
$value
) =
@_
;
$self
->{
'_genomes'
} =
$value
if
defined
$value
;
return
unless
defined
$self
->{
'_genomes'
};
$self
->throw(
"Unexpected type of genomes collection"
,
$self
->{
'_genomes'
})
unless
ref
(
$self
->{
'_genomes'
}) and
ref
(
$self
->{
'_genomes'
})=~m/ARRAY/i;
return
$self
->{
'_genomes'
};
}
sub
featurename {
my
(
$self
,
$value
) =
@_
;
$self
->{
'_featurename'
} =
$value
if
defined
$value
;
return
$self
->{
'_featurename'
};
}
sub
avg_length {
my
$self
=
shift
;
my
$len_avg
= 0;
my
$len_sd
= 0;
if
($
$len_avg
+=
abs
(
$_
->from -
$_
->to)
for
@{
$self
->loci};
$len_avg
=
$len_avg
/($
return
$len_avg
unless
wantarray
;
$len_sd
+= (
abs
(
$_
->from -
$_
->to) -
$len_avg
)**2
for
@{
$self
->loci};
$len_sd
=
sqrt
(
$len_sd
/$
}
elsif
($
$len_avg
=
abs
(
$self
->loci->[0]->from -
$self
->loci->[0]->to);
}
return
wantarray
? (
$len_avg
,
$len_sd
) :
$len_avg
;
}
sub
align_context {
my
(
$self
,
$ref
,
$from
,
$to
) =
@_
;
$from
+=0;
$to
+=0;
$ref
+=0;
return
if
$from
==
$to
and
$ref
!=0;
my
$factory
= Bio::Tools::Run::Alignment::Muscle->new();
$factory
->quiet(1);
my
@seqs
= ();
LOCUS:
for
my
$locus
(@{
$self
->loci}){
my
$seq
=
$locus
->context_seq(
$ref
,
$from
,
$to
);
next
LOCUS
unless
defined
$seq
;
$seq
->display_id(
$locus
->id)
if
defined
$locus
->id;
push
@seqs
,
$seq
;
}
return
unless
$#seqs
>-1; # Impossible without sequences
push
@seqs
, Bio::Seq->new(
-seq
=>
$seqs
[0]->seq,
-id
=>
'dup-seq'
)
unless
$#seqs
>0;
$self
->debug(
"Aligning context sequences"
);
return
$factory
->align(\
@seqs
);
}
sub
fix_strands {
my
(
$self
,
@args
) =
@_
;
my
(
$size
,
$force
) =
$self
->_rearrange([
qw(SIZE FORCE)
],
@args
);
return
if
not
$force
and
defined
$self
->{
'_fixed_strands'
} and
$self
->{
'_fixed_strands'
} == $
$self
->{
'_fixed_strands'
} = $
$self
->_load_module(
'Bio::Polloc::GroupCriteria'
);
return
unless
$
$size
||= 500;
my
$factory
= Bio::Tools::Run::Alignment::Muscle->new();
$factory
->quiet(1);
my
$ref
= [
undef
,
undef
];
LOCUS:
for
my
$lk
(1 .. $
my
$ref_test
= [
Bio::Polloc::GroupCriteria->_build_subseq(
$self
->loci->[
$lk
]->seq,
$self
->loci->[
$lk
]->from -
$size
,
$self
->loci->[
$lk
]->from),
Bio::Polloc::GroupCriteria->_build_subseq(
$self
->loci->[
$lk
]->seq,
$self
->loci->[
$lk
]->to,
$self
->loci->[
$lk
]->to +
$size
)
];
if
(
defined
$ref
->[0] and
defined
$ref
->[1]){
$ref
=
$ref_test
if
defined
$ref_test
->[0] and
defined
$ref_test
->[1]
and
$ref_test
->[0]->
length
>=
$ref
->[0]->
length
and
$ref_test
->[1]->
length
>=
$ref
->[1]->
length
;
}
elsif
(
defined
$ref
->[0] or
defined
$ref
->[1]){
$ref
=
$ref_test
if
defined
$ref_test
->[0] and
defined
$ref_test
->[1];
}
else
{
$ref
=
$ref_test
if
defined
$ref_test
->[0] or
defined
$ref_test
->[1];
}
}
unless
(
defined
$ref
->[0] or
defined
$ref
->[1]){
$self
->debug(
'Impossible to find a suitable reference'
);
return
;
}
$ref
=
defined
$ref
->[0] ?
(
defined
$ref
->[1] ?
Bio::Seq->new(
-seq
=>
$ref
->[0]->seq . (
"N"
x20) .
$ref
->[1]->seq)
:
$ref
->[0]
) :
$ref
->[1];
$ref
->id(
'ref'
);
$self
->loci->[0]->strand(
'+'
);
LOCUS:
for
my
$k
(0 .. $
my
$tgt
= Bio::Polloc::GroupCriteria->_build_subseq(
$self
->loci->[
$k
]->seq,
$self
->loci->[
$k
]->from-
$size
,
$self
->loci->[
$k
]->to+
$size
);
next
LOCUS
unless
$tgt
;
$tgt
->id(
'tgt'
);
my
$tgtrc
=
$tgt
->revcom;
$self
->debug(
"Setting strand for "
.
$self
->loci->[
$k
]->id)
if
defined
$self
->loci->[
$k
]->id;
my
$eval_fun
=
'average_percentage_identity'
;
if
(
$factory
->align([
$ref
,
$tgt
])->
$eval_fun
<
$factory
->align([
$ref
,
$tgtrc
])->
$eval_fun
){
$self
->debug(
"Assuming negative strand, setting locus orientation"
);
$self
->loci->[
$k
]->strand(
'-'
);
}
else
{
$self
->debug(
"Assuming positive strand, setting locus orientation"
);
$self
->loci->[
$k
]->strand(
'+'
);
}
}
}
sub
_initialize {
my
(
$self
,
@args
) =
@_
;
my
(
$name
,
$featurename
,
$genomes
) =
$self
->_rearrange([
qw(NAME FEATURENAME GENOMES)
],
@args
);
$self
->name(
$name
);
$self
->featurename(
$featurename
);
$self
->genomes(
$genomes
);
}
1;