use
version;
our
$VERSION
= version->declare(
"$Bio::ViennaNGS::VERSION"
);
has
'data'
=> (
is
=>
'ro'
,
isa
=>
'HashRef'
,
predicate
=>
'has_data'
,
default
=>
sub
{ {} },
);
has
'region'
=> (
is
=>
'ro'
,
isa
=>
'HashRef'
,
predicate
=>
'has_region'
,
default
=>
sub
{ {} },
);
has
'peaks'
=> (
is
=>
'ro'
,
isa
=>
'HashRef'
,
predicate
=>
'has_peaks'
,
default
=>
sub
{ {} },
);
has
'winsize'
=> (
is
=>
'ro'
,
isa
=>
'Int'
,
predicate
=>
'has_winsize'
,
);
has
'interval'
=> (
is
=>
'ro'
,
isa
=>
'Int'
,
predicate
=>
'has_interval'
,
);
has
'mincov'
=> (
is
=>
'ro'
,
isa
=>
'Int'
,
predicate
=>
'has_mincov'
,
);
has
'length'
=> (
is
=>
'ro'
,
isa
=>
'Int'
,
predicate
=>
'has_length'
,
);
has
'threshold'
=> (
is
=>
'ro'
,
isa
=>
'Value'
,
predicate
=>
'has_threshold'
,
);
sub
populate_data {
my
(
$self
,
$filep
,
$filen
) =
@_
;
my
$this_function
= (
caller
(0))[3];
my
(
$i
,
$element
,
$chr
,
$start
,
$end
,
$val
,
$lastend
);
my
$have_lastend
= 0;
foreach
$element
(@{
$filep
->data}){
unless
(
exists
${
$self
->region}{
$element
->chromosome}{
pos
}{start}){
${
$self
->region}{
$element
->chromosome}{
pos
}{start} =
$element
->start;
}
if
(
$have_lastend
== 1 &&
$element
->start>
$lastend
){
for
(
$i
=
$lastend
;
$i
<
$element
->start;
$i
++){
${
$self
->data}{
$element
->chromosome}{
pos
}[
$i
] = 0.;
}
}
for
(
$i
=
$element
->start;
$i
<
$element
->end;
$i
++){
${
$self
->data}{
$element
->chromosome}{
pos
}[
$i
]=
$element
->dataValue;
}
$lastend
=
$element
->end;
$have_lastend
= 1;
}
$lastend
= 0;
$have_lastend
= 1;
foreach
$element
(@{
$filen
->data}){
if
(
$element
->{dataValue} <= 0){
$element
->{dataValue} *= -1;
}
unless
(
exists
${
$self
->region}{
$element
->chromosome}{neg}{start}){
${
$self
->region}{
$element
->chromosome}{neg}{start} =
$element
->start;
}
if
(
$have_lastend
== 1 &&
$element
->start>
$lastend
){
for
(
$i
=
$lastend
;
$i
<
$element
->start;
$i
++){
${
$self
->data}{
$element
->chromosome}{neg}[
$i
] = 0.;
}
}
for
(
$i
=
$element
->start;
$i
<
$element
->end;
$i
++){
${
$self
->data}{
$element
->chromosome}{neg}[
$i
]=
$element
->dataValue;
}
$lastend
=
$element
->end;
$have_lastend
= 1;
}
return
;
}
sub
raw_peaks {
my
(
$self
,
$dest
,
$prefix
,
$log
) =
@_
;
my
(
$fn
,
$tfn
,
$tfh
,
$maxidx
,
$have_pstart
,
$pstart
,
$pend
,
$chr
);
my
(
$winstart
,
$winend
,
$winsum
,
$mean
,
$lastmax
,
$from
,
$to
);
my
(
$index_of_maxwin
);
my
$suffix
=
"rawpeaks.bed"
;
my
$this_function
= (
caller
(0))[3];
my
$flex
= 10;
croak
"ERROR [$this_function]: $dest does not exist\n"
unless
(-d
$dest
);
if
(
defined
$log
){
open
(LOG,
">"
,
$log
) or croak
"$!"
}
else
{croak
"ERROR [$this_function] \$log not defined"
;}
$fn
=
$prefix
.
"."
.
$suffix
;
(
undef
,
$tfn
) = tempfile(
'RAWPEAKS_XXXXXXX'
,
UNLINK
=>0);
croak
"ERROR [$this_function]: $self->data not available\n"
unless
(
$self
->has_data);
croak
"ERROR [$this_function]: $self->region not available\n"
unless
(
$self
->has_region);
open
(
$tfh
,
">"
,
$tfn
) or croak $!;
$lastmax
= 0;
foreach
$chr
(
keys
(%{
$self
->data})){
$maxidx
= $
$have_pstart
= 0;
$pstart
= -1;
$pend
= -1;
if
(
defined
$log
){
print
LOG
"processing chr $chr [+] strand $maxidx\n"
}
croak
"$chr has not been parsed in input data"
unless
(
exists
$self
->region->{
$chr
}{
pos
}{start});
for
(
$winstart
=
$self
->region->{
$chr
}{
pos
}{start};
$winstart
<(
$maxidx
-
$self
->winsize);
$winstart
+=
$self
->interval){
$winend
=
$winstart
+
$self
->winsize - 1;
$winsum
= sum0 @{
$self
->data->{
$chr
}{
pos
}}[
$winstart
..
$winend
];
$mean
=
$winsum
/
$self
->winsize;
if
(
$mean
>
$lastmax
){
$lastmax
=
$mean
;
$index_of_maxwin
=
$winstart
;
unless
(
$have_pstart
== 1) {
$pstart
=
$winstart
;
$have_pstart
= 1;
}
}
else
{
if
(
$mean
> 0. &&
$mean
<=
$lastmax
*$self
->threshold ){
$pend
=
$winend
;
$have_pstart
= 0;
$from
= min(
$pstart
,
$pend
);
$to
= max(
$pstart
,
$pend
);
next
if
(
$from
<0);
next
if
(
$to
<0);
if
(
$from
-
$flex
> 0){
$from
=
$from
-
$flex
;}
$to
=
$to
+
$flex
;
my
%pk
= (
chr
=>
$chr
,
start
=>
$from
,
end
=>
$to
,
strand
=>
'pos'
,
maxindex
=>
$index_of_maxwin
,
);
push
(@{
$self
->peaks->{
$chr
} }, \
%pk
);
if
(
defined
$log
){
print
LOG
"**PEAK $chr [+] $from-$to max at $index_of_maxwin\n"
}
print
$tfh
"$chr\t$from\t$to\tRAW\t0\t+\n"
;
$lastmax
= 0;
$pstart
= -1;
$pend
= -1;
}
}
}
$lastmax
= 0;
$maxidx
= $
$have_pstart
= 0;
$pstart
= -1;
$pend
= -1;
if
(
defined
$log
){
print
LOG
"processing chr $chr [-] strand $maxidx\n"
}
croak
"$chr has not been parsed in input data"
unless
(
exists
$self
->region->{
$chr
}{neg}{start});
for
(
$winstart
=
$maxidx
;
$winstart
>(
$self
->region->{
$chr
}{neg}{start}+
$self
->winsize);
$winstart
-=
$self
->interval){
$winend
=
$winstart
-
$self
->winsize + 1;
$winsum
= sum0 @{
$self
->data->{
$chr
}{neg}}[
$winend
..
$winstart
];
$mean
=
$winsum
/
$self
->winsize;
if
(
$mean
>
$lastmax
){
$lastmax
=
$mean
;
$index_of_maxwin
=
$winend
;
unless
(
$have_pstart
== 1) {
$pstart
=
$winend
;
$have_pstart
= 1;
}
}
else
{
if
(
$mean
> 0. &&
$mean
<=
$lastmax
*$self
->threshold ){
$pend
=
$winstart
;
$have_pstart
= 0;
$from
= min(
$pstart
,
$pend
);
$to
= max(
$pstart
,
$pend
);
next
if
(
$from
<0);
next
if
(
$to
<0);
if
(
$from
-
$flex
> 0){
$from
=
$from
-
$flex
;}
$to
=
$to
+
$flex
;
my
%pk
= (
chr
=>
$chr
,
start
=>
$from
,
end
=>
$to
,
strand
=>
'neg'
,
maxindex
=>
$index_of_maxwin
,
);
push
(@{
$self
->peaks->{
$chr
} }, \
%pk
);
if
(
defined
$log
){
print
LOG
"**PEAK $chr [-] $from-$to max at $index_of_maxwin\n"
}
print
$tfh
"$chr\t$from\t$to\tRAW\t0\t-\n"
;
$lastmax
= 0;
$pstart
= -1;
$pend
= -1;
}
}
}
}
close
(
$tfh
);
close
(LOG);
sortbed(
$tfn
,
$dest
,
$fn
,0,
$log
);
unlink
(
$tfn
);
}
sub
final_peaks {
my
(
$self
,
$dest
,
$prefix
,
$log
) =
@_
;
my
(
$fn
,
$tfn
,
$tfh
,
$strand
,
$max
,
$idx
,
$val
,
$peak
,
$position
,
$pleft
,
$pright
,
$str
);
my
$suffix
=
"candidatepeaks.bed"
;
my
$this_function
= (
caller
(0))[3];
croak
"ERROR [$this_function]: $dest does not exist\n"
unless
(-d
$dest
);
if
(
defined
$log
){
open
(LOG,
">>"
,
$log
) or croak
"$!"
;}
else
{croak
"ERROR [$this_function] \$log not defined"
;}
$fn
=
$prefix
.
"."
.
$suffix
;
(
undef
,
$tfn
) = tempfile(
'CANDIDATEPEAKS_XXXXXXX'
,
UNLINK
=>0);
croak
"ERROR [$this_function]: $self->data not available\n"
unless
(
$self
->has_data);
croak
"ERROR [$this_function]: $self->peaks not available\n"
unless
(
$self
->has_peaks);
open
(
$tfh
,
">"
,
$tfn
) or croak $!;
foreach
my
$chr
(
keys
(%{
$self
->peaks})){
if
(
defined
$log
){
print
LOG
"filtering candidate peaks in $chr\n"
;}
foreach
$peak
( @{
$self
->peaks->{
$chr
}}){
my
$have_pleft
= 0;
my
$have_pright
= 0;
$strand
=
$$peak
{strand};
if
(
defined
$log
){
print
LOG
">>processing peak $$peak{start}-$$peak{end} [$strand] "
;}
$max
= max @{
$self
->data->{
$chr
}{
$strand
}}[
$$peak
{start}..
$$peak
{end}];
if
(
defined
$log
){
print
LOG
"max is $max"
;}
if
(
$max
<
$self
->mincov){
print
LOG
" ..SKIPPING\n"
;
next
;}
$idx
= first { @{
$self
->data->{
$chr
}{
$strand
}}[
$_
] eq
$max
}
$$peak
{start}..
$$peak
{end};
if
(
defined
$log
){
print
LOG
" at $idx ..KEEPING\n"
;}
die
"maximum at $idx is not within peak region"
if
(
$idx
<
$$peak
{start} ||
$idx
>
$$peak
{end} );
for
(
$position
=
$idx
;
$position
>
$$peak
{start};
$position
--){
$val
= ${
$self
->data->{
$chr
}{
$strand
}}[
$position
];
if
(
defined
$log
){
print
LOG
"** VAL_L at pos $position $val - \$have_pleft=$have_pleft "
;}
if
(
$val
<
$max
*$self
->threshold){
$have_pleft
= 1;
if
(
defined
$log
){
print
LOG
"exiting 'cause $val < "
.
$max
*$self
->threshold.
" - \$have_pleft=$have_pleft\n"
;
}
last
;
}
if
(
defined
$log
){
print
LOG
"\n"
;}
}
$pleft
=
$position
;
for
(
$position
=
$idx
;
$position
<=
$$peak
{end};
$position
++){
$val
= ${
$self
->data->{
$chr
}{
$strand
}}[
$position
];
if
(
defined
$log
){
print
LOG
"** VAL_R at pos $position $val - \$have_pright=$have_pright "
;}
if
(
$val
<
$max
*$self
->threshold){
$have_pright
= 1;
if
(
defined
$log
){
print
LOG
"exiting 'cause $val < "
.
$max
*$self
->threshold.
"- \$have_pright=$have_pright\n"
;
}
last
;
}
if
(
defined
$log
){
print
LOG
"\n"
;}
}
$pright
=
$position
;
if
(
$strand
eq
"neg"
){
$str
=
'-'
;
}
else
{
$str
=
'+'
;
}
if
(
defined
$log
){
print
LOG
"# PEAK characterized at $chr:$pleft-$pright\n"
;}
if
(
$have_pleft
== 1 &&
$have_pright
== 1){
if
(
$pright
-
$pleft
<=
$self
->
length
){
if
(
defined
$log
){
print
$tfh
"$chr\t$pleft\t$pright\tCANDIDATE\t$max\t$str\n"
;}
}
}
}
}
close
(
$tfh
);
sortbed(
$tfn
,
$dest
,
$fn
,0,
$log
);
unlink
(
$tfn
);
}
__PACKAGE__->meta->make_immutable;
1;