use 5.006;
our $VERSION = '1.122';
# Class for a chromatin modification site consisting consecutive significant windows.
package diffReps::ChromaModSite;
use strict;
use MyBioinfo::Common;
use MyShortRead::SRBed;
our @ISA = qw(diffReps::SlideWindow); # Inherited from 'SlideWindow'.
# Constructor.
sub new{
my $class = shift;
my $self = {
chrom => undef,
start => undef,
end => undef,
siteCenter => undef,
retrieved => 0, # boolean tag: whether count data have been retrieved?
tr_cnt => [], # region normalized treatment counts.
co_cnt => [], # region normalized control counts.
tr_cnt_ => [], # normalized treatment counts using #reads.
co_cnt_ => [], # normalized control counts using #reads.
btr_cnt => [], # normalized background treatment counts.
bco_cnt => [], # normalized background control counts.
#trEnr => undef, # treatment enrichment vs. background.
#coEnr => undef, # control enrichment vs. background.
rsumTr => undef, # sum of raw treatment counts.
rsumCo => undef, # sum of raw control counts.
dirn => undef, # change direction: 'Up' or 'Down'.
logFC => undef, # log2 fold change.
pval => undef, # P-value.
padj => undef, # adjusted P-value.
winSta => undef, # best window start.
#winTr => [], # window normalized treatment counts.
#winCo => [], # window normalized control counts.
winFC => undef, # window log2 fold change.
winP => undef, # window P-value.
winQ => undef # window adjusted P-value.
};
bless $self, $class;
return $self;
}
# Copy member data into an anonymous hash table.
sub dcopy{
my $self = shift;
my $new = {};
%{$new} = %{$self};
# Deep copy array members that are array variables.
$new->{tr_cnt} = []; @{$new->{tr_cnt}} = @{$self->{tr_cnt}};
$new->{co_cnt} = []; @{$new->{co_cnt}} = @{$self->{co_cnt}};
$new->{tr_cnt_} = []; @{$new->{tr_cnt_}} = @{$self->{tr_cnt_}};
$new->{co_cnt_} = []; @{$new->{co_cnt_}} = @{$self->{co_cnt_}};
$new->{btr_cnt} = []; @{$new->{btr_cnt}} = @{$self->{btr_cnt}};
$new->{bco_cnt} = []; @{$new->{bco_cnt}} = @{$self->{bco_cnt}};
#$new->{winTr} = []; @{$new->{winTr}} = @{$self->{winTr}};
#$new->{winCo} = []; @{$new->{winCo}} = @{$self->{winCo}};
bless $new, 'diffReps::ChromaModSite';
return $new;
}
# Initialize a region by a significant window.
sub init{
my($self,$gdt,$rwin) = @_;
$self->{chrom} = $rwin->{chrom};
$self->{start} = $rwin->{winN}*$gdt->{step} + 1;
$self->{end} = $rwin->{winN}*$gdt->{step} + $gdt->{winSize};
$self->{siteCenter} = ($self->{start} + $self->{end}) /2;
$self->{retrieved} = 1;
@{$self->{tr_cnt}} = @{$rwin->{tr_cnt}};
@{$self->{co_cnt}} = @{$rwin->{co_cnt}};
@{$self->{tr_cnt_}} = @{$rwin->{tr_cnt_}};
@{$self->{co_cnt_}} = @{$rwin->{co_cnt_}};
@{$self->{btr_cnt}} = @{$rwin->{btr_cnt}};
@{$self->{bco_cnt}} = @{$rwin->{bco_cnt}};
$self->{rsumTr} = $rwin->{rsumTr};
$self->{rsumCo} = $rwin->{rsumCo};
$self->{dirn} = $rwin->{dirn};
$self->{logFC} = $rwin->{logFC};
$self->{pval} = $rwin->{pval};
$self->{winSta} = $self->{start};
#@{$self->{winTr}} = @{$rwin->{tr_cnt}};
#@{$self->{winCo}} = @{$rwin->{co_cnt}};
$self->{winFC} = $rwin->{logFC};
$self->{winP} = $rwin->{pval};
}
# Expand the genomic coordinates. Replace best window if necessary.
# Do not retrieve counts and sums to save some computation.
sub expand{
my($self,$gdt,$rwin) = @_;
$self->{end} = $rwin->{winN}*$gdt->{step} + $gdt->{winSize}; # new region end.
$self->{siteCenter} = ($self->{start} + $self->{end}) /2;
# Invalidate previous diff results.
$self->reset_content(1); # reset everything but keep direction.
# $self->{tr_cnt} = [];
# $self->{co_cnt} = [];
# $self->{rsumTr} = undef;
# $self->{rsumCo} = undef;
# $self->{logFC} = undef;
# Replace best window if necessary.
if($rwin->{pval} < $self->{winP}){
$self->{winSta} = $rwin->{winN}*$gdt->{step} + 1;
#@{$self->{winTr}} = @{$rwin->{tr_cnt}};
#@{$self->{winCo}} = @{$rwin->{co_cnt}};
$self->{winFC} = $rwin->{logFC};
$self->{winP} = $rwin->{pval};
}
}
# Print a formatted record of the current region. Do nothing if the P-val is not defined.
sub print_reg{
my($self,$gdt,$h) = @_;
if(defined $self->{pval}){
# Concatenate treatment and control counts for output.
my @tr_cnt_fmt = fprecision(2, @{$self->{tr_cnt}});
my @co_cnt_fmt = fprecision(2, @{$self->{co_cnt}});
my $tr_cnt_str = join(';', @tr_cnt_fmt);
my $co_cnt_str = join(';', @co_cnt_fmt);
my($tr_bkg_enr, $co_bkg_enr);
if($gdt->{bkgEnr}){
my $btr_m = mean(@{$self->{btr_cnt}});
my $bco_m = mean(@{$self->{bco_cnt}});
$tr_bkg_enr = $btr_m > 0? fprecision(2, mean(@{$self->{tr_cnt_}}) / $btr_m) : INFINITE;
$co_bkg_enr = $bco_m > 0? fprecision(2, mean(@{$self->{co_cnt_}}) / $bco_m) : INFINITE;
}else{
$tr_bkg_enr = 'NA';
$co_bkg_enr = 'NA';
}
# Output: chrom,start,end,length,(treatment read count),(control read count),direction,logFC,pval,padj,
# (window start),(window end),(window logFC),(window pval),(window padj).
print $h join("\t", ($self->{chrom},
$self->{start}, $self->{end}, $self->{end}-$self->{start}+1, # region coordinates.
$tr_cnt_str, $co_cnt_str, # formatted read counts.
fprecision(2, &mean(@{$self->{tr_cnt}})), fprecision(2, &mean(@{$self->{co_cnt}})), # avg read count.
$tr_bkg_enr, $co_bkg_enr, # enrichment vs. background.
$self->{dirn}, fprecision(2,$self->{logFC}), # change direction and logFC.
$self->{pval}, $self->{padj}, # diff analysis info.
$self->{winSta}, $self->{winSta}+$gdt->{winSize}-1, # best window coordinates.
fprecision(2,$self->{winFC}),
$self->{winP}, $self->{winQ})), "\n"; # best window diff info.
}
}
# Do we need to perform stat test for the region?
# If the region contains only one window, this is already done.
sub needStat{
my $self = shift;
return (defined($self->{winP}) && !defined($self->{pval}));
}
# Retrieve normalized read counts for the region. Override parent function.
sub retrieve_norm_cnt{
my($self,$gdt) = @_;
my @atr_cnt; # array treatment counts.
my $i = 0; # iterator for normalization constants.
foreach my $b(@{$gdt->{bedTr}}) {
push @atr_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end})
/ $gdt->{normTr}[$i++]);
}
my @aco_cnt; # array control counts.
$i = 0;
foreach my $b(@{$gdt->{bedCo}}) {
push @aco_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end})
/ $gdt->{normCo}[$i++]);
}
@{$self->{tr_cnt}} = @atr_cnt;
@{$self->{co_cnt}} = @aco_cnt;
# Retrieve norm count for background if needed.
if($gdt->{bkgEnr}){
my @atr_cnt_; # array treatment counts.
$i = 0; # iterator for normalization constants.
foreach my $b(@{$gdt->{bedTr}}) {
push @atr_cnt_, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normTr_}->[$i++]);
}
@{$self->{tr_cnt_}} = @atr_cnt_;
my @aco_cnt_; # array treatment counts.
$i = 0; # iterator for normalization constants.
foreach my $b(@{$gdt->{bedCo}}) {
push @aco_cnt_, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normCo_}->[$i++]);
}
@{$self->{co_cnt_}} = @aco_cnt_;
if(@{$gdt->{bedBtr}}){
my @abtr_cnt; # array background treatment counts.
$i = 0; # iterator for normalization constants.
foreach my $b(@{$gdt->{bedBtr}}) {
push @abtr_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normBtr}->[$i++]);
}
@{$self->{btr_cnt}} = @abtr_cnt;
}
if(@{$gdt->{bedBco}}){
my @abco_cnt; # array background treatment counts.
$i = 0; # iterator for normalization constants.
foreach my $b(@{$gdt->{bedBco}}) {
push @abco_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normBco}->[$i++]);
}
@{$self->{bco_cnt}} = @abco_cnt;
}
if(@{$self->{btr_cnt}} == 0){
@{$self->{btr_cnt}} = @{$self->{bco_cnt}};
}
if(@{$self->{bco_cnt}} == 0){
@{$self->{bco_cnt}} = @{$self->{btr_cnt}};
}
}
}
# Retrieve sum of raw read counts for the region. Override parent function.
sub retrieve_raw_sum{
my($self,$gdt) = @_;
$self->{rsumTr} = 0;
foreach my $b(@{$gdt->{bedTr}}) {
$self->{rsumTr} += $b->get_region_count($self->{chrom}, $self->{start}, $self->{end});
}
$self->{rsumCo} = 0;
foreach my $b(@{$gdt->{bedCo}}) {
$self->{rsumCo} += $b->get_region_count($self->{chrom}, $self->{start}, $self->{end});
}
}
# Determine whether a window is within gap.
sub isWithinGap{
my($self,$gdt,$rwin) = @_;
return 0 if !defined $self->{chrom};
my $winSta = $rwin->{winN}*$gdt->{step}+1;
return ($winSta - $self->{end}-1 <= $gdt->{gap} and $rwin->{dirn} eq $self->{dirn});
}
# Class for a sliding window of fixed size.
package diffReps::SlideWindow;
use strict;
use Statistics::TTest;
use MyBioinfo::NBTest;
use MyBioinfo::Common;
use MyBioinfo::Math qw( gtest_gof chisqtest_gof );
use MyShortRead::SRBed;
# Constructor.
sub new{
my $class = shift;
my $self = {
chrom => undef,
winN => undef, # window number to retrieve count from SRBed class.
maxN => undef, # maximum window number.
retrieved => 0, # boolean tag: whether count data have been retrieved?
tr_cnt => [], # normalized treatment counts.
co_cnt => [], # normalized control counts.
tr_cnt_ => [], # normalized treatment counts using #reads.
co_cnt_ => [], # normalized control counts using #reads.
btr_cnt => [], # normalized background treatment counts.
bco_cnt => [], # normalized background control counts.
rsumTr => undef, # sum of raw treatment counts.
rsumCo => undef, # sum of raw control counts.
dirn => undef, # change direction: 'Up' or 'Down'.
logFC => undef, # log2 fold change.
pval => undef # P-value.
};
bless $self, $class;
return $self;
}
# Set the current chromosome name, max window number and reset everything.
# Obtain counts and sums for the first window.
sub set_chrom{
my($self,$gdt,$chrom,$maxN) = @_;
$self->{chrom} = $chrom;
$self->{maxN} = $maxN;
$self->{winN} = 0;
$self->reset_content;
#$self->{retrieved} = 0;
# $self->retrieve_norm_cnt($gdt);
# $self->retrieve_raw_sum($gdt);
#$self->{trEnr} = undef;
#$self->{coEnr} = undef;
# $self->{dirn} = undef; # change direction: 'Up' or 'Down'.
# $self->{logFC} = undef; # log2 fold change.
# $self->{pval} = undef; # P-value.
}
# Reset window number.
sub reset_winN{
my $self = shift;
$self->{winN} = 0;
}
# Reset window/region content.
sub reset_content{
my($self,$keepdi) = @_;
$self->{retrieved} = 0; # boolean tag: whether count data have been retrieved?
$self->{tr_cnt} = []; # normalized treatment counts.
$self->{co_cnt} = []; # normalized control counts.
$self->{tr_cnt_} = []; # normalized treatment counts using #reads.
$self->{co_cnt_} = []; # normalized control counts using #reads.
$self->{btr_cnt} = []; # normalized background treatment counts.
$self->{bco_cnt} = []; # normalized background control counts.
$self->{rsumTr} = undef; # sum of raw treatment counts.
$self->{rsumCo} = undef; # sum of raw control counts.
$self->{dirn} = undef unless $keepdi; # change direction: 'Up' or 'Down'.
$self->{logFC} = undef; # log2 fold change.
$self->{pval} = undef; # P-value.
}
# Retrieve norm/raw count given current window/region location.
sub retrieve_new_cnt{
my($self,$gdt) = @_;
$self->{retrieved} = 1;
$self->retrieve_norm_cnt($gdt);
$self->retrieve_raw_sum($gdt);
}
# Shift the window one position to the right and obtain new counts and sums.
sub move_forward{
my($self,$gdt) = @_;
if($self->{winN} <= $self->{maxN}-1) {
$self->{winN}++;
$self->reset_content;
# $self->retrieve_norm_cnt($gdt);
# $self->retrieve_raw_sum($gdt);
#$self->{trEnr} = undef;
#$self->{coEnr} = undef;
#$self->{dirn} = undef; # change direction: 'Up' or 'Down'.
#$self->{logFC} = undef; # log2 fold change.
#$self->{pval} = undef; # P-value.
return $self->{winN};
}
# elsif($self->{winN} == $self->{maxN}-1){
# $self->{winN} = $self->{maxN};
# $self->{tr_cnt} = [];
# $self->{co_cnt} = [];
# $self->{tr_cnt_} = [];
# $self->{co_cnt_} = [];
# $self->{btr_cnt} = [];
# $self->{bco_cnt} = [];
# #$self->{trEnr} = undef;
# #$self->{coEnr} = undef;
# $self->{dirn} = undef; # change direction: 'Up' or 'Down'.
# $self->{logFC} = undef; # log2 fold change.
# $self->{pval} = undef; # P-value.
# return $self->{winN};
# }
else {return $self->{maxN};}
# Max valid window index should be maxN-1.
}
# Return current window number.
sub get_winN{
my $self = shift;
if($self->{winN} < $self->{maxN}) {return $self->{winN};}
else {return -1;}
}
# Return chromosome name.
sub get_chrom{
my $self = shift;
return $self->{chrom};
}
# Get normalized read counts for external procedures.
sub get_cnt_tr{
my $self = shift;
return @{$self->{tr_cnt}};
}
sub get_cnt_co{
my $self = shift;
return @{$self->{co_cnt}};
}
sub get_cnt_tr_{
my $self = shift;
return @{$self->{tr_cnt_}};
}
sub get_cnt_co_{
my $self = shift;
return @{$self->{co_cnt_}};
}
sub get_cnt_btr{
my $self = shift;
return @{$self->{btr_cnt}};
}
sub get_cnt_bco{
my $self = shift;
return @{$self->{bco_cnt}};
}
# Get differential analysis info.
sub get_diff_info{
my $self = shift;
return {
dirn => $self->{dirn},
logFC => $self->{logFC},
pval => $self->{pval}
}; # anonymous hash.
}
# Retrieve normalized read counts by current window number from SRBed objects.
sub retrieve_norm_cnt{
my($self,$gdt) = @_;
my @atr_cnt; # array treatment counts.
my $i = 0; # iterator for normalization constants.
foreach my $b(@{$gdt->{bedTr}}) {
push @atr_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normTr}->[$i++]);
}
@{$self->{tr_cnt}} = @atr_cnt;
my @aco_cnt; # array control counts.
$i = 0;
foreach my $b(@{$gdt->{bedCo}}) {
push @aco_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normCo}->[$i++]);
}
@{$self->{co_cnt}} = @aco_cnt;
# Retrieve norm count for background if needed.
if($gdt->{bkgEnr}){
my @atr_cnt_; # array treatment counts.
$i = 0; # iterator for normalization constants.
foreach my $b(@{$gdt->{bedTr}}) {
push @atr_cnt_, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normTr_}->[$i++]);
}
@{$self->{tr_cnt_}} = @atr_cnt_;
my @aco_cnt_; # array treatment counts.
$i = 0; # iterator for normalization constants.
foreach my $b(@{$gdt->{bedCo}}) {
push @aco_cnt_, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normCo_}->[$i++]);
}
@{$self->{co_cnt_}} = @aco_cnt_;
if(@{$gdt->{bedBtr}}){
my @abtr_cnt; # array background treatment counts.
$i = 0; # iterator for normalization constants.
foreach my $b(@{$gdt->{bedBtr}}) {
push @abtr_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normBtr}->[$i++]);
}
@{$self->{btr_cnt}} = @abtr_cnt;
}
if(@{$gdt->{bedBco}}){
my @abco_cnt; # array background treatment counts.
$i = 0; # iterator for normalization constants.
foreach my $b(@{$gdt->{bedBco}}) {
push @abco_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normBco}->[$i++]);
}
@{$self->{bco_cnt}} = @abco_cnt;
}
if(@{$self->{btr_cnt}} == 0){
@{$self->{btr_cnt}} = @{$self->{bco_cnt}};
}
if(@{$self->{bco_cnt}} == 0){
@{$self->{bco_cnt}} = @{$self->{btr_cnt}};
}
}
}
# Retrieve sum of raw read counts by current window number from SRBed objects.
sub retrieve_raw_sum{
my($self,$gdt) = @_;
$self->{rsumTr} = 0;
foreach my $b(@{$gdt->{bedTr}}) {
$self->{rsumTr} += $b->get_win_count($self->{chrom}, $self->{winN});
}
$self->{rsumCo} = 0;
foreach my $b(@{$gdt->{bedCo}}) {
$self->{rsumCo} += $b->get_win_count($self->{chrom}, $self->{winN});
}
}
# Determine whether current bin counts are above cutoff or not.
sub isAboveCutoff{
my($self,$gdt) = @_;
die "Empty SRBed vectors encountered in function: isAboveCutoff.\n"
if @{$gdt->{bedTr}} == 0 or @{$gdt->{bedCo}} == 0;
my $tagTr = 1;
my $tagCo = 1;
if($gdt->{bkgEnr} and $gdt->{useBkg} > 0){ # use background as filter.
if(!$self->{retrieved}){ # retrieve new count if not done so.
$self->retrieve_new_cnt($gdt);
}
my $btr_m = mean(@{$self->{btr_cnt}});
my $bco_m = mean(@{$self->{bco_cnt}});
foreach my $c(@{$self->{tr_cnt_}}){
if(!$c or ($c and $btr_m and $c / $btr_m <= $gdt->{useBkg})){
$tagTr = 0;
last;
}
}
if(!$tagTr){
foreach my $c(@{$self->{co_cnt_}}){
if(!$c or ($c and $bco_m and $c / $bco_m <= $gdt->{useBkg})){
$tagCo = 0;
last;
}
}
}
}else{ # use raw count and equation as filter.
my $i = 0;
foreach my $b(@{$gdt->{bedTr}}){
my $rawcnt = $b->get_win_count($self->{chrom}, $self->{winN});
if(!$rawcnt or $rawcnt <= $gdt->{meanTr}[$i] + $gdt->{nsd}*$gdt->{stdTr}[$i]){
$tagTr = 0;
last;
}
$i++;
}
if(!$tagTr){
$i = 0;
foreach my $b(@{$gdt->{bedCo}}){
my $rawcnt = $b->get_win_count($self->{chrom}, $self->{winN});
if(!$rawcnt or $rawcnt <= $gdt->{meanCo}[$i] + $gdt->{nsd}*$gdt->{stdCo}[$i]){
$tagCo = 0;
last;
}
$i++;
}
}
}
return ($tagTr || $tagCo);
}
# Calculate statistical scores based on current window counts.
sub calc_stat{
my($self,$gdt,$meth,$eps) = @_;
#return $self->{pval} if defined $self->{pval}; # no duplicated calculation please.
return 1 if !defined($self->{chrom}); # do nothing if the region is not defined.
if(!$self->{retrieved}){ # retrieve new count if not done so.
$self->retrieve_new_cnt($gdt);
}
# Check whether counts and sums are ready for stat test.
die "Empty count vectors encountered when doing stat test.\n"
if @{$self->{tr_cnt}} == 0 or @{$self->{co_cnt}} == 0;
die "Undefined raw sums for treatment or control when doing stat test.\n"
if !defined($self->{rsumTr}) or !defined($self->{rsumCo});
# Noramlized mean count for each group.
my $tr_m = mean(@{$self->{tr_cnt}});
my $co_m = mean(@{$self->{co_cnt}});
# Special case: zero count for both groups.
if($tr_m == 0 and $co_m == 0) {
$self->{logFC} = 0;
$self->{dirn} = 'Nons';
$self->{pval} = 1;
}
else {
if($co_m == 0) {$self->{logFC} = INFINITE;}
else {$self->{logFC} = log2($tr_m/$co_m);}
$self->{dirn} = $self->{logFC} > 0? 'Up' : 'Down';
# Perform statistical tests to find P-value.
if($meth eq 'tt'){ # T-test.
my $ttest = new Statistics::TTest;
$ttest->load_data(\@{$self->{tr_cnt}}, \@{$self->{co_cnt}});
$self->{pval} = $ttest->t_prob;
}
elsif($meth eq 'nb'){ # Negative Binomial test.
my $tr_var = var(@{$self->{tr_cnt}});
my $co_var = var(@{$self->{co_cnt}});
my $tr_raw_m = raw_sum_mean($tr_m,$co_m,$gdt->{normTr});
my $co_raw_m = raw_sum_mean($tr_m,$co_m,$gdt->{normCo});
my $tr_raw_v = raw_sum_var($tr_var,$tr_m,$co_m,$gdt->{normTr},$tr_raw_m,$eps);
my $co_raw_v = raw_sum_var($co_var,$tr_m,$co_m,$gdt->{normCo},$co_raw_m,$eps);
$self->{pval} = nb_pval($self->{rsumTr},$self->{rsumCo},$tr_raw_m,$tr_raw_v,$co_raw_m,$co_raw_v,$eps);
}
elsif($meth eq 'gt' or $meth eq 'cs'){
my $tr_sum = sum(@{$self->{tr_cnt}});
my $co_sum = sum(@{$self->{co_cnt}});
my $a_cnt = [$tr_sum, $co_sum]; # ref to array of normalized counts.
my $tr_rep = scalar @{$self->{tr_cnt}};
my $co_rep = scalar @{$self->{co_cnt}};
my $tot_rep = $tr_rep + $co_rep;
my $a_p = [$tr_rep/$tot_rep, $co_rep/$tot_rep]; # ref to array of probabilities.
my @test_res; # result vector: statistic, DOF, p-value.
if($meth eq 'gt'){
@test_res = gtest_gof($a_cnt, $a_p);
}else{
@test_res = chisqtest_gof($a_cnt, $a_p);
}
$self->{pval} = $test_res[2];
}
else{
die "Undefined statistical test! Halt execution.\n";
}
}
return $self->{pval};
}
# Subroutines for NB test. Copied from Common.pm. Written by Ying Jin.
# NOTES: In the original diffPermute program, norm constants were multiplied to
# raw counts to get normalized values. This only saves ignorable execution time
# but causes confusion. They are now reversed, so do the following subroutines.
sub raw_sum_mean{
my ($m1,$m2,$norm_ref)=@_;
my $v = 0;
for my $i(@{$norm_ref}){
$v += $i;
}
return $v*($m1+$m2)/2;
}
sub raw_sum_var{
my ($base_var,$m1,$m2,$norm,$raw_mean,$eps)=@_;
my $base_mean = ($m1+$m2)/2;
my $z =0;
my $s = 0;
for my $i(@{$norm}){
$z += 1/$i;
$s += $i**2;
}
$z = $z/@{$norm};
my $var = $base_var - $z * $base_mean;
if($var < $eps*$base_mean){
$var = $eps*$base_mean;
}
return $var*$s + $raw_mean;
}
###################################
# Class for managing a list of significant regions: add, adjust P-values, output.
package diffReps::RegionList;
use strict;
use MyBioinfo::Common;
use diffReps::DiffRes qw( cmpsite );
# Constructor.
sub new{
my $class = shift;
my $self = {
regList => [], # region list.
adjusted => 0 # bool tag for P-value adjustment.
};
bless $self, $class;
return $self;
}
# Add a significant region into list if the P-value is defined.
sub add{
my($self,$rreg) = @_;
if(defined $rreg->{pval}){
push @{$self->{regList}}, $rreg->dcopy();
}
}
# Add a significant region into list if the P-value is defined.
# Delete the original region.
sub add_del{
my($self,$rreg) = @_;
if(defined $rreg->{pval}){
push @{$self->{regList}}, $rreg->dcopy();
%{$rreg} = %{new diffReps::ChromaModSite};
}
}
# Append another region list to the end.
sub append_list{
my($self,$rl) = @_;
if(@{$rl->{'regList'}}){
push @{$self->{'regList'}}, @{$rl->{'regList'}};
$self->{'adjusted'} = 0;
}
}
# Sort all differential sites according to genomic coordinates.
sub sort{
my $self = shift;
@{$self->{regList}} = sort cmpsite @{$self->{regList}};
}
# Adjust P-values for all regions in the list.
sub adjPval{
my $self = shift;
return if $self->{adjusted};
my $N; # the 'N' in BH formula.
if(@_ > 0) {$N = shift;}
else {$N = @{$self->{regList}};}
# Extract all P-values from the list and feed them to an adjustment procedure.
my(@p1, @p2); # pair of region and window P-values.
foreach my $r(@{$self->{regList}}) {
push @p1, $r->{pval};
push @p2, $r->{winP};
}
my @q1 = padjBH(\@p1, $N);
my @q2 = padjBH(\@p2, $N);
my $i = 0; # iterator for adjusted P-value vector.
foreach my $r(@{$self->{regList}}) {
$r->{padj} = $q1[$i];
$r->{winQ} = $q2[$i++];
}
# Set bool tag.
$self->{adjusted} = 1;
}
# Print header line to file handle.
sub gen_header{
my($self,$hrep) = @_;
# Print header line.
print $hrep "Chrom\tStart\tEnd\tLength\tTreatment.cnt\tControl.cnt\tTreatment.avg\tControl.avg\tTreatment.enr\tControl.enr\tEvent\tlog2FC\tpval\tpadj\twinSta\twinEnd\twinFC\twinP\twinQ\n";
}
# Output all formatted regions.
sub output{
my($self,$gdt,$h) = @_;
if(!$self->{adjusted}) {$self->adjPval();}
foreach my $r(@{$self->{regList}}) {$r->print_reg($gdt,$h);}
}
# Preloaded methods go here.
# Autoload methods go after =cut, and are processed by the autosplit program.
1;
__END__
# Below is stub documentation for your module. You'd better edit it!
=head1 NAME
diffReps::SlideWindow - class to deal with a sliding window.
diffReps::ChromaModSite - class to deal with chromatin modification site, inherited from SlideWindow.
diffReps::RegionList - class for a list of modification regions.
=head1 SYNOPSIS
my $slideWindow = new diffReps::SlideWindow;
my $site = new diffReps::ChromaModSite;
my $regList = new diffReps::RegionList;
=head1 DESCRIPTION
In order to perform differential analysis for ChIP-seq data, we need to define classes and methods
that can retrieve short read counts for a designated window; perform statistics and book-keep important
information.
=head2 EXPORT
None. This is OO designed.
=head1 SEE ALSO
diffReps::DiffRes
Mailing list: https://groups.google.com/forum/#!forum/diffreps-discuss
Web site: https://code.google.com/p/diffreps/
=head1 AUTHOR
Li Shen, E<lt>shenli.sam@gmail.comE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2010-2013 by Li Shen
diffReps goes under GNU GPL v3: http://www.gnu.org/licenses/gpl.html
=cut