=head1 NAME

Bio::Polloc::Rule::repeat - A rule of type repeat

=head1 AUTHOR - Luis M. Rodriguez-R

Email lmrodriguezr at gmail dot com

=cut

package Bio::Polloc::Rule::repeat;
use base qw(Bio::Polloc::RuleI);
use strict;
use Bio::Polloc::Polloc::IO;
use Bio::Polloc::LocusI;
use Bio::SeqIO;
our $VERSION = $Bio::Polloc::Polloc::Root::VERSION;

=head1 APPENDIX

Methods provided by the package

=head2 new

Generic initialization method

=cut

sub new {
   my($caller,@args) = @_;
   my $self = $caller->SUPER::new(@args);
   $self->_initialize(@args);
   return $self;
}

=head2 execute

This is where magic happens.  Translates the parameters of the object into a call to
B<mreps>, and scans the sequence for repeats.

=head2 Arguments

The sequence (C<-seq>) as a L<Bio::Seq> or a L<Bio::SeqIO> object

=head3 Returns

An array reference populated with L<Bio::Polloc::Locus::repeat> objects

=cut

sub execute {
   my($self,@args) = @_;
   my($seq) = $self->_rearrange([qw(SEQ)], @args);
   
   $self->throw("You must provide a sequence to evaluate the rule", $seq) unless $seq;
   
   # For Bio::SeqIO objects
   if($seq->isa('Bio::SeqIO')){
      my @feats = ();
      while(my $s = $seq->next_seq){
         push(@feats, @{$self->execute(-seq=>$s)})
      }
      return wantarray ? @feats : \@feats;
   }
   
   $self->throw("Illegal class of sequence '".ref($seq)."'", $seq) unless $seq->isa('Bio::Seq');

   # Include safe_value parameters
   my $cmd_vars = {};
   for my $p ( qw(RES MINSIZE MAXSIZE MINPERIOD MAXPERIOD EXP ALLOWSMALL WIN) ){
      my $tv = $self->_search_value($p);
      $cmd_vars->{"-" . lc $p} = $tv if defined $tv;
   }
   my $minsim = ($self->_search_value("minsim") || 0)+0;
   my $maxsim = ($self->_search_value("maxsim") || 0)+0;
   $maxsim ||= 100;

   # Create the IO master
   my $io = Bio::Polloc::Polloc::IO->new();

   # Search for mreps
   $self->source('mreps');
   my $mreps = $self->_executable(defined $self->ruleset ? $self->ruleset->value("path") : undef)
   	or $self->throw("Could not find the mreps binary");
   
   # Write the sequence
   my($seq_fh, $seq_file) = $io->tempfile;
   close $seq_fh;
   my $seqO = Bio::SeqIO->new(-file=>">$seq_file", -format=>'Fasta');
   $seqO->write_seq($seq);
   
   # Run it
   my @run = ($mreps);
   push @run, %{$cmd_vars};
   push @run, "-fasta", $seq_file;
   push @run, "2>&1";
   push @run, "|";
   $self->debug("Running: ".join(" ",@run));
   my $run = Bio::Polloc::Polloc::IO->new(-file=>join(" ",@run));
   my $ontable = 0;
   my @feats = ();
   while(my $line = $run->_readline){
      if($line =~ m/^ -----------------------------/){
         $ontable = !$ontable;
      }elsif($ontable){
	 chomp $line;
	 #  from   ->       to  :         size    <per.>  [exp.]          err-rate       sequence
	 $line =~ m/^\s+(\d+)\s+->\s+(\d+)\s+:\s+(\d+)\s+<(\d+)>\s+\[([\d\.]+)\]\s+([\d\.]+)\s+([\w\s]+)$/
		or $self->throw("Unexpected line $.",$line,"Bio::Polloc::Polloc::ParsingException");
	 my $score = 100 - $6*100;
	 next if $score > $maxsim or $score < $minsim;
	 my $id = $self->_next_child_id;
	 my $cons = $self->_calculate_consensus($7);
	 push @feats, Bio::Polloc::LocusI->new(
	 		-type=>$self->type, -rule=>$self, -seq=>$seq,
			-from=>$1+0, -to=>$2+0, -strand=>"+",
			-name=>$self->name,
			-id=>(defined $id ? $id : ""),
			-period=>$4+0, -exponent=>$5+0,
			-error=>$6*100,
			-score=>$score,
			-consensus=>$cons,
			-repeats=>$7);
      }
   }
   $run->close();
   return wantarray ? @feats : \@feats;
}

=head2 stringify_value

Produces a readable string containing the value of the rule.

=cut

sub stringify_value {
   my ($self,@args) = @_;
   my $out = "";
   for my $k (keys %{$self->value}){
      $out.= "$k=>".(defined $self->value->{$k} ? $self->value->{$k} : "")." ";
   }
   return $out;
}

=head2 value

Implements the C<_qualify_value()> from the L<Bio::Polloc::RuleI> interface

=head3 Arguments

Value (str or ref-to-hash or ref-to-array).  The supported keys are:

=over

=item -res I<float>

Resolution (allowed error)

=item -minsize I<int>

Minimum size of the repeat

=item -maxsize I<int>

Maximum size of the repeat

=item -minperiod I<float>

Minimum period of the repeat

=item -maxperiod I<float>

Maximum period of the repeat

=item -exp I<float>

Minimum exponent (number of repeats)

=item -allowsmall I<bool (int)>

If true, allows spurious results

=item -win I<float>

Process by sliding windows of size C<2*n> overlaping by C<n>

=item -minsim I<float>

Minimum similarity percent

=item -maxsim I<float>

Maximum similarity percent

=back

=head3 Return

Value (I<hashref> or C<undef>).

=head1 INTERNAL METHODS

Methods intended to be used only within the scope of Bio::Polloc::*

=head2 _calculate_consensus

Attempts to calculate the consensus of the repeat units.

=head3 Arguments

The repetitive sequence with repeat units separated by spaces (I<str>).

=head3 Returns

The consensus sequence (I<str>)

=cut

sub _calculate_consensus {
   my($self,$seq) = @_;
   return unless $seq;
   my $io = Bio::Polloc::Polloc::IO->new();
   my $emma = $io->exists_exe("emma");
   my $cons = $io->exists_exe("cons");
   return "no-emma" unless $emma;
   return "no-cons" unless $cons;
   my ($outseq_fh, $outseq) = $io->tempfile;
   my $i=0;
   print $outseq_fh ">".(++$i)."\n$_\n" for split /\s+/, $seq;
   close $outseq_fh;
   return "err-seq" unless -s $outseq;
   my $outaln = "$outseq.aln";
   my $emmarun = Bio::Polloc::Polloc::IO->new(-file=>"$emma '$outseq' '$outaln' '/dev/null' -auto >/dev/null |");
   while($emmarun->_readline){ print STDERR $_ }
   $emmarun->close();
   unless(-s $outaln){
      unlink $outaln if -e $outaln;
      return "err-aln";
   }
   my $consout = "";
   my $consrun = Bio::Polloc::Polloc::IO->new(-file=>"$cons '$outaln' stdout -auto |");
   while(my $ln = $consrun->_readline){
      chomp $ln;
      next if $ln =~ /^>/;
      $consout .= $ln;
   }
   unlink $outaln;
   $consrun->close();
   $io->close();
   return $consout;
}

=head2 _parameters

=cut

sub _parameters {
   return [qw(RES MINSIZE MAXSIZE MINPERIOD MAXPERIOD EXP ALLOWSMALL WIN MINSIM MAXSIM)];
}

=head2 _executable

Attempts to get the mreps executable.

=cut

sub _executable {
   my($self, $path) = @_;
   my $name = 'mreps';
   my $io = "Bio::Polloc::Polloc::IO";
   $self->debug("Searching the $name binary for $^O");
   # Note the darwin support.  This is because darwin is unable to execute
   # the linux binary (despite its origin)
   my $bin =	$^O =~ /(macos|darwin)/i ? "mreps.macosx.bin" :
   		$^O =~ /mswin/i ? "mreps.exe" :
		"mreps.linux.bin";
   my @where = ('');
   unshift @where, $path if $path;
   for my $p (@where){
   	for my $n (($bin, $name, "$name.bin")){
		my $exe = $io->exists_exe($p . $n);
		return $exe if $exe;
	}
   }
}

=head2 _initialize

=cut

sub _initialize {
   my($self,@args) = @_;
   $self->type('repeat');
}

1;