#!/usr/bin/env perl
# ABSTRACT:
#=============================================================================
# STANDARD MODULES AND PRAGMAS
use 5.008; # Require at least Perl version 5.08
use strict; # Must declare all variables before using them
use warnings; # Emit helpful warnings
use autodie; # Fatal exceptions for common unrecoverable errors (e.g. open)
use Carp qw( croak ); # Throw errors from calling function
#=============================================================================
# ADDITIONAL MODULES
use Getopt::Long::Descriptive; # Parse @ARGV as command line flags and arguments
use lib 'lib';
#=============================================================================
# CONSTANTS
my $TRUE = 1;
my $FALSE = 0;
my $DEFAULT_ITERATIONS = 10;
my $DEFAULT_CONFIG = 'cluster.cfg';
my $AWK_CMD = '$1!~/^#/{print $2}';
my @REQUIRED_FLAGS = qw( cm fasta sto );
# CONSTANTS
#=============================================================================
#=============================================================================
# COMMAND LINE
# Run as a command-line program if not used as a module
main(@ARGV) if !caller();
sub main {
#-------------------------------------------------------------------------
# COMMAND LINE INTERFACE #
# #
my ( $opt, $usage ) = describe_options(
'%c %o <some-arg>',
[ 'cm=s', 'file name for the input covariance model (required)', ],
[ 'fasta=s', 'file name for the FASTA file (required)', ],
[ 'sto=s', 'file name for the Stockholm file (required)', ],
[ 'rounds=i', 'Rounds of covariance model searching (default=10)', ],
[ 'config=s', 'Configuration file (default="cluster.cfg")', ],
[],
[ 'help', 'print usage message and exit' ],
);
my $exit_with_usage = sub {
print "\nUSAGE:\n";
print $usage->text();
exit();
};
# If requested, give usage information regardless of other options
$exit_with_usage->() if $opt->help;
# Make some flags required
my $missing_required = $FALSE;
for my $flag (@REQUIRED_FLAGS) {
if ( !defined $opt->$flag ) {
print "Missing required option '$flag'\n";
$missing_required = $TRUE;
}
}
# Exit with usage statement if any required flags are missing
$exit_with_usage->() if $missing_required;
# #
# COMMAND LINE INTERFACE #
#-------------------------------------------------------------------------
#-------------------------------------------------------------------------
# #
# #
my $config_file = $opt->config || $DEFAULT_CONFIG;
my $config = Bio::App::SELEX::RNAmotifAnalysis::get_config($config_file);
process(
{
cm => $opt->cm,
fasta => $opt->fasta,
sto => $opt->sto,
rounds => $opt->rounds || $DEFAULT_ITERATIONS,
config => $config,
},
);
return;
# #
# #
#-------------------------------------------------------------------------
}
# COMMAND LINE
#=============================================================================
#=============================================================================
#
sub process {
my ($arg_ref) = @_;
my $cm = $arg_ref->{cm};
my $fasta = $arg_ref->{fasta};
my $sto = $arg_ref->{sto};
my $config = $arg_ref->{config};
my $rounds = $arg_ref->{rounds};
my $cmalign = get_full_path_for_executable('cmalign', $config);
my $cmbuild = get_full_path_for_executable('cmbuild', $config);
my $cmcalibrate = get_full_path_for_executable('cmcalibrate',$config);
my $cmsearch = get_full_path_for_executable('cmsearch', $config);
my $stock2fasta = $config->{executables}{stock2fasta};
die 'stock2fasta executable path not found in the configuration file' if ! defined $stock2fasta;
my $round = 1;
my $basename = $cm;
#Strip off extension
$basename =~ s/\. .* \z//gxms;
my %file = map { filenames( $basename, $_ ) } ( 1 .. $rounds + 1 );
print "#round$round\n";
print "$cmcalibrate $cm\n";
print "$cmsearch --toponly -E 0.1 --tabfile $file{1}{tab} $cm $fasta\n";
print "\n";
print "awk '$AWK_CMD' $file{1}{tab} > $file{1}{found}\n";
print "\n";
print "grep -w -f $file{1}{found} $sto > $file{2}{sto}\n";
print "$stock2fasta $file{2}{sto} > $file{2}{fasta}\n";
print "\n";
print "\n";
while ( $round < $rounds ) {
$round++;
my $next_round = $round + 1;
my $current = "${basename}_rnd$round";
my $current_aln_cm = "${current}_aln.cm";
my $current_tab = "$current.tab";
my $current_fasta = "$current.fasta";
my $current_found = "${current}_clusters_found.txt";
my $current_cmaligned_sto = "${current}_cmaligned.sto";
my $next = "${basename}_rnd$next_round";
my $next_sto = "$next.sto";
my $next_fasta = "$next.fasta";
print "#round$round\n";
print "$cmalign -o $current_cmaligned_sto $cm $current_fasta\n";
print "$cmbuild $current_aln_cm $current_cmaligned_sto\n";
print "$cmcalibrate $current_aln_cm\n";
print "$cmsearch --toponly -E 0.1 --tabfile $current_tab $current_aln_cm $fasta\n";
print "\n";
print "awk '$AWK_CMD' $current_tab > $current_found\n";
print "\n";
print "grep -w -f $current_found $sto > $next_sto\n";
print "$stock2fasta $next_sto > $next_fasta\n";
print "\n";
print "\n";
}
return;
}
sub get_full_path_for_executable {
my $exe = shift;
my $config = shift;
my $full_path = $config->{executables}->{$exe} || die "Path for executable '$exe' not found in configuration file";
return $full_path;
}
sub filenames {
my $basename = shift;
my $round = shift;
return (
$round => {
cm => "${basename}_rnd$round.cm",
tab => "${basename}_rnd$round.tab",
found => "${basename}_rnd${round}_clusters_found.txt",
sto => "${basename}_rnd$round.sto",
aln => "${basename}_rnd${round}_aln.cm",
fasta => "${basename}_rnd${round}.fasta",
},
);
}
#
#=============================================================================
1;
#Documentation-----------------------------------------------------------------------------
=pod
=head1 SYNOPSIS
covarianceSearchScripts --cm rna.cm --fasta rna.fasta --sto rna.sto [--rounds 10] [--config cluster.cfg]
=head1 DESCRIPTION
=head1 SUBROUTINES/METHODS
=head1 DIAGNOSTICS
=head1 CONFIGURATION AND ENVIRONMENT
=head1 DEPENDENCIES
=head1 INCOMPATIBILITIES
None known
=head1 BUGS AND LIMITATIONS
There are no known bugs in this module.
Please report problems to molecules <at> cpan <dot> org.
Patches are welcome.
=head1 SEE ALSO
=head1 ACKNOWLEDGEMENTS