#!/usr/bin/env perl
# PODNAME: prune-ali.pl
# ABSTRACT: Prune sequences from ALI files based on id lists
use Modern::Perl '2011';
use autodie;
use Getopt::Euclid qw(:vars);
use Bio::MUST::Core::Utils qw(change_suffix secure_outfile);
# Main driver: for each id-list infile, load the list, locate the matching
# ALI file, prune (or reorder) its sequences according to the list and store
# the resulting alignment.

# --from-must selects the legacy .lis loader instead of the default one.
my $method = $ARGV_from_must ? 'load_lis' : 'load';

# NOTE(review): IdList and Ali are used as bare class names, but no statement
# bringing them into scope is visible in this file -- confirm that they are
# loaded/aliased elsewhere.

for my $infile (@ARGV_infiles) {

    ### Processing: $infile
    my $list = IdList->$method($infile);

    # Derive the ALI filename from a copy of the infile name, so that the
    # elements of @ARGV_infiles (aliased by the loop variable) are not
    # modified in place.
    my $basename = $infile;

    # --in-strip values are documented as substrings, hence \Q...\E: without
    # it, regex metacharacters (e.g., '.') would be interpreted as a pattern
    # and, because of /x, whitespace in the substring would be ignored.
    # Only the first occurrence of each substring is removed.
    for my $substr (@ARGV_in_strip) {
        $basename =~ s/\Q$substr\E//xms;
    }
    my $alifile = change_suffix($basename, '.ali');

    ### Filtering sequences in: $alifile
    my $ali = Ali->load($alifile);
    $ali->dont_guess if $ARGV_noguessing;

    # optionally negate list (listed ids are then discarded instead of kept)
    $list = $list->negative_list($ali) if $ARGV_negate_list;

    # apply list to Ali, either reordering seqs after the list or filtering
    my $pruned_ali = $ARGV_reorder ? $list->reordered_ali($ali)
                   :                 $list->filtered_ali($ali)
    ;

    # outfile name is built from the ALI filename and the optional suffix
    my $outfile = secure_outfile($alifile, $ARGV_out_suffix);

    ### Output alignment in: $outfile
    $pruned_ali->store($outfile);
}
__END__
=pod
=head1 NAME
prune-ali.pl - Prune sequences from ALI files based on id lists
=head1 VERSION
version 0.250200
=head1 USAGE
prune-ali.pl <infiles> [optional arguments]
=head1 REQUIRED ARGUMENTS
=over
=item <infiles>
Path to input IDL files [repeatable argument].
=for Euclid: infiles.type: readable
repeatable
=back
=head1 OPTIONAL ARGUMENTS
=over
=item --in[-strip]=<str>
Substring(s) to strip from infile basenames before attempting to derive the
names of other infiles (e.g., ALI files) and of outfiles [default: none].
=for Euclid: str.type: string
repeatable
=item --out[-suffix]=<suffix>
Suffix to append to (possibly stripped) infile basenames for deriving
outfile names [default: none]. When not specified, outfile names are taken
from infiles, and the original infiles are preserved by appending a .bak
suffix to their names.
=for Euclid: suffix.type: string
=item --[no]guessing
[Don't] guess whether sequences are aligned or not [default: yes].
=item --from-must
Consider the input file as generated by ed/treeplot [default: no]. Currently,
this switches to the legacy .lis format (instead of the modern .idl format).
=item --negate-list
Interpret the list as a negative list instead of a positive list [default:
no]. This means that seqs corresponding to listed ids are discarded.
=item --reorder
Reorder sequences following list [default: no].
=item --version
=item --usage
=item --help
=item --man
Print the usual program information
=back
=head1 AUTHOR
Denis BAURAIN <denis.baurain@uliege.be>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut