#!/usr/bin/env perl
# PODNAME: prune-ali.pl
# ABSTRACT: Prune sequences from ALI files based on id lists
use
autodie;
use
Smart::Comments;
use
Bio::MUST::Core;
# Select the IdList constructor to call by name: the legacy .lis loader when
# --from-must is given, the regular (.idl) loader otherwise.
my $method = $ARGV_from_must ? 'load_lis' : 'load';

# Each infile is an id list; the ALI file it applies to is located by name.
for my $infile (@ARGV_infiles) {

    ### Processing: $infile
    my $list = IdList->$method($infile);

    # Strip the --in-strip substrings from a copy of the list file name so
    # that the aliased @ARGV_infiles element is left untouched. The values
    # are documented as plain substrings, hence \Q...\E: regex metacharacters
    # (and the whitespace that /x would otherwise ignore) must match
    # literally.
    my $stripped = $infile;
    $stripped =~ s/\Q$_\E//xms for @ARGV_in_strip;

    # The alignment file name is obtained from the (possibly stripped) list
    # file name through change_suffix.
    my $alifile = change_suffix($stripped, '.ali');

    ### Filtering sequences in: $alifile
    my $ali = Ali->load($alifile);

    # --noguessing: do not guess whether sequences are aligned or not
    $ali->dont_guess if $ARGV_noguessing;

    # optionally negate list (listed ids are then discarded, not kept)
    $list = $list->negative_list($ali) if $ARGV_negate_list;

    # apply list to Ali, either following the list order or only filtering
    my $pruned_ali = $ARGV_reorder ? $list->reordered_ali($ali)
                   :                 $list->filtered_ali($ali)
                   ;

    # The outfile name is derived from the ALI file name and --out-suffix.
    my $outfile = secure_outfile($alifile, $ARGV_out_suffix);

    ### Output alignment in: $outfile
    $pruned_ali->store($outfile);
}
__END__
=pod
=head1 NAME
prune-ali.pl - Prune sequences from ALI files based on id lists
=head1 VERSION
version 0.250200
=head1 USAGE
prune-ali.pl <infiles> [optional arguments]
=head1 REQUIRED ARGUMENTS
=over
=item <infiles>
Path to input IDL files [repeatable argument].
=for Euclid: infiles.type: readable
repeatable
=back
=head1 OPTIONAL ARGUMENTS
=over
=item --in[-strip]=<str>
Substring(s) to strip from infile basenames before attempting to derive other
infile (e.g., ALI files) and outfile names [default: none].
=for Euclid: str.type: string
repeatable
=item --out[-suffix]=<suffix>
Suffix to append to (possibly stripped) infile basenames for deriving
outfile names [default: none]. When not specified, outfile names are taken
from infiles, and the original infiles are preserved by appending a .bak
suffix to their names.
=for Euclid: suffix.type: string
=item --[no]guessing
[Don't] guess whether sequences are aligned or not [default: yes].
=item --from-must
Consider the input file as generated by ed/treeplot [default: no]. Currently,
this switches to the legacy .lis format (instead of the modern .idl format).
=item --negate-list
Interpret the list as a negative list instead of a positive list [default:
no]. This means that seqs corresponding to listed ids are discarded.
=item --reorder
Reorder sequences following list [default: no].
=item --version
=item --usage
=item --help
=item --man
Print the usual program information.
=back
=head1 AUTHOR
Denis BAURAIN <denis.baurain@uliege.be>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut