bin/bc_convert_files - metacpan.org


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
—
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
              #! /usr/bin/env perl
# BioPerl script bc_convert_files
#
# Please direct questions and support issues to <bioperl-l@bioperl.org>
#
# Copyright 2011-2014 Florent Angly <florent.angly@gmail.com>
#
# You may distribute this module under the same terms as perl itself
use strict;
use warnings;
use Method::Signatures;
use Bio::Community::IO;
use Bio::Community::Meta;
use Getopt::Euclid qw(:minimal_keys);
=head1 NAME
bc_convert_files - Merge/split community files and convert between formats
=head1 SYNOPSIS
  # Format conversion
  bc_convert_files -input_files   my_communities.qiime     \
                   -output_format generic                  \
                   -output_prefix my_converted_communities
  # Merging communities
  bc_convert_files -input_files   some_communities.generic other_communities.generic \
                   -output_format generic                  \
                   -output_prefix my_converted_communities
=head1 DESCRIPTION
This script reads files containing biological communities and converts them to
another format. It also merges several community files into a single one or splits a
single file into multiple files with a single community in each. Incidentally,
this script also removes communities with no members, or species with 0 counts
in all communities.
=head1 REQUIRED ARGUMENTS
=over
=item -if <input_files>... | -input_files <input_files>...
Input files containing the communities to convert. All files must have the same
format, which can be one of generic (tab-delimited table), biom, qiime, gaas or
unifrac. See L<Bio::Community::IO> for more information on these formats. Take
note of the <member_identifier> option if you provide multiple input files.
=for Euclid:
   input_files.type: readable
=back
=head1 OPTIONAL ARGUMENTS
=over
=item -op <output_prefix> | -output_prefix <output_prefix>
Path and prefix for the output files. Several output files will be created if
the requested output format can only hold a single community. Default: output_prefix.default
=for Euclid:
   output_prefix.type: string
   output_prefix.default: 'bc_convert_files'
=item -of <output_format> | -output_format <output_format>
File format to use for writing the output communities, e.g. generic (tab-delimited
table), qiime, gaas or unifrac. Default: same as input format
=for Euclid:
   output_format.type: string
=item -mi <member_identifier> | -member_identifier <member_identifier>
When putting communities from different files into a single file, two methods
can be used to decide if members of different communities are the same or not:
'id' or 'desc'. By default, the 'id' method is assumed: members with the same ID
are considered the same. However, this may not always be true, e.g. when reading
files generated by different programs. In this case, you can decide that members
that have the same description are the same using the 'desc' method.
Default: member_identifier.default
=for Euclid:
   member_identifier.type: string, member_identifier eq 'id' || member_identifier eq 'desc'
   member_identifier.type.error:  <member_identifier> should be 'id' or 'desc', not member_identifier
   member_identifier.default: 'id'
=item -sc <split_communities> | -split_communities <split_communities>
Split the input file(s) and generate one output file per community (1: on, 0: off).
Default: split_communities.default
=for Euclid:
   split_communities.type: integer, split_communities == 0 || split_communities == 1
   split_communities.type.error:  <split_communities> should be 0 or 1, not split_communities
   split_communities.default: 0
=back
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this
and other Bioperl modules. Send your comments and suggestions preferably
to one of the Bioperl mailing lists.
Your participation is much appreciated.
  bioperl-l@bioperl.org                  - General discussion
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
=head2 Support 
Please direct usage questions or support issues to the mailing list:
I<bioperl-l@bioperl.org>
rather than to the module maintainer directly. Many experienced and 
responsive experts will be able to look at the problem and quickly 
address it. Please include a thorough description of the problem 
with code and data examples if at all possible.
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution.  Bug reports can be submitted via the
web:
  http://bugzilla.open-bio.org/
=head1 AUTHOR - Florent Angly
Email florent.angly@gmail.com
=cut
# Script entry point: hand the parsed command-line options (stored in %ARGV by
# Getopt::Euclid) to convert() in the positional order it expects, then stop
# before the subroutine definition below is reached.
convert(
   @ARGV{ qw(
      input_files
      output_prefix
      output_format
      member_identifier
      split_communities
   ) }
);
exit;
# convert: read every community from the given input files, pool them in a
# single Bio::Community::Meta object, and write them back out in the requested
# format.
#
# Arguments:
#   $input_files       - arrayref of input file paths
#   $output_prefix     - path/prefix used to build output file names
#   $output_format     - output format name; when undefined, the format
#                        reported by the first input reader is used
#   $member_identifier - passed to Bio::Community::Meta as -identify_members_by
#   $split_communities - 1 to write one output file per community, 0 otherwise
#
# Returns 1. Progress messages are printed to STDOUT.
func convert ($input_files, $output_prefix, $output_format, $member_identifier,
   $split_communities) {

   # --- Reading phase ---
   my $unnamed_seen = 0;
   my $meta = Bio::Community::Meta->new( -identify_members_by => $member_identifier );
   for my $input_file (@$input_files) {
      print "Reading file '$input_file'\n";
      my $reader = Bio::Community::IO->new( -file => $input_file );
      # Fall back to the input format when no output format was requested
      $output_format = $reader->format if not defined $output_format;
      while (my $community = $reader->next_community) {
         # Communities without a name get a running number as their name
         $community->name(++$unnamed_seen) if $community->name eq '';
         $meta->add_communities([$community]);
      }
      $reader->close;
   }

   # --- Writing phase ---
   # Ask a throwaway writer whether this format supports multiple communities
   # per file (presumably what multiple_communities reports -- see
   # Bio::Community::IO).
   my $multiple_communities = Bio::Community::IO->new(-format=>$output_format)->multiple_communities;
   my $file_index = 0;
   my ($writer, $output_file);
   my $num_communities = $meta->get_communities_count;
   while (my $community = $meta->next_community) {
      if (not defined $writer) {
         # A single un-numbered file is used when everything fits in one file
         # (no splitting and a multi-community format) or when there is at
         # most one community; otherwise files get a numeric suffix.
         my $single_file = (($split_communities == 0) && $multiple_communities)
                           || ($num_communities <= 1);
         my $suffix = '';
         if (not $single_file) {
            $file_index++;
            $suffix = '_'.$file_index;
         }
         $output_file = $output_prefix.$suffix.'.'.$output_format;
         $writer = Bio::Community::IO->new(
            -format => $output_format,
            -file   => '>'.$output_file,
         );
      }
      print "Writing community '".$community->name."' to file '$output_file'\n";
      $writer->write_community($community);
      # Keep the writer open only when further communities may share this file
      my $keep_open = $multiple_communities && ($split_communities != 1);
      if (not $keep_open) {
         $writer->close;
         $writer = undef;
      }
   }
   $writer->close if defined $writer;

   return 1;
}
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)