lib/File/Process/Utils.pm


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
—
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
              package File::Process::Utils;
use strict;
use warnings;
use Carp;
use Text::CSV_XS;
use Data::Dumper;
use ReadonlyX;
use Scalar::Util qw(reftype);
# Status and boolean constants (read-only package variables).
Readonly our $SUCCESS => 1;
Readonly our $FAILURE => 0;
Readonly our $TRUE    => 1;
Readonly our $FALSE   => 0;
# Frequently used literal strings.
Readonly our $EMPTY => q{};
Readonly our $NL    => "\n";
Readonly our $TAB   => "\t";
Readonly our $PIPE  => q{|};
Readonly our $COMMA => q{,};
use parent qw(Exporter);
# Nothing is exported by default; callers must request names or tags.
# Note that is_code() is defined in this file but is not listed here.
our @EXPORT_OK = qw(
  $COMMA
  $EMPTY
  $FAILURE
  $FALSE
  $NL
  $PIPE
  $SUCCESS
  $TAB
  $TRUE
  is_array
  is_hash
  process_csv
);
# Export tags: ':booleans' also carries the is_array/is_hash predicates,
# ':chars' the literal strings, and ':all' everything in @EXPORT_OK.
our %EXPORT_TAGS = (
  'booleans' => [qw($TRUE $FALSE $SUCCESS $FAILURE is_array is_hash)],
  'chars'    => [qw($NL $EMPTY $PIPE $TAB $COMMA)],
  'all'      => \@EXPORT_OK,
);
our $VERSION = '0.12';
########################################################################
# Type predicates: each returns a true value only when its FIRST argument
# is a reference whose underlying type (per reftype, so blessed references
# count too) is ARRAY, HASH or CODE respectively. Any further arguments are
# ignored. The type tag is passed explicitly rather than pushed onto @_, so
# a stray extra argument can no longer be mistaken for the type to test.
sub _is_array { return _is_type( $_[0], 'ARRAY' ); } ## no critic (RequireArgUnpacking)
sub _is_hash  { return _is_type( $_[0], 'HASH' ); }  ## no critic (RequireArgUnpacking)
sub is_code   { return _is_type( $_[0], 'CODE' ); }  ## no critic (RequireArgUnpacking)
########################################################################
########################################################################
sub is_hash {
########################################################################
  # Tests whether the first argument is a hash reference.
  #
  # Returns nothing (empty list / undef) when it is not. Otherwise returns
  # a true value in scalar context, or the hash's key/value pairs in list
  # context - so an empty hash yields an empty list there.
  my ($candidate) = @_;

  my $is_hash = _is_hash($candidate);

  return
    if !$is_hash;

  if (wantarray) {
    return %{$candidate};
  }

  return $is_hash;
}
########################################################################
sub is_array {
########################################################################
  # Tests whether the first argument is an array reference.
  #
  # Returns nothing (empty list / undef) when it is not. Otherwise returns
  # a true value in scalar context, or the array's elements in list
  # context - so an empty array yields an empty list there.
  my ($candidate) = @_;

  my $is_array = _is_array($candidate);

  return
    if !$is_array;

  if (wantarray) {
    return @{$candidate};
  }

  return $is_array;
}
########################################################################
sub _is_type {
  # Returns true when $value is a reference whose underlying type, as
  # reported by Scalar::Util::reftype, equals $type. For a non-reference
  # the (false) result of ref() is returned as-is.
  my ( $value, $type ) = @_;

  my $ref_name = ref $value;

  return $ref_name
    if !$ref_name;

  return reftype($value) eq $type;
}
########################################################################
########################################################################
sub process_csv {
########################################################################
  # Reads $file as CSV by driving File::Process::process_file() with
  # CSV-aware 'pre' and 'next_line' callbacks.
  #
  #   $file    - passed through unchanged to process_file()
  #   %options - csv_options (hash ref handed to Text::CSV_XS->new),
  #              has_headers, column_names, skip_list, keep_list, hooks
  #              and max_rows are interpreted by the callbacks below;
  #              every option is also forwarded to process_file().
  #
  # Returns what process_file() returns: ( $csv_lines, %info ).
  # Croaks when the Text::CSV_XS parser cannot be constructed and the
  # caller did not supply a 'csv' object of their own.
  my ( $file, %options ) = @_;

  require File::Process;

  my $csv = Text::CSV_XS->new( $options{csv_options} // {} );

  croak 'could not create Text::CSV_XS parser: '
    . Text::CSV_XS->error_diag()
    if !$csv && !$options{csv};

  $options{chomp} //= $TRUE;

  # %options follows 'csv' so a caller-supplied parser wins, but precedes
  # the callbacks so 'pre' and 'next_line' are always the ones below
  my ( $csv_lines, %info ) = File::Process::process_file(
    $file,
    csv => $csv,
    %options,
    pre       => \&_csv_pre,
    next_line => \&_csv_next_line,
  );

  return ( $csv_lines, %info );
}

########################################################################
sub _csv_pre {
########################################################################
  # 'pre' callback: runs File::Process::pre() and, when has_headers is
  # set, consumes the first line and registers it as the column names.
  my ( $file, $args ) = @_;

  my ( $fh, $all_lines ) = File::Process::pre( $file, $args );

  if ( $args->{has_headers} ) {
    # getline() yields an array ref, or undef when nothing could be read;
    # column_names() is handed that single value either way
    my $header = $args->{csv}->getline($fh);
    $args->{csv}->column_names($header);
  }

  return ( $fh, $all_lines );
}

########################################################################
sub _csv_next_line {
########################################################################
  # 'next_line' callback: returns the next row (hash ref or array ref),
  # or a false value once max_rows is reached or the input is exhausted.
  my ( $fh, $all_lines, $args ) = @_;

  return
       if defined $args->{max_rows}
    && @{$all_lines}
    && @{$all_lines} >= $args->{max_rows};

  return $args->{has_headers}
    ? _csv_header_row( $fh, $args )
    : _csv_plain_row( $fh, $args );
}

########################################################################
sub _csv_header_row {
########################################################################
  # Reads one row keyed by the header names, removes skip_list keys and
  # applies hooks. column_names is not consulted here; the header line
  # supplies the keys.
  my ( $fh, $args ) = @_;

  my $csv = $args->{csv};
  my $row = $csv->getline_hr($fh);

  # stop before touching an undefined row: deleting from it would
  # autovivify an empty hash and a true value would be returned at the
  # end of the input
  return $row
    if !$row;

  if ( my (%skips) = is_hash( $args->{skip_list} ) ) {
    delete @{$row}{ keys %skips };
  }

  if ( my (@hooks) = is_array( $args->{hooks} ) ) {
    # positional hooks are matched to columns through the header order
    my @names = $csv->column_names;

    for my $col ( 0 .. $#names ) {
      my $name = $names[$col];

      next if !exists $row->{$name} || !is_code( $hooks[$col] );

      $row->{$name} = $hooks[$col]->( $row->{$name} );
    }
  }
  elsif ( my (%hooks) = is_hash( $args->{hooks} ) ) {
    _apply_named_hooks( $row, \%hooks );
  }

  return $row;
}

########################################################################
sub _csv_plain_row {
########################################################################
  # Reads one row as a list of fields, applies keep_list/skip_list and
  # positional hooks, then converts it to a hash when column_names is in
  # use. Croaks if 'hooks' is a hash but no column names are available.
  my ( $fh, $args ) = @_;

  my $fields = $args->{csv}->getline($fh);

  return $fields
    if !$fields;

  # derive keep_list once, from the first row, when only skip_list is given
  if ( !$args->{keep_list} && is_array( $args->{skip_list} ) ) {
    my $count = @{$fields};
    my %skipped;

    # every index refers to the original column position (negative values
    # count from the end), so removing one column never shifts the others
    for my $index ( @{ $args->{skip_list} } ) {
      $skipped{ $index < 0 ? $index + $count : $index } = 1;
    }

    $args->{keep_list} = [ grep { !$skipped{$_} } 0 .. $count - 1 ];
  }

  if ( $args->{keep_list} ) {
    $fields = [ @{$fields}[ @{ $args->{keep_list} } ] ];
  }

  # positional hooks run before the row is turned into a hash so their
  # results are visible whichever form is returned
  if ( my (@hooks) = is_array( $args->{hooks} ) ) {
    for my $col ( 0 .. $#{$fields} ) {
      next if !is_code( $hooks[$col] );

      $fields->[$col] = $hooks[$col]->( $fields->[$col] );
    }
  }

  my $column_keys = $args->{column_names};

  if ( is_array($column_keys) && !@{$column_keys} ) {
    # an empty column_names list asks for generated names: col0..col{n-1}
    $column_keys = [ map {"col$_"} ( 0 .. $#{$fields} ) ];
    $args->{column_names} = $column_keys;
  }

  my %named_hooks = is_hash( $args->{hooks} );

  croak "you must define column_names when 'hooks' is a hash\n"
    if %named_hooks && !( is_array($column_keys) && @{$column_keys} );

  return $fields
    if !$column_keys;

  my %row = map { $column_keys->[$_] => $fields->[$_] } ( 0 .. $#{$fields} );

  if ( my (%skips) = is_hash( $args->{skip_list} ) ) {
    delete @row{ keys %skips };
  }

  _apply_named_hooks( \%row, \%named_hooks );

  return \%row;
}

########################################################################
sub _apply_named_hooks {
########################################################################
  # Replaces each value of $row whose key has a code ref in $hooks with
  # the hook's return value. Keys missing from the row (for example ones
  # removed by skip_list) are left alone.
  my ( $row, $hooks ) = @_;

  for my $name ( sort keys %{$hooks} ) {
    next if !exists $row->{$name} || !is_code( $hooks->{$name} );

    $row->{$name} = $hooks->{$name}->( $row->{$name} );
  }

  return $row;
}
1;
__END__
## no critic (RequirePodSections)
__END__
=pod
=head1 NAME
File::Process::Utils - commonly used recipes for File::Process
=head1 SYNOPSIS
 use File::Process::Utils qw(process_csv);
 my $obj = process_csv('foo.csv', has_headers => 1);
=head1 DESCRIPTION
Set of utilities that represent some common use cases for L<File::Process>.
=head1 METHODS AND SUBROUTINES
=head2 process_csv
 process_csv(file, options)
Reads a CSV file using L<Text::CSV_XS> and returns an array of hashes
or an array of arrays.
Example:
 my $obj = process_csv(
   'foo.csv',
   has_headers => 1,
   csv_options => { sep_char => "\t" },
   );
=over
=item file
Filename or file handle of an open CSV file.
=item options
List of options described below.
=over 5
=item column_names
A list of column names that should be used as the CSV header. These
names will be in the keys for the hashes returned.
I<Note: By setting C<column_names> to an empty array, you can force
the return of an array of hashes instead of an array of arrays. The
keys will be set to the strings C<col0>..C<col{n-1}>>.
=item csv_options
Hash of options that will be passed through to L<Text::CSV_XS>
=back
=item has_headers
Boolean that indicates whether or not the first line of the CSV file
should be considered the column titles.  These will be used as the hash
keys. If C<has_headers> is not true, then the first line is considered
data and included in the returned array.
Set C<column_names> to an array of strings that will be used as the
keys in lieu of having a header line. If you do not set
C<column_names> and C<has_headers> is not true, an array of arrays will
be returned instead of an array of hashes.
=item hooks
An array or hash of subroutines that will be passed each element of a
row and should return a transformed value for that element.
If you pass a hash, keys should represent one of the column names you
passed in the C<column_names> argument or one of the generated keys
(C<col{n}>).
If you pass an array, the array should contain a code reference at the
index of the array that corresponds to the index in the input you
wish to process.
  my %hooks = ( col1 => sub { uc shift } );
               
  my $obj = process_csv(
    'foo.csv',
    column_names => [],
    keep_open    => 1,
    csv_options  => { sep_char => "\t" },
    hooks        => \%hooks,
  );
Instead of using hooks, which operate at the column level, you could
define your own custom C<process()> method and pass that as an option
to C<process_csv()> as all options are passed through to
C<process_file()>.
  my $obj = process_csv(
    'foo.csv',
    column_names => [],
    keep_open    => 1,
    csv_options  => { sep_char => "\t" },
    process      => sub {
      my ( $fh, $lines, $args, $row ) = @_;
      $row->{col1} = uc $row->{col1};
      return $row;
    }
  );
=item keep_open
Boolean that indicates that the file should not be closed after all
records are read.
=item max_rows
Maximum number of rows to process. If undefined, then all lines of the
file will be processed.
=item skip_list
If column names are being used, this is a hash whose keys will be deleted
from the returned hashes.
If column names are not being used, C<skip_list> is an array of
indexes that will be removed from the returned arrays.
 process_csv(
   'foo.csv',
   has_headers => 1,
   skip_list   => { ssn => 1 }
 );
=back
=head1 SEE ALSO
L<File::Process>, L<Text::ASCIITable::EasyTable>
=head1 AUTHOR
Rob Lauer - <rlauer6@comcast.net>
=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)