package CSVAWK; # git description: 0.0.1-2-g2eeca26

use strict;
use warnings;

use autodie;
use charnames qw(:full);
use English qw(-no_match_vars);
use File::Basename;
use File::Temp qw(tempdir tempfile);
use Readonly;
use Text::CSV_XS;

use base 'Exporter';
our @EXPORT_OK = qw(csvawk);

our $VERSION = '0.1';

Readonly my $HIDE_FS                  => "\N{INFORMATION SEPARATOR ONE}";
Readonly my $HIDE_RS                  => "\N{INFORMATION SEPARATOR TWO}";
Readonly my %SWITCHES_WITH_PARAMETERS => map { $_ => 1 } qw(
  -f --file
  -F --field-separator
  -v --assign
  -m
  -e --source
  -E --exec
  -i --include
  -l --load
  -W
);
Readonly my $IS_PROGRAM_SWITCH => qr/^-[ef]/mxs;

sub convert_to_identifier {
    my ($str) = @_;
    $str =~ s/\W+/_/mxsg;
    if ( $str !~ m/^[[:alpha:]_]/mxs ) {
        $str = "_$str";
    }
    return $str;
}

sub get_csv_parser {
    my $csv = Text::CSV_XS->new(
        {
            binary    => 1,
            auto_diag => 1,
            eol       => "\n",
        }
    );

    return $csv;
}

sub hide_separators {
    my ($str) = @_;
    $str =~ s/,/$HIDE_FS/mxsg;
    $str =~ s/\n/$HIDE_RS/mxsg;
    return $str;
}

sub restore_separators {
    my ($str) = @_;
    $str =~ s/$HIDE_FS/,/mxsg;
    $str =~ s/$HIDE_RS/\n/mxsg;
    return $str;
}

sub split_arguments {
    my (@args) = @_;
    my ( @files, $has_program_switch );

  ARGUMENT: for my $arg ( reverse @args ) {
        if ( $arg =~ m/^-/mxs ) {
            if ( exists $SWITCHES_WITH_PARAMETERS{$arg} ) {
                pop @files;
            }
            last ARGUMENT;
        }
        push @files, $arg;
    }

    my @other_args = @args[ 0 .. $#args - $#files - 1 ];
  OTHER_ARGUMENT: for my $arg (@other_args) {
        if ( $arg =~ $IS_PROGRAM_SWITCH ) {
            $has_program_switch = 1;
            last OTHER_ARGUMENT;
        }
    }
    if ( !$has_program_switch ) {
        push @other_args, '-e', pop @files;
    }
    return \@other_args, [ reverse @files ];
}

sub get_variables {
    my ($files) = @_;

    my %results;
    my $csv = get_csv_parser();

    for my $file ( @{$files} ) {
        open my $fh, '<', $file;
        my $headers = $csv->getline($fh);
        $results{$file} = [ map { convert_to_identifier($_) } @{$headers} ];
        close $fh;
    }

    return \%results;
}

sub quote_files {
    my ($in_files) = @_;

    my %file_map;
    my $dir = tempdir();
    my $csv = get_csv_parser();

    for my $in_file ( @{$in_files} ) {
        my ( $out, $out_file ) =
          tempfile( basename($in_file) . '.XXXXXXXX', DIR => $dir );
        $file_map{$in_file} = $out_file;
        open my $in, '<', $in_file;
        while ( my $row = $csv->getline($in) ) {
            for my $field ( @{$row} ) {
                $field = hide_separators($field);
            }
            $csv->print( $out, $row );
        }

        close $in;
        close $out;
    }

    return \%file_map;
}

sub build_library {
    my ( $files, $file_map, $variables ) = @_;
    my ( $fh, $filename ) = tempfile( SUFFIX => '.awk' );

    print { *{$fh} } <<'END_AWK';
BEGIN {
    FS = ","
    OFS = ","
}
FNR == 1 {
END_AWK

    for my $file ( @{$files} ) {
        my $tempfile = $file_map->{$file};
        print { *{$fh} } qq(  if (FILENAME == "$tempfile") {\n);
        my $i = 1;
        for my $variable ( @{ $variables->{$file} } ) {
            print { *{$fh} } "    $variable = $i\n";
            $i++;
        }
        print { *{$fh} } "  }\n";
    }
    print { *{$fh} } "}\n";
    close $fh;

    return $filename;
}

sub csvawk {
    my (@args) = @_;
    my $dirname = dirname(__FILE__);
    my ( $other_args, $files ) = split_arguments(@args);
    my $file_map  = quote_files($files);
    my $variables = get_variables($files);
    my $library   = build_library( $files, $file_map, $variables );

    #<<<
    my @command = (
        'awk',
        '-f',
        $library,
        @{$other_args},
        map { $file_map->{$_} } @{$files},
    );
    #>>>

    open my $output, q(-|), @command;
    while ( my $row = <$output> ) {
        print restore_separators($row);
    }
    close $output;

    return 0;
}

1;

__END__

=pod

=head1 NAME

CSVAWK - Pass CSV files to AWK.


=head1 SYNOPSIS

Given a CSV file that can't be parsed naively

  a,b,"c,d",e
  1,2,3,"4
  5"
  6,7,8,9

the command

  csvawk '$a == 1 { print $b, $c_d }' quux.csv

will return

  2,3


=head1 DESCRIPTION

CSVAWK allows processing CSV files to AWK via a (relatively) thin Perl wrapper.


=head1 AUTHOR

Bryan McKelvey <bryan.mckelvey@gmail.com>


=head1 COPYRIGHT

Copyright (c) 2017 Bryan McKelvey.  All rights reserved.

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.  The full text of this license
can be found in the LICENSE file included with this module

=cut