#!/pro/bin/perl
use
5.014000;
our
$VERSION
=
"1.23 - 2023-11-14"
;
sub
usage {
my
$err
=
shift
and
select
STDERR;
print
<<"EOU";
usage: csv2xlsx [options] [-o <xlsx>] [file.csv]
csv2xlsx --help | --man | --info
-s <sep> use <sep> as separator char, auto-detect, default = ','
The string "tab" is allowed.
-e <esc> use <esc> as escape char, auto-detect, default = '"'
The string "undef" is allowed.
-q <quot> use <quot> as quotation char, default = '"'
The string "undef" will disable quotation.
-w <width> use <width> as default minimum column width default = 4
-o <xlsx> write output to file named <xlsx>, defaults
to input file name with .csv replaced with .xlsx
if from standard input, defaults to csv2xlsx.xlsx
-F allow formula's. Otherwise fields starting with
an equal sign are forced to string
--Fa=aaa Define formula action: none/die/croak/diag/empty/undef
--Ft Formula's will be stored as text (formula actions: none)
--Fd Formula's will cause a die
--Fc Formula's will cause a croak
--FD Formula's will cause a warning (this is the default)
--Fe Formula's will be replaced by the empty string
--Fu Formula's will be replaced with an undefined cell
-f force usage of <xlsx> if already exists (unlink before use)
-d <dtfmt> use <dtfmt> as date formats. Default = 'dd-mm-yyyy'
-C <C:fmt> use <fmt> as currency formats for currency <C>, no default
--font=F set default font (default Calibri)
--fs=N set font size (default 10)
'M' = 'mono', 'D' = 'DejaVu Sans', 'DM' = 'DejaVu Sans Mono'
-D cols only convert dates in columns <cols>.
Default is everywhere. -D0 is disable
-L N limit export to N rows
-u CSV is UTF8
--de Some CSV fields might be double-encoded. Try to fix that.
-m merge multiple CSV's into a single xlsx (separate sheets)
-o is required, all arguments should be existing files
--skip-empty Skip empty files on merge
-S <cp> Split CSV on COLUMNxPAT into separate sheets. See --man
or --info for options/features and examples. May repeat.
--sl=C Use column C as sheet label when splitting with -S
-v [<lvl>] verbosity (default = 1)
EOU
exit
$err
;
}
use
Getopt::Long
qw(:config bundling passthrough noignorecase )
;
my
$quo
=
'"'
;
my
$esc
=
'"'
;
my
$wdt
= 4;
my
$fac
=
"diag"
;
my
$dtf
=
"dd-mm-yyyy"
;
my
$crf
=
""
;
my
$opt_v
= 1;
my
$dtc
;
GetOptions (
"V|version"
=>
sub
{
say
$0 =~ s{.*/}{}r,
" [$VERSION]"
;
exit
0; },
"help|?"
=>
sub
{ usage (0); },
"man"
=>
sub
{ pod_nroff (); },
"info"
=>
sub
{ pod_text (); },
"c|s|sep=s"
=> \
my
$sep
,
"q|quo=s"
=> \
$quo
,
"e|esc=s"
=> \
$esc
,
"w|width=i"
=> \
$wdt
,
"o|x|out=s"
=> \
my
$xls
,
"d|date-fmt=s"
=> \
$dtf
,
"D|date-col=s"
=> \
$dtc
,
"C|curr-fmt=s"
=> \
$crf
,
"font=s"
=> \
my
$font
,
"fs|font-size=i"
=> \
my
$font_size
,
"f|force!"
=> \
my
$frc
,
"F|formulas!"
=> \
my
$frm
,
"Fa=s"
=> \
$fac
,
"Ft"
=>
sub
{
$fac
=
"none"
; },
"Fd"
=>
sub
{
$fac
=
"die"
; },
"Fc"
=>
sub
{
$fac
=
"croak"
; },
"FD"
=>
sub
{
$fac
=
"diag"
; },
"Fe"
=>
sub
{
$fac
=
"empty"
; },
"Fu"
=>
sub
{
$fac
=
"undef"
; },
"u|utf-8|utf8!"
=> \
my
$utf
,
"de|fix-utf8!"
=> \
my
$dutf
,
"m|merge!"
=> \
my
$mrg
,
"skip-empty!"
=> \
my
$skip_empty
,
"S|split=s"
=> \
my
@split
,
"sl|split-label=s"
=> \
my
$split_sl
,
"L|row-limit=i"
=> \
my
$row_limit
,
"v|verbose:2"
=> \
$opt_v
,
) or usage (1);
sub
pod_text {
my
$m
=
$ENV
{NO_COLOR} ?
"Pod::Text"
:
"Pod::Text::Color"
;
my
$p
=
$m
->new ();
open
my
$fh
,
">"
, \
my
$out
or
die
"Cannot generate manual: $!\n"
;
$p
->parse_from_file ($0,
$fh
);
close
$fh
;
print
$out
;
exit
0;
}
sub
pod_nroff {
first { -x
"$_/nroff"
}
grep
{ -d }
split
m/:+/ =>
$ENV
{PATH} or pod_text ();
my
$p
= Pod::Man->new ();
open
my
$fh
,
"|-"
,
"nroff"
,
"-man"
or
die
"Cannot generate manual: $!\n"
;
$p
->parse_from_file ($0,
$fh
);
close
$fh
;
exit
0;
}
if
(
$mrg
) {
my
@csv
;
for
(
@ARGV
) {
if
(m/\.xlsx?$/i) {
$xls
and usage (1);
$xls
=
$_
;
next
;
}
if
(m/\.(?:csv|png|jpe?g|bmp|gif|tiff|xpm)$/i && -s) {
push
@csv
=>
$_
;
next
;
}
unless
(
$skip_empty
) {
warn
"Argument $_ is not an existing (CSV) file\n"
;
usage (1);
}
}
$xls
&&
@csv
or usage (1);
@ARGV
=
@csv
;
}
sub
col2col {
my
$l
=
shift
;
$l
=~ m/^[0-9]/ and
return
$l
;
my
$c
= 0;
while
(
$l
=~ s/^([A-Za-z])//) {
$c
= 26 *
$c
+ 1 +
ord
(
uc
$1) -
ord
(
"A"
);
}
$c
;
}
foreach
my
$split
(
@split
) {
my
(
$col
,
$operator
,
$pat
) = (
$split
=~ m{^
([0-9]+|[A-Z]+|[a-z]+)
([=/uU<>])
(.*)
\z}x) or usage (1);
my
$case
=
$col
=~ m/^[a-z]/ ? 1 : 0;
$split
= {
col
=> col2col (
$col
),
op
=>
$operator
,
str
=>
$pat
,
ic
=>
$case
,
lbl
=>
undef
,
};
if
(
$split_sl
) {
(
$col
) = (
$split_sl
=~ m/^([0-9]+|[A-Z]+)$/) or usage (1);
$split
->{lbl} = col2col (
$col
);
}
}
my
$base
=
@ARGV
&& -f
$ARGV
[0] ?
$ARGV
[0] :
"csv2xlsx"
;
$xls
||=
$base
=~ s/(?:\.csv)?$/.xlsx/ir;
-s
$xls
&&
$frc
and
unlink
$xls
;
if
(-s
$xls
) {
print
STDERR
"File '$xls' already exists. Overwrite? [y/N] > N\b"
;
scalar
<STDIN> =~ m/^[yj](?:es|a)?$/i or
exit
;
}
{
open
my
$fh
,
">"
,
$xls
or
die
"$xls: $!\n"
;
close
$fh
;
unlink
$xls
;
}
if
(
$dutf
) {
if
($@) {
$dutf
= 0;
warn
"Cannot load Encode::DoubleEncodedUTF8; --de ignored\n"
;
}
}
my
$wbk
= Excel::Writer::XLSX->new (
$xls
);
$dtf
=~ s/j/y/g;
my
%fmt
= (
dflt
=>
$wbk
->add_format (),
date
=>
$wbk
->add_format (
align
=>
"center"
,
num_format
=>
$dtf
),
rest
=>
$wbk
->add_format (
align
=>
"left"
),
wrap
=>
$wbk
->add_format (
text_wrap
=> 1),
);
$crf
=~ s/^([^:]+):(.*)/$1/ and
$fmt
{currency} =
$wbk
->add_format (
num_format
=>
"$1 $2"
,
align
=>
"right"
,
);
if
(
$font
) {
$font
eq
"M"
and
$font
=
"mono"
;
$font
eq
"D"
and
$font
=
"DejaVu Sans"
;
$font
eq
"DM"
and
$font
=
"DejaVu Sans Mono"
;
$_
->set_font (
$font
)
for
values
%fmt
;
}
if
(
$font_size
) {
$_
->set_size (
$font_size
)
for
values
%fmt
;
}
my
@args
=
@ARGV
?
@ARGV
: (
""
);
foreach
my
$csvf
(
@args
) {
my
$sheetname
=
$csvf
=~ s{\.\w+$}{}ir =~ s{.*/}{}r ||
"Sheet 1"
;
(
$_
=
length
$sheetname
) > 31 and
substr
$sheetname
, 31,
$_
- 31,
""
;
my
(
$wks
,
$w
,
$h
);
$opt_v
> 7 and
warn
"Parsing $csvf into $xls.$sheetname ...\n"
;
if
(
$csvf
=~ m/\.(png|jpe?g|bmp|gif|tiff|xpm)$/i) {
$wks
=
$wbk
->add_worksheet (
$sheetname
);
$wks
->insert_image (1, 1,
$csvf
);
next
;
}
my
$row
;
my
$firstline
;
my
$fh
;
if
(-f
$csvf
) {
$opt_v
and
say
"Reading $csvf"
;
open
$fh
,
"<"
,
$csvf
or
die
"$csvf: $!\n"
;
}
else
{
$opt_v
and
say
"Reading STDIN"
;
$fh
=
*ARGV
;
}
my
$Sep
=
$sep
;
unless
(
$Sep
) {
while
(<
$fh
>) {
m/\S/ or
next
;
$Sep
=
m/[
"\d];["
\d;]/ ?
";"
:
m/[
"\d],["
\d,]/ ?
","
:
m/[
"\d]\t["
\d,]/ ?
"\t"
:
m/\w;[\w;]/ ?
";"
:
m/\w,[\w,]/ ?
","
:
m/\w\t[\w,]/ ?
"\t"
:
m/[
"\d]\|["
\d,]/ ?
"|"
:
m/\w\|[\w,]/ ?
"|"
:
m/,/ && !m/[;\t]/ ?
","
:
m/;/ && !m/[,\t]/ ?
";"
:
m/\t/ && !m/[,;]/ ?
"\t"
:
","
;
$firstline
=
$_
;
last
;
}
$firstline
or
die
"The sourcefile does not contain any usable data\n"
;
}
my
$csv
= Text::CSV_XS-> new ({
sep_char
=>
$Sep
eq
"tab"
?
"\t"
:
$Sep
,
quote_char
=>
$quo
eq
"undef"
?
undef
:
$quo
,
escape_char
=>
$esc
eq
"undef"
?
undef
:
$esc
,
binary
=> 1,
keep_meta_info
=> 1,
auto_diag
=> 1,
formula
=>
$fac
,
});
if
(
$firstline
) {
$csv
->parse (
$firstline
) or
die
$csv
->error_diag ();
$row
= [
$csv
->fields ];
}
if
(
$opt_v
> 3) {
foreach
my
$k
(
qw( sep_char quote_char escape_char )
) {
my
$c
=
$csv
->
$k
() ||
"undef"
;
$c
=~ s/\t/\\t/g;
$c
=~ s/\r/\\r/g;
$c
=~ s/\n/\\n/g;
$c
=~ s/\0/\\0/g;
$c
=~ s/([\x00-\x1f\x7f-\xff])/
sprintf
"\\x{%02x}"
,
ord
$1/ge;
printf
STDERR
"%-11s = %s\n"
,
$k
,
$c
;
}
}
if
(
length
$dtc
) {
if
(
$dtc
eq
"0"
) {
$dtc
= {
-2
=> 0 };
}
else
{
my
$rows
=
$dtc
;
$rows
=~ s/-$/-999/;
$rows
=~ s/-/../g;
eval
"\$dtc = { map { \$_ => 1 } $rows }"
;
}
}
my
@w
;
while
(
$row
&&
@$row
or
$row
=
$csv
->getline (
$fh
)) {
$row_limit
and
$csv
->record_number >
$row_limit
and
last
;
my
@row
=
@$row
;
$opt_v
> 8 and
warn
"@row\n"
;
if
(
@split
) {
my
$ns
= 0;
foreach
my
$split
(
@split
) {
my
(
$i
,
$op
,
$str
,
$case
) = @{
$split
}{
qw( col op str ic )
};
my
$v
=
$row
[
$i
- 1];
if
(
defined
$v
) {
$op
eq
"U"
and
$ns
+= 1;
if
(
$case
) {
$op
eq
"="
and
$ns
+= (
lc
$v
eq
lc
$v
);
$op
eq
"/"
and
$ns
+= (
$v
=~ m/
$str
/i );
}
else
{
$op
eq
"="
and
$ns
+= (
$v
eq
$str
);
$op
eq
"/"
and
$ns
+= (
$v
=~ m/
$str
/ );
if
(
$v
=~ m/^[0-9]+$/) {
$op
eq
"<"
and
$ns
+= (
$v
<
$str
);
$op
eq
">"
and
$ns
+= (
$v
>
$str
);
}
else
{
$op
eq
"<"
and
$ns
+= (
$v
lt
$str
);
$op
eq
">"
and
$ns
+= (
$v
gt
$str
);
}
}
}
else
{
$op
eq
"u"
and
$ns
+= 1;
}
}
$opt_v
> 8 and
warn
join
" "
=>
"Record"
,
$csv
->record_number,
"matched"
,
$ns
,
"out of"
,
scalar
@split
,
"criteria\n"
;
if
(
$ns
==
@split
) {
if
(
@w
) {
$wks
->set_column (
$_
,
$_
,
$w
[
$_
])
for
0 ..
$#w
;
$wks
=
undef
;
}
$split
[0]{lbl} and
$sheetname
=
$row
[
$split
[0]{lbl} - 1];
}
}
unless
(
$wks
) {
$wks
=
$wbk
->add_worksheet (
$sheetname
);
$utf
&& !
$wks
->can (
"write_unicode"
) and
$utf
= 0;
(
$h
,
$w
,
@w
) = (0, 1);
}
@row
>
$w
and
push
@w
=> (
$wdt
) x ((
$w
=
@row
) -
@w
);
foreach
my
$c
(0 ..
$#row
) {
my
$val
=
$row
[
$c
] //
""
;
my
$l
=
length
$val
;
$l
> (
$w
[
$c
] // -1) and
$w
[
$c
] =
$l
;
$dutf
and
$csv
->is_binary (
$c
) and utf8::valid (
$val
) and
$val
= Encode::decode (
"utf-8-de"
,
$val
);
if
(
$utf
and
$csv
->is_binary (
$c
)) {
from_to (
$val
,
"utf-8"
,
"ucs2"
);
$wks
->write_unicode (
$h
,
$c
,
$val
);
next
;
}
if
(
$csv
->is_quoted (
$c
)) {
$val
=~ s/\r\n/\n/g;
if
(
$utf
) {
from_to (
$val
,
"utf-8"
,
"ucs2"
);
$val
=~ m/\n/
?
$wks
->write_unicode (
$h
,
$c
,
$val
,
$fmt
{wrap})
:
$wks
->write_unicode (
$h
,
$c
,
$val
,
$fmt
{dflt});
}
else
{
$val
=~ m/\n/
?
$wks
->write_string (
$h
,
$c
,
$val
,
$fmt
{wrap})
:
$wks
->write_string (
$h
,
$c
,
$val
,
$fmt
{dflt});
}
next
;
}
if
(!
$dtc
or
$dtc
->{
$c
+ 1}) {
my
@d
= (0, 0, 0);
$val
=~ m/^(\d{4})(\d{2})(\d{2})$/ and
@d
= ($1, $2, $3);
$val
=~ m/^(\d{2})-(\d{2})-(\d{4})$/ and
@d
= ($3, $2, $1);
if
(
$d
[2] >= 1 &&
$d
[2] <= 31 &&
$d
[1] >= 1 &&
$d
[1] <= 12 &&
$d
[0] >= 1900 &&
$d
[0] <= 2199) {
my
$dm
= Days_in_Month (
@d
[0,1]);
$d
[2] < 1 and
$d
[2] = 1;
$d
[2] >
$dm
and
$d
[2] =
$dm
;
my
$dt
= 2 + Delta_Days (1900, 1, 1,
@d
);
$wks
->
write
(
$h
,
$c
,
$dt
,
$fmt
{date});
next
;
}
}
if
(
$crf
and
$val
=~ m/^\s*\Q
$crf
\E\s*([0-9.]+)$/) {
$wks
->
write
(
$h
,
$c
, $1 + 0,
$fmt
{currency});
next
;
}
if
(!
$frm
&&
$val
=~ m/^=/) {
$wks
->write_string (
$h
,
$c
,
$val
);
}
else
{
$wks
->
write
(
$h
,
$c
,
$val
,
$fmt
{dflt});
}
}
++
$h
% 100 or
$opt_v
&&
printf
STDERR
"%6d x %6d\r"
,
$w
,
$h
;
}
continue
{
$row
=
undef
}
close
$fh
;
$opt_v
&&
printf
STDERR
"%6d x %6d\n"
,
$w
,
$h
;
$wks
->set_column (
$_
,
$_
,
$w
[
$_
])
for
0 ..
$#w
;
}
$opt_v
and
say
"Writing $xls"
;
$wbk
->
close
();