#!/usr/bin/perl
$^W = 1;
BEGIN {
if
($] < 5.008001) {
plan
skip_all
=>
"UTF8 tests useless in this ancient perl version"
;
}
else
{
plan
tests
=> 93;
}
}
BEGIN {
$ENV
{PERL_TEXT_CSV} =
$ENV
{TEST_PERL_TEXT_CSV} || 0;
require_ok
"Text::CSV"
;
plan
skip_all
=>
"Cannot load Text::CSV"
if
$@;
require
"./t/util.pl"
;
}
my
$tfn
=
"_50test.csv"
; END { -f
$tfn
and
unlink
$tfn
; }
my
$csv
= Text::CSV->new ({
always_quote
=> 1,
keep_meta_info
=> 1,
});
foreach
my
$test
(
[
"\x{0000A0}"
,
"U+0000A0 NO-BREAK SPACE"
],
[
"\x{00200B}"
,
"U+00200B ZERO WIDTH SPACE"
],
[
"\x{000122}"
,
"U+000122 LATIN CAPITAL LETTER G WITH CEDILLA"
],
[
"\x{002C22}"
,
"U+002C22 GLAGOLITIC CAPITAL LETTER SPIDERY HA"
],
[
"\x{000A2C}"
,
"U+000A2C GURMUKHI LETTER BA"
],
[
"\x{000E2C}"
,
"U+000E2C THAI CHARACTER LO CHULA"
],
[
"\x{010A2C}"
,
"U+010A2C KHAROSHTHI LETTER VA"
],
) {
my
(
$u
,
$msg
) =
@$test
;
(
$u
=
"$u\x{0123}"
) =~ s/.$//;
my
@in
= (
""
,
" "
,
$u
,
""
);
my
$exp
=
join
","
,
map
{
qq{"$_"}
}
@in
;
ok (
$csv
->combine (
@in
),
"combine $msg"
);
my
$str
=
$csv
->string;
is_binary (
$str
,
$exp
,
"string $msg"
);
ok (
$csv
->parse (
$str
),
"parse $msg"
);
my
@out
=
$csv
->fields;
is (
scalar
@in
,
scalar
@out
,
"fields $msg"
);
is_binary (
$in
[
$_
],
$out
[
$_
],
"field $_ $msg"
)
for
0 ..
$#in
;
}
is (
$csv
->parse (
qq{"\x{0123}
\n\x{20ac}"}), 0,
"\\n still needs binary"
);
is (
$csv
->binary, 0,
"bin flag still unset"
);
is (
$csv
->error_diag + 0, 2021,
"Error 2021"
);
open
my
$fh
,
">:encoding(utf-8)"
,
$tfn
or
die
"$tfn: $!\n"
;
print
$fh
qq{"\N{LATIN CAPITAL LETTER O WITH STROKE}
l/Vin",0\n};
close
$fh
;
SKIP: {
open
my
$fh
,
"<:encoding(utf-8)"
,
$tfn
or
skip
"Cannot open UTF-8 test file"
, 6;
my
$row
;
ok (
$row
=
$csv
->getline (
$fh
),
"read/parse"
);
is (
$csv
->is_quoted (0), 1,
"First field is quoted"
);
is (
$csv
->is_quoted (1), 0,
"Second field is not quoted"
);
is (
$csv
->is_binary (0), 1,
"First field is binary"
);
is (
$csv
->is_binary (1), 0,
"Second field is not binary"
);
ok (utf8::valid (
$row
->[0]),
"First field is valid utf8"
);
$csv
->combine (
@$row
);
ok (utf8::valid (
$csv
->string),
"Combined string is valid utf8"
);
}
$csv
->always_quote (0);
$csv
->quote_space (0);
$csv
->quote_binary (0);
ok (
$csv
->combine (
" "
, 1,
"\x{20ac} "
),
"Combine"
);
is (
$csv
->string,
qq{ ,1,\x{20ac}
},
"String 0-0"
);
$csv
->quote_binary (1);
ok (
$csv
->combine (
" "
, 1,
"\x{20ac} "
),
"Combine"
);
is (
$csv
->string,
qq{ ,1,"\x{20ac}
"},
"String 0-1"
);
$csv
->quote_space (1);
$csv
->quote_binary (0);
ok (
$csv
->combine (
" "
, 1,
"\x{20ac} "
),
"Combine"
);
is (
$csv
->string,
qq{" ",1,"\x{20ac}
"},
"String 1-0"
);
ok (
$csv
->quote_binary (1),
"quote binary on"
);
ok (
$csv
->combine (
" "
, 1,
"\x{20ac} "
),
"Combine"
);
is (
$csv
->string,
qq{" ",1,"\x{20ac}
"},
"String 1-1"
);
ok (
$csv
->parse (
qq{,1,"f\x{014d}
o, 3"
"56"
,,bar,\r\n}),
"example from XS"
);
is_deeply ([
$csv
->fields], [
""
, 1,
qq{f\x{014d}
o, 3
"56}, "
", "
bar
", "
" ], "
content");
open
$fh
,
">:encoding(utf-8)"
,
$tfn
or
die
"$tfn: $!\n"
;
print
$fh
"euro\n\x{20ac}\neuro\n"
;
close
$fh
;
open
$fh
,
"<:encoding(utf-8)"
,
$tfn
or
die
"$tfn: $!\n"
;
SKIP: {
my
$out
=
""
;
my
$isutf8
= $] < 5.008001 ?
sub
{ !
$_
[0]; } :
sub
{ utf8::is_utf8 (
$out
); };
ok (
$csv
->auto_diag (1),
"auto diag"
);
ok (
$csv
->binary (1),
"set binary"
);
ok (
$csv
->bind_columns (\
$out
),
"bind"
);
ok (
$csv
->getline (
$fh
),
"parse"
);
is (
$csv
->is_binary (0), 0,
"not binary"
);
is (
$out
,
"euro"
,
"euro"
);
ok (!
$isutf8
->(1),
"not utf8"
);
ok (
$csv
->getline (
$fh
),
"parse"
);
is (
$csv
->is_binary (0), 1,
"is binary"
);
is (
$out
,
"\x{20ac}"
,
"euro"
);
ok (
$isutf8
->(0),
"is utf8"
);
ok (
$csv
->getline (
$fh
),
"parse"
);
is (
$csv
->is_binary (0), 0,
"not binary"
);
is (
$out
,
"euro"
,
"euro"
);
ok (!
$isutf8
->(1),
"not utf8"
);
close
$fh
;
}