#!perl -w
BEGIN {
require
'loc_tools.pl'
;
}
my
$tab
=
" "
x 4;
use
Unicode::UCD
qw(search_invlist prop_invmap prop_invlist)
;
my
(
$charname_list
,
$charname_map
,
$format
,
$default
) = prop_invmap(
"Name Alias"
);
sub
get_charname($) {
my
$cp
=
shift
;
my
$name_index
= search_invlist(\@{
$charname_list
},
$cp
);
if
(
defined
$name_index
) {
my
$synonyms
=
$charname_map
->[
$name_index
];
if
(
ref
$synonyms
) {
my
$pat
=
qr/: abbreviation/
;
my
@abbreviations
=
grep
{
$_
=~
$pat
}
@$synonyms
;
if
(
@abbreviations
) {
return
$abbreviations
[0] =~ s/
$pat
//r;
}
}
}
return
charnames::viacode(
$cp
) //
"No name"
;
}
sub
truth($) {
return
(
shift
) ? 1 : 0;
}
my
$base_locale
;
my
$utf8_locale
;
if
(locales_enabled(
'LC_ALL'
)) {
$base_locale
= POSIX::setlocale(
&POSIX::LC_ALL
,
"C"
);
if
(
defined
$base_locale
&&
$base_locale
eq
'C'
) {
for
my
$u
(128 .. 255) {
if
(
chr
(utf8::unicode_to_native(
$u
)) =~ /[[:
print
:]]/) {
undef
$base_locale
;
last
;
}
}
$utf8_locale
= find_utf8_ctype_locale()
if
$base_locale
;
}
}
sub
get_display_locale_or_skip($$) {
my
(
$locale
,
$suffix
) =
@_
;
return
unless
defined
$locale
;
if
(
$locale
eq
""
) {
return
(
""
, 0)
if
$suffix
!~ /LC/;
return
;
}
return
if
$suffix
!~ /LC/;
return
(
" ($locale locale)"
, 0)
if
$locale
eq
$base_locale
;
return
(
" ($locale)"
, 1);
}
sub
try_malforming($$$)
{
my
(
$u
,
$function
,
$using_locale
) =
@_
;
return
0
if
$u
< ((
ord
"A"
== 65) ? 128 : 160);
return
0
if
$function
eq
"ASCII"
;
return
0
if
ord
"A"
!= 65 &&
$function
eq
"CNTRL"
;
return
0
if
$u
> 255 &&
$function
eq
"CNTRL"
;
return
0
if
$u
< 256 && !
$using_locale
&&
$function
=~ /X?DIGIT/;
return
1;
}
my
%properties
= (
alnum
=>
'Word'
,
wordchar
=>
'Word'
,
alphanumeric
=>
'Alnum'
,
alpha
=>
'XPosixAlpha'
,
ascii
=>
'ASCII'
,
blank
=>
'Blank'
,
cntrl
=>
'Control'
,
digit
=>
'Digit'
,
graph
=>
'Graph'
,
idfirst
=>
'_Perl_IDStart'
,
idcont
=>
'_Perl_IDCont'
,
lower
=>
'XPosixLower'
,
print
=>
'Print'
,
psxspc
=>
'XPosixSpace'
,
punct
=>
'XPosixPunct'
,
quotemeta
=>
'_Perl_Quotemeta'
,
space
=>
'XPerlSpace'
,
vertws
=>
'VertSpace'
,
upper
=>
'XPosixUpper'
,
xdigit
=>
'XDigit'
,
);
my
%seen
;
my
@warnings
;
local
$SIG
{__WARN__} =
sub
{
push
@warnings
,
@_
};
my
%utf8_param_code
= (
"_safe"
=> 0,
"_safe, malformed"
=> 1,
);
my
$num_test_files
=
$ENV
{TEST_JOBS} || 1;
$::TEST_CHUNK = 0
if
$num_test_files
== 1 && !
defined
$::TEST_CHUNK;
$num_test_files
= 10
if
$num_test_files
> 10;
my
$property_count
= -1;
foreach
my
$name
(
sort
keys
%properties
,
'octal'
) {
$property_count
++;
next
if
$property_count
%
$num_test_files
!= $::TEST_CHUNK;
my
@invlist
;
if
(
$name
eq
'octal'
) {
push
@invlist
,
ord
"0"
,
ord
"8"
;
}
else
{
my
$property
=
$properties
{
$name
};
@invlist
= prop_invlist(
$property
,
'_perl_core_internal_ok'
);
if
(!
@invlist
) {
if
(! prop_invlist(
$property
,
'_perl_core_internal_ok'
)) {
fail(
"No inversion list found for $property"
);
next
;
}
}
}
my
@code_points
= (0 .. 256);
my
$above_latins
= 0;
foreach
my
$range_start
(
@invlist
) {
next
if
$range_start
< 257;
push
@code_points
,
$range_start
- 1,
$range_start
;
$above_latins
++;
last
if
$above_latins
> 5;
}
push
@code_points
,
ord
"\N{ESTIMATED SYMBOL}"
if
$name
=~ /^id(first|cont)/;
if
(
$name
eq
"idcont"
) {
push
@code_points
,
ord
(
"\N{GREEK ANO TELEIA}"
),
ord
(
"\N{COMBINING GRAVE ACCENT}"
);
}
push
@code_points
, 0x110000;
no
warnings
'non_unicode'
;
for
my
$n
(
@code_points
) {
my
$u
= utf8::native_to_unicode(
$n
);
my
$function
=
uc
(
$name
);
is (
@warnings
, 0,
"Got no unexpected warnings in previous iteration"
)
or diag(
"@warnings"
);
undef
@warnings
;
my
$matches
= search_invlist(\
@invlist
,
$n
);
if
(!
defined
$matches
) {
$matches
= 0;
}
else
{
$matches
= truth(! (
$matches
% 2));
}
my
$ret
;
my
$char_name
= get_charname(
$n
);
my
$display_name
=
sprintf
"\\x{%02X, %s}"
,
$n
,
$char_name
;
my
$display_call
=
"is${function}( $display_name )"
;
foreach
my
$suffix
(
""
,
"_A"
,
"_L1"
,
"_LC"
,
"_uni"
,
"_uvchr"
,
"_LC_uvchr"
,
"_utf8"
,
"_LC_utf8"
)
{
if
(
$name
eq
'vertws'
) {
next
if
$suffix
!~ / ^ _ ( uni | uvchr | utf8 ) $ /x;
}
elsif
(
$name
eq
'alnum'
) {
next
if
$suffix
eq
'_A'
||
$suffix
eq
'_L1'
||
$suffix
eq
'_uvchr'
;
}
elsif
(
$name
eq
'octal'
) {
next
if
$suffix
ne
""
&&
$suffix
ne
'_A'
&&
$suffix
ne
'_L1'
;
}
elsif
(
$name
eq
'quotemeta'
) {
next
if
$suffix
ne
""
}
foreach
my
$locale
(
""
,
$base_locale
,
$utf8_locale
) {
my
(
$display_locale
,
$locale_is_utf8
)
= get_display_locale_or_skip(
$locale
,
$suffix
);
next
unless
defined
$display_locale
;
use
if
$locale
,
"locale"
;
POSIX::setlocale(
&POSIX::LC_ALL
,
$locale
)
if
$locale
;
if
(
$suffix
!~ /utf8/) {
my
$display_call
=
"is${function}$suffix( $display_name )$display_locale"
;
$ret
= truth
eval
"test_is${function}$suffix($n)"
;
if
(is ($@,
""
,
"$display_call didn't give error"
)) {
my
$truth
=
$matches
;
if
(
$truth
) {
if
(
$n
>= 256) {
$truth
= 0
if
$suffix
=~ / ^ ( _A | _L [1C] )? $ /x;
}
elsif
(
$u
>= 128
&&
$name
ne
'quotemeta'
)
{
$truth
= 0
if
$suffix
eq
"_A"
||
$suffix
eq
""
|| (
$suffix
=~ /LC/
&& !
$locale_is_utf8
);
}
}
is (
$ret
,
$truth
,
"${tab}And correctly returns $truth"
);
}
}
else
{
my
$char
=
chr
(
$n
);
utf8::upgrade(
$char
);
$char
=
quotemeta
$char
if
$char
eq
'\\'
||
$char
eq
"'"
;
my
$truth
;
if
(
$suffix
=~ /LC/
&& !
$locale_is_utf8
&&
$n
< 256
&&
$u
>= 128)
{
$truth
= 0;
}
else
{
$truth
=
$matches
;
}
foreach
my
$utf8_param
(
"_safe"
,
"_safe, malformed"
,
)
{
my
$utf8_param_code
=
$utf8_param_code
{
$utf8_param
};
my
$expect_error
=
$utf8_param_code
> 0;
next
if
$expect_error
&& ! try_malforming(
$u
,
$function
,
$suffix
=~ /LC/);
my
$display_call
=
"is${function}$suffix( $display_name"
.
", $utf8_param )$display_locale"
;
$ret
= truth
eval
"test_is${function}$suffix('$char',"
.
" $utf8_param_code)"
;
if
(
$expect_error
) {
isnt ($@,
""
,
"expected and got error in $display_call"
);
like($@,
qr/Malformed UTF-8 character/
,
"${tab}And got expected message"
);
if
(is (
@warnings
, 1,
"${tab}Got a single warning besides"
))
{
like(
$warnings
[0],
qr/Malformed UTF-8 character.*short/
,
"${tab}Got expected warning"
);
}
else
{
diag(
"@warnings"
);
}
undef
@warnings
;
}
elsif
(is ($@,
""
,
"$display_call didn't give error"
)) {
is (
$ret
,
$truth
,
"${tab}And correctly returned $truth"
);
if
(
$utf8_param_code
< 0) {
my
$warnings_ok
;
my
$unique_function
=
"is"
.
$function
.
$suffix
;
if
(!
$seen
{
$unique_function
}++) {
$warnings_ok
= is(
@warnings
, 1,
"${tab}This is first call to"
.
" $unique_function; Got a single"
.
" warning"
);
if
(
$warnings_ok
) {
$warnings_ok
= like(
$warnings
[0],
qr/starting in Perl .* will require an additional parameter/
,
"${tab}The warning was the expected"
.
" deprecation one"
);
}
}
else
{
$warnings_ok
= is(
@warnings
, 0,
"${tab}This subsequent call to"
.
" $unique_function did not warn"
);
}
$warnings_ok
or diag(
"@warnings"
);
undef
@warnings
;
}
}
}
}
}
}
}
}
my
%to_properties
= (
FOLD
=>
'Case_Folding'
,
LOWER
=>
'Lowercase_Mapping'
,
TITLE
=>
'Titlecase_Mapping'
,
UPPER
=>
'Uppercase_Mapping'
,
);
$property_count
= -1;
foreach
my
$name
(
sort
keys
%to_properties
) {
$property_count
++;
next
if
$property_count
%
$num_test_files
!= $::TEST_CHUNK;
my
$property
=
$to_properties
{
$name
};
my
(
$list_ref
,
$map_ref
,
$format
,
$missing
)
= prop_invmap(
$property
, );
if
(!
$list_ref
|| !
$map_ref
) {
fail(
"No inversion map found for $property"
);
next
;
}
if
(
$format
!~ / ^ a l? $ /x) {
fail(
"Unexpected inversion map format ('$format') found for $property"
);
next
;
}
my
@code_points
= (0 .. 256);
my
$above_latins
= 0;
my
$multi_char
= 0;
for
my
$i
(0 .. @{
$list_ref
} - 1) {
my
$range_start
=
$list_ref
->[
$i
];
next
if
$range_start
< 257;
if
(
ref
$map_ref
->[
$i
] &&
$multi_char
< 5) {
push
@code_points
,
$range_start
- 1
if
$code_points
[-1] !=
$range_start
- 1;
push
@code_points
,
$range_start
;
$multi_char
++;
}
elsif
(
$above_latins
< 5) {
push
@code_points
,
$range_start
- 1
if
$code_points
[-1] !=
$range_start
- 1;
push
@code_points
,
$range_start
;
$above_latins
++;
}
last
if
$above_latins
>= 5 &&
$multi_char
>= 5;
}
push
@code_points
, 0x110000;
no
warnings
'non_unicode'
;
for
my
$n
(
@code_points
) {
my
$u
= utf8::native_to_unicode(
$n
);
my
$function
=
$name
;
my
$index
= search_invlist(\@{
$list_ref
},
$n
);
my
$ret
;
my
$char_name
= get_charname(
$n
);
my
$display_name
=
sprintf
"\\N{U+%02X, %s}"
,
$n
,
$char_name
;
foreach
my
$suffix
(
""
,
"_L1"
,
"_LC"
) {
next
if
$suffix
eq
"_L1"
&&
$function
ne
"LOWER"
;
SKIP:
foreach
my
$locale
(
""
,
$base_locale
,
$utf8_locale
) {
next
if
$name
eq
'TITLE'
&&
$suffix
eq
"_LC"
;
my
(
$display_locale
,
$locale_is_utf8
)
= get_display_locale_or_skip(
$locale
,
$suffix
);
next
unless
defined
$display_locale
;
skip(
"to${name}_LC does not work for LATIN SMALL LETTER SHARP S"
.
"$display_locale"
, 1)
if
$u
== 0xDF &&
$name
=~ / FOLD | UPPER /x
&&
$suffix
eq
"_LC"
&&
$locale_is_utf8
;
use
if
$locale
,
"locale"
;
POSIX::setlocale(
&POSIX::LC_ALL
,
$locale
)
if
$locale
;
my
$display_call
=
"to${function}$suffix("
.
" $display_name )$display_locale"
;
$ret
=
eval
"test_to${function}$suffix($n)"
;
if
(is ($@,
""
,
"$display_call didn't give error"
)) {
my
$should_be
;
if
(
$n
> 255) {
$should_be
=
$n
;
}
elsif
(
$u
> 127
&& (
$suffix
eq
""
|| (
$suffix
eq
"_LC"
&& !
$locale_is_utf8
)))
{
$should_be
=
$n
;
}
elsif
(
$map_ref
->[
$index
] !=
$missing
) {
$should_be
=
$map_ref
->[
$index
] +
$n
-
$list_ref
->[
$index
]
}
else
{
$should_be
=
$n
;
}
is (
$ret
,
$should_be
,
sprintf
(
"${tab}And correctly returned 0x%02X"
,
$should_be
));
}
}
}
my
$utf8_should_be
=
""
;
my
$first_ord_should_be
;
if
(
ref
$map_ref
->[
$index
]) {
for
my
$n
(0 .. @{
$map_ref
->[
$index
]} - 1) {
$utf8_should_be
.=
chr
$map_ref
->[
$index
][
$n
];
}
$first_ord_should_be
=
$map_ref
->[
$index
][0];
}
else
{
$first_ord_should_be
= (
$map_ref
->[
$index
] !=
$missing
)
?
$map_ref
->[
$index
] +
$n
-
$list_ref
->[
$index
]
:
$n
;
$utf8_should_be
=
chr
$first_ord_should_be
;
}
utf8::upgrade(
$utf8_should_be
);
foreach
my
$suffix
(
'_uni'
,
'_uvchr'
) {
my
$s
;
my
$len
;
my
$display_call
=
"to${function}$suffix( $display_name )"
;
$ret
=
eval
"test_to${function}$suffix($n)"
;
if
(is ($@,
""
,
"$display_call didn't give error"
)) {
is (
$ret
->[0],
$first_ord_should_be
,
sprintf
(
"${tab}And correctly returned 0x%02X"
,
$first_ord_should_be
));
is (
$ret
->[1],
$utf8_should_be
,
"${tab}Got correct utf8"
);
is (
$ret
->[2],
length
$utf8_should_be
,
"${tab}Got correct number of bytes for utf8 length"
);
}
}
my
$char
=
chr
(
$n
);
utf8::upgrade(
$char
);
$char
=
quotemeta
$char
if
$char
eq
'\\'
||
$char
eq
"'"
;
foreach
my
$utf8_param
(
"_safe"
,
"_safe, malformed"
,
)
{
next
if
$utf8_param
eq
'deprecated mathoms'
&&
$Config
{
'ccflags'
} =~ /-DNO_MATHOMS/;
my
$utf8_param_code
=
$utf8_param_code
{
$utf8_param
};
my
$expect_error
=
$utf8_param_code
> 0;
next
if
$expect_error
&&
$u
< ((
ord
"A"
== 65) ? 128 : 160);
my
$display_call
=
"to${function}_utf8($display_name, $utf8_param )"
;
$ret
=
eval
"test_to${function}_utf8('$char', $utf8_param_code)"
;
if
(
$expect_error
) {
isnt ($@,
""
,
"expected and got error in $display_call"
);
like($@,
qr/Malformed UTF-8 character/
,
"${tab}And got expected message"
);
undef
@warnings
;
}
elsif
(is ($@,
""
,
"$display_call didn't give error"
)) {
is (
$ret
->[0],
$first_ord_should_be
,
sprintf
(
"${tab}And correctly returned 0x%02X"
,
$first_ord_should_be
));
is (
$ret
->[1],
$utf8_should_be
,
"${tab}Got correct utf8"
);
is (
$ret
->[2],
length
$utf8_should_be
,
"${tab}Got correct number of bytes for utf8 length"
);
if
(
$utf8_param_code
< 0) {
my
$warnings_ok
;
if
(!
$seen
{
"${function}_utf8$utf8_param"
}++) {
$warnings_ok
= is(
@warnings
, 1,
"${tab}Got a single warning"
);
if
(
$warnings_ok
) {
my
$expected
;
if
(
$utf8_param_code
== -2) {
my
$lc_func
=
lc
$function
;
$expected
=
qr/starting in Perl .* to_utf8_$lc_func\(\) will be removed/
;
}
else
{
$expected
=
qr/starting in Perl .* will require an additional parameter/
;
}
$warnings_ok
= like(
$warnings
[0],
$expected
,
"${tab}Got expected deprecation warning"
);
}
}
else
{
$warnings_ok
= is(
@warnings
, 0,
"${tab}Deprecation warned only the one time"
);
}
$warnings_ok
or diag(
"@warnings"
);
undef
@warnings
;
}
}
}
}
}
is(
scalar
@warnings
, 0,
"No unexpected warnings were generated in the tests"
)
or diag
@warnings
;
done_testing;