#!/usr/local/bin/perl
BEGIN {
pop
@INC
if
$INC
[-1] eq
'.'
}
our
$VERSION
=
do
{
my
@r
= (
q$Revision: 2.4 $
=~ /\d+/g);
sprintf
"%d."
.
"%02d"
x
$#r
,
@r
};
our
%Opt
;
getopts(
"Dehfv"
, \
%Opt
);
if
(
$Opt
{e}){
eval
{
require
Encode } or
die
"can't load Encode : $@"
;
}
$Opt
{h} and help();
@ARGV
or help();
sub
help{
print
<<
""
;
$0 -[Dehfv] [ucm files ...]
-D debug mode on
-e test
with
Encode module also (requires perl 5.7.3 or higher)
-h shows this message
-f forces roundtrip check even
for
|[123]
-v verbose mode
}
$| = 1;
my
(
%Hdr
,
%U2E
,
%E2U
,
%Fallback
);
my
$in_charmap
= 0;
my
$nerror
= 0;
my
$nwarning
= 0;
sub
nit($;$){
my
(
$msg
,
$level
) =
@_
;
my
$lstr
;
if
(
$level
== 2){
$lstr
=
'notice'
;
}
elsif
(
$level
== 1){
$lstr
=
'warning'
;
$nwarning
++;
}
else
{
$lstr
=
'error'
;
$nerror
++;
}
print
"$ARGV:$lstr in line $.: $msg\n"
;
}
for
$ARGV
(
@ARGV
){
open
UCM,
$ARGV
or
die
"$ARGV:$!"
;
%Hdr
=
%U2E
=
%E2U
=
%Fallback
= ();
$in_charmap
=
$nerror
=
$nwarning
= 0;
$. = 0;
while
(<UCM>){
chomp
;
s/\s*
if
(
$_
eq
"CHARMAP"
){
$in_charmap
= 1;
for
my
$must
(
qw/code_set_name mb_cur_min mb_cur_max/
){
exists
$Hdr
{
$must
} or nit
"<$must> nonexistent"
;
}
$Hdr
{mb_cur_min} >
$Hdr
{mb_cur_max}
and nit
sprintf
(
"mb_cur_min(%d) > mb_cur_max(%d)"
,
$Hdr
{mb_cur_min},
$Hdr
{mb_cur_max});
$in_charmap
= 1;
next
;
}
unless
(
$in_charmap
){
my
(
$hkey
,
$hvalue
) = /^<(\S+)>\s+[\"\']?([^\"\']+)/o or
next
;
$Opt
{D} and
warn
"$hkey => $hvalue"
;
if
(
$hkey
eq
"code_set_name"
){
exists
$Hdr
{code_set_name}
and nit
"Duplicate <code_set_name>: $hkey"
;
}
if
(
$hkey
eq
"code_set_alias"
){
$hvalue
eq
$Hdr
{code_set_name}
and nit
qq(alias "$hvalue" is already in <code_set_name>)
;
}
$Hdr
{
$hkey
} =
$hvalue
;
}
else
{
my
$name
=
$Hdr
{code_set_name};
my
(
$unistr
,
$encstr
,
$fb
) = /^(\S+)\s+(\S+)\s(\S+)/o or
next
;
$Opt
{v} and nit
$_
, 2;
my
$uni
= uniparse(
$unistr
);
my
$enc
= encparse(
$encstr
);
$fb
=~ /^\|([0123])$/ or nit
"malformed fallback: $fb"
;
$fb
= $1;
$Opt
{f} and
$fb
= 0;
unless
(
$fb
== 3){
if
(
exists
$U2E
{
$uni
}){
nit
"dupe encode map: U$uni => $U2E{$uni} and $enc"
, 1;
}
else
{
$U2E
{
$uni
} =
$enc
;
$Fallback
{
$uni
}{
$enc
} = 1
if
$fb
== 1;
if
(
$Opt
{e}) {
my
$e
= hex2enc(
$enc
);
my
$u
= hex2uni(
$uni
);
my
$eu
= Encode::encode(
$name
,
$u
);
$e
eq
$eu
or nit
qq(encode('$name', $uni)
!=
$enc
);
}
}
}
unless
(
$fb
== 1){
if
(
exists
$E2U
{
$enc
}){
nit
"dupe decode map: $enc => U$E2U{$enc} and U$uni"
, 1;
}
else
{
$E2U
{
$enc
} =
$uni
;
$Fallback
{
$enc
}{
$uni
} = 1
if
$fb
== 3;
if
(
$Opt
{e}) {
my
$e
= hex2enc(
$enc
);
my
$u
= hex2uni(
$uni
);
$Opt
{D} and
warn
"$uni, $enc"
;
my
$de
= Encode::decode(
$name
,
$e
);
$de
eq
$u
or nit
qq(decode('$name', $enc)
!=
$uni
);
}
}
}
}
}
$in_charmap
or nit
"Where is CHARMAP?"
;
checkRT();
printf
(
"$ARGV: %s error%s found\n"
,
(
$nerror
== 0 ?
'no'
:
$nerror
),
(
$nerror
> 1 ?
's'
:
''
));
}
exit
;
sub
hex2enc{
pack
(
"C*"
,
map
{
hex
(
$_
)}
split
(
","
,
shift
));
}
sub
hex2uni{
join
(
""
,
map
{
chr
(
hex
(
$_
)) }
split
(
","
,
shift
));
}
sub
checkRT{
for
my
$uni
(
keys
%E2U
){
my
$enc
=
$U2E
{
$uni
} or
next
;
$E2U
{
$U2E
{
$uni
}} eq
$uni
or
$Fallback
{
$uni
}{
$enc
} or
nit
"RT failure: U$uni => $enc =>U$E2U{$U2E{$uni}}"
;
}
for
my
$enc
(
keys
%E2U
){
my
$uni
=
$E2U
{
$enc
} or
next
;
$U2E
{
$E2U
{
$enc
}} eq
$enc
or
$Fallback
{
$enc
}{
$uni
} or
nit
"RT failure: $enc => U$uni => $U2E{$E2U{$enc}}"
;
}
}
sub
uniparse{
my
$str
=
shift
;
my
@u
;
push
@u
, $1
while
(
$str
=~ /\G<U(.*?)>/ig);
for
my
$u
(
@u
){
$u
=~ /^([0-9A-Za-z]+)$/o
or nit
"malformed Unicode character: $u"
;
}
return
join
(
','
,
@u
);
}
sub
encparse{
my
$str
=
shift
;
my
@e
;
for
my
$e
(
split
/\\x/io,
$str
){
$e
or
next
;
$e
=~ /^([0-9A-Za-z]{1,2})$/io
or nit
"Hex $e in $str is bogus"
;
push
@e
, $1;
}
return
join
(
','
,
@e
);
}