#!/usr/bin/perl -w
require
5.000000;
our
$VERSION
;
$VERSION
=
'3.81'
;
my
@exe
=
qw( wget xls2csv )
;
our
(
$ModDir
,
$Module
,
$ID
,
%ID2Names
,
%Alias
,
%Code2ID
,
%ID2Code
,
%Std
,
%Data
);
$ModDir
=
"lib/Locale/Codes"
;
require
"data.country.pl"
;
require
"data.language.pl"
;
require
"data.currency.pl"
;
our
$script_iana_url
=
$language_iana_url
;
require
"data.script.pl"
;
our
$langext_iana_url
=
$language_iana_url
;
require
"data.langext.pl"
;
our
$langvar_iana_url
=
$language_iana_url
;
require
"data.langvar.pl"
;
require
"data.langfam.pl"
;
our
(
$usage
);
my
$COM
= $0;
$COM
=~ s/^.*\///;
$usage
=
"usage:
$COM
OPTIONS
-h/--help : Print help.
-a/--all : Do all steps
-c/--country : Get the country codes
-l/--language : Get the language codes
-r/--currency : Get the currency codes
-s/--script : Get the script codes
-L/--langext : Get the language extension codes
-V/--langvar : Get the language variation codes
-F/--langfam : Get the language family codes
";
my
$do_all
= 0;
my
$do_country
= 0;
my
$do_language
= 0;
my
$do_currency
= 0;
my
$do_script
= 0;
my
$do_langext
= 0;
my
$do_langvar
= 0;
my
$do_langfam
= 0;
while
(
$_
=
shift
) {
(
print
$usage
),
exit
if
(
$_
eq
"-h"
||
$_
eq
"--help"
);
$do_all
= 1,
next
if
(
$_
eq
"-a"
||
$_
eq
"--all"
);
$do_country
= 1,
next
if
(
$_
eq
"-c"
||
$_
eq
"--country"
);
$do_language
= 1,
next
if
(
$_
eq
"-l"
||
$_
eq
"--language"
);
$do_currency
= 1,
next
if
(
$_
eq
"-r"
||
$_
eq
"--currency"
);
$do_script
= 1,
next
if
(
$_
eq
"-s"
||
$_
eq
"--script"
);
$do_langext
= 1,
next
if
(
$_
eq
"-L"
||
$_
eq
"--langext"
);
$do_langvar
= 1,
next
if
(
$_
eq
"-V"
||
$_
eq
"--langvar"
);
$do_langfam
= 1,
next
if
(
$_
eq
"-F"
||
$_
eq
"--langfam"
);
}
foreach
my
$exe
(
@exe
) {
if
(
system
(
"which $exe > /dev/null"
) != 0) {
die
"ERROR: required executable not found: $exe\n"
;
}
}
$ID
=
"0001"
;
%ID2Names
= ();
%Alias
= ();
%Code2ID
= ();
%ID2Code
= ();
%Std
= ();
do_country()
if
(
$do_all
||
$do_country
);
do_language()
if
(
$do_all
||
$do_language
);
do_currency()
if
(
$do_all
||
$do_currency
);
do_script()
if
(
$do_all
||
$do_script
);
do_langext()
if
(
$do_all
||
$do_langext
);
do_langvar()
if
(
$do_all
||
$do_langvar
);
do_langfam()
if
(
$do_all
||
$do_langfam
);
sub
do_country {
print
"Country codes...\n"
;
$Module
=
"Country"
;
_do_codeset(
'country'
,
'iso'
, [
'alpha-2'
,
'alpha-3'
,
'numeric'
],
[
'alpha-2'
,
'alpha-3'
,
'numeric'
]);
_do_codeset(
'country'
,
'iana'
, [
'dom'
],
[
'dom'
]);
_do_codeset(
'country'
,
'un'
, [
'un-numeric'
,
'un-alpha-3'
],
[
'un-numeric'
,
'un-alpha-3'
]);
_do_codeset(
'country'
,
'genc'
, [
'genc-alpha-2'
,
'genc-alpha-3'
,
'genc-numeric'
],
[
'genc-alpha-2'
,
'genc-alpha-3'
,
'genc-numeric'
]);
do_aliases(
"country"
);
write_module(
"country"
);
}
{
my
$in
;
sub
_init_country_genc {
my
$inst
=
qq{
*NOTE* The GENC codes are tricky to download for some reason. Currently, they
are GENC 3.0 Update 11. This is available in internal/genc-3.0.11
(Please download the data manually for GENC country codes. This can be done
using the Chrome browser with the Table Capture (georgemike) extension enabled.
Go to the following URL:
$country_genc_url
Click on:
'Browse'
'Show 100 entries'
Select any part of the table (it is not necessary to select the entire table).
Then right click and launch the table caputure workshop. Click on the
'Edit table data before exporting' icon. Click on the 'Delete header row'
button. Then click on the 'Copy table to clipboard' icon and paste it into
the file.
Select any part of the table (it is not necessary to select the entire
table). Then right click and launch the table caputure workshop.
Click on the 'Copy table to clipboard' icon and paste it into the
file. Remove the headers (which contain '2-Char Code'), one per set of
rows copied.
*NOTE* This currently is required:
If there are more entries than will fit on a single table, repeat this
process but make sure you remove extra header lines (but do not remove the
initial header line).
}
;
$in
= _read_file(
'type'
=>
'csv'
,
'manual'
=> 1,
'inst'
=>
$inst
,
'sep_char'
=>
"\t"
,
'as_list'
=> 1,
'encoding'
=>
'UTF-8'
,
);
1;
}
sub
_read_country_genc {
while
(
@$in
) {
my
$ele
=
shift
(
@$in
);
my
$alpha2
=
$$ele
{
'2-Char Code'
};
next
if
(!
$alpha2
);
next
if
(
$alpha2
eq
'[None]'
);
my
$alpha3
=
$$ele
{
'3-Char Code'
};
my
$num
=
$$ele
{
'Numeric Code'
};
my
$country
=
$$ele
{
'Name'
};
next
if
(
$country
=~ /^entity/i);
my
(
$id
,
$i
);
if
(
exists
$Code2ID
{
'alpha-2'
}{
lc
(
$alpha2
)}) {
(
$id
,
$i
) = @{
$Code2ID
{
'alpha-2'
}{
lc
(
$alpha2
)} };
}
if
(
exists
$Code2ID
{
'alpha-3'
}{
lc
(
$alpha3
)}) {
if
(!
defined
(
$id
)) {
print
"WARNING [genc]: Code mismatch (alpha-3 defined, alpha-2 not): $country\n"
;
next
;
}
my
(
$id2
,
$i2
) = @{
$Code2ID
{
'alpha-3'
}{
lc
(
$alpha3
)} };
if
(
$id
ne
$id2
) {
print
"WARNING [genc]: Code mismatch (alpha-3 != alpha-2): $country\n"
;
next
;
}
}
if
(
exists
$Code2ID
{
'numeric'
}{
$num
}) {
if
(!
defined
(
$id
)) {
print
"WARNING [genc]: Code mismatch (numeric defined, alpha-2 not): $country\n"
;
next
;
}
my
(
$id2
,
$i2
) = @{
$Code2ID
{
'numeric'
}{
$num
} };
if
(
$id
ne
$id2
) {
print
"WARNING [genc]: Code mismatch (numeric != alpha-2): $country\n"
;
next
;
}
}
if
(
exists
$Data
{
'country'
}{
'genc'
}{
'orig'
}{
'name'
}{
$country
}) {
$country
=
$Data
{
'country'
}{
'genc'
}{
'orig'
}{
'name'
}{
$country
};
}
else
{
my
@tmp
=
split
(/\s+/,
$country
);
my
@tmp2
=
map
{
ucfirst
(
lc
(
$_
)) }
@tmp
;
$country
=
join
(
' '
,
@tmp2
);
}
my
@country
;
if
(
exists
$Alias
{
lc
(
$country
)}) {
my
(
$id2
,
$i2
) = @{
$Alias
{
lc
(
$country
)} };
if
(!
defined
(
$id
)) {
(
$id
,
$i
) = (
$id2
,
$i2
);
}
elsif
(
$id
ne
$id2
) {
print
"WARNING [genc]: Code mismatch (alias incorrect): $country\n"
;
next
;
}
my
@name
= @{
$ID2Names
{
$id
} };
@country
= (
$name
[
$i
]);
}
elsif
(
defined
(
$id
)) {
my
@name
= @{
$ID2Names
{
$id
} };
@country
= (_country_name(
$country
),
@name
);
}
else
{
@country
= _country_name(
$country
);
}
return
(
$alpha2
,
$alpha3
,
$num
,
@country
);
}
return
();
}
}
{
my
$in
;
sub
_init_country_un {
$in
= _read_file(
'url'
=>
$country_un_url
,
'type'
=>
'html'
,
'as_list'
=> 0,
'html_strip'
=> [
qw(p div strong br)
],
'html_repl'
=> [
qw( )
],
);
my
$found
= jump_to_row(\
$in
,
"Country or Area"
);
if
(!
$found
) {
die
"ERROR [un]: country code file format changed!\n"
;
}
}
sub
_read_country_un {
while
(1) {
my
@row
= get_row(
"un"
,\
$in
);
return
()
if
(!
@row
);
my
(
$country
,
$num
,
$alpha
) =
@row
;
my
(
$id
,
$i
);
if
(
exists
$Code2ID
{
'alpha-3'
}{
lc
(
$alpha
)}) {
my
(
$id1
,
$i1
) = @{
$Code2ID
{
'alpha-3'
}{
lc
(
$alpha
)} };
if
(
exists
$Code2ID
{
'numeric'
}{
$num
}) {
my
(
$id2
,
$i2
) = @{
$Code2ID
{
'numeric'
}{
$num
} };
if
(
$id1
ne
$id2
) {
print
"WARNING [un]: UN/ISO code alpha/numeric mismatch: $country\n"
;
next
;
}
(
$id
,
$i
) = (
$id1
,
$i1
);
}
else
{
print
"WARNING [un]: UN/ISO code mismatch (alpha defined): $country\n"
;
next
;
}
}
elsif
(
exists
$Code2ID
{
'numeric'
}{
$num
}) {
print
"WARNING [un]: UN/ISO code mismatch (numeric defined): $country\n"
;
next
;
}
my
@country
;
if
(
exists
$Alias
{
lc
(
$country
)}) {
my
(
$id2
,
$i2
) = @{
$Alias
{
lc
(
$country
)} };
if
(!
defined
(
$id
)) {
(
$id
,
$i
) = (
$id2
,
$i2
);
}
elsif
(
$id
ne
$id2
) {
print
"WARNING [un]: UN/ISO code mismatch: $country\n"
;
next
;
}
my
@name
= @{
$ID2Names
{
$id
} };
@country
= (
$name
[
$i
]);
}
elsif
(
defined
(
$id
)) {
my
@name
= @{
$ID2Names
{
$id
} };
@country
= (_country_name(
$country
),
@name
);
}
else
{
@country
= _country_name(
$country
);
}
return
(
$num
,
$alpha
,
@country
);
}
}
}
{
my
$in
;
sub
_init_country_iso {
my
$inst
=
qq{
(Please download the data manually for ISO 3166 country codes.
Currently, this works in chrome with the Table Capture (georgemike)
extension enabled.
Go to the following URL:
$country_iso_url
Click on:
'Online Browsing Platform'
'Officially assigned codes'
300 results per page
Select any part of the table (it is not necessary to select the entire
table). Then right click and launch the table caputure workshop.
Click on the 'Copy table to clipboard' icon and paste it into the
file.
If there are more entries than will fit on a single table, repeat this
process but make sure you remove extra header lines.
}
;
$in
= _read_file(
'type'
=>
'csv'
,
'manual'
=> 1,
'inst'
=>
$inst
,
'sep_char'
=>
"\t"
,
'decode_utf8'
=> 0,
'as_list'
=> 1,
'encoding'
=>
'UTF-8'
,
);
1;
}
sub
_read_country_iso {
while
(
@$in
) {
my
$ele
=
shift
(
@$in
);
my
$name
=
$$ele
{
'English short name'
};
my
$alpha2
=
lc
(
$$ele
{
'Alpha-2 code'
});
my
$alpha3
=
lc
(
$$ele
{
'Alpha-3 code'
});
my
$num
=
$$ele
{
'Numeric'
};
$name
=~ s/\(the/\(The/;
return
(
$alpha2
,
$alpha3
,
$num
,_country_name(
$name
));
}
return
();
}
}
sub
_country_name {
my
(
$name
) =
@_
;
my
@ret
;
if
(
$name
=~ /^(.+), The (.+?) of$/ ||
$name
=~ /^(.+) \(The (.+?) of\)$/) {
my
(
$n1
,
$n2
) = ($1,$2);
push
(
@ret
,
"$n1, The $n2 of"
,
"$n1 (The $n2 of)"
,
"$n1, $n2 of"
,
"$n1 ($n2 of)"
,
"The $n2 of $n1"
,
"$n2 of $n1"
);
}
elsif
(
$name
=~ /^(.+), (.+?) of$/ |\
$name
=~ /^(.+), \((.+?) of\)$/) {
my
(
$n1
,
$n2
) = ($1,$2);
push
(
@ret
,
"$n1, $n2 of"
,
"$n1 ($n2 of)"
,
"$n2 of $n1"
);
}
elsif
(
$name
=~ /^(.+), The$/ ||
$name
=~ /^(.+) \(The\)$/) {
my
(
$n1
) = ($1);
push
(
@ret
,
$n1
,
"The $n1"
,
"$n1, The"
,
"$n1 (The)"
);
}
else
{
push
(
@ret
,
$name
);
}
return
@ret
;
}
{
my
$in
;
my
%codes
;
sub
_init_country_iana {
foreach
my
$code
(
keys
%{
$Code2ID
{
'alpha-2'
} }) {
my
(
$id
,
$idx
) = @{
$Code2ID
{
'alpha-2'
}{
$code
} };
my
$name
=
$ID2Names
{
$id
}[
$idx
];
$codes
{
$code
} =
$name
;
}
$in
= _read_file(
'url'
=>
$country_iana_url
,
'type'
=>
'html'
,
'as_list'
=> 0,
'html_strip'
=> [
qw(a span)
],
);
my
$found
= jump_to_row(\
$in
,
"TLD Manager"
);
if
(!
$found
) {
die
"ERROR [iana]: country code file format changed!\n"
;
}
}
sub
_read_country_iana {
while
(1) {
my
@row
= get_row(
"iana"
,\
$in
);
return
()
if
(!
@row
);
my
(
$dom
,
$type
,
$tmp
) =
@row
;
next
unless
(
$type
eq
"country-code"
&&
$dom
=~ /^\.[a-z][a-z]/);
$dom
=~ s/^\.//;
my
@country
;
if
(
exists
$Code2ID
{
'alpha-2'
}{
$dom
}) {
my
(
$id
,
$i
) = @{
$Code2ID
{
'alpha-2'
}{
$dom
} };
my
@name
= @{
$ID2Names
{
$id
} };
@country
= (
$name
[
$i
]);
}
elsif
(
exists
$codes
{
$dom
}) {
@country
= _country_name(
$codes
{
$dom
});
}
else
{
next
;
}
return
(
$dom
,
@country
);
}
}
}
sub
do_language {
print
"Language codes...\n"
;
$Module
=
"Language"
;
_do_codeset(
'language'
,
'iso2'
, [
'alpha-3'
,
'term'
,
'alpha-2'
],
[
'alpha-3'
,
'term'
,
'alpha-2'
]);
_do_codeset(
'language'
,
'iso5'
, [
'alpha-3'
],
[
'alpha-3'
],
'allow'
);
_do_codeset(
'language'
,
'iana'
, [
'alpha-2'
,
'alpha-3'
],
[
'alpha-2'
,
'alpha-3'
],
'allow'
);
do_aliases(
"language"
);
write_module(
"language"
);
}
{
my
$in
;
sub
_init_language_iso2 {
$in
= _read_file(
'url'
=>
$language_iso2_url
,
'as_list'
=> 1,
'encoding'
=>
'UTF-8'
,
);
}
sub
_read_language_iso2 {
while
(
@$in
) {
my
$line
=
shift
(
@$in
);
next
if
(!
$line
);
my
(
$alpha3
,
$term
,
$alpha2
,
$language
,
$french
) =
split
(/\|/,
$line
);
if
(
length
(
$alpha3
)>3) {
$alpha3
=
substr
(
$alpha3
,
length
(
$alpha3
)-3);
}
my
@language
=
split
(/\s*;\s*/,
$language
);
$term
=
$alpha3
if
(!
$term
);
return
(
$alpha3
,
$term
,
$alpha2
,
@language
);
}
return
();
}
}
{
my
$in
;
sub
_init_language_iso5 {
$in
= _read_file(
'url'
=>
$language_iso5_url
,
'as_list'
=> 0,
);
my
$found
= jump_to_row(\
$in
,
'Identifier'
);
if
(!
$found
) {
die
"ERROR [iso5]: language code file format changed!\n"
;
}
}
sub
_read_language_iso5 {
while
(1) {
my
@row
= get_row(
"iso5"
,\
$in
);
return
()
if
(!
@row
);
my
(
$alpha3
,
$language
) =
@row
;
next
if
(!
$language
);
if
(
$alpha3
&&
$alpha3
!~ /^[a-z][a-z][a-z]$/) {
print
"WARNING [iso5]: Invalid alpha-3 code: $language => $alpha3\n"
;
next
;
}
return
(
$alpha3
,
$language
);
}
}
}
{
my
$in
;
sub
_init_language_iana {
$in
= _read_file(
'url'
=>
$language_iana_url
,
'as_list'
=> 1,
);
shift
(
@$in
)
until
(
$$in
[0] eq
'%%'
);
}
sub
_read_language_iana {
while
(1) {
my
%entry
= _iana_entry(
$in
,
'language'
);
last
if
(!
%entry
);
my
(
@language
,
$code
,
$alpha2
,
$alpha3
);
$code
=
$entry
{
'Subtag'
};
foreach
my
$language
(@{
$entry
{
'Description'
} }) {
push
(
@language
,
$language
);
}
if
(
length
(
$code
) == 2) {
$alpha2
=
lc
(
$code
);
}
else
{
$alpha3
=
lc
(
$code
);
}
return
(
$alpha2
,
$alpha3
,
@language
);
}
return
();
}
}
sub
_iana_entry {
my
(
$in
,
@type
) =
@_
;
my
%type
=
map
{
$_
,1 }
@type
;
my
%entry
;
while
(1) {
%entry
= ();
return
%entry
if
(!
@$in
);
my
$oldkey
;
shift
(
@$in
);
while
(
@$in
&&
$$in
[0] ne
'%%'
) {
my
$line
=
shift
(
@$in
);
while
(
@$in
&&
$$in
[0] =~ /^\s+/) {
$$in
[0] =~ s/^\s+//;
$line
.=
" $$in[0]"
;
shift
(
@$in
);
}
$line
=~ /^(.*?):\s*(.*)$/;
my
(
$key
,
$val
) = ($1,$2);
if
(
$key
eq
'Description'
) {
if
(
exists
$entry
{
$key
}) {
push
( @{
$entry
{
$key
} },
$val
);
}
else
{
$entry
{
$key
} = [
$val
];
}
}
else
{
$entry
{
$key
} =
$val
;
}
}
next
if
(!
%entry
||
exists
$entry
{
'Deprecated'
} ||
!
exists
$entry
{
'Type'
} ||
!
exists
$type
{
$entry
{
'Type'
} });
return
%entry
;
}
}
sub
do_currency {
print
"Currency codes...\n"
;
$Module
=
"Currency"
;
_do_codeset(
'currency'
,
'iso'
, [
'alpha'
,
'num'
], [
'alpha'
,
'num'
]);
do_aliases(
"currency"
);
write_module(
"currency"
);
}
{
my
$in
;
sub
_init_currency_iso {
$in
= _read_file(
'url'
=>
$currency_iso_url
,
'head'
=>
'ENTITY'
,
'as_list'
=> 1,
'type'
=>
'xls'
,
'join'
=> 1,
'encoding'
=>
'UTF-8'
,
);
}
sub
_read_currency_iso {
while
(
@$in
) {
my
$ele
=
shift
(
@$in
);
next
if
(!
$ele
);
my
$currency
=
$$ele
{
'Currency'
};
my
$alpha
=
$$ele
{
'Alphabetic Code'
};
my
$num
=
$$ele
{
'Numeric Code'
};
$num
=
""
if
(!
defined
(
$num
));
$currency
=
""
if
(!
defined
(
$currency
));
$alpha
=
""
if
(!
defined
(
$alpha
));
$currency
=~ s/\s+$//;
if
(
$num
) {
$num
=
"0$num"
while
(
length
(
$num
) < 3);
if
(
$num
!~ /^\d\d\d+$/) {
print
"WARNING [iso]: Invalid numeric code: $currency => $num\n"
;
next
;
}
}
$alpha
=
uc
(
$alpha
);
if
(
$alpha
&&
$alpha
!~ /^[A-Z][A-Z][A-Z]$/) {
print
"WARNING [iso]: Invalid alpha code: $currency => $alpha\n"
;
next
;
}
next
if
(!
$alpha
&& !
$num
);
return
(
$alpha
,
$num
,
$currency
);
}
return
();
}
}
sub
do_script {
print
"Script codes...\n"
;
$Module
=
"Script"
;
_do_codeset(
'script'
,
'iso'
, [
'alpha'
,
'num'
], [
'alpha'
,
'num'
]);
_do_codeset(
'script'
,
'iana'
, [
'alpha'
], [
'alpha'
],
'allow'
);
do_aliases(
"script"
);
write_module(
"script"
);
}
{
my
$in
;
sub
_init_script_iso {
$in
= _read_file(
'url'
=>
$script_iso_url
,
'as_list'
=> 1,
'type'
=>
'text'
,
'chop'
=> 1,
);
}
sub
_read_script_iso {
while
(
@$in
) {
my
$line
=
shift
(
@$in
);
next
if
(!
$line
||
$line
=~ /^\043/);
my
(
$alpha
,
$num
,
$script
) =
split
(/;/,
$line
);
return
(
$alpha
,
$num
,
$script
);
}
return
();
}
}
{
my
$in
;
sub
_init_script_iana {
$in
= _read_file(
'url'
=>
$script_iana_url
,
'as_list'
=> 1,
);
shift
(
@$in
)
until
(
$$in
[0] eq
'%%'
);
}
sub
_read_script_iana {
while
(1) {
my
%entry
= _iana_entry(
$in
,
'script'
);
last
if
(!
%entry
);
my
(
@script
,
$alpha
);
$alpha
=
$entry
{
'Subtag'
};
foreach
my
$script
(@{
$entry
{
'Description'
} }) {
push
(
@script
,
$script
);
}
return
(
$alpha
,
@script
);
}
return
();
}
}
sub
do_langext {
print
"LangExt codes...\n"
;
$Module
=
"LangExt"
;
_do_codeset(
'langext'
,
'iana'
, [
'alpha'
], [
'alpha'
]);
do_aliases(
"langext"
);
write_module(
"langext"
);
}
{
my
$in
;
sub
_init_langext_iana {
$in
= _read_file(
'url'
=>
$langext_iana_url
,
'as_list'
=> 1,
);
shift
(
@$in
)
until
(
$$in
[0] eq
'%%'
);
}
sub
_read_langext_iana {
while
(1) {
my
%entry
= _iana_entry(
$in
,
'extlang'
);
last
if
(!
%entry
);
my
(
@langext
,
$alpha
);
$alpha
=
$entry
{
'Subtag'
};
foreach
my
$langext
(@{
$entry
{
'Description'
} }) {
push
(
@langext
,
$langext
);
}
return
(
$alpha
,
@langext
);
}
return
();
}
}
sub
do_langvar {
print
"LangVar codes...\n"
;
$Module
=
"LangVar"
;
_do_codeset(
'langvar'
,
'iana'
, [
'alpha'
], [
'alpha'
]);
do_aliases(
"langvar"
);
write_module(
"langvar"
);
}
{
my
$in
;
sub
_init_langvar_iana {
$in
= _read_file(
'url'
=>
$langvar_iana_url
,
'as_list'
=> 1,
);
shift
(
@$in
)
until
(
$$in
[0] eq
'%%'
);
}
sub
_read_langvar_iana {
while
(1) {
my
%entry
= _iana_entry(
$in
,
'variant'
);
last
if
(!
%entry
);
my
(
@langvar
,
$alpha
);
$alpha
=
$entry
{
'Subtag'
};
foreach
my
$langvar
(@{
$entry
{
'Description'
} }) {
push
(
@langvar
,
$langvar
);
}
return
(
$alpha
,
@langvar
);
}
return
();
}
}
sub
do_langfam {
print
"LangFam codes...\n"
;
$Module
=
"LangFam"
;
_do_codeset(
'langfam'
,
'iso'
, [
'alpha'
], [
'alpha'
]);
do_aliases(
"langfam"
);
write_module(
"langfam"
);
}
{
my
$in
;
sub
_init_langfam_iso {
$in
= _read_file(
'url'
=>
$langfam_iso_url
,
'type'
=>
'html'
,
'as_list'
=> 0,
'html_strip'
=> [
qw(br p strong div)
],
'html_repl'
=> [
qw( )
],
);
my
$found
= jump_to_row(\
$in
,
"Identifier"
);
if
(!
$found
) {
die
"ERROR [iso]: language family code file format changed!\n"
;
}
}
sub
_read_langfam_iso {
while
(1) {
my
@row
= get_row(
"iso"
,\
$in
);
return
()
if
(!
@row
);
my
(
$alpha
,
$langfam
) =
@row
;
return
()
if
(
$alpha
=~ /class=
"loweralpha"
/);
if
(!
$alpha
|| !
$langfam
) {
$alpha
=
''
if
(!
$alpha
);
$langfam
=
''
if
(!
$langfam
);
print
"WARNING [iso]: Invalid langfam code: $langfam => $alpha\n"
;
next
;
}
$alpha
=
lc
(
$alpha
);
if
(
$alpha
!~ /^[a-z][a-z][a-z]$/) {
print
"WARNING [iso]: Invalid alpha code: $langfam => $alpha\n"
;
next
;
}
return
(
$alpha
,
$langfam
);
}
}
}
sub
_type_hashes {
my
(
$caller
) =
@_
;
return
(
$Data
{
$caller
}{
'alias'
});
}
sub
check_code {
my
(
$type
,
$codeset
,
$code
,
$name
,
$currID
,
$noprint
) =
@_
;
if
(
exists
$Code2ID
{
$codeset
}{
$code
}) {
return
_check_code_exists(
$type
,
$codeset
,
$code
,
$name
,
$currID
);
}
else
{
return
_check_code_new(
$type
,
$codeset
,
$code
,
$name
,
$currID
,
$noprint
);
}
}
sub
_check_code_exists {
my
(
$type
,
$codeset
,
$code
,
$name
,
$currID
) =
@_
;
my
$oldID
=
$Code2ID
{
$codeset
}{
$code
}[0];
if
(
$currID
!=
$oldID
) {
print
"ERROR [$type]: ID mismatch in code: [$codeset, $name, $code, $currID != $oldID ]\n"
;
return
1;
}
if
(
exists
$Alias
{
lc
(
$name
)}) {
my
$altID
=
$Alias
{
lc
(
$name
)}[0];
if
(
$currID
!=
$altID
) {
print
"ERROR [$type]: ID mismatch: [$codeset, $name, $code, $currID != $altID ]\n"
;
return
1;
}
}
else
{
push
@{
$ID2Names
{
$currID
} },
$name
;
my
$i
= $
$Alias
{
lc
(
$name
)} = [
$currID
,
$i
];
}
return
0;
}
sub
_check_code_new {
my
(
$type
,
$codeset
,
$code
,
$name
,
$newID
,
$noprint
) =
@_
;
print
"INFO [$type]: New code: $codeset [$code] => $name\n"
unless
(
$noprint
);
my
$i
;
if
(
exists
$Alias
{
lc
(
$name
)}) {
$i
=
$Alias
{
lc
(
$name
)}[1];
}
else
{
push
@{
$ID2Names
{
$newID
} },
$name
;
$i
= $
$Alias
{
lc
(
$name
)} = [
$newID
,
$i
];
}
$ID2Code
{
$codeset
}{
$newID
} =
$code
;
$Code2ID
{
$codeset
}{
$code
} = [
$newID
,
$i
];
return
0;
}
sub
_get_ID {
my
(
$op
,
$type
,
$name
,
$no_create
) =
@_
;
my
$type_alias
= _type_hashes(
$op
);
my
(
$currID
,
$i
,
$t
);
if
(
exists
$Alias
{
lc
(
$name
)}) {
(
$currID
,
$i
) = @{
$Alias
{
lc
(
$name
)} };
$t
=
"same"
;
}
elsif
(
exists
$$type_alias
{
$name
}) {
my
$c
=
$$type_alias
{
$name
};
if
(!
exists
$Alias
{
lc
(
$c
)}) {
print
"WARNING [$op,$type]: alias referenced before it is defined: $name => $c\n"
;
return
(1);
}
$currID
=
$Alias
{
lc
(
$c
)}[0];
push
@{
$ID2Names
{
$currID
} },
$name
;
$i
= $
$Alias
{
lc
(
$name
)} = [
$currID
,
$i
];
$t
=
"alias"
;
}
else
{
if
(
$no_create
) {
return
(0,-1,-1,
"new"
);
}
$currID
=
$ID
++;
$i
= 0;
$ID2Names
{
$currID
} = [
$name
];
$Alias
{
lc
(
$name
)} = [
$currID
,
$i
];
$t
=
"new"
;
}
return
(0,
$currID
,
$i
,
$t
);
}
sub
_get_ID_new {
my
(
$type
,
$src
,
$second
,
$allow
,
$codes
,
$names
) =
@_
;
my
(
$id
,
$subid
) = (
''
,
''
);
NAME:
foreach
my
$name
(
@$names
) {
if
(
exists
$Alias
{
lc
(
$name
)}) {
my
$i
=
$Alias
{
lc
(
$name
)}[0];
if
(
$id
&&
$i
ne
$id
) {
print
"WARNING [$type,$src]: "
.
"name refers to multiple elements: $name => $id,$i\n"
;
return
(1);
}
$id
=
$i
;
next
NAME;
}
next
NAME
if
(
$id
|| !
$second
||
$allow
);
if
(!
exists
$Data
{
$type
}{
$src
}{
'new'
}{
$name
} &&
!
exists
$Data
{
$type
}{
$src
}{
'orig'
}{
'name'
}) {
print
"WARNING [$type,$src]: "
.
"new name not allowed: $name\n"
;
return
(1);
}
}
foreach
my
$codeset
(
keys
%$codes
) {
my
$code
=
$$codes
{
$codeset
};
if
(
exists
$Code2ID
{
$codeset
}{
$code
}) {
my
(
$i
,
$s
) = @{
$Code2ID
{
$codeset
}{
$code
} };
if
(
$id
&&
$i
ne
$id
) {
print
"WARNING [$type,$src,$codeset]: "
.
"code refers to multiple elements: $code => $id,$i\n"
;
return
(1);
}
(
$id
,
$subid
) = (
$i
,
$s
);
}
}
if
(
$id
) {
my
$name
=
$$names
[0];
if
(
exists
$Alias
{
lc
(
$name
)}) {
$subid
=
$Alias
{
lc
(
$name
)}[1];
}
else
{
push
@{
$ID2Names
{
$id
} },
$name
;
$subid
= $
$Alias
{
lc
(
$name
)} = [
$id
,
$subid
];
}
foreach
$name
(
@$names
) {
if
(!
exists
$Alias
{
lc
(
$name
)}) {
push
@{
$ID2Names
{
$id
} },
$name
;
my
$s
= $
$Alias
{
lc
(
$name
)} = [
$id
,
$s
];
}
}
}
if
(!
$id
) {
$id
=
$ID
++;
$subid
= 0;
$ID2Names
{
$id
} = [
@$names
];
my
$sid
=
$subid
;
foreach
my
$name
(
@$names
) {
$Alias
{
lc
(
$name
)} = [
$id
,
$sid
++ ];
}
}
return
(0,
$id
,
$subid
);
}
sub
do_aliases {
my
(
$caller
) =
@_
;
my
(
$type_alias
) = _type_hashes(
$caller
);
foreach
my
$alias
(
keys
%$type_alias
) {
my
$type
=
$$type_alias
{
$alias
};
next
if
(
exists
$Alias
{
lc
(
$type
)} &&
exists
$Alias
{
lc
(
$alias
)});
if
(!
exists
$Alias
{
lc
(
$type
)} &&
!
exists
$Alias
{
lc
(
$alias
)}) {
print
"WARNING: unused type in alias list: $type\n"
;
print
"WARNING: unused type in alias list: $alias\n"
;
next
;
}
my
(
$typeID
);
if
(
exists
$Alias
{
lc
(
$type
)}) {
$typeID
=
$Alias
{
lc
(
$type
)}[0];
$type
=
$alias
;
}
else
{
$typeID
=
$Alias
{
lc
(
$alias
)}[0];
}
push
@{
$ID2Names
{
$typeID
} },
$type
;
my
$i
= $
$Alias
{
lc
(
$type
)} = [
$typeID
,
$i
];
}
}
sub
write_module {
my
(
$type
) =
@_
;
my
(
%hashes
) = (
"id2names"
=>
"ID2Names"
,
"alias2id"
=>
"Alias"
,
"code2id"
=>
"Code2ID"
,
"id2code"
=>
"ID2Code"
);
my
$file
=
"$ModDir/${Module}_Codes.pm"
;
my
$out
= new IO::File;
$out
->
open
(
">$file"
);
binmode
$out
,
":encoding(UTF-8)"
;
my
$timestamp
= `date`;
chomp
(
$timestamp
);
print
$out
"
package
Locale::Codes::${Module}_Codes;
require
5.006;
our
(\
$VERSION
);
\
$VERSION
=
'3.81'
;
\
$Locale::Codes::Data
{
'$type'
}{
'id'
} =
'$ID'
;
";
foreach
my
$h
(
qw(id2names alias2id code2id id2code)
) {
my
$hash
=
$hashes
{
$h
};
print
$out
"\$Locale::Codes::Data{'$type'}{'$h'} = {\n"
;
_write_hash(
$out
,
$hash
);
print
$out
"};\n\n"
;
}
print
$out
"1;\n"
;
$out
->
close
();
}
sub
_write_hash {
my
(
$out
,
$hashname
) =
@_
;
no
strict
'refs'
;
my
%hash
=
%$hashname
;
_write_subhash(
$out
,3,\
%hash
);
}
sub
_write_subhash {
my
(
$out
,
$indent
,
$hashref
) =
@_
;
my
%hash
=
%$hashref
;
my
$ind
=
" "
x
$indent
;
foreach
my
$key
(
sort
keys
%hash
) {
my
$val
=
$hash
{
$key
};
if
(
ref
(
$val
) eq
"HASH"
) {
print
$out
"${ind}q($key) => {\n"
;
_write_subhash(
$out
,
$indent
+3,
$val
);
print
$out
"${ind} },\n"
;
}
elsif
(
ref
(
$val
) eq
"ARRAY"
) {
print
$out
"${ind}q($key) => [\n"
;
_write_sublist(
$out
,
$indent
+3,
$val
);
print
$out
"${ind} ],\n"
;
}
else
{
print
$out
"${ind}q($key) => q($val),\n"
;
}
}
}
sub
_write_sublist {
my
(
$out
,
$indent
,
$listref
) =
@_
;
my
@list
=
@$listref
;
my
$ind
=
" "
x
$indent
;
foreach
my
$val
(
@list
) {
if
(
ref
(
$val
) eq
"HASH"
) {
print
$out
"${ind}{\n"
;
_write_subhash(
$out
,
$indent
+3,
$val
);
print
$out
"${ind}},\n"
;
}
elsif
(
ref
(
$val
) eq
"ARRAY"
) {
print
$out
"${ind}[\n"
;
_write_sublist(
$out
,
$indent
+3,
$val
);
print
$out
"${ind}],\n"
;
}
else
{
print
$out
"${ind}q($val),\n"
;
}
}
}
sub
_read_file {
my
(
%opts
) =
@_
;
my
$file
;
if
(
exists
$opts
{
'local'
}) {
$file
=
$opts
{
'local'
};
}
else
{
$file
= (
caller
(1))[3];
$file
=~ s/main:://;
}
my
$type
=
$opts
{
'type'
};
$type
=
'text'
if
(!
$type
);
my
$file2
=
''
;
if
(
$type
eq
'html'
) {
$file
.=
".htm"
;
}
elsif
(
$type
eq
'xls'
) {
$file
.=
".xls"
;
}
elsif
(
$type
eq
'xlsx'
) {
$file
.=
".xlsx"
;
}
elsif
(
$type
eq
'zip'
) {
$file2
=
"$file.txt"
;
$file
.=
".zip"
;
}
elsif
(
$type
eq
'csv'
) {
$file
.=
".csv"
;
}
else
{
$file
.=
".txt"
;
}
if
(
$opts
{
'manual'
}) {
while
(! -f
$file
) {
my
$inst
=
$opts
{
'inst'
};
print
$inst
,
"\n"
;
print
"Put the data into the file:\n"
;
print
" $file\n"
;
print
"Strip out any leading/trailing blank lines.\n\n"
;
print
"Press any key to continue...\n"
;
my
$c
= getone();
}
}
else
{
my
$url
=
$opts
{
'url'
};
system
(
"wget -N -q --no-check-certificate -O $file '$url'"
);
}
my
(
@in
);
if
(
$type
eq
'xls'
) {
my
$csv
=
$file
;
$csv
=~ s/.xls/.csv/;
my
$cmd
=
"xls2csv -x $file -c $csv"
;
system
(
$cmd
);
@in
= _read_file_lines(
$csv
,
%opts
);
if
(
$opts
{
'head'
}) {
my
$head
=
$opts
{
'head'
};
while
(
$in
[0] !~ /
$head
/) {
shift
(
@in
);
}
}
my
$n
= _csv_count_columns(
$in
[0]);
if
(
$opts
{
'join'
}) {
my
@tmp
;
LINE:
while
(
@in
) {
my
$line
=
shift
(
@in
);
while
(1) {
my
$nn
= _csv_count_columns(
$line
);
if
(
$nn
==
$n
) {
push
(
@tmp
,
$line
);
next
LINE;
}
elsif
(
$nn
>
$n
) {
print
"ERROR: Invalid line skipped:\n$line\n"
;
next
LINE;
}
else
{
$line
.=
" "
.
shift
(
@in
);
next
;
}
}
}
@in
=
@tmp
;
}
my
$in
= Text::CSV::Slurp->load(
string
=>
join
(
"\n"
,
@in
));
@in
=
@$in
;
$opts
{
'as_list'
} = 1;
}
elsif
(
$type
eq
'xlsx'
) {
my
$excel
= Spreadsheet::XLSX->new(
$file
);
foreach
my
$sheet
(@{
$excel
->{Worksheet}}) {
my
$name
=
$sheet
->{Name};
next
if
(
$opts
{
'sheet'
} &&
$opts
{
'sheet'
} ne
$name
);
$sheet
->{MaxRow} ||=
$sheet
->{MinRow};
foreach
my
$row
(
$sheet
->{MinRow} ..
$sheet
->{MaxRow}) {
$sheet
->{MaxCol} ||=
$sheet
->{MinCol};
my
@row
= ();
foreach
my
$col
(
$sheet
->{MinCol} ..
$sheet
->{MaxCol}) {
my
$cell
=
$sheet
->{Cells}[
$row
][
$col
];
my
$val
=
$cell
->{Val}
if
(
$cell
);
$val
=
''
if
(!
defined
$val
);
push
(
@row
,
"\"$val\""
);
}
push
(
@in
,
join
(
','
,
@row
) .
"\n"
);
}
}
}
elsif
(
$type
eq
'csv'
) {
my
%o
;
foreach
my
$opt
(
qw(sep_char decode_utf8)
) {
if
(
exists
$opts
{
$opt
}) {
$o
{
$opt
} =
$opts
{
$opt
};
}
}
$o
{
'decode_utf8'
} = 0;
@in
= _read_file_lines(
$file
,
%opts
);
my
$in
= Text::CSV::Slurp->load(
string
=>
join
(
"\n"
,
@in
),
%o
);
@in
=
@$in
;
$opts
{
'as_list'
} = 1;
}
elsif
(
$type
eq
'zip'
) {
my
$zip
= Archive::Zip->new(
$file
);
my
@file
=
grep
/
$opts
{
'file'
}/,
$zip
->memberNames();
my
$flag
=
$zip
->extractMember(
$file
[0],
$file2
);
if
(!
defined
(
$flag
)) {
die
"ERROR [iso]: zip file changed format\n"
;
}
@in
= _read_file_lines(
$file2
,
%opts
);
}
else
{
@in
= _read_file_lines(
$file
,
%opts
);
}
if
(
$opts
{
'as_list'
}) {
return
\
@in
;
}
else
{
return
join
(
" "
,
@in
);
}
}
sub
_read_file_lines {
my
(
$file
,
%opts
) =
@_
;
my
@in
;
@in
= `cat
$file
`;
chomp
(
@in
);
chop
(
@in
)
if
(
$opts
{
'chop'
});
if
(
$opts
{
'html_strip'
} ||
$opts
{
'html_repl'
}) {
my
$in
=
join
(
"\n"
,
@in
);
strip_tags(\
$in
,@{
$opts
{
'html_strip'
} })
if
(
$opts
{
'html_strip'
});
if
(
$opts
{
'html_repl'
}) {
foreach
my
$repl
(@{
$opts
{
'html_repl'
} }) {
if
(
ref
(
$repl
)) {
$in
=~ s/
$repl
/ /sg;
}
else
{
$in
=~ s/\Q
$repl
\E/ /sg;
}
}
$in
=~ s/\s+/ /sg;
}
@in
=
split
(
"\n"
,
$in
);
}
return
@in
;
}
sub
_csv_count_columns {
my
(
$line
) =
@_
;
my
$c
= 0;
while
(
$line
) {
if
(
$line
=~ /^"/) {
$line
=~ s/^
".*?($|"
)//;
}
else
{
$line
=~ s/^[^,]*//;
}
$c
++
if
(
$line
=~ s/^,//);
}
return
$c
+1;
}
{
my
$second
;
sub
_do_codeset {
my
(
$type
,
$src
,
$codesets
,
$stdcodesets
,
$allow
) =
@_
;
$allow
= 0
if
(!
$allow
);
if
(!
defined
$second
) {
$second
= 0;
}
else
{
$second
= 1;
}
my
%std
=
map
{
$_
,1 }
@$stdcodesets
;
no
strict
'refs'
;
my
$func
=
"_init_${type}_${src}"
;
&$func
();
$func
=
"_read_${type}_${src}"
;
ELE:
while
(1) {
my
@ele
=
&$func
();
last
if
(!
@ele
);
my
(
%codes
,
@names
);
foreach
my
$codeset
(
@$codesets
) {
my
$code
=
shift
(
@ele
);
next
if
(!
defined
(
$code
) ||
$code
eq
''
||
exists
$Data
{
$type
}{
$src
}{
'ignore'
}{
$codeset
}{
$code
});
$codes
{
$codeset
} =
$code
;
}
foreach
my
$name
(
@ele
) {
if
(
$name
) {
next
ELE
if
(
exists
$Data
{
$type
}{
$src
}{
'ignore'
}{
'name'
}{
$name
});
push
(
@names
,
$name
);
}
}
next
if
(!
@names
&& !
%codes
);
if
(!
@names
) {
my
@codes
=
sort
values
(
%codes
);
print
"WARNING [$type,$src]: Codes with no name: @codes\n"
;
next
;
}
if
(!
%codes
) {
print
"WARNING [$type,$src]: Element with no codes: @names\n"
;
next
;
}
foreach
my
$codeset
(
sort
keys
%codes
) {
my
$code
=
$codes
{
$codeset
};
if
(
exists
$Data
{
$type
}{
$src
}{
'orig'
}{
$codeset
}{
$code
}) {
$codes
{
$codeset
} =
$Data
{
$type
}{
$src
}{
'orig'
}{
$codeset
}{
$code
};
}
}
my
(
%tmp
,
@tmp
);
foreach
my
$name
(
@names
) {
if
(
exists
$Data
{
$type
}{
$src
}{
'orig'
}{
'name'
}{
$name
}) {
$name
=
$Data
{
$type
}{
$src
}{
'orig'
}{
'name'
}{
$name
};
}
next
if
(
exists
$tmp
{
$name
});
$tmp
{
$name
} = 1;
push
(
@tmp
,
$name
);
}
@names
=
@tmp
;
foreach
my
$codeset
(
sort
keys
%codes
) {
my
$code
=
$codes
{
$codeset
};
_ascii_new(
$type
,
$src
,
$codeset
,
$code
);
}
foreach
my
$name
(
@names
) {
_ascii_new(
$type
,
$src
,
'name'
,
$name
);
}
my
(
$err
,
$id
,
$subid
) = _get_ID_new(
$type
,
$src
,
$second
,
$allow
,
\
%codes
,\
@names
);
next
if
(
$err
);
foreach
my
$codeset
(
keys
%codes
) {
my
$code
=
$codes
{
$codeset
};
if
(
$std
{
$codeset
}) {
$Code2ID
{
$codeset
}{
$code
} = [
$id
,
$subid
];
$ID2Code
{
$codeset
}{
$id
} =
$code
;
}
elsif
(!
exists
$Code2ID
{
$codeset
}{
$code
}) {
print
"WARNING [$type,$src,$codeset]: "
.
"new code not added from a non-standard source: $code\n"
;
}
}
}
my
@tmp
;
LINKS:
foreach
my
$links
(@{
$Data
{
$type
}{
'link'
} }) {
my
$id
;
foreach
my
$link
(
@$links
) {
if
(
exists
$Alias
{
lc
(
$link
)}) {
my
$i
=
$Alias
{
lc
(
$link
)}[0];
if
(
$id
&&
$i
!=
$id
) {
print
"WARNING [$type,$src]: "
.
"alias refers to multiple elements: $link\n"
;
next
LINKS;
}
$id
=
$i
;
}
}
if
(
$id
) {
foreach
my
$name
(
@$links
) {
if
(!
exists
$Alias
{
lc
(
$name
)}) {
push
@{
$ID2Names
{
$id
} },
$name
;
my
$subid
= $
$Alias
{
lc
(
$name
)} = [
$id
,
$subid
];
}
}
}
else
{
push
(
@tmp
,
$links
);
}
}
$Data
{
$type
}{
'link'
} = \
@tmp
;
}
}
sub
_ascii_new {
my
(
$type
,
$src
,
$key
,
$val
) =
@_
;
if
(
$val
!~ /^[[:ascii:]]*$/) {
my
$tmp
=
$val
;
$tmp
=~ s/[[:ascii:]]//g;
print
"NON-ASCII [$type,$src,$key]: '$val' [$tmp]\n"
;
}
}
sub
get_row {
my
(
$type
,
$inref
) =
@_
;
return
()
if
(
$$inref
!~ m,^\s*<
tr
,);
if
(
$$inref
!~ s,^(.*?)</
tr
[^>]*>,,) {
die
"ERROR [$type]: malformed HTML\n"
;
}
my
$row
= $1;
if
(
$row
=~ m,<table,) {
die
"ERROR [$type]: embedded table\n"
;
}
my
@row
;
while
(
$row
=~ s,(?:.*?)<(td|th)[^>]*>\s*(.*?)\s*</\1[^>]*>,,) {
my
$val
= $2;
push
(
@row
,
$val
);
}
return
@row
;
}
sub
jump_to_row {
my
(
$inref
,
$header
,
$nested
) =
@_
;
if
(
$nested
) {
my
$err
;
return
0
if
(
$$inref
!~ s,^(.*?)\Q
$header
\E(.*?)</table[^>]*>\s*</td[^>]*>\s*,,);
while
(
$$inref
=~ m,^<td,) {
$err
= strip_entry(
$inref
);
return
0
if
(
$err
);
}
return
0
if
(
$$inref
!~ s,^\s*</
tr
[^>]*>,,);
return
1;
}
if
(
$$inref
=~ s,^(.*?)\Q
$header
\E(.*?)</
tr
[^>]*>\s*(?=<
tr
),,) {
return
1;
}
else
{
return
0;
}
}
sub
jump_to_entry {
my
(
$inref
,
$value
) =
@_
;
if
(
$$inref
=~ s,(.*?)(?=<(?:td|th)[^>]*>\s*\Q
$value
\E\s*),,) {
return
1;
}
else
{
return
0;
}
}
sub
jump_to_table {
my
(
$inref
) =
@_
;
if
(
$$inref
=~ s,(.*?)(?=<table),,) {
return
1;
}
else
{
return
0;
}
}
sub
get_entry {
my
(
$inref
) =
@_
;
if
(
$$inref
=~ s,.*?<td[^>]*>\s*(.*?)\s*</td[^>]*>,,) {
return
$1;
}
return
""
;
}
sub
strip_tags {
my
(
$inref
,
@tags
) =
@_
;
foreach
my
$tag
(
@tags
) {
$$inref
=~ s,</?
$tag
[^>]*>, ,g;
}
}
sub
strip_token {
my
(
$inref
) =
@_
;
$$inref
=~ s,^\s*,,;
if
(
$$inref
=~ s,^</([^>]*)>,,) {
my
$tag
= $1;
$tag
=~ s,\s.*$,,;
return
(
'close'
,
$tag
);
}
elsif
(
$$inref
=~ s,^<([^>]*)>,,) {
my
$tag
= $1;
$tag
=~ s,\s.*$,,;
return
(
'open'
,
$tag
);
}
else
{
$$inref
=~ s,^([^<]*),,;
my
$val
= $1;
$val
=~ s,\s*$,,;
return
(
'val'
,
$val
);
}
}
sub
strip_entry {
my
(
$inref
) =
@_
;
my
(
@tag
);
while
(1) {
my
(
$op
,
$val
) = strip_token(
$inref
);
if
(
$op
eq
'open'
) {
push
(
@tag
,
$val
);
next
;
}
elsif
(
$op
eq
'close'
) {
my
$old
=
pop
(
@tag
);
if
(
$old
ne
$val
) {
return
1;
}
last
if
(!
@tag
);
}
else
{
last
if
(!
@tag
);
next
;
}
}
return
0;
}
BEGIN {
use
POSIX
qw(:termios_h)
;
my
$fd_stdin
=
fileno
(STDIN);
my
$term
= POSIX::Termios->new();
$term
->getattr(
$fd_stdin
);
my
$oterm
=
$term
->getlflag();
my
$echo
= ECHO | ECHOK | ICANON;
my
$noecho
=
$oterm
& ~
$echo
;
sub
cbreak {
$term
->setlflag(
$noecho
);
$term
->setcc(VTIME, 1);
$term
->setattr(
$fd_stdin
, TCSANOW);
}
sub
cooked {
$term
->setlflag(
$oterm
);
$term
->setcc(VTIME, 0);
$term
->setattr(
$fd_stdin
, TCSANOW);
}
sub
getone {
my
$key
=
''
;
cbreak();
sysread
(STDIN,
$key
, 1);
cooked();
return
$key
;
}
}
END { cooked() }