#!/usr/bin/env perl
# Names offered for optional export (the @EXPORT_OK convention used by
# Exporter; the Exporter setup itself is not visible in this part of the file).
our @EXPORT_OK = qw(fixstring strsort_logiconumerically);

# Forward declarations, so the prototypes are already known wherever the
# subs are called before their definitions appear.
sub main(@);
sub fixstring(_);
sub strsort_logiconumerically(@);

# Modulino guard: caller() is false only when this file is executed directly,
# in which case it runs main() and exits; when loaded from other code the
# guard is skipped and only the definitions take effect.
if (!caller()) {
    main();
    exit 0;
}
# Script entry point.
#
# Reads every line from the files named in the arguments (or from the usual
# <> sources when no arguments are given), strips the line endings, and prints
# the lines in the order produced by strsort_logiconumerically().
#
# Parameters: an optional list of file names; when present it replaces @ARGV
#             for the duration of the call.
# Returns:    nothing.
sub main(@) {
    # say is not available unless the feature is switched on; enabling it
    # here keeps the sub working regardless of what the rest of the file does.
    use feature 'say';

    # Apply the UTF-8 layer to the standard handles and to files opened in
    # this scope. An explicit qw list is used rather than relying on
    # <...> being treated as a glob that happens to split the words.
    use open qw< :std :utf8 >;

    # The diamond operator reads from @ARGV, so explicit arguments are routed
    # through a localized copy that is restored when main() returns.
    local @ARGV = @_ if @_;

    my @lines;
    while (my $line = <>) {
        chomp $line;
        push @lines, $line;
    }

    # The collator applies fixstring() itself while sorting, so the raw lines
    # are passed through unchanged.
    say for strsort_logiconumerically(@lines);

    return;
}
# Sort a list of strings with a Unicode::Collate collator whose comparison
# keys are produced by fixstring().
#
# Parameters: the strings to sort.
# Returns:    whatever the collator's sort() method returns for that list
#             (the original strings, reordered).
sub strsort_logiconumerically(@) {
    # Loaded on demand: nothing visible in this file loads the module, and
    # require is a no-op when it has already been loaded elsewhere.
    require Unicode::Collate;

    # Direct method call instead of indirect-object "new CLASS ARGS", which
    # is ambiguous to parse and unavailable under newer feature bundles.
    my $collator = Unicode::Collate->new(
        upper_before_lower => 1,
        preprocess         => \&fixstring,
    );

    return $collator->sort(@_);
}
# Rewrite one string into the form used as a comparison key.
#
# Three passes are applied in order:
#   1. Roman numerals (Unicode numeral characters, then runs of ASCII I/V/X)
#      become Arabic numbers when Roman::isroman() accepts them.
#   2. Numbers, with optional sign, thousands commas and fraction, become
#      fixed-width, zero-padded fields.
#   3. Greek letter names and Greek letters become Latin letters.
#
# Parameters: the string to rewrite; because of the (_) prototype, $_ is used
#             when the sub is called without an argument.
# Returns:    the rewritten string (the argument itself is not modified).
sub fixstring(_) {
    my $text = shift;

    $text = _roman_numerals_to_arabic($text);
    $text = _numbers_to_fixed_width($text);
    $text = _greek_to_latin($text);

    return $text;
}

# Return the Arabic value of $candidate if Roman::isroman() accepts it,
# otherwise return $candidate unchanged.
sub _roman_value {
    my ($candidate) = @_;

    # Loaded on demand: the Roman:: functions are called fully qualified and
    # nothing visible in this file loads the module. Only strings that
    # actually contain a numeral candidate reach this point.
    require Roman;

    return Roman::isroman($candidate)
        ? Roman::arabic($candidate)
        : $candidate;
}

# Spell a run of Unicode Roman numeral characters with ASCII letters.
sub _ascii_roman_numerals {
    my ($numerals) = @_;

    # These characters are rewritten by hand before normalization; large
    # values are written as repeated "M".
    $numerals =~ s/\N{ROMAN NUMERAL ONE THOUSAND C D}/M/g;
    $numerals =~ s/\N{ROMAN NUMERAL FIVE THOUSAND}/'M' x 5/ge;
    $numerals =~ s/\N{ROMAN NUMERAL TEN THOUSAND}/'M' x 10/ge;
    $numerals =~ s/\N{ROMAN NUMERAL REVERSED ONE HUNDRED}/C/g;
    $numerals =~ s/\N{ROMAN NUMERAL SIX LATE FORM}/VI/g;
    $numerals =~ s/\N{ROMAN NUMERAL FIFTY EARLY FORM}/L/g;
    $numerals =~ s/\N{ROMAN NUMERAL FIFTY THOUSAND}/'M' x 50/ge;
    $numerals =~ s/\N{ROMAN NUMERAL ONE HUNDRED THOUSAND}/'M' x 100/ge;

    # Compatibility decomposition handles the remaining numeral characters.
    require Unicode::Normalize;
    return Unicode::Normalize::NFKD($numerals);
}

# Pass 1: replace Roman numerals with Arabic numbers.
sub _roman_numerals_to_arabic {
    my ($text) = @_;

    # Runs of characters from the Unicode Roman numeral range.
    $text =~ s{
        ( [\x{2160}-\x{2188}]+ )
    }{
        my $run = $1;    # copy before any nested match can clobber $1
        _roman_value( _ascii_roman_numerals($run) );
    }xge;

    # ASCII I/V/X: a whole word of any length, or a run of two or more
    # letters that touches a word boundary on one side only.
    $text =~ s{ (?| \b ( [IVX]+ ) \b
                  | \b ( [IVX]{2,} ) \B
                  | \B ( [IVX]{2,} ) \b
                )
    }{
        my $run = $1;
        _roman_value($run);
    }xge;

    return $text;
}

# Build the fixed-width field for one matched number.
#   $whole - sign (optional) plus integer digits, possibly with commas
#   $frac  - fraction digits, or undef when the number had no fraction
sub _number_field {
    my ($whole, $frac) = @_;

    # Commas and plus-like signs carry no value for the integer part.
    $whole =~ s/[\N{COMMA}\N{PLUS SIGN}\N{PLUS-MINUS SIGN}\N{MINUS-OR-PLUS SIGN}]//g;

    # The sign is matched with \p{Dash}, so it must be normalized with the
    # same property. \p{Pd} alone misses characters such as U+2212 MINUS
    # SIGN, which would then reach sprintf as non-numeric text and turn the
    # whole number into 0.
    $whole =~ s/\p{Dash}/-/g;

    my $field = length($frac)
        ? sprintf(" 000%+012d.%s ", $whole, $frac)
        : sprintf(" 000%+012d ", $whole);

    # The sign characters are swapped for two Cyrillic letters; presumably
    # so that negative fields collate before positive ones - TODO confirm.
    $field =~ tr[\-+][\N{CYRILLIC CAPITAL LETTER SCHWA}\N{CYRILLIC CAPITAL LETTER BE}];

    return $field;
}

# Pass 2: replace every number with its fixed-width field.
sub _numbers_to_fixed_width {
    my ($text) = @_;

    $text =~ s{ (?<WHOLE>
                    (?:
                        (?! [\N{EM DASH}\N{EN DASH}] )
                        [\N{PLUS SIGN}\N{PLUS-MINUS SIGN}\N{MINUS-OR-PLUS SIGN}\p{Dash}]
                    ) ?
                    (?: \b \d{1,3} (?: , \d{3} )+ \b
                      | \d+
                    )
                )
                (?:
                    \.
                    (?<FRAC> \d+ )
                )?
    }{
        # Copy the captures before the helper runs its own matches.
        my ($whole, $frac) = ($+{WHOLE}, $+{FRAC});
        _number_field($whole, $frac);
    }xge;

    return $text;
}

# Pass 3: transliterate Greek letter names and Greek letters to Latin.
sub _greek_to_latin {
    my ($text) = @_;

    for ($text) {
        # Spelled-out letter names standing alone as words, in any case.
        s/ \b ALPHA   \b /A/xgi;
        s/ \b BETA    \b /B/xgi;
        s/ \b GAMMA   \b /G/xgi;
        s/ \b DELTA   \b /D/xgi;
        s/ \b EPSILON \b /E/xgi;
        s/ \b ZETA    \b /Z/xgi;
        s/ \b ETA     \b /E/xgi;
        s/ \b THETA   \b /TH/xgi;
        s/ \b IOTA    \b /I/xgi;
        s/ \b KAPPA   \b /K/xgi;
        # Accept the common spelling "lambda" as well as the Unicode
        # character-name spelling "lamda".
        s/ \b LAMB?DA \b /L/xgi;
        s/ \b MU      \b /M/xgi;
        s/ \b NU      \b /N/xgi;
        s/ \b XI      \b /X/xgi;
        s/ \b OMICRON \b /O/xgi;
        s/ \b PI      \b /P/xgi;
        s/ \b RHO     \b /R/xgi;
        s/ \b SIGMA   \b /S/xgi;
        s/ \b TAU     \b /T/xgi;
        s/ \b UPSILON \b /U/xgi;
        s/ \b PHI     \b /PH/xgi;
        s/ \b CHI     \b /CH/xgi;
        s/ \b PSI     \b /PS/xgi;
        s/ \b OMEGA   \b /O/xgi;

        # ALPHA and BETA are also replaced when they are attached to other
        # word characters on one side.
        s/ \b ALPHA \B /A/xgi;
        s/ \B ALPHA \b /A/xgi;
        s/ \b BETA  \B /B/xgi;
        s/ \B BETA  \b /B/xgi;

        # Lowercase Greek letters.
        s/ \N{GREEK SMALL LETTER ALPHA}       /a/xg;
        s/ \N{GREEK SMALL LETTER BETA}        /b/xg;
        s/ \N{GREEK SMALL LETTER GAMMA}       /g/xg;
        s/ \N{GREEK SMALL LETTER DELTA}       /d/xg;
        s/ \N{GREEK SMALL LETTER EPSILON}     /e/xg;
        s/ \N{GREEK SMALL LETTER ZETA}        /z/xg;
        s/ \N{GREEK SMALL LETTER ETA}         /e/xg;
        s/ \N{GREEK SMALL LETTER THETA}       /th/xg;
        s/ \N{GREEK SMALL LETTER IOTA}        /i/xg;
        s/ \N{GREEK SMALL LETTER KAPPA}       /k/xg;
        s/ \N{GREEK SMALL LETTER LAMDA}       /l/xg;
        s/ \N{GREEK SMALL LETTER MU}          /m/xg;
        s/ \N{GREEK SMALL LETTER NU}          /n/xg;
        s/ \N{GREEK SMALL LETTER XI}          /x/xg;
        s/ \N{GREEK SMALL LETTER OMICRON}     /o/xg;
        s/ \N{GREEK SMALL LETTER PI}          /p/xg;
        s/ \N{GREEK SMALL LETTER RHO}         /r/xg;
        s/ \N{GREEK SMALL LETTER FINAL SIGMA} /s/xg;
        s/ \N{GREEK SMALL LETTER SIGMA}       /s/xg;
        s/ \N{GREEK SMALL LETTER TAU}         /t/xg;
        s/ \N{GREEK SMALL LETTER UPSILON}     /u/xg;
        s/ \N{GREEK SMALL LETTER PHI}         /ph/xg;
        s/ \N{GREEK SMALL LETTER CHI}         /ch/xg;
        s/ \N{GREEK SMALL LETTER PSI}         /ps/xg;
        s/ \N{GREEK SMALL LETTER OMEGA}       /o/xg;

        # Uppercase Greek letters.
        s/ \N{GREEK CAPITAL LETTER ALPHA}   /A/xg;
        s/ \N{GREEK CAPITAL LETTER BETA}    /B/xg;
        s/ \N{GREEK CAPITAL LETTER GAMMA}   /G/xg;
        s/ \N{GREEK CAPITAL LETTER DELTA}   /D/xg;
        s/ \N{GREEK CAPITAL LETTER EPSILON} /E/xg;
        s/ \N{GREEK CAPITAL LETTER ZETA}    /Z/xg;
        s/ \N{GREEK CAPITAL LETTER ETA}     /E/xg;
        s/ \N{GREEK CAPITAL LETTER THETA}   /TH/xg;
        s/ \N{GREEK CAPITAL LETTER IOTA}    /I/xg;
        s/ \N{GREEK CAPITAL LETTER KAPPA}   /K/xg;
        s/ \N{GREEK CAPITAL LETTER LAMDA}   /L/xg;
        s/ \N{GREEK CAPITAL LETTER MU}      /M/xg;
        s/ \N{GREEK CAPITAL LETTER NU}      /N/xg;
        s/ \N{GREEK CAPITAL LETTER XI}      /X/xg;
        s/ \N{GREEK CAPITAL LETTER OMICRON} /O/xg;
        s/ \N{GREEK CAPITAL LETTER PI}      /P/xg;
        s/ \N{GREEK CAPITAL LETTER RHO}     /R/xg;
        s/ \N{GREEK CAPITAL LETTER SIGMA}   /S/xg;
        s/ \N{GREEK CAPITAL LETTER TAU}     /T/xg;
        s/ \N{GREEK CAPITAL LETTER UPSILON} /U/xg;
        s/ \N{GREEK CAPITAL LETTER PHI}     /PH/xg;
        s/ \N{GREEK CAPITAL LETTER CHI}     /CH/xg;
        s/ \N{GREEK CAPITAL LETTER PSI}     /PS/xg;
        s/ \N{GREEK CAPITAL LETTER OMEGA}   /O/xg;
    }

    return $text;
}
1;