— |
$Text::Fy::Utils::VERSION = '0.09' ;
use 5.020;
our @ISA = qw(Exporter) ;
our @EXPORT = qw() ;
our @EXPORT_OK = qw(asciify isoify simplify commify cv_to_win cv_from_win) ;
my %cp1252_to_uni ;
for (128..159) {
$cp1252_to_uni { chr ( $_ )} = decode( 'cp1252' , chr ( $_ ));
}
my %uni_to_ascii = (
"\x{20ac}" => q{E} ,
"\x{201a}" => q{,} ,
"\x{0192}" => q{f} ,
"\x{201e}" => q{"} ,
"\x{2026}" => q{_} ,
"\x{2020}" => q{+} ,
"\x{02c6}" => q{^} ,
"\x{2030}" => q{%} ,
"\x{2039}" => q{<} ,
"\x{0152}" => q{O} ,
"\x{2018}" => q{'} ,
"\x{2019}" => q{'} ,
"\x{201c}" => q{"} ,
"\x{201d}" => q{"} ,
"\x{2022}" => q{.} ,
"\x{2013}" => q{-} ,
"\x{2014}" => q{-} ,
"\x{20dc}" => q{~} ,
"\x{203a}" => q{>} ,
"\x{203a}" => q{>} ,
"\x{0153}" => q{o} ,
"\x{00a1}" => q{!} ,
"\x{00a2}" => q{c} ,
"\x{00a3}" => q{L} ,
"\x{00a5}" => q{Y} ,
"\x{00a6}" => q{|} ,
"\x{00a9}" => q{C} ,
"\x{00aa}" => q{a} ,
"\x{00ab}" => q{"} ,
"\x{00ac}" => q{-} ,
"\x{00ad}" => q{-} ,
"\x{00ae}" => q{R} ,
"\x{00b2}" => q{2} ,
"\x{00b3}" => q{3} ,
"\x{00b4}" => q{'} ,
"\x{00b7}" => q{.} ,
"\x{00b9}" => q{1} ,
"\x{00ba}" => q{0} ,
"\x{00bb}" => q{"} ,
"\x{00bf}" => q{?} ,
"\x{00c6}" => q{A} ,
"\x{00d7}" => q{x} ,
"\x{00d8}" => q{O} ,
"\x{00df}" => q{s} ,
"\x{00e6}" => q{a} ,
"\x{00f0}" => q{d} ,
"\x{00f8}" => q{o} ,
);
my %uni_to_iso = (
"\x{2013}" => q{-} ,
"\x{2014}" => q{-} ,
"\x{2018}" => q{'} ,
"\x{2019}" => q{'} ,
"\x{201c}" => q{"} ,
"\x{201d}" => q{"} ,
"\x{2026}" => q{_} ,
);
my $convert_c2u = _make_tr(\ %cp1252_to_uni );
my $convert_u2c = _make_tr(\ %cp1252_to_uni , 'R' );
my $convert_u2a = _make_tr(\ %uni_to_ascii );
my $convert_u2i = _make_tr(\ %uni_to_iso );
sub _make_tr {
my ( $href , $rev ) = @_ ;
my $from = join '' , map { sprintf '\x{%04x}' , ord ( $_ ) } sort keys %$href ;
my $to = join '' , map { sprintf '\x{%04x}' , ord ( $href ->{ $_ }) } sort keys %$href ;
my $code = 'sub { $_[0] =~ ' .( $rev ? "tr/$to/$from/" : "tr/$from/$to/" ). '; }' ;
eval $code or die "Can't compile >$code< because $@" ;
}
sub asciify {
_aconvert( $_ [0], 0, 0);
}
sub isoify {
_aconvert( $_ [0], 1, 0);
}
sub simplify {
_aconvert( $_ [0], 2, 0);
}
sub _aconvert {
my ( $text , $loc_m , $loc_w ) = @_ ;
$convert_u2i ->( $text );
if ( $loc_w ) {
$convert_c2u ->( $text );
}
if ( $loc_m == 1) {
$text = NFC( $text ) =~ s{\p{Diacriticals}} '' xmsgr;
if ( $loc_w ) {
$convert_u2c ->( $text );
}
$text =~ s{([^\x00-\xff])}{NFD($1)}xmsge;
$text =~ s{\p{Diacriticals}} '' xmsg;
$text = encode( 'iso-8859-1' , $text );
}
else {
$convert_u2a ->( $text ) if $loc_m == 2;
$text = encode( 'iso-8859-1' , NFD( $text ) =~ s{\p{Diacriticals}} '' xmsgr);
$text =~ s{\P{ASCII}} '?' xmsg;
}
return $text ;
}
sub cv_from_win {
my ( $buf ) = @_ ;
$convert_c2u ->( $buf );
return $buf ;
}
sub cv_to_win {
my ( $buf ) = @_ ;
$convert_u2c ->( $buf );
return $buf ;
}
sub commify {
local $_ = shift ;
my ( $sep ) = @_ ;
$sep //= '_' ;
my $len = length ( $_ );
for my $i (1.. $len ) {
last unless s/^([-+]?\d+)(\d{3})/$1 $sep $2/;
}
return $_ ;
}
1;
|