#!/usr/bin/perl
# Everything printed to standard output is encoded as UTF-8.
binmode STDOUT, ':utf8';

our $VERSION = '0.04';

# Parsed command-line switches, keyed by the first name in each spec below.
# NOTE(review): GetOptions is not imported in the visible part of this file;
# presumably Getopt::Long is loaded elsewhere -- confirm.
my $opts = {};

# GetOptions returns false when the command line contains an unknown or
# malformed option.  Stop with the usage text in that case instead of
# silently converting with settings the user did not ask for.
GetOptions(
    $opts,
    'help|h|?',
    'version|v',
    'inputencoding|i=s',
    'filter|f!',
    'scheme|s=s',
    'normalize|n!',
    'normalization|N=s',
    'strip_outer_braces|b!',
) or die usage();

# --help and --version short-circuit everything else.  The text is emitted
# through die, i.e. on STDERR with a non-zero exit status.
die usage()   if exists $opts->{'help'};
die version() if exists $opts->{'version'};
# Raw (not yet decoded) LaTeX input, filled from STDIN or from a file.
# NOTE(review): croak and read_file are not imported in the visible part of
# this file; presumably Carp and a file-slurping module are loaded elsewhere.
my $text;

if ( $opts->{filter} ) {
    # Filter mode: accumulate standard input line by line.
    while ( defined( my $line = <STDIN> ) ) {
        $text .= $line;
    }

    # Refuse empty, false or whitespace-only input.
    my $nothing_to_do = ( !$text || $text =~ /^\s*$/ );
    croak "Huh? There is nothing to convert!\n" if $nothing_to_do;
}
else {
    # File mode: the first positional argument names the input file;
    # without one, show the usage text and stop.
    my $infile = $ARGV[0];
    die usage() unless $infile;

    $text = read_file($infile);
    croak "Cannot read input file '$infile'" unless $text;
}
# Options forwarded to latex_decode(); only switches the user actually
# supplied are passed on.
my %ld_opts;

# Turn the raw input octets into Perl characters exactly once: with the
# encoding named by --inputencoding when given, as UTF-8 otherwise.
# Running decode_utf8() over text that decode() has already converted
# treats characters as octets, which can corrupt non-ASCII input or die
# on wide characters.
# NOTE(review): decode/decode_utf8 are presumably imported from Encode
# elsewhere in the file -- confirm.
if ( exists $opts->{inputencoding} ) {
    $text = decode( $opts->{inputencoding}, $text );
}
else {
    $text = decode_utf8($text);
}

if ( exists $opts->{scheme} ) {
    $ld_opts{scheme} = $opts->{scheme};
}

# --normalize (or its negation) is rejected with a pointer to its
# replacement.
if ( exists $opts->{normalize} ) {
    die
"The option 'normalize' has been removed:\n\t use '--normalization 0' to suppress normalization\n";
}

# A false value or the literal string 'undef' switches normalization off
# (normalize => 0); any other value is forwarded as the normalization form.
if ( exists $opts->{normalization} ) {
    my $form = $opts->{normalization};
    if ( !$form or $form eq 'undef' ) {
        $ld_opts{normalize} = 0;
    }
    else {
        $ld_opts{normalization} = $form;
    }
}

if ( exists $opts->{strip_outer_braces} ) {
    $ld_opts{strip_outer_braces} = $opts->{strip_outer_braces};
}

# Convert and write the result to standard output.
print latex_decode( $text, %ld_opts );
# Build the version banner: a blank line, "<program> Version: <number>",
# and a trailing blank line.  Reads the package variable $VERSION.
sub version {
    my $program = 'latex2utf8';
    return "\n$program Version: $VERSION\n\n";
}
# Return the help text shown for --help and for a missing input file.
# The text starts with an empty line and ends with an empty line.
# The heredoc is non-interpolating, so the backslash in the example is
# written literally.
sub usage {
    my $help = <<'END_USAGE';

Usage: latex2utf8 infile > outfile
Options:
--help|-h Show this help message.
--version|-v Display version number.
--filter|-f Use script as a filter, using standard input instead of
an input file
--inputencoding|-i [encoding]
Encoding used in the input file (or STDIN if using
the option --filter)
--scheme|-s Decoding scheme to use (possible values are 'base',
'extra', 'full'; default = 'extra')
--normalization|-N [form]
The normalization form to use (default = 'NFC')
(with a value of 0 or undef the output will not be
normalized with Unicode::Normalize)
--strip_outer_braces|-b
Remove curly braces around characters (boolean)
(e.g. "saut{\'e}" => "sauté")
(See "perldoc LaTeX::Decode" for more information on the last three options.)
Example: latex2utf8 -i latin1 -s base -N NFD infile.tex > outfile.tex

END_USAGE
    return $help;
}