our
$READLINE_PROGRESS_QUANTUM
= 25;
sub
create {
my
(
$class
,
$source
,
%options
) =
@_
;
if
(
$options
{content}) {
my
(
$dir
,
$name
,
$ext
) = pathname_split(
$source
);
$options
{source} =
$source
;
$options
{shortsource} =
"$name.$ext"
;
return
$class
->new(
$options
{content},
%options
); }
$options
{source} =
''
;
return
$class
->new(
$source
,
%options
); }
elsif
(!
defined
$source
) {
return
$class
->new(
''
,
%options
); }
else
{
my
$type
= pathname_protocol(
$source
);
my
$newclass
=
"LaTeXML::Core::Mouth::$type"
;
if
(!
$newclass
->can(
'new'
)) {
require
"LaTeXML/Core/Mouth/$type.pm"
; }
return
$newclass
->new(
$source
,
%options
); } }
sub
new {
my
(
$class
,
$string
,
%options
) =
@_
;
$string
=
q{}
unless
defined
$string
;
my
$self
=
bless
{
source
=>
$options
{source},
shortsource
=>
$options
{shortsource},
fordefinitions
=> (
$options
{fordefinitions} ? 1 : 0),
notes
=> (
$options
{notes} ? 1 : 0),
},
$class
;
$self
->openString(
$string
);
$self
->initialize;
return
$self
; }
sub
openString {
my
(
$self
,
$string
) =
@_
;
if
(
defined
$string
) {
if
(utf8::is_utf8(
$string
)) { }
elsif
(
my
$encoding
=
$STATE
->lookupValue(
'PERL_INPUT_ENCODING'
)) {
$string
= decode(
$encoding
,
$string
, Encode::FB_DEFAULT);
if
(
$string
=~ s/\x{FFFD}/ /g) {
Info(
'misdefined'
,
$encoding
,
$self
,
"input isn't valid under encoding $encoding"
); } } }
$$self
{string} =
$string
;
$$self
{buffer} = [(
defined
$string
? splitLines(
$string
) : ())];
return
; }
sub
initialize {
my
(
$self
) =
@_
;
$$self
{lineno} = 0;
$$self
{colno} = 0;
$$self
{chars} = [];
$$self
{nchars} = 0;
if
(
$$self
{notes}) {
my
$source
=
defined
(
$$self
{source}) ? (
$$self
{source} ||
'Literal String'
) :
'Anonymous String'
;
$$self
{note_message} =
"Processing "
. (
$$self
{fordefinitions} ?
"definitions"
:
"content"
)
.
" "
.
$source
;
ProgressSpinup(
$$self
{note_message}); }
if
(
$$self
{fordefinitions}) {
$$self
{saved_at_cc} =
$STATE
->lookupCatcode(
'@'
);
$$self
{SAVED_INCLUDE_COMMENTS} =
$STATE
->lookupValue(
'INCLUDE_COMMENTS'
);
$STATE
->assignCatcode(
'@'
=> CC_LETTER);
$STATE
->assignValue(
INCLUDE_COMMENTS
=> 0); }
return
; }
sub
finish {
my
(
$self
) =
@_
;
return
if
$$self
{finished};
$$self
{finished} = 1;
$$self
{buffer} = [];
$$self
{lineno} = 0;
$$self
{colno} = 0;
$$self
{chars} = [];
$$self
{nchars} = 0;
if
(
$$self
{fordefinitions}) {
$STATE
->assignCatcode(
'@'
=>
$$self
{saved_at_cc});
$STATE
->assignValue(
INCLUDE_COMMENTS
=>
$$self
{SAVED_INCLUDE_COMMENTS}); }
if
(
$$self
{notes}) {
ProgressSpindown(
$$self
{note_message}); }
return
; }
sub
splitLines {
my
(
$string
) =
@_
;
my
@lines
=
split
(/\r\n|\r|\n/s,
$string
, -1);
if
(
@lines
&&
$lines
[-1] eq
''
) {
pop
(
@lines
); }
return
@lines
; }
sub
splitChars {
my
(
$line
) =
@_
;
return
[
$line
=~ m/\X/g]; }
sub
getNextLine {
my
(
$self
) =
@_
;
return
unless
scalar
(@{
$$self
{buffer} });
my
$line
=
shift
(@{
$$self
{buffer} });
return
$line
; }
sub
hasMoreInput {
my
(
$self
) =
@_
;
return
!
$self
->isEOL ||
scalar
(@{
$$self
{buffer} }); }
sub
getNextChar {
my
(
$self
) =
@_
;
if
(
$$self
{colno} <
$$self
{nchars}) {
my
$ch
=
$$self
{chars}[
$$self
{colno}++];
my
$cc
=
$$STATE
{catcode}{
$ch
}[0] // CC_OTHER;
if
((
$cc
== CC_SUPER)
&& (
$$self
{colno} + 1 <
$$self
{nchars}) && (
$ch
eq
$$self
{chars}[
$$self
{colno}])) {
my
(
$c1
,
$c2
);
if
((
$$self
{colno} + 2 <
$$self
{nchars})
&& ((
$c1
=
$$self
{chars}[
$$self
{colno} + 1]) =~ /^[0-9a-f]$/)
&& ((
$c2
=
$$self
{chars}[
$$self
{colno} + 2]) =~ /^[0-9a-f]$/)) {
$ch
=
chr
(
hex
(
$c1
.
$c2
));
splice
(@{
$$self
{chars} },
$$self
{colno} - 1, 4,
$ch
);
$$self
{nchars} -= 3; }
else
{
my
$c
=
$$self
{chars}[
$$self
{colno} + 1];
my
$cn
=
ord
(
$c
);
$ch
=
chr
(
$cn
+ (
$cn
>= 64 ? -64 : 64));
splice
(@{
$$self
{chars} },
$$self
{colno} - 1, 3,
$ch
);
$$self
{nchars} -= 2; }
$cc
=
$STATE
->lookupCatcode(
$ch
) // CC_OTHER; }
return
(
$ch
,
$cc
); }
else
{
return
(
undef
,
undef
); } }
sub
stringify {
my
(
$self
) =
@_
;
return
"Mouth[<string>\@$$self{lineno}x$$self{colno}]"
; }
sub
getLocator {
my
(
$self
) =
@_
;
my
(
$toLine
,
$toCol
,
$fromLine
,
$fromCol
) = (
$$self
{lineno},
$$self
{colno});
my
$maxCol
= (
$$self
{nchars} ?
$$self
{nchars} - 1 : 0);
if
((
defined
$toCol
) && (
$toCol
>=
$maxCol
)) {
$fromLine
=
$toLine
;
$fromCol
= 0; }
else
{
$fromLine
=
$toLine
;
$fromCol
=
$toCol
; }
return
LaTeXML::Common::Locator->new(
$$self
{source},
$fromLine
,
$fromCol
,
$toLine
,
$toCol
); }
sub
getSource {
my
(
$self
) =
@_
;
return
$$self
{source}; }
sub
handle_escape {
my
(
$self
) =
@_
;
my
(
$ch
,
$cc
) = getNextChar(
$self
);
my
$cs
=
"\\"
.
$ch
;
if
((
defined
$cc
) && (
$cc
== CC_LETTER)) {
while
(((
$ch
,
$cc
) = getNextChar(
$self
)) &&
$ch
&& (
$cc
== CC_LETTER)) {
$cs
.=
$ch
; }
$$self
{skipping_spaces} = 1;
$$self
{colno}--
if
(
defined
$cc
) && (
$cc
!= CC_LETTER); }
return
T_CS(
$cs
); }
sub
handle_EOL {
my
(
$self
) =
@_
;
my
$token
= (
$$self
{colno} == 1
? T_CS(
'\par'
)
: (
$STATE
->lookupValue(
'PRESERVE_NEWLINES'
) ? Token(
"\n"
, CC_SPACE) : T_SPACE));
$$self
{colno} =
$$self
{nchars};
return
$token
; }
sub
handle_space {
my
(
$self
) =
@_
;
my
(
$ch
,
$cc
);
while
(((
$ch
,
$cc
) = getNextChar(
$self
)) && (
defined
$ch
) && ((
$cc
== CC_SPACE) || (
$cc
== CC_EOL))) { }
$$self
{colno}--
if
(
$$self
{colno} <=
$$self
{nchars}) && (
defined
$ch
);
return
T_SPACE; }
sub
handle_comment {
my
(
$self
) =
@_
;
my
$n
=
$$self
{colno};
$$self
{colno} =
$$self
{nchars};
my
$comment
=
join
(
''
, @{
$$self
{chars} }[
$n
..
$$self
{nchars} - 1]);
$comment
=~ s/^\s+//;
$comment
=~ s/\s+$//;
if
(
$comment
&&
$STATE
->lookupValue(
'INCLUDE_COMMENTS'
)) {
return
T_COMMENT(
$comment
); }
elsif
((
$STATE
->lookupValue(
'PRESERVE_NEWLINES'
) || 0) > 1) {
return
T_MARKER(
'EOL'
); }
else
{
return
; } }
my
%LETTER
= ();
my
%OTHER
= ();
my
%ACTIVE
= ();
my
@DISPATCH
= (
\
&handle_escape
,
sub
{ (
$_
[1] eq
'{'
? T_BEGIN : Token(
$_
[1], CC_BEGIN)) },
sub
{ (
$_
[1] eq
'}'
? T_END : Token(
$_
[1], CC_END)) },
sub
{ (
$_
[1] eq
'$'
? T_MATH : Token(
$_
[1], CC_MATH)) },
sub
{ (
$_
[1] eq
'&'
? T_ALIGN : Token(
$_
[1], CC_ALIGN)) },
\
&handle_EOL
,
sub
{ (
$_
[1] eq
'#'
? T_PARAM : Token(
$_
[1], CC_PARAM)) }, # T_PARAM
sub
{ (
$_
[1] eq
'^'
? T_SUPER : Token(
$_
[1], CC_SUPER)) },
sub
{ (
$_
[1] eq
'_'
? T_SUB : Token(
$_
[1], CC_SUB)) },
sub
{
undef
; },
\
&handle_space
,
sub
{
$LETTER
{
$_
[1] } || (
$LETTER
{
$_
[1] } = T_LETTER(
$_
[1])); },
sub
{
$OTHER
{
$_
[1] } || (
$OTHER
{
$_
[1] } = T_OTHER(
$_
[1])); },
sub
{
$ACTIVE
{
$_
[1] } || (
$ACTIVE
{
$_
[1] } = T_ACTIVE(
$_
[1])); },
\
&handle_comment
,
sub
{ T_OTHER(
$_
[1]); }
);
sub
readToken {
my
(
$self
) =
@_
;
while
(1) {
if
(
$$self
{colno} >=
$$self
{nchars}) {
$$self
{lineno}++;
$$self
{colno} = 0;
my
$line
=
$self
->getNextLine;
my
$read_mode
= ((
$STATE
->lookupValue(
'PRESERVE_NEWLINES'
) || 0) > 1);
my
$eolch
=
"\r"
;
if
(
my
$eol
=
$STATE
->lookupDefinition(T_CS(
'\endlinechar'
))) {
$eol
=
$eol
->valueOf()->valueOf;
$eolch
= ((
$eol
> 0) && (
$eol
<= 255) ?
chr
(
$eol
) :
undef
); }
if
(!
defined
$line
) {
my
$eolcc
= ((
defined
$eolch
) &&
$STATE
->lookupCatcode(
$eolch
)) // CC_OTHER;
my
$eoftoken
=
$read_mode
&& (
defined
$eolch
) && !
$$self
{at_eof} &&
$$self
{source}
&& (
$eolcc
== CC_EOL ? T_CS(
'\par'
)
: Token(
$eolch
,
$eolcc
));
$$self
{at_eof} = 1;
$$self
{chars} = [];
$$self
{nchars} = 0;
return
$eoftoken
if
$eoftoken
;
return
; }
if
(
$$self
{source}) {
$line
=~ s/ *$//s; }
$line
.=
$eolch
if
defined
$eolch
;
$$self
{chars} = splitChars(
$line
);
$$self
{nchars} =
scalar
(@{
$$self
{chars} });
while
((
$$self
{colno} <
$$self
{nchars})
&& ((
$$STATE
{catcode}{
$$self
{chars}[
$$self
{colno}] }[0] || CC_OTHER) == CC_SPACE)) {
$$self
{colno}++; }
return
T_MARKER(
'EOL'
)
if
$read_mode
&& (
$$self
{colno} >=
$$self
{nchars}) && ((!
defined
$eolch
) || (
$eolch
ne
"\r"
));
if
(((
$$self
{lineno} %
$READLINE_PROGRESS_QUANTUM
) == 0) &&
$STATE
->lookupValue(
'INCLUDE_COMMENTS'
)) {
return
T_COMMENT(
"**** "
. (
$$self
{shortsource} ||
'String'
) .
" Line $$self{lineno} ****"
); }
}
if
(
$$self
{skipping_spaces}) {
my
(
$ch
,
$cc
);
while
(((
$ch
,
$cc
) = getNextChar(
$self
)) && (
defined
$ch
) && (
$cc
== CC_SPACE)) { }
$$self
{colno}--
if
(
$$self
{colno} <=
$$self
{nchars}) && (
defined
$cc
) && (
$cc
!= CC_SPACE);
if
((
defined
$cc
) && (
$cc
== CC_EOL)) {
getNextChar(
$self
);
$$self
{colno}--
if
(
$$self
{colno} <
$$self
{nchars}); }
$$self
{skipping_spaces} = 0; }
my
(
$ch
,
$cc
) = getNextChar(
$self
);
my
$token
= (
defined
$cc
?
$DISPATCH
[
$cc
] :
undef
);
$token
=
&$token
(
$self
,
$ch
)
if
ref
$token
eq
'CODE'
;
return
$token
if
defined
$token
;
}
return
; }
sub
readTokens {
my
(
$self
) =
@_
;
my
@tokens
= ();
while
(
defined
(
my
$token
=
$self
->readToken())) {
push
(
@tokens
,
$token
); }
while
(
@tokens
&&
$tokens
[-1]->getCatcode == CC_SPACE) {
pop
(
@tokens
); }
return
Tokens(
@tokens
); }
sub
readRawLine {
my
(
$self
,
$noread
) =
@_
;
my
$line
;
if
(
$$self
{colno} <
$$self
{nchars}) {
$line
=
join
(
''
, @{
$$self
{chars} }[
$$self
{colno} ..
$$self
{nchars} - 1]);
$line
=~ s/\r$//s;
$$self
{colno} =
$$self
{nchars}; }
elsif
(
$noread
) {
$line
=
''
; }
else
{
$line
=
$self
->getNextLine;
if
(!
defined
$line
) {
$$self
{at_eof} = 1;
$$self
{chars} = [];
$$self
{nchars} = 0;
$$self
{colno} = 0; }
else
{
$line
=~ s/ *$//s;
$$self
{lineno}++;
$$self
{chars} = splitChars(
$line
);
$$self
{nchars} =
scalar
(@{
$$self
{chars} });
$$self
{colno} =
$$self
{nchars}; } }
return
$line
; }
sub
isEOL {
my
(
$self
) =
@_
;
my
$savecolno
=
$$self
{colno};
if
(
$$self
{skipping_spaces}) {
my
(
$ch
,
$cc
);
while
(((
$ch
,
$cc
) = getNextChar(
$self
)) && (
defined
$ch
) && (
$cc
== CC_SPACE)) { }
$$self
{colno}--
if
(
$$self
{colno} <=
$$self
{nchars}) && (
defined
$cc
) && (
$cc
!= CC_SPACE);
if
((
defined
$cc
) && (
$cc
== CC_EOL)) {
getNextChar(
$self
);
$$self
{colno}--
if
(
$$self
{colno} <
$$self
{nchars}); } }
my
$eol
=
$$self
{colno} >=
$$self
{nchars};
$$self
{colno} =
$savecolno
;
return
$eol
; }
1;