The Perl and Raku Conference 2025: Greenville, South Carolina - June 27-29 Learn more

# /=====================================================================\ #
# | LaTeXML::Core::Mouth::file | #
# | Analog of TeX's Mouth: for reading from files | #
# |=====================================================================| #
# | Part of LaTeXML: | #
# | Public domain software, produced as part of work done by the | #
# | United States Government & not subject to copyright in the US. | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov> #_# | #
# \=========================================================ooo==U==ooo=/ #
use strict;
use Encode;
# Constructor: create a Mouth that reads from the file $pathname.
# Recognized %options are `fordefinitions` and `notes`; each is recorded on
# the object (as 1) only when given a true value.
# The file is opened and the object initialized before it is returned.
sub new {
  my ($class, $pathname, %options) = @_;
  # The directory part is not needed; only name and extension form the short name.
  my (undef, $name, $ext) = pathname_split($pathname);
  my %fields = (
    source      => $pathname,
    shortsource => join('.', $name, $ext));
  my $self = bless {%fields}, $class;
  foreach my $flag (qw(fordefinitions notes)) {
    $$self{$flag} = 1 if $options{$flag}; }
  $self->openFile($pathname);
  $self->initialize;
  return $self; }
# Open $pathname for reading and attach the handle to this Mouth.
# Reports a Fatal error when the file is unreadable, when it is non-empty and
# looks binary (Perl's -B heuristic), or when open itself fails.
# On success, stores the handle in {IN} and resets {buffer} to an empty list.
sub openFile {
  my ($self, $pathname) = @_;
  if (-r $pathname) {
    # Empty files are exempt from the binary heuristic.
    if (!(-z $pathname) && (-B $pathname)) {
      Fatal('invalid', 'binary', $self, "Input file $pathname appears to be binary."); } }
  else {
    Fatal('I/O', 'unreadable', $self, "File $pathname is not readable."); }
  open(my $handle, '<', $pathname)
    or Fatal('I/O', 'open', $self, "Can't open $pathname for reading", $!);
  $$self{IN}     = $handle;
  $$self{buffer} = [];
  return; }
# Finish with this Mouth: run the superclass's finish, then close the input
# file handle (if one is still open) and forget it.
sub finish {
  my ($self) = @_;
  $self->SUPER::finish;
  # Look at {IN} only after the superclass has done its part.
  my $handle = $$self{IN};
  return unless $handle;
  close(\*{$handle});
  $$self{IN} = undef;
  return; }
# Report whether any input remains. The result is true when any of these hold,
# checked in this order:
#   * unread characters remain in the current line ({colno} < {nchars});
#   * already-split lines are waiting in {buffer};
#   * the file handle {IN} is still open.
sub hasMoreInput {
  my ($self) = @_;
  return 1 if $$self{colno} < $$self{nchars};
  my $pending = scalar(@{ $$self{buffer} });
  return $pending if $pending;
  return $$self{IN}; }
# Return the next line of input, terminated by "\r".
# Lines are taken from {buffer}; when the buffer is empty, one more record is
# read from the file handle {IN} and split into lines by
# LaTeXML::Core::Mouth::splitLines.  At end of file the handle is closed,
# {IN} is cleared, and nothing (empty list / undef) is returned; the same
# happens when the buffer is empty and {IN} is already closed.
# If the state value PERL_INPUT_ENCODING is set, a non-empty line is decoded
# from that encoding; undecodable characters become spaces and an Info
# message is issued.
sub getNextLine {
  my ($self) = @_;
  if (!scalar(@{ $$self{buffer} })) {
    return unless $$self{IN};
    my $fh  = \*{ $$self{IN} };
    my $raw = <$fh>;
    if (!defined $raw) {
      close($fh); $$self{IN} = undef;
      return; }
    else {
      push(@{ $$self{buffer} }, LaTeXML::Core::Mouth::splitLines($raw)); } }
  # The buffer can still be empty here (e.g. a blank line may split into nothing),
  # in which case we deliver an empty line.
  # Test definedness, NOT truth: a line consisting solely of "0" is false in Perl,
  # and `|| ''` would silently replace it by an empty line.
  my $line = shift(@{ $$self{buffer} });
  $line = '' unless defined $line;
  if ($line ne '') {
    if (my $encoding = $STATE->lookupValue('PERL_INPUT_ENCODING')) {
      # Note that if chars in the input cannot be decoded, they are replaced by \x{FFFD}
      # I _think_ that for TeX's behaviour we actually should turn such un-decodeable chars into space(?).
      $line = decode($encoding, $line, Encode::FB_DEFAULT);
      if ($line =~ s/\x{FFFD}/ /g) {    # Turn the replacement chars into spaces, and warn (or Info?)
        Info('misdefined', $encoding, $self, "input isn't valid under encoding $encoding"); } } }
  $line .= "\r";    # put line ending back!
  # Emit a progress note whenever the current line number is a multiple of 25.
  if (!($$self{lineno} % 25)) {
    NoteProgressDetailed("[#$$self{lineno}]"); }
  return $line; }
# Render this Mouth for messages as "Mouth[<source>@<lineno>x<colno>]".
sub stringify {
  my ($self) = @_;
  my ($source, $lineno, $colno) = @$self{qw(source lineno colno)};
  return sprintf('Mouth[%s@%sx%s]', $source, $lineno, $colno); }
#======================================================================
1;
__END__
=pod
=head1 NAME
C<LaTeXML::Core::Mouth::file> - tokenize the input from a file
=head1 DESCRIPTION
A C<LaTeXML::Core::Mouth> (and subclasses) is responsible for I<tokenizing>, i.e.,
converting plain text and strings into L<LaTeXML::Core::Token>s according to the
current category codes (catcodes) stored in the C<LaTeXML::Core::State>.
=head1 AUTHOR
Bruce Miller <bruce.miller@nist.gov>
=head1 COPYRIGHT
Public domain software, produced as part of work done by the
United States Government & not subject to copyright in the US.
=cut