Sponsoring The Perl Toolchain Summit 2025: Help make this important event another success Learn more

# /=====================================================================\ #
# | LaTeXML::Post::MathML | #
# | MathML generator for LaTeXML | #
# |=====================================================================| #
# | Part of LaTeXML: | #
# | Public domain software, produced as part of work done by the | #
# | United States Government & not subject to copyright in the US. | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov> #_# | #
# \=========================================================ooo==U==ooo=/ #
use strict;
use List::Util qw(max);
use base qw(Exporter);
# Names exported by default (via Exporter) to any module that uses this one:
# the DefMathML declaration function, the presentation (pmml*) helpers and
# the content (cmml*) helpers.  Not all of them are defined in this part of the file.
our @EXPORT = (
qw( &DefMathML ),
qw( &pmml &pmml_scriptsize &pmml_smaller
&pmml_mi &pmml_mo &pmml_mn &pmml_bigop
&pmml_punctuate &pmml_parenthesize
&pmml_infix &pmml_script &pmml_summation),
qw( &cmml &cmml_share &cmml_shared &cmml_leaf
&cmml_or_compose &cmml_synth_not &cmml_synth_complement),
);
# The MathML namespace URI; preprocess binds it to the prefix 'm' in the document.
my $mmlURI = "http://www.w3.org/1998/Math/MathML"; # CONSTANT
# ================================================================================
# LaTeXML::MathML Base-level Math Formatter for LaTeXML's Parsed Math.
# Cooperate with the parsed math structure generated by LaTeXML::Math and
# convert into presentation & content MathML.
# (See LaTeXML::Post::MathML::Presentation, LaTeXML::Post::MathML::Content)
# ================================================================================
# Some clarity to work out:
# We're trying to convert either parsed or unparsed math (sometimes intertwined).
# How clearly do these have to be separated?
# at least, sub/superscripts do not attach to anything meaningful.
# ================================================================================
#================================================================================
# Useful switches when creating a converter with special needs.
# plane1 : use Unicode plane 1 characters for math letters
# hackplane1 : use a hybrid of plane1 for script and fraktur,
# otherwise regular chars with mathvariant
# nestmath : allow m:math to be nested within m:mtext
# otherwise flatten to m:mrow sequence of m:mtext and other math bits.
# usemfenced : whether to use mfenced instead of mrow
# this would be desired for MathML-CSS profile,
# but (I think) mrow usually gets better handling in firefox,..?
#================================================================================
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Top level
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Prepare the converter and the document before any math gets converted:
# normalize the hackplane1/plane1/nestmath switches on $self, then make sure
# the document knows about MathML (doctype adjustment and the 'm' namespace prefix).
# Returns nothing.
sub preprocess {
  my ($self, $doc, @nodes) = @_;
  # Set up rational, modern, defaults.
  if (!$$self{hackplane1}) {
    $$self{hackplane1} = 0; }
  # plane1 defaults to on, and is forced on whenever hackplane1 is requested.
  if ($$self{hackplane1} || !defined $$self{plane1}) {
    $$self{plane1} = 1; }
  if (!$$self{nestmath}) {
    $$self{nestmath} = 0; }
  $doc->adjust_latexml_doctype('MathML');    # Add MathML if LaTeXML dtd.
  $doc->addNamespace($mmlURI, 'm');
  return; }
# Works for pmml, cmml
# Wrap already converted MathML ($mml, in array form) in the outer m:math element.
#   $xmath : the source XMath node; its parent (the Math element) supplies
#            mode, class, tex and the fallback image attributes.
#   $mml   : the converted content to be wrapped.
# Returns the array form of the m:math element, after associating it with $xmath.
sub outerWrapper {
  my ($self, $doc, $xmath, $mml) = @_;
  my $math    = $xmath->parentNode;
  my $mode    = $math->getAttribute('mode') || 'inline';
  my $display = ($mode eq 'display' ? 'block' : 'inline');
  # Fallback image attributes, only when an image has been generated for the math.
  my %image    = ();
  my $imagesrc = $math->getAttribute('imagesrc');
  if ($imagesrc) {
    my $depth = $math->getAttribute('imagedepth');
    $image{altimg}          = $imagesrc;
    $image{'altimg-width'}  = $math->getAttribute('imagewidth') . 'px';
    $image{'altimg-height'} = $math->getAttribute('imageheight') . 'px';
    $image{'altimg-valign'} = ($depth ? -$depth . 'px' : undef); }    # Note the sign!
  # RDFa attributes are copied from the Math element, or else from the XMath itself.
  my %rdfa = ();
  foreach my $name (qw(about resource property rel rev typeof datatype content)) {
    my $value = ($math->getAttribute($name) || $xmath->getAttribute($name));
    $rdfa{$name} = $value if $value; }
  my %attributes = (
    display => $display,
    class   => $math->getAttribute('class'),
    alttext => $math->getAttribute('tex'),
    #### Handy for debugging math
    ### title => $math->getAttribute('text'),
    %rdfa,
    %image);
  my $wrapped = ['m:math', {%attributes}, $mml];
  # Associate the generated node with the source XMath node, but don't cross-reference
  $self->associateNode($wrapped, $xmath, 1);
  return $wrapped; }
# Map mimetype to Official MathML encodings
# Consulted by combineParallel to choose the encoding attribute of an annotation;
# a mimetype that is not listed here is used, unchanged, as the encoding.
our %ENCODINGS = (
'application/mathml-presentation+xml' => 'MathML-Presentation',
'application/mathml-content+xml' => 'MathML-Content',
'image/svg+xml' => 'SVG1.1',
);
# Returns the fixed suffix string '.msvg'; takes no arguments into account.
# NOTE(review): presumably appended to ids of nodes generated by this processor -- confirm with callers.
sub rawIDSuffix {
  my $suffix = '.msvg';
  return $suffix; }
# This works for either pmml or cmml.
# Combine the primary conversion result with any number of secondary results
# into parallel markup.  Each of $primary and @secondaries is a hash with (some of)
# the keys mimetype, xml, src, string and processor.
# Every usable secondary becomes an m:annotation-xml or m:annotation; if there are any,
# the primary's xml is wrapped, along with them, in m:semantics.
# Returns a hash { processor, mimetype, xml } describing the combined result.
sub combineParallel {
  my ($self, $doc, $xmath, $primary, @secondaries) = @_;
  my $id          = $xmath->getAttribute('fragid');    # fetched, but not (currently) used
  my @annotations = ();
  foreach my $secondary (@secondaries) {
    my $mimetype = $$secondary{mimetype} || 'unknown';
    my $encoding = $ENCODINGS{$mimetype} || $mimetype;
    my ($xml, $src, $string) = @$secondary{qw(xml src string)};
    my $annotation;
    if ($mimetype =~ /^application\/mathml/) {    # Some flavor of MathML? simple case
      $annotation = ['m:annotation-xml', { encoding => $encoding }, $xml]; }
    elsif ($xml) {                                # Other XML? may need wrapping.
      $annotation = ['m:annotation-xml', { encoding => $encoding },
        $$secondary{processor}->outerWrapper($doc, $xmath, $xml)]; }
    elsif ($src) {    # something referred to by a file? Image, maybe?
      $annotation = ['m:annotation', { encoding => $encoding, src => $src }]; }
    elsif ($string) {    # simple string data?
      $annotation = ['m:annotation', { encoding => $encoding }, $string]; }
    # anything else is ignored
    push(@annotations, $annotation) if $annotation; }
  my $combined = $$primary{xml};
  if (@annotations) {
    $combined = ['m:semantics', {}, $$primary{xml}, @annotations]; }
  return { processor => $self, mimetype => $$primary{mimetype}, xml => $combined }; }
# $self->convertNode($doc,$node);
# will be handled by specific Presentation or Content MathML converters; See at END.
# $self->translateNode($doc,$XMath,$style,$embedding)
# returns the translation of the XMath node (but doesn't insert it)
# $style will be either 'display' or 'text' (if relevant),
# The result should be wrapped as necessary for the result to
# be embedded within the tag $embedding.
# Eg. for parallel markup.
# See END for presentation, content and parallel versions.
# Return the qualified name of $node, as determined by the document
# currently bound to $LaTeXML::Post::DOCUMENT.
sub getQName {
  my ($node) = @_;
  my $document = $LaTeXML::Post::DOCUMENT;
  return $document->getQName($node); }
# Add a cross-reference linkage (eg. xref) onto $node to refer to the given $id.
# (presumably $id is the id of a node created by another Math Postprocessor
# from the same source XMath node that generated $node)
# Record a cross-reference on $node by setting its xref attribute to $id.
# Returns nothing.
sub addCrossref {
  my ($self, $node, $id) = @_;
  $node->setAttribute('xref', $id);
  return; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# General translation utilities.
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Resolve $node through the current document's realizeXMNode (passing $branch along);
# anything that is not a reference (eg. a plain string) is returned unchanged.
sub realize {
  my ($node, $branch) = @_;
  return $node unless ref $node;
  return $LaTeXML::Post::DOCUMENT->realizeXMNode($node, $branch); }
# For a node that is a (possibly embellished) operator,
# find the underlying role.
# Roles of operators that merely embellish (script, accent or modify) their first argument.
my %EMBELLISHING_ROLE = (    # CONSTANT
  SUPERSCRIPTOP => 1, SUBSCRIPTOP => 1,
  OVERACCENT    => 1, UNDERACCENT => 1, MODIFIER => 1, MODIFIEROP => 1);

# Find the underlying role of a (possibly embellished) operator node.
#   - a node carrying its own role attribute yields that role;
#   - an ltx:XMApp whose operator has an embellishing role yields
#     the (recursively determined) role of its first argument;
#   - anything else (including a missing node) yields undef.
# ALWAYS returns exactly one value, even in list context: callers place the call
# directly inside argument lists (eg. lookupPresenter(..., getOperatorRole($x), ...)),
# where an empty list would silently shift the remaining arguments.
# Hence the deliberate use of 'return undef' rather than a bare 'return'.
sub getOperatorRole {
  my ($node) = @_;
  return undef unless $node;
  if (my $role = $node->getAttribute('role')) {
    return $role; }
  if (getQName($node) eq 'ltx:XMApp') {
    my ($op, $base) = element_nodes($node);
    # An empty XMApp has no operator; don't try to ask it for its role.
    if ($op && $EMBELLISHING_ROLE{ $op->getAttribute('role') || '' }) {
      return scalar(getOperatorRole($base)); } }
  return undef; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Table of Translators for presentation|content
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# All translators take XMath XML::LibXML nodes as arguments,
# and return an intermediate form (ie. array form) of MathML to be added.
# DANGER!!! These accumulate all the DefMathML declarations.
# They're fixed after the module has been loaded, so are Daemon Safe,
# but probably should be going into (post) STATE, so that they are extensible.
# IN FACT, I'm already taking baby-steps to export DefMathML (and needed helpers),
# in order to assist these extensions, so that will bring up daemon issues pretty quick.
# Tables of translators, keyed by "mode:role:name" strings:
# $MMLTable_P holds the presentation handlers, $MMLTable_C the content handlers.
our $MMLTable_P = {};
our $MMLTable_C = {};

# Declare the translators for $key: $presentation and/or $content are stored
# into the respective table; a false (eg. undef) handler leaves that table untouched.
# Returns nothing.
sub DefMathML {
  my ($key, $presentation, $content) = @_;
  if ($presentation) {
    $MMLTable_P->{$key} = $presentation; }
  if ($content) {
    $MMLTable_C->{$key} = $content; }
  return; }
# Find the presentation handler for the given mode, role and name.
# A missing role or name is treated as the wildcard '?'.
# Candidates are tried from most to least specific:
#   mode:role:name, mode:?:name, mode:role:?, mode:?:?
# Returns the first handler found (or whatever is stored for mode:?:? if none is).
sub lookupPresenter {
  my ($mode, $role, $name) = @_;
  $name = '?' if !$name;
  $role = '?' if !$role;
  foreach my $key ("$mode:$role:$name", "$mode:?:$name", "$mode:$role:?") {
    my $presenter = $$MMLTable_P{$key};
    return $presenter if $presenter; }
  return $$MMLTable_P{"$mode:?:?"}; }
# Find the content handler for the given mode, role and name; Content-first lookup.
# If we have a meaning/name provided (so that we can make a csymbol), try in order:
#   1. a role-specific adaptation of the symbol (mode:role:name), when a role is known;
#   2. a name-specific adaptation (mode:?:name);
# If we do not have a name:
#   3. a role-based handler (mode:role:?), when a role is known.
# In all cases, fall back to the generic handler for the mode (mode:?:?).
sub lookupContent {
  my ($mode, $role, $name) = @_;
  my @candidates = ();
  if ($name) {
    push(@candidates, "$mode:$role:$name") if $role;
    push(@candidates, "$mode:?:$name"); }
  elsif ($role) {
    push(@candidates, "$mode:$role:?"); }
  foreach my $key (@candidates) {
    my $handler = $$MMLTable_C{$key};
    return $handler if $handler; }
  # Always use a default handler if nothing more specific is known
  return $$MMLTable_C{"$mode:?:?"}; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Various needed maps
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# The next smaller mathstyle, as applied by pmml_smaller;
# its keys also serve (in pmml_internal) to recognize valid mathstyle names.
my %stylestep = ( # CONSTANT
display => 'text', text => 'script',
script => 'scriptscript', scriptscript => 'scriptscript');
# The (relative) font size corresponding to each mathstyle.
my %stylesize = ( # CONSTANT
display => '100%', text => '100%',
script => '70%', scriptscript => '50%');
# The mathstyle to use for a script within the given mathstyle, as applied by pmml_scriptsize.
my %style_script_step = ( # CONSTANT
display => 'script', text => 'script',
script => 'scriptscript', scriptscript => 'scriptscript');
# Attributes for m:mstyle when changing between two mathstyles
# Indexed as $stylemap{$from}{$to}; there is no entry for an unchanged style.
my %stylemap = ( # CONSTANT
display => { text => { displaystyle => 'false' },
script => { displaystyle => 'false', scriptlevel => '+1' },
scriptscript => { displaystyle => 'false', scriptlevel => '+2' } },
text => { display => { displaystyle => 'true' },
script => { scriptlevel => '+1' },
scriptscript => { scriptlevel => '+2' } },
script => { display => { displaystyle => 'true', scriptlevel => '-1' },
text => { scriptlevel => '-1' },
scriptscript => { scriptlevel => '+1' } },
scriptscript => { display => { displaystyle => 'true', scriptlevel => '-2' },
text => { scriptlevel => '-2' },
script => { scriptlevel => '-1' } });
# Similar to above, but for use when there are no MathML structures used
# that NEED displaystyle to be set; presumably only to set a fontsize context
# Indexed the same way as %stylemap; an empty hash means no m:mstyle is needed.
my %stylemap2 = ( # CONSTANT
display => { text => {},
script => { scriptlevel => '+1' },
scriptscript => { scriptlevel => '+2' } },
text => { display => {},
script => { scriptlevel => '+1' },
scriptscript => { scriptlevel => '+2' } },
script => { display => { displaystyle => 'true', scriptlevel => '-1' },
text => { scriptlevel => '-1' },
scriptscript => { scriptlevel => '+1' } },
scriptscript => { display => { displaystyle => 'true', scriptlevel => '-2' },
text => { scriptlevel => '-2' },
script => { scriptlevel => '-1' } });
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Support functions for Presentation MathML
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Top-level entry for converting the children of $node to Presentation MathML.
#   $style : the initial mathstyle; defaults to 'text'.
# Dynamically binds the style, font, size, colors & opacity that we are displaying in
# (inherited from $node or its LaTeXML ancestors), so that anything drawn during
# the conversion automatically gets that size & color.
# Returns the single converted item, or an m:mrow of several, after spacing adjustment.
sub pmml_top {
  my ($self, $node, $style) = @_;
  local $LaTeXML::MathML::STYLE = $style || 'text';
  local $LaTeXML::MathML::FONT  = find_inherited_attribute($node, 'font');
  # $LaTeXML::MathML::FONT = undef
  # if $LaTeXML::MathML::FONT && !$mathvariants{$LaTeXML::MathML::FONT}; # verify sane font
  local $LaTeXML::MathML::SIZE         = find_inherited_attribute($node, 'fontsize') || '100%';
  local $LaTeXML::MathML::COLOR        = find_inherited_attribute($node, 'color');
  local $LaTeXML::MathML::BGCOLOR      = find_inherited_attribute($node, 'backgroundcolor');
  local $LaTeXML::MathML::OPACITY      = find_inherited_attribute($node, 'opacity');
  local $LaTeXML::MathML::DESIRED_SIZE = $LaTeXML::MathML::SIZE;
  my @converted = ();
  foreach my $child (element_nodes($node)) {
    push(@converted, pmml($child)); }
  my $result = $converted[0];
  if (scalar(@converted) > 1) {
    $result = ['m:mrow', {}, @converted]; }
  $self->adjust_spacing($result);    # Resolve spacing TeX vs MathML
  return $result; }
# Look for a (true) value of $attribute on $node or the nearest ancestor having one.
# The search stops at the first node that is not an element in the LaTeXML namespace
# [since there the attribute may have totally different format or even meaning!]
# Returns the value found, or nothing.
sub find_inherited_attribute {
  my ($node, $attribute) = @_;
  for (my $ancestor = $node ;
    $ancestor && isElementNode($ancestor) && ($ancestor->namespaceURI eq 'http://dlmf.nist.gov/LaTeXML') ;
    $ancestor = $ancestor->parentNode) {
    my $value = $ancestor->getAttribute($attribute);
    return $value if $value; }
  return; }
# Convert a node that will automatically be made smaller, due to its context,
# such as in the numerator or denominator of a fraction.
# Convert $node within the next smaller mathstyle (per %stylestep),
# binding the current style and its corresponding size for the duration of the conversion.
sub pmml_smaller {
  my ($node) = @_;
  my $smaller = $stylestep{$LaTeXML::MathML::STYLE};
  local $LaTeXML::MathML::STYLE = $smaller;
  local $LaTeXML::MathML::SIZE  = $stylesize{$smaller};
  return pmml($node); }
# Convert a node that will automatically be made scriptsize,
# such as sub- or superscripts.
# Convert $script within the script-level mathstyle (per %style_script_step),
# binding the current style and its corresponding size for the duration of the conversion.
# A missing script yields an empty m:mrow.
sub pmml_scriptsize {
  my ($script) = @_;
  my $scriptstyle = $style_script_step{$LaTeXML::MathML::STYLE};
  local $LaTeXML::MathML::STYLE = $scriptstyle;
  local $LaTeXML::MathML::SIZE  = $stylesize{$scriptstyle};
  return pmml($script) if $script;
  return ['m:mrow']; }
# Convert the XMath $node to (the array form of) Presentation MathML.
# An ltx:XMRef is replaced by the node it refers to; attributes (fontsize, color,
# backgroundcolor, opacity, enclose, role, class, padding) are taken from
# the referring node and/or the node itself.
# The core conversion is done by pmml_internal; afterwards the result is possibly
# wrapped in m:menclose and annotated with padding, class and role,
# and associated with the source node.
# Returns nothing for a missing $node.
sub pmml {
  my ($node) = @_;
  return unless $node;
  # [since we follow split/scan, use the fragid, not xml:id! TO SOLVE LATER]
  # Fetch the "real" node, if this is an XMRef to one; also use the OTHER's id!
  my $refr;
  if (getQName($node) eq 'ltx:XMRef') {
    $refr = $node;
    $node = realize($node); }
  # Bind any other style information from the referring node or the current node
  # so that any tokens synthesized from strings recover that style.
  local $LaTeXML::MathML::DESIRED_SIZE = _getattr($refr, $node, 'fontsize') || $LaTeXML::MathML::DESIRED_SIZE;
  local $LaTeXML::MathML::COLOR        = _getattr($refr, $node, 'color')    || $LaTeXML::MathML::COLOR;
  local $LaTeXML::MathML::BGCOLOR = _getattr($refr, $node, 'backgroundcolor')
    || $LaTeXML::MathML::BGCOLOR;
  local $LaTeXML::MathML::OPACITY = _getattr($refr, $node, 'opacity') || $LaTeXML::MathML::OPACITY;
  # Do the core conversion.
  my $result = pmml_internal($node);
  # Now possibly wrap the result in a row, enclose, etc, if needed
  my $enclose = _getattr($refr, $node, 'enclose');
  my $class = join(' ', grep { $_ } $refr && $refr->getAttribute('class'), $node->getAttribute('class'));
  # Wrap in an enclose, if there's an enclose attribute (Ugh!)
  $result = ['m:menclose', { notation => $enclose }, $result] if $enclose;
  # All remaining annotations are stored in the attribute hash of the result,
  # so they only make sense (and are only safe) when the result is in array form;
  # dereferencing anything else would autovivify or die.
  if (ref $result eq 'ARRAY') {
    # Add spacing last; outside parens & enclosing (?) But defer until spacing resolution
    my $lpad = _getspace($refr, $node, 'lpadding');
    my $rpad = _getspace($refr, $node, 'rpadding');
    $$result[1]{_lpadding} = $lpad if $lpad;
    $$result[1]{_rpadding} = $rpad if $rpad;
    if ($class) {    # Add class, if any and different
      my $oldclass = $$result[1]{class};
      $$result[1]{class} = (!$oldclass || ($oldclass eq $class) ? $class : "$oldclass $class"); }
    # Record the role, for use in later processing.
    if (my $role = _getattr($refr, $node, 'role')) {
      $$result[1]{_role} = $role; } }
  # Associate the generated node with the source XMath node.
  $LaTeXML::Post::MATHPROCESSOR->associateNode($result, $node);
  return $result; }
# Return the first child of $node that is an element node, or nothing if there is none.
sub first_element {
  my ($node) = @_;
  for (my $child = $node->firstChild ; $child ; $child = $child->nextSibling) {
    return $child if $child->nodeType == XML_ELEMENT_NODE; }
  return; }
# Get the value of $attribute, preferring a (true) value on the referring node $refr
# (if any), and otherwise taking whatever the node $node (if any) has.
sub _getattr {
  my ($refr, $node, $attribute) = @_;
  if ($refr) {
    my $value = $refr->getAttribute($attribute);
    return $value if $value; }
  return $node unless $node;
  return $node->getAttribute($attribute); }
# Sum the spacing (converted by getXMHintSpacing) given by $attribute
# on the referring node $refr and on the node $node; either may be missing.
sub _getspace {
  my ($refr, $node, $attribute) = @_;
  my $total = 0;
  foreach my $source ($refr, $node) {
    my $space = $source && $source->getAttribute($attribute);
    $total += getXMHintSpacing($space) if $space; }
  return $total; }
# Needs to be a utility somewhere...
# Return in em !!
# Convert a TeX-like dimension or glue specification (in pt, mu or em, optionally
# followed by 'plus ...pt' and/or 'minus ...pt', which are ignored) into a number of ems.
# 18mu make 1em; for pt, a 10pt font is ASSUMED!!!!
# Returns 0 for anything missing or unrecognized.
sub getXMHintSpacing {
  my ($width) = @_;
  return 0 unless $width;
  my ($amount, $unit)
    = ($width =~ /^([\d\.\+\-]+)(pt|mu|em)(\s+plus\s+[\d\.]+pt)?(\s+minus\s+[\d\.]+pt)?$/);
  return 0               unless defined $unit;
  return $amount         if $unit eq 'em';
  return $amount / 18.0  if $unit eq 'mu';
  return $amount / 10.0; }
# No-break space; substituted for leading/trailing whitespace of plain text (see end of pmml_internal).
my $NBSP = pack('U', 0xA0);    # CONSTANT

# The core of the conversion of an XMath node to (array form) Presentation MathML,
# dispatching on the qualified name of $node:
#   ltx:XMath          : the converted children, as a row;
#   ltx:XMDual         : the conversion of the presentation (2nd) branch;
#   ltx:XMWrap, XMArg  : the converted children, as a (possibly resized) row;
#   ltx:XMApp          : the result of the 'Apply' presenter found for the operator,
#                        possibly wrapped in m:mstyle for a change of mathstyle;
#   ltx:XMTok, XMHint  : the result of the 'Token' or 'Hint' presenter;
#   ltx:XMArray        : an m:mtable;
#   ltx:XMText         : m:mtext (or a row of text & math, unless nestmath is set);
#   ltx:ERROR          : an m:merror;
#   anything else      : an m:mtext of the text content.
# A missing node (or missing operator) yields an m:merror.
sub pmml_internal {
  my ($node) = @_;
  return ['m:merror', {}, ['m:mtext', {}, "Missing Subexpression"]] unless $node;
  my $self = $LaTeXML::Post::MATHPROCESSOR;
  my $doc  = $LaTeXML::Post::DOCUMENT;
  my $tag  = getQName($node);
  my $role = $node->getAttribute('role');
  if ($tag eq 'ltx:XMath') {
    return pmml_row(map { pmml($_) } element_nodes($node)); }    # Really multiple nodes???
  elsif ($tag eq 'ltx:XMDual') {
    my ($content, $presentation) = element_nodes($node);
    return pmml($presentation); }
  elsif (($tag eq 'ltx:XMWrap') || ($tag eq 'ltx:XMArg')) {      # Only present if parsing failed!
    return pmml_maybe_resize($node, pmml_row(map { pmml($_) } element_nodes($node))); }
  elsif ($tag eq 'ltx:XMApp') {
    my ($op, @args) = element_nodes($node);
    if (!$op) {
      return ['m:merror', {}, ['m:mtext', {}, "Missing Operator"]]; }
    elsif ($role && ($role =~ /^(FLOAT|POST)(SUB|SUPER)SCRIPT$/)) {
      # (FLOAT|POST)(SUB|SUPER)SCRIPT's should NOT remain in successfully parsed math.
      # This conversion creates something "presentable", though doubtfully correct (empty mi?)
      # Really should mark & make a fake parsing pass to & group open/close pairs & attach scripts
      return [($2 eq 'SUB' ? 'm:msub' : 'm:msup'), {}, ['m:mi'],
        pmml_scriptsize($op)]; }
    else {
      my $rop = realize($op);
      my $style =
        $rop->getAttribute('mathstyle') || $op->getAttribute('mathstyle');
      my $ostyle = $LaTeXML::MathML::STYLE;
      # Only switch to the requested style if it is a known mathstyle.
      local $LaTeXML::MathML::STYLE
        = ($style && $stylestep{$style} ? $style : $LaTeXML::MathML::STYLE);
      my $result = &{ lookupPresenter('Apply', getOperatorRole($rop), $rop->getAttribute('meaning'))
      }($op, @args);
      $result = pmml_maybe_resize($node, $result);
      # Wrap in m:mstyle, if the change of style (from the outer style) requires any attributes.
      my $needsmathstyle = needsMathstyle($result);
      my %styleattr      = %{ ($style && ($needsmathstyle
            ? $stylemap{$ostyle}{$style}
            : $stylemap2{$ostyle}{$style})) || {} };
      $result = ['m:mstyle', {%styleattr}, $result] if keys %styleattr;
      return $result; } }
  elsif ($tag eq 'ltx:XMTok') {
    return &{ lookupPresenter('Token', $role, $node->getAttribute('meaning')) }($node); }
  elsif ($tag eq 'ltx:XMHint') {
    return &{ lookupPresenter('Hint', $role, $node->getAttribute('meaning')) }($node); }
  elsif ($tag eq 'ltx:XMArray') {
    my $width   = $node->getAttribute('width');
    my $style   = $node->getAttribute('mathstyle');
    my $vattach = $node->getAttribute('vattach');
    my $rowsep  = $node->getAttribute('rowsep') || '0pt';
    my $colsep  = $node->getAttribute('colsep') || '5pt';
    $vattach = 'axis'    if !$vattach || ($vattach eq 'middle');    # roughly MathML's axis?
    $vattach = 'bottom1' if $vattach  && ($vattach eq 'top');
    my $ostyle = $LaTeXML::MathML::STYLE;
    local $LaTeXML::MathML::STYLE
      = ($style && $stylestep{$style} ? $style : $LaTeXML::MathML::STYLE);
    my @rows    = ();
    my $nrows   = 0;
    my $ncols   = 0;
    my @spanned = ();    # record columns to be skipped
    foreach my $row (element_nodes($node)) {
      my @cols = ();
      my $nc   = 0;
      $nrows++;
      foreach my $col (element_nodes($row)) {
        $nc++;
        $spanned[$nc - 1]-- if $spanned[$nc - 1];
        next if $spanned[$nc - 1];    # Omit this mtd, if spanned by another!
        my $align  = $col->getAttribute('align');
        my $border = $col->getAttribute('border');
        my $bc = ($border ? join(' ', map { 'ltx_border_' . $_ } split(/\s/, $border)) : $border);
        my $th = $col->getAttribute('thead');
        my $hc = ($th ? join(' ', map { 'ltx_th_' . $_ } split(/\s/, $th)) : '');
        my $cl = $col->getAttribute('class');
        my $c  = ($bc ? ($hc ? "$bc $hc" : $bc) : $hc);
        my $cs = $col->getAttribute('colspan');
        my $rs = $col->getAttribute('rowspan');
        my @cell = filter_row(map { pmml($_) } element_nodes($col));
        if ($rs || $cs) {    # Note following cells to be omitted from MathML
          for (my $i = 0 ; $i < ($cs || 1) ; $i++) {
            $spanned[$nc - 1 + $i] = ($rs || 1); } }
        # Combine ALL classes into a single class attribute: the alignment class must
        # not be listed as a separate 'class' key in the attribute hash, or it would be
        # silently overwritten by the border/header/explicit classes.
        my $aligned = $align && ($align ne 'center');
        my $class = join(' ', grep { $_ } ($aligned ? 'ltx_align_' . $align : undef), $c, $cl);
        push(@cols, ['m:mtd', { ($aligned ? (columnalign => $align) : ()),
              ($class ? (class      => $class) : ()),
              ($cs    ? (columnspan => $cs)    : ()),
              ($rs    ? (rowspan    => $rs)    : ()) },
            @cell]); }
      $ncols = $nc if $nc > $ncols;
      push(@rows, ['m:mtr', {}, @cols]); }
    # Separations are meaningless unless there are at least 2 rows, or columns.
    $rowsep = undef if $nrows < 2;
    $colsep = undef if $ncols < 2;
    my $result = ['m:mtable', { ($vattach ne 'axis' ? (align => $vattach) : ()),
        ($rowsep ? (rowspacing    => $rowsep) : ()),
        ($colsep ? (columnspacing => $colsep) : ()),
        ($width  ? (width         => $width)  : ()),
        # Mozilla seems to need some encouragement?
        ($LaTeXML::MathML::STYLE eq 'display' ? (displaystyle => 'true') : ()) },
      @rows];
    my $needsmathstyle = needsMathstyle($result);
    my %styleattr      = %{ ($style && ($needsmathstyle
          ? $stylemap{$ostyle}{$style}
          : $stylemap2{$ostyle}{$style})) || {} };
    $result = ['m:mstyle', {%styleattr}, $result] if keys %styleattr;
    $result = pmml_maybe_resize($node, $result);
    return $result; }
  elsif ($tag eq 'ltx:XMText') {
    my @c = $node->childNodes;
    my $result;
    if (!$$self{nestmath}) {
      $result = pmml_row(map { pmml_text_aux($_) } @c); }
    else {
      $result = ['m:mtext', {}, $self->convertXMTextContent($doc, 1, @c)]; }
    return pmml_maybe_resize($node, $result); }
  elsif ($tag eq 'ltx:ERROR') {
    my $cl = $node->getAttribute('class');
    return ['m:merror', { class => join(' ', grep { $_ } 'ltx_ERROR', $cl) },
      ['m:mtext', {}, $node->textContent]]; }
  else {
    my $text = $node->textContent;    # Spaces are significant here
    $text =~ s/^\s+/$NBSP/;
    $text =~ s/\s+$/$NBSP/;
    return ['m:mtext', {}, $text]; } }
# Determine whether the (array form) MathML $node contains anything whose rendering
# depends on displaystyle: an m:mfrac, or anything marked with the _largeop attribute.
# The search does not descend into an m:mstyle that already sets displaystyle.
# Returns 1 if so; 0 for such an m:mstyle itself; otherwise nothing.
sub needsMathstyle {
  my ($node) = @_;
  return unless ref $node eq 'ARRAY';
  my ($tag, $attr, @children) = @$node;
  return 1 if ($tag eq 'm:mfrac') || $$attr{_largeop};
  return 0 if ($tag eq 'm:mstyle') && defined $$attr{displaystyle};
  foreach my $child (@children) {
    return 1 if needsMathstyle($child); }
  return; }
# Use mpadded instead of mrow if size has been given
# And maybe this is a convenient place to deal with frames?
# Adjust the converted $result according to the sizing and framing attributes of $node.
# If any of width, height, depth, xoffset or yoffset is given on $node (or on
# an ltx:XMDual directly containing it), $result becomes an m:mpadded (an m:mrow is
# simply renamed, anything else is wrapped) carrying the corresponding attributes.
# A framed attribute adds an ltx_framed_* class, and framecolor a border-color style.
# $result is returned unchanged if $node is not a reference.
sub pmml_maybe_resize {
  my ($node, $result) = @_;
  return $result unless ref $node;
  # There MAY be relevant attributes on a containing XMDual (if any)!!!
  my $dual;
  if ($node->nodeType == XML_ELEMENT_NODE) {
    my $parent = $node->parentNode;
    $dual = $parent if $parent && (getQName($parent) eq 'ltx:XMDual'); }
  # Attribute from the node itself, else from the containing XMDual
  my $inherited = sub {
    my ($name) = @_;
    return $node->getAttribute($name) || ($dual && $dual->getAttribute($name)); };
  my $width  = $inherited->('width');
  my $height = $inherited->('height');
  my $depth  = $inherited->('depth');
  my $xoff   = $inherited->('xoffset');
  my $yoff   = $inherited->('yoffset');
  if ($width || $height || $depth || $xoff || $yoff) {
    # Use mpadded instead of mrow if size has been given
    if ($$result[0] eq 'm:mpadded') { }
    elsif ($$result[0] eq 'm:mrow') {
      $$result[0] = 'm:mpadded'; }
    else {
      $result = ['m:mpadded', {}, $result]; }
    my $attr = $$result[1];
    if ($yoff) {    # assume this means to move the BOX? (not just the contents?)
      my $downward = ($yoff =~ /^-/);
      # Unless given explicitly, height & depth are incremented by +/- the offset.
      if (!$height) {
        $height = ($downward ? $yoff : "+" . $yoff); }
      if (!$depth) {
        if ($downward) {
          $depth = $yoff;
          $depth =~ s/^-/+/; }
        else {
          $depth = "-" . $yoff; } } }
    my %padding = (width => $width, height => $height, depth => $depth,
      lspace => $xoff, voffset => $yoff);
    foreach my $name (sort keys %padding) {
      $$attr{$name} = $padding{$name} if $padding{$name}; } }
  my $frame = $node->getAttribute('framed');
  if ($frame) {
    my $attr     = $$result[1];
    my $oldclass = $$attr{class};
    my $class    = 'ltx_framed_' . $frame;
    $$attr{class} = ($oldclass ? $oldclass . ' ' . $class : $class);
    my $color = $node->getAttribute('framecolor');
    if ($color) {
      my $oldstyle = $$attr{style};
      my $style    = 'border-color: ' . $color;
      $$attr{style} = ($oldstyle ? $oldstyle . '; ' . $style : $style); } }
  return $result; }
# Filter a list of (array form) MathML items, dropping any that are false (eg. undef)
# and any marked with the _ignorable attribute.
sub filter_row {
  my (@items) = @_;
  my @kept = ();
  foreach my $item (@items) {
    next unless $item;
    next if $$item[1]{_ignorable};
    push(@kept, $item); }
  return @kept; }
# Combine the given (array form) MathML items, after filtering by filter_row:
# a single remaining item is returned as is; otherwise they are wrapped in an m:mrow.
sub pmml_row {
  my (@items) = @_;
  my @kept = filter_row(@items);
  return $kept[0] if scalar(@kept) == 1;
  return ['m:mrow', {}, @kept]; }
# The inverse of pmml_row: if $mml is an m:mrow WITHOUT any attributes,
# return the list of its children; otherwise return $mml itself.
sub pmml_unrow {
  my ($mml) = @_;
  return ($mml)
    unless $mml && (ref $mml) && ($$mml[0] eq 'm:mrow') && !scalar(keys %{ $$mml[1] });
  my ($tag, $attr, @children) = @$mml;
  return @children; }
# Enclose the (already converted) $item between the delimiters $open and $close.
# With neither delimiter, $item is returned as is.
# If the math processor has usemfenced set, an m:mfenced is used
# (mfenced is better for CSS profile, but when the insides are line-broken,
# it induces a less traditional appearance);
# otherwise an m:mrow with the delimiters as m:mo with roles OPEN and CLOSE.
# (however, line-breaking inside of a mrow w/parens needs some special treatment too! scripts!!)
sub pmml_parenthesize {
  my ($item, $open, $close) = @_;
  return $item unless $open || $close;
  if ($$LaTeXML::Post::MATHPROCESSOR{usemfenced}) {
    return ['m:mfenced', { open => ($open || ''), close => ($close || '') }, $item]; }
  my @row = ();
  push(@row, pmml_mo($open, role => 'OPEN')) if $open;
  push(@row, $item);
  push(@row, pmml_mo($close, role => 'CLOSE')) if $close;
  return ['m:mrow', {}, @row]; }
# never used?
# Interleave the (already converted) @items with punctuation, as a row.
# $separators is a string of separators delimited by SINGLE SPACE!!; each is used once,
# in order, and the last one given (initially ', ') is reused once they run out.
# Each separator is converted by pmml_mo with role PUNCT.
sub pmml_punctuate {
  my ($separators, @items) = @_;
  $separators = '' unless defined $separators;
  my $lastsep = ', ';
  my @arglist = ();
  if (@items) {
    my ($first, @rest) = @items;
    push(@arglist, $first);
    foreach my $next (@rest) {
      # Peel the next separator (if any) off of the front of the string
      if ($separators =~ s/^(.*?)( |$)//) {    # delimited by SINGLE SPACE!!
        $lastsep = $1 if $1; }
      push(@arglist, pmml_mo($lastsep, role => 'PUNCT'), $next); } }
  return pmml_row(@arglist); }
# args are XMath nodes
# This is suitable for use as an Apply handler.
# Convert the operator $op applied to @args (XMath nodes) as an infix expression;
# this is suitable for use as an Apply handler.
# $op may be a node or a string (then converted by pmml_mo).
#   - with no operator, or no arguments, an empty m:mrow results;
#   - with a single argument, the operator is presumably Prefix! (aka Operator)
#   - otherwise, the operator is repeated between the converted arguments.
#     As an experiment at flattening, when an ADDOP's first argument is itself
#     an application of an operator with the same role, its row is spliced in.
sub pmml_infix {
  my ($op, @args) = @_;
  $op = realize($op) if ref $op;
  return ['m:mrow', {}] unless $op && @args;    # ??
  my $op_is_node = ref $op;
  if (scalar(@args) == 1) {
    my @operator = ($op_is_node && (getQName($op) ne 'ltx:XMTok')
      ? pmml($op)
      : pmml_mo($op, role => 'OPERATOR'));
    return pmml_row(@operator, pmml($args[0])); }
  my ($first, @rest) = @args;
  my $role = ($op_is_node ? getOperatorRole($op) : 'none') || 'none';
  my $arg1 = realize($first);
  my $flatten = ($role eq 'ADDOP')
    && (getQName($arg1) eq 'ltx:XMApp')
    && ((getOperatorRole((element_nodes($arg1))[0]) || 'none') eq $role);
  my @items = ($flatten ? pmml_unrow(pmml($arg1)) : pmml($arg1));
  foreach my $arg (@rest) {
    push(@items, ($op_is_node ? pmml($op) : pmml_mo($op)), pmml($arg)); }
  return pmml_row(@items); }
# Text substituted by stylizeContent for an empty token, by role:
# U+2062, U+2064 & U+2063 (the Unicode invisible times, plus & separator).
my %default_token_content = (
MULOP => "\x{2062}", ADDOP => "\x{2064}", PUNCT => "\x{2063}");
# Remaps some mathvariants to a simpler subset of Unicode
# Consulted by stylizeContent when hackplane1 is set; variants not listed are not remapped.
my %plane1hackable = ( # CONSTANT
script => 'script',
'bold-script' => 'script',
fraktur => 'fraktur',
'bold-fraktur' => 'fraktur',
'double-struck' => 'double-struck');
# Given an item (string or token element w/attributes) and latexml attributes,
# convert the string to the appropriate unicode (possibly plane1)
# & MathML presentation attributes (mathvariant, mathsize, mathcolor, stretchy, ....).
# Attempt to only add MathML attributes that are different from the defaults (eg. OperatorDictionary for m:mo)
# $tag specifies the element that these attributes will apply to (some attributes disallowed)
# The argument %attr overrides the corresponding attributes on $item (if it's an element)
sub stylizeContent {
my ($item, $tag, %attr) = @_;
# Get the basic attributes
my $iselement = (ref $item) eq 'XML::LibXML::Element'; # Item can be string or even Text node.
my $role = $attr{role} || ($iselement && $item->getAttribute('role'));
my $font = $attr{font} || ($iselement && $item->getAttribute('font'))
|| $LaTeXML::MathML::FONT;
my $size = $attr{fontsize} || ($iselement && $item->getAttribute('fontsize'))
|| $LaTeXML::MathML::DESIRED_SIZE;
my $color = $attr{color} || ($iselement && $item->getAttribute('color'))
|| $LaTeXML::MathML::COLOR;
my $bgcolor = $attr{backgroundcolor} && ($iselement && $item->getAttribute('backgroundcolor'))
|| $LaTeXML::MathML::BGCOLOR;
my $opacity = $attr{opacity} || ($iselement && $item->getAttribute('opacity'))
|| $LaTeXML::MathML::OPACITY;
my $class = join(' ', grep { $_; } $attr{class}, ($iselement && $item->getAttribute('class')));
my $cssstyle = join('; ', grep { $_; } $attr{ccsstyle}, ($iselement && $item->getAttribute('cssstyle')));
my $variant = ($font ? unicode_mathvariant($font) : '');
my $istoken = $tag =~ /^m:(?:mi|mo|mn)$/; # mrow? well, no....
my $href = $istoken && ($attr{href} || ($iselement && $item->getAttribute('href')));
my $title = $istoken && ($attr{title} || ($iselement && $item->getAttribute('title')));
my $text = (ref $item ? $item->textContent : $item);
# Implied attributes, relevant for mo operators
my $stretchy = ((defined $attr{stretchy} ? $attr{stretchy} : ($iselement && $item->getAttribute('stretchy')))
|| 'false') eq 'true';
my $isfence = $role && ($role =~ /^(OPEN|CLOSE|MIDDLE)$/);
my $issep = $role && ($role eq 'PUNCT');
my $islargeop = $role && ($role =~ /^(SUMOP|INTOP)$/);
my $ismoveop = $role && ($role =~ /^(SUMOP|INTOP|BIGOP|LIMITOP)$/); # Not DIFFOP
my $issymm = $islargeop || ($text eq '/'); # WANTS to be symmetric
my $pos = ($iselement && $item->getAttribute('scriptpos')) || 'post';
# First figure out the actual text content to use; Adjust font, variant, class for styling
if ((!defined $text) || ($text eq '')) { # Failsafe for empty tokens?
if (my $default = $role && $default_token_content{$role}) {
$text = $default; }
else {
$text = ($iselement ? $item->getAttribute('name') || $item->getAttribute('meaning') || $role : '?');
$color = 'red'; } }
elsif (($text eq '-') && $role && (($role eq 'ADDOP') || ($role eq 'OPERATOR'))) { # MathML Core prefers unicode minus
$text = "\x{2212}"; }
# Special case for single char identifiers?
if (($tag eq 'm:mi') && ($text =~ /^.$/)) { # Single char in mi? (what about m:ci?)
if ($variant eq 'italic') { $variant = undef; } # Defaults to italic
elsif (!$variant) { $variant = 'normal'; } } # must say so explicitly.
elsif ($font && !$variant) {
Warn('unexpected', $font, undef, "Unrecognized font variant '$font'"); $variant = ''; }
# Should we map to Unicode's Plane 1 blocks for Mathematical Alphanumeric Symbols?
# Only upper & lower case latin & greek, and also numerals can be mapped.
# For each mathvariant, and for each of those 5 groups, there is a linear mapping,
# EXCEPT for chars defined before Plain 1, which already exist in lower blocks.
# Get desired mapping strategy
my $plane1 = $$LaTeXML::Post::MATHPROCESSOR{plane1};
my $plane1hack = $$LaTeXML::Post::MATHPROCESSOR{hackplane1};
my $u_variant = $variant
&& ($plane1hack ? $plane1hackable{$variant}
: ($plane1 ? $variant : undef));
my $u_text = $u_variant && unicode_convert($text, $u_variant);
if ((defined $u_text) && ($u_text ne '')) { # didn't remap the text ? Keep text & variant
$text = $u_text;
$variant = ($plane1hack && ($variant ne $u_variant) && ($variant =~ /^bold/)
? 'bold' : undef); } # Possibly keep variant bold
# Use class (css) to patchup some weak translations
if (!$font) { }
elsif ($font =~ /caligraphic/) {
# Note that this is unlikely to have effect when plane1 chars are used!
$class = ($class ? $class . ' ' : '') . 'ltx_font_mathcaligraphic'; }
elsif ($font =~ /script/) {
$class = ($class ? $class . ' ' : '') . 'ltx_font_mathscript'; }
elsif (($font =~ /fraktur/) && ($text =~ /^[\+\-\d\.]*$/)) { # fraktur number?
$class = ($class ? $class . ' ' : '') . 'ltx_font_oldstyle'; }
elsif ($font =~ /smallcaps/) {
$class = ($class ? $class . ' ' : '') . 'ltx_font_smallcaps'; }
if ($opacity) {
$cssstyle = ($cssstyle ? $cssstyle . ';' : '') . "opacity:$opacity"; }
# Now, look up any OperatorDictionary properties
my %props = ($tag eq 'm:mo' ? opdict_lookup($text, $role) : ());
# Resolve stretch & size
$stretchy = undef if ($tag ne 'm:mo'); # Only allowed on m:mo!
$size = undef if $stretchy; # Ignore size, if we're stretching.
my $stretchyhack = undef;
if ($text =~ /^[\x{2061}\x{2062}\x{2063}]*$/) { # invisible get no size or stretchiness
$stretchy = $size = undef; }
if ($size) {
if ($size eq ($LaTeXML::MathML::SIZE || 'text')) { # If default size, no need to mention.
$size = undef; }
# If requested relative size, and in script or scriptscript, we'll need to adjust the size
elsif (($size =~ /%$/) && ($LaTeXML::MathML::STYLE and $LaTeXML::MathML::STYLE =~ /script/)) {
my $req = $size; $req =~ s/%$//;
my $ex = $stylesize{$LaTeXML::MathML::STYLE}; $ex =~ s/%$//;
$size = int(100 * $req / $ex) . '%' if $ex; }
# Note that symmetric is only allowed when stretchy, which looks crappy for specific sizes
# so we'll pretend that delimiters are still stretchy, but restrict size by minsize & maxsize
# (Thanks Peter Krautzberger)
# Really we should check the Operator Dictionary to see if it's expected to be symmetric
if ($size) { # if non-default size
if ($issymm || $props{symmetric}) { # but should be symmetric?
$stretchyhack = 1;
$stretchy = 1; } # so, pretend we asked for stretchy
elsif ($tag eq 'm:mo') {
$stretchy = undef; } } } # Conversely, if size specifically set, don't stretch it!
return ($text,
($variant ? (mathvariant => $variant) : ()),
($size ? ($stretchyhack
? (minsize => $size, maxsize => $size)
: (mathsize => $size))
: ()),
($color ? (mathcolor => $color) : ()),
($bgcolor ? (mathbackground => $bgcolor) : ()),
($cssstyle ? (style => $cssstyle) : ()),
($class ? (class => $class) : ()),
($href ? (href => $href) : ()),
($title ? (title => $title) : ()),
# mo specific additions
(($stretchy xor $props{stretchy}) ? (stretchy => ($stretchy ? 'true' : 'false')) : ()),
(($isfence xor $props{fence}) ? (fence => ($isfence ? 'true' : 'false')) : ()),
(($issep xor $props{separator}) ? (separator => ($issep ? 'true' : 'false')) : ()),
(($islargeop xor $props{largeop}) ? (largeop => ($islargeop ? 'true' : 'false')) : ()),
($islargeop ? (_largeop => 1) : ()), # For needsMathStyle
($issymm && !$props{symmetric} && ($stretchy || $props{stretchy})
? (symmetric => 'true') : ()),
# If an operator has specifically located its scripts, don't let mathml move them.
# A bit non-optimal, as Firefox is rather more generous than OpDict with movablelimits
($ismoveop && (($pos =~ /mid/) || $LaTeXML::MathML::NOMOVABLELIMITS) ? (movablelimits => 'false') : ()),
# Store spacing for later spacing resolution
(defined $props{lspace} ? (_lspace => $props{lspace}) : ()),
(defined $props{rspace} ? (_rspace => $props{rspace}) : ()),
); }
# Generally, $item in the following ought to be a string.
# Build an m:mi (identifier) token for $item.
# $item is normally a string (or XMTok); %attr carries any styling overrides
# which are handed on to stylizeContent.
sub pmml_mi {
  my ($item, %attr) = @_;
  my ($content, %token_attr) = stylizeContent($item, 'm:mi', %attr);
  my $mi = ['m:mi', \%token_attr, $content];
  return pmml_maybe_resize($item, $mi); }
# Really, the same issues as with mi.
# Build an m:mn (number) token for $item; same treatment as pmml_mi,
# only the element name differs.
sub pmml_mn {
  my ($item, %attr) = @_;
  my ($content, %token_attr) = stylizeContent($item, 'm:mn', %attr);
  my $mn = ['m:mn', \%token_attr, $content];
  return pmml_maybe_resize($item, $mn); }
# Note that $item should be either a string, or at most, an XMTok
# Build an m:mo (operator) token for $item, which should be a string
# or at most an XMTok.  The attributes (including any operator-specific ones)
# are whatever stylizeContent returns for an 'm:mo'.
sub pmml_mo {
  my ($item, %attr) = @_;
  my ($content, %token_attr) = stylizeContent($item, 'm:mo', %attr);
  my $mo = ['m:mo', \%token_attr, $content];
  return pmml_maybe_resize($item, $mo); }
# Convert a big operator token to an m:mo.
# If the operator requests a mathstyle different from the current one,
# and %stylemap has attributes for that transition, the m:mo gets wrapped
# in an m:mstyle carrying those attributes.
sub pmml_bigop {
  my ($op) = @_;
  my $opstyle = $op->getAttribute('mathstyle');
  # Attributes needed to switch from the current style to the operator's (if any)
  my $transition;
  if ($opstyle && ($opstyle ne $LaTeXML::MathML::STYLE)) {
    $transition = $stylemap{$LaTeXML::MathML::STYLE}{$opstyle}; }
  my %styleattr = ($transition ? %$transition : ());
  # Convert the operator under its own style, when that is a known style.
  local $LaTeXML::MathML::STYLE
    = (($opstyle && $stylestep{$opstyle}) ? $opstyle : $LaTeXML::MathML::STYLE);
  my $mo = pmml_mo($op);
  return (%styleattr ? ['m:mstyle', {%styleattr}, $mo] : $mo); }
# Since we're keeping track of display style, under/over vs. sub/super
# We've got to keep track of MathML's desire to do it for us,
# and be prepared to override it.
# When we encounter a script, we've got to look into the possibly embellished
# operator for more scripts, and attempt to decipher (based on scriptpos attribute)
# the various positionings (pre, mid, post) and determine whether
# prescripts, multiscripts, munderover or msubsup should be used.
#
# Depending on which order the pre/post sub/super/over/under scripts appear,
# we may end up with a multiscript, or scripts applied to embellished operators.
# In particular, we may end up with postscripts applied to an object with over/under;
# OR, the other way around.
# In the latter case, we may have limits on a primed sum, in which case
# we will want to adjust the spacing so the limits center on the sum WITHOUT the primes!!!!
#
# Moreover, the inner operator may be a largeop and need to be displaystyle;
# since mstyle doesn't nest well inside the scripts, we'll handle that too.
# We also make sure the eventual inner operator (if any) has movablelimits disabled.
# Convert a (possibly nested) script application to presentation MathML.
# The nesting is first deciphered into the innermost base and groups of
# pre-, mid- and post-scripts; the mid-scripts (over/under) are laid out
# around the base, and then the pre/post scripts around that.
# If the innermost base asks for a mathstyle other than the current one,
# the whole construct is wrapped in an m:mstyle setting displaystyle.
sub pmml_script {
  my ($op, $base, $script) = @_;
  # disentangle base & pre/post-scripts
  my ($innerbase, $prescripts, $midscripts, $postscripts, $emb_left, $emb_right)
    = pmml_script_decipher($op, $base, $script);
  my $style        = $innerbase->getAttribute('mathstyle');
  my $needs_mstyle = $style && ($style ne $LaTeXML::MathML::STYLE);
  my $layout       = pmml_script_multi_layout(
    pmml_script_mid_layout($innerbase, $midscripts, $emb_left, $emb_right),
    $prescripts, $postscripts);
  return $layout unless $needs_mstyle;
  my $displaystyle = ($style eq 'display' ? 'true' : 'false');
  return ['m:mstyle', { displaystyle => $displaystyle }, $layout]; }
# Convert $base and stack any mid-positioned (over/under) scripts around it.
# $midscripts is an array of [under, over] pairs; either member may be undef.
# $emb_left, $emb_right are embellishments of the base which are used
# (as phantoms) to pad the scripts; see pmml_scriptsize_padded.
# The base is converted under its own mathstyle (if it has one),
# to avoid an extra m:mstyle.
sub pmml_script_mid_layout {
  my ($base, $midscripts, $emb_left, $emb_right) = @_;
  if (!@$midscripts) {    # No over/under scripts: just the converted base.
    local $LaTeXML::MathML::STYLE = $base->getAttribute('mathstyle') || $LaTeXML::MathML::STYLE;
    my $converted = pmml($base);
    return $converted; }
  my $result;
  { # We're positioning the limits ourselves, so the base mustn't move them.
    local $LaTeXML::MathML::NOMOVABLELIMITS = 1;
    local $LaTeXML::MathML::STYLE = $base->getAttribute('mathstyle') || $LaTeXML::MathML::STYLE;
    $result = pmml($base); }
  # Wrap with the (possibly padded) over & under scripts, innermost first
  foreach my $pair (@$midscripts) {
    my ($sub, $sup) = ($$pair[0], $$pair[1]);
    my $under = (defined $sub ? pmml_scriptsize_padded($sub, $emb_left, $emb_right) : undef);
    my $over  = (defined $sup ? pmml_scriptsize_padded($sup, $emb_left, $emb_right) : undef);
    if (defined $over && defined $under) {
      $result = ['m:munderover', {}, $result, $under, $over]; }
    elsif (defined $over) {
      $result = ['m:mover', {}, $result, $over]; }
    else {
      $result = ['m:munder', {}, $result, $under]; } }
  return $result; }
# Convert an over or under script to scriptsize,
# but pad by phantoms of the base's embellishments, if any.
# This is to handle primed sums, etc....
# Convert an over- or under-script at script size.
# When the base has embellishments on its left and/or right, the script is put
# in an m:mrow flanked by m:mphantom copies of those embellishments,
# so that the script lines up with the unembellished base.
sub pmml_scriptsize_padded {
  my ($script, $emb_left, $emb_right) = @_;
  return pmml_scriptsize($script) unless ($emb_left || $emb_right);
  my @row = ();
  push(@row, ['m:mphantom', {}, pmml_scriptsize($emb_left)]) if $emb_left;
  push(@row, pmml_scriptsize($script));
  push(@row, ['m:mphantom', {}, pmml_scriptsize($emb_right)]) if $emb_right;
  return ['m:mrow', {}, @row]; }
# base is already converted
# Attach pre- and post-scripts to an ALREADY converted $base.
# $prescripts and $postscripts are arrays of [sub, super] pairs.
#   any prescripts           => m:mmultiscripts (with m:mprescripts separator)
#   several postscript pairs => m:mmultiscripts
#   no scripts at all        => the base, unchanged
#   a single postscript pair => m:msub, m:msup or m:msubsup
sub pmml_script_multi_layout {
  my ($base, $prescripts, $postscripts) = @_;
  # Flatten a list of [sub,super] pairs into converted (sub, super, sub, super...)
  my $convert_pairs = sub {
    return map { (pmml_scriptsize($_->[0]), pmml_scriptsize($_->[1])) } @_; };
  my $npost = scalar(@$postscripts);
  if (scalar(@$prescripts) > 0) {
    return ['m:mmultiscripts', {},
      $base,
      $convert_pairs->(@$postscripts),
      ['m:mprescripts'],
      $convert_pairs->(@$prescripts)]; }
  if ($npost > 1) {
    return ['m:mmultiscripts', {}, $base, $convert_pairs->(@$postscripts)]; }
  return $base if $npost == 0;
  # Exactly one pair of postscripts
  my ($sub, $sup) = ($$postscripts[0][0], $$postscripts[0][1]);
  if (!defined $sup) {
    return ['m:msub', {}, $base, pmml_scriptsize($sub)]; }
  if (!defined $sub) {
    return ['m:msup', {}, $base, pmml_scriptsize($sup)]; }
  return ['m:msubsup', {}, $base, pmml_scriptsize($sub), pmml_scriptsize($sup)]; }
# Various pre, post and even mid scripts can be wrapped around a base element.
# Try to decipher such a nesting (in the XMath element) to collect these separate groups
# They probably shouldn't be stirred up, but appear in pre, mid, post order,
# otherwise it's not at all clear how this was expected to look; likely an upstream error?
# Nor should there be more than a single sub & single sup mid positioned script!
# Decipher a (possibly nested) script application.
# Arguments: $op is the script operator of the outermost application,
#   $base its base and $script its script.
# Returns a list:
#   (innermost base, \@pres, \@mids, \@posts, $emb_left, $emb_right)
# where each of pres, mids, posts is an array of pairs [subscript, superscript]
# (either member may be undef).  $emb_right is a post-script found on the base
# INSIDE of mid-scripts (ie. an embellishment of the base);
# $emb_left is never assigned here, so it is always returned as undef.
sub pmml_script_decipher {
my ($op, $base, $script) = @_;
my (@pres, @mids, @posts);
my ($prelevel, $midlevel, $postlevel) = (0, 0, 0);
my $sawmid = 0;
my ($emb_left, $emb_right) = (undef, undef); # embellishments of base on left & right
# $y is 'SUPER' or 'SUB', according to the operator's role (undef if neither)
my ($y) = ($op->getAttribute('role') || '') =~ /^(SUPER|SUB)SCRIPTOP$/;
# scriptpos is a position (pre, mid or post) followed by a level number; default is 'post0'
my ($pos, $level) = ($op->getAttribute('scriptpos') || 'post0')
=~ /^(pre|mid|post)?(\d+)?$/;
# Record the outermost script in the appropriate group, as a [sub,super] pair
if ($pos eq 'pre') {
if ($y eq 'SUB') {
push(@pres, [$script, undef]); $prelevel = $level; }
elsif ($y eq 'SUPER') {
push(@pres, [undef, $script]); $prelevel = $level; } }
elsif ($pos eq 'mid') {
$sawmid = 1;
if ($y eq 'SUB') {
push(@mids, [$script, undef]); $midlevel = $level; }
elsif ($y eq 'SUPER') {
push(@mids, [undef, $script]); $midlevel = $level; } }
else { # else it's post
if ($y eq 'SUB') {
push(@posts, [$script, undef]); $postlevel = $level; }
elsif ($y eq 'SUPER') {
push(@posts, [undef, $script]); $postlevel = $level; } }
# Examine $base to see if there are nested scripts.
# We'll fold them together if they seem to be on the appropriate levels
# Keep from having multiple scripts when $loc is stack!!!
while (1) {
$base = realize($base, 'presentation');
# Only continue while the base is itself an application of a script operator token
last unless getQName($base) eq 'ltx:XMApp';
my ($xop, $xbase, $xscript) = element_nodes($base);
last unless (getQName($xop) eq 'ltx:XMTok');
my ($ny) = ($xop->getAttribute('role') || '') =~ /^(SUPER|SUB)SCRIPTOP$/;
last unless $ny;
my ($nx, $nl) = ($xop->getAttribute('scriptpos') || 'post0')
=~ /^(pre|mid|post)?(\d+)?$/;
my $spos = ($ny eq 'SUB' ? 0 : 1); # index within the [sub,super] pair
# A new pair is started when the level changes, or the slot in the current pair is already taken.
# Note that pre-scripts are appended, whereas mid- and post-scripts are prepended.
if ($nx eq 'pre') {
push(@pres, [undef, undef]) # New empty pair (?)
if ($prelevel ne $nl) || $pres[-1][$spos];
$pres[-1][$spos] = $xscript; $prelevel = $nl; }
elsif ($nx eq 'mid') {
$sawmid = 1;
unshift(@mids, [undef, undef]) # New empty pair (?)
if ($midlevel ne $nl) || $mids[0][$spos];
$mids[0][$spos] = $xscript; $midlevel = $nl; }
else {
if ($sawmid) { # If we already saw mid-scripts (over/under); check for embellishment
# The script is recorded as an embellishment, and we stop here
# WITHOUT descending: $base remains the application including this script.
$emb_right = $xscript;
last; }
unshift(@posts, [undef, undef]) # New empty pair (?)
if ($postlevel ne $nl) || $posts[0][$spos];
$posts[0][$spos] = $xscript; $postlevel = $nl; }
$base = $xbase;
}
return ($base, [@pres], [@mids], [@posts], $emb_left, $emb_right); }
# Handle text contents.
# Note that (currently) MathML doesn't allow math nested in m:mtext,
# nor in fact any other markup within m:mtext,
# but LaTeXML creates that, if the document is structured that way.
# Here we try to flatten the contents to strings, but keep the math as math
# Convert a node of text content into a list of MathML nodes.
# $node is an XML node (text, document fragment or element);
# %attr accumulates the styling attributes (font, fontsize, color,
# backgroundcolor, opacity) found on enclosing elements, and is passed to stylizeContent.
# Returns a (possibly empty) list.
sub pmml_text_aux {
my ($node, %attr) = @_;
return () unless $node;
my $type = $node->nodeType;
if ($type == XML_TEXT_NODE) {
# Plain text becomes an m:mtext; leading & trailing whitespace is replaced by $NBSP
my ($string, %mmlattr) = stylizeContent($node, 'm:mtext', %attr);
$string =~ s/^\s+/$NBSP/; $string =~ s/\s+$/$NBSP/;
return ['m:mtext', {%mmlattr}, $string]; }
elsif ($type == XML_DOCUMENT_FRAG_NODE) {
return map { pmml_text_aux($_, %attr) } $node->childNodes; }
elsif ($type == XML_ELEMENT_NODE) {
# Styling attributes on this element override those passed in, for the contents.
if (my $font = $node->getAttribute('font')) { $attr{font} = $font; }
if (my $size = $node->getAttribute('fontsize')) { $attr{fontsize} = $size; }
if (my $color = $node->getAttribute('color')) { $attr{color} = $color; }
if (my $bgcolor = $node->getAttribute('backgroundcolor')) { $attr{backgroundcolor} = $bgcolor; }
if (my $opacity = $node->getAttribute('opacity')) { $attr{opacity} = $opacity; }
my $tag = getQName($node);
if ($tag eq 'ltx:Math') {
# [NOTE BUG!!! we're not passing through the context... (but maybe pick it up anyway)]
# If XMath still there, convert it now.
if (my $xmath = $LaTeXML::Post::DOCUMENT->findnode('ltx:XMath', $node)) {
return pmml($xmath); }
# Otherwise, may already have gotten converted ? return that
elsif (my $mml = $LaTeXML::Post::DOCUMENT->findnode('m:math', $node)) {
return $mml->childNodes; }
else { # ???
return (); } }
elsif (($tag eq 'ltx:text') # ltx:text element is fine, if we can manage the attributes!
&& (!grep { $node->hasAttribute($_) } qw(framed framecolor))) {
# Flatten: convert the children into a row (possibly resized)
return pmml_maybe_resize($node, pmml_row(map { pmml_text_aux($_, %attr) } $node->childNodes)); }
else {
# We could just recurse on raw content like this, but it loses a lot...
### map(pmml_text_aux($_,%attr), $node->childNodes); }}
# So, let's just include the raw latexml markup, let the xslt convert it
# And hope that the ultimate agent can deal with it!
# Only the attributes from stylizeContent are used here; its text result is discarded.
my ($ignore, %mmlattr) = stylizeContent($node, 'm:mtext', %attr);
delete $mmlattr{stretchy}; # not useful (not too sure
Warn('unexpected', 'nested-math', $node,
"We're getting m:math nested within an m:mtext")
if $LaTeXML::Post::DOCUMENT->findnodes('.//ltx:Math', $node);
return ['m:mtext', {%mmlattr}, $LaTeXML::Post::DOCUMENT->cloneNode($node, 'nest')]; } }
else {
# Any other kind of node contributes nothing.
return (); } }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Adjust the spacing by comparing TeX & author specified spacing to MathML's defaults
# Entry point for spacing adjustment: walk the MathML tree $mml (array form),
# adjusting it in place.  Nothing is returned.
sub adjust_spacing {
  my ($self, $tree) = @_;
  space_walk($self, $tree);
  return; }
# Classifies MathML elements by how space_walk & compute_size treat their children:
#   'atom' : token elements;
#   'mrow' : elements whose children are laid out in a row.
# Anything not listed is treated as 'other'.
our %tag_arg_pattern = ();
$tag_arg_pattern{$_} = 'atom' for qw(m:mi m:mo m:mn m:ms m:mtext);
$tag_arg_pattern{$_} = 'mrow' for qw(m:mrow m:mpadded m:msqrt m:mstyle m:merror m:mphantom m:mtd);
## (map { $_=>'col'; } qw(m:mfrac m:mroot m:mtable m:mtr
## m:msub m:msup m:msubsup m:munder m:mover m:munderover m:mmultiscripts))
# Walk through the MathML tree (ASSUMED represented in ARRAY form)
# to determine spacing between visually adjacent items according to TeX rules
# and adjust if these do not correspond to MathML's rule.
# Since we're unwinding mrows with wild abandon, it is now difficult to insert a new item (eg. m:mspace),
# since we've lost the positions of the items within the actual tree!
# Walk the MathML tree rooted at $node (array form: [tag, attributes, children...]),
# calling adjust_pair on each pair of visually adjacent items within mrow-like elements.
# The tree is modified in place (by adjust_pair); nothing is returned.
sub space_walk {
my ($self, $node) = @_;
return unless $node;
my ($tag, $attr, @children) = @$node;
my $type = $tag_arg_pattern{$tag} || 'other';
if ($type eq 'atom') { } # Atomic things don't need any adjustments
elsif ($type eq 'mrow') { # look at adjacent pairs in mrow-like things
# @nodes is a work queue of items still to be examined; $prev is the item to the left.
my @nodes = @children;
my $prev = shift(@nodes);
while ($prev && ($$prev[0] eq 'm:mrow')) { # Unwrap mrows
unshift(@nodes, @$prev[2 .. $#$prev]);
$prev = shift(@nodes); }
space_walk($self, $prev);
while (my $next = shift(@nodes)) {
my $invisop; # Save Invisible operators as potential target for (l|r)space
# An m:mo consisting only of U+2061, U+2062, U+2063 is set aside, and the following item examined.
if (($$next[0] eq 'm:mo') && $$next[2] && ($$next[2] =~ /^[\x{2061}\x{2062}\x{2063}]*$/)) {
$invisop = $next;
$next = shift(@nodes);
last unless $next; }
if ($$next[0] eq 'm:mrow') { # Unwrap mrows
# The mrow's children (and any pending invisible operator, in front) go back on the queue
unshift(@nodes, @$next[2 .. $#$next]);
unshift(@nodes, $invisop) if $invisop;
next; }
# Unwrap scripts, too; Note that TeX doesn't group according to scripts.
# In fact, the scripts attach even to ")" and don't affect the spacing between atoms
# (the pattern is a prefix match, so it also covers m:msubsup and m:munderover)
if ($$next[0] =~ /^m:(msup|msub|munder|mover|mmultiscripts)/) {
unshift(@nodes, $$next[2]); # Unwrap the base
foreach my $script (@$next[3 .. $#$next]) { # but recurse on the scripts
space_walk($self, $script); }
unshift(@nodes, $invisop) if $invisop;
next; }
space_walk($self, $next); # Recurse, if necessary into any non-atomic atoms.
adjust_pair($self, $prev, $next, $invisop); # Finally, Adjust the spacing between this pair.
$prev = $next; } }
else { # Anything else, just process children individually
map { space_walk($self, $_); } @children; }
return; }
# A minimal size computation; Currently only for tokens.
# A minimal size computation; currently only for token elements.
# $node is a MathML node in array form: [tag, attributes, children...].
# For tokens ('atom' in %tag_arg_pattern), returns (width, height, depth)
# as computed by the default math font for the token's text;
# for anything else, returns nothing.
sub compute_size {
  my ($node) = @_;
  my ($tag, $attr, @children) = @$node;
  my $type = $tag_arg_pattern{$tag} || 'other';
  return unless $type eq 'atom';
  my $font = LaTeXML::Common::Font->mathDefault();
  my ($w, $h, $d) = $font->computeStringSize($children[0]);
  # Heuristic (ridiculous) hack to accommodate MathJax fonts
  if ($w && $$attr{class} && ($$attr{class} =~ /mathscript/)) {
    # Minimum of 10pt; Dimensions are in scaled points, and 1pt = 65536sp (not 65535).
    $w = $w->larger(LaTeXML::Common::Dimension->new(10 * 65536)); }
  return ($w, $h, $d); }
# This maps LaTeXML's role values to the smaller set of TeX's math atom types
# This maps LaTeXML's role values to the smaller set of TeX's math atom types.
# (grouped here by the resulting atom type; entries marked #? are uncertain)
our $role_atomtype = {
  (map { $_ => 'Ord' } qw(ATOM UNKNOWN ID NUMBER MIDDLE FUNCTION),
    'POSTFIX',    #?
    'SUPOP',      #?
    'DIFFOP'),    #? Wants to be close to arg, but preferably has \, before
  (map { $_ => 'Inner' } qw(ARRAY POSTSUBSCRIPT POSTSUPERSCRIPT FLOATSUPERSCRIPT FLOATSUBSCRIPT)),
  (map { $_ => 'Rel' } qw(RELOP METARELOP MODIFIEROP MODIFIER ARROW)),
  (map { $_ => 'Open' } qw(OPEN)),
  (map { $_ => 'Close' } qw(CLOSE)),
  (map { $_ => 'Punct' } qw(PUNCT VERTBAR PERIOD)),
  (map { $_ => 'Bin' } qw(ADDOP MULOP BINOP APPLYOP COMPOSEOP)),
  (map { $_ => 'Op' } qw(OPFUNCTION TRIGFUNCTION BIGOP SUMOP INTOP LIMITOP OPERATOR)),
};
# very rough approximation...
# Very rough approximation of the operator form (prefix, infix, postfix)
# corresponding to each TeX atom type.
our $atomtype_form = {
  Op    => 'prefix',
  Open  => 'prefix',
  Bin   => 'infix',
  Rel   => 'infix',
  Close => 'postfix',
  Punct => 'postfix',
};
# Need array of left & right atom types to TeX's intended spacing
# 0 : no space; 1 : thin space; 2 : medium space; 3 thick space.
# 0, 3, 4, 5 muskip, respectively
# (use negative for spacing only in display & text style; use 0 for * which means "shouldn't happen")
# THUS, we'll need to know which style!!!
# Spacing codes between adjacent atoms: $$atompair_spacing{left type}{right type}.
# The absolute value of a code is used (by adjust_pair) as an index into @tex_spacing;
# a negative code marks spacing that applies only in display & text styles.
our $atompair_spacing = {
Ord => { Ord => 0, Op => 1, Bin => -2, Rel => -3, Open => 0, Close => 0, Punct => 0, Inner => -1 },
Op => { Ord => 1, Op => 1, Bin => 0, Rel => -3, Open => 0, Close => 0, Punct => 0, Inner => -1 },
Bin => { Ord => -2, Op => -2, Bin => 0, Rel => 0, Open => -2, Close => 0, Punct => 0, Inner => -2 },
Rel => { Ord => -3, Op => -3, Bin => 0, Rel => 0, Open => -3, Close => 0, Punct => 0, Inner => -3 },
Open => { Ord => 0, Op => 0, Bin => 0, Rel => 0, Open => 0, Close => 0, Punct => 0, Inner => 0 },
Close => { Ord => 0, Op => 1, Bin => -2, Rel => -3, Open => 0, Close => 0, Punct => 0, Inner => -1 },
Punct => { Ord => -1, Op => -1, Bin => 0, Rel => -1, Open => -1, Close => -1, Punct => -1, Inner => -1 },
Inner => { Ord => -1, Op => 1, Bin => -2, Rel => -3, Open => -1, Close => 0, Punct => -1, Inner => -1 },
## Inner => { Ord => 0, Op => 1, Bin => -2, Rel => -3, Open => 0, Close => 0, Punct => 0, Inner => -1 },
};
# Maybe I'm confused about Inner (\frac??), but the TeX book values seem to give larger spacing than I see in pdf
# It really seems to behave more like an Ord
# The above is NOT quite Knuth's table; but pdf looks to me like \frac[Inner] ( has NO space between!
# Width in ems for each spacing code (index is the absolute value of an $atompair_spacing code)
our @tex_spacing = (0, 0.167, 0.222, 0.2778); # 0,3mu=3/18em, 4mu, 5mu
# Elements that embellish a base (their first child); adjust_pair looks inside these
# to find the inner item whose role and dictionary spacing are used.
our %embellisher = map { $_ => 1; } qw(m:msub m:msup m:msubsup m:munder m:mover m:munderover);
##our %m_atomtype = ('m:mfrac'=>'Inner','m:marray'=>'Inner','m:mspace'=>'Ord');
# NOTE: I'm treating mfrac as an Ord, NOT an Inner (which seems to get too much spacing, more than even pdf!?)
# Atom type determined by the element itself; takes precedence over the role in adjust_pair.
our %m_atomtype = ('m:mfrac' => 'Ord', 'm:marray' => 'Inner', 'm:mspace' => 'Ord');
our $epsilon = 0.01; # ems; Ignore any differences below this
our $fudge = 0.3; # ems; don't complain if we can't adjust less than this
# Adjust the spacing between two visually adjacent MathML nodes (array form),
# so that it matches what TeX (plus any author requested padding) would give,
# rather than MathML's Operator Dictionary defaults.
#   $prev, $next : the adjacent nodes (modified IN PLACE, if adjustment is needed);
#   $invisop     : an invisible operator m:mo lying between them, if any,
#                  which can serve as the carrier of the adjusted spacing.
# Returns nothing.
sub adjust_pair {
  my ($self, $prev, $next, $invisop) = @_;
  # Determine the spacing TeX wants between $prev & $next
  # Consider the spacing on the right of $prev and left of $next.
  # Combine any explicit padding with TeX spacing based on math atom categories
  my ($iprev, $inext) = ($prev, $next); # Inner (possibly embellished, possibly operator) for OperatorDictionary
  while ($iprev && $embellisher{ $$iprev[0] }) { $iprev = $$iprev[2]; }
  while ($inext && $embellisher{ $$inext[0] }) { $inext = $$inext[2]; }
  my $prev_req_right = $$prev[1]{_rpadding} // 0;    # Author spacing, in em
  my $next_req_left  = $$next[1]{_lpadding} // 0;
  my $prev_role      = $$iprev[1]{_role} // 'ATOM';
  my $next_role      = $$inext[1]{_role} // 'ATOM';
  # The element type (if listed in %m_atomtype) takes precedence over the role.
  my $prev_type = $m_atomtype{ $$iprev[0] } || $$role_atomtype{$prev_role} || 'Ord';
  my $next_type = $m_atomtype{ $$inext[0] } || $$role_atomtype{$next_role} || 'Ord';
  my $tex_code  = $$atompair_spacing{$prev_type}{$next_type} // 0;
  my $tex_space = $tex_spacing[abs($tex_code)];
  my $req_space = $prev_req_right + $next_req_left;
  my $target    = $req_space + $tex_space;
  # Now find the default spacing from MathML's operator dictionary (stored by stylizeContent)
  my $prev_dict_right  = $$iprev[1]{_rspace} // 0;
  my $next_dict_left   = $$inext[1]{_lspace} // 0;
  my $default          = ($prev_dict_right + $next_dict_left);
  my $needs_adjustment = abs($target - $default) > $epsilon;    # not ignorably small?
  Debug("SPACEWALK: TeX $tex_space + $req_space vs OpDict $default : "
      . ($needs_adjustment ? colorizeString("ADJUSTMENT $target", 'warning') : " NO adjustment")
      # . ($invisop ? " [with invisible op]" : '')
      . LaTeXML::Post::MathProcessor::shownode($prev) . " $prev_type "
      . LaTeXML::Post::MathProcessor::shownode($next) . " $next_type "
  ) if    # $needs_adjustment &&
    $LaTeXML::DEBUG{mathspacing};
  return unless $needs_adjustment;
  # Note that in MML Core, neither mspace nor mpadded can have negative width!
  # It also does not support relative width using +/- prefix!
  # So, the only alternative is to create an mpadded with an ADJUSTED width.
  # NOTE: spacing in ems seems to be more portable.
  if ($target < 0) {    # Ugh. "rewrap" $prev in m:mpadded, IN PLACE!
    my ($w, $h, $d) = compute_size($prev);    # BAD & Wrong!
    if ($w) {           # (if the size can't be computed, no adjustment is made)
      my $reqw = $w->ptValue / 10.0 + $target;    # presumably assumes 10pt per em
      $reqw = 0 if $reqw < 0;
      splice(@$prev, 0, $#$prev + 1, 'm:mpadded', { width => fmt_em($reqw) }, [@$prev]); } }
  elsif ($$prev[0] eq 'm:mspace') { $$prev[1]{width} = fmt_em($target + getXMHintSpacing($$prev[1]{width})); }
  elsif ($$next[0] eq 'm:mspace') { $$next[1]{width} = fmt_em($target + getXMHintSpacing($$next[1]{width})); }
  elsif ($invisop)                { $$invisop[1]{lspace} = fmt_em($target); }
  elsif (($$prev[0] eq 'm:mo') && ($$next[0] eq 'm:mo')) {    # BOTH are mo, so account for each's spacing
    my $p = $$prev[1]{_rspace} // 0;
    my $n = $$next[1]{_lspace} // 0;
    my $rem;
    if (($rem = $target - $n) >= 0) {    # keep $next's default; put the remainder on $prev
      $$prev[1]{rspace} = fmt_em($rem > $epsilon ? $rem : 0); }
    elsif (($rem = $target - $p) >= 0) {    # keep $prev's default; put the remainder on $next
      $$next[1]{lspace} = fmt_em($rem > $epsilon ? $rem : 0); }
    else {                                  # otherwise split the target evenly between them
      $rem = $target / 2;
      # Format like every other branch, rather than emitting an unrounded float
      $$prev[1]{rspace} = fmt_em($rem) if $rem != $p;
      $$next[1]{lspace} = fmt_em($rem) if $rem != $n; } }
  elsif ($$prev[0] eq 'm:mo') { $$prev[1]{rspace} = fmt_em($target); }
  elsif ($$next[0] eq 'm:mo') { $$next[1]{lspace} = fmt_em($target); }
  elsif (abs($target - $default) > $fudge) {
    Info('ignored', 'spacing', undef,
      "No place to set spacing to $target (default $default) between"
        . LaTeXML::Post::MathProcessor::shownode($prev)
        . " and " . LaTeXML::Post::MathProcessor::shownode($next)); }
  return; }
# Format a length given in ems as a string with units:
# 3 decimal places, or simply '0em' for any false value (0, '', undef).
sub fmt_em {
  my ($ems) = @_;
  return '0em' unless $ems;
  return sprintf("%.3fem", $ems); }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Support functions for Content MathML
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Top-level entry for Content MathML conversion of $node.
# Establishes the styling context (style, font, size, color, background, opacity),
# inherited from the node's context, for the duration of the conversion
# and then converts the node's contents.
sub cmml_top {
  my ($self, $node) = @_;
  my $inherited = sub { find_inherited_attribute($node, $_[0]); };
  local $LaTeXML::MathML::STYLE        = 'text';
  local $LaTeXML::MathML::FONT         = $inherited->('font');
  local $LaTeXML::MathML::SIZE         = $inherited->('fontsize') || '100%';
  local $LaTeXML::MathML::COLOR        = $inherited->('color');
  local $LaTeXML::MathML::BGCOLOR      = $inherited->('backgroundcolor');
  local $LaTeXML::MathML::OPACITY      = $inherited->('opacity');
  local $LaTeXML::MathML::DESIRED_SIZE = $LaTeXML::MathML::SIZE;
  return cmml_contents($node); }
# Convert an XMath node to Content MathML, recording the association
# between the generated node and its source node with the math processor.
# An XMRef is first replaced by the node that it refers to.
sub cmml {
  my ($node) = @_;
  $node = realize($node) if getQName($node) eq 'ltx:XMRef';
  my $converted = cmml_internal($node);
  # Associate the generated node with the source XMath node.
  $LaTeXML::Post::MATHPROCESSOR->associateNode($converted, $node);
  return $converted; }
# The dispatcher for Content MathML conversion: convert $node according to its element type,
# for most types by calling the converter found by lookupContent
# for the node's category, role and meaning.
# A missing node yields an m:merror.
sub cmml_internal {
my ($node) = @_;
return ['m:merror', {}, ['m:mtext', {}, "Missing Subexpression"]] unless $node;
$node = realize($node) if getQName($node) eq 'ltx:XMRef';
my $tag = getQName($node);
if ($tag eq 'ltx:XMDual') {
# Only the content branch of a dual is used here.
my ($content, $presentation) = element_nodes($node);
return cmml($content); }
elsif (($tag eq 'ltx:XMWrap') || ($tag eq 'ltx:XMArg')) { # Only present if parsing failed!
return cmml_contents($node); }
elsif ($tag eq 'ltx:XMApp') {
# An application which itself has a meaning is converted as if it were a Token.
if (my $meaning = $node->getAttribute('meaning')) {
return &{ lookupContent('Token', $node->getAttribute('role'), $meaning) }($node); }
# Experiment: If XMApp has role ID, we treat it as a "Decorated Symbol"
if (($node->getAttribute('role') || '') eq 'ID') {
return cmml_decoratedSymbol($node); }
else {
# Otherwise, dispatch on the role & meaning of the (realized) operator;
# note that the converter receives the original $op, not the realized one.
my ($op, @args) = element_nodes($node);
my $rop = $op;
if (!$op || !($rop = realize($op))) {
return ['m:merror', {}, ['m:mtext', {}, "Missing Operator"]]; }
else {
return &{ lookupContent('Apply', $rop->getAttribute('role'), $rop->getAttribute('meaning')) }($op, @args); } } }
elsif ($tag eq 'ltx:XMTok') {
return &{ lookupContent('Token', $node->getAttribute('role'), $node->getAttribute('meaning')) }($node); }
elsif ($tag eq 'ltx:XMHint') { # ????
return &{ lookupContent('Hint', $node->getAttribute('role'), $node->getAttribute('meaning')) }($node); }
elsif ($tag eq 'ltx:XMArray') {
return &{ lookupContent('Array', $node->getAttribute('role'), $node->getAttribute('meaning')) }($node); }
elsif ($tag eq 'ltx:XMText') {
return cmml_decoratedSymbol($node); }
else {
# Anything else is treated the same way as XMText.
return cmml_decoratedSymbol($node); } }
# Convert the contents of a node, which normally should contain a single child.
# It may be empty (assumed to be an error),
# or contain multiple nodes (presumably not properly parsed).
# We really should use m:cerror here, but need to find appropriate csymbol cd:name
# Convert the element children of $node:
#   none    => an m:cerror (missing-subexpression);
#   several => an m:cerror collecting the converted fragments (see cmml_unparsed);
#   one     => simply the conversion of that child.
sub cmml_contents {
  my ($node) = @_;
  my @items = element_nodes($node);
  if (!$items[0]) {
    return ['m:cerror', {}, ['m:csymbol', { cd => 'ambiguous' }, 'missing-subexpression']]; }
  return (scalar(@items) > 1 ? cmml_unparsed(@items) : cmml($items[0])); }
# Convert a sequence of nodes that could not be parsed into a single expression.
# Returns an m:cerror (csymbol 'fragments') containing each converted node.
sub cmml_unparsed {
  my (@nodes) = @_;
  my @fragments = ();
  for my $node (@nodes) {
    my $is_unknown_token = (getQName($node) eq 'ltx:XMTok')
      && (($node->getAttribute('role') || 'UNKNOWN') eq 'UNKNOWN');
    if (!$is_unknown_token) {
      push(@fragments, cmml($node));
      next; }
    # Deal with random, unknown symbols, but still record association.
    my $symbol = ['m:csymbol', { cd => 'unknown' }, $node->textContent];
    $LaTeXML::Post::MATHPROCESSOR->associateNode($symbol, $node);
    push(@fragments, $symbol); }
  return ['m:cerror', {},
    ['m:csymbol', { cd => 'ambiguous' }, 'fragments'],
    @fragments]; }
# Or csymbol if there's some kind of "defining" attribute?
# Convert a leaf (token) to Content MathML.
#  With a meaning attribute:
#    m:csymbol in the cd given by the omcd attribute, if any;
#    else m:cn for role NUMBER (integer or float, judging by the meaning);
#    else m:csymbol in the 'latexml' cd.
#  Without a meaning (or if $item is not a node): an m:ci of the styled content,
#    prefixed by the mathvariant (if any) and a hyphen.
sub cmml_leaf {
  my ($item) = @_;
  my $meaning = (ref $item) && $item->getAttribute('meaning');
  if (!$meaning) {
    my ($content, %mmlattr) = stylizeContent($item, 'm:ci');
    my $variant = $mmlattr{mathvariant};
    $content = $variant . "-" . $content if $variant;
    return ['m:ci', {}, $content]; }
  my $cd = $item->getAttribute('omcd');
  return ['m:csymbol', { cd => $cd }, $meaning] if $cd;
  if (($item->getAttribute('role') || '') eq 'NUMBER') {
    # special case, numbers with a meaning attribute
    my $type = ($meaning =~ /^[+-]?\d+$/ ? 'integer' : 'float');
    return ['m:cn', { type => $type }, $meaning]; }
  return ['m:csymbol', { cd => 'latexml' }, $meaning]; }
# Experimental; for an XMApp with role=ID, we treat it as a ci
# or ultimately as csymbol, if it had defining attributes,
# but we format its contents as pmml
# Convert a "decorated symbol": an arbitrary blob that is treated as a single symbol.
# If it claims a meaning, it becomes an m:csymbol (cd from omcd, else 'latexml');
# otherwise it becomes an m:ci whose content is the presentation conversion of the item.
sub cmml_decoratedSymbol {
  my ($item) = @_;
  my $meaning = (ref $item) && $item->getAttribute('meaning');
  # Without a meaning, wrap the presentation form
  return ['m:ci', {}, pmml($item)] unless $meaning;
  my $cd = $item->getAttribute('omcd') || 'latexml';
  return ['m:csymbol', { cd => $cd }, $meaning]; }
# Return the NOT of the argument.
# Return the application of m:not to the conversion of the argument.
sub cmml_not {
  my ($arg) = @_;
  my @negated = (cmml($arg));
  return ['m:apply', {}, ['m:not', {}], @negated]; }
# Synthesize the negation of an operator: returns m:not applied to
# the application of the element named $op to the converted @args.
sub cmml_synth_not {
  my ($op, @args) = @_;
  my @converted = map { cmml($_) } @args;
  my $positive  = ['m:apply', {}, [$op, {}], @converted];
  return ['m:apply', {}, ['m:not', {}], $positive]; }
# Return the result of converting the arguments, but reversed.
# Synthesize the complementary relation: returns the application of
# the element named $op to the converted arguments, taken in REVERSE order.
sub cmml_synth_complement {
  my ($op, @args) = @_;
  my @swapped = map { cmml($_) } reverse(@args);
  return ['m:apply', {}, [$op, {}], @swapped]; }
# Given an XMath node, Make sure it has an id so it can be shared, then convert to cmml
# Convert an XMath node that will be shared (referred to from elsewhere):
# first have the document generate an ID for the node (with prefix 'sh'),
# then convert it to cmml as usual.
sub cmml_shared {
  my ($shared) = @_;
  my $document = $LaTeXML::Post::DOCUMENT;
  $document->generateNodeID($shared, 'sh');
  return cmml($shared); }
# Given an XMath node, convert to cmml share form
# Produce an m:share referring to the (already converted) XMath node:
# the href is '#', the node's fragid and the math processor's ID suffix.
# If the node has no fragid, warn and return an m:share without href.
sub cmml_share {
  my ($node) = @_;
  my $fragid = $node->getAttribute('fragid');
  if (!$fragid) {    # No fragid should be error/warning or something???
    Warn('expected', 'fragid', $node,
      "Shared node is missing fragid");
    return ['m:share']; }
  my $href = '#' . $fragid . $LaTeXML::Post::MATHPROCESSOR->IDSuffix;
  return ['m:share', { href => $href }]; }
# Compose a relation out of several operators (eg. less-than OR equal).
#   $operators : array ref of MathML element names (eg. 'm:lt', 'm:eq');
#   @args      : the XMath argument nodes.
# With a single operator, returns its plain application to the converted arguments.
# Otherwise returns an m:or of one application per operator: the first application
# converts the arguments (marking them as shared, see cmml_shared),
# the remaining ones refer back to them with m:share (see cmml_share).
sub cmml_or_compose {
  my ($operators, @args) = @_;
  my ($first, @others) = @$operators;
  if (scalar(@$operators) == 1) {
    return ['m:apply', {}, [$first, {}], map { cmml($_) } @args]; }
  my @parts = (['m:apply', {}, [$first, {}], map { cmml_shared($_) } @args]);
  # Iterate over the remaining operators directly; shifting them off the list
  # being iterated would skip operators whenever there are more than two.
  foreach my $op (@others) {
    push(@parts, ['m:apply', {}, [$op, {}], map { cmml_share($_) } @args]); }
  return ['m:or', {}, @parts]; }
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Translators
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Organized according to the MathML Content element lists.
# As a general rule, presentation conversions are based on role
# (eg "Token:role:?"), whereas content conversions are based
# on meaning or name (eg. "Token:?:meaning").
#======================================================================
# Token elements:
# cn, ci, csymbol
# The default Token: m:mi for presentation, cmml_leaf for content.
DefMathML("Token:?:?", \&pmml_mi, \&cmml_leaf);
# Tokens with these roles are presented as operators (m:mo); no content conversion defined here.
foreach my $role (qw(PUNCT PERIOD OPEN CLOSE MIDDLE VERTBAR ARROW OVERACCENT UNDERACCENT)) {
  DefMathML("Token:${role}:?", \&pmml_mo, undef); }
# Numbers: m:mn for presentation; m:cn for content, typed integer or float judging by the text.
DefMathML("Token:NUMBER:?", \&pmml_mn, sub {
    my ($token) = @_;
    my $digits = $token->textContent;
    my $type   = ($digits =~ /^[+-]?\d+$/ ? 'integer' : 'float');
    return ['m:cn', { type => $type }, $digits]; });
# An "absent" token is presented as an empty m:mi
DefMathML("Token:?:absent", sub { return ['m:mi', {}] });
# Hints normally would have disappeared during parsing
# (turned into punctuation or padding?)
# but if they survive (unparsed?) turn them into space
# Presentation: an m:mspace with the hint's width (in ems), as given by getXMHintSpacing;
# when that width is false (eg. 0), the m:mspace is instead marked with _ignorable.
# Content: nothing.
DefMathML('Hint:?:?', sub {
my ($node) = @_;
my $w = getXMHintSpacing($node->getAttribute('width'));
# note that we MUST return some node (not undef)
['m:mspace', { ($w ? (width => $w . 'em') : (_ignorable => 1)) }]; },
sub { undef; }); # Should Disappear from cmml!
# At presentation level, these are essentially adorned tokens.
# args are (accent,base)
# Note: accents are drawn at same size as base!
# Over accent: m:mover with accent=true.
# But if the base is itself an application of an UNDERACCENT,
# both accents are combined around the inner base in a single m:munderover.
DefMathML('Apply:OVERACCENT:?', sub {
my ($accent, $base) = @_;
if (getQName($base) eq 'ltx:XMApp') {
my ($xaccent, $xbase) = element_nodes($base);
if (($xaccent->getAttribute('role') || '') eq 'UNDERACCENT') {
# children of m:munderover are: base, under, over
return ['m:munderover', { accent => 'true', accentunder => 'true' },
pmml($xbase), pmml_scriptsize($xaccent), pmml($accent)]; } }
return ['m:mover', { accent => 'true' }, pmml($base), pmml($accent)]; });
# Under accent: m:munder with accentunder=true.
# But if the base is itself an application of an OVERACCENT,
# both accents are combined around the inner base in a single m:munderover.
DefMathML('Apply:UNDERACCENT:?', sub {
my ($accent, $base) = @_;
if (getQName($base) eq 'ltx:XMApp') {
my ($xaccent, $xbase) = element_nodes($base);
if (($xaccent->getAttribute('role') || '') eq 'OVERACCENT') {
# children of m:munderover are: base, under, over
return ['m:munderover', { accent => 'true', accentunder => 'true' },
pmml($xbase), pmml_scriptsize($accent), pmml($xaccent)]; } }
return ['m:munder', { accentunder => 'true' }, pmml($base), pmml($accent)]; });
# Enclosure: m:menclose, with notation taken from the operator's enclose attribute,
# and colored by the operator's color (else the current color).
DefMathML('Apply:ENCLOSE:?', sub {
my ($op, $base) = @_;
my $enclosure = $op->getAttribute('enclose');
my $color = $op->getAttribute('color') || $LaTeXML::MathML::COLOR;
# NOTE(review): mathcolor is given even when $color is undefined;
# presumably undefined attributes are dropped downstream -- confirm.
# When a color is in effect, the base is wrapped in an m:mstyle setting mathcolor
# back to the current color (or black), so that the color applies only to the enclosure.
return ['m:menclose', { notation => $enclosure, mathcolor => $color },
($color ? ['m:mstyle', { mathcolor => $LaTeXML::MathML::COLOR || 'black' }, pmml($base)]
: pmml($base))]; });
#======================================================================
# Basic Content elements:
# apply, interval, inverse, sep, condition, declare, lambda, compose, ident,
# domain, codomain, image, domainofapplication, piecewise, piece, otherwise
DefMathML("Token:APPLYOP:?", \&pmml_mo, undef);    # APPLYOP is (only) \x{2061}; FUNCTION APPLICATION
DefMathML("Token:OPERATOR:?", \&pmml_mo, undef);
# Generic application.
# Presentation: an m:mrow of the operator followed by the arguments; an invisible
#   FUNCTION APPLICATION (U+2061) is put after the operator, unless the operator is an m:mo,
#   possibly embellished by scripts.
# Content: m:apply of the converted operator to the converted arguments.
DefMathML('Apply:?:?', sub {
    my ($op, @args) = @_;
    my $pop   = pmml($op);
    my $inner = $pop;
    # Look through script embellishments to find the core of the operator.
    # The patterns are prefixes, so msubsup & munderover are covered as well.
    # m:mmultiscripts (which has the base as first child) also embellishes its base;
    # m:mprescripts is only the empty separator inside it, and never wraps a base.
    while ($inner && ($$inner[0] ne 'm:mo')) {
      last unless $$inner[0] =~ /^m:(?:msub|msup|munder|mover|mmultiscripts)/;
      $inner = $$inner[2]; }
    my $is_mo = $inner && ($$inner[0] eq 'm:mo');
    return ['m:mrow', {},
      $pop, ($is_mo ? () : pmml_mo("\x{2061}")),    # FUNCTION APPLICATION only if not an m:mo
      map { pmml($_) } @args]; },
  sub {
    my ($op, @args) = @_;
    return ['m:apply', {}, cmml($op), map { cmml($_) } @args]; });
DefMathML('Apply:COMPOSEOP:?', \&pmml_infix, undef);
DefMathML("Token:COMPOSEOP:?", \&pmml_mo,    undef);
DefMathML("Token:DIFFOP:?",    \&pmml_mo,    undef);
# In pragmatic CMML, these are containers
# The four kinds of interval: the meaning "<closure>-interval" gives
# an m:interval container with the corresponding closure attribute.
foreach my $closure (qw(open closed closed-open open-closed)) {
  my $kind = $closure;
  DefMathML("Apply:?:${kind}-interval", undef, sub {
      my ($op, @args) = @_;
      return ['m:interval', { closure => $kind }, map { cmml($_) } @args]; }); }
# Tokens whose meaning corresponds directly to an (empty) content element.
foreach my $pair (
  ['inverse',  'm:inverse'],
  ['lambda',   'm:lambda'],
  ['compose',  'm:compose'],
  ['identity', 'm:ident'],
  ['domain',   'm:domain'],
  ['codomain', 'm:codomain'],
  ['image',    'm:image']) {
  my ($meaning, $element) = @$pair;
  DefMathML("Token:?:${meaning}", undef, sub { return [$element]; }); }
# m:piece, m:piecewise & m:otherwise are generated as part of a cases construct
# Content conversion of a cases array into m:piecewise.
# Each row with (at least) 2 cells gives an m:piece of the first 2 cells (value, condition);
# a row with a single cell, or whose second cell's text is 'otherwise', gives the m:otherwise.
DefMathML("Array:?:cases", undef, sub {
my ($node) = @_;
my @rows = ();
my @otherwises;
foreach my $row (element_nodes($node)) {
my @items = element_nodes($row);
my $n = scalar(@items);
if ($n == 0) { } # empty row, just skip
elsif ($n == 1) { # No condition? Perhaps it means "otherwise" ?
push(@otherwises, $items[0]); }
elsif ($items[1]->textContent eq 'otherwise') { # more robust test?
push(@otherwises, $items[0]); }
else { # Really, the 2nd cell needs to be "Looked at"; may contain "if","when" or "unless"?!?!
push(@rows, ['m:piece', {}, cmml_contents($items[0]), cmml_contents($items[1])]); } }
if (@otherwises) {
# Only the first otherwise clause is used (placed last); any more draw a warning.
if (@otherwises > 1) {
Warn('unexpected', 'otherwise', $node,
"Cases statement seems to have multiple otherwise clauses",
@otherwises); }
push(@rows, ['m:otherwise', {}, cmml_contents($otherwises[0])]); }
return ['m:piecewise', {}, @rows]; });
#======================================================================
# Arithmetic, Algebra and Logic:
# quotient, factorial, divide, max, min, minus, plus, power, rem, times, root
# gcd, and, or, xor, not, implies, forall, exists, abs, conjugate, arg, real,
# imaginary, lcm, floor, ceiling.
# BRM:
# Additive operators: presented as m:mo (infix when applied);
# plus and minus additionally have content forms.
DefMathML('Token:ADDOP:?', \&pmml_mo, undef);
foreach my $name (qw(plus minus)) {
  my $element = "m:$name";
  DefMathML("Token:ADDOP:${name}", undef, sub { return [$element]; }); }
DefMathML('Apply:ADDOP:?', \&pmml_infix, undef);
# Multiplicative operators, and the generic binary operators.
# Unsatisfactory BINOP = ADDOP or MULOP ???
foreach my $role (qw(MULOP BINOP)) {
  DefMathML("Token:${role}:?", \&pmml_mo,    undef);
  DefMathML("Apply:${role}:?", \&pmml_infix, undef); }
# Presentation of fractions as m:mfrac.
# The operator node supplies optional attributes: thickness (=> linethickness),
# color (falling back to the current $LaTeXML::MathML::COLOR), and the
# class ltx_bevelled (=> bevelled="true").
# Numerator and denominator are converted at the next smaller style.
DefMathML('Apply:FRACOP:?', sub {
    my ($op, $num, $den) = @_;
    my %attr = ();
    my $thickness = $op->getAttribute('thickness');
    $attr{linethickness} = $thickness if defined $thickness;
    my $color = $op->getAttribute('color') || $LaTeXML::MathML::COLOR;
    $attr{mathcolor} = $color if $color;
    my @classes = split(/\s+/, $op->getAttribute('class') || '');
    $attr{bevelled} = 'true' if grep { $_ eq 'ltx_bevelled' } @classes;
    return ['m:mfrac', {%attr}, pmml_smaller($num), pmml_smaller($den)]; });
# Modifiers, middle delimiters and SUPOP tokens: plain infix / m:mo presentation.
DefMathML('Apply:MODIFIEROP:?', \&pmml_infix, undef);
DefMathML('Token:MODIFIEROP:?', \&pmml_mo,    undef);
DefMathML('Apply:MIDDLE:?',     \&pmml_infix, undef);
DefMathML('Token:SUPOP:?',      \&pmml_mo,    undef);
# Script operators: applications are presented by pmml_script, while the bare
# operator token converts to an 'ambiguous' csymbol in content.
foreach my $script (qw(superscript subscript)) {
  my $symbol = $script;                  # per-iteration copy captured by the handler
  my $role   = uc($symbol) . 'OP';       # SUPERSCRIPTOP, SUBSCRIPTOP
  DefMathML("Apply:${role}:?", \&pmml_script, undef);
  DefMathML("Token:${role}:?", undef, sub {
      return ['m:csymbol', { cd => 'ambiguous' }, $symbol]; }); }
# Postfix operators: presentation is reversed (argument first, then operator).
DefMathML('Apply:POSTFIX:?', sub {
    my ($op, $arg) = @_;
    return ['m:mrow', {}, pmml($arg), pmml($op)]; });
DefMathML('Token:POSTFIX:?', \&pmml_mo, undef);
# Square root: m:msqrt for presentation (colored per the operator's color
# attribute or the current default color); m:root applied to the argument for content.
DefMathML('Apply:?:square-root',
  sub {
    my ($op, $radicand) = @_;
    my $color = $op->getAttribute('color') || $LaTeXML::MathML::COLOR;
    my %attr  = ($color ? (mathcolor => $color) : ());
    return ['m:msqrt', {%attr}, pmml($radicand)]; },
  sub {
    my ($op, $radicand) = @_;
    return ['m:apply', {}, ['m:root', {}], cmml($radicand)]; });
# N-th root: the arguments arrive as (index, radicand).
# m:mroot wants the radicand first, followed by the index at script size;
# the content form carries the index in an m:degree qualifier.
DefMathML('Apply:?:nth-root',
  sub {
    my ($op, $index, $radicand) = @_;
    my $color = $op->getAttribute('color') || $LaTeXML::MathML::COLOR;
    my %attr  = ($color ? (mathcolor => $color) : ());
    return ['m:mroot', {%attr}, pmml($radicand), pmml_scriptsize($index)]; },
  sub {
    my ($op, $index, $radicand) = @_;
    return ['m:apply', {}, ['m:root', {}], ['m:degree', {}, cmml($index)], cmml($radicand)]; });
# Content symbols for arithmetic, algebra and logic: each meaning (any role)
# converts to a single empty content element.
# Note MML's distinction between quotient and divide: quotient yields an integer.
# NOTE(review): 'm:uminus' does not appear among the element names listed for
# this section; confirm that consumers expect it rather than a unary m:minus.
foreach my $entry (
  ['quotient',       'm:quotient'],
  ['factorial',      'm:factorial'],
  ['divide',         'm:divide'],
  ['maximum',        'm:max'],
  ['minimum',        'm:min'],
  ['minus',          'm:minus'],
  ['uminus',         'm:uminus'],
  ['plus',           'm:plus'],
  ['power',          'm:power'],
  ['remainder',      'm:rem'],
  ['times',          'm:times'],
  ['gcd',            'm:gcd'],
  ['and',            'm:and'],
  ['or',             'm:or'],
  ['xor',            'm:xor'],
  ['not',            'm:not'],
  ['implies',        'm:implies'],
  ['forall',         'm:forall'],
  ['exists',         'm:exists'],
  ['absolute-value', 'm:abs'],
  ['conjugate',      'm:conjugate'],
  ['argument',       'm:arg'],
  ['real-part',      'm:real'],
  ['imaginary-part', 'm:imaginary'],
  ['lcm',            'm:lcm'],
  ['floor',          'm:floor'],
  ['ceiling',        'm:ceiling']) {
  my ($meaning, $element) = @$entry;
  DefMathML("Token:?:${meaning}", undef, sub { return [$element]; }); }
#======================================================================
# Relations:
# eq, neq, gt, lt, geq, leq, equivalent, approx, factorof
# Relational and meta-relational operators: tokens are presented as m:mo,
# applications as infix expressions.
foreach my $role (qw(RELOP METARELOP)) {
  DefMathML("Token:${role}:?", \&pmml_mo);
  DefMathML("Apply:${role}:?", \&pmml_infix); }
# Relations having a direct Content MathML counterpart.
foreach my $entry (
  ['equals',                 'm:eq'],
  ['not-equals',             'm:neq'],
  ['greater-than',           'm:gt'],
  ['less-than',              'm:lt'],
  ['greater-than-or-equals', 'm:geq'],
  ['less-than-or-equals',    'm:leq'],
  ['equivalent-to',          'm:equivalent'],
  ['approximately-equals',   'm:approx'],
  ['factor-of',              'm:factorof']) {
  my ($meaning, $element) = @$entry;
  DefMathML("Token:?:${meaning}", undef, sub { return [$element]; }); }
# Relations that must be synthesized from other content operators.
DefMathML('Apply:?:not-approximately-equals', undef, sub {
    my (undef, @operands) = @_;
    return cmml_synth_not('m:approx', @operands); });
DefMathML('Apply:?:less-than-or-approximately-equals', undef, sub {
    my (undef, @operands) = @_;
    return cmml_or_compose(['m:lt', 'm:approx'], @operands); });
# Top level relations
# A sequence of formulae:
# presentation simply lays the converted members out in a row;
# content applies the 'formulae-sequence' csymbol (cd 'ambiguous') to them.
DefMathML('Apply:?:formulae',
  sub {
    my (undef, @members) = @_;
    my @converted = map { pmml($_) } @members;
    return pmml_row(@converted); },
  sub {
    my (undef, @members) = @_;
    my @converted = map { cmml($_) } @members;
    return ['m:apply', {},
      ['m:csymbol', { cd => 'ambiguous' }, 'formulae-sequence'],
      @converted]; });
# A chain of relations: operand, relation, operand, relation, operand, ...
DefMathML('Apply:?:multirelation',
  sub {
    my (undef, @items) = @_;
    # This presumes that the relational operators scattered through the items
    # will be recognized as such by pmml and turned into m:mo.
    my @converted = map { pmml($_) } @items;
    return pmml_row(@converted); },
  sub {
    my (undef, @items) = @_;
    my $left = cmml(shift(@items));
    return $left if !@items;    # a lone operand; no relation to build
    my @links = ();
    # Consume (relation, right operand) pairs; each right-hand side becomes
    # the left-hand side of the following relation.
    while (my ($relation, $right) = splice(@items, 0, 2)) {
      push(@links, ['m:apply', {}, cmml($relation), $left, cmml_shared($right)]);
      $left = cmml_share($right); }
    # Several relations are conjoined with m:and; a single one stands alone.
    if (scalar(@links) > 1) {
      return ['m:apply', {}, ['m:and', {}], @links]; }
    else {
      return $links[0]; } }
);
#======================================================================
# Calculus and Vector Calculus:
# int, diff, partialdiff, lowlimit, uplimit, bvar, degree,
# divergence, grad, curl, laplacian.
# Presentation of integral & limit operator tokens, and of applied arrows.
foreach my $entry (
  ['Token:INTOP:?',   \&pmml_bigop],
  ['Token:LIMITOP:?', \&pmml_mo],
  ['Apply:ARROW:?',   \&pmml_infix]) {
  my ($key, $presenter) = @$entry;
  DefMathML($key, $presenter); }
# Content symbols for calculus and vector calculus.
# lowlimit, uplimit, degree ?
foreach my $entry (
  ['integral',             'm:int'],
  ['differential',         'm:diff'],
  ['partial-differential', 'm:partialdiff'],
  ['divergence',           'm:divergence'],
  ['gradient',             'm:grad'],
  ['curl',                 'm:curl'],
  ['laplacian',            'm:laplacian']) {
  my ($meaning, $element) = @$entry;
  DefMathML("Token:?:${meaning}", undef, sub { return [$element]; }); }
#======================================================================
# Theory of Sets,
# set, list, union, intersect, in, notin, subset, prsubset, notsubset, notprsubset,
# setdiff, card, cartesianproduct.
# Sets and lists are containers in content: the converted arguments become
# the children of m:set or m:list.
foreach my $name (qw(set list)) {
  my $element = "m:$name";
  DefMathML("Apply:?:${name}", undef, sub {
      my (undef, @members) = @_;
      return [$element, {}, map { cmml($_) } @members]; }); }
# Set operators & relations having a direct content counterpart.
# Both subset-of and subset-of-or-equals convert to m:subset.
foreach my $entry (
  ['union',                    'm:union'],
  ['intersection',             'm:intersect'],
  ['element-of',               'm:in'],
  ['not-element-of',           'm:notin'],
  ['subset-of',                'm:subset'],
  ['subset-of-or-equals',      'm:subset'],
  ['subset-of-and-not-equals', 'm:prsubset'],
  ['set-minus',                'm:setdiff'],
  ['cardinality',              'm:card'],
  ['cartesian-product',        'm:cartesianproduct']) {
  my ($meaning, $element) = @$entry;
  DefMathML("Token:?:${meaning}", undef, sub { return [$element]; }); }
# Relations expressed through their complementary operator,
# by way of cmml_synth_complement.
foreach my $entry (
  ['contains',                   'm:in'],
  ['not-contains',               'm:notin'],
  ['superset-of',                'm:subset'],
  ['superset-of-or-equals',      'm:subset'],
  ['superset-of-and-not-equals', 'm:prsubset']) {
  my ($meaning, $complement) = @$entry;
  DefMathML("Apply:?:${meaning}", undef, sub {
      my (undef, @operands) = @_;
      return cmml_synth_complement($complement, @operands); }); }
#======================================================================
# Sequences and Series:
# sum, product, limit, tendsto
# (but see calculus for limit too!!)
# Big operator and summation operator tokens are presented by pmml_bigop.
foreach my $role (qw(BIGOP SUMOP)) {
  DefMathML("Token:${role}:?", \&pmml_bigop); }
# ?? or something....
# Presentation handler for an applied big operator (sum, integral, ...):
# given the operator node and the body node, converts both and
# places them side by side in an m:mrow.
sub pmml_summation {
  my ($op, $body) = @_;
  my @parts = (pmml($op), pmml($body));
  return ['m:mrow', {}, @parts]; }
# Applied big operators, integrals and sums share the summation presentation.
foreach my $role (qw(BIGOP INTOP SUMOP)) {
  DefMathML("Apply:${role}:?", \&pmml_summation); }
# A one-sided limit: the argument followed by the direction.
DefMathML('Apply:?:limit-from', sub {
    my (undef, $arg, $dir) = @_;
    return ['m:mrow', {}, pmml($arg), pmml($dir)]; });
# An annotated variable: the variable, a fixed-width space, then the annotation.
DefMathML('Apply:?:annotated', sub {
    my (undef, $var, $annotation) = @_;
    my $gap = ['m:mspace', { width => '0.3888888888888889em' }];
    return ['m:mrow', {}, pmml($var), $gap, pmml($annotation)]; });
# Content symbols for sequences and series: each meaning (any role) converts
# to a single empty content element.
# The Content MathML element for products is m:product; there is no 'prod'
# element in MathML, so emitting m:prod produced invalid markup.
DefMathML("Token:?:sum",      undef, sub { return ['m:sum']; });
DefMathML("Token:?:prod",     undef, sub { return ['m:product']; });
DefMathML("Token:?:limit",    undef, sub { return ['m:limit']; });
DefMathML("Token:?:tends-to", undef, sub { return ['m:tendsto']; });
#======================================================================
# Elementary Classical Functions,
# exp, ln, log, sin, cos tan, sec, csc, cot, sinh, cosh, tanh, sech, csch, coth,
# arcsin, arccos, arctan, arccosh, arccot, arccoth, arccsc, arccsch, arcsec, arcsech,
# arcsinh, arctanh
# Content symbols for the elementary classical functions: each meaning
# (any role) converts to a single empty content element.
foreach my $entry (
  ['exponential',       'm:exp'],
  ['natural-logarithm', 'm:ln'],
  ['logarithm',         'm:log'],
  # Trigonometric functions
  ['sine',      'm:sin'],
  ['cosine',    'm:cos'],
  ['tangent',   'm:tan'],
  ['secant',    'm:sec'],
  ['cosecant',  'm:csc'],
  ['cotangent', 'm:cot'],
  # Hyperbolic functions
  ['hyperbolic-sine',      'm:sinh'],
  ['hyperbolic-cosine',    'm:cosh'],
  ['hyperbolic-tangent',   'm:tanh'],
  ['hyperbolic-secant',    'm:sech'],
  ['hyperbolic-cosecant',  'm:csch'],
  ['hyperbolic-cotangent', 'm:coth'],
  # Historical misspelling of the key above; the correctly spelled meaning
  # never matched it.  Kept only so that anything relying on it still works.
  ['hyperbolic-cotantent', 'm:coth'],
  # Inverse trigonometric functions
  ['inverse-sine',      'm:arcsin'],
  ['inverse-cosine',    'm:arccos'],
  ['inverse-tangent',   'm:arctan'],
  ['inverse-secant',    'm:arcsec'],
  ['inverse-cosecant',  'm:arccsc'],
  ['inverse-cotangent', 'm:arccot'],
  # Inverse hyperbolic functions
  ['inverse-hyperbolic-sine',      'm:arcsinh'],
  ['inverse-hyperbolic-cosine',    'm:arccosh'],
  ['inverse-hyperbolic-tangent',   'm:arctanh'],
  ['inverse-hyperbolic-secant',    'm:arcsech'],
  ['inverse-hyperbolic-cosecant',  'm:arccsch'],
  ['inverse-hyperbolic-cotangent', 'm:arccoth']) {
  my ($meaning, $element) = @$entry;
  DefMathML("Token:?:${meaning}", undef, sub { return [$element]; }); }
#======================================================================
# Statistics:
# mean, sdev, variance, median, mode, moment, momentabout
# Content symbols for statistics: each meaning (any role) converts to a
# single empty content element.
# The variance element is named 'variance' (as listed for this section);
# 'var' was the obsolete MathML 1 name and is not valid in MathML 2 or 3.
DefMathML("Token:?:mean",               undef, sub { return ['m:mean']; });
DefMathML("Token:?:standard-deviation", undef, sub { return ['m:sdev']; });
DefMathML("Token:?:variance",           undef, sub { return ['m:variance']; });
DefMathML("Token:?:median",             undef, sub { return ['m:median']; });
DefMathML("Token:?:mode",               undef, sub { return ['m:mode']; });
DefMathML("Token:?:moment",             undef, sub { return ['m:moment']; });
# momentabout ???
#======================================================================
# Linear Algebra:
# vector, matrix, matrixrow, determinant, transpose, selector,
# vectorproduct, scalarproduct, outerproduct.
# Vectors are containers in content: the converted arguments become
# the children of m:vector.
DefMathML("Apply:?:vector", undef, sub {
    my (undef, @components) = @_;
    return ['m:vector', {}, map { cmml($_) } @components]; });
#DefMathML("Token:?:matrix", undef, sub { return ['m:matrix']; });
# Linear algebra operators having a direct content counterpart.
foreach my $entry (
  ['determinant',    'm:determinant'],
  ['transpose',      'm:transpose'],
  ['selector',       'm:selector'],
  ['vector-product', 'm:vectorproduct'],
  ['scalar-product', 'm:scalarproduct'],
  ['outer-product',  'm:outerproduct']) {
  my ($meaning, $element) = @$entry;
  DefMathML("Token:?:${meaning}", undef, sub { return [$element]; }); }
# So by default any Array is a Matrix? hmmm....
# Each child element of the array becomes an m:matrixrow holding the
# converted contents of that row's cells.
DefMathML("Array:?:?", undef, sub {
    my ($array) = @_;
    my @rows = ();
    foreach my $row (element_nodes($array)) {
      my @cells = map { cmml_contents($_) } element_nodes($row);
      push(@rows, ['m:matrixrow', {}, @cells]); }
    return ['m:matrix', {}, @rows]; });
#======================================================================
# Semantic Mapping Elements
# annotation, semantics, annotation-xml
#======================================================================
# Constant and Symbol Elements
# integers, reals, rationals, naturalnumbers, complexes, primes,
# exponentiale, imaginaryi, notanumber, true, false, emptyset, pi,
# eulergamma, infinity
# Constants and symbols: tokens with role ID and the given meaning
# convert to a single empty content element.
foreach my $entry (
  ['integers',       'm:integers'],
  ['reals',          'm:reals'],
  ['rationals',      'm:rationals'],
  ['numbers',        'm:naturalnumbers'],
  ['complexes',      'm:complexes'],
  ['primes',         'm:primes'],
  ['exponential-e',  'm:exponentiale'],
  ['imaginary-i',    'm:imaginaryi'],
  ['notanumber',     'm:notanumber'],
  ['true',           'm:true'],
  ['false',          'm:false'],
  ['empty-set',      'm:emptyset'],
  ['circular-pi',    'm:pi'],
  ['Euler-constant', 'm:eulergamma'],
  ['infinity',       'm:infinity']) {
  my ($meaning, $element) = @$entry;
  DefMathML("Token:ID:${meaning}", undef, sub { return [$element]; }); }
#======================================================================
# Purely presentational constructs.
# An issue here:
# Some constructs are pretty purely presentational. Hopefully, these would
# only appear in XWrap's or in the presentation branch of an XMDual, so we won't
# attempt to convert them to content. But if we do, should we warn?
# ================================================================================
# More exotic things
# ================================================================================
# cfrac! Ugh!
# Have to deal w/ screwy structure:
# If denom is a sum/diff then last summand can be: cdots, cfrac
# or invisibleTimes of cdots and something which could also be a cfrac!
# There is some really messy manipulation of display/text style...probably not all correct.
# This really should be handled earlier by an XMDual.
# Convert a continued fraction, given its numerator and denominator XMath nodes,
# to Presentation MathML, unrolling a trailing nested continued fraction (or
# trailing dots) out of the denominator so that it sits at the top level.
# Returns a LIST of converted items when something was pulled up to the top
# level (the fraction built so far, followed by the dots and/or the unrolled
# remainder); otherwise returns the single plain m:mfrac.
# The caller in this file wraps the result with pmml_row.
sub do_cfrac {
  my ($numer, $denom) = @_;
  if (getQName($denom) eq 'ltx:XMApp') {    # Denominator is some kind of application
    my ($denomop, @denomargs) = element_nodes($denom);
    # Guard: an application with no operator or no operands offers nothing to
    # unroll, and calling methods on the missing nodes would be fatal;
    # such a denominator falls through to the plain fraction below.
    if (defined($denomop) && @denomargs
      && ((($denomop->getAttribute('role') || '') eq 'ADDOP')    # Is it a sum or difference?
        || (($denomop->textContent || '') eq "\x{22EF}"))) {     # OR a \cdots
      my $last = pop(@denomargs);                                # Check last operand in denominator.
      # this is the current contribution to the cfrac (if we match the last term):
      # the remaining operands, followed by a trailing copy of the operator.
      # NOTE(review): if $last was the only operand, $denomargs[0] is undef here;
      # confirm pmml_smaller copes with that.
      my $curr = ['m:mfrac', {}, pmml_smaller($numer),
        ['m:mrow', {},
          (@denomargs > 1 ? pmml_infix($denomop, @denomargs) : pmml_smaller($denomargs[0])),
          pmml_smaller($denomop)]];
      if (($last->textContent || '') eq "\x{22EF}") {            # Denom ends w/ \cdots
        return ($curr, pmml_smaller($last)); }                   # bring dots up to toplevel
      elsif (getQName($last) eq 'ltx:XMApp') {    # Denom ends w/ application --- what kind?
        my ($lastop, @lastargs) = element_nodes($last);
        if (defined($lastop)) {
          if (($lastop->getAttribute('meaning') || '') eq 'continued-fraction') {
            # Denom ends w/ cfrac, pull it to toplevel
            return ($curr, do_cfrac(@lastargs)); }
          elsif ((($lastop->textContent || '') eq "\x{2062}")    # Denom ends w/ * (invisible)
            && (scalar(@lastargs) == 2) && (($lastargs[0]->textContent || '') eq "\x{22EF}")) {
            return ($curr, pmml_smaller($lastargs[0]), pmml_smaller($lastargs[1])); } } } } }
  # Nothing recognizable to unroll: an ordinary fraction.
  return ['m:mfrac', {}, pmml_smaller($numer), pmml_smaller($denom)]; }
# Presentation of continued fractions.
# An operator named 'cfrac-inline' is unrolled into a row by do_cfrac;
# any other continued fraction is shown as an ordinary (display) m:mfrac.
DefMathML('Apply:?:continued-fraction', sub {
    my ($op, $numer, $denom) = @_;
    my $inline = (($op->getAttribute('name') || '') eq 'cfrac-inline');
    return pmml_row(do_cfrac($numer, $denom)) if $inline;
    return ['m:mfrac', {}, pmml_smaller($numer), pmml_smaller($denom)]; });
#================================================================================
# A Hack for Demo/Testing Purposes ONLY!!!
# [Illustrates that we'd like these to be defineable in bindings!]
# Content form of a definite integral; the arguments arrive as
# (lower limit, upper limit, integrand, variable).
# The qualifiers are emitted in the order bvar, lowlimit, uplimit,
# followed by the integrand.
DefMathML('Apply:?:hack-definite-integral', undef,
  sub {
    my (undef, $lower, $upper, $integrand, $variable) = @_;
    my @qualifiers = (
      ['m:bvar',     {}, cmml($variable)],
      ['m:lowlimit', {}, cmml($lower)],
      ['m:uplimit',  {}, cmml($upper)]);
    return ['m:apply', {}, ['m:int'], @qualifiers, cmml($integrand)]; });
#================================================================================
1;
__END__
=pod
=head1 NAME
C<LaTeXML::Post::MathML> - Post-Processing modules for converting math to MathML.
=head1 SYNOPSIS
C<LaTeXML::Post::MathML> is the abstract base class for the MathML Postprocessor;
C<LaTeXML::Post::MathML::Presentation> and C<LaTeXML::Post::MathML::Content>
convert XMath to either Presentation or Content MathML, or with that format
as the principal branch for Parallel markup.
=head1 DESCRIPTION
The conversion is carried out primarily by a tree walk of the C<XMath> expression;
appropriate handlers are selected and called depending on the operators and forms encountered.
Handlers can be defined on applications of operators, or on tokens;
when a token is applied, its application handler takes precedence over its token handler.
=head2 C<< DefMathML($key,$presentation,$content); >>
Defines presentation and content handlers for C<$key>.
C<$key> is of the form C<TYPE:ROLE:MEANING>, where
TYPE : is one of C<Token>, C<Apply> or C<Array> (or C<Hint> ?)
ROLE : is a grammatical role (on XMath tokens)
MEANING : is the meaning attribute (on XMath tokens)
Any of these can be C<?> to match any role or meaning;
matches of both are preferred, then match of meaning
or role, or neither.
The subroutine handlers for presentation and content are given
by C<$presentation> and C<$content>, respectively.
Either can be C<undef>, in which case some other matching
handler will be invoked.
For C<Token> handlers, the arguments passed are the token node;
for C<Apply> handler, the arguments passed are the operator node
and any arguments.
However, it looks like some C<TOKEN> handlers are being defined
to take C<$content,%attributes> being the string content of the token,
and the token's attributes!
=head2 Presentation Conversion Utilities
=over
=item C<< $mmlpost->pmml_top($node,$style); >>
This is the top-level converter applied to an C<XMath> node.
It establishes a local context for font, style, size, etc.
It generally does the bulk of the work for a PresentationMathML's C<translateNode>,
although the latter wraps the actual C<m:math> element around it.
(C<style> is display or text).
=item C<pmml($node)>, C<pmml_smaller($node)>, C<pmml_scriptsize($node)>
Converts the C<XMath> C<$node> to Presentation MathML.
The latter two are used when the context calls for smaller (eg. fraction parts)
or scriptsize (eg sub or superscript) size or style, so that the size encoded
within C<$node> will be properly accounted for.
=item C<pmml_mi($node,%attributes)>, C<pmml_mn($node,%attributes)>, C<pmml_mo($node,%attributes)>
These are C<Token> handlers, to create C<m:mi>, C<m:mn> and C<m:mo> elements,
respectively. When called as a handler, they will be supplied only with an C<XMath>
node (typically an C<XMTok>). For convenient reuse, these functions may also be called
on a 'virtual' token: with C<$node> being a string (that would have been the text
content of the C<XMTok>), and the C<%attributes> that would have been the token's attributes.
=item C<pmml_infix($op,@args)>, C<pmml_script($op,@args)>, C<pmml_bigop($op,@args)>
These are C<Apply> handlers, for handling general infix, sub or superscript,
or bigop (eg. summations) constructs. They are called with the operator
token, followed by the arguments; all are C<XMath> elements.
=item C<pmml_row(@items)>
This wraps an C<m:mrow> around the already converted C<@items> if needed;
That is, if there is only a single item it is returned without the C<m:mrow>.
=item C<pmml_unrow($pmml)>
This perverse utility takes something that has already been converted
to Presentation MathML. If the argument is an C<m:mrow>, it returns a list of the
mathml elements within that row, otherwise it returns a list containing
the single element C<$pmml>.
=item C<pmml_parenthesize($item,$open,$close)>
This utility parenthesizes the (already converted MathML) C<$item> with the string delimiters
C<$open> and C<$close>. These are converted to an C<m:mrow> with C<m:mo> for the fences,
unless the C<usemfenced> switch is set, in which case C<m:mfenced> is used.
=item C<pmml_punctuate($separators,@items) >
This utility creates an C<m:mrow> by interjecting the punctuation
between successive items in the list of already converted C<@items>.
If there is more than one character in C<$separators> the first
is used between the first pair, the next between the next pair;
if the separators is exhausted, the last is repeated between remaining pairs.
C<$separators> defaults to (repeated) comma.
=back
=head2 Content Conversion Utilities
=over
=item C<< $mmlpost->cmml_top($node); >>
This is the top-level converter applied to an C<XMath> node.
It establishes a local context for font, style, size, etc (were it needed).
It generally does the bulk of the work for a ContentMathML's C<translateNode>,
although the latter wraps the actual C<m:math> element around it.
=item C<cmml($node)>
Converts the C<XMath> C<$node> to Content MathML.
=item C<cmml_leaf($token)>
Converts the C<XMath> token to an C<m:ci>, C<m:cn> or C<m:csymbol>, under appropriate circumstances.
=item C<cmml_decoratedSymbol($item)>
Similar to C<cmml_leaf>, but used when an operator is itself, apparently, an application.
This converts C<$item> to Presentation MathML to use for the content of the C<m:ci>.
=item C<cmml_not($arg)>
Construct the not of the argument C<$arg>.
=item C<cmml_synth_not($op,@args)>
Synthesize an operator by applying C<m:not> to another operator (C<$op>) applied to its C<@args>
(C<XMath> elements that will be converted to Content MathML).
This is useful to define a handler for, eg., C<not-approximately-equals> in terms
of C<m:approx>.
=item C<cmml_synth_complement($op,@args)>
Synthesize an operator by applying a complementary operator (C<$op>) to the reverse of its C<@args>
(C<XMath> elements that will be converted to Content MathML).
This is useful to define a handler for, eg. C<superset-of-or-equals> using C<m:subset>.
=item C<cmml_or_compose($operators,@args)>
Synthesize an operator that stands for the C<or> of several other operators
(eg. C<less-than-or-approximately-equals>) by composing it
of the C<m:or> of applying each of C<m:lt> and C<m:approx> to the arguments.
The first operator is applied to the converted arguments, while the rest
are applied to C<m:share> elements referring to the previous ones.
=item C<cmml_share($node)>
Converts the C<XMath> C<$node> to Content MathML, after assuring that it has an id,
so that it can be shared.
=item C<cmml_shared($node)>
Generates a C<m:share> element referring to C<$node>, which should have
an id (such as after calling C<cmml_share>).
=back
=head1 Math Processors, Generally.
We should probably formalize the idea of a Math Processor as an
abstract class, but let this description provide a starting overview.
A MathProcessor follows the API of C<LaTeXML::Post> processors, by
handling C<process>, which invokes C<processNode> on all C<Math> nodes;
the latter inserts the result of either C<translateNode> or
C<translateParallel>, applied to the C<XMath> representation, into the C<Math> node.
Parallel translation is done whenever additional MathProcessors have
been specified, via the C<setParallel> method; these are simply other
MathProcessors following the same API.
=cut