t/170_grammar_coverage.t


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
              # -*- CPERL -*-
#**********************************************************************
# Test cases for LaTeXML
#**********************************************************************
use strict;
use warnings;
use Test::More;
use LaTeXML;
use LaTeXML::MathParser;
local $::RD_TRACE = 1;
use LaTeXML::Common::Config;
use Data::Dumper;
local $Data::Dumper::Sortkeys = 1;
if (!$ENV{"CI"}) {
  plan skip_all => "Only checked in continuous integration.";
  done_testing();
  exit;
}
# Obtain the rule pairs from MathGrammar, which we want to exhaustively test:
my %grammar_dependencies = obtain_dependencies();
my $opts = LaTeXML::Common::Config->new(input_limit => 100, verbosity=>-2);
my $converter = LaTeXML->get_converter($opts);
$converter->prepare_session($opts);
my %tested_dependencies = ();
my @core_tests = parser_test_filenames();
for my $test (@core_tests) {
  note("grammar coverage $test...");
  my $regularized_log = '';
  my $response;
  my $log_handle;
  open($log_handle, ">>", \$regularized_log) or croak("Can't redirect STDERR to log! Dying...");
  {
    local *STDERR       = *$log_handle;
    binmode(STDERR, ':encoding(UTF-8)');
    $response = $converter->convert($test);
#  my $regularized_log = $response->{log};
    }
  # Preprocess split lines back to single lines, e.g.
  # 2|AnythingAn|>>Matched subrule:                    |
  #  |          |[modifierFormulae]<< (return value:   |
  # -- TO:
  # 2|AnythingAn|>>Matched subrule: [modifierFormulae]<< (return value:   |
  # Also:
  # 10|  bigop   |(consumed: [ SUMOP:sum:1])            |
  # 9|preScripte|>>Matched subrule: [$arg[0]]<< (return|
  #   |          |value: [<XMTok                        |
  $regularized_log =~ s/\:\s+\|\n\s*\|\s+\|\[/\: \[/g;
  note($response->{status});
  my @log_lines = split("\n", $regularized_log);
  my $prev_line = '';
  for my $line (@log_lines) {
    if ($line =~ /(\w+)\s*\|(?:(?:\>\>(?:\.*)Matched(?:\(keep\))? (?:subrule|production))|(?:\(consumed))\:\s*\[\s*(\w+|\$arg\[\d+\])/) {
      my $parent = $1;
      my $child = $2;
      if ($child =~ /^\$arg/) {
        if ($prev_line =~ /^\s*\d+\|\s*(\w+)\s*\|/) {
          $child = $1;
        }
      }
      if ($parent ne $child) {
        $tested_dependencies{$parent}{$child} = 1;
      }
    }
    $prev_line = $line;
  }
}
my $ok_count = 0;
my $missing_count = 0;
my $extra_count = 0;
my %missing = ();
my %extra = ();
delete $grammar_dependencies{'Start'}; # never reported in terse log
# Single lexeme top-level rules never parse, BECAUSE the grammar is never run on 1-lexeme formulae
delete $grammar_dependencies{'AnythingAn'}{"FLOATSUPERSCRIPT"};
delete $grammar_dependencies{'AnythingAn'}{"MODIFIER"};
# Reachable conceptually by an ambiguous grammar, but not in the RecDescent algorithm
# AnyOp variants are not reached as Formula variants take precedents (such as Factor's preScripted variants)
delete $grammar_dependencies{'AnyOp'}{"OPERATOR"};
delete $grammar_dependencies{'AnyOp'}{"addScripts"};
delete $grammar_dependencies{'AnyOp'}{"preScripted"};
delete $grammar_dependencies{'argPunct'}{'VERTBAR'};
delete $grammar_dependencies{'Expression'}{'punctExpr'}; # Unreachable, due to Formula -> punctExpr
delete $grammar_dependencies{'aSuperscri'}{'AnyOp'};
delete $grammar_dependencies{'aSuperscri'}{'Expression'};
# These are odd to have been recorded, since at least the intention is that they are preceded by
# aSubscript/aSuperscript. We have explicit tests for the relevant cases.
delete $grammar_dependencies{'Superscrip'}{'endPunct'};
delete $grammar_dependencies{'Subscript'}{'endPunct'};
# forbid rules should never match, don't check them here.
# TODO: We need tests for the always-failing productions!
delete $grammar_dependencies{'doubtArgs'}{'forbidArgs'};
delete $grammar_dependencies{'requireArg'};
# Needs regex enhancement
# preScripted -> bigop
# \sum ^2
# preScripted -> ATOM_OR_ID
# \frac12 _1$
for my $rule(grep {!/^_/} keys %tested_dependencies) {
  my $subrules = $tested_dependencies{$rule};
  for my $subrule(keys %$subrules) {
    if ($rule ne $subrule) {
      if ($grammar_dependencies{$rule}{$subrule}) {
        delete $grammar_dependencies{$rule}{$subrule};
        $ok_count += 1;
      } else {
        $extra_count += 1;
        $extra{$rule}{$subrule} = 1;
      }
    }
  }
}
for my $rule(keys %grammar_dependencies) {
  my $subrules = $grammar_dependencies{$rule} || ();
  for my $subrule (keys %$subrules) {
    if ($rule ne $subrule) {
      $missing_count += 1;
      $missing{$rule}{$subrule} = 1;
    }
  }
}
ok($ok_count > 100, "Tested a big subset of MathGrammar");
# print STDERR "Extra: \n", Dumper(\%extra);
is($missing_count, 0, "MathGrammar dependencies (currently tested in $ok_count cases), were not matched in the following cases: \n".Dumper(\%missing));
# Allow these for now, until we figure out how to check for the (s) variant rules
# for example: (endPunct Formula { [$item[1],$item[2]]; })(s)
# is($extra_count, 0, "Tests had rules which were matched, but not recorded in grammar metadata: \n".Dumper(\%extra));
done_testing();
#**********************************************************************
# Auxiliary functions, should these be in a utility module?
#**********************************************************************
sub parser_test_filenames {
  my $directory = "t/parse";
  my $dir;
  if (!opendir($dir, $directory)) {
    # Can't read directory? Fail (assumed single) test.
    return do_fail($directory, "Couldn't read directory $directory:$!"); }
  else {
    my @dir_contents = sort readdir($dir);
    my $t;
    my @core_tests   = map { (($t = $_) =~ /\.tex$/      ? ("t/parse/$t") : ()); } @dir_contents;
    closedir($dir);
    @core_tests;
  }
}
use LaTeXML::MathGrammar;
sub obtain_dependencies {
  my $internalparser = LaTeXML::MathGrammar->new();
  my %dependencies = ();
  my @rule_names = grep {!/^_/} keys %{$$internalparser{rules}};
  for my $rule (@rule_names) {
    # Direct calls are edges,
    my $calls = $$internalparser{rules}{$rule}{"calls"} || [];
    for my $call(@$calls) {
      if ($call !~ /^_/) {
        $dependencies{c14n($rule)}{$call} = 1;
      }
    }
    # Also! argcode are edges from their subrule (e.g. preScripted[bigop] is an edge preScripted -> bigop)
    my $prods = $$internalparser{rules}{$rule}{"prods"} || [];
    for my $prod (@$prods) {
      my $items = $$prod{items} || [];
      for my $item (@$items) {
        if ($$item{argcode} && $$item{subrule}) {
          if ($$item{argcode} =~/^\['(.+)'\]$/) {
            $dependencies{c14n($$item{subrule})}{$1} = 1;
          }
        }
      }
    }
  }
  %dependencies
}
sub c14n {
  my $rule = shift;
  # RD_TRACE only gives us upto 10 characters of the leading rule name,
  # so we're forced to trim the top
  return substr($rule, 0,10);
}
1;
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)