# -*- cperl -*-
# FYI: -*-mode: Lisp; fill-column: 75; comment-column: 50; -*-
#
BEGIN {
$SIG{__WARN__} = sub { print STDERR shift; $DB::single = 1; };
};
package chain;
use fields qw(RULEIDX FIRSTLA);
package Parse::YALALR::Build;
use Parse::YALALR::Common;
use Parse::YALALR::Read;
use Parse::YALALR::Vector;
use Parse::YALALR::Kernel;
use Parse::YALALR::Parser;
use Carp;
# Load in the explanation extensions. The BEGIN {require} stuff is
# just to make it clear that this is not an independent module; it
# would work to say use instead.
BEGIN { require 'Parse/YALALR/Explain.pl'; };
use fields
(parser =>
# Lookup tables
item2state => # { stringified item ref => state that contains it }
itemmap => # { "statenum_itemidx" => item }
quickstate => # { 96-bit hash of kernel items in a state => state }
FIRST => #
nullable => # { nullable symbol }
chainrules => # { A => { B => [ chainrules for A=>B ] }
# chainrule : [ grammar_index for rule => vec(??) ]
# misc & unclassified
why_nullable => #
chainreachable => #
chainfirst => #
WHY_FIRST => #
why => # Whether to compute the WHY information
'temp_tokmap'); #
use strict;
use Carp qw(verbose croak);
# Parse::YALALR::Build::new
#
# Reads the grammar file (Parse::YALALR::Read::read), collects all the
# interesting information (Parse::YALALR::Build::collect_grammar), and
# then builds the parser (Parse::YALALR::Build::build)
#
sub new {
my ($class, $lang, $data, %opts) = @_;
$data = Parse::YALALR::Read->read($lang, $data)
unless UNIVERSAL::isa($data, 'Parse::YALALR::Read');
# print "Done reading at ".time."= t0+".(time-$::t0)."\n";
$class = ref $class if ref $class;
no strict 'refs';
my Parse::YALALR::Build $self = bless [\%{"$class\::FIELDS"}], $class;
$self->{why} = $opts{why};
$self->{parser} = Parse::YALALR::Parser->new(%opts);
$self->collect_grammar($data); # Remember to add START -> S
$self->build();
return $self;
}
sub parser ($) { $_[0]->{parser} }
sub build {
my ($self) = @_;
$self->compute_NULLABLE();
$self->compute_FIRST();
$self->compute_chainFIRSTs();
$self->compute_chains(); # Change this to demand-driven?
$self->generate_parser();
return $self;
}
sub decide_token {
my Parse::YALALR::Build $self = shift;
my ($str) = @_;
return 0 if ref $str;
return 1 if defined $self->{temp_tokmap}->{$str};
return 1 if $str =~ /^'/;
# return 1 if $str =~ /^[A-Z_]+$/;
return 0;
}
# collect_grammar
#
# INPUT:
# $data->{rules} : [ [ lhs, [ rhssym ], prec ] ]
# - rhssym is a SCALAR ref if it's an action. deref to get perl code.
# Will be blessed into <lang>CODE if normal code;
# <lang>CONDITION if a conditional (<lang> is C or perl)
# - prec is a symbol to inherit precedence from, or '<default>'
#
# OUTPUT:
# $self->{grammar} : array of all symbols in all rules, each separated by $nil
# $self->{code} : [ code_index => code_subroutine ]
# $self->{rule_code} : [ rulepos => code_subroutine ]
# $self->{ruletable} : [ nonterminal => [ grammar_index of lhs for rule ] ]
# $self->{epsilonrules}
# $self->{chainrules} : { A => { B => [ chainrules for A=>B ] }
# chainrule : [ grammar_index for rule => vec(FIRST??) ]
# $self->{nonterminals} : [ symbol ]
# $self->{tokens} : [ symbol ]
# $self->{ntflag} : [ symbol => boolean (is symmap[symbol] a nonterminal?) ]
# $self->{precedence} : [ token => <precedence, associativity> ]
# $self->{rule_precedence} : [ rule => <precedence, associativity> ]
#
# All symbols are converted to indexes in $self->{symmap}, which is built
# as a side effect.
#
sub collect_grammar {
my Parse::YALALR::Build $self = shift;
my ($data) = @_;
my $parser = $self->parser;
my $nil = $parser->{nil};
my $end = $parser->{end};
my $error = $parser->{error};
$parser->register_token('error');
# Add the START -> S rule
my $something;
if (exists $data->{start_symbol}) {
$something = $data->{start_symbol};
} else {
$something = $data->{rules}->[0]->[0];
}
unshift(@{$data->{rules}}, [ '<START>', [ $something ] ]);
$parser->{startsym} = $parser->{symmap}->add_value('<START>');
$parser->{startrule} = 1; # HACK
foreach my $token (@{$data->{tokens}}) {
$parser->register_token($token);
}
foreach my $precset (@{$data->{precedence}}) {
foreach my $token (@{$precset->[1]}) {
$parser->register_token($token);
$parser->{symmap}->add_value($token);
}
}
my @rules;
my %rules; # { nt => [ rule ] }
my @epsilonrules;
my %chainrules;
my @grammar;
my @code;
my $code_ctr = 0;
my $i = 0;
my %ruleprecs; # For rules with hardcoded %prec things
for my $rule (@{$data->{rules}}) {
my ($lhs, $rhs, $prec) = @$rule;
my $istok = $self->decide_token($lhs);
$lhs = $parser->{symmap}->get_index($lhs);
my $rulepos = $i;
push(@rules, $rulepos);
push(@{$rules{$lhs}}, $rulepos);
$ruleprecs{$rulepos} = $prec;
$grammar[$i++] = $lhs;
$parser->{ntflag}->[$lhs] = !$istok;
my $epsilonrule_flag = 1;
foreach my $j (0..$#$rhs) {
my $sym = $rhs->[$j];
my $isnonterminal = ! $self->decide_token($sym);
if (ref $sym) {
print "SYM=$sym\n";
print "ref=".(ref $sym)."\n";
print "yes\n" if (scalar(ref $sym) =~ /^perl/);
if (scalar(ref $sym) =~ /^perl/) {
$sym = eval "sub { my \@v = \@_; $$sym; }";
} else {
$sym = sub { print STDERR "Unrunnable ".(ref $sym)." called\n" };
}
# Code
my $codesym = '@'.(++$code_ctr);
$code[$code_ctr] = $sym;
$parser->{rule_code}->{$rulepos} = $sym;
$sym = $parser->{symmap}->get_index($codesym);
$parser->{codesyms}->{$sym} = $codesym; # Used as boolean map
$isnonterminal = '(code)';
if ($j != $#$rhs) {
push(@{$data->{rules}}, [ $codesym, [ ] ]);
} else {
$parser->{end_action_symbols}->{$sym} = 1;
$parser->{ntflag}->[$sym] = $isnonterminal;
next;
}
} else {
$sym = $parser->{symmap}->get_index($sym);
$epsilonrule_flag = 0;
}
$grammar[$i++] = $sym;
$parser->{ntflag}->[$sym] = $isnonterminal;
}
push(@epsilonrules, $rulepos) if $epsilonrule_flag;
$grammar[$i++] = $nil;
}
# Must do this while we can still muck with the symmap
$parser->{grammar} = \@grammar;
$self->compute_precedence($data->{precedence}, \%ruleprecs);
$parser->{nilvec} = $parser->{symmap}->make_onevec($nil);
my $endvec = $parser->{symmap}->make_onevec($end);
my $bogus;
($parser->{init_state}) =
$self->fetch_or_create_state([ [ $parser->new_item(1, $endvec), undef ] ], undef);
# Compute chainrules
foreach my $rule (@rules) {
my $lhs = $grammar[$rule];
my $rhs0 = $grammar[$rule + 1];
if ($parser->is_nonterminal($rhs0)) {
push(@{$chainrules{$lhs}->{$rhs0}}, bless [ \%chain::FIELDS,
$rule,
undef ], 'chain');
}
}
# For debugging: describe how to print out chainrules
$parser->register_dump('chain' => sub {
my ($self, $chain, $asXML) = @_;
$self->dump_rule($chain->{RULEIDX}, undef, $asXML)." F=".
$self->dump_symvec($chain->{FIRSTLA}, $asXML);
});
$parser->{code} = \@code;
$parser->{rules} = \@rules;
$parser->{rulenum} = { map { $rules[$_] => $_ } 0 .. $#rules };
$parser->{ruletable} = \%rules; # { A => [ rule A -> ... ] }
$parser->{epsilonrules} = \@epsilonrules;
$self->{chainrules} = \%chainrules;
$parser->{nonterminals} =
[ grep { $parser->{ntflag}->[$_] } 0 .. $#{$parser->{ntflag}} ];
$parser->{tokens} =
[ grep { !$parser->{ntflag}->[$_] } 0 .. $#{$parser->{ntflag}} ];
}
# compute_precedence
#
# INPUT:
# $precinfo : [ precedence layer ]
# $hardcoded : { rule => symbol to inherit precedence from or "<default>" }
# precedence layer : <associativity, [tokens]>
# $parser->{grammar} : see above
#
# OUTPUT:
# $parser->{precedence} : [ token => <precedence, associativity> ]
# $parser->{rule_precedence} : [ rule => <precedence, associativity> ]
#
sub compute_precedence {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
my ($precinfo, $hardcoded) = @_;
# Grab out the info from the precedence declarations
my $prec = 0;
foreach my $preclayer (@$precinfo) {
my ($assoc, $tokens) = @$preclayer;
$assoc = 'none' if $assoc eq 'token';
foreach my $token (@$tokens) {
$token = $parser->{symmap}->get_index($token);
$parser->{precedence}->[$token] = [ $prec, $assoc ];
# print "Token precedence($token) := $prec ($assoc)\n";
}
} continue {
$prec++;
};
# Compute the rule precedences.
# It is the precedence of the %prec token, if given. Otherwise it
# is the precedence of the last terminal, if any. Otherwise it is
# undefined.
my $nil = $parser->{nil};
my $rule;
my $lastterm;
for (my $i = 0; $i < @{$parser->{grammar}}; $i++) {
my $sym = $parser->{grammar}->[$i];
if ($sym == $nil) {
my $hard = $hardcoded->{$rule};
if (defined $hard && $hard ne '<default>') {
my $p = $parser->{rule_precedence}->[$rule] =
$parser->{precedence}->[$parser->{symmap}->get_index($hard)];
} elsif (defined $lastterm) {
my $p = $parser->{rule_precedence}->[$rule] =
$parser->{precedence}->[$lastterm];
}
undef $rule;
undef $lastterm;
} elsif (!defined $rule) {
$rule = $i;
} else {
$lastterm = $sym if $parser->is_token($sym);
}
}
}
sub isdef {
my %x = @_;
while (my ($name, $val) = each %x) {
print "$name is ", (defined $val ? 'defined' : 'undefined'), "\n";
}
}
# method FIRST(vec1 vec2 vec3...)
#
# Returns a vector of FIRST(vec1 vec2 vec3...)
# Will include nil if all vectors contain nil.
#
sub FIRST {
my $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
my $nil = $parser->{nil};
my $first = shift;
if (ref $first) {
croak("FIRST(ref ".(ref $first).") called");
}
my $second;
while (vec($first, $nil, 1) && defined($second = shift)) {
vec($first, $nil, 1) = 0; # Clear out the epsilon
$first |= $second; # first will only contain nil if second has it
}
return $first;
}
# method FIRST_nonvec(A B C...)
#
# Returns a vector of FIRST(A B C...)
# where the arguments are symbols. Will include nil if all symbols given
# are nullable.
#
sub FIRST_nonvec {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
my $A = shift;
my $nil = $parser->{nil};
my $nilvec = $parser->{nilvec};
my $symmap = $parser->{symmap};
return $nilvec if !defined $A;
my $first;
if ($parser->is_nonterminal($A)) {
$first = $self->{FIRST}->{$A};
} else {
$first = $symmap->make_onevec($A);
}
my $next;
while (vec($first, $nil, 1) && defined($next = shift)) {
vec($first, $nil, 1) = 0; # Clear out the epsilon
if ($parser->is_nonterminal($next)) {
$next = $self->{FIRST}->{$next};
} else {
$next = $symmap->make_onevec($next);
}
$first |= $next;
}
return $first;
}
# Could a (small) n^2 be removed by computing all of these at once?
# Or are few asked for? (Guess so; doesn't show up in profiling)
sub get_first_nextalpha {
my ($self, $I) = @_;
return $self->FIRST_nonvec($self->parser->get_dotalpha($I->{GRAMIDX} + 1));
}
sub hidden_shift {
my ($self, $rule, $first) = @_;
return [ $rule + 1, $first ];
}
# fetch_or_create_state
#
# Args:
# items: [ <generated item, source item> ]
# The source item is the Real item; the generated item is just a holder
# for the necessary information (specifically, a GRAMIDX and a lookahead set)
# and that reference will never be used inside any real state.
# source_state: The state that caused this set of items to be generated.
#
# $self->{quickstate} : [ item_ofs => state ]
# state : { id => id, items => [ item ],
# la_effects => [ items index => [ item ] ] }
# where item_ofs is the first item state->{items}->[0]
#
# la_effects is the set of outward edges from a kernel item to the
# kernel items of other states that the lookaheads should propagate to.
#
# Returns:
# state in scalar context, <state,changes> in list context
# - state is the state generated
# - changes is undefined if the state was created from scratch,
# otherwise a reference to a (probably empty) list of items
# whose lookaheads changed
#
sub fetch_or_create_state {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
my ($edges, $source_state) = @_;
croak("must have at least one item") if @$edges == 0;
# $DB::single = 1 if defined $source_state && $source_state->{id} == 4;
# Canonicalize $edges -> @canon_items by removing duplicates
# { GRAMIDX of generated item => lookahead for item }
my %canon_items;
# { GRAMIDX of generated item => [ causing item, la, lawhy ] }
# lawhy : <'generated'|'propagated'|'epsilon-generated', causeidx, la>
my %causes;
# { GRAMIDX of generating item that propagates its lookaheads => boolean }
my %propagating_cause;
for my $edge (@$edges) {
my ($item, $cause) = @$edge;
if (defined $cause) {
my $cause_restla = $self->get_first_nextalpha($cause);
$propagating_cause{$cause->{GRAMIDX}} = 1
if vec($cause_restla, $parser->{nil}, 1);
}
my $idx = $item->{GRAMIDX};
if (exists $canon_items{$idx}) {
$canon_items{$idx} |= $item->{LA};
} else {
$canon_items{$idx} = $item->{LA};
}
if ($self->{why}) {
while (my ($la, $lawhy) = each %{$item->{LA_WHY}}) {
push(@{$causes{$idx}}, [ $cause, $la, $lawhy ]);
}
} else {
push(@{$causes{$idx}}, [ $cause ] );
}
}
# 96-bit hash value
# TODO: Compute a hash of the set of items in a state.
# It would be nice if the hash were insensitive to the order
# of items in the set. We don't need 96 bits if we do a pairwise
# comparison to check for sure, but we could get away with a
# simple hash -> state table instead of hash -> [ state ] if
# we use lots of bits. (96 bits means less than 1 chance in a million
# of getting a collision with 256,000 states. Assuming a truly random
# hash function, which this is nowhere close to.)
my $h1 = 0;
my $h2 = 0;
my $h3 = 0;
# Order-independent hash
{
use integer;
foreach (keys %canon_items) {
$h1 ^= (($_ + 1) * 149706587);
$h2 ^= (($_ + 1) * 4243838327);
$h3 ^= (($_ + 1) * 1347946109);
}
}
my $hash = pack("LLL", $h1, $h2, $h3);
my $fetched = $self->{quickstate}->{$hash};
# Found it!
if (defined $fetched) {
for my $fitem (@{$fetched->{items}}) {
# Merge lookaheads
my $merge = $canon_items{$fitem->{GRAMIDX}};
$fitem->{LA} |= $merge;
# Add in the new edges to the item lookahead dependency graph
for my $cause (@{$causes{$fitem->{GRAMIDX}}}) {
my ($src_item, $la, $lawhy) = @$cause;
if ($self->{why}) {
push(@{ $src_item->{DESTS} }, $fitem);
push(@{ $fitem->{SOURCES} }, $src_item);
$DB::single = 1 if $fitem->{GRAMIDX} == 35;
if ($fetched != $source_state) {
$fitem->{LA_WHY}->{$la} = $lawhy;
}
}
next if ! $propagating_cause{$cause->[0]->{GRAMIDX}};
$lawhy->[1] = $src_item;
$self->add_item_edge($source_state, $src_item,
$fetched, $fitem,
$la => $lawhy);
}
}
return ($fetched);
}
# Didn't find it, create a new state.
# Create the items in the new state. These will be the Real items if
# the state is new, otherwise, they're just stores for the information
# to be merged into the fetched state.
my @canon_items;
while (my ($idx, $la) = each %canon_items) {
push(@canon_items, bless [ \%item::FIELDS, $idx, $la ], 'item');
}
@canon_items = sort { $a->{GRAMIDX} <=> $b->{GRAMIDX} } @canon_items;
# FIXME
# Create the new state itself
my $state = Parse::YALALR::Kernel->new($parser, \@canon_items);
# Register each item in the kernel with the causing kernel item
# in the source state.
if (defined $source_state) {
foreach my $item (@canon_items) {
foreach my $cause (@{$causes{$item->{GRAMIDX}}}) {
my ($src_item, $la, $lawhy) = @$cause;
if ($self->{why}) {
push(@{ $src_item->{DESTS} }, $item);
push(@{ $item->{SOURCES} }, $src_item);
$DB::single = 1 if $item->{GRAMIDX} == 35;
die if $item == $lawhy->[1];
$item->{LA_WHY}->{$la} = $lawhy;
}
# Check whether the source_state is A -> \alpha . X \beta,
# where \beta is nullable. If so, any change in the lookaheads
# of the source_state should be propagated to the state
# being created.
next unless $propagating_cause{$src_item->{GRAMIDX}};
$lawhy->[1] = $src_item;
# print STDERR "$src_item->{GRAMIDX} $lawhy->[2]\n";
$self->add_item_edge($source_state, $src_item,
$state, $item,
$la => $lawhy);
}
}
}
if ($self->{why}) {
# Fill in the map from GRAMIDX -> [ kernel item ]
for my $item (@canon_items) {
push(@{ $parser->{items}->{$item->{GRAMIDX}} }, $item);
}
}
$self->{quickstate}->{$hash} = $state;
$parser->{states}->[$state->{id}] = $state;
return ($state, 1);
}
sub compute_NULLABLE {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
my $grammar = $parser->{grammar};
my $nil = $parser->{nil};
# { B => [ A, rule A -> B..., item A -> . B... ] }
my %might_cause_nullable;
foreach my $nt (@{$parser->{nonterminals}}) {
$might_cause_nullable{$nt} = []; # Avoid @{undef}
}
# Set up the might_cause_nullable cache
RULE: foreach my $rule (@{$parser->{rules}}) {
my $item = $rule;
next if $grammar->[$item + 1] == $nil;
my $rhssym;
while (($rhssym = $grammar->[++$item]) != $nil) {
next RULE if $parser->is_token($rhssym);
}
push(@{$might_cause_nullable{$grammar->[$rule + 1]}},
[ $grammar->[$rule], $rule, $rule + 1 ]);
}
# Go through the epsilon rules and set the immediately nullable ones,
# but also push stuff on the queue
my @mightq;
my %nullable;
my %why_nullable;
foreach my $rule (@{$parser->{epsilonrules}}) {
my $lhs = $grammar->[$rule];
next if $nullable{$lhs};
$nullable{$lhs} = 1;
$why_nullable{$lhs} = $rule;
push(@mightq, @{$might_cause_nullable{$lhs}});
$might_cause_nullable{$lhs} = [];
}
foreach my $nulsym (keys %{ $parser->{end_action_symbols} }) {
$nullable{$nulsym} = 1;
$why_nullable{$nulsym} = "is an action";
push(@mightq, @{$might_cause_nullable{$nulsym}});
$might_cause_nullable{$nulsym} = [];
}
while (my $might = pop(@mightq)) {
my ($nullcand, $rule, $dot) = @$might;
next if $nullable{$nullcand};
# Skip other nullable symbols
++$dot;
++$dot while ($grammar->[$dot] != $nil && $nullable{$grammar->[$dot]});
# If still some non-nullable symbols left, put it back on the
# might_cause_nullable map.
if ($grammar->[$dot] != $nil) {
push(@{$might_cause_nullable{$grammar->[$dot]}},
[ $nullcand, $rule, $dot ]);
} else {
# Found new nullable symbol! Push its stuff onto the list
my $nulledsym = $grammar->[$rule];
# Now wait a minute! We might have already figured this out from
# something else on the list! (Stupid kids...)
if (!$nullable{$nulledsym}) {
$nullable{$nulledsym} = 1;
$why_nullable{$nulledsym} = $rule;
push(@mightq, @{$might_cause_nullable{$nulledsym}});
$might_cause_nullable{$nulledsym} = [];
}
}
}
$self->{nullable} = \%nullable;
$self->{why_nullable} = \%why_nullable if $self->{why};
}
sub nullable_vec {
my ($self, $vec) = @_;
return vec($vec, $self->{nullable}, 1);
}
# optimize by keeping only one A goesto B rule.
sub compute_FIRST {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
my $grammar = $parser->{grammar};
my $nullable = $self->{nullable};
my $nil = $parser->{nil};
# WHY_FIRST : { A => { t => <rule,reason,?parent> } }
# where reason : 'nullable'|'propagated'
#
# reason = 'nullable':
# t is in FIRST(A) because rule A : \a1 t \a2 and NULLABLE(\a1)
# reason = 'propagated', parent = B
# t is in FIRST(A) because rule A : \a3 B \a4
# and NULLABLE(\a3) and t is in FIRST(B)
#
my %WHY_FIRST;
my %FIRST;
my $add_to_first = sub {
my ($sym, $tok, $rule, $parent) = @_;
$FIRST{$sym} = "" if !defined $FIRST{$sym};
vec($FIRST{$sym}, $tok, 1) = 1;
if ($self->{why}) {
if (defined $parent) {
my $reason = ($tok == $parent) ? 'nullable' : 'propagated';
$WHY_FIRST{$sym}->{$tok} = [ $rule, $reason, $parent ];
# print "Set WHY_FIRST{".$parser->dump_sym($sym)."=>{".$parser->dump_sym($tok)."=> <".$parser->dump_rule($rule).",$reason,".$parser->dump_sym($parent).">}}\n";
} else {
$WHY_FIRST{$sym}->{$tok} = [ $rule ];
}
}
};
# Initialize FIRST of all nonterminals to the empty set. This
# isn't used below, but will eliminate uses of undefined values
# later.
foreach my $sym (@{$parser->{nonterminals}}) {
$FIRST{$sym} = '';
}
# Set up the goesto graph.
# goesto{A} = [ B -> \alpha . A \beta ] means that
# FIRST(B) \contains FIRST(A) because \alpha is nullable.
my %goesto;
foreach my $rule (@{$parser->{rules}}) {
my $item = $rule + 1;
while ($grammar->[$item] != $nil) {
my $sym = $grammar->[$item++];
push(@{$goesto{$sym}}, $rule);
last if $parser->is_token($sym) || !$nullable->{$sym};
}
}
# Foreach token, do a BFS of the goesto graph, propagating the
# token to the FIRST sets of everything reached.
#
# Default all WHY_FIRSTs to 'propagated'
for my $tok (@{$parser->{tokens}}) {
my %visited;
my @queue;
push(@queue, \$tok); # Push a marker on
push(@queue, @{$goesto{$tok}}) if defined $goesto{$tok};
my $parent;
while (defined(my $x = shift(@queue))) {
if (ref $x) {
$parent = $$x;
} else {
my $rule = $x;
my $sym = $grammar->[$rule];
if (!$visited{$sym}) {
$visited{$sym} = 1;
$add_to_first->($sym, $tok, $rule, $parent);
if (defined $goesto{$sym}) {
push(@queue, \$sym);
push(@queue, @{$goesto{$sym}})
}
}
}
}
}
# epsilons need to be in FIRST sets also. But they're trivial
# with NULLABLE.
if ($self->{why}) {
foreach (keys %$nullable) {
$add_to_first->($_, $nil, $self->{why_nullable}->{$_});
}
} else {
foreach (keys %$nullable) {
$add_to_first->($_, $nil);
}
}
$self->{FIRST} = \%FIRST;
$self->{WHY_FIRST} = \%WHY_FIRST;
}
# Chain rules
#
# $self->{chainrules} = { A => { B => [ <A -> B \alpha,FIRST(\alpha)> ] } }
#
# INCORRECT:
# $self->{chainreachable} = { A => { B => [ <X -> B \beta2, FIRST(\beta1)> ] }}
# where A ->* X \beta1
# X -> B \beta2 (\beta1 is the accumulation of symbols required to
# reach X, which produces B)
#
# CORRECT: See the description later
#
# $self->{chainrules} = { nt_A => { nt_B => [ < rule, first > ] } }
# aka { A => { B => [ < A -> B \alpha, FIRST(\alpha) > ] } }
# Should really convert this to on-demand someday, too
sub compute_chainFIRSTs {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
my $grammar = $parser->{grammar};
my $chainrules = $self->{chainrules};
my $nil = $parser->{nil};
foreach my $X (values %$chainrules) {
foreach my $cruleset (values %$X) {
foreach my $crule (@$cruleset) {
# Point to B in A -> B x y z, will incr to x before using
my $i = $crule->{RULEIDX} + 1;
my @rhs;
while ($grammar->[++$i] != $nil) {
push(@rhs, $grammar->[$i]);
}
$crule->{FIRSTLA} = $self->FIRST_nonvec(@rhs);
}
}
}
}
# chainreachable: {A => {B => rule} } means A ->* B \alpha, where
# no nonterminals died to get to B \alpha (== last rule in leftmost
# derivation was not epsilon rule, so no A -> C B x -> B x). The rule
# given is just some rule C -> B \beta, where C is reachable from A
# in zero or more steps. Mostly used as a boolean flag, but can be
# helpful for why.
#
# chainfirst: {A => {B => firstvec} } means firstvec is the union of the
# FIRST of all \alpha in A ->* B \alpha (no nonterminals die). It will
# be used for expanding X -> something1 . A something2, f1: this generates
# B -> ..., FIRST(\beta something2 f1) when A ->* B \beta (no dead nts).
#
sub compute_chain {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
my ($A) = @_;
my $chainrules = $self->{chainrules};
my @todo;
my %chainreachable;
my %first;
my $nullfs = $parser->{symmap}->make_nullvec;
push(@todo, $A);
while (my $X = pop(@todo)) {
my $pushed = 0;
foreach my $B (keys %{$chainrules->{$X}}) {
if (!exists $chainreachable{$B}) {
$chainreachable{$B} = $chainrules->{$X}{$B}->[0]->{RULEIDX};
push(@todo, $B);
$pushed = 1;
}
my $oldfs = $first{$B} || $nullfs;
foreach my $crule (@{$chainrules->{$X}{$B}}) {
my $propfs = $self->FIRST($crule->{FIRSTLA}, $first{$X});
my $newfs = $propfs | $oldfs;
if (($newfs & ~$oldfs) !~ /^\0*$/s) {
$first{$B} = $newfs;
push(@todo, $B) unless $pushed;
$pushed = 1;
# why_chain_la(A)(B)(propfs & ~oldfs) = crule->{RULEIDX}
}
}
}
}
$self->{chainreachable}->{$A} = \%chainreachable;
$self->{chainfirst}->{$A} = \%first;
}
# This function should go away someday
sub compute_chains {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
foreach my $nt (@{$parser->{nonterminals}}) {
$self->compute_chain($nt);
}
}
# generate_parser
#
# Main entry point for creating a parser. Uses a bunch of precomputed data.
#
# Algorithm:
# Do a BFS creation of the state graph
# For each state (== kernel) during the BFS construction
# Foreach item in the kernel
# If it's a reduce, call add_reduce(kernel, lhs symbol, lookahead)
# If it's A => \a1 . t \a2, push(shifto[t], new kernel)
# If it's A => \a1 . B \a2, do the same as above,
# but also handle everything reachable from B => . \a3 (see below)
# Scan through the complete shifto sets and fetch or create the new
# kernel resulting from the shift (I'm including both terminals
# and nonterminals in shifto, as usual).
# If the kernel is new, enqueue it.
# The lookaheads for the reduces are tricky. SEE BELOW.
#
# The tricky part is handling the implicit kernel expansion. We have
# A => \a1 . B \a2, f1 (f1 is the lookahead)
#
# Let f2 = FIRST(\a2 with lookahead f1)
#
# Do a simple reduce|shift action, as above, for all rules
# B => \a3, f2
#
# Then, foreach X such that B =>+ X \a4 (use $self->{chainreachable} to find),
# let f = FIRST(\a4 f2) (FIRST(\a4) is $self->{chainfirst})
# Do the reduce|shift actions for each rule X -> \a5, using f as the lookahead
#
# To illustrate, we have something like:
# A => \a1 . B \a2, f1
# B => . X \a4, f2=FIRST(\a2 f1)
# X => \a5, FIRST(\a4 f2)
#
# (in general, B =>* . X \a4)
#
sub generate_parser {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
my $grammar = $parser->{grammar};
my $nil = $parser->{nil};
my $nilvec = $parser->{nilvec};
my %epsilon_items; # state id => [ generated item X -> . ]
my @kq;
push(@kq, $parser->{init_state}); # START -> . S, $;
while (defined(my $K = pop(@kq))) {
my @epsilon_items;
my %shifto; # { symbol => [ item ] }
my %shifto_why; # { symbol => <item,reason> }
KERNEL_ITEM:
foreach my $I (@{$K->{items}}) {
my $next = $parser->get_dot($I);
# If rule is A -> \a1 . then add a reduce and go to the next item
if ($next == $nil) {
# Off end. Reduce.
my $lhs = $I->{GRAMIDX};
# Find symbol to reduce to
while ($lhs > 0 && $grammar->[$lhs - 1] != $nil) { $lhs--; }
# FIXME: Add assertion that kernel item is not A -> .
$parser->add_reduce($K, $lhs, $I->{LA}, $I, 'kernel');
next KERNEL_ITEM;
}
# Nope, so rule is A -> \a1 . X \a2
my $I2 = $parser->get_shift($I);
push(@{$shifto{$next}}, [ $I2, $I ]);
$DB::single = 1 if $I2->{GRAMIDX} == 35;
$I2->{LA_WHY}->{$I->{LA}} = [ 'propagated', $I ]
unless $I2->{GRAMIDX} == $I->{GRAMIDX};
$shifto_why{$next} = [ $I2, 'kernel' ];
# If X is a terminal, no need to expand
next KERNEL_ITEM if $parser->is_token($next);
# In fact, rule is A -> \a1 . B \a2, f1 (B is nonterminal)
# Oh boy. Chain rules.
my $B = $next; # Just renaming
my $f1 = $parser->get_la($I);
my $a2 = $self->get_first_nextalpha($I);
my $F_a2_f1 = $self->FIRST($a2, $f1);
# $item_prop is the item to blame for lookaheads in the
# reduces that will be added. It's just $I or undef. We'll
# undef it as soon as we hit something non-nullable.
my $item_prop = $I;
undef $item_prop if !$self->nullable_vec($a2);
# First, handle the rules for B (if B ->+ B..., then we'll
# visit B again in the following loop, but for now we
# just want B -> . \a3, FIRST(\a2 f1))
foreach my $rule ($parser->get_rules($B)) {
my $x = $grammar->[$rule+1];
if ($x == $nil) {
my $eI = $parser->make_item($rule, 0, $F_a2_f1);
$DB::single = 1 if $eI->{GRAMIDX} == 35;
$eI->{LA_WHY}->{$F_a2_f1} =
[ 'epsilon-generated', $I, $parser->{nilvec}, $a2 ]
unless $eI->{GRAMIDX} == $I->{GRAMIDX};
push(@epsilon_items, $eI);
$parser->add_reduce($K, $rule, $F_a2_f1, $I, 'chained');
} else {
# I2 := B -> . \a3, FIRST(\a2 f1)
my $I2 = $parser->make_shift($rule + 1, $F_a2_f1);
push(@{$shifto{$x}}, [ $I2, $I ]);
$DB::single = 1 if $I2->{GRAMIDX} == 35;
$I2->{LA_WHY}->{$F_a2_f1} = [ 'generated', $I, $a2 ]
unless $I2 == $I;
# Don't use this explanation if there's a simpler
if ($self->{why} && !defined $shifto_why{$x}) {
$shifto_why{$x} = [ $I2, 'chained', $rule + 1 ];
}
}
}
foreach my $X (keys %{$self->{chainreachable}{$B}}) {
# f3 = FIRST(everything up to just before \a2)
my $f3 = $self->{chainfirst}->{$B}{$X} || $nilvec;
my $f = $self->FIRST($f3, $F_a2_f1);
# undefine $item_prop if FIRST(f3 a2) doesn't contain
# epsilon. It'll already be undef if a2 is not
# nullable, so just test f3
undef $item_prop if !$self->nullable_vec($f3);
foreach my $rule ($parser->get_rules($X)) {
my $x = $grammar->[$rule+1];
if ($x == $nil) {
my $eI = $parser->make_item($rule, 0, $f);
$DB::single = 1 if $eI->{GRAMIDX} == 35;
$eI->{LA_WHY}->{$f} =
[ 'epsilon-generated', $I, $f3, $a2 ]
unless $eI->{GRAMIDX} == $I->{GRAMIDX};
push(@epsilon_items, $eI);
$parser->add_reduce($K, $rule, $f, $I, 'chained');
} else {
my $I2 = $parser->make_shift($rule + 1, $f);
push(@{$shifto{$x}}, [ $I2, $I ]);
$DB::single = 1 if $I2->{GRAMIDX} == 35;
$I2->{LA_WHY}->{$f} = [ 'chain-generated', $I, $f3, $a2 ]
unless $I2->{GRAMIDX} == $I->{GRAMIDX};
# Don't use this explanation if there's a simpler
if ($self->{why} && !defined $shifto_why{$x}) {
$shifto_why{$x} = [ $I2, 'chained', $self->{chainreachable}{$B}{$X} + 1 ];
}
}
}
}
} # foreach item I in kernel K
# Merge epsilon items with the same core
my %canonical; # { GRAMIDX => item }
for my $item (@epsilon_items) {
if (exists $canonical{$item->{GRAMIDX}}) {
} else {
}
}
$epsilon_items{$K->{id}} = \@epsilon_items;
# Create all the new states and add shift actions
while (my ($sym, $edges) = each %shifto) {
my ($K2, $new) = $self->fetch_or_create_state($edges, $K);
$parser->add_shift($K, $sym, $K2);
$K->{SHIFT_WHY}->{$sym} = $shifto_why{$sym}
if $self->{why};
# Stick new states in the queue
push(@kq, $K2) if $new;
}
}
$self->create_item2state_map() if $self->{why};
$self->effects_to_causes() if $self->{why};
$self->propagate_lookaheads();
$self->create_reduces();
}
sub add_item_edge {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
my ($K0, $I0, $K1, $I1, $la, $reason) = @_;
$I0->{EFFECTS}->{$I1} = $I1;
$I1->{LA_WHY}->{$la} ||= $reason
if $self->{why} && $I0 != $I1;
}
sub propagate_lookaheads {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
# Start out search with all kernel items
my @Q = map { @{$_->{items}} } @{$parser->{states}};
# Keep track of what's already in the queue to avoid adding duplicates
my %Q = map { $_ => 1 } @Q;
# Keep propagating changes until equilibrium is reached
while (my $change = shift(@Q)) {
delete $Q{$change};
# print "Propagating $change->{GRAMIDX}...\n";
foreach (values %{$change->{EFFECTS}}) {
(my $newla = $_->{LA}) |= $change->{LA};
if ($newla ne $_->{LA}) {
if ($self->{why}) {
my $changela = $newla ^ $_->{LA};
$DB::single = 1 if (vec($changela, $parser->{end}, 1) && $_->{GRAMIDX} == 27);
if ($change->{GRAMIDX} + 1 == $_->{GRAMIDX}) {
$_->{LA_WHY}{$changela} = [ 'propagated', $change ]
unless $_->{GRAMIDX} == $change->{GRAMIDX};
} else {
$_->{LA_WHY}{$changela} = [ 'generated', $change ]
unless $_->{GRAMIDX} == $change->{GRAMIDX};
}
}
$_->{LA} = $newla;
if (!exists $Q{$_}) {
push(@Q, $_);
$Q{$_} = 1;
}
}
}
}
}
# Update all reductions given the current lookaheads of the items they
# depend upon.
sub create_reduces {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->{parser};
for my $K (@{$parser->{states}}) {
for my $rinfo (@{$K->{reduces}}) {
my ($la, $rule, $parent) = @$rinfo;
if ($parent) {
$rinfo->[0] |= $parent->{LA};
if ($self->{why} && $rinfo->[0] ne $la) {
$K->{REDUCE_WHY}->{$la ^ $rinfo->[0]} =
[ $rule, $parent, 'propagated' ];
}
}
}
}
}
sub resolve_rr {
my ($self, $state, $sym, $old, $new) = @_;
my Parse::YALALR::Parser $parser = $self->{parser};
my $id = $state->{id};
my $prec1 = $parser->{rule_precedence}->[$old->[0]];
my $prec2 = $parser->{rule_precedence}->[$new];
if (defined $prec1 && defined $prec2 && $prec1->[0] != $prec2->[0]) {
print "Precedence resolved reduce/reduce conflict in state $id on token ", $parser->dump_sym($sym), ": ";
if ($prec1->[0] < $prec2->[0]) {
print $parser->dump_rule($old->[0])."\n";
return $old;
} else {
my $grammar = $parser->{grammar};
print $parser->dump_rule($new)."\n";
return bless [ $new, $grammar->[$new], $parser->rule_size($new) ],
'reduce';
}
} else {
print "Arbitrarily resolved reduce/reduce conflict in state $id on token ", $parser->dump_sym($sym), ": ",
$parser->dump_rule($old->[0]), "\n";
return $old;
}
}
sub resolve_sr {
my ($self, $state, $sym, $old, $new) = @_;
my Parse::YALALR::Parser $parser = $self->{parser};
my $id = $state->{id};
my $prec1 = $parser->{precedence}->[$sym];
my $prec2 = $parser->{rule_precedence}->[$new];
# print "RESOLVING shift $sym vs rule $new\n";
my $grammar = $parser->{grammar};
my $reduce_rule = bless [ $new, $grammar->[$new], $parser->rule_size($new) ],
'reduce';
if (defined $prec1 && defined $prec2) {
if ($prec1->[0] != $prec2->[0]) {
print "Precedence resolved shift/reduce conflict in state $id on token ", $parser->dump_sym($sym), ": ";
if ($prec1->[0] < $prec2->[0]) {
print $parser->dump_action($old)."\n";
return $old;
} else {
my $grammar = $parser->{grammar};
print $parser->dump_rule($new)."\n";
return $reduce_rule;
}
}
if ($prec1->[1] eq 'left') {
print "Left associativity resolved shift/reduce conflict in state $id on token ", $parser->dump_sym($sym), ": reduce\n";
return $reduce_rule;
} elsif ($prec1->[1] eq 'right') {
print "Right associativity resolved shift/reduce conflict in state $id on token ", $parser->dump_sym($sym), ": shift\n";
return $old;
} elsif ($prec1->[1] eq 'nonassoc') {
print "Nonassociative operator, resolved shift/reduce conflict in state $id on token ", $parser->dump_sym($sym), ": error\n";
return undef;
} else {
die "What the hell is this?: $prec1->[1]";
}
}
print "Arbitrarily resolved shift/reduce conflict in state $id on token ", $parser->dump_sym($sym), ": ",
$parser->dump_action($old), "\n";
print " (prec of ".$parser->dump_sym($sym)." is $prec1->[0] ($prec1->[1]))\n"
if defined $prec1;
print " (prec of rule is $prec2->[0] ($prec2->[1]))\n"
if defined $prec2;
return $old;
}
sub resolve {
my ($self, $state, $sym, $old, $new) = @_;
my Parse::YALALR::Parser $parser = $self->{parser};
if ($old->[0] eq 'reduce' && $new->[0] eq 'reduce') {
return $self->resolve_rr($state, $sym, $old->[1], $new->[1]);
} elsif ($old->[0] eq 'shift' && $new->[0] eq 'reduce') {
return $self->resolve_sr($state, $sym, $old->[1], $new->[1]);
} else {
return $self->resolve_sr($state, $sym, $new->[1], $old->[1]);
}
}
# build_table
#
# INPUT:
# $self->{states} : [ state ]
# state : { 'id' => state number,
# 'shifts' => { symbol => to_state },
# 'reduces' => { lookahead => rule : grammar_index },
# }
#
# OUTPUT:
# $self->{states}[i]{actions} : [ symbol => shiftact|reduceact ]
# (equiv, the above state += { 'actions' => [ symbol => shiftact|reduceact ] })
# shiftact : to_state
# reduceact : [ rule, lhs, number of elts in rhs ] : 'reduce'
#
sub build_table {
my Parse::YALALR::Build $self = shift;
my Parse::YALALR::Parser $parser = $self->parser;
foreach my $state (@{$parser->{states}}) {
my @actions;
my $id = $state->{id};
while (my ($sym, $dest) = each %{$state->{shifts}}) {
$actions[$sym] = $dest;
}
foreach (@{$state->{reduces}}) {
my ($la, $rule, $item) = @$_;
foreach my $sym ($parser->{symmap}->get_indices($la)) {
if (defined $actions[$sym]) {
if (ref $actions[$sym] eq 'reduce') {
if ($actions[$sym]->[0] != $rule) {
$actions[$sym] =
$self->resolve($state, $sym,
[ 'reduce', $actions[$sym] ],
[ 'reduce', $rule ]);
next;
} # else no conflict
} else {
$actions[$sym] =
$self->resolve($state, $sym,
[ 'shift', $actions[$sym] ],
[ 'reduce', $rule ]);
next;
}
}
my $sz_rhs = $parser->rule_size($rule);
my $lhs = $parser->{grammar}->[$rule];
$actions[$sym] = bless [ $rule, $lhs, $sz_rhs ], 'reduce';
}
}
$state->{actions} = \@actions;
}
}
1;