The Perl and Raku Conference 2025: Greenville, South Carolina - June 27-29 Learn more

/*
* Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
*/
/* This file is part of Ragel.
*
* Ragel is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Ragel is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Ragel; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <iostream>
#include <iomanip>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
/* Parsing. */
#include "ragel.h"
#include "rlparse.h"
#include "parsetree.h"
using namespace std;
ostream &operator<<( ostream &out, const NameRef &nameRef );
ostream &operator<<( ostream &out, const NameInst &nameInst );
/* Convert the literal string which comes in from the scanner into an array of
* characters with escapes and options interpreted. Also null terminates the
* string. Though this null termination should not be relied on for
* interpreting literals in the parser because the string may contain \0 */
char *prepareLitString( const InputLoc &loc, const char *data, long length,
long &resLen, bool &caseInsensitive )
{
char *resData = new char[length+1];
caseInsensitive = false;
const char *src = data + 1;
const char *end = data + length - 1;
while ( *end != '\'' && *end != '\"' ) {
if ( *end == 'i' )
caseInsensitive = true;
else {
error( loc ) << "literal string '" << *end <<
"' option not supported" << endl;
}
end -= 1;
}
char *dest = resData;
long len = 0;
while ( src != end ) {
if ( *src == '\\' ) {
switch ( src[1] ) {
case '0': dest[len++] = '\0'; break;
case 'a': dest[len++] = '\a'; break;
case 'b': dest[len++] = '\b'; break;
case 't': dest[len++] = '\t'; break;
case 'n': dest[len++] = '\n'; break;
case 'v': dest[len++] = '\v'; break;
case 'f': dest[len++] = '\f'; break;
case 'r': dest[len++] = '\r'; break;
case '\n': break;
default: dest[len++] = src[1]; break;
}
src += 2;
}
else {
dest[len++] = *src++;
}
}
resLen = len;
resData[resLen] = 0;
return resData;
}
FsmAp *VarDef::walk( ParseData *pd )
{
/* We enter into a new name scope. */
NameFrame nameFrame = pd->enterNameScope( true, 1 );
/* Recurse on the expression. */
FsmAp *rtnVal = machineDef->walk( pd );
/* Do the tranfer of local error actions. */
LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
if ( localErrDictEl != 0 ) {
for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
rtnVal->transferErrorActions( state, localErrDictEl->value );
}
/* If the expression below is a join operation with multiple expressions
* then it just had epsilon transisions resolved. If it is a join
* with only a single expression then run the epsilon op now. */
if ( machineDef->type == MachineDef::JoinType && machineDef->join->exprList.length() == 1 )
rtnVal->epsilonOp();
/* We can now unset entry points that are not longer used. */
pd->unsetObsoleteEntries( rtnVal );
/* If the name of the variable is referenced then add the entry point to
* the graph. */
if ( pd->curNameInst->numRefs > 0 )
rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
/* Pop the name scope. */
pd->popNameScope( nameFrame );
return rtnVal;
}
void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
{
/* The variable definition enters a new scope. */
NameInst *prevNameInst = pd->curNameInst;
pd->curNameInst = pd->addNameInst( loc, name, false );
if ( machineDef->type == MachineDef::LongestMatchType )
pd->curNameInst->isLongestMatch = true;
/* Recurse. */
machineDef->makeNameTree( pd );
/* The name scope ends, pop the name instantiation. */
pd->curNameInst = prevNameInst;
}
void VarDef::resolveNameRefs( ParseData *pd )
{
/* Entering into a new scope. */
NameFrame nameFrame = pd->enterNameScope( true, 1 );
/* Recurse. */
machineDef->resolveNameRefs( pd );
/* The name scope ends, pop the name instantiation. */
pd->popNameScope( nameFrame );
}
InputLoc LongestMatchPart::getLoc()
{
return action != 0 ? action->loc : semiLoc;
}
/*
* If there are any LMs then all of the following entry points must reset
* tokstart:
*
* 1. fentry(StateRef)
* 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
* 3. targt of any transition that has an fcall (the return loc).
* 4. start state of all longest match routines.
*/
Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
const char *name, InlineList *inlineList )
{
Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
action->actionRefs.append( pd->curNameInst );
pd->actionList.append( action );
action->isLmAction = true;
return action;
}
void LongestMatch::makeActions( ParseData *pd )
{
/* Make actions that set the action id. */
for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
/* For each part create actions for setting the match type. We need
* to do this so that the actions will go into the actionIndex. */
InlineList *inlineList = new InlineList;
inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
InlineItem::LmSetActId ) );
char *actName = new char[50];
sprintf( actName, "store%i", lmi->longestMatchId );
lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
}
/* Make actions that execute the user action and restart on the last
* character. */
for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
/* For each part create actions for setting the match type. We need
* to do this so that the actions will go into the actionIndex. */
InlineList *inlineList = new InlineList;
inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
InlineItem::LmOnLast ) );
char *actName = new char[50];
sprintf( actName, "last%i", lmi->longestMatchId );
lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
}
/* Make actions that execute the user action and restart on the next
* character. These actions will set tokend themselves (it is the current
* char). */
for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
/* For each part create actions for setting the match type. We need
* to do this so that the actions will go into the actionIndex. */
InlineList *inlineList = new InlineList;
inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
InlineItem::LmOnNext ) );
char *actName = new char[50];
sprintf( actName, "next%i", lmi->longestMatchId );
lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
}
/* Make actions that execute the user action and restart at tokend. These
* actions execute some time after matching the last char. */
for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
/* For each part create actions for setting the match type. We need
* to do this so that the actions will go into the actionIndex. */
InlineList *inlineList = new InlineList;
inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
InlineItem::LmOnLagBehind ) );
char *actName = new char[50];
sprintf( actName, "lag%i", lmi->longestMatchId );
lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
}
InputLoc loc;
loc.line = 1;
loc.col = 1;
loc.fileName = "NONE";
/* Create the error action. */
InlineList *il6 = new InlineList;
il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
lmActSelect = newAction( pd, loc, "switch", il6 );
}
void LongestMatch::findName( ParseData *pd )
{
NameInst *nameInst = pd->curNameInst;
while ( nameInst->name == 0 ) {
nameInst = nameInst->parent;
/* Since every machine must must have a name, we should always find a
* name for the longest match. */
assert( nameInst != 0 );
}
name = nameInst->name;
}
void LongestMatch::makeNameTree( ParseData *pd )
{
/* Create an anonymous scope for the longest match. Will be used for
* restarting machine after matching a token. */
NameInst *prevNameInst = pd->curNameInst;
pd->curNameInst = pd->addNameInst( loc, 0, false );
/* Recurse into all parts of the longest match operator. */
for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
lmi->join->makeNameTree( pd );
/* Traverse the name tree upwards to find a name for this lm. */
findName( pd );
/* Also make the longest match's actions at this point. */
makeActions( pd );
/* The name scope ends, pop the name instantiation. */
pd->curNameInst = prevNameInst;
}
void LongestMatch::resolveNameRefs( ParseData *pd )
{
/* The longest match gets its own name scope. */
NameFrame nameFrame = pd->enterNameScope( true, 1 );
/* Take an action reference for each longest match item and recurse. */
for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
/* Record the reference if the item has an action. */
if ( lmi->action != 0 )
lmi->action->actionRefs.append( pd->localNameScope );
/* Recurse down the join. */
lmi->join->resolveNameRefs( pd );
}
/* The name scope ends, pop the name instantiation. */
pd->popNameScope( nameFrame );
}
void LongestMatch::restart( FsmAp *graph, TransAp *trans )
{
StateAp *fromState = trans->fromState;
graph->detachTrans( fromState, trans->toState, trans );
graph->attachTrans( fromState, graph->startState, trans );
}
void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
{
graph->markReachableFromHereStopFinal( graph->startState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
if ( ms->stateBits & STB_ISMARKED ) {
ms->lmItemSet.insert( 0 );
ms->stateBits &= ~ STB_ISMARKED;
}
}
/* Transfer the first item of non-empty lmAction tables to the item sets
* of the states that follow. Exclude states that have no transitions out.
* This must happen on a separate pass so that on each iteration of the
* next pass we have the item set entries from all lmAction tables. */
for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
if ( trans->lmActionTable.length() > 0 ) {
LmActionTableEl *lmAct = trans->lmActionTable.data;
StateAp *toState = trans->toState;
assert( toState );
/* Can only optimize this if there are no transitions out.
* Note there can be out transitions going nowhere with
* actions and they too must inhibit this optimization. */
if ( toState->outList.length() > 0 ) {
/* Fill the item sets. */
graph->markReachableFromHereStopFinal( toState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
if ( ms->stateBits & STB_ISMARKED ) {
ms->lmItemSet.insert( lmAct->value );
ms->stateBits &= ~ STB_ISMARKED;
}
}
}
}
}
}
/* The lmItem sets are now filled, telling us which longest match rules
* can succeed in which states. First determine if we need to make sure
* act is defaulted to zero. We need to do this if there are any states
* with lmItemSet.length() > 1 and NULL is included. That is, that the
* switch may get called when in fact nothing has been matched. */
int maxItemSetLength = 0;
graph->markReachableFromHereStopFinal( graph->startState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
if ( ms->stateBits & STB_ISMARKED ) {
if ( ms->lmItemSet.length() > maxItemSetLength )
maxItemSetLength = ms->lmItemSet.length();
ms->stateBits &= ~ STB_ISMARKED;
}
}
/* The actions executed on starting to match a token. */
graph->isolateStartState();
graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
if ( maxItemSetLength > 1 ) {
/* The longest match action switch may be called when tokens are
* matched, in which case act must be initialized, there must be a
* case to handle the error, and the generated machine will require an
* error state. */
lmSwitchHandlesError = true;
pd->lmRequiresErrorState = true;
graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
}
/* The place to store transitions to restart. It maybe possible for the
* restarting to affect the searching through the graph that follows. For
* now take the safe route and save the list of transitions to restart
* until after all searching is done. */
Vector<TransAp*> restartTrans;
/* Set actions that do immediate token recognition, set the longest match part
* id and set the token ending. */
for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
if ( trans->lmActionTable.length() > 0 ) {
LmActionTableEl *lmAct = trans->lmActionTable.data;
StateAp *toState = trans->toState;
assert( toState );
/* Can only optimize this if there are no transitions out.
* Note there can be out transitions going nowhere with
* actions and they too must inhibit this optimization. */
if ( toState->outList.length() == 0 ) {
/* Can execute the immediate action for the longest match
* part. Redirect the action to the start state.
*
* NOTE: When we need to inhibit on_last due to leaving
* actions the above test suffices. If the state has out
* actions then it will fail because the out action will
* have been transferred to an error transition, which
* makes the outlist non-empty. */
trans->actionTable.setAction( lmAct->key,
lmAct->value->actOnLast );
restartTrans.append( trans );
}
else {
/* Look for non final states that have a non-empty item
* set. If these are present then we need to record the
* end of the token. Also Find the highest item set
* length reachable from here (excluding at transtions to
* final states). */
bool nonFinalNonEmptyItemSet = false;
maxItemSetLength = 0;
graph->markReachableFromHereStopFinal( toState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
if ( ms->stateBits & STB_ISMARKED ) {
if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
nonFinalNonEmptyItemSet = true;
if ( ms->lmItemSet.length() > maxItemSetLength )
maxItemSetLength = ms->lmItemSet.length();
ms->stateBits &= ~ STB_ISMARKED;
}
}
/* If there are reachable states that are not final and
* have non empty item sets or that have an item set
* length greater than one then we need to set tokend
* because the error action that matches the token will
* require it. */
if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
/* Some states may not know which longest match item to
* execute, must set it. */
if ( maxItemSetLength > 1 ) {
/* There are transitions out, another match may come. */
trans->actionTable.setAction( lmAct->key,
lmAct->value->setActId );
}
}
}
}
}
/* Now that all graph searching is done it certainly safe set the
* restarting. It may be safe above, however this must be verified. */
for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
restart( graph, *pt );
int lmErrActionOrd = pd->curActionOrd++;
/* Embed the error for recognizing a char. */
for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
if ( st->isFinState() ) {
/* On error execute the onActNext action, which knows that
* the last character of the token was one back and restart. */
graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
&st->lmItemSet[0]->actOnNext, 1 );
st->eofActionTable.setAction( lmErrActionOrd,
st->lmItemSet[0]->actOnNext );
st->eofTarget = graph->startState;
}
else {
graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
&st->lmItemSet[0]->actLagBehind, 1 );
st->eofActionTable.setAction( lmErrActionOrd,
st->lmItemSet[0]->actLagBehind );
st->eofTarget = graph->startState;
}
}
else if ( st->lmItemSet.length() > 1 ) {
/* Need to use the select. Take note of which items the select
* is needed for so only the necessary actions are included. */
for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
if ( *plmi != 0 )
(*plmi)->inLmSelect = true;
}
/* On error, execute the action select and go to the start state. */
graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
&lmActSelect, 1 );
st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
st->eofTarget = graph->startState;
}
}
/* Finally, the start state should be made final. */
graph->setFinState( graph->startState );
}
void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
{
for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
if ( st->outActionTable.length() > 0 )
graph->setErrorActions( st, st->outActionTable );
}
}
FsmAp *LongestMatch::walk( ParseData *pd )
{
/* The longest match has it's own name scope. */
NameFrame nameFrame = pd->enterNameScope( true, 1 );
/* Make each part of the longest match. */
FsmAp **parts = new FsmAp*[longestMatchList->length()];
LmPartList::Iter lmi = *longestMatchList;
for ( int i = 0; lmi.lte(); lmi++, i++ ) {
/* Create the machine and embed the setting of the longest match id. */
parts[i] = lmi->join->walk( pd );
parts[i]->longMatchAction( pd->curActionOrd++, lmi );
}
/* Before we union the patterns we need to deal with leaving actions. They
* are transfered to error transitions out of the final states (like local
* error actions) and to eof actions. In the scanner we need to forbid
* on_last for any final state that has an leaving action. */
for ( int i = 0; i < longestMatchList->length(); i++ )
transferScannerLeavingActions( parts[i] );
/* Union machines one and up with machine zero. The grammar dictates that
* there will always be at least one part. */
FsmAp *rtnVal = parts[0];
for ( int i = 1; i < longestMatchList->length(); i++ ) {
rtnVal->unionOp( parts[i] );
afterOpMinimize( rtnVal );
}
runLongestMatch( pd, rtnVal );
/* Pop the name scope. */
pd->popNameScope( nameFrame );
delete[] parts;
return rtnVal;
}
FsmAp *MachineDef::walk( ParseData *pd )
{
FsmAp *rtnVal = 0;
switch ( type ) {
case JoinType:
rtnVal = join->walk( pd );
break;
case LongestMatchType:
rtnVal = longestMatch->walk( pd );
break;
case LengthDefType:
condData->lastCondKey.increment();
rtnVal = new FsmAp();
rtnVal->concatFsm( condData->lastCondKey );
break;
}
return rtnVal;
}
void MachineDef::makeNameTree( ParseData *pd )
{
switch ( type ) {
case JoinType:
join->makeNameTree( pd );
break;
case LongestMatchType:
longestMatch->makeNameTree( pd );
break;
case LengthDefType:
break;
}
}
void MachineDef::resolveNameRefs( ParseData *pd )
{
switch ( type ) {
case JoinType:
join->resolveNameRefs( pd );
break;
case LongestMatchType:
longestMatch->resolveNameRefs( pd );
break;
case LengthDefType:
break;
}
}
/* Construct with a location and the first expression. */
Join::Join( const InputLoc &loc, Expression *expr )
:
loc(loc)
{
exprList.append( expr );
}
/* Construct with a location and the first expression. */
Join::Join( Expression *expr )
{
exprList.append( expr );
}
/* Walk an expression node. */
FsmAp *Join::walk( ParseData *pd )
{
if ( exprList.length() > 1 )
return walkJoin( pd );
else
return exprList.head->walk( pd );
}
/* There is a list of expressions to join. */
FsmAp *Join::walkJoin( ParseData *pd )
{
/* We enter into a new name scope. */
NameFrame nameFrame = pd->enterNameScope( true, 1 );
/* Evaluate the machines. */
FsmAp **fsms = new FsmAp*[exprList.length()];
ExprList::Iter expr = exprList;
for ( int e = 0; e < exprList.length(); e++, expr++ )
fsms[e] = expr->walk( pd );
/* Get the start and final names. Final is
* guaranteed to exist, start is not. */
NameInst *startName = pd->curNameInst->start;
NameInst *finalName = pd->curNameInst->final;
int startId = -1;
if ( startName != 0 ) {
/* Take note that there was an implicit link to the start machine. */
pd->localNameScope->referencedNames.append( startName );
startId = startName->id;
}
/* A final id of -1 indicates there is no epsilon that references the
* final state, therefor do not create one or set an entry point to it. */
int finalId = -1;
if ( finalName->numRefs > 0 )
finalId = finalName->id;
/* Join machines 1 and up onto machine 0. */
FsmAp *retFsm = fsms[0];
retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
/* We can now unset entry points that are not longer used. */
pd->unsetObsoleteEntries( retFsm );
/* Pop the name scope. */
pd->popNameScope( nameFrame );
delete[] fsms;
return retFsm;
}
void Join::makeNameTree( ParseData *pd )
{
if ( exprList.length() > 1 ) {
/* Create the new anonymous scope. */
NameInst *prevNameInst = pd->curNameInst;
pd->curNameInst = pd->addNameInst( loc, 0, false );
/* Join scopes need an implicit "final" target. */
pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
pd->nextNameId++, false );
/* Recurse into all expressions in the list. */
for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
expr->makeNameTree( pd );
/* The name scope ends, pop the name instantiation. */
pd->curNameInst = prevNameInst;
}
else {
/* Recurse into the single expression. */
exprList.head->makeNameTree( pd );
}
}
void Join::resolveNameRefs( ParseData *pd )
{
/* Branch on whether or not there is to be a join. */
if ( exprList.length() > 1 ) {
/* The variable definition enters a new scope. */
NameFrame nameFrame = pd->enterNameScope( true, 1 );
/* The join scope must contain a start label. */
NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
if ( resolved.length() > 0 ) {
/* Take the first. */
pd->curNameInst->start = resolved[0];
if ( resolved.length() > 1 ) {
/* Complain about the multiple references. */
error(loc) << "join operation has multiple start labels" << endl;
errorStateLabels( resolved );
}
}
/* Make sure there is a start label. */
if ( pd->curNameInst->start != 0 ) {
/* There is an implicit reference to start name. */
pd->curNameInst->start->numRefs += 1;
}
else {
/* No start label. */
error(loc) << "join operation has no start label" << endl;
}
/* Recurse into all expressions in the list. */
for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
expr->resolveNameRefs( pd );
/* The name scope ends, pop the name instantiation. */
pd->popNameScope( nameFrame );
}
else {
/* Recurse into the single expression. */
exprList.head->resolveNameRefs( pd );
}
}
/* Clean up after an expression node. */
Expression::~Expression()
{
switch ( type ) {
case OrType: case IntersectType: case SubtractType:
case StrongSubtractType:
delete expression;
delete term;
break;
case TermType:
delete term;
break;
case BuiltinType:
break;
}
}
/* Evaluate a single expression node. */
FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
{
FsmAp *rtnVal = 0;
switch ( type ) {
case OrType: {
/* Evaluate the expression. */
rtnVal = expression->walk( pd, false );
/* Evaluate the term. */
FsmAp *rhs = term->walk( pd );
/* Perform union. */
rtnVal->unionOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break;
}
case IntersectType: {
/* Evaluate the expression. */
rtnVal = expression->walk( pd );
/* Evaluate the term. */
FsmAp *rhs = term->walk( pd );
/* Perform intersection. */
rtnVal->intersectOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break;
}
case SubtractType: {
/* Evaluate the expression. */
rtnVal = expression->walk( pd );
/* Evaluate the term. */
FsmAp *rhs = term->walk( pd );
/* Perform subtraction. */
rtnVal->subtractOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break;
}
case StrongSubtractType: {
/* Evaluate the expression. */
rtnVal = expression->walk( pd );
/* Evaluate the term and pad it with any* machines. */
FsmAp *rhs = dotStarFsm( pd );
FsmAp *termFsm = term->walk( pd );
FsmAp *trailAnyStar = dotStarFsm( pd );
rhs->concatOp( termFsm );
rhs->concatOp( trailAnyStar );
/* Perform subtraction. */
rtnVal->subtractOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break;
}
case TermType: {
/* Return result of the term. */
rtnVal = term->walk( pd );
break;
}
case BuiltinType: {
/* Duplicate the builtin. */
rtnVal = makeBuiltin( builtin, pd );
break;
}
}
return rtnVal;
}
void Expression::makeNameTree( ParseData *pd )
{
switch ( type ) {
case OrType:
case IntersectType:
case SubtractType:
case StrongSubtractType:
expression->makeNameTree( pd );
term->makeNameTree( pd );
break;
case TermType:
term->makeNameTree( pd );
break;
case BuiltinType:
break;
}
}
void Expression::resolveNameRefs( ParseData *pd )
{
switch ( type ) {
case OrType:
case IntersectType:
case SubtractType:
case StrongSubtractType:
expression->resolveNameRefs( pd );
term->resolveNameRefs( pd );
break;
case TermType:
term->resolveNameRefs( pd );
break;
case BuiltinType:
break;
}
}
/* Clean up after a term node. */
Term::~Term()
{
switch ( type ) {
case ConcatType:
case RightStartType:
case RightFinishType:
case LeftType:
delete term;
delete factorWithAug;
break;
case FactorWithAugType:
delete factorWithAug;
break;
}
}
/* Evaluate a term node. */
FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
{
FsmAp *rtnVal = 0;
switch ( type ) {
case ConcatType: {
/* Evaluate the Term. */
rtnVal = term->walk( pd, false );
/* Evaluate the FactorWithRep. */
FsmAp *rhs = factorWithAug->walk( pd );
/* Perform concatenation. */
rtnVal->concatOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break;
}
case RightStartType: {
/* Evaluate the Term. */
rtnVal = term->walk( pd );
/* Evaluate the FactorWithRep. */
FsmAp *rhs = factorWithAug->walk( pd );
/* Set up the priority descriptors. The left machine gets the
* lower priority where as the right get the higher start priority. */
priorDescs[0].key = pd->nextPriorKey++;
priorDescs[0].priority = 0;
rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
/* The start transitions of the right machine gets the higher
* priority. Use the same unique key. */
priorDescs[1].key = priorDescs[0].key;
priorDescs[1].priority = 1;
rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
/* Perform concatenation. */
rtnVal->concatOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break;
}
case RightFinishType: {
/* Evaluate the Term. */
rtnVal = term->walk( pd );
/* Evaluate the FactorWithRep. */
FsmAp *rhs = factorWithAug->walk( pd );
/* Set up the priority descriptors. The left machine gets the
* lower priority where as the finishing transitions to the right
* get the higher priority. */
priorDescs[0].key = pd->nextPriorKey++;
priorDescs[0].priority = 0;
rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
/* The finishing transitions of the right machine get the higher
* priority. Use the same unique key. */
priorDescs[1].key = priorDescs[0].key;
priorDescs[1].priority = 1;
rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
/* If the right machine's start state is final we need to guard
* against the left machine persisting by moving through the empty
* string. */
if ( rhs->startState->isFinState() ) {
rhs->startState->outPriorTable.setPrior(
pd->curPriorOrd++, &priorDescs[1] );
}
/* Perform concatenation. */
rtnVal->concatOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break;
}
case LeftType: {
/* Evaluate the Term. */
rtnVal = term->walk( pd );
/* Evaluate the FactorWithRep. */
FsmAp *rhs = factorWithAug->walk( pd );
/* Set up the priority descriptors. The left machine gets the
* higher priority. */
priorDescs[0].key = pd->nextPriorKey++;
priorDescs[0].priority = 1;
rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
/* The right machine gets the lower priority. We cannot use
* allTransPrior here in case the start state of the right machine
* is final. It would allow the right machine thread to run along
* with the left if just passing through the start state. Using
* startFsmPrior prevents this. */
priorDescs[1].key = priorDescs[0].key;
priorDescs[1].priority = 0;
rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
/* Perform concatenation. */
rtnVal->concatOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break;
}
case FactorWithAugType: {
rtnVal = factorWithAug->walk( pd );
break;
}
}
return rtnVal;
}
void Term::makeNameTree( ParseData *pd )
{
switch ( type ) {
case ConcatType:
case RightStartType:
case RightFinishType:
case LeftType:
term->makeNameTree( pd );
factorWithAug->makeNameTree( pd );
break;
case FactorWithAugType:
factorWithAug->makeNameTree( pd );
break;
}
}
void Term::resolveNameRefs( ParseData *pd )
{
switch ( type ) {
case ConcatType:
case RightStartType:
case RightFinishType:
case LeftType:
term->resolveNameRefs( pd );
factorWithAug->resolveNameRefs( pd );
break;
case FactorWithAugType:
factorWithAug->resolveNameRefs( pd );
break;
}
}
/* Clean up after a factor with augmentation node. */
FactorWithAug::~FactorWithAug()
{
delete factorWithRep;
/* Walk the vector of parser actions, deleting function names. */
/* Clean up priority descriptors. */
if ( priorDescs != 0 )
delete[] priorDescs;
}
void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
{
/* Assign actions. */
for ( int i = 0; i < actions.length(); i++ ) {
switch ( actions[i].type ) {
/* Transition actions. */
case at_start:
graph->startFsmAction( actionOrd[i], actions[i].action );
afterOpMinimize( graph );
break;
case at_all:
graph->allTransAction( actionOrd[i], actions[i].action );
break;
case at_finish:
graph->finishFsmAction( actionOrd[i], actions[i].action );
break;
case at_leave:
graph->leaveFsmAction( actionOrd[i], actions[i].action );
break;
/* Global error actions. */
case at_start_gbl_error:
graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
afterOpMinimize( graph );
break;
case at_all_gbl_error:
graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
break;
case at_final_gbl_error:
graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
break;
case at_not_start_gbl_error:
graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
break;
case at_not_final_gbl_error:
graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
break;
case at_middle_gbl_error:
graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
break;
/* Local error actions. */
case at_start_local_error:
graph->startErrorAction( actionOrd[i], actions[i].action,
actions[i].localErrKey );
afterOpMinimize( graph );
break;
case at_all_local_error:
graph->allErrorAction( actionOrd[i], actions[i].action,
actions[i].localErrKey );
break;
case at_final_local_error:
graph->finalErrorAction( actionOrd[i], actions[i].action,
actions[i].localErrKey );
break;
case at_not_start_local_error:
graph->notStartErrorAction( actionOrd[i], actions[i].action,
actions[i].localErrKey );
break;
case at_not_final_local_error:
graph->notFinalErrorAction( actionOrd[i], actions[i].action,
actions[i].localErrKey );
break;
case at_middle_local_error:
graph->middleErrorAction( actionOrd[i], actions[i].action,
actions[i].localErrKey );
break;
/* EOF actions. */
case at_start_eof:
graph->startEOFAction( actionOrd[i], actions[i].action );
afterOpMinimize( graph );
break;
case at_all_eof:
graph->allEOFAction( actionOrd[i], actions[i].action );
break;
case at_final_eof:
graph->finalEOFAction( actionOrd[i], actions[i].action );
break;
case at_not_start_eof:
graph->notStartEOFAction( actionOrd[i], actions[i].action );
break;
case at_not_final_eof:
graph->notFinalEOFAction( actionOrd[i], actions[i].action );
break;
case at_middle_eof:
graph->middleEOFAction( actionOrd[i], actions[i].action );
break;
/* To State Actions. */
case at_start_to_state:
graph->startToStateAction( actionOrd[i], actions[i].action );
afterOpMinimize( graph );
break;
case at_all_to_state:
graph->allToStateAction( actionOrd[i], actions[i].action );
break;
case at_final_to_state:
graph->finalToStateAction( actionOrd[i], actions[i].action );
break;
case at_not_start_to_state:
graph->notStartToStateAction( actionOrd[i], actions[i].action );
break;
case at_not_final_to_state:
graph->notFinalToStateAction( actionOrd[i], actions[i].action );
break;
case at_middle_to_state:
graph->middleToStateAction( actionOrd[i], actions[i].action );
break;
/* From State Actions. */
case at_start_from_state:
graph->startFromStateAction( actionOrd[i], actions[i].action );
afterOpMinimize( graph );
break;
case at_all_from_state:
graph->allFromStateAction( actionOrd[i], actions[i].action );
break;
case at_final_from_state:
graph->finalFromStateAction( actionOrd[i], actions[i].action );
break;
case at_not_start_from_state:
graph->notStartFromStateAction( actionOrd[i], actions[i].action );
break;
case at_not_final_from_state:
graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
break;
case at_middle_from_state:
graph->middleFromStateAction( actionOrd[i], actions[i].action );
break;
/* Remaining cases, prevented by the parser. */
default:
assert( false );
break;
}
}
}
void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
{
/* Assign priorities. */
for ( int i = 0; i < priorityAugs.length(); i++ ) {
switch ( priorityAugs[i].type ) {
case at_start:
graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
/* Start fsm priorities are a special case that may require
* minimization afterwards. */
afterOpMinimize( graph );
break;
case at_all:
graph->allTransPrior( priorOrd[i], &priorDescs[i] );
break;
case at_finish:
graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
break;
case at_leave:
graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
break;
default:
/* Parser Prevents this case. */
break;
}
}
}
void FactorWithAug::assignConditions( FsmAp *graph )
{
for ( int i = 0; i < conditions.length(); i++ ) {
switch ( conditions[i].type ) {
/* Transition actions. */
case at_start:
graph->startFsmCondition( conditions[i].action, conditions[i].sense );
afterOpMinimize( graph );
break;
case at_all:
graph->allTransCondition( conditions[i].action, conditions[i].sense );
break;
case at_leave:
graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
break;
default:
break;
}
}
}
/* Evaluate a factor with augmentation node. */
FsmAp *FactorWithAug::walk( ParseData *pd )
{
/* Enter into the scopes created for the labels. */
NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
/* Make the array of function orderings. */
int *actionOrd = 0;
if ( actions.length() > 0 )
actionOrd = new int[actions.length()];
/* First walk the list of actions, assigning order to all starting
* actions. */
for ( int i = 0; i < actions.length(); i++ ) {
if ( actions[i].type == at_start ||
actions[i].type == at_start_gbl_error ||
actions[i].type == at_start_local_error ||
actions[i].type == at_start_to_state ||
actions[i].type == at_start_from_state ||
actions[i].type == at_start_eof )
actionOrd[i] = pd->curActionOrd++;
}
/* Evaluate the factor with repetition. */
FsmAp *rtnVal = factorWithRep->walk( pd );
/* Compute the remaining action orderings. */
for ( int i = 0; i < actions.length(); i++ ) {
if ( actions[i].type != at_start &&
actions[i].type != at_start_gbl_error &&
actions[i].type != at_start_local_error &&
actions[i].type != at_start_to_state &&
actions[i].type != at_start_from_state &&
actions[i].type != at_start_eof )
actionOrd[i] = pd->curActionOrd++;
}
/* Embed conditions. */
assignConditions( rtnVal );
/* Embed actions. */
assignActions( pd, rtnVal , actionOrd );
/* Make the array of priority orderings. Orderings are local to this walk
* of the factor with augmentation. */
int *priorOrd = 0;
if ( priorityAugs.length() > 0 )
priorOrd = new int[priorityAugs.length()];
/* Walk all priorities, assigning the priority ordering. */
for ( int i = 0; i < priorityAugs.length(); i++ )
priorOrd[i] = pd->curPriorOrd++;
/* If the priority descriptors have not been made, make them now. Make
* priority descriptors for each priority asignment that will be passed to
* the fsm. Used to keep track of the key, value and used bit. */
if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
priorDescs = new PriorDesc[priorityAugs.length()];
for ( int i = 0; i < priorityAugs.length(); i++ ) {
/* Init the prior descriptor for the priority setting. */
priorDescs[i].key = priorityAugs[i].priorKey;
priorDescs[i].priority = priorityAugs[i].priorValue;
}
}
/* Assign priorities into the machine. */
assignPriorities( rtnVal, priorOrd );
/* Assign epsilon transitions. */
for ( int e = 0; e < epsilonLinks.length(); e++ ) {
/* Get the name, which may not exist. If it doesn't then silently
* ignore it because an error has already been reported. */
NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
if ( epTarg != 0 ) {
/* Make the epsilon transitions. */
rtnVal->epsilonTrans( epTarg->id );
/* Note that we have made a link to the name. */
pd->localNameScope->referencedNames.append( epTarg );
}
}
/* Set entry points for labels. */
if ( labels.length() > 0 ) {
/* Pop the names. */
pd->resetNameScope( nameFrame );
/* Make labels that are referenced into entry points. */
for ( int i = 0; i < labels.length(); i++ ) {
pd->enterNameScope( false, 1 );
/* Will always be found. */
NameInst *name = pd->curNameInst;
/* If the name is referenced then set the entry point. */
if ( name->numRefs > 0 )
rtnVal->setEntry( name->id, rtnVal->startState );
}
pd->popNameScope( nameFrame );
}
if ( priorOrd != 0 )
delete[] priorOrd;
if ( actionOrd != 0 )
delete[] actionOrd;
return rtnVal;
}
void FactorWithAug::makeNameTree( ParseData *pd )
{
/* Add the labels to the tree of instantiated names. Each label
* makes a new scope. */
NameInst *prevNameInst = pd->curNameInst;
for ( int i = 0; i < labels.length(); i++ )
pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
/* Recurse, then pop the names. */
factorWithRep->makeNameTree( pd );
pd->curNameInst = prevNameInst;
}
void FactorWithAug::resolveNameRefs( ParseData *pd )
{
/* Enter into the name scope created by any labels. */
NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
/* Note action references. */
for ( int i = 0; i < actions.length(); i++ )
actions[i].action->actionRefs.append( pd->localNameScope );
/* Recurse first. IMPORTANT: we must do the exact same traversal as when
* the tree is constructed. */
factorWithRep->resolveNameRefs( pd );
/* Resolve epsilon transitions. */
for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
/* Get the link. */
EpsilonLink &link = epsilonLinks[ep];
NameInst *resolvedName = 0;
if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
/* Epsilon drawn to an implicit final state. An implicit final is
* only available in join operations. */
resolvedName = pd->localNameScope->final;
}
else {
/* Do an search for the name. */
NameSet resolved;
pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
if ( resolved.length() > 0 ) {
/* Take the first one. */
resolvedName = resolved[0];
if ( resolved.length() > 1 ) {
/* Complain about the multiple references. */
error(link.loc) << "state reference " << link.target <<
" resolves to multiple entry points" << endl;
errorStateLabels( resolved );
}
}
}
/* This is tricky, we stuff resolved epsilon transitions into one long
* vector in the parse data structure. Since the name resolution and
* graph generation both do identical walks of the parse tree we
* should always find the link resolutions in the right place. */
pd->epsilonResolvedLinks.append( resolvedName );
if ( resolvedName != 0 ) {
/* Found the name, bump of the reference count on it. */
resolvedName->numRefs += 1;
}
else {
/* Complain, no recovery action, the epsilon op will ignore any
* epsilon transitions whose names did not resolve. */
error(link.loc) << "could not resolve label " << link.target << endl;
}
}
if ( labels.length() > 0 )
pd->popNameScope( nameFrame );
}
/* Clean up after a factor with repetition node. */
FactorWithRep::~FactorWithRep()
{
switch ( type ) {
case StarType: case StarStarType: case OptionalType: case PlusType:
case ExactType: case MaxType: case MinType: case RangeType:
delete factorWithRep;
break;
case FactorWithNegType:
delete factorWithNeg;
break;
}
}
/* Evaluate a factor with repetition node. */
FsmAp *FactorWithRep::walk( ParseData *pd )
{
FsmAp *retFsm = 0;
switch ( type ) {
case StarType: {
/* Evaluate the FactorWithRep. */
retFsm = factorWithRep->walk( pd );
if ( retFsm->startState->isFinState() ) {
warning(loc) << "applying kleene star to a machine that "
"accepts zero length word" << endl;
retFsm->unsetFinState( retFsm->startState );
}
/* Shift over the start action orders then do the kleene star. */
pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
retFsm->starOp( );
afterOpMinimize( retFsm );
break;
}
case StarStarType: {
/* Evaluate the FactorWithRep. */
retFsm = factorWithRep->walk( pd );
if ( retFsm->startState->isFinState() ) {
warning(loc) << "applying kleene star to a machine that "
"accepts zero length word" << endl;
}
/* Set up the prior descs. All gets priority one, whereas leaving gets
* priority zero. Make a unique key so that these priorities don't
* interfere with any priorities set by the user. */
priorDescs[0].key = pd->nextPriorKey++;
priorDescs[0].priority = 1;
retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
/* Leaveing gets priority 0. Use same unique key. */
priorDescs[1].key = priorDescs[0].key;
priorDescs[1].priority = 0;
retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
/* Shift over the start action orders then do the kleene star. */
pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
retFsm->starOp( );
afterOpMinimize( retFsm );
break;
}
case OptionalType: {
/* Make the null fsm. */
FsmAp *nu = new FsmAp();
nu->lambdaFsm( );
/* Evaluate the FactorWithRep. */
retFsm = factorWithRep->walk( pd );
/* Perform the question operator. */
retFsm->unionOp( nu );
afterOpMinimize( retFsm );
break;
}
case PlusType: {
/* Evaluate the FactorWithRep. */
retFsm = factorWithRep->walk( pd );
if ( retFsm->startState->isFinState() ) {
warning(loc) << "applying plus operator to a machine that "
"accepts zero length word" << endl;
}
/* Need a duplicated for the star end. */
FsmAp *dup = new FsmAp( *retFsm );
/* The start func orders need to be shifted before doing the star. */
pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
/* Star the duplicate. */
dup->starOp( );
afterOpMinimize( dup );
retFsm->concatOp( dup );
afterOpMinimize( retFsm );
break;
}
case ExactType: {
/* Get an int from the repetition amount. */
if ( lowerRep == 0 ) {
/* No copies. Don't need to evaluate the factorWithRep.
* This Defeats the purpose so give a warning. */
warning(loc) << "exactly zero repetitions results "
"in the null machine" << endl;
retFsm = new FsmAp();
retFsm->lambdaFsm();
}
else {
/* Evaluate the first FactorWithRep. */
retFsm = factorWithRep->walk( pd );
if ( retFsm->startState->isFinState() ) {
warning(loc) << "applying repetition to a machine that "
"accepts zero length word" << endl;
}
/* The start func orders need to be shifted before doing the
* repetition. */
pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
/* Do the repetition on the machine. Already guarded against n == 0 */
retFsm->repeatOp( lowerRep );
afterOpMinimize( retFsm );
}
break;
}
case MaxType: {
/* Get an int from the repetition amount. */
if ( upperRep == 0 ) {
/* No copies. Don't need to evaluate the factorWithRep.
* This Defeats the purpose so give a warning. */
warning(loc) << "max zero repetitions results "
"in the null machine" << endl;
retFsm = new FsmAp();
retFsm->lambdaFsm();
}
else {
/* Evaluate the first FactorWithRep. */
retFsm = factorWithRep->walk( pd );
if ( retFsm->startState->isFinState() ) {
warning(loc) << "applying max repetition to a machine that "
"accepts zero length word" << endl;
}
/* The start func orders need to be shifted before doing the
* repetition. */
pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
/* Do the repetition on the machine. Already guarded against n == 0 */
retFsm->optionalRepeatOp( upperRep );
afterOpMinimize( retFsm );
}
break;
}
case MinType: {
/* Evaluate the repeated machine. */
retFsm = factorWithRep->walk( pd );
if ( retFsm->startState->isFinState() ) {
warning(loc) << "applying min repetition to a machine that "
"accepts zero length word" << endl;
}
/* The start func orders need to be shifted before doing the repetition
* and the kleene star. */
pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
if ( lowerRep == 0 ) {
/* Acts just like a star op on the machine to return. */
retFsm->starOp( );
afterOpMinimize( retFsm );
}
else {
/* Take a duplicate for the plus. */
FsmAp *dup = new FsmAp( *retFsm );
/* Do repetition on the first half. */
retFsm->repeatOp( lowerRep );
afterOpMinimize( retFsm );
/* Star the duplicate. */
dup->starOp( );
afterOpMinimize( dup );
/* Tak on the kleene star. */
retFsm->concatOp( dup );
afterOpMinimize( retFsm );
}
break;
}
case RangeType: {
/* Check for bogus range. */
if ( upperRep - lowerRep < 0 ) {
error(loc) << "invalid range repetition" << endl;
/* Return null machine as recovery. */
retFsm = new FsmAp();
retFsm->lambdaFsm();
}
else if ( lowerRep == 0 && upperRep == 0 ) {
/* No copies. Don't need to evaluate the factorWithRep. This
* defeats the purpose so give a warning. */
warning(loc) << "zero to zero repetitions results "
"in the null machine" << endl;
retFsm = new FsmAp();
retFsm->lambdaFsm();
}
else {
/* Now need to evaluate the repeated machine. */
retFsm = factorWithRep->walk( pd );
if ( retFsm->startState->isFinState() ) {
warning(loc) << "applying range repetition to a machine that "
"accepts zero length word" << endl;
}
/* The start func orders need to be shifted before doing both kinds
* of repetition. */
pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
if ( lowerRep == 0 ) {
/* Just doing max repetition. Already guarded against n == 0. */
retFsm->optionalRepeatOp( upperRep );
afterOpMinimize( retFsm );
}
else if ( lowerRep == upperRep ) {
/* Just doing exact repetition. Already guarded against n == 0. */
retFsm->repeatOp( lowerRep );
afterOpMinimize( retFsm );
}
else {
/* This is the case that 0 < lowerRep < upperRep. Take a
* duplicate for the optional repeat. */
FsmAp *dup = new FsmAp( *retFsm );
/* Do repetition on the first half. */
retFsm->repeatOp( lowerRep );
afterOpMinimize( retFsm );
/* Do optional repetition on the second half. */
dup->optionalRepeatOp( upperRep - lowerRep );
afterOpMinimize( dup );
/* Tak on the duplicate machine. */
retFsm->concatOp( dup );
afterOpMinimize( retFsm );
}
}
break;
}
case FactorWithNegType: {
/* Evaluate the Factor. Pass it up. */
retFsm = factorWithNeg->walk( pd );
break;
}}
return retFsm;
}
void FactorWithRep::makeNameTree( ParseData *pd )
{
switch ( type ) {
case StarType:
case StarStarType:
case OptionalType:
case PlusType:
case ExactType:
case MaxType:
case MinType:
case RangeType:
factorWithRep->makeNameTree( pd );
break;
case FactorWithNegType:
factorWithNeg->makeNameTree( pd );
break;
}
}
void FactorWithRep::resolveNameRefs( ParseData *pd )
{
switch ( type ) {
case StarType:
case StarStarType:
case OptionalType:
case PlusType:
case ExactType:
case MaxType:
case MinType:
case RangeType:
factorWithRep->resolveNameRefs( pd );
break;
case FactorWithNegType:
factorWithNeg->resolveNameRefs( pd );
break;
}
}
/* Clean up after a factor with negation node. */
FactorWithNeg::~FactorWithNeg()
{
switch ( type ) {
case NegateType:
case CharNegateType:
delete factorWithNeg;
break;
case FactorType:
delete factor;
break;
}
}
/* Evaluate a factor with negation node. */
FsmAp *FactorWithNeg::walk( ParseData *pd )
{
FsmAp *retFsm = 0;
switch ( type ) {
case NegateType: {
/* Evaluate the factorWithNeg. */
FsmAp *toNegate = factorWithNeg->walk( pd );
/* Negation is subtract from dot-star. */
retFsm = dotStarFsm( pd );
retFsm->subtractOp( toNegate );
afterOpMinimize( retFsm );
break;
}
case CharNegateType: {
/* Evaluate the factorWithNeg. */
FsmAp *toNegate = factorWithNeg->walk( pd );
/* CharNegation is subtract from dot. */
retFsm = dotFsm( pd );
retFsm->subtractOp( toNegate );
afterOpMinimize( retFsm );
break;
}
case FactorType: {
/* Evaluate the Factor. Pass it up. */
retFsm = factor->walk( pd );
break;
}}
return retFsm;
}
void FactorWithNeg::makeNameTree( ParseData *pd )
{
switch ( type ) {
case NegateType:
case CharNegateType:
factorWithNeg->makeNameTree( pd );
break;
case FactorType:
factor->makeNameTree( pd );
break;
}
}
void FactorWithNeg::resolveNameRefs( ParseData *pd )
{
switch ( type ) {
case NegateType:
case CharNegateType:
factorWithNeg->resolveNameRefs( pd );
break;
case FactorType:
factor->resolveNameRefs( pd );
break;
}
}
/* Clean up after a factor node. */
Factor::~Factor()
{
switch ( type ) {
case LiteralType:
delete literal;
break;
case RangeType:
delete range;
break;
case OrExprType:
delete reItem;
break;
case RegExprType:
delete regExpr;
break;
case ReferenceType:
break;
case ParenType:
delete join;
break;
case LongestMatchType:
delete longestMatch;
break;
}
}
/* Evaluate a factor node. */
FsmAp *Factor::walk( ParseData *pd )
{
FsmAp *rtnVal = 0;
switch ( type ) {
case LiteralType:
rtnVal = literal->walk( pd );
break;
case RangeType:
rtnVal = range->walk( pd );
break;
case OrExprType:
rtnVal = reItem->walk( pd, 0 );
break;
case RegExprType:
rtnVal = regExpr->walk( pd, 0 );
break;
case ReferenceType:
rtnVal = varDef->walk( pd );
break;
case ParenType:
rtnVal = join->walk( pd );
break;
case LongestMatchType:
rtnVal = longestMatch->walk( pd );
break;
}
return rtnVal;
}
void Factor::makeNameTree( ParseData *pd )
{
switch ( type ) {
case LiteralType:
case RangeType:
case OrExprType:
case RegExprType:
break;
case ReferenceType:
varDef->makeNameTree( loc, pd );
break;
case ParenType:
join->makeNameTree( pd );
break;
case LongestMatchType:
longestMatch->makeNameTree( pd );
break;
}
}
void Factor::resolveNameRefs( ParseData *pd )
{
switch ( type ) {
case LiteralType:
case RangeType:
case OrExprType:
case RegExprType:
break;
case ReferenceType:
varDef->resolveNameRefs( pd );
break;
case ParenType:
join->resolveNameRefs( pd );
break;
case LongestMatchType:
longestMatch->resolveNameRefs( pd );
break;
}
}
/* Clean up a range object. Must delete the two literals. */
Range::~Range()
{
delete lowerLit;
delete upperLit;
}
/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
FsmAp *Range::walk( ParseData *pd )
{
/* Construct and verify the suitability of the lower end of the range. */
FsmAp *lowerFsm = lowerLit->walk( pd );
if ( !lowerFsm->checkSingleCharMachine() ) {
error(lowerLit->token.loc) <<
"bad range lower end, must be a single character" << endl;
}
/* Construct and verify the upper end. */
FsmAp *upperFsm = upperLit->walk( pd );
if ( !upperFsm->checkSingleCharMachine() ) {
error(upperLit->token.loc) <<
"bad range upper end, must be a single character" << endl;
}
/* Grab the keys from the machines, then delete them. */
Key lowKey = lowerFsm->startState->outList.head->lowKey;
Key highKey = upperFsm->startState->outList.head->lowKey;
delete lowerFsm;
delete upperFsm;
/* Validate the range. */
if ( lowKey > highKey ) {
/* Recover by setting upper to lower; */
error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
highKey = lowKey;
}
/* Return the range now that it is validated. */
FsmAp *retFsm = new FsmAp();
retFsm->rangeFsm( lowKey, highKey );
return retFsm;
}
/* Evaluate a literal object. */
FsmAp *Literal::walk( ParseData *pd )
{
/* FsmAp to return, is the alphabet signed. */
FsmAp *rtnVal = 0;
switch ( type ) {
case Number: {
/* Make the fsm key in int format. */
Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
/* Make the new machine. */
rtnVal = new FsmAp();
rtnVal->concatFsm( fsmKey );
break;
}
case LitString: {
/* Make the array of keys in int format. */
long length;
bool caseInsensitive;
char *data = prepareLitString( token.loc, token.data, token.length,
length, caseInsensitive );
Key *arr = new Key[length];
makeFsmKeyArray( arr, data, length, pd );
/* Make the new machine. */
rtnVal = new FsmAp();
if ( caseInsensitive )
rtnVal->concatFsmCI( arr, length );
else
rtnVal->concatFsm( arr, length );
delete[] data;
delete[] arr;
break;
}}
return rtnVal;
}
/* Clean up after a regular expression object. */
RegExpr::~RegExpr()
{
switch ( type ) {
case RecurseItem:
delete regExpr;
delete item;
break;
case Empty:
break;
}
}
/* Evaluate a regular expression object. */
FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
{
/* This is the root regex, pass down a pointer to this. */
if ( rootRegex == 0 )
rootRegex = this;
FsmAp *rtnVal = 0;
switch ( type ) {
case RecurseItem: {
/* Walk both items. */
rtnVal = regExpr->walk( pd, rootRegex );
FsmAp *fsm2 = item->walk( pd, rootRegex );
rtnVal->concatOp( fsm2 );
break;
}
case Empty: {
rtnVal = new FsmAp();
rtnVal->lambdaFsm();
break;
}
}
return rtnVal;
}
/* Clean up after an item in a regular expression. */
ReItem::~ReItem()
{
switch ( type ) {
case Data:
case Dot:
break;
case OrBlock:
case NegOrBlock:
delete orBlock;
break;
}
}
/* Evaluate a regular expression object. */
FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
{
/* The fsm to return, is the alphabet signed? */
FsmAp *rtnVal = 0;
switch ( type ) {
case Data: {
/* Move the data into an integer array and make a concat fsm. */
Key *arr = new Key[token.length];
makeFsmKeyArray( arr, token.data, token.length, pd );
/* Make the concat fsm. */
rtnVal = new FsmAp();
if ( rootRegex != 0 && rootRegex->caseInsensitive )
rtnVal->concatFsmCI( arr, token.length );
else
rtnVal->concatFsm( arr, token.length );
delete[] arr;
break;
}
case Dot: {
/* Make the dot fsm. */
rtnVal = dotFsm( pd );
break;
}
case OrBlock: {
/* Get the or block and minmize it. */
rtnVal = orBlock->walk( pd, rootRegex );
if ( rtnVal == 0 ) {
rtnVal = new FsmAp();
rtnVal->lambdaFsm();
}
rtnVal->minimizePartition2();
break;
}
case NegOrBlock: {
/* Get the or block and minimize it. */
FsmAp *fsm = orBlock->walk( pd, rootRegex );
fsm->minimizePartition2();
/* Make a dot fsm and subtract from it. */
rtnVal = dotFsm( pd );
rtnVal->subtractOp( fsm );
rtnVal->minimizePartition2();
break;
}
}
/* If the item is followed by a star, then apply the star op. */
if ( star ) {
if ( rtnVal->startState->isFinState() ) {
warning(loc) << "applying kleene star to a machine that "
"accepts zero length word" << endl;
}
rtnVal->starOp();
rtnVal->minimizePartition2();
}
return rtnVal;
}
/* Clean up after an or block of a regular expression. */
ReOrBlock::~ReOrBlock()
{
switch ( type ) {
case RecurseItem:
delete orBlock;
delete item;
break;
case Empty:
break;
}
}
/* Evaluate an or block of a regular expression. */
FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
{
FsmAp *rtnVal = 0;
switch ( type ) {
case RecurseItem: {
/* Evaluate the two fsm. */
FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
FsmAp *fsm2 = item->walk( pd, rootRegex );
if ( fsm1 == 0 )
rtnVal = fsm2;
else {
fsm1->unionOp( fsm2 );
rtnVal = fsm1;
}
break;
}
case Empty: {
rtnVal = 0;
break;
}
}
return rtnVal;;
}
/* Evaluate an or block item of a regular expression. */
FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
{
/* The return value, is the alphabet signed? */
FsmAp *rtnVal = 0;
switch ( type ) {
case Data: {
/* Make the or machine. */
rtnVal = new FsmAp();
/* Put the or data into an array of ints. Note that we find unique
* keys. Duplicates are silently ignored. The alternative would be to
* issue warning or an error but since we can't with [a0-9a] or 'a' |
* 'a' don't bother here. */
KeySet keySet;
makeFsmUniqueKeyArray( keySet, token.data, token.length,
rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
/* Run the or operator. */
rtnVal->orFsm( keySet.data, keySet.length() );
break;
}
case Range: {
/* Make the upper and lower keys. */
Key lowKey = makeFsmKeyChar( lower, pd );
Key highKey = makeFsmKeyChar( upper, pd );
/* Validate the range. */
if ( lowKey > highKey ) {
/* Recover by setting upper to lower; */
error(loc) << "lower end of range is greater then upper end" << endl;
highKey = lowKey;
}
/* Make the range machine. */
rtnVal = new FsmAp();
rtnVal->rangeFsm( lowKey, highKey );
if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
if ( lowKey <= 'Z' && 'A' <= highKey ) {
Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
otherLow = 'a' + ( otherLow - 'A' );
otherHigh = 'a' + ( otherHigh - 'A' );
FsmAp *otherRange = new FsmAp();
otherRange->rangeFsm( otherLow, otherHigh );
rtnVal->unionOp( otherRange );
rtnVal->minimizePartition2();
}
else if ( lowKey <= 'z' && 'a' <= highKey ) {
Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
Key otherHigh = 'z' < highKey ? Key('z') : highKey;
otherLow = 'A' + ( otherLow - 'a' );
otherHigh = 'A' + ( otherHigh - 'a' );
FsmAp *otherRange = new FsmAp();
otherRange->rangeFsm( otherLow, otherHigh );
rtnVal->unionOp( otherRange );
rtnVal->minimizePartition2();
}
}
break;
}}
return rtnVal;
}