#include <iostream>
#include <iomanip>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include "ragel.h"
#include "rlparse.h"
#include "parsetree.h"
using
namespace
std;
ostream &operator<<( ostream &out,
const
NameRef &nameRef );
ostream &operator<<( ostream &out,
const
NameInst &nameInst );
char
*prepareLitString(
const
InputLoc &loc,
const
char
*data,
long
length,
long
&resLen,
bool
&caseInsensitive )
{
char
*resData =
new
char
[length+1];
caseInsensitive =
false
;
const
char
*src = data + 1;
const
char
*end = data + length - 1;
while
( *end !=
'\''
&& *end !=
'\"'
) {
if
( *end ==
'i'
)
caseInsensitive =
true
;
else
{
error( loc ) <<
"literal string '"
<< *end <<
"' option not supported"
<< endl;
}
end -= 1;
}
char
*dest = resData;
long
len = 0;
while
( src != end ) {
if
( *src ==
'\\'
) {
switch
( src[1] ) {
case
'0'
: dest[len++] =
'\0'
;
break
;
case
'a'
: dest[len++] =
'\a'
;
break
;
case
'b'
: dest[len++] =
'\b'
;
break
;
case
't'
: dest[len++] =
'\t'
;
break
;
case
'n'
: dest[len++] =
'\n'
;
break
;
case
'v'
: dest[len++] =
'\v'
;
break
;
case
'f'
: dest[len++] =
'\f'
;
break
;
case
'r'
: dest[len++] =
'\r'
;
break
;
case
'\n'
:
break
;
default
: dest[len++] = src[1];
break
;
}
src += 2;
}
else
{
dest[len++] = *src++;
}
}
resLen = len;
resData[resLen] = 0;
return
resData;
}
FsmAp *VarDef::walk( ParseData *pd )
{
NameFrame nameFrame = pd->enterNameScope(
true
, 1 );
FsmAp *rtnVal = machineDef->walk( pd );
LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
if
( localErrDictEl != 0 ) {
for
( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
rtnVal->transferErrorActions( state, localErrDictEl->value );
}
if
( machineDef->type == MachineDef::JoinType && machineDef->join->exprList.length() == 1 )
rtnVal->epsilonOp();
pd->unsetObsoleteEntries( rtnVal );
if
( pd->curNameInst->numRefs > 0 )
rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
pd->popNameScope( nameFrame );
return
rtnVal;
}
void
VarDef::makeNameTree(
const
InputLoc &loc, ParseData *pd )
{
NameInst *prevNameInst = pd->curNameInst;
pd->curNameInst = pd->addNameInst( loc, name,
false
);
if
( machineDef->type == MachineDef::LongestMatchType )
pd->curNameInst->isLongestMatch =
true
;
machineDef->makeNameTree( pd );
pd->curNameInst = prevNameInst;
}
void
VarDef::resolveNameRefs( ParseData *pd )
{
NameFrame nameFrame = pd->enterNameScope(
true
, 1 );
machineDef->resolveNameRefs( pd );
pd->popNameScope( nameFrame );
}
InputLoc LongestMatchPart::getLoc()
{
return
action != 0 ? action->loc : semiLoc;
}
Action *LongestMatch::newAction( ParseData *pd,
const
InputLoc &loc,
const
char
*name, InlineList *inlineList )
{
Action *action =
new
Action( loc, name, inlineList, pd->nextCondId++ );
action->actionRefs.append( pd->curNameInst );
pd->actionList.append( action );
action->isLmAction =
true
;
return
action;
}
void
LongestMatch::makeActions( ParseData *pd )
{
for
( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
InlineList *inlineList =
new
InlineList;
inlineList->append(
new
InlineItem( lmi->getLoc(),
this
, lmi,
InlineItem::LmSetActId ) );
char
*actName =
new
char
[50];
sprintf
( actName,
"store%i"
, lmi->longestMatchId );
lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
}
for
( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
InlineList *inlineList =
new
InlineList;
inlineList->append(
new
InlineItem( lmi->getLoc(),
this
, lmi,
InlineItem::LmOnLast ) );
char
*actName =
new
char
[50];
sprintf
( actName,
"last%i"
, lmi->longestMatchId );
lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
}
for
( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
InlineList *inlineList =
new
InlineList;
inlineList->append(
new
InlineItem( lmi->getLoc(),
this
, lmi,
InlineItem::LmOnNext ) );
char
*actName =
new
char
[50];
sprintf
( actName,
"next%i"
, lmi->longestMatchId );
lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
}
for
( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
InlineList *inlineList =
new
InlineList;
inlineList->append(
new
InlineItem( lmi->getLoc(),
this
, lmi,
InlineItem::LmOnLagBehind ) );
char
*actName =
new
char
[50];
sprintf
( actName,
"lag%i"
, lmi->longestMatchId );
lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
}
InputLoc loc;
loc.line = 1;
loc.col = 1;
loc.fileName =
"NONE"
;
InlineList *il6 =
new
InlineList;
il6->append(
new
InlineItem( loc,
this
, 0, InlineItem::LmSwitch ) );
lmActSelect = newAction( pd, loc,
"switch"
, il6 );
}
void
LongestMatch::findName( ParseData *pd )
{
NameInst *nameInst = pd->curNameInst;
while
( nameInst->name == 0 ) {
nameInst = nameInst->parent;
assert
( nameInst != 0 );
}
name = nameInst->name;
}
void
LongestMatch::makeNameTree( ParseData *pd )
{
NameInst *prevNameInst = pd->curNameInst;
pd->curNameInst = pd->addNameInst( loc, 0,
false
);
for
( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
lmi->join->makeNameTree( pd );
findName( pd );
makeActions( pd );
pd->curNameInst = prevNameInst;
}
void
LongestMatch::resolveNameRefs( ParseData *pd )
{
NameFrame nameFrame = pd->enterNameScope(
true
, 1 );
for
( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
if
( lmi->action != 0 )
lmi->action->actionRefs.append( pd->localNameScope );
lmi->join->resolveNameRefs( pd );
}
pd->popNameScope( nameFrame );
}
void
LongestMatch::restart( FsmAp *graph, TransAp *trans )
{
StateAp *fromState = trans->fromState;
graph->detachTrans( fromState, trans->toState, trans );
graph->attachTrans( fromState, graph->startState, trans );
}
void
LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
{
graph->markReachableFromHereStopFinal( graph->startState );
for
( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
if
( ms->stateBits & STB_ISMARKED ) {
ms->lmItemSet.insert( 0 );
ms->stateBits &= ~ STB_ISMARKED;
}
}
for
( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
for
( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
if
( trans->lmActionTable.length() > 0 ) {
LmActionTableEl *lmAct = trans->lmActionTable.data;
StateAp *toState = trans->toState;
assert
( toState );
if
( toState->outList.length() > 0 ) {
graph->markReachableFromHereStopFinal( toState );
for
( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
if
( ms->stateBits & STB_ISMARKED ) {
ms->lmItemSet.insert( lmAct->value );
ms->stateBits &= ~ STB_ISMARKED;
}
}
}
}
}
}
int
maxItemSetLength = 0;
graph->markReachableFromHereStopFinal( graph->startState );
for
( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
if
( ms->stateBits & STB_ISMARKED ) {
if
( ms->lmItemSet.length() > maxItemSetLength )
maxItemSetLength = ms->lmItemSet.length();
ms->stateBits &= ~ STB_ISMARKED;
}
}
graph->isolateStartState();
graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
if
( maxItemSetLength > 1 ) {
lmSwitchHandlesError =
true
;
pd->lmRequiresErrorState =
true
;
graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
}
Vector<TransAp*> restartTrans;
for
( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
for
( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
if
( trans->lmActionTable.length() > 0 ) {
LmActionTableEl *lmAct = trans->lmActionTable.data;
StateAp *toState = trans->toState;
assert
( toState );
if
( toState->outList.length() == 0 ) {
trans->actionTable.setAction( lmAct->key,
lmAct->value->actOnLast );
restartTrans.append( trans );
}
else
{
bool
nonFinalNonEmptyItemSet =
false
;
maxItemSetLength = 0;
graph->markReachableFromHereStopFinal( toState );
for
( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
if
( ms->stateBits & STB_ISMARKED ) {
if
( ms->lmItemSet.length() > 0 && !ms->isFinState() )
nonFinalNonEmptyItemSet =
true
;
if
( ms->lmItemSet.length() > maxItemSetLength )
maxItemSetLength = ms->lmItemSet.length();
ms->stateBits &= ~ STB_ISMARKED;
}
}
if
( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
if
( maxItemSetLength > 1 ) {
trans->actionTable.setAction( lmAct->key,
lmAct->value->setActId );
}
}
}
}
}
for
( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
restart( graph, *pt );
int
lmErrActionOrd = pd->curActionOrd++;
for
( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
if
( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
if
( st->isFinState() ) {
graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
&st->lmItemSet[0]->actOnNext, 1 );
st->eofActionTable.setAction( lmErrActionOrd,
st->lmItemSet[0]->actOnNext );
st->eofTarget = graph->startState;
}
else
{
graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
&st->lmItemSet[0]->actLagBehind, 1 );
st->eofActionTable.setAction( lmErrActionOrd,
st->lmItemSet[0]->actLagBehind );
st->eofTarget = graph->startState;
}
}
else
if
( st->lmItemSet.length() > 1 ) {
for
( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
if
( *plmi != 0 )
(*plmi)->inLmSelect =
true
;
}
graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
&lmActSelect, 1 );
st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
st->eofTarget = graph->startState;
}
}
graph->setFinState( graph->startState );
}
void
LongestMatch::transferScannerLeavingActions( FsmAp *graph )
{
for
( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
if
( st->outActionTable.length() > 0 )
graph->setErrorActions( st, st->outActionTable );
}
}
FsmAp *LongestMatch::walk( ParseData *pd )
{
NameFrame nameFrame = pd->enterNameScope(
true
, 1 );
FsmAp **parts =
new
FsmAp*[longestMatchList->length()];
LmPartList::Iter lmi = *longestMatchList;
for
(
int
i = 0; lmi.lte(); lmi++, i++ ) {
parts[i] = lmi->join->walk( pd );
parts[i]->longMatchAction( pd->curActionOrd++, lmi );
}
for
(
int
i = 0; i < longestMatchList->length(); i++ )
transferScannerLeavingActions( parts[i] );
FsmAp *rtnVal = parts[0];
for
(
int
i = 1; i < longestMatchList->length(); i++ ) {
rtnVal->unionOp( parts[i] );
afterOpMinimize( rtnVal );
}
runLongestMatch( pd, rtnVal );
pd->popNameScope( nameFrame );
delete
[] parts;
return
rtnVal;
}
FsmAp *MachineDef::walk( ParseData *pd )
{
FsmAp *rtnVal = 0;
switch
( type ) {
case
JoinType:
rtnVal = join->walk( pd );
break
;
case
LongestMatchType:
rtnVal = longestMatch->walk( pd );
break
;
case
LengthDefType:
condData->lastCondKey.increment();
rtnVal =
new
FsmAp();
rtnVal->concatFsm( condData->lastCondKey );
break
;
}
return
rtnVal;
}
void
MachineDef::makeNameTree( ParseData *pd )
{
switch
( type ) {
case
JoinType:
join->makeNameTree( pd );
break
;
case
LongestMatchType:
longestMatch->makeNameTree( pd );
break
;
case
LengthDefType:
break
;
}
}
void
MachineDef::resolveNameRefs( ParseData *pd )
{
switch
( type ) {
case
JoinType:
join->resolveNameRefs( pd );
break
;
case
LongestMatchType:
longestMatch->resolveNameRefs( pd );
break
;
case
LengthDefType:
break
;
}
}
Join::Join(
const
InputLoc &loc, Expression *expr )
:
loc(loc)
{
exprList.append( expr );
}
Join::Join( Expression *expr )
{
exprList.append( expr );
}
FsmAp *Join::walk( ParseData *pd )
{
if
( exprList.length() > 1 )
return
walkJoin( pd );
else
return
exprList.head->walk( pd );
}
FsmAp *Join::walkJoin( ParseData *pd )
{
NameFrame nameFrame = pd->enterNameScope(
true
, 1 );
FsmAp **fsms =
new
FsmAp*[exprList.length()];
ExprList::Iter expr = exprList;
for
(
int
e = 0; e < exprList.length(); e++, expr++ )
fsms[e] = expr->walk( pd );
NameInst *startName = pd->curNameInst->start;
NameInst *finalName = pd->curNameInst->final;
int
startId = -1;
if
( startName != 0 ) {
pd->localNameScope->referencedNames.append( startName );
startId = startName->id;
}
int
finalId = -1;
if
( finalName->numRefs > 0 )
finalId = finalName->id;
FsmAp *retFsm = fsms[0];
retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
pd->unsetObsoleteEntries( retFsm );
pd->popNameScope( nameFrame );
delete
[] fsms;
return
retFsm;
}
void
Join::makeNameTree( ParseData *pd )
{
if
( exprList.length() > 1 ) {
NameInst *prevNameInst = pd->curNameInst;
pd->curNameInst = pd->addNameInst( loc, 0,
false
);
pd->curNameInst->final =
new
NameInst( InputLoc(), pd->curNameInst,
"final"
,
pd->nextNameId++,
false
);
for
( ExprList::Iter expr = exprList; expr.lte(); expr++ )
expr->makeNameTree( pd );
pd->curNameInst = prevNameInst;
}
else
{
exprList.head->makeNameTree( pd );
}
}
void
Join::resolveNameRefs( ParseData *pd )
{
if
( exprList.length() > 1 ) {
NameFrame nameFrame = pd->enterNameScope(
true
, 1 );
NameSet resolved = pd->resolvePart( pd->localNameScope,
"start"
,
true
);
if
( resolved.length() > 0 ) {
pd->curNameInst->start = resolved[0];
if
( resolved.length() > 1 ) {
error(loc) <<
"join operation has multiple start labels"
<< endl;
errorStateLabels( resolved );
}
}
if
( pd->curNameInst->start != 0 ) {
pd->curNameInst->start->numRefs += 1;
}
else
{
error(loc) <<
"join operation has no start label"
<< endl;
}
for
( ExprList::Iter expr = exprList; expr.lte(); expr++ )
expr->resolveNameRefs( pd );
pd->popNameScope( nameFrame );
}
else
{
exprList.head->resolveNameRefs( pd );
}
}
Expression::~Expression()
{
switch
( type ) {
case
OrType:
case
IntersectType:
case
SubtractType:
case
StrongSubtractType:
delete
expression;
delete
term;
break
;
case
TermType:
delete
term;
break
;
case
BuiltinType:
break
;
}
}
FsmAp *Expression::walk( ParseData *pd,
bool
lastInSeq )
{
FsmAp *rtnVal = 0;
switch
( type ) {
case
OrType: {
rtnVal = expression->walk( pd,
false
);
FsmAp *rhs = term->walk( pd );
rtnVal->unionOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break
;
}
case
IntersectType: {
rtnVal = expression->walk( pd );
FsmAp *rhs = term->walk( pd );
rtnVal->intersectOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break
;
}
case
SubtractType: {
rtnVal = expression->walk( pd );
FsmAp *rhs = term->walk( pd );
rtnVal->subtractOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break
;
}
case
StrongSubtractType: {
rtnVal = expression->walk( pd );
FsmAp *rhs = dotStarFsm( pd );
FsmAp *termFsm = term->walk( pd );
FsmAp *trailAnyStar = dotStarFsm( pd );
rhs->concatOp( termFsm );
rhs->concatOp( trailAnyStar );
rtnVal->subtractOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break
;
}
case
TermType: {
rtnVal = term->walk( pd );
break
;
}
case
BuiltinType: {
rtnVal = makeBuiltin( builtin, pd );
break
;
}
}
return
rtnVal;
}
void
Expression::makeNameTree( ParseData *pd )
{
switch
( type ) {
case
OrType:
case
IntersectType:
case
SubtractType:
case
StrongSubtractType:
expression->makeNameTree( pd );
term->makeNameTree( pd );
break
;
case
TermType:
term->makeNameTree( pd );
break
;
case
BuiltinType:
break
;
}
}
void
Expression::resolveNameRefs( ParseData *pd )
{
switch
( type ) {
case
OrType:
case
IntersectType:
case
SubtractType:
case
StrongSubtractType:
expression->resolveNameRefs( pd );
term->resolveNameRefs( pd );
break
;
case
TermType:
term->resolveNameRefs( pd );
break
;
case
BuiltinType:
break
;
}
}
Term::~Term()
{
switch
( type ) {
case
ConcatType:
case
RightStartType:
case
RightFinishType:
case
LeftType:
delete
term;
delete
factorWithAug;
break
;
case
FactorWithAugType:
delete
factorWithAug;
break
;
}
}
FsmAp *Term::walk( ParseData *pd,
bool
lastInSeq )
{
FsmAp *rtnVal = 0;
switch
( type ) {
case
ConcatType: {
rtnVal = term->walk( pd,
false
);
FsmAp *rhs = factorWithAug->walk( pd );
rtnVal->concatOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break
;
}
case
RightStartType: {
rtnVal = term->walk( pd );
FsmAp *rhs = factorWithAug->walk( pd );
priorDescs[0].key = pd->nextPriorKey++;
priorDescs[0].priority = 0;
rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
priorDescs[1].key = priorDescs[0].key;
priorDescs[1].priority = 1;
rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
rtnVal->concatOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break
;
}
case
RightFinishType: {
rtnVal = term->walk( pd );
FsmAp *rhs = factorWithAug->walk( pd );
priorDescs[0].key = pd->nextPriorKey++;
priorDescs[0].priority = 0;
rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
priorDescs[1].key = priorDescs[0].key;
priorDescs[1].priority = 1;
rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
if
( rhs->startState->isFinState() ) {
rhs->startState->outPriorTable.setPrior(
pd->curPriorOrd++, &priorDescs[1] );
}
rtnVal->concatOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break
;
}
case
LeftType: {
rtnVal = term->walk( pd );
FsmAp *rhs = factorWithAug->walk( pd );
priorDescs[0].key = pd->nextPriorKey++;
priorDescs[0].priority = 1;
rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
priorDescs[1].key = priorDescs[0].key;
priorDescs[1].priority = 0;
rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
rtnVal->concatOp( rhs );
afterOpMinimize( rtnVal, lastInSeq );
break
;
}
case
FactorWithAugType: {
rtnVal = factorWithAug->walk( pd );
break
;
}
}
return
rtnVal;
}
void
Term::makeNameTree( ParseData *pd )
{
switch
( type ) {
case
ConcatType:
case
RightStartType:
case
RightFinishType:
case
LeftType:
term->makeNameTree( pd );
factorWithAug->makeNameTree( pd );
break
;
case
FactorWithAugType:
factorWithAug->makeNameTree( pd );
break
;
}
}
void
Term::resolveNameRefs( ParseData *pd )
{
switch
( type ) {
case
ConcatType:
case
RightStartType:
case
RightFinishType:
case
LeftType:
term->resolveNameRefs( pd );
factorWithAug->resolveNameRefs( pd );
break
;
case
FactorWithAugType:
factorWithAug->resolveNameRefs( pd );
break
;
}
}
FactorWithAug::~FactorWithAug()
{
delete
factorWithRep;
if
( priorDescs != 0 )
delete
[] priorDescs;
}
void
FactorWithAug::assignActions( ParseData *pd, FsmAp *graph,
int
*actionOrd )
{
for
(
int
i = 0; i < actions.length(); i++ ) {
switch
( actions[i].type ) {
case
at_start:
graph->startFsmAction( actionOrd[i], actions[i].action );
afterOpMinimize( graph );
break
;
case
at_all:
graph->allTransAction( actionOrd[i], actions[i].action );
break
;
case
at_finish:
graph->finishFsmAction( actionOrd[i], actions[i].action );
break
;
case
at_leave:
graph->leaveFsmAction( actionOrd[i], actions[i].action );
break
;
case
at_start_gbl_error:
graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
afterOpMinimize( graph );
break
;
case
at_all_gbl_error:
graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
break
;
case
at_final_gbl_error:
graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
break
;
case
at_not_start_gbl_error:
graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
break
;
case
at_not_final_gbl_error:
graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
break
;
case
at_middle_gbl_error:
graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
break
;
case
at_start_local_error:
graph->startErrorAction( actionOrd[i], actions[i].action,
actions[i].localErrKey );
afterOpMinimize( graph );
break
;
case
at_all_local_error:
graph->allErrorAction( actionOrd[i], actions[i].action,
actions[i].localErrKey );
break
;
case
at_final_local_error:
graph->finalErrorAction( actionOrd[i], actions[i].action,
actions[i].localErrKey );
break
;
case
at_not_start_local_error:
graph->notStartErrorAction( actionOrd[i], actions[i].action,
actions[i].localErrKey );
break
;
case
at_not_final_local_error:
graph->notFinalErrorAction( actionOrd[i], actions[i].action,
actions[i].localErrKey );
break
;
case
at_middle_local_error:
graph->middleErrorAction( actionOrd[i], actions[i].action,
actions[i].localErrKey );
break
;
case
at_start_eof:
graph->startEOFAction( actionOrd[i], actions[i].action );
afterOpMinimize( graph );
break
;
case
at_all_eof:
graph->allEOFAction( actionOrd[i], actions[i].action );
break
;
case
at_final_eof:
graph->finalEOFAction( actionOrd[i], actions[i].action );
break
;
case
at_not_start_eof:
graph->notStartEOFAction( actionOrd[i], actions[i].action );
break
;
case
at_not_final_eof:
graph->notFinalEOFAction( actionOrd[i], actions[i].action );
break
;
case
at_middle_eof:
graph->middleEOFAction( actionOrd[i], actions[i].action );
break
;
case
at_start_to_state:
graph->startToStateAction( actionOrd[i], actions[i].action );
afterOpMinimize( graph );
break
;
case
at_all_to_state:
graph->allToStateAction( actionOrd[i], actions[i].action );
break
;
case
at_final_to_state:
graph->finalToStateAction( actionOrd[i], actions[i].action );
break
;
case
at_not_start_to_state:
graph->notStartToStateAction( actionOrd[i], actions[i].action );
break
;
case
at_not_final_to_state:
graph->notFinalToStateAction( actionOrd[i], actions[i].action );
break
;
case
at_middle_to_state:
graph->middleToStateAction( actionOrd[i], actions[i].action );
break
;
case
at_start_from_state:
graph->startFromStateAction( actionOrd[i], actions[i].action );
afterOpMinimize( graph );
break
;
case
at_all_from_state:
graph->allFromStateAction( actionOrd[i], actions[i].action );
break
;
case
at_final_from_state:
graph->finalFromStateAction( actionOrd[i], actions[i].action );
break
;
case
at_not_start_from_state:
graph->notStartFromStateAction( actionOrd[i], actions[i].action );
break
;
case
at_not_final_from_state:
graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
break
;
case
at_middle_from_state:
graph->middleFromStateAction( actionOrd[i], actions[i].action );
break
;
default
:
assert
(
false
);
break
;
}
}
}
void
FactorWithAug::assignPriorities( FsmAp *graph,
int
*priorOrd )
{
for
(
int
i = 0; i < priorityAugs.length(); i++ ) {
switch
( priorityAugs[i].type ) {
case
at_start:
graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
afterOpMinimize( graph );
break
;
case
at_all:
graph->allTransPrior( priorOrd[i], &priorDescs[i] );
break
;
case
at_finish:
graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
break
;
case
at_leave:
graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
break
;
default
:
break
;
}
}
}
void
FactorWithAug::assignConditions( FsmAp *graph )
{
for
(
int
i = 0; i < conditions.length(); i++ ) {
switch
( conditions[i].type ) {
case
at_start:
graph->startFsmCondition( conditions[i].action, conditions[i].sense );
afterOpMinimize( graph );
break
;
case
at_all:
graph->allTransCondition( conditions[i].action, conditions[i].sense );
break
;
case
at_leave:
graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
break
;
default
:
break
;
}
}
}
FsmAp *FactorWithAug::walk( ParseData *pd )
{
NameFrame nameFrame = pd->enterNameScope(
false
, labels.length() );
int
*actionOrd = 0;
if
( actions.length() > 0 )
actionOrd =
new
int
[actions.length()];
for
(
int
i = 0; i < actions.length(); i++ ) {
if
( actions[i].type == at_start ||
actions[i].type == at_start_gbl_error ||
actions[i].type == at_start_local_error ||
actions[i].type == at_start_to_state ||
actions[i].type == at_start_from_state ||
actions[i].type == at_start_eof )
actionOrd[i] = pd->curActionOrd++;
}
FsmAp *rtnVal = factorWithRep->walk( pd );
for
(
int
i = 0; i < actions.length(); i++ ) {
if
( actions[i].type != at_start &&
actions[i].type != at_start_gbl_error &&
actions[i].type != at_start_local_error &&
actions[i].type != at_start_to_state &&
actions[i].type != at_start_from_state &&
actions[i].type != at_start_eof )
actionOrd[i] = pd->curActionOrd++;
}
assignConditions( rtnVal );
assignActions( pd, rtnVal , actionOrd );
int
*priorOrd = 0;
if
( priorityAugs.length() > 0 )
priorOrd =
new
int
[priorityAugs.length()];
for
(
int
i = 0; i < priorityAugs.length(); i++ )
priorOrd[i] = pd->curPriorOrd++;
if
( priorDescs == 0 && priorityAugs.length() > 0 ) {
priorDescs =
new
PriorDesc[priorityAugs.length()];
for
(
int
i = 0; i < priorityAugs.length(); i++ ) {
priorDescs[i].key = priorityAugs[i].priorKey;
priorDescs[i].priority = priorityAugs[i].priorValue;
}
}
assignPriorities( rtnVal, priorOrd );
for
(
int
e = 0; e < epsilonLinks.length(); e++ ) {
NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
if
( epTarg != 0 ) {
rtnVal->epsilonTrans( epTarg->id );
pd->localNameScope->referencedNames.append( epTarg );
}
}
if
( labels.length() > 0 ) {
pd->resetNameScope( nameFrame );
for
(
int
i = 0; i < labels.length(); i++ ) {
pd->enterNameScope(
false
, 1 );
NameInst *name = pd->curNameInst;
if
( name->numRefs > 0 )
rtnVal->setEntry( name->id, rtnVal->startState );
}
pd->popNameScope( nameFrame );
}
if
( priorOrd != 0 )
delete
[] priorOrd;
if
( actionOrd != 0 )
delete
[] actionOrd;
return
rtnVal;
}
void
FactorWithAug::makeNameTree( ParseData *pd )
{
NameInst *prevNameInst = pd->curNameInst;
for
(
int
i = 0; i < labels.length(); i++ )
pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data,
true
);
factorWithRep->makeNameTree( pd );
pd->curNameInst = prevNameInst;
}
void
FactorWithAug::resolveNameRefs( ParseData *pd )
{
NameFrame nameFrame = pd->enterNameScope(
false
, labels.length() );
for
(
int
i = 0; i < actions.length(); i++ )
actions[i].action->actionRefs.append( pd->localNameScope );
factorWithRep->resolveNameRefs( pd );
for
(
int
ep = 0; ep < epsilonLinks.length(); ep++ ) {
EpsilonLink &link = epsilonLinks[ep];
NameInst *resolvedName = 0;
if
( link.target.length() == 1 &&
strcmp
( link.target.data[0],
"final"
) == 0 ) {
resolvedName = pd->localNameScope->final;
}
else
{
NameSet resolved;
pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
if
( resolved.length() > 0 ) {
resolvedName = resolved[0];
if
( resolved.length() > 1 ) {
error(link.loc) <<
"state reference "
<< link.target <<
" resolves to multiple entry points"
<< endl;
errorStateLabels( resolved );
}
}
}
pd->epsilonResolvedLinks.append( resolvedName );
if
( resolvedName != 0 ) {
resolvedName->numRefs += 1;
}
else
{
error(link.loc) <<
"could not resolve label "
<< link.target << endl;
}
}
if
( labels.length() > 0 )
pd->popNameScope( nameFrame );
}
FactorWithRep::~FactorWithRep()
{
switch
( type ) {
case
StarType:
case
StarStarType:
case
OptionalType:
case
PlusType:
case
ExactType:
case
MaxType:
case
MinType:
case
RangeType:
delete
factorWithRep;
break
;
case
FactorWithNegType:
delete
factorWithNeg;
break
;
}
}
FsmAp *FactorWithRep::walk( ParseData *pd )
{
FsmAp *retFsm = 0;
switch
( type ) {
case
StarType: {
retFsm = factorWithRep->walk( pd );
if
( retFsm->startState->isFinState() ) {
warning(loc) <<
"applying kleene star to a machine that "
"accepts zero length word"
<< endl;
retFsm->unsetFinState( retFsm->startState );
}
pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
retFsm->starOp( );
afterOpMinimize( retFsm );
break
;
}
case
StarStarType: {
retFsm = factorWithRep->walk( pd );
if
( retFsm->startState->isFinState() ) {
warning(loc) <<
"applying kleene star to a machine that "
"accepts zero length word"
<< endl;
}
priorDescs[0].key = pd->nextPriorKey++;
priorDescs[0].priority = 1;
retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
priorDescs[1].key = priorDescs[0].key;
priorDescs[1].priority = 0;
retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
retFsm->starOp( );
afterOpMinimize( retFsm );
break
;
}
case
OptionalType: {
FsmAp *nu =
new
FsmAp();
nu->lambdaFsm( );
retFsm = factorWithRep->walk( pd );
retFsm->unionOp( nu );
afterOpMinimize( retFsm );
break
;
}
case
PlusType: {
retFsm = factorWithRep->walk( pd );
if
( retFsm->startState->isFinState() ) {
warning(loc) <<
"applying plus operator to a machine that "
"accepts zero length word"
<< endl;
}
FsmAp *dup =
new
FsmAp( *retFsm );
pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
dup->starOp( );
afterOpMinimize( dup );
retFsm->concatOp( dup );
afterOpMinimize( retFsm );
break
;
}
case
ExactType: {
if
( lowerRep == 0 ) {
warning(loc) <<
"exactly zero repetitions results "
"in the null machine"
<< endl;
retFsm =
new
FsmAp();
retFsm->lambdaFsm();
}
else
{
retFsm = factorWithRep->walk( pd );
if
( retFsm->startState->isFinState() ) {
warning(loc) <<
"applying repetition to a machine that "
"accepts zero length word"
<< endl;
}
pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
retFsm->repeatOp( lowerRep );
afterOpMinimize( retFsm );
}
break
;
}
case
MaxType: {
if
( upperRep == 0 ) {
warning(loc) <<
"max zero repetitions results "
"in the null machine"
<< endl;
retFsm =
new
FsmAp();
retFsm->lambdaFsm();
}
else
{
retFsm = factorWithRep->walk( pd );
if
( retFsm->startState->isFinState() ) {
warning(loc) <<
"applying max repetition to a machine that "
"accepts zero length word"
<< endl;
}
pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
retFsm->optionalRepeatOp( upperRep );
afterOpMinimize( retFsm );
}
break
;
}
case
MinType: {
retFsm = factorWithRep->walk( pd );
if
( retFsm->startState->isFinState() ) {
warning(loc) <<
"applying min repetition to a machine that "
"accepts zero length word"
<< endl;
}
pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
if
( lowerRep == 0 ) {
retFsm->starOp( );
afterOpMinimize( retFsm );
}
else
{
FsmAp *dup =
new
FsmAp( *retFsm );
retFsm->repeatOp( lowerRep );
afterOpMinimize( retFsm );
dup->starOp( );
afterOpMinimize( dup );
retFsm->concatOp( dup );
afterOpMinimize( retFsm );
}
break
;
}
case
RangeType: {
if
( upperRep - lowerRep < 0 ) {
error(loc) <<
"invalid range repetition"
<< endl;
retFsm =
new
FsmAp();
retFsm->lambdaFsm();
}
else
if
( lowerRep == 0 && upperRep == 0 ) {
warning(loc) <<
"zero to zero repetitions results "
"in the null machine"
<< endl;
retFsm =
new
FsmAp();
retFsm->lambdaFsm();
}
else
{
retFsm = factorWithRep->walk( pd );
if
( retFsm->startState->isFinState() ) {
warning(loc) <<
"applying range repetition to a machine that "
"accepts zero length word"
<< endl;
}
pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
if
( lowerRep == 0 ) {
retFsm->optionalRepeatOp( upperRep );
afterOpMinimize( retFsm );
}
else
if
( lowerRep == upperRep ) {
retFsm->repeatOp( lowerRep );
afterOpMinimize( retFsm );
}
else
{
FsmAp *dup =
new
FsmAp( *retFsm );
retFsm->repeatOp( lowerRep );
afterOpMinimize( retFsm );
dup->optionalRepeatOp( upperRep - lowerRep );
afterOpMinimize( dup );
retFsm->concatOp( dup );
afterOpMinimize( retFsm );
}
}
break
;
}
case
FactorWithNegType: {
retFsm = factorWithNeg->walk( pd );
break
;
}}
return
retFsm;
}
void
FactorWithRep::makeNameTree( ParseData *pd )
{
switch
( type ) {
case
StarType:
case
StarStarType:
case
OptionalType:
case
PlusType:
case
ExactType:
case
MaxType:
case
MinType:
case
RangeType:
factorWithRep->makeNameTree( pd );
break
;
case
FactorWithNegType:
factorWithNeg->makeNameTree( pd );
break
;
}
}
void
FactorWithRep::resolveNameRefs( ParseData *pd )
{
switch
( type ) {
case
StarType:
case
StarStarType:
case
OptionalType:
case
PlusType:
case
ExactType:
case
MaxType:
case
MinType:
case
RangeType:
factorWithRep->resolveNameRefs( pd );
break
;
case
FactorWithNegType:
factorWithNeg->resolveNameRefs( pd );
break
;
}
}
FactorWithNeg::~FactorWithNeg()
{
switch
( type ) {
case
NegateType:
case
CharNegateType:
delete
factorWithNeg;
break
;
case
FactorType:
delete
factor;
break
;
}
}
FsmAp *FactorWithNeg::walk( ParseData *pd )
{
FsmAp *retFsm = 0;
switch
( type ) {
case
NegateType: {
FsmAp *toNegate = factorWithNeg->walk( pd );
retFsm = dotStarFsm( pd );
retFsm->subtractOp( toNegate );
afterOpMinimize( retFsm );
break
;
}
case
CharNegateType: {
FsmAp *toNegate = factorWithNeg->walk( pd );
retFsm = dotFsm( pd );
retFsm->subtractOp( toNegate );
afterOpMinimize( retFsm );
break
;
}
case
FactorType: {
retFsm = factor->walk( pd );
break
;
}}
return
retFsm;
}
void
FactorWithNeg::makeNameTree( ParseData *pd )
{
switch
( type ) {
case
NegateType:
case
CharNegateType:
factorWithNeg->makeNameTree( pd );
break
;
case
FactorType:
factor->makeNameTree( pd );
break
;
}
}
void
FactorWithNeg::resolveNameRefs( ParseData *pd )
{
switch
( type ) {
case
NegateType:
case
CharNegateType:
factorWithNeg->resolveNameRefs( pd );
break
;
case
FactorType:
factor->resolveNameRefs( pd );
break
;
}
}
Factor::~Factor()
{
switch
( type ) {
case
LiteralType:
delete
literal;
break
;
case
RangeType:
delete
range;
break
;
case
OrExprType:
delete
reItem;
break
;
case
RegExprType:
delete
regExpr;
break
;
case
ReferenceType:
break
;
case
ParenType:
delete
join;
break
;
case
LongestMatchType:
delete
longestMatch;
break
;
}
}
FsmAp *Factor::walk( ParseData *pd )
{
FsmAp *rtnVal = 0;
switch
( type ) {
case
LiteralType:
rtnVal = literal->walk( pd );
break
;
case
RangeType:
rtnVal = range->walk( pd );
break
;
case
OrExprType:
rtnVal = reItem->walk( pd, 0 );
break
;
case
RegExprType:
rtnVal = regExpr->walk( pd, 0 );
break
;
case
ReferenceType:
rtnVal = varDef->walk( pd );
break
;
case
ParenType:
rtnVal = join->walk( pd );
break
;
case
LongestMatchType:
rtnVal = longestMatch->walk( pd );
break
;
}
return
rtnVal;
}
void
Factor::makeNameTree( ParseData *pd )
{
switch
( type ) {
case
LiteralType:
case
RangeType:
case
OrExprType:
case
RegExprType:
break
;
case
ReferenceType:
varDef->makeNameTree( loc, pd );
break
;
case
ParenType:
join->makeNameTree( pd );
break
;
case
LongestMatchType:
longestMatch->makeNameTree( pd );
break
;
}
}
void
Factor::resolveNameRefs( ParseData *pd )
{
switch
( type ) {
case
LiteralType:
case
RangeType:
case
OrExprType:
case
RegExprType:
break
;
case
ReferenceType:
varDef->resolveNameRefs( pd );
break
;
case
ParenType:
join->resolveNameRefs( pd );
break
;
case
LongestMatchType:
longestMatch->resolveNameRefs( pd );
break
;
}
}
Range::~Range()
{
delete
lowerLit;
delete
upperLit;
}
FsmAp *Range::walk( ParseData *pd )
{
FsmAp *lowerFsm = lowerLit->walk( pd );
if
( !lowerFsm->checkSingleCharMachine() ) {
error(lowerLit->token.loc) <<
"bad range lower end, must be a single character"
<< endl;
}
FsmAp *upperFsm = upperLit->walk( pd );
if
( !upperFsm->checkSingleCharMachine() ) {
error(upperLit->token.loc) <<
"bad range upper end, must be a single character"
<< endl;
}
Key lowKey = lowerFsm->startState->outList.head->lowKey;
Key highKey = upperFsm->startState->outList.head->lowKey;
delete
lowerFsm;
delete
upperFsm;
if
( lowKey > highKey ) {
error(lowerLit->token.loc) <<
"lower end of range is greater then upper end"
<< endl;
highKey = lowKey;
}
FsmAp *retFsm =
new
FsmAp();
retFsm->rangeFsm( lowKey, highKey );
return
retFsm;
}
FsmAp *Literal::walk( ParseData *pd )
{
FsmAp *rtnVal = 0;
switch
( type ) {
case
Number: {
Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
rtnVal =
new
FsmAp();
rtnVal->concatFsm( fsmKey );
break
;
}
case
LitString: {
long
length;
bool
caseInsensitive;
char
*data = prepareLitString( token.loc, token.data, token.length,
length, caseInsensitive );
Key *arr =
new
Key[length];
makeFsmKeyArray( arr, data, length, pd );
rtnVal =
new
FsmAp();
if
( caseInsensitive )
rtnVal->concatFsmCI( arr, length );
else
rtnVal->concatFsm( arr, length );
delete
[] data;
delete
[] arr;
break
;
}}
return
rtnVal;
}
RegExpr::~RegExpr()
{
switch
( type ) {
case
RecurseItem:
delete
regExpr;
delete
item;
break
;
case
Empty:
break
;
}
}
FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
{
if
( rootRegex == 0 )
rootRegex =
this
;
FsmAp *rtnVal = 0;
switch
( type ) {
case
RecurseItem: {
rtnVal = regExpr->walk( pd, rootRegex );
FsmAp *fsm2 = item->walk( pd, rootRegex );
rtnVal->concatOp( fsm2 );
break
;
}
case
Empty: {
rtnVal =
new
FsmAp();
rtnVal->lambdaFsm();
break
;
}
}
return
rtnVal;
}
ReItem::~ReItem()
{
switch
( type ) {
case
Data:
case
Dot:
break
;
case
OrBlock:
case
NegOrBlock:
delete
orBlock;
break
;
}
}
FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
{
FsmAp *rtnVal = 0;
switch
( type ) {
case
Data: {
Key *arr =
new
Key[token.length];
makeFsmKeyArray( arr, token.data, token.length, pd );
rtnVal =
new
FsmAp();
if
( rootRegex != 0 && rootRegex->caseInsensitive )
rtnVal->concatFsmCI( arr, token.length );
else
rtnVal->concatFsm( arr, token.length );
delete
[] arr;
break
;
}
case
Dot: {
rtnVal = dotFsm( pd );
break
;
}
case
OrBlock: {
rtnVal = orBlock->walk( pd, rootRegex );
if
( rtnVal == 0 ) {
rtnVal =
new
FsmAp();
rtnVal->lambdaFsm();
}
rtnVal->minimizePartition2();
break
;
}
case
NegOrBlock: {
FsmAp *fsm = orBlock->walk( pd, rootRegex );
fsm->minimizePartition2();
rtnVal = dotFsm( pd );
rtnVal->subtractOp( fsm );
rtnVal->minimizePartition2();
break
;
}
}
if
( star ) {
if
( rtnVal->startState->isFinState() ) {
warning(loc) <<
"applying kleene star to a machine that "
"accepts zero length word"
<< endl;
}
rtnVal->starOp();
rtnVal->minimizePartition2();
}
return
rtnVal;
}
ReOrBlock::~ReOrBlock()
{
switch
( type ) {
case
RecurseItem:
delete
orBlock;
delete
item;
break
;
case
Empty:
break
;
}
}
FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
{
FsmAp *rtnVal = 0;
switch
( type ) {
case
RecurseItem: {
FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
FsmAp *fsm2 = item->walk( pd, rootRegex );
if
( fsm1 == 0 )
rtnVal = fsm2;
else
{
fsm1->unionOp( fsm2 );
rtnVal = fsm1;
}
break
;
}
case
Empty: {
rtnVal = 0;
break
;
}
}
return
rtnVal;;
}
FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
{
FsmAp *rtnVal = 0;
switch
( type ) {
case
Data: {
rtnVal =
new
FsmAp();
KeySet keySet;
makeFsmUniqueKeyArray( keySet, token.data, token.length,
rootRegex != 0 ? rootRegex->caseInsensitive :
false
, pd );
rtnVal->orFsm( keySet.data, keySet.length() );
break
;
}
case
Range: {
Key lowKey = makeFsmKeyChar( lower, pd );
Key highKey = makeFsmKeyChar( upper, pd );
if
( lowKey > highKey ) {
error(loc) <<
"lower end of range is greater then upper end"
<< endl;
highKey = lowKey;
}
rtnVal =
new
FsmAp();
rtnVal->rangeFsm( lowKey, highKey );
if
( rootRegex != 0 && rootRegex->caseInsensitive ) {
if
( lowKey <=
'Z'
&&
'A'
<= highKey ) {
Key otherLow = lowKey <
'A'
? Key(
'A'
) : lowKey;
Key otherHigh =
'Z'
< highKey ? Key(
'Z'
) : highKey;
otherLow =
'a'
+ ( otherLow -
'A'
);
otherHigh =
'a'
+ ( otherHigh -
'A'
);
FsmAp *otherRange =
new
FsmAp();
otherRange->rangeFsm( otherLow, otherHigh );
rtnVal->unionOp( otherRange );
rtnVal->minimizePartition2();
}
else
if
( lowKey <=
'z'
&&
'a'
<= highKey ) {
Key otherLow = lowKey <
'a'
? Key(
'a'
) : lowKey;
Key otherHigh =
'z'
< highKey ? Key(
'z'
) : highKey;
otherLow =
'A'
+ ( otherLow -
'a'
);
otherHigh =
'A'
+ ( otherHigh -
'a'
);
FsmAp *otherRange =
new
FsmAp();
otherRange->rangeFsm( otherLow, otherHigh );
rtnVal->unionOp( otherRange );
rtnVal->minimizePartition2();
}
}
break
;
}}
return
rtnVal;
}