#include "unicode/utypes.h"
#if !UCONFIG_NO_BREAK_ITERATION
#include "unicode/unistr.h"
#include "rbbitblb.h"
#include "rbbirb.h"
#include "rbbisetb.h"
#include "rbbidata.h"
#include "cstring.h"
#include "uassert.h"
U_NAMESPACE_BEGIN
//-----------------------------------------------------------------------------
//
//  Constructor.  Binds the builder to the rule builder that owns it and to the
//                root of the parse tree, and allocates the (initially empty)
//                list of DFA state descriptors.
//
//      rb         the owning rule builder; supplies the shared error status.
//      rootNode   address of the parse tree root pointer.  fTree is
//                 initialized from *rootNode.
//
//-----------------------------------------------------------------------------
RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode) :
        fTree(*rootNode) {
    fRB      = rb;
    fStatus  = fRB->fStatus;
    fDStates = new UVector(*fStatus);

    // Report an allocation failure through the shared status, but never
    // overwrite an error that was already recorded by an earlier stage.
    if (fDStates == NULL && !U_FAILURE(*fStatus)) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
    }
}
//-----------------------------------------------------------------------------
//
//  Destructor.  Deletes every state descriptor held in fDStates, then the
//               vector itself.  The vector stores plain pointers, so the
//               descriptors must be deleted by hand.
//
//-----------------------------------------------------------------------------
RBBITableBuilder::~RBBITableBuilder() {
    // The constructor's allocation of fDStates is allowed to fail; there is
    // nothing to release in that case.
    if (fDStates == NULL) {
        return;
    }
    for (int32_t i = 0; i < fDStates->size(); i++) {
        delete static_cast<RBBIStateDescriptor *>(fDStates->elementAt(i));
    }
    delete fDStates;
}
void RBBITableBuilder::build() {
if (U_FAILURE(*fStatus)) {
return;
}
if (fTree==NULL) {
return;
}
fTree = fTree->flattenVariables();
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
RBBIDebugPrintf("Parse tree after flattening variable references.\n");
fTree->printTree(TRUE);
}
RBBINode *cn = new RBBINode(RBBINode::opCat);
cn->fLeftChild = fTree;
fTree->fParent = cn;
cn->fRightChild = new RBBINode(RBBINode::endMark);
cn->fRightChild->fParent = cn;
fTree = cn;
fTree->flattenSets();
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) {
RBBIDebugPrintf("Parse tree after flattening Unicode Set references.\n");
fTree->printTree(TRUE);
}
calcNullable(fTree);
calcFirstPos(fTree);
calcLastPos(fTree);
calcFollowPos(fTree);
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) {
RBBIDebugPrintf("\n\n");
printPosSets(fTree);
}
buildStateTable();
flagAcceptingStates();
flagLookAheadStates();
flagTaggedStates();
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();};
}
//-----------------------------------------------------------------------------
//
//  calcNullable    Recursively set fNullable on every node of the subtree
//                  rooted at n.  A NULL subtree is ignored.
//
//-----------------------------------------------------------------------------
void RBBITableBuilder::calcNullable(RBBINode *n) {
    if (n == NULL) {
        return;
    }

    // Node types that are settled without looking at any children.
    switch (n->fType) {
    case RBBINode::setRef:
    case RBBINode::endMark:
        n->fNullable = FALSE;
        return;

    case RBBINode::lookAhead:
    case RBBINode::tag:
        n->fNullable = TRUE;
        return;

    default:
        break;
    }

    // Everything else: the children are computed first, then combined.
    calcNullable(n->fLeftChild);
    calcNullable(n->fRightChild);

    switch (n->fType) {
    case RBBINode::opOr:
        // Either alternative being nullable is enough.
        n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable;
        break;

    case RBBINode::opCat:
        // A concatenation is nullable only when both halves are.
        n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable;
        break;

    case RBBINode::opStar:
    case RBBINode::opQuestion:
        n->fNullable = TRUE;
        break;

    default:
        n->fNullable = FALSE;
        break;
    }
}
//-----------------------------------------------------------------------------
//
//  calcFirstPos    Recursively fill in fFirstPosSet for every node of the
//                  subtree rooted at n.  Relies on fNullable having been
//                  computed already.  A NULL subtree is ignored.
//
//-----------------------------------------------------------------------------
void RBBITableBuilder::calcFirstPos(RBBINode *n) {
    if (n == NULL) {
        return;
    }

    switch (n->fType) {
    case RBBINode::leafChar:
    case RBBINode::endMark:
    case RBBINode::lookAhead:
    case RBBINode::tag:
        // These node types contribute themselves and are not descended into.
        n->fFirstPosSet->addElement(n, *fStatus);
        return;

    default:
        break;
    }

    RBBINode *left  = n->fLeftChild;
    RBBINode *right = n->fRightChild;
    calcFirstPos(left);
    calcFirstPos(right);

    switch (n->fType) {
    case RBBINode::opOr:
        setAdd(n->fFirstPosSet, left->fFirstPosSet);
        setAdd(n->fFirstPosSet, right->fFirstPosSet);
        break;

    case RBBINode::opCat:
        setAdd(n->fFirstPosSet, left->fFirstPosSet);
        // The right operand's first positions are included only when the
        // left operand is nullable.
        if (left->fNullable) {
            setAdd(n->fFirstPosSet, right->fFirstPosSet);
        }
        break;

    case RBBINode::opStar:
    case RBBINode::opQuestion:
    case RBBINode::opPlus:
        setAdd(n->fFirstPosSet, left->fFirstPosSet);
        break;

    default:
        break;
    }
}
//-----------------------------------------------------------------------------
//
//  calcLastPos    Recursively fill in fLastPosSet for every node of the
//                 subtree rooted at n.  Relies on fNullable having been
//                 computed already.  A NULL subtree is ignored.
//
//-----------------------------------------------------------------------------
void RBBITableBuilder::calcLastPos(RBBINode *n) {
    if (n == NULL) {
        return;
    }

    switch (n->fType) {
    case RBBINode::leafChar:
    case RBBINode::endMark:
    case RBBINode::lookAhead:
    case RBBINode::tag:
        // These node types contribute themselves and are not descended into.
        n->fLastPosSet->addElement(n, *fStatus);
        return;

    default:
        break;
    }

    RBBINode *left  = n->fLeftChild;
    RBBINode *right = n->fRightChild;
    calcLastPos(left);
    calcLastPos(right);

    switch (n->fType) {
    case RBBINode::opOr:
        setAdd(n->fLastPosSet, left->fLastPosSet);
        setAdd(n->fLastPosSet, right->fLastPosSet);
        break;

    case RBBINode::opCat:
        setAdd(n->fLastPosSet, right->fLastPosSet);
        // The left operand's last positions are included only when the
        // right operand is nullable.
        if (right->fNullable) {
            setAdd(n->fLastPosSet, left->fLastPosSet);
        }
        break;

    case RBBINode::opStar:
    case RBBINode::opQuestion:
    case RBBINode::opPlus:
        setAdd(n->fLastPosSet, left->fLastPosSet);
        break;

    default:
        break;
    }
}
//-----------------------------------------------------------------------------
//
//  calcFollowPos    Recursively fill in fFollowPos on the nodes referenced
//                   from the position sets of the subtree rooted at n.
//                   Relies on fFirstPosSet and fLastPosSet being complete.
//
//-----------------------------------------------------------------------------
void RBBITableBuilder::calcFollowPos(RBBINode *n) {
    if (n == NULL) {
        return;
    }
    if (n->fType == RBBINode::leafChar || n->fType == RBBINode::endMark) {
        // These node types are not descended into.
        return;
    }

    calcFollowPos(n->fLeftChild);
    calcFollowPos(n->fRightChild);

    if (n->fType == RBBINode::opCat) {
        // Concatenation: every last position of the left operand may be
        // followed by every first position of the right operand.
        UVector *leftLast = n->fLeftChild->fLastPosSet;
        for (int32_t pos = 0; pos < leftLast->size(); ++pos) {
            RBBINode *posNode = static_cast<RBBINode *>(leftLast->elementAt(pos));
            setAdd(posNode->fFollowPos, n->fRightChild->fFirstPosSet);
        }
    } else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opPlus) {
        // Repetition: every last position of the node may be followed by
        // every first position of the same node.
        for (int32_t pos = 0; pos < n->fLastPosSet->size(); ++pos) {
            RBBINode *posNode = static_cast<RBBINode *>(n->fLastPosSet->elementAt(pos));
            setAdd(posNode->fFollowPos, n->fFirstPosSet);
        }
    }
}
void RBBITableBuilder::buildStateTable() {
int lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1;
RBBIStateDescriptor *failState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
failState->fPositions = new UVector(*fStatus);
fDStates->addElement(failState, *fStatus);
RBBIStateDescriptor *initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
initialState->fPositions = new UVector(*fStatus);
setAdd(initialState->fPositions, fTree->fFirstPosSet);
fDStates->addElement(initialState, *fStatus);
for (;;) {
RBBIStateDescriptor *T = NULL;
int32_t tx;
for (tx=1; tx<fDStates->size(); tx++) {
RBBIStateDescriptor *temp;
temp = (RBBIStateDescriptor *)fDStates->elementAt(tx);
if (temp->fMarked == FALSE) {
T = temp;
break;
}
}
if (T == NULL) {
break;
}
T->fMarked = TRUE;
int32_t a;
for (a = 1; a<=lastInputSymbol; a++) {
UVector *U = NULL;
RBBINode *p;
int32_t px;
for (px=0; px<T->fPositions->size(); px++) {
p = (RBBINode *)T->fPositions->elementAt(px);
if ((p->fType == RBBINode::leafChar) && (p->fVal == a)) {
if (U == NULL) {
U = new UVector(*fStatus);
}
setAdd(U, p->fFollowPos);
}
}
int32_t ux = 0;
UBool UinDstates = FALSE;
if (U != NULL) {
U_ASSERT(U->size() > 0);
int ix;
for (ix=0; ix<fDStates->size(); ix++) {
RBBIStateDescriptor *temp2;
temp2 = (RBBIStateDescriptor *)fDStates->elementAt(ix);
if (setEquals(U, temp2->fPositions)) {
delete U;
U = temp2->fPositions;
ux = ix;
UinDstates = TRUE;
break;
}
}
if (!UinDstates)
{
RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
newState->fPositions = U;
fDStates->addElement(newState, *fStatus);
ux = fDStates->size()-1;
}
T->fDtran->setElementAt(ux, a);
}
}
}
}
void RBBITableBuilder::flagAcceptingStates() {
UVector endMarkerNodes(*fStatus);
RBBINode *endMarker;
int32_t i;
int32_t n;
fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus);
for (i=0; i<endMarkerNodes.size(); i++) {
endMarker = (RBBINode *)endMarkerNodes.elementAt(i);
for (n=0; n<fDStates->size(); n++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
if (sd->fPositions->indexOf(endMarker) >= 0) {
sd->fAccepting = endMarker->fVal;
if (sd->fAccepting == 0) {
sd->fAccepting = -1;
}
if (endMarker->fLookAheadEnd) {
sd->fLookAhead = sd->fAccepting;
}
}
}
}
}
void RBBITableBuilder::flagLookAheadStates() {
UVector lookAheadNodes(*fStatus);
RBBINode *lookAheadNode;
int32_t i;
int32_t n;
fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus);
for (i=0; i<lookAheadNodes.size(); i++) {
lookAheadNode = (RBBINode *)lookAheadNodes.elementAt(i);
for (n=0; n<fDStates->size(); n++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
if (sd->fPositions->indexOf(lookAheadNode) >= 0) {
sd->fLookAhead = lookAheadNode->fVal;
}
}
}
}
void RBBITableBuilder::flagTaggedStates() {
UVector tagNodes(*fStatus);
RBBINode *tagNode;
int32_t i;
int32_t n;
fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus);
for (i=0; i<tagNodes.size(); i++) { tagNode = (RBBINode *)tagNodes.elementAt(i);
for (n=0; n<fDStates->size(); n++) { RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
if (sd->fPositions->indexOf(tagNode) >= 0) { if (sd->fTagVal < tagNode->fVal) {
sd->fTagVal = tagNode->fVal;
}
}
}
}
}
//-----------------------------------------------------------------------------
//
//  setAdd    Set union on vectors of pointers:  dest = dest union source.
//            Elements are compared by pointer identity.
//
//            Each source element is checked only against the elements that
//            were in dest when the call started, so duplicates inside source
//            itself are not filtered out.
//
//-----------------------------------------------------------------------------
void RBBITableBuilder::setAdd(UVector *dest, UVector *source) {
    const int originalDestCount = dest->size();
    const int sourceCount       = source->size();

    for (int32_t srcIx = 0; srcIx < sourceCount; srcIx++) {
        void *candidate = source->elementAt(srcIx);

        bool alreadyPresent = false;
        for (int32_t destIx = 0; destIx < originalDestCount; destIx++) {
            if (dest->elementAt(destIx) == candidate) {
                alreadyPresent = true;
                break;
            }
        }

        if (!alreadyPresent) {
            dest->addElement(candidate, *fStatus);
        }
    }
}
//-----------------------------------------------------------------------------
//
//  setEquals    Compare two vectors of pointers as sets.  Returns TRUE when
//               they have the same size and every element of a is found,
//               by pointer identity, somewhere in b.  Element order does not
//               matter.
//
//-----------------------------------------------------------------------------
UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) {
    const int32_t aCount = a->size();
    const int32_t bCount = b->size();
    if (aCount != bCount) {
        return FALSE;
    }

    // Elements of b below searchStart have already been matched, so the
    // search for later elements of a can skip them.  This makes the common
    // case of identically ordered vectors a single linear pass.
    int32_t searchStart = 0;

    for (int32_t aIx = 0; aIx < aCount; aIx++) {
        void *wanted = a->elementAt(aIx);

        UBool found = FALSE;
        for (int32_t bIx = searchStart; bIx < bCount; bIx++) {
            if (b->elementAt(bIx) == wanted) {
                if (bIx == searchStart) {
                    searchStart++;
                }
                found = TRUE;
                break;
            }
        }

        if (!found) {
            return FALSE;
        }
    }
    return TRUE;
}
//-----------------------------------------------------------------------------
//
//  printPosSets    Debug output.  Print a node followed by its nullable flag
//                  and its firstpos, lastpos and followpos sets, then do the
//                  same for its left and right subtrees.
//                  Compiles to an empty function unless RBBI_DEBUG is defined.
//
//-----------------------------------------------------------------------------
void RBBITableBuilder::printPosSets(RBBINode *n) {
#ifdef RBBI_DEBUG
    if (n != NULL) {
        n->print();
        RBBIDebugPrintf(" Nullable: %s\n", n->fNullable?"TRUE":"FALSE");

        RBBIDebugPrintf(" firstpos: ");
        printSet(n->fFirstPosSet);

        RBBIDebugPrintf(" lastpos: ");
        printSet(n->fLastPosSet);

        RBBIDebugPrintf(" followpos: ");
        printSet(n->fFollowPos);

        // Node first, then the left subtree, then the right subtree.
        printPosSets(n->fLeftChild);
        printPosSets(n->fRightChild);
    }
#endif
}
//-----------------------------------------------------------------------------
//
//  getTableSize    Return the number of bytes the exported state table will
//                  occupy, or zero when there is no parse tree.
//
//-----------------------------------------------------------------------------
int32_t RBBITableBuilder::getTableSize() {
    if (fTree == NULL) {
        return 0;
    }

    // Table header.  The 4 bytes subtracted are presumably the placeholder
    // data member declared at the end of the struct - confirm against the
    // RBBIStateTable declaration.
    int32_t size = sizeof(RBBIStateTable) - 4;

    const int32_t numRows = fDStates->size();
    const int32_t numCols = fRB->fSetBuilder->getNumCharCategories();

    // One row per state.  sizeof(RBBIStateTableRow) is taken to already
    // include room for two 16-bit columns, hence the (numCols-2).
    const int32_t rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2);

    size += numRows * rowSize;
    return size;
}
void RBBITableBuilder::exportTable(void *where) {
RBBIStateTable *table = (RBBIStateTable *)where;
uint32_t state;
int col;
if (U_FAILURE(*fStatus) || fTree == NULL) {
return;
}
if (fRB->fSetBuilder->getNumCharCategories() > 0x7fff ||
fDStates->size() > 0x7fff) {
*fStatus = U_BRK_INTERNAL_ERROR;
return;
}
table->fRowLen = sizeof(RBBIStateTableRow) +
sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCategories() - 2);
table->fNumStates = fDStates->size();
for (state=0; state<table->fNumStates; state++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);
U_ASSERT (-32768 < sd->fAccepting && sd->fAccepting <= 32767);
U_ASSERT (-32768 < sd->fLookAhead && sd->fLookAhead <= 32767);
row->fAccepting = (int16_t)sd->fAccepting;
row->fLookAhead = (int16_t)sd->fLookAhead;
row->fTag = (int16_t)sd->fTagVal;
for (col=0; col<fRB->fSetBuilder->getNumCharCategories(); col++) {
row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col);
}
}
}
//-----------------------------------------------------------------------------
//
//  printSet    Debug output.  Print the pointer value of every element of a
//              vector on one line, followed by a newline.
//              Compiles to an empty function unless RBBI_DEBUG is defined.
//
//-----------------------------------------------------------------------------
void RBBITableBuilder::printSet(UVector *s) {
#ifdef RBBI_DEBUG
    for (int32_t elementIx = 0; elementIx < s->size(); elementIx++) {
        void *element = s->elementAt(elementIx);
        RBBIDebugPrintf("%10p", element);
    }
    RBBIDebugPrintf("\n");
#endif
}
//-----------------------------------------------------------------------------
//
//  printStates    Debug output.  Print the state table: a two-line header
//                 and a separator, then one line per state showing its
//                 accepting, look-ahead and tag values followed by its
//                 transition for every character category.
//                 Compiles to an empty function unless RBBI_DEBUG is defined.
//
//-----------------------------------------------------------------------------
void RBBITableBuilder::printStates() {
#ifdef RBBI_DEBUG
    int col;
    int stateIx;

    // Header: title line, then the column labels.
    RBBIDebugPrintf("state | i n p u t s y m b o l s \n");
    RBBIDebugPrintf(" | Acc LA Tag");
    for (col = 0; col < fRB->fSetBuilder->getNumCharCategories(); col++) {
        RBBIDebugPrintf(" %2d", col);
    }
    RBBIDebugPrintf("\n");

    // Separator line.
    RBBIDebugPrintf(" |---------------");
    for (col = 0; col < fRB->fSetBuilder->getNumCharCategories(); col++) {
        RBBIDebugPrintf("---");
    }
    RBBIDebugPrintf("\n");

    // One line per state.
    for (stateIx = 0; stateIx < fDStates->size(); stateIx++) {
        RBBIStateDescriptor *sd =
                (RBBIStateDescriptor *)fDStates->elementAt(stateIx);
        RBBIDebugPrintf(" %3d | " , stateIx);
        RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagVal);
        for (col = 0; col < fRB->fSetBuilder->getNumCharCategories(); col++) {
            RBBIDebugPrintf(" %2d", sd->fDtran->elementAti(col));
        }
        RBBIDebugPrintf("\n");
    }
    RBBIDebugPrintf("\n\n");
#endif
}
//-----------------------------------------------------------------------------
//
//  RBBIStateDescriptor constructor.
//
//      lastInputSymbol   highest input symbol number; the transition vector
//                        fDtran is sized to lastInputSymbol+1 entries.
//      fStatus           in/out error code.  If it already indicates failure
//                        on entry, no transition vector is allocated and
//                        fDtran is left NULL.  Set to
//                        U_MEMORY_ALLOCATION_ERROR if the vector cannot be
//                        allocated.
//
//  fPositions starts out NULL; the code that creates the state assigns it.
//
//-----------------------------------------------------------------------------
RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) {
    fMarked    = FALSE;
    fAccepting = 0;
    fLookAhead = 0;
    fTagVal    = 0;
    fPositions = NULL;
    fDtran     = NULL;

    if (U_FAILURE(*fStatus)) {
        return;
    }

    fDtran = new UVector(lastInputSymbol+1, *fStatus);
    if (fDtran == NULL) {
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if (U_FAILURE(*fStatus)) {
        // The vector object exists but its construction reported an error.
        // Leave it unsized; the destructor still releases it.
        return;
    }

    // fDtran is written by index, so it must be pre-sized.
    fDtran->setSize(lastInputSymbol+1);
}
//-----------------------------------------------------------------------------
//
//  RBBIStateDescriptor destructor.  Releases the position set and the
//                                   transition vector owned by this state.
//
//-----------------------------------------------------------------------------
RBBIStateDescriptor::~RBBIStateDescriptor() {
    delete fPositions;
    fPositions = NULL;

    delete fDtran;
    fDtran = NULL;
}
U_NAMESPACE_END
#endif