interim commit - enhancing rex1.c

This commit is contained in:
hyung-hwan 2009-11-20 07:32:55 +00:00
parent 9d3084f455
commit a6162f3eea

View File

@ -258,11 +258,11 @@ static qse_rex_node_t* newbranchnode (
return n; return n;
} }
#define CHECK_END(builder) \ #define CHECK_END(com) \
do { \ do { \
if (builder->ptr >= builder->ptn.end) \ if (com->ptr >= com->end) \
{ \ { \
builder->errnum = QSE_REX_EEND; \ /*com->errnum = QSE_REX_EEND;*/ \
return -1; \ return -1; \
} \ } \
} while(0) } while(0)
@ -277,7 +277,18 @@ static qse_rex_node_t* newbranchnode (
(c >= QSE_T('A') && c <= QSE_T('F'))? c-QSE_T('A')+10: \ (c >= QSE_T('A') && c <= QSE_T('F'))? c-QSE_T('A')+10: \
c-QSE_T('a')+10) c-QSE_T('a')+10)
static int getc (comp_t* com) #define IS_SPE(com,ch) ((com)->c.value == (ch) && !(com)->c.escaped)
/* nonzero when the character held in com->c has its escaped flag set */
#define IS_ESC(com) ((com)->c.escaped)
/* nonzero when the character value held in com->c equals QSE_CHAR_EOF */
#define IS_EOF(com) ((com)->c.value == QSE_CHAR_EOF)
/* values for the 'level' argument of getc() */
enum
{
/* the level every visible caller passes; getc() checks for it before
 * comparing the character against '[', '|', '^', '$', '{', '+', '?' */
LEVEL_NORMAL,
/* presumably for reading inside a [...] character set; not used in
 * the visible code - TODO confirm */
LEVEL_CHARSET,
/* presumably for reading inside a {...} occurrence range; not used in
 * the visible code - TODO confirm */
LEVEL_RANGE
};
static int getc (comp_t* com, int level)
{ {
if (com->ptr >= com->end) if (com->ptr >= com->end)
{ {
@ -295,7 +306,7 @@ static int getc (comp_t* com)
{ {
qse_char_t c; qse_char_t c;
CHECK_END (builder); CHECK_END (com);
c = *com->ptr++; c = *com->ptr++;
if (c == QSE_T('n')) c = QSE_T('\n'); if (c == QSE_T('n')) c = QSE_T('\n');
@ -311,13 +322,13 @@ static int getc (comp_t* com)
c = c - QSE_T('0'); c = c - QSE_T('0');
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (cx >= QSE_T('0') && cx <= QSE_T('7')) if (cx >= QSE_T('0') && cx <= QSE_T('7'))
{ {
c = c * 8 + cx - QSE_T('0'); c = c * 8 + cx - QSE_T('0');
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (cx >= QSE_T('0') && cx <= QSE_T('7')) if (cx >= QSE_T('0') && cx <= QSE_T('7'))
{ {
@ -329,13 +340,13 @@ static int getc (comp_t* com)
{ {
qse_char_t cx; qse_char_t cx;
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (IS_HEX(cx)) if (IS_HEX(cx))
{ {
c = HEX_TO_NUM(cx); c = HEX_TO_NUM(cx);
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (IS_HEX(cx)) if (IS_HEX(cx))
{ {
@ -348,7 +359,7 @@ static int getc (comp_t* com)
{ {
qse_char_t cx; qse_char_t cx;
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (IS_HEX(cx)) if (IS_HEX(cx))
{ {
@ -358,7 +369,7 @@ static int getc (comp_t* com)
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
{ {
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (!IS_HEX(cx)) break; if (!IS_HEX(cx)) break;
@ -370,7 +381,7 @@ static int getc (comp_t* com)
{ {
qse_char_t cx; qse_char_t cx;
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (IS_HEX(cx)) if (IS_HEX(cx))
{ {
@ -380,7 +391,7 @@ static int getc (comp_t* com)
for (i = 0; i < 7; i++) for (i = 0; i < 7; i++)
{ {
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (!IS_HEX(cx)) break; if (!IS_HEX(cx)) break;
@ -397,13 +408,13 @@ static int getc (comp_t* com)
} }
else else
{ {
if (level == LEVEL_TOP) if (level == LEVEL_NORMAL)
{ {
if (com->c.value == QSE_T('[') || if (com->c.value == QSE_T('[') ||
com->c.value == QSE_T('|') || com->c.value == QSE_T('|') ||
com->c.value == QSE_T('^') || com->c.value == QSE_T('^') ||
com->c.value == QSE_T('$') || com->c.value == QSE_T('$') ||
(!(com->option & QSE_REX_BUILD_NOBOUND) && (/*!(com->option & QSE_REX_BUILD_NOBOUND) && TODO:*/
com->c.value == QSE_T('{')) || com->c.value == QSE_T('{')) ||
com->c.value == QSE_T('+') || com->c.value == QSE_T('+') ||
com->c.value == QSE_T('?') || com->c.value == QSE_T('?') ||
@ -446,10 +457,14 @@ static qse_rex_node_t* comp2 (comp_t* c)
{ {
qse_rex_node_t* n; qse_rex_node_t* n;
if (!IS_ESC(c))
{
switch (c->c.value) switch (c->c.value)
{ {
case QSE_T('('): case QSE_T('('):
{ {
/* enter a subgroup */
qse_rex_node_t* x, * ge; qse_rex_node_t* x, * ge;
n = newgroupnode (c, QSE_NULL); n = newgroupnode (c, QSE_NULL);
@ -462,7 +477,7 @@ static qse_rex_node_t* comp2 (comp_t* c)
return QSE_NULL; return QSE_NULL;
} }
if (getc(c) <= -1) if (getc(c,LEVEL_NORMAL) <= -1)
{ {
// freere (ge); // freere (ge);
// freere (n); // freere (n);
@ -478,7 +493,7 @@ static qse_rex_node_t* comp2 (comp_t* c)
return QSE_NULL; return QSE_NULL;
} }
if (c->c.value != QSE_T(')')) if (!IS_SPE(c,QSE_T(')')))
{ {
qse_printf (QSE_T("expecting )\n")); qse_printf (QSE_T("expecting )\n"));
// UNBALANCED PAREN. // UNBALANCED PAREN.
@ -488,7 +503,7 @@ qse_printf (QSE_T("expecting )\n"));
} }
c->gdepth--; c->gdepth--;
if (getc(c) <= -1) if (getc(c,LEVEL_NORMAL) <= -1)
{ {
// freere (x); // freere (x);
// freere (n); // freere (n);
@ -496,13 +511,15 @@ qse_printf (QSE_T("expecting )\n"));
} }
n->u.g.head = x; n->u.g.head = x;
break; break;
} }
case QSE_T('.'): case QSE_T('.'):
n = newnode (c, QSE_REX_NODE_ANYCHAR); n = newnode (c, QSE_REX_NODE_ANYCHAR);
if (n == QSE_NULL) return QSE_NULL; if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) if (getc(c,LEVEL_NORMAL) <= -1)
{ {
// TODO: error handling.. // TODO: error handling..
return QSE_NULL; return QSE_NULL;
@ -512,7 +529,7 @@ qse_printf (QSE_T("expecting )\n"));
case QSE_T('^'): case QSE_T('^'):
n = newnode (c, QSE_REX_NODE_BOL); n = newnode (c, QSE_REX_NODE_BOL);
if (n == QSE_NULL) return QSE_NULL; if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) if (getc(c,LEVEL_NORMAL) <= -1)
{ {
// TODO: error handling.. // TODO: error handling..
return QSE_NULL; return QSE_NULL;
@ -522,7 +539,7 @@ qse_printf (QSE_T("expecting )\n"));
case QSE_T('$'): case QSE_T('$'):
n = newnode (c, QSE_REX_NODE_EOL); n = newnode (c, QSE_REX_NODE_EOL);
if (n == QSE_NULL) return QSE_NULL; if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) if (getc(c,LEVEL_NORMAL) <= -1)
{ {
// TODO: error handling.. // TODO: error handling..
return QSE_NULL; return QSE_NULL;
@ -532,28 +549,39 @@ qse_printf (QSE_T("expecting )\n"));
/* /*
case QSE_T('['): case QSE_T('['):
.... break;
*/ */
default: default:
goto normal_char;
}
}
else
{
normal_char:
/* normal character */ /* normal character */
n = newcharnode (c, c->c.value); n = newcharnode (c, c->c.value);
if (n == QSE_NULL) return QSE_NULL; if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) if (getc(c,LEVEL_NORMAL) <= -1)
{ {
// TODO: error handling.. // TODO: error handling..
return QSE_NULL; return QSE_NULL;
} }
break;
} }
n->occ.min = 1;
n->occ.max = 1;
if (!IS_ESC(c))
{
/* handle the occurrence specifier, if any */ /* handle the occurrence specifier, if any */
switch (c->c)
switch (c->c.value)
{ {
case QSE_T('?'): case QSE_T('?'):
n->occ.min = 0; n->occ.min = 0;
n->occ.max = 1; n->occ.max = 1;
if (getc(c) <= -1) if (getc(c,LEVEL_NORMAL) <= -1)
{ {
// TODO: error handling.. // TODO: error handling..
//free n //free n
@ -564,7 +592,7 @@ qse_printf (QSE_T("expecting )\n"));
case QSE_T('*'): case QSE_T('*'):
n->occ.min = 0; n->occ.min = 0;
n->occ.max = OCC_MAX; n->occ.max = OCC_MAX;
if (getc(c) <= -1) if (getc(c,LEVEL_NORMAL) <= -1)
{ {
// TODO: error handling.. // TODO: error handling..
//free n //free n
@ -575,7 +603,7 @@ qse_printf (QSE_T("expecting )\n"));
case QSE_T('+'): case QSE_T('+'):
n->occ.min = 1; n->occ.min = 1;
n->occ.max = OCC_MAX; n->occ.max = OCC_MAX;
if (getc(c) <= -1) if (getc(c,LEVEL_NORMAL) <= -1)
{ {
// TODO: error handling.. // TODO: error handling..
//free n //free n
@ -585,13 +613,14 @@ qse_printf (QSE_T("expecting )\n"));
/* /*
case QSE_T('{'): case QSE_T('{'):
// TODO -------------- // TODO:
if (com->rex->option & QSE_REX_BUILD_NOBOUND)
{
}
break; break;
*/ */
default: }
n->occ.min = 1;
n->occ.max = 1;
} }
return n; return n;
@ -604,8 +633,8 @@ static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair)
pair->tail = pair->head; pair->tail = pair->head;
while (c->c.value != QSE_T('|') && c->c.value != QSE_CHAR_EOF && while (!IS_SPE(c,QSE_T('|')) && !IS_EOF(c) &&
!(c->gdepth >= 0 && c->c.value == QSE_T(')'))) !(c->gdepth > 0 && IS_SPE(c,QSE_T(')'))))
{ {
qse_rex_node_t* tmp = comp2 (c); qse_rex_node_t* tmp = comp2 (c);
if (tmp == QSE_NULL) if (tmp == QSE_NULL)
@ -630,9 +659,9 @@ static qse_rex_node_t* comp0 (comp_t* c, qse_rex_node_t* ge)
if (left == QSE_NULL) return QSE_NULL; if (left == QSE_NULL) return QSE_NULL;
xpair.tail->next = ge; xpair.tail->next = ge;
while (c->c.value == QSE_T('|')) while (IS_SPE(c,QSE_T('|')))
{ {
if (getc (c) <= -1) if (getc(c,LEVEL_NORMAL) <= -1)
{ {
//freere (left); //freere (left);
return QSE_NULL; return QSE_NULL;
@ -663,12 +692,7 @@ qse_rex_node_t* qse_rex_comp (
qse_rex_t* rex, const qse_char_t* ptr, qse_size_t len) qse_rex_t* rex, const qse_char_t* ptr, qse_size_t len)
{ {
comp_t c; comp_t c;
qse_rex_node_t* end, * body;
if (rex->code != QSE_NULL)
{
freeallnodes (rex->code);
rex->code = QSE_NULL;
}
c.rex = rex; c.rex = rex;
c.re.ptr = ptr; c.re.ptr = ptr;
@ -682,38 +706,30 @@ qse_rex_node_t* qse_rex_comp (
c.gdepth = 0; c.gdepth = 0;
c.start = QSE_NULL; c.start = QSE_NULL;
if (getc(&c) <= -1) return QSE_NULL; /* read the first character */
if (getc(&c,LEVEL_NORMAL) <= -1) return QSE_NULL;
c.start = newstartnode (&c); c.start = newstartnode (&c);
if (c.start != QSE_NULL) if (c.start == QSE_NULL) return QSE_NULL;
{
qse_rex_node_t* end;
end = newendnode (&c); end = newendnode (&c);
if (end == QSE_NULL) if (end == QSE_NULL)
{ {
freenode (c.start, c.rex->mmgr); freenode (c.start, c.rex->mmgr);
c.start = QSE_NULL; return QSE_NULL;
}
else
{
qse_rex_node_t* tmp;
/*tmp = comp0 (&c, QSE_NULL);*/
tmp = comp0 (&c, end);
if (tmp == QSE_NULL)
{
/*freenode (c.start, c.rex->mmgr);*/
freeallnodes (c.start);
c.start = QSE_NULL;
}
else
{
qse_printf (QSE_T("start has tmp...\n"));
c.start->next = tmp;
}
}
} }
body = comp0 (&c, end);
if (body == QSE_NULL)
{
freeallnodes (c.start);
return QSE_NULL;
}
c.start->next = body;
if (rex->code != QSE_NULL) freeallnodes (rex->code);
rex->code = c.start; rex->code = c.start;
return rex->code; return rex->code;
} }
@ -854,15 +870,18 @@ static int addcands (
} }
else if (candnode->id == QSE_REX_NODE_BRANCH) else if (candnode->id == QSE_REX_NODE_BRANCH)
{ {
group_t* groupdup; group_t* gx = group;
QSE_ASSERT (candnode->next == QSE_NULL); QSE_ASSERT (candnode->next == QSE_NULL);
groupdup = dupgroups (e, group); if (group != QSE_NULL)
if (groupdup == QSE_NULL) return -1; {
gx = dupgroups (e, group);
if (gx == QSE_NULL) return -1;
}
if (addcands (e, group, prevnode, candnode->u.b.left, mptr) <= -1) return -1; if (addcands (e, group, prevnode, candnode->u.b.left, mptr) <= -1) return -1;
if (addcands (e, groupdup, prevnode, candnode->u.b.right, mptr) <= -1) return -1; if (addcands (e, gx, prevnode, candnode->u.b.right, mptr) <= -1) return -1;
} }
else if (candnode->id == QSE_REX_NODE_GROUP) else if (candnode->id == QSE_REX_NODE_GROUP)
{ {
@ -1050,18 +1069,25 @@ static int exec (exec_t* e)
do do
{ {
/* kind of swap the next set and the current set by swapping indices */ /* swap the pending and active set indices.
* the pending set becomes active after which the match()
* function tries each candidate in it. New candidates
* are added into the pending set which will become active
* later when the loop reaches here again */
int tmp = e->cand.pending; int tmp = e->cand.pending;
e->cand.pending = e->cand.active; e->cand.pending = e->cand.active;
e->cand.active = tmp; e->cand.active = tmp;
/* check if there are any next candidates */
if (QSE_LDA_SIZE(&e->cand.set[e->cand.active]) <= 0) if (QSE_LDA_SIZE(&e->cand.set[e->cand.active]) <= 0)
{ {
/* if no more candidates, break */ /* we can't go on with no candidates in the
* active set. */
break; break;
} }
/* clear the pending set */
qse_lda_clear (&e->cand.set[e->cand.pending]);
{ {
int i; int i;
qse_printf (QSE_T("SET=")); qse_printf (QSE_T("SET="));
@ -1082,9 +1108,6 @@ for (i = 0; i < QSE_LDA_SIZE(&e->cand.set[e->cand.active]); i++)
qse_printf (QSE_T("\n")); qse_printf (QSE_T("\n"));
} }
/* clear the array to hold the next candidates */
qse_lda_clear (&e->cand.set[e->cand.pending]);
if (match (e) <= -1) return -1; if (match (e) <= -1) return -1;
} }
while (1); while (1);