interim commit - enhancing rex1.c

This commit is contained in:
hyung-hwan 2009-11-20 07:32:55 +00:00
parent 9d3084f455
commit a6162f3eea

View File

@ -258,11 +258,11 @@ static qse_rex_node_t* newbranchnode (
return n; return n;
} }
#define CHECK_END(builder) \ #define CHECK_END(com) \
do { \ do { \
if (builder->ptr >= builder->ptn.end) \ if (com->ptr >= com->end) \
{ \ { \
builder->errnum = QSE_REX_EEND; \ /*com->errnum = QSE_REX_EEND;*/ \
return -1; \ return -1; \
} \ } \
} while(0) } while(0)
@ -277,7 +277,18 @@ static qse_rex_node_t* newbranchnode (
(c >= QSE_T('A') && c <= QSE_T('F'))? c-QSE_T('A')+10: \ (c >= QSE_T('A') && c <= QSE_T('F'))? c-QSE_T('A')+10: \
c-QSE_T('a')+10) c-QSE_T('a')+10)
static int getc (comp_t* com) #define IS_SPE(com,ch) ((com)->c.value == (ch) && !(com)->c.escaped)
/* IS_ESC: non-zero when the compiler's current character (com->c) is
 * flagged as escaped. */
#define IS_ESC(com) ((com)->c.escaped)
/* IS_EOF: non-zero when the current character value equals QSE_CHAR_EOF.
 * Unlike IS_ESC, this looks only at c.value and ignores the escaped flag. */
#define IS_EOF(com) ((com)->c.value == QSE_CHAR_EOF)
/* Values for the `level` argument of getc(). */
enum
{
/* The level passed by every getc() call visible in this change; getc()
 * tests the fetched character against '[', '|', '^', '$', '{', '+', '?'
 * only when level == LEVEL_NORMAL. */
LEVEL_NORMAL,
/* NOTE(review): not referenced in the visible hunks - presumably used
 * while reading inside a bracket expression; confirm against getc(). */
LEVEL_CHARSET,
/* NOTE(review): not referenced in the visible hunks - presumably used
 * while reading a range/bound specifier; confirm against getc(). */
LEVEL_RANGE
};
static int getc (comp_t* com, int level)
{ {
if (com->ptr >= com->end) if (com->ptr >= com->end)
{ {
@ -295,7 +306,7 @@ static int getc (comp_t* com)
{ {
qse_char_t c; qse_char_t c;
CHECK_END (builder); CHECK_END (com);
c = *com->ptr++; c = *com->ptr++;
if (c == QSE_T('n')) c = QSE_T('\n'); if (c == QSE_T('n')) c = QSE_T('\n');
@ -311,13 +322,13 @@ static int getc (comp_t* com)
c = c - QSE_T('0'); c = c - QSE_T('0');
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (cx >= QSE_T('0') && cx <= QSE_T('7')) if (cx >= QSE_T('0') && cx <= QSE_T('7'))
{ {
c = c * 8 + cx - QSE_T('0'); c = c * 8 + cx - QSE_T('0');
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (cx >= QSE_T('0') && cx <= QSE_T('7')) if (cx >= QSE_T('0') && cx <= QSE_T('7'))
{ {
@ -329,13 +340,13 @@ static int getc (comp_t* com)
{ {
qse_char_t cx; qse_char_t cx;
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (IS_HEX(cx)) if (IS_HEX(cx))
{ {
c = HEX_TO_NUM(cx); c = HEX_TO_NUM(cx);
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (IS_HEX(cx)) if (IS_HEX(cx))
{ {
@ -348,7 +359,7 @@ static int getc (comp_t* com)
{ {
qse_char_t cx; qse_char_t cx;
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (IS_HEX(cx)) if (IS_HEX(cx))
{ {
@ -358,7 +369,7 @@ static int getc (comp_t* com)
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
{ {
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (!IS_HEX(cx)) break; if (!IS_HEX(cx)) break;
@ -370,7 +381,7 @@ static int getc (comp_t* com)
{ {
qse_char_t cx; qse_char_t cx;
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (IS_HEX(cx)) if (IS_HEX(cx))
{ {
@ -380,7 +391,7 @@ static int getc (comp_t* com)
for (i = 0; i < 7; i++) for (i = 0; i < 7; i++)
{ {
CHECK_END (builder); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (!IS_HEX(cx)) break; if (!IS_HEX(cx)) break;
@ -397,13 +408,13 @@ static int getc (comp_t* com)
} }
else else
{ {
if (level == LEVEL_TOP) if (level == LEVEL_NORMAL)
{ {
if (com->c.value == QSE_T('[') || if (com->c.value == QSE_T('[') ||
com->c.value == QSE_T('|') || com->c.value == QSE_T('|') ||
com->c.value == QSE_T('^') || com->c.value == QSE_T('^') ||
com->c.value == QSE_T('$') || com->c.value == QSE_T('$') ||
(!(com->option & QSE_REX_BUILD_NOBOUND) && (/*!(com->option & QSE_REX_BUILD_NOBOUND) && TODO:*/
com->c.value == QSE_T('{')) || com->c.value == QSE_T('{')) ||
com->c.value == QSE_T('+') || com->c.value == QSE_T('+') ||
com->c.value == QSE_T('?') || com->c.value == QSE_T('?') ||
@ -446,152 +457,170 @@ static qse_rex_node_t* comp2 (comp_t* c)
{ {
qse_rex_node_t* n; qse_rex_node_t* n;
switch (c->c.value) if (!IS_ESC(c))
{ {
case QSE_T('('): switch (c->c.value)
{ {
qse_rex_node_t* x, * ge; case QSE_T('('):
n = newgroupnode (c, QSE_NULL);
if (n == QSE_NULL) return QSE_NULL;
ge = newgroupendnode (c, n);
if (ge == QSE_NULL)
{ {
// free n /* enter a subgroup */
return QSE_NULL;
}
if (getc(c) <= -1) qse_rex_node_t* x, * ge;
{
// freere (ge);
// freere (n);
return QSE_NULL;
}
c->gdepth++; n = newgroupnode (c, QSE_NULL);
x = comp0 (c, ge); if (n == QSE_NULL) return QSE_NULL;
if (x == QSE_NULL)
{
// freere (ge);
// freere (n);
return QSE_NULL;
}
if (c->c.value != QSE_T(')')) ge = newgroupendnode (c, n);
{ if (ge == QSE_NULL)
{
// free n
return QSE_NULL;
}
if (getc(c,LEVEL_NORMAL) <= -1)
{
// freere (ge);
// freere (n);
return QSE_NULL;
}
c->gdepth++;
x = comp0 (c, ge);
if (x == QSE_NULL)
{
// freere (ge);
// freere (n);
return QSE_NULL;
}
if (!IS_SPE(c,QSE_T(')')))
{
qse_printf (QSE_T("expecting )\n")); qse_printf (QSE_T("expecting )\n"));
// UNBALANCED PAREN. // UNBALANCED PAREN.
// freere (x); // freere (x);
// freere (n); // freere (n);
return QSE_NULL; return QSE_NULL;
}
c->gdepth--;
if (getc(c,LEVEL_NORMAL) <= -1)
{
// freere (x);
// freere (n);
return QSE_NULL;
}
n->u.g.head = x;
break;
} }
c->gdepth--;
if (getc(c) <= -1)
{
// freere (x);
// freere (n);
return QSE_NULL;
}
n->u.g.head = x; case QSE_T('.'):
break; n = newnode (c, QSE_REX_NODE_ANYCHAR);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c,LEVEL_NORMAL) <= -1)
{
// TODO: error handling..
return QSE_NULL;
}
break;
case QSE_T('^'):
n = newnode (c, QSE_REX_NODE_BOL);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c,LEVEL_NORMAL) <= -1)
{
// TODO: error handling..
return QSE_NULL;
}
break;
case QSE_T('$'):
n = newnode (c, QSE_REX_NODE_EOL);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c,LEVEL_NORMAL) <= -1)
{
// TODO: error handling..
return QSE_NULL;
}
break;
/*
case QSE_T('['):
break;
*/
default:
goto normal_char;
}
}
else
{
normal_char:
/* normal character */
n = newcharnode (c, c->c.value);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c,LEVEL_NORMAL) <= -1)
{
// TODO: error handling..
return QSE_NULL;
} }
case QSE_T('.'):
n = newnode (c, QSE_REX_NODE_ANYCHAR);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1)
{
// TODO: error handling..
return QSE_NULL;
}
break;
case QSE_T('^'):
n = newnode (c, QSE_REX_NODE_BOL);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1)
{
// TODO: error handling..
return QSE_NULL;
}
break;
case QSE_T('$'):
n = newnode (c, QSE_REX_NODE_EOL);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1)
{
// TODO: error handling..
return QSE_NULL;
}
break;
/*
case QSE_T('['):
....
*/
default:
/* normal character */
n = newcharnode (c, c->c.value);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1)
{
// TODO: error handling..
return QSE_NULL;
}
break;
} }
/* handle the occurrence specifier, if any */ n->occ.min = 1;
switch (c->c) n->occ.max = 1;
if (!IS_ESC(c))
{ {
case QSE_T('?'): /* handle the occurrence specifier, if any */
n->occ.min = 0;
n->occ.max = 1;
if (getc(c) <= -1)
{
// TODO: error handling..
//free n
return QSE_NULL;
}
break;
case QSE_T('*'): switch (c->c.value)
n->occ.min = 0; {
n->occ.max = OCC_MAX; case QSE_T('?'):
if (getc(c) <= -1) n->occ.min = 0;
{ n->occ.max = 1;
// TODO: error handling.. if (getc(c,LEVEL_NORMAL) <= -1)
//free n {
return QSE_NULL; // TODO: error handling..
} //free n
break; return QSE_NULL;
}
break;
case QSE_T('+'): case QSE_T('*'):
n->occ.min = 1; n->occ.min = 0;
n->occ.max = OCC_MAX; n->occ.max = OCC_MAX;
if (getc(c) <= -1) if (getc(c,LEVEL_NORMAL) <= -1)
{ {
// TODO: error handling.. // TODO: error handling..
//free n //free n
return QSE_NULL; return QSE_NULL;
} }
break; break;
/* case QSE_T('+'):
case QSE_T('{'): n->occ.min = 1;
// TODO -------------- n->occ.max = OCC_MAX;
break; if (getc(c,LEVEL_NORMAL) <= -1)
*/ {
// TODO: error handling..
//free n
return QSE_NULL;
}
break;
default: /*
n->occ.min = 1; case QSE_T('{'):
n->occ.max = 1; // TODO:
if (com->rex->option & QSE_REX_BUILD_NOBOUND)
{
}
break;
*/
}
} }
return n; return n;
@ -604,8 +633,8 @@ static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair)
pair->tail = pair->head; pair->tail = pair->head;
while (c->c.value != QSE_T('|') && c->c.value != QSE_CHAR_EOF && while (!IS_SPE(c,QSE_T('|')) && !IS_EOF(c) &&
!(c->gdepth >= 0 && c->c.value == QSE_T(')'))) !(c->gdepth > 0 && IS_SPE(c,QSE_T(')'))))
{ {
qse_rex_node_t* tmp = comp2 (c); qse_rex_node_t* tmp = comp2 (c);
if (tmp == QSE_NULL) if (tmp == QSE_NULL)
@ -630,9 +659,9 @@ static qse_rex_node_t* comp0 (comp_t* c, qse_rex_node_t* ge)
if (left == QSE_NULL) return QSE_NULL; if (left == QSE_NULL) return QSE_NULL;
xpair.tail->next = ge; xpair.tail->next = ge;
while (c->c.value == QSE_T('|')) while (IS_SPE(c,QSE_T('|')))
{ {
if (getc (c) <= -1) if (getc(c,LEVEL_NORMAL) <= -1)
{ {
//freere (left); //freere (left);
return QSE_NULL; return QSE_NULL;
@ -663,12 +692,7 @@ qse_rex_node_t* qse_rex_comp (
qse_rex_t* rex, const qse_char_t* ptr, qse_size_t len) qse_rex_t* rex, const qse_char_t* ptr, qse_size_t len)
{ {
comp_t c; comp_t c;
qse_rex_node_t* end, * body;
if (rex->code != QSE_NULL)
{
freeallnodes (rex->code);
rex->code = QSE_NULL;
}
c.rex = rex; c.rex = rex;
c.re.ptr = ptr; c.re.ptr = ptr;
@ -682,38 +706,30 @@ qse_rex_node_t* qse_rex_comp (
c.gdepth = 0; c.gdepth = 0;
c.start = QSE_NULL; c.start = QSE_NULL;
if (getc(&c) <= -1) return QSE_NULL; /* read the first character */
if (getc(&c,LEVEL_NORMAL) <= -1) return QSE_NULL;
c.start = newstartnode (&c); c.start = newstartnode (&c);
if (c.start != QSE_NULL) if (c.start == QSE_NULL) return QSE_NULL;
end = newendnode (&c);
if (end == QSE_NULL)
{ {
qse_rex_node_t* end; freenode (c.start, c.rex->mmgr);
end = newendnode (&c); return QSE_NULL;
if (end == QSE_NULL)
{
freenode (c.start, c.rex->mmgr);
c.start = QSE_NULL;
}
else
{
qse_rex_node_t* tmp;
/*tmp = comp0 (&c, QSE_NULL);*/
tmp = comp0 (&c, end);
if (tmp == QSE_NULL)
{
/*freenode (c.start, c.rex->mmgr);*/
freeallnodes (c.start);
c.start = QSE_NULL;
}
else
{
qse_printf (QSE_T("start has tmp...\n"));
c.start->next = tmp;
}
}
} }
body = comp0 (&c, end);
if (body == QSE_NULL)
{
freeallnodes (c.start);
return QSE_NULL;
}
c.start->next = body;
if (rex->code != QSE_NULL) freeallnodes (rex->code);
rex->code = c.start; rex->code = c.start;
return rex->code; return rex->code;
} }
@ -854,15 +870,18 @@ static int addcands (
} }
else if (candnode->id == QSE_REX_NODE_BRANCH) else if (candnode->id == QSE_REX_NODE_BRANCH)
{ {
group_t* groupdup; group_t* gx = group;
QSE_ASSERT (candnode->next == QSE_NULL); QSE_ASSERT (candnode->next == QSE_NULL);
groupdup = dupgroups (e, group); if (group != QSE_NULL)
if (groupdup == QSE_NULL) return -1; {
gx = dupgroups (e, group);
if (gx == QSE_NULL) return -1;
}
if (addcands (e, group, prevnode, candnode->u.b.left, mptr) <= -1) return -1; if (addcands (e, group, prevnode, candnode->u.b.left, mptr) <= -1) return -1;
if (addcands (e, groupdup, prevnode, candnode->u.b.right, mptr) <= -1) return -1; if (addcands (e, gx, prevnode, candnode->u.b.right, mptr) <= -1) return -1;
} }
else if (candnode->id == QSE_REX_NODE_GROUP) else if (candnode->id == QSE_REX_NODE_GROUP)
{ {
@ -1050,18 +1069,25 @@ static int exec (exec_t* e)
do do
{ {
/* kind of swap the next set and the current set by swapping indices */ /* swap the pending and active set indices.
* the pending set becomes active after which the match()
* function tries each candidate in it. New candidates
* are added into the pending set which will become active
* later when the loop reaches here again */
int tmp = e->cand.pending; int tmp = e->cand.pending;
e->cand.pending = e->cand.active; e->cand.pending = e->cand.active;
e->cand.active = tmp; e->cand.active = tmp;
/* check if there are any next candidates */
if (QSE_LDA_SIZE(&e->cand.set[e->cand.active]) <= 0) if (QSE_LDA_SIZE(&e->cand.set[e->cand.active]) <= 0)
{ {
/* if no more candidates, break */ /* we can't go on with no candidates in the
* active set. */
break; break;
} }
/* clear the pending set */
qse_lda_clear (&e->cand.set[e->cand.pending]);
{ {
int i; int i;
qse_printf (QSE_T("SET=")); qse_printf (QSE_T("SET="));
@ -1082,9 +1108,6 @@ for (i = 0; i < QSE_LDA_SIZE(&e->cand.set[e->cand.active]); i++)
qse_printf (QSE_T("\n")); qse_printf (QSE_T("\n"));
} }
/* clear the array to hold the next candidates */
qse_lda_clear (&e->cand.set[e->cand.pending]);
if (match (e) <= -1) return -1; if (match (e) <= -1) return -1;
} }
while (1); while (1);