added simple optimizations to rex1.c

This commit is contained in:
hyung-hwan 2009-12-09 07:15:00 +00:00
parent c7d45b6099
commit b5dd6fa606
2 changed files with 153 additions and 40 deletions

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rex.h 309 2009-11-27 13:46:49Z hyunghwan.chung $ * $Id: rex.h 310 2009-12-08 13:15:00Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -154,6 +154,7 @@ struct qse_rex_node_t
struct struct
{ {
qse_rex_node_t* head; qse_rex_node_t* head;
qse_rex_node_t* end;
} g; } g;
struct struct

View File

@ -270,11 +270,9 @@ static qse_rex_node_t* newnopnode (comp_t* c)
return newnode (c, QSE_REX_NODE_NOP); return newnode (c, QSE_REX_NODE_NOP);
} }
static qse_rex_node_t* newgroupnode (comp_t* c, qse_rex_node_t* head) static qse_rex_node_t* newgroupnode (comp_t* c)
{ {
qse_rex_node_t* n = newnode (c, QSE_REX_NODE_GROUP); return newnode (c, QSE_REX_NODE_GROUP);
if (n != QSE_NULL) n->u.g.head = head;
return n;
} }
static qse_rex_node_t* newgroupendnode (comp_t* c, qse_rex_node_t* group) static qse_rex_node_t* newgroupendnode (comp_t* c, qse_rex_node_t* group)
@ -740,7 +738,7 @@ static qse_rex_node_t* comp2 (comp_t* com)
qse_rex_node_t* x, * ge; qse_rex_node_t* x, * ge;
n = newgroupnode (com, QSE_NULL); n = newgroupnode (com);
if (n == QSE_NULL) return QSE_NULL; if (n == QSE_NULL) return QSE_NULL;
ge = newgroupendnode (com, n); ge = newgroupendnode (com, n);
@ -749,6 +747,9 @@ static qse_rex_node_t* comp2 (comp_t* com)
if (getc_esc(com) <= -1) return QSE_NULL; if (getc_esc(com) <= -1) return QSE_NULL;
com->gdepth++; com->gdepth++;
/* pass the GROUPEND node so that the
* last node in the subgroup links to
* this GROUPEND node. */
x = comp0 (com, ge); x = comp0 (com, ge);
if (x == QSE_NULL) return QSE_NULL; if (x == QSE_NULL) return QSE_NULL;
@ -762,6 +763,7 @@ static qse_rex_node_t* comp2 (comp_t* com)
if (getc_esc(com) <= -1) return QSE_NULL; if (getc_esc(com) <= -1) return QSE_NULL;
n->u.g.head = x; n->u.g.head = x;
n->u.g.end = ge;
break; break;
} }
@ -845,11 +847,18 @@ static qse_rex_node_t* comp2 (comp_t* com)
return n; return n;
} }
/* compile a list of atoms at the outermost level and/or
* within a subgroup */
static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair) static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair)
{ {
pair->head = newnopnode (c); pair->head = newnopnode (c);
if (pair->head == QSE_NULL) return QSE_NULL; if (pair->head == QSE_NULL) return QSE_NULL;
#ifdef DONOT_SKIP_NOP
pair->head->occ.min = 1;
pair->head->occ.max = 1;
#endif
pair->tail = pair->head; pair->tail = pair->head;
while (!IS_SPE(c,QSE_T('|')) && !IS_EOF(c) && while (!IS_SPE(c,QSE_T('|')) && !IS_EOF(c) &&
@ -858,6 +867,18 @@ static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair)
qse_rex_node_t* tmp = comp2 (c); qse_rex_node_t* tmp = comp2 (c);
if (tmp == QSE_NULL) return QSE_NULL; if (tmp == QSE_NULL) return QSE_NULL;
if (tmp->id == QSE_REX_NODE_GROUP)
{
/* simple optimization to remove an empty group */
qse_rex_node_t* gg = tmp->u.g.head;
while (gg->id == QSE_REX_NODE_NOP) gg = gg->next;
if (gg->id == QSE_REX_NODE_GROUPEND)
{
/* exclude an empty subgroup */
continue;
}
}
pair->tail->next = tmp; pair->tail->next = tmp;
pair->tail = tmp; pair->tail = tmp;
} }
@ -1160,6 +1181,7 @@ static int addsimplecand (
qse_size_t occ, const qse_char_t* mptr) qse_size_t occ, const qse_char_t* mptr)
{ {
QSE_ASSERT ( QSE_ASSERT (
node->id == QSE_REX_NODE_NOP ||
node->id == QSE_REX_NODE_BOL || node->id == QSE_REX_NODE_BOL ||
node->id == QSE_REX_NODE_EOL || node->id == QSE_REX_NODE_EOL ||
node->id == QSE_REX_NODE_ANY || node->id == QSE_REX_NODE_ANY ||
@ -1177,12 +1199,21 @@ static int addsimplecand (
/*if (node->id == QSE_REX_NODE_CHAR) /*if (node->id == QSE_REX_NODE_CHAR)
qse_printf (QSE_T("adding %d %c\n"), node->id, node->u.c); qse_printf (QSE_T("adding %d %c\n"), node->id, node->u.c);
else else
qse_printf (QSE_T("adding %d NA\n"), node->id);*/ qse_printf (QSE_T("adding %d NA\n"), node->id);
*/
if (qse_lda_search (
&e->cand.set[e->cand.pending],
0,
&cand, 1) != QSE_LDA_NIL)
{
return 0;
}
if (qse_lda_insert ( if (qse_lda_insert (
&e->cand.set[e->cand.pending], &e->cand.set[e->cand.pending],
QSE_LDA_SIZE(&e->cand.set[e->cand.pending]), QSE_LDA_SIZE(&e->cand.set[e->cand.pending]),
&cand, 1) == (qse_size_t)-1) &cand, 1) == QSE_LDA_NIL)
{ {
e->rex->errnum = QSE_REX_ENOMEM; e->rex->errnum = QSE_REX_ENOMEM;
return -1; return -1;
@ -1193,13 +1224,22 @@ qse_printf (QSE_T("adding %d NA\n"), node->id);*/
return 0; return 0;
} }
/* The addcands() function adds a candidate from candnode.
* If candnode is not a simple node, it traverses further
* until it reaches a simple node. prevnode is the last
* GROUPEND node visited during traversal. If no GROUPEND
* has been visited yet, it can be any starting node. */
static int addcands ( static int addcands (
exec_t* e, group_t* group, qse_rex_node_t* prevnode, exec_t* e, group_t* group, qse_rex_node_t* prevnode,
qse_rex_node_t* candnode, const qse_char_t* mptr) qse_rex_node_t* candnode, const qse_char_t* mptr)
{ {
warpback:
#ifndef DONOT_SKIP_NOP
/* skip all NOP nodes */ /* skip all NOP nodes */
while (candnode != QSE_NULL && candnode->id == QSE_REX_NODE_NOP) while (candnode != QSE_NULL && candnode->id == QSE_REX_NODE_NOP)
candnode = candnode->next; candnode = candnode->next;
#endif
/* nothing to add */ /* nothing to add */
if (candnode == QSE_NULL) return 0; if (candnode == QSE_NULL) return 0;
@ -1252,22 +1292,30 @@ static int addcands (
case QSE_REX_NODE_GROUP: case QSE_REX_NODE_GROUP:
{ {
int n; int n;
qse_rex_node_t* front;
if (candnode->occ.min <= 0) /*qse_printf (QSE_T("GROUP %p PREV %p\n"), candnode, prevnode);*/
/* skip all NOP nodes */
front = candnode->u.g.head;
while (front->id == QSE_REX_NODE_NOP)
front = front->next;
if (front->id == QSE_REX_NODE_GROUPEND)
{ {
/* if the group node is optional, /* if GROUPEND is reached, the group
* add the next node to the candidate array. */ * is empty. jump to the next node
* regardless of its occurrence.
refupgroupstack (group); * however, this will never be reached
n = addcands (e, group, * as it has been removed in comp() */
prevnode, candnode->next, mptr); candnode = candnode->next;
refdowngroupstack (group, e->rex->mmgr); goto warpback;
if (n <= -1) return -1;
} }
if (candnode->occ.max > 0) if (candnode->occ.max > 0)
{ {
/* add the first node in a subgroup
* as a candidate */
group_t* gx; group_t* gx;
/* push the current group node (candnode) to /* push the current group node (candnode) to
@ -1285,13 +1333,29 @@ static int addcands (
/* add the first node in the group to /* add the first node in the group to
* the candidate array */ * the candidate array */
refupgroupstack (gx); refupgroupstack (gx);
n = addcands (e, gx, n = addcands (e, gx, prevnode, front, mptr);
candnode, candnode->u.g.head, mptr);
refdowngroupstack (gx, e->rex->mmgr); refdowngroupstack (gx, e->rex->mmgr);
if (n <= -1) return -1; if (n <= -1) return -1;
} }
if (candnode->occ.min <= 0)
{
/* if the group node is optional,
* add the next node to the candidate array. */
/* BEGIN avoid recursion */
#if 0
refupgroupstack (group);
n = addcands (e, group, prevnode, candnode->next, mptr);
refdowngroupstack (group, e->rex->mmgr);
if (n <= -1) return -1;
#endif
candnode = candnode->next;
goto warpback;
/* END avoid recursion */
}
break; break;
} }
@ -1302,8 +1366,10 @@ static int addcands (
qse_rex_node_t* node; qse_rex_node_t* node;
qse_size_t occ; qse_size_t occ;
QSE_ASSERTX (group != QSE_NULL && group->next != QSE_NULL, /*qse_printf (QSE_T("GROUPEND %p PREV %p\n"), candnode, prevnode);*/
"GROUPEND reached must be paired up with a GROUP"); QSE_ASSERTX (
group != QSE_NULL && group->next != QSE_NULL,
"GROUPEND must be paired up with GROUP");
if (prevnode == candnode) if (prevnode == candnode)
{ {
@ -1323,7 +1389,10 @@ static int addcands (
occ = top->occ; occ = top->occ;
node = top->node; node = top->node;
QSE_ASSERT (node == candnode->u.ge.group); QSE_ASSERTX (node == candnode->u.ge.group,
"The GROUP node in the group stack must be the one "
"pairing up with the GROUPEND node."
);
if (occ >= node->occ.min) if (occ >= node->occ.min)
{ {
@ -1366,19 +1435,34 @@ static int addcands (
} }
refupgroupstack (gx); refupgroupstack (gx);
n = addcands (e, gx, candnode, node->next, mptr); if (prevnode != QSE_NULL && prevnode->id == QSE_REX_NODE_GROUPEND)
{
n = addcands (e, gx, prevnode, node->next, mptr);
}
else
{
n = addcands (e, gx, candnode, node->next, mptr);
}
refdowngroupstack (gx, e->rex->mmgr); refdowngroupstack (gx, e->rex->mmgr);
if (n <= -1) return -1; if (n <= -1) return -1;
} }
if (occ < node->occ.max) if (occ < node->occ.max)
{ {
/* need to repeat itself. */ /* repeat itself. */
refupgroupstack (group);
n = addcands (e, group, candnode, node->u.g.head, mptr);
refdowngroupstack (group, e->rex->mmgr);
/* BEGIN avoid recursion */
#if 0
refupgroupstack (group);
n = addcands (e, group, prevnode, node->u.g.head, mptr);
refdowngroupstack (group, e->rex->mmgr);
if (n <= -1) return -1; if (n <= -1) return -1;
#endif
prevnode = candnode;
candnode = node->u.g.head;
goto warpback;
/* END avoid recursion */
} }
break; break;
@ -1388,16 +1472,6 @@ static int addcands (
{ {
int n; int n;
if (candnode->occ.min <= 0)
{
/* if the node is optional,
* add the next node to the candidate array */
refupgroupstack (group);
n = addcands (e, group, prevnode, candnode->next, mptr);
refdowngroupstack (group, e->rex->mmgr);
if (n <= -1) return -1;
}
if (candnode->occ.max > 0) if (candnode->occ.max > 0)
{ {
group_t* gx; group_t* gx;
@ -1423,6 +1497,24 @@ static int addcands (
if (n <= -1) return -1; if (n <= -1) return -1;
} }
if (candnode->occ.min <= 0)
{
/* if the node is optional,
* add the next node to the candidate array */
/* BEGIN avoid recursion */
#if 0
refupgroupstack (group);
n = addcands (e, group, prevnode, candnode->next, mptr);
refdowngroupstack (group, e->rex->mmgr);
if (n <= -1) return -1;
#endif
candnode = candnode->next;
goto warpback;
/* END avoid recursion */
}
break; break;
} }
} }
@ -1507,6 +1599,12 @@ static int match (exec_t* e)
switch (node->id) switch (node->id)
{ {
#ifdef DONOT_SKIP_NOP
case QSE_REX_NODE_NOP:
nmptr = cand->mptr;
break;
#endif
case QSE_REX_NODE_BOL: case QSE_REX_NODE_BOL:
if (cand->mptr == e->str.ptr) if (cand->mptr == e->str.ptr)
{ {
@ -1568,7 +1666,8 @@ static int match (exec_t* e)
"QSE_REX_NODE_EOL, " "QSE_REX_NODE_EOL, "
"QSE_REX_NODE_ANY, " "QSE_REX_NODE_ANY, "
"QSE_REX_NODE_CHAR, " "QSE_REX_NODE_CHAR, "
"QSE_REX_NODE_CSET"); "QSE_REX_NODE_CSET, "
"QSE_REX_NODE_NOP");
break; break;
} }
@ -1705,6 +1804,16 @@ static void refdowngroupstack_incand (qse_lda_t* lda, void* dptr, qse_size_t dle
refdowngroupstack (((cand_t*)dptr)->group, lda->mmgr); refdowngroupstack (((cand_t*)dptr)->group, lda->mmgr);
} }
/* Comparator for candidate entries.
 *
 * dptr1 and dptr2 each point to a cand_t. Two candidates compare
 * equal when they reference the same node; no other cand_t field
 * is examined. The lda, dlen1 and dlen2 arguments are not used.
 *
 * Returns 0 if both candidates share the same node, 1 otherwise. */
static int comp_cand (qse_lda_t* lda,
	const void* dptr1, qse_size_t dlen1,
	const void* dptr2, qse_size_t dlen2)
{
	const cand_t* lhs = (const cand_t*)dptr1;
	const cand_t* rhs = (const cand_t*)dptr2;

	return (lhs->node != rhs->node)? 1: 0;
}
static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr) static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr)
{ {
/* initializes dynamic data structures */ /* initializes dynamic data structures */
@ -1729,6 +1838,9 @@ static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr)
qse_lda_setfreeer (&e->cand.set[0], refdowngroupstack_incand); qse_lda_setfreeer (&e->cand.set[0], refdowngroupstack_incand);
qse_lda_setfreeer (&e->cand.set[1], refdowngroupstack_incand); qse_lda_setfreeer (&e->cand.set[1], refdowngroupstack_incand);
qse_lda_setcomper (&e->cand.set[0], comp_cand);
qse_lda_setcomper (&e->cand.set[1], comp_cand);
return 0; return 0;
} }