added simple optmizations to rex1.c

This commit is contained in:
hyung-hwan 2009-12-09 07:15:00 +00:00
parent c7d45b6099
commit b5dd6fa606
2 changed files with 153 additions and 40 deletions

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.h 309 2009-11-27 13:46:49Z hyunghwan.chung $
* $Id: rex.h 310 2009-12-08 13:15:00Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -154,6 +154,7 @@ struct qse_rex_node_t
struct
{
qse_rex_node_t* head;
qse_rex_node_t* end;
} g;
struct

View File

@ -270,11 +270,9 @@ static qse_rex_node_t* newnopnode (comp_t* c)
return newnode (c, QSE_REX_NODE_NOP);
}
static qse_rex_node_t* newgroupnode (comp_t* c, qse_rex_node_t* head)
static qse_rex_node_t* newgroupnode (comp_t* c)
{
qse_rex_node_t* n = newnode (c, QSE_REX_NODE_GROUP);
if (n != QSE_NULL) n->u.g.head = head;
return n;
return newnode (c, QSE_REX_NODE_GROUP);
}
static qse_rex_node_t* newgroupendnode (comp_t* c, qse_rex_node_t* group)
@ -740,7 +738,7 @@ static qse_rex_node_t* comp2 (comp_t* com)
qse_rex_node_t* x, * ge;
n = newgroupnode (com, QSE_NULL);
n = newgroupnode (com);
if (n == QSE_NULL) return QSE_NULL;
ge = newgroupendnode (com, n);
@ -749,6 +747,9 @@ static qse_rex_node_t* comp2 (comp_t* com)
if (getc_esc(com) <= -1) return QSE_NULL;
com->gdepth++;
/* pass the GROUPEND node so that the
* last node in the subgroup links to
* this GROUPEND node. */
x = comp0 (com, ge);
if (x == QSE_NULL) return QSE_NULL;
@ -762,6 +763,7 @@ static qse_rex_node_t* comp2 (comp_t* com)
if (getc_esc(com) <= -1) return QSE_NULL;
n->u.g.head = x;
n->u.g.end = ge;
break;
}
@ -845,11 +847,18 @@ static qse_rex_node_t* comp2 (comp_t* com)
return n;
}
/* compile a list of atoms at the outermost level and/or
* within a subgroup */
static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair)
{
pair->head = newnopnode (c);
if (pair->head == QSE_NULL) return QSE_NULL;
#ifdef DONOT_SKIP_NOP
pair->head->occ.min = 1;
pair->head->occ.max = 1;
#endif
pair->tail = pair->head;
while (!IS_SPE(c,QSE_T('|')) && !IS_EOF(c) &&
@ -858,6 +867,18 @@ static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair)
qse_rex_node_t* tmp = comp2 (c);
if (tmp == QSE_NULL) return QSE_NULL;
if (tmp->id == QSE_REX_NODE_GROUP)
{
/* simple optimization to remove an empty group */
qse_rex_node_t* gg = tmp->u.g.head;
while (gg->id == QSE_REX_NODE_NOP) gg = gg->next;
if (gg->id == QSE_REX_NODE_GROUPEND)
{
/* exclude an empty subgroup */
continue;
}
}
pair->tail->next = tmp;
pair->tail = tmp;
}
@ -1160,6 +1181,7 @@ static int addsimplecand (
qse_size_t occ, const qse_char_t* mptr)
{
QSE_ASSERT (
node->id == QSE_REX_NODE_NOP ||
node->id == QSE_REX_NODE_BOL ||
node->id == QSE_REX_NODE_EOL ||
node->id == QSE_REX_NODE_ANY ||
@ -1177,12 +1199,21 @@ static int addsimplecand (
/*if (node->id == QSE_REX_NODE_CHAR)
qse_printf (QSE_T("adding %d %c\n"), node->id, node->u.c);
else
qse_printf (QSE_T("adding %d NA\n"), node->id);*/
qse_printf (QSE_T("adding %d NA\n"), node->id);
*/
if (qse_lda_search (
&e->cand.set[e->cand.pending],
0,
&cand, 1) != QSE_LDA_NIL)
{
return 0;
}
if (qse_lda_insert (
&e->cand.set[e->cand.pending],
QSE_LDA_SIZE(&e->cand.set[e->cand.pending]),
&cand, 1) == (qse_size_t)-1)
&cand, 1) == QSE_LDA_NIL)
{
e->rex->errnum = QSE_REX_ENOMEM;
return -1;
@ -1193,13 +1224,22 @@ qse_printf (QSE_T("adding %d NA\n"), node->id);*/
return 0;
}
/* addcands() function add a candicate from candnode.
* if candnode is not a simple node, it traverses further
* until it reaches a simple node. prevnode is the last
* GROUPEND node visited during traversal. If no GROUPEND
* is visited yet, it can be any starting node */
static int addcands (
exec_t* e, group_t* group, qse_rex_node_t* prevnode,
qse_rex_node_t* candnode, const qse_char_t* mptr)
{
warpback:
#ifndef DONOT_SKIP_NOP
/* skip all NOP nodes */
while (candnode != QSE_NULL && candnode->id == QSE_REX_NODE_NOP)
candnode = candnode->next;
#endif
/* nothing to add */
if (candnode == QSE_NULL) return 0;
@ -1252,24 +1292,32 @@ static int addcands (
case QSE_REX_NODE_GROUP:
{
int n;
qse_rex_node_t* front;
if (candnode->occ.min <= 0)
/*qse_printf (QSE_T("GROUP %p PREV %p\n"), candnode, prevnode);*/
/* skip all NOP nodes */
front = candnode->u.g.head;
while (front->id == QSE_REX_NODE_NOP)
front = front->next;
if (front->id == QSE_REX_NODE_GROUPEND)
{
/* if the group node is optional,
* add the next node to the candidate array. */
refupgroupstack (group);
n = addcands (e, group,
prevnode, candnode->next, mptr);
refdowngroupstack (group, e->rex->mmgr);
if (n <= -1) return -1;
/* if GROUPEND is reached, the group
* is empty. jump to the next node
* regardless of its occurrence.
* however, this will never be reached
* as it has been removed in comp() */
candnode = candnode->next;
goto warpback;
}
if (candnode->occ.max > 0)
{
/* add the first node in a subgroup
* as a candidate */
group_t* gx;
/* push the current group node (candnode) to
* the group stack. if candnode->next is
* added to the candidate array, which means
@ -1285,12 +1333,28 @@ static int addcands (
/* add the first node in the group to
* the candidate array */
refupgroupstack (gx);
n = addcands (e, gx,
candnode, candnode->u.g.head, mptr);
n = addcands (e, gx, prevnode, front, mptr);
refdowngroupstack (gx, e->rex->mmgr);
if (n <= -1) return -1;
}
if (candnode->occ.min <= 0)
{
/* if the group node is optional,
* add the next node to the candidate array. */
/* BEGIN avoid recursion */
#if 0
refupgroupstack (group);
n = addcands (e, group, prevnode, candnode->next, mptr);
refdowngroupstack (group, e->rex->mmgr);
if (n <= -1) return -1;
#endif
candnode = candnode->next;
goto warpback;
/* END avoid recursion */
}
break;
}
@ -1302,8 +1366,10 @@ static int addcands (
qse_rex_node_t* node;
qse_size_t occ;
QSE_ASSERTX (group != QSE_NULL && group->next != QSE_NULL,
"GROUPEND reached must be paired up with a GROUP");
/*qse_printf (QSE_T("GROUPEND %p PREV %p\n"), candnode, prevnode);*/
QSE_ASSERTX (
group != QSE_NULL && group->next != QSE_NULL,
"GROUPEND must be paired up with GROUP");
if (prevnode == candnode)
{
@ -1323,7 +1389,10 @@ static int addcands (
occ = top->occ;
node = top->node;
QSE_ASSERT (node == candnode->u.ge.group);
QSE_ASSERTX (node == candnode->u.ge.group,
"The GROUP node in the group stack must be the one "
"pairing up with the GROUPEND node."
);
if (occ >= node->occ.min)
{
@ -1366,19 +1435,34 @@ static int addcands (
}
refupgroupstack (gx);
n = addcands (e, gx, candnode, node->next, mptr);
if (prevnode != QSE_NULL && prevnode->id == QSE_REX_NODE_GROUPEND)
{
n = addcands (e, gx, prevnode, node->next, mptr);
}
else
{
n = addcands (e, gx, candnode, node->next, mptr);
}
refdowngroupstack (gx, e->rex->mmgr);
if (n <= -1) return -1;
}
if (occ < node->occ.max)
{
/* need to repeat itself. */
refupgroupstack (group);
n = addcands (e, group, candnode, node->u.g.head, mptr);
refdowngroupstack (group, e->rex->mmgr);
/* repeat itself. */
/* BEGIN avoid recursion */
#if 0
refupgroupstack (group);
n = addcands (e, group, prevnode, node->u.g.head, mptr);
refdowngroupstack (group, e->rex->mmgr);
if (n <= -1) return -1;
#endif
prevnode = candnode;
candnode = node->u.g.head;
goto warpback;
/* END avoid recursion */
}
break;
@ -1388,16 +1472,6 @@ static int addcands (
{
int n;
if (candnode->occ.min <= 0)
{
/* if the node is optional,
* add the next node to the candidate array */
refupgroupstack (group);
n = addcands (e, group, prevnode, candnode->next, mptr);
refdowngroupstack (group, e->rex->mmgr);
if (n <= -1) return -1;
}
if (candnode->occ.max > 0)
{
group_t* gx;
@ -1423,6 +1497,24 @@ static int addcands (
if (n <= -1) return -1;
}
if (candnode->occ.min <= 0)
{
/* if the node is optional,
* add the next node to the candidate array */
/* BEGIN avoid recursion */
#if 0
refupgroupstack (group);
n = addcands (e, group, prevnode, candnode->next, mptr);
refdowngroupstack (group, e->rex->mmgr);
if (n <= -1) return -1;
#endif
candnode = candnode->next;
goto warpback;
/* END avoid recursion */
}
break;
}
}
@ -1507,6 +1599,12 @@ static int match (exec_t* e)
switch (node->id)
{
#ifdef DONOT_SKIP_NOP
case QSE_REX_NODE_NOP:
nmptr = cand->mptr;
break;
#endif
case QSE_REX_NODE_BOL:
if (cand->mptr == e->str.ptr)
{
@ -1568,7 +1666,8 @@ static int match (exec_t* e)
"QSE_REX_NODE_EOL, "
"QSE_REX_NODE_ANY, "
"QSE_REX_NODE_CHAR, "
"QSE_REX_NODE_CSET");
"QSE_REX_NODE_CSET, "
"QSE_REX_NODE_NOP");
break;
}
@ -1705,6 +1804,16 @@ static void refdowngroupstack_incand (qse_lda_t* lda, void* dptr, qse_size_t dle
refdowngroupstack (((cand_t*)dptr)->group, lda->mmgr);
}
static int comp_cand (qse_lda_t* lda,
const void* dptr1, qse_size_t dlen1,
const void* dptr2, qse_size_t dlen2)
{
cand_t* c1 = (cand_t*)dptr1;
cand_t* c2 = (cand_t*)dptr2;
if (c1->node == c2->node) return 0;
return 1;
}
static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr)
{
/* initializes dynamic data structures */
@ -1729,6 +1838,9 @@ static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr)
qse_lda_setfreeer (&e->cand.set[0], refdowngroupstack_incand);
qse_lda_setfreeer (&e->cand.set[1], refdowngroupstack_incand);
qse_lda_setcomper (&e->cand.set[0], comp_cand);
qse_lda_setcomper (&e->cand.set[1], comp_cand);
return 0;
}