added simple optimizations to rex1.c
This commit is contained in:
parent
c7d45b6099
commit
b5dd6fa606
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: rex.h 309 2009-11-27 13:46:49Z hyunghwan.chung $
|
||||
* $Id: rex.h 310 2009-12-08 13:15:00Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -154,6 +154,7 @@ struct qse_rex_node_t
|
||||
struct
|
||||
{
|
||||
qse_rex_node_t* head;
|
||||
qse_rex_node_t* end;
|
||||
} g;
|
||||
|
||||
struct
|
||||
|
@ -270,11 +270,9 @@ static qse_rex_node_t* newnopnode (comp_t* c)
|
||||
return newnode (c, QSE_REX_NODE_NOP);
|
||||
}
|
||||
|
||||
static qse_rex_node_t* newgroupnode (comp_t* c, qse_rex_node_t* head)
|
||||
static qse_rex_node_t* newgroupnode (comp_t* c)
|
||||
{
|
||||
qse_rex_node_t* n = newnode (c, QSE_REX_NODE_GROUP);
|
||||
if (n != QSE_NULL) n->u.g.head = head;
|
||||
return n;
|
||||
return newnode (c, QSE_REX_NODE_GROUP);
|
||||
}
|
||||
|
||||
static qse_rex_node_t* newgroupendnode (comp_t* c, qse_rex_node_t* group)
|
||||
@ -740,7 +738,7 @@ static qse_rex_node_t* comp2 (comp_t* com)
|
||||
|
||||
qse_rex_node_t* x, * ge;
|
||||
|
||||
n = newgroupnode (com, QSE_NULL);
|
||||
n = newgroupnode (com);
|
||||
if (n == QSE_NULL) return QSE_NULL;
|
||||
|
||||
ge = newgroupendnode (com, n);
|
||||
@ -749,6 +747,9 @@ static qse_rex_node_t* comp2 (comp_t* com)
|
||||
if (getc_esc(com) <= -1) return QSE_NULL;
|
||||
|
||||
com->gdepth++;
|
||||
/* pass the GROUPEND node so that the
|
||||
* last node in the subgroup links to
|
||||
* this GROUPEND node. */
|
||||
x = comp0 (com, ge);
|
||||
if (x == QSE_NULL) return QSE_NULL;
|
||||
|
||||
@ -762,6 +763,7 @@ static qse_rex_node_t* comp2 (comp_t* com)
|
||||
if (getc_esc(com) <= -1) return QSE_NULL;
|
||||
|
||||
n->u.g.head = x;
|
||||
n->u.g.end = ge;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -845,11 +847,18 @@ static qse_rex_node_t* comp2 (comp_t* com)
|
||||
return n;
|
||||
}
|
||||
|
||||
/* compile a list of atoms at the outermost level and/or
|
||||
* within a subgroup */
|
||||
static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair)
|
||||
{
|
||||
pair->head = newnopnode (c);
|
||||
if (pair->head == QSE_NULL) return QSE_NULL;
|
||||
|
||||
#ifdef DONOT_SKIP_NOP
|
||||
pair->head->occ.min = 1;
|
||||
pair->head->occ.max = 1;
|
||||
#endif
|
||||
|
||||
pair->tail = pair->head;
|
||||
|
||||
while (!IS_SPE(c,QSE_T('|')) && !IS_EOF(c) &&
|
||||
@ -858,6 +867,18 @@ static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair)
|
||||
qse_rex_node_t* tmp = comp2 (c);
|
||||
if (tmp == QSE_NULL) return QSE_NULL;
|
||||
|
||||
if (tmp->id == QSE_REX_NODE_GROUP)
|
||||
{
|
||||
/* simple optimization to remove an empty group */
|
||||
qse_rex_node_t* gg = tmp->u.g.head;
|
||||
while (gg->id == QSE_REX_NODE_NOP) gg = gg->next;
|
||||
if (gg->id == QSE_REX_NODE_GROUPEND)
|
||||
{
|
||||
/* exclude an empty subgroup */
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
pair->tail->next = tmp;
|
||||
pair->tail = tmp;
|
||||
}
|
||||
@ -1160,6 +1181,7 @@ static int addsimplecand (
|
||||
qse_size_t occ, const qse_char_t* mptr)
|
||||
{
|
||||
QSE_ASSERT (
|
||||
node->id == QSE_REX_NODE_NOP ||
|
||||
node->id == QSE_REX_NODE_BOL ||
|
||||
node->id == QSE_REX_NODE_EOL ||
|
||||
node->id == QSE_REX_NODE_ANY ||
|
||||
@ -1177,12 +1199,21 @@ static int addsimplecand (
|
||||
/*if (node->id == QSE_REX_NODE_CHAR)
|
||||
qse_printf (QSE_T("adding %d %c\n"), node->id, node->u.c);
|
||||
else
|
||||
qse_printf (QSE_T("adding %d NA\n"), node->id);*/
|
||||
qse_printf (QSE_T("adding %d NA\n"), node->id);
|
||||
*/
|
||||
|
||||
if (qse_lda_search (
|
||||
&e->cand.set[e->cand.pending],
|
||||
0,
|
||||
&cand, 1) != QSE_LDA_NIL)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (qse_lda_insert (
|
||||
&e->cand.set[e->cand.pending],
|
||||
QSE_LDA_SIZE(&e->cand.set[e->cand.pending]),
|
||||
&cand, 1) == (qse_size_t)-1)
|
||||
&cand, 1) == QSE_LDA_NIL)
|
||||
{
|
||||
e->rex->errnum = QSE_REX_ENOMEM;
|
||||
return -1;
|
||||
@ -1193,13 +1224,22 @@ qse_printf (QSE_T("adding %d NA\n"), node->id);*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The addcands() function adds a candidate from candnode.
|
||||
* if candnode is not a simple node, it traverses further
|
||||
* until it reaches a simple node. prevnode is the last
|
||||
* GROUPEND node visited during traversal. If no GROUPEND
|
||||
* is visited yet, it can be any starting node */
|
||||
static int addcands (
|
||||
exec_t* e, group_t* group, qse_rex_node_t* prevnode,
|
||||
qse_rex_node_t* candnode, const qse_char_t* mptr)
|
||||
{
|
||||
warpback:
|
||||
|
||||
#ifndef DONOT_SKIP_NOP
|
||||
/* skip all NOP nodes */
|
||||
while (candnode != QSE_NULL && candnode->id == QSE_REX_NODE_NOP)
|
||||
candnode = candnode->next;
|
||||
#endif
|
||||
|
||||
/* nothing to add */
|
||||
if (candnode == QSE_NULL) return 0;
|
||||
@ -1252,22 +1292,30 @@ static int addcands (
|
||||
case QSE_REX_NODE_GROUP:
|
||||
{
|
||||
int n;
|
||||
qse_rex_node_t* front;
|
||||
|
||||
if (candnode->occ.min <= 0)
|
||||
/*qse_printf (QSE_T("GROUP %p PREV %p\n"), candnode, prevnode);*/
|
||||
|
||||
/* skip all NOP nodes */
|
||||
front = candnode->u.g.head;
|
||||
while (front->id == QSE_REX_NODE_NOP)
|
||||
front = front->next;
|
||||
if (front->id == QSE_REX_NODE_GROUPEND)
|
||||
{
|
||||
/* if the group node is optional,
|
||||
* add the next node to the candidate array. */
|
||||
|
||||
refupgroupstack (group);
|
||||
n = addcands (e, group,
|
||||
prevnode, candnode->next, mptr);
|
||||
refdowngroupstack (group, e->rex->mmgr);
|
||||
|
||||
if (n <= -1) return -1;
|
||||
/* if GROUPEND is reached, the group
|
||||
* is empty. jump to the next node
|
||||
* regardless of its occurrence.
|
||||
* however, this will never be reached
|
||||
* as it has been removed in comp() */
|
||||
candnode = candnode->next;
|
||||
goto warpback;
|
||||
}
|
||||
|
||||
if (candnode->occ.max > 0)
|
||||
{
|
||||
/* add the first node in a subgroup
|
||||
* as a candidate */
|
||||
|
||||
group_t* gx;
|
||||
|
||||
/* push the current group node (candnode) to
|
||||
@ -1285,13 +1333,29 @@ static int addcands (
|
||||
/* add the first node in the group to
|
||||
* the candidate array */
|
||||
refupgroupstack (gx);
|
||||
n = addcands (e, gx,
|
||||
candnode, candnode->u.g.head, mptr);
|
||||
n = addcands (e, gx, prevnode, front, mptr);
|
||||
refdowngroupstack (gx, e->rex->mmgr);
|
||||
|
||||
if (n <= -1) return -1;
|
||||
}
|
||||
|
||||
if (candnode->occ.min <= 0)
|
||||
{
|
||||
/* if the group node is optional,
|
||||
* add the next node to the candidate array. */
|
||||
|
||||
/* BEGIN avoid recursion */
|
||||
#if 0
|
||||
refupgroupstack (group);
|
||||
n = addcands (e, group, prevnode, candnode->next, mptr);
|
||||
refdowngroupstack (group, e->rex->mmgr);
|
||||
if (n <= -1) return -1;
|
||||
#endif
|
||||
candnode = candnode->next;
|
||||
goto warpback;
|
||||
/* END avoid recursion */
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1302,8 +1366,10 @@ static int addcands (
|
||||
qse_rex_node_t* node;
|
||||
qse_size_t occ;
|
||||
|
||||
QSE_ASSERTX (group != QSE_NULL && group->next != QSE_NULL,
|
||||
"GROUPEND reached must be paired up with a GROUP");
|
||||
/*qse_printf (QSE_T("GROUPEND %p PREV %p\n"), candnode, prevnode);*/
|
||||
QSE_ASSERTX (
|
||||
group != QSE_NULL && group->next != QSE_NULL,
|
||||
"GROUPEND must be paired up with GROUP");
|
||||
|
||||
if (prevnode == candnode)
|
||||
{
|
||||
@ -1323,7 +1389,10 @@ static int addcands (
|
||||
|
||||
occ = top->occ;
|
||||
node = top->node;
|
||||
QSE_ASSERT (node == candnode->u.ge.group);
|
||||
QSE_ASSERTX (node == candnode->u.ge.group,
|
||||
"The GROUP node in the group stack must be the one "
|
||||
"pairing up with the GROUPEND node."
|
||||
);
|
||||
|
||||
if (occ >= node->occ.min)
|
||||
{
|
||||
@ -1366,19 +1435,34 @@ static int addcands (
|
||||
}
|
||||
|
||||
refupgroupstack (gx);
|
||||
if (prevnode != QSE_NULL && prevnode->id == QSE_REX_NODE_GROUPEND)
|
||||
{
|
||||
n = addcands (e, gx, prevnode, node->next, mptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
n = addcands (e, gx, candnode, node->next, mptr);
|
||||
}
|
||||
refdowngroupstack (gx, e->rex->mmgr);
|
||||
if (n <= -1) return -1;
|
||||
}
|
||||
|
||||
if (occ < node->occ.max)
|
||||
{
|
||||
/* need to repeat itself. */
|
||||
refupgroupstack (group);
|
||||
n = addcands (e, group, candnode, node->u.g.head, mptr);
|
||||
refdowngroupstack (group, e->rex->mmgr);
|
||||
/* repeat itself. */
|
||||
|
||||
/* BEGIN avoid recursion */
|
||||
#if 0
|
||||
refupgroupstack (group);
|
||||
n = addcands (e, group, prevnode, node->u.g.head, mptr);
|
||||
refdowngroupstack (group, e->rex->mmgr);
|
||||
if (n <= -1) return -1;
|
||||
#endif
|
||||
|
||||
prevnode = candnode;
|
||||
candnode = node->u.g.head;
|
||||
goto warpback;
|
||||
/* END avoid recursion */
|
||||
}
|
||||
|
||||
break;
|
||||
@ -1388,16 +1472,6 @@ static int addcands (
|
||||
{
|
||||
int n;
|
||||
|
||||
if (candnode->occ.min <= 0)
|
||||
{
|
||||
/* if the node is optional,
|
||||
* add the next node to the candidate array */
|
||||
refupgroupstack (group);
|
||||
n = addcands (e, group, prevnode, candnode->next, mptr);
|
||||
refdowngroupstack (group, e->rex->mmgr);
|
||||
if (n <= -1) return -1;
|
||||
}
|
||||
|
||||
if (candnode->occ.max > 0)
|
||||
{
|
||||
group_t* gx;
|
||||
@ -1423,6 +1497,24 @@ static int addcands (
|
||||
if (n <= -1) return -1;
|
||||
}
|
||||
|
||||
if (candnode->occ.min <= 0)
|
||||
{
|
||||
/* if the node is optional,
|
||||
* add the next node to the candidate array */
|
||||
|
||||
/* BEGIN avoid recursion */
|
||||
#if 0
|
||||
refupgroupstack (group);
|
||||
n = addcands (e, group, prevnode, candnode->next, mptr);
|
||||
refdowngroupstack (group, e->rex->mmgr);
|
||||
if (n <= -1) return -1;
|
||||
#endif
|
||||
candnode = candnode->next;
|
||||
goto warpback;
|
||||
/* END avoid recursion */
|
||||
}
|
||||
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1507,6 +1599,12 @@ static int match (exec_t* e)
|
||||
|
||||
switch (node->id)
|
||||
{
|
||||
#ifdef DONOT_SKIP_NOP
|
||||
case QSE_REX_NODE_NOP:
|
||||
nmptr = cand->mptr;
|
||||
break;
|
||||
#endif
|
||||
|
||||
case QSE_REX_NODE_BOL:
|
||||
if (cand->mptr == e->str.ptr)
|
||||
{
|
||||
@ -1568,7 +1666,8 @@ static int match (exec_t* e)
|
||||
"QSE_REX_NODE_EOL, "
|
||||
"QSE_REX_NODE_ANY, "
|
||||
"QSE_REX_NODE_CHAR, "
|
||||
"QSE_REX_NODE_CSET");
|
||||
"QSE_REX_NODE_CSET, "
|
||||
"QSE_REX_NODE_NOP");
|
||||
|
||||
break;
|
||||
}
|
||||
@ -1705,6 +1804,16 @@ static void refdowngroupstack_incand (qse_lda_t* lda, void* dptr, qse_size_t dle
|
||||
refdowngroupstack (((cand_t*)dptr)->group, lda->mmgr);
|
||||
}
|
||||
|
||||
/*
 * Comparer for candidate elements: two candidates are treated as equal
 * when they point at the same node.
 *
 * lda, dlen1 and dlen2 are not used. Only the node member of each
 * candidate is examined; no other member takes part in the comparison.
 *
 * Returns 0 if both candidates refer to the same node, 1 otherwise.
 */
static int comp_cand (qse_lda_t* lda,
	const void* dptr1, qse_size_t dlen1,
	const void* dptr2, qse_size_t dlen2)
{
	/* the elements are only read, so keep the const qualifier
	 * of the incoming pointers instead of casting it away */
	const cand_t* c1 = (const cand_t*)dptr1;
	const cand_t* c2 = (const cand_t*)dptr2;

	return (c1->node == c2->node)? 0: 1;
}
|
||||
|
||||
static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr)
|
||||
{
|
||||
/* initializes dynamic data structures */
|
||||
@ -1729,6 +1838,9 @@ static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr)
|
||||
qse_lda_setfreeer (&e->cand.set[0], refdowngroupstack_incand);
|
||||
qse_lda_setfreeer (&e->cand.set[1], refdowngroupstack_incand);
|
||||
|
||||
qse_lda_setcomper (&e->cand.set[0], comp_cand);
|
||||
qse_lda_setcomper (&e->cand.set[1], comp_cand);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user