added simple optimizations to rex1.c
This commit is contained in:
parent
c7d45b6099
commit
b5dd6fa606
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: rex.h 309 2009-11-27 13:46:49Z hyunghwan.chung $
|
* $Id: rex.h 310 2009-12-08 13:15:00Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
This file is part of QSE.
|
This file is part of QSE.
|
||||||
@ -154,6 +154,7 @@ struct qse_rex_node_t
|
|||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
qse_rex_node_t* head;
|
qse_rex_node_t* head;
|
||||||
|
qse_rex_node_t* end;
|
||||||
} g;
|
} g;
|
||||||
|
|
||||||
struct
|
struct
|
||||||
|
@ -270,11 +270,9 @@ static qse_rex_node_t* newnopnode (comp_t* c)
|
|||||||
return newnode (c, QSE_REX_NODE_NOP);
|
return newnode (c, QSE_REX_NODE_NOP);
|
||||||
}
|
}
|
||||||
|
|
||||||
static qse_rex_node_t* newgroupnode (comp_t* c, qse_rex_node_t* head)
|
static qse_rex_node_t* newgroupnode (comp_t* c)
|
||||||
{
|
{
|
||||||
qse_rex_node_t* n = newnode (c, QSE_REX_NODE_GROUP);
|
return newnode (c, QSE_REX_NODE_GROUP);
|
||||||
if (n != QSE_NULL) n->u.g.head = head;
|
|
||||||
return n;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static qse_rex_node_t* newgroupendnode (comp_t* c, qse_rex_node_t* group)
|
static qse_rex_node_t* newgroupendnode (comp_t* c, qse_rex_node_t* group)
|
||||||
@ -740,7 +738,7 @@ static qse_rex_node_t* comp2 (comp_t* com)
|
|||||||
|
|
||||||
qse_rex_node_t* x, * ge;
|
qse_rex_node_t* x, * ge;
|
||||||
|
|
||||||
n = newgroupnode (com, QSE_NULL);
|
n = newgroupnode (com);
|
||||||
if (n == QSE_NULL) return QSE_NULL;
|
if (n == QSE_NULL) return QSE_NULL;
|
||||||
|
|
||||||
ge = newgroupendnode (com, n);
|
ge = newgroupendnode (com, n);
|
||||||
@ -749,6 +747,9 @@ static qse_rex_node_t* comp2 (comp_t* com)
|
|||||||
if (getc_esc(com) <= -1) return QSE_NULL;
|
if (getc_esc(com) <= -1) return QSE_NULL;
|
||||||
|
|
||||||
com->gdepth++;
|
com->gdepth++;
|
||||||
|
/* pass the GROUPEND node so that the
|
||||||
|
* last node in the subgroup links to
|
||||||
|
* this GROUPEND node. */
|
||||||
x = comp0 (com, ge);
|
x = comp0 (com, ge);
|
||||||
if (x == QSE_NULL) return QSE_NULL;
|
if (x == QSE_NULL) return QSE_NULL;
|
||||||
|
|
||||||
@ -762,6 +763,7 @@ static qse_rex_node_t* comp2 (comp_t* com)
|
|||||||
if (getc_esc(com) <= -1) return QSE_NULL;
|
if (getc_esc(com) <= -1) return QSE_NULL;
|
||||||
|
|
||||||
n->u.g.head = x;
|
n->u.g.head = x;
|
||||||
|
n->u.g.end = ge;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -845,11 +847,18 @@ static qse_rex_node_t* comp2 (comp_t* com)
|
|||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* compile a list of atoms at the outermost level and/or
|
||||||
|
* within a subgroup */
|
||||||
static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair)
|
static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair)
|
||||||
{
|
{
|
||||||
pair->head = newnopnode (c);
|
pair->head = newnopnode (c);
|
||||||
if (pair->head == QSE_NULL) return QSE_NULL;
|
if (pair->head == QSE_NULL) return QSE_NULL;
|
||||||
|
|
||||||
|
#ifdef DONOT_SKIP_NOP
|
||||||
|
pair->head->occ.min = 1;
|
||||||
|
pair->head->occ.max = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
pair->tail = pair->head;
|
pair->tail = pair->head;
|
||||||
|
|
||||||
while (!IS_SPE(c,QSE_T('|')) && !IS_EOF(c) &&
|
while (!IS_SPE(c,QSE_T('|')) && !IS_EOF(c) &&
|
||||||
@ -858,6 +867,18 @@ static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair)
|
|||||||
qse_rex_node_t* tmp = comp2 (c);
|
qse_rex_node_t* tmp = comp2 (c);
|
||||||
if (tmp == QSE_NULL) return QSE_NULL;
|
if (tmp == QSE_NULL) return QSE_NULL;
|
||||||
|
|
||||||
|
if (tmp->id == QSE_REX_NODE_GROUP)
|
||||||
|
{
|
||||||
|
/* simple optimization to remove an empty group */
|
||||||
|
qse_rex_node_t* gg = tmp->u.g.head;
|
||||||
|
while (gg->id == QSE_REX_NODE_NOP) gg = gg->next;
|
||||||
|
if (gg->id == QSE_REX_NODE_GROUPEND)
|
||||||
|
{
|
||||||
|
/* exclude an empty subgroup */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pair->tail->next = tmp;
|
pair->tail->next = tmp;
|
||||||
pair->tail = tmp;
|
pair->tail = tmp;
|
||||||
}
|
}
|
||||||
@ -1160,6 +1181,7 @@ static int addsimplecand (
|
|||||||
qse_size_t occ, const qse_char_t* mptr)
|
qse_size_t occ, const qse_char_t* mptr)
|
||||||
{
|
{
|
||||||
QSE_ASSERT (
|
QSE_ASSERT (
|
||||||
|
node->id == QSE_REX_NODE_NOP ||
|
||||||
node->id == QSE_REX_NODE_BOL ||
|
node->id == QSE_REX_NODE_BOL ||
|
||||||
node->id == QSE_REX_NODE_EOL ||
|
node->id == QSE_REX_NODE_EOL ||
|
||||||
node->id == QSE_REX_NODE_ANY ||
|
node->id == QSE_REX_NODE_ANY ||
|
||||||
@ -1177,12 +1199,21 @@ static int addsimplecand (
|
|||||||
/*if (node->id == QSE_REX_NODE_CHAR)
|
/*if (node->id == QSE_REX_NODE_CHAR)
|
||||||
qse_printf (QSE_T("adding %d %c\n"), node->id, node->u.c);
|
qse_printf (QSE_T("adding %d %c\n"), node->id, node->u.c);
|
||||||
else
|
else
|
||||||
qse_printf (QSE_T("adding %d NA\n"), node->id);*/
|
qse_printf (QSE_T("adding %d NA\n"), node->id);
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (qse_lda_search (
|
||||||
|
&e->cand.set[e->cand.pending],
|
||||||
|
0,
|
||||||
|
&cand, 1) != QSE_LDA_NIL)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (qse_lda_insert (
|
if (qse_lda_insert (
|
||||||
&e->cand.set[e->cand.pending],
|
&e->cand.set[e->cand.pending],
|
||||||
QSE_LDA_SIZE(&e->cand.set[e->cand.pending]),
|
QSE_LDA_SIZE(&e->cand.set[e->cand.pending]),
|
||||||
&cand, 1) == (qse_size_t)-1)
|
&cand, 1) == QSE_LDA_NIL)
|
||||||
{
|
{
|
||||||
e->rex->errnum = QSE_REX_ENOMEM;
|
e->rex->errnum = QSE_REX_ENOMEM;
|
||||||
return -1;
|
return -1;
|
||||||
@ -1193,13 +1224,22 @@ qse_printf (QSE_T("adding %d NA\n"), node->id);*/
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* addcands() function adds a candidate from candnode.
|
||||||
|
* if candnode is not a simple node, it traverses further
|
||||||
|
* until it reaches a simple node. prevnode is the last
|
||||||
|
* GROUPEND node visited during traversal. If no GROUPEND
|
||||||
|
* is visited yet, it can be any starting node */
|
||||||
static int addcands (
|
static int addcands (
|
||||||
exec_t* e, group_t* group, qse_rex_node_t* prevnode,
|
exec_t* e, group_t* group, qse_rex_node_t* prevnode,
|
||||||
qse_rex_node_t* candnode, const qse_char_t* mptr)
|
qse_rex_node_t* candnode, const qse_char_t* mptr)
|
||||||
{
|
{
|
||||||
|
warpback:
|
||||||
|
|
||||||
|
#ifndef DONOT_SKIP_NOP
|
||||||
/* skip all NOP nodes */
|
/* skip all NOP nodes */
|
||||||
while (candnode != QSE_NULL && candnode->id == QSE_REX_NODE_NOP)
|
while (candnode != QSE_NULL && candnode->id == QSE_REX_NODE_NOP)
|
||||||
candnode = candnode->next;
|
candnode = candnode->next;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* nothing to add */
|
/* nothing to add */
|
||||||
if (candnode == QSE_NULL) return 0;
|
if (candnode == QSE_NULL) return 0;
|
||||||
@ -1252,24 +1292,32 @@ static int addcands (
|
|||||||
case QSE_REX_NODE_GROUP:
|
case QSE_REX_NODE_GROUP:
|
||||||
{
|
{
|
||||||
int n;
|
int n;
|
||||||
|
qse_rex_node_t* front;
|
||||||
|
|
||||||
if (candnode->occ.min <= 0)
|
/*qse_printf (QSE_T("GROUP %p PREV %p\n"), candnode, prevnode);*/
|
||||||
|
|
||||||
|
/* skip all NOP nodes */
|
||||||
|
front = candnode->u.g.head;
|
||||||
|
while (front->id == QSE_REX_NODE_NOP)
|
||||||
|
front = front->next;
|
||||||
|
if (front->id == QSE_REX_NODE_GROUPEND)
|
||||||
{
|
{
|
||||||
/* if the group node is optional,
|
/* if GROUPEND is reached, the group
|
||||||
* add the next node to the candidate array. */
|
* is empty. jump to the next node
|
||||||
|
* regardless of its occurrence.
|
||||||
refupgroupstack (group);
|
* however, this will never be reached
|
||||||
n = addcands (e, group,
|
* as it has been removed in comp() */
|
||||||
prevnode, candnode->next, mptr);
|
candnode = candnode->next;
|
||||||
refdowngroupstack (group, e->rex->mmgr);
|
goto warpback;
|
||||||
|
|
||||||
if (n <= -1) return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (candnode->occ.max > 0)
|
if (candnode->occ.max > 0)
|
||||||
{
|
{
|
||||||
|
/* add the first node in a subgroup
|
||||||
|
* as a candidate */
|
||||||
|
|
||||||
group_t* gx;
|
group_t* gx;
|
||||||
|
|
||||||
/* push the current group node (candnode) to
|
/* push the current group node (candnode) to
|
||||||
* the group stack. if candnode->next is
|
* the group stack. if candnode->next is
|
||||||
* added to the candidate array, which means
|
* added to the candidate array, which means
|
||||||
@ -1285,12 +1333,28 @@ static int addcands (
|
|||||||
/* add the first node in the group to
|
/* add the first node in the group to
|
||||||
* the candidate array */
|
* the candidate array */
|
||||||
refupgroupstack (gx);
|
refupgroupstack (gx);
|
||||||
n = addcands (e, gx,
|
n = addcands (e, gx, prevnode, front, mptr);
|
||||||
candnode, candnode->u.g.head, mptr);
|
|
||||||
refdowngroupstack (gx, e->rex->mmgr);
|
refdowngroupstack (gx, e->rex->mmgr);
|
||||||
|
|
||||||
if (n <= -1) return -1;
|
if (n <= -1) return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (candnode->occ.min <= 0)
|
||||||
|
{
|
||||||
|
/* if the group node is optional,
|
||||||
|
* add the next node to the candidate array. */
|
||||||
|
|
||||||
|
/* BEGIN avoid recursion */
|
||||||
|
#if 0
|
||||||
|
refupgroupstack (group);
|
||||||
|
n = addcands (e, group, prevnode, candnode->next, mptr);
|
||||||
|
refdowngroupstack (group, e->rex->mmgr);
|
||||||
|
if (n <= -1) return -1;
|
||||||
|
#endif
|
||||||
|
candnode = candnode->next;
|
||||||
|
goto warpback;
|
||||||
|
/* END avoid recursion */
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1302,8 +1366,10 @@ static int addcands (
|
|||||||
qse_rex_node_t* node;
|
qse_rex_node_t* node;
|
||||||
qse_size_t occ;
|
qse_size_t occ;
|
||||||
|
|
||||||
QSE_ASSERTX (group != QSE_NULL && group->next != QSE_NULL,
|
/*qse_printf (QSE_T("GROUPEND %p PREV %p\n"), candnode, prevnode);*/
|
||||||
"GROUPEND reached must be paired up with a GROUP");
|
QSE_ASSERTX (
|
||||||
|
group != QSE_NULL && group->next != QSE_NULL,
|
||||||
|
"GROUPEND must be paired up with GROUP");
|
||||||
|
|
||||||
if (prevnode == candnode)
|
if (prevnode == candnode)
|
||||||
{
|
{
|
||||||
@ -1323,7 +1389,10 @@ static int addcands (
|
|||||||
|
|
||||||
occ = top->occ;
|
occ = top->occ;
|
||||||
node = top->node;
|
node = top->node;
|
||||||
QSE_ASSERT (node == candnode->u.ge.group);
|
QSE_ASSERTX (node == candnode->u.ge.group,
|
||||||
|
"The GROUP node in the group stack must be the one "
|
||||||
|
"pairing up with the GROUPEND node."
|
||||||
|
);
|
||||||
|
|
||||||
if (occ >= node->occ.min)
|
if (occ >= node->occ.min)
|
||||||
{
|
{
|
||||||
@ -1366,19 +1435,34 @@ static int addcands (
|
|||||||
}
|
}
|
||||||
|
|
||||||
refupgroupstack (gx);
|
refupgroupstack (gx);
|
||||||
n = addcands (e, gx, candnode, node->next, mptr);
|
if (prevnode != QSE_NULL && prevnode->id == QSE_REX_NODE_GROUPEND)
|
||||||
|
{
|
||||||
|
n = addcands (e, gx, prevnode, node->next, mptr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
n = addcands (e, gx, candnode, node->next, mptr);
|
||||||
|
}
|
||||||
refdowngroupstack (gx, e->rex->mmgr);
|
refdowngroupstack (gx, e->rex->mmgr);
|
||||||
if (n <= -1) return -1;
|
if (n <= -1) return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (occ < node->occ.max)
|
if (occ < node->occ.max)
|
||||||
{
|
{
|
||||||
/* need to repeat itself. */
|
/* repeat itself. */
|
||||||
refupgroupstack (group);
|
|
||||||
n = addcands (e, group, candnode, node->u.g.head, mptr);
|
|
||||||
refdowngroupstack (group, e->rex->mmgr);
|
|
||||||
|
|
||||||
|
/* BEGIN avoid recursion */
|
||||||
|
#if 0
|
||||||
|
refupgroupstack (group);
|
||||||
|
n = addcands (e, group, prevnode, node->u.g.head, mptr);
|
||||||
|
refdowngroupstack (group, e->rex->mmgr);
|
||||||
if (n <= -1) return -1;
|
if (n <= -1) return -1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
prevnode = candnode;
|
||||||
|
candnode = node->u.g.head;
|
||||||
|
goto warpback;
|
||||||
|
/* END avoid recursion */
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@ -1388,16 +1472,6 @@ static int addcands (
|
|||||||
{
|
{
|
||||||
int n;
|
int n;
|
||||||
|
|
||||||
if (candnode->occ.min <= 0)
|
|
||||||
{
|
|
||||||
/* if the node is optional,
|
|
||||||
* add the next node to the candidate array */
|
|
||||||
refupgroupstack (group);
|
|
||||||
n = addcands (e, group, prevnode, candnode->next, mptr);
|
|
||||||
refdowngroupstack (group, e->rex->mmgr);
|
|
||||||
if (n <= -1) return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (candnode->occ.max > 0)
|
if (candnode->occ.max > 0)
|
||||||
{
|
{
|
||||||
group_t* gx;
|
group_t* gx;
|
||||||
@ -1423,6 +1497,24 @@ static int addcands (
|
|||||||
if (n <= -1) return -1;
|
if (n <= -1) return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (candnode->occ.min <= 0)
|
||||||
|
{
|
||||||
|
/* if the node is optional,
|
||||||
|
* add the next node to the candidate array */
|
||||||
|
|
||||||
|
/* BEGIN avoid recursion */
|
||||||
|
#if 0
|
||||||
|
refupgroupstack (group);
|
||||||
|
n = addcands (e, group, prevnode, candnode->next, mptr);
|
||||||
|
refdowngroupstack (group, e->rex->mmgr);
|
||||||
|
if (n <= -1) return -1;
|
||||||
|
#endif
|
||||||
|
candnode = candnode->next;
|
||||||
|
goto warpback;
|
||||||
|
/* END avoid recursion */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1507,6 +1599,12 @@ static int match (exec_t* e)
|
|||||||
|
|
||||||
switch (node->id)
|
switch (node->id)
|
||||||
{
|
{
|
||||||
|
#ifdef DONOT_SKIP_NOP
|
||||||
|
case QSE_REX_NODE_NOP:
|
||||||
|
nmptr = cand->mptr;
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
case QSE_REX_NODE_BOL:
|
case QSE_REX_NODE_BOL:
|
||||||
if (cand->mptr == e->str.ptr)
|
if (cand->mptr == e->str.ptr)
|
||||||
{
|
{
|
||||||
@ -1568,7 +1666,8 @@ static int match (exec_t* e)
|
|||||||
"QSE_REX_NODE_EOL, "
|
"QSE_REX_NODE_EOL, "
|
||||||
"QSE_REX_NODE_ANY, "
|
"QSE_REX_NODE_ANY, "
|
||||||
"QSE_REX_NODE_CHAR, "
|
"QSE_REX_NODE_CHAR, "
|
||||||
"QSE_REX_NODE_CSET");
|
"QSE_REX_NODE_CSET, "
|
||||||
|
"QSE_REX_NODE_NOP");
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1705,6 +1804,16 @@ static void refdowngroupstack_incand (qse_lda_t* lda, void* dptr, qse_size_t dle
|
|||||||
refdowngroupstack (((cand_t*)dptr)->group, lda->mmgr);
|
refdowngroupstack (((cand_t*)dptr)->group, lda->mmgr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Comparator for candidate (cand_t) elements: returns 0 when both
 * candidates refer to the same node and 1 otherwise. The lda, dlen1
 * and dlen2 arguments are not used. */
static int comp_cand (qse_lda_t* lda,
|
||||||
|
const void* dptr1, qse_size_t dlen1,
|
||||||
|
const void* dptr2, qse_size_t dlen2)
|
||||||
|
{
|
||||||
|
cand_t* c1 = (cand_t*)dptr1;
|
||||||
|
cand_t* c2 = (cand_t*)dptr2;
|
||||||
|
if (c1->node == c2->node) return 0;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr)
|
static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr)
|
||||||
{
|
{
|
||||||
/* initializes dynamic data structures */
|
/* initializes dynamic data structures */
|
||||||
@ -1729,6 +1838,9 @@ static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr)
|
|||||||
qse_lda_setfreeer (&e->cand.set[0], refdowngroupstack_incand);
|
qse_lda_setfreeer (&e->cand.set[0], refdowngroupstack_incand);
|
||||||
qse_lda_setfreeer (&e->cand.set[1], refdowngroupstack_incand);
|
qse_lda_setfreeer (&e->cand.set[1], refdowngroupstack_incand);
|
||||||
|
|
||||||
|
qse_lda_setcomper (&e->cand.set[0], comp_cand);
|
||||||
|
qse_lda_setcomper (&e->cand.set[1], comp_cand);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user