diff --git a/qse/include/qse/cmn/rex.h b/qse/include/qse/cmn/rex.h
index 8ae3630e..474a8bf7 100644
--- a/qse/include/qse/cmn/rex.h
+++ b/qse/include/qse/cmn/rex.h
@@ -1,5 +1,5 @@
/*
- * $Id: rex.h 300 2009-11-13 14:01:57Z hyunghwan.chung $
+ * $Id: rex.h 302 2009-11-18 13:47:12Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@@ -113,6 +113,8 @@ enum qse_rex_node_id_t
QSE_REX_NODE_START,
QSE_REX_NODE_END,
QSE_REX_NODE_NOP,
+ QSE_REX_NODE_BOL, /* beginning of line */
+ QSE_REX_NODE_EOL, /* end of line */
QSE_REX_NODE_ANYCHAR, /* dot */
QSE_REX_NODE_CHAR, /* single character */
QSE_REX_NODE_CHARSET, /* character set */
@@ -125,7 +127,7 @@ typedef enum qse_rex_node_id_t qse_rex_node_id_t;
typedef struct qse_rex_node_t qse_rex_node_t;
struct qse_rex_node_t
{
- qse_rex_node_t* link; /* link for management. not used for startnode */
+ qse_rex_node_t* link; /* for internal management. not used for startnode */
qse_rex_node_t* next;
qse_rex_node_id_t id;
diff --git a/qse/lib/cmn/rex1.c b/qse/lib/cmn/rex1.c
index 3beff122..ba770484 100644
--- a/qse/lib/cmn/rex1.c
+++ b/qse/lib/cmn/rex1.c
@@ -1,8 +1,21 @@
/*
* $Id$
+ *
+ Copyright 2006-2009 Chung, Hyung-Hwan.
+ This file is part of QSE.
-{LICENSE HERE}
+ QSE is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation, either version 3 of
+ the License, or (at your option) any later version.
+ QSE is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with QSE. If not, see <http://www.gnu.org/licenses/>.
*/
#include
@@ -10,7 +23,6 @@
#include
#include "mem.h"
-#define GETC(c) do { if getc(c) <= -1) return -1; } while (0)
#define OCC_MAX QSE_TYPE_MAX(qse_size_t)
struct qse_rex_t
@@ -27,11 +39,22 @@ struct comp_t
qse_rex_t* rex;
qse_cstr_t re;
+
const qse_char_t* ptr;
const qse_char_t* end;
- qse_cint_t c;
- qse_size_t grouplvl;
+ struct
+ {
+ enum
+ {
+ CT_NORMAL,
+ CT_SPECIAL
+ } type;
+ qse_cint_t value;
+ int escaped;
+ } c;
+
+ qse_size_t gdepth; /* group depth */
qse_rex_node_t* start;
};
@@ -40,12 +63,27 @@ struct exec_t
{
qse_rex_t* rex;
- qse_cstr_t str;
- qse_cstr_t sub;
+ struct
+ {
+ const qse_char_t* ptr;
+ const qse_char_t* end;
+ } str;
- qse_lda_t cand[2]; /* candidate arrays */
- int xxx, yyy;
- qse_size_t matched;
+ struct
+ {
+ const qse_char_t* ptr;
+ const qse_char_t* end;
+ } sub;
+
+ struct
+ {
+ int active;
+ int pending;
+ qse_lda_t set[2]; /* candidate arrays */
+ } cand;
+
+ qse_size_t nmatches;
+ const qse_char_t* matchend; /* 1 character past the match end */
};
typedef struct pair_t pair_t;
@@ -66,9 +104,18 @@ struct group_t
typedef struct cand_t cand_t;
struct cand_t
{
- qse_rex_node_t* node;
- qse_size_t occ;
- group_t* group;
+ qse_rex_node_t* node;
+ qse_size_t occ;
+
+ /* the stack of groups that this candidate belongs to.
+ * it is in the form of a singly linked list */
+ group_t* group;
+
+ /* match pointer. the number of characters to advance on a match
+ * differs across node types: BOL and EOL don't advance to the
+ * next character on match while ANYCHAR and CHAR do.
+ * therefore, the match pointer is managed on a per-candidate basis. */
+ const qse_char_t* mptr;
};
qse_rex_t* qse_rex_open (qse_mmgr_t* mmgr, qse_size_t xtn, void* code)
@@ -211,12 +258,185 @@ static qse_rex_node_t* newbranchnode (
return n;
}
-static int getc (comp_t* c)
+#define CHECK_END(builder) \
+ do { \
+ if (builder->ptr >= builder->ptn.end) \
+ { \
+ builder->errnum = QSE_REX_EEND; \
+ return -1; \
+ } \
+ } while(0)
+
+#define IS_HEX(c) \
+ ((c >= QSE_T('0') && c <= QSE_T('9')) || \
+ (c >= QSE_T('A') && c <= QSE_T('F')) || \
+ (c >= QSE_T('a') && c <= QSE_T('f')))
+
+#define HEX_TO_NUM(c) \
+ ((c >= QSE_T('0') && c <= QSE_T('9'))? c-QSE_T('0'): \
+ (c >= QSE_T('A') && c <= QSE_T('F'))? c-QSE_T('A')+10: \
+ c-QSE_T('a')+10)
+
+static int getc (comp_t* com)
{
- c->c = (c->ptr < c->end)? *c->ptr++: QSE_CHAR_EOF;
-if (c->c == QSE_CHAR_EOF)
+ if (com->ptr >= com->end)
+ {
+ com->c.type = CT_NORMAL;
+ com->c.value = QSE_CHAR_EOF;
+ com->c.escaped = 0;
+ return 0;
+ }
+
+ com->c.type = CT_NORMAL;
+ com->c.value = *com->ptr++;
+ com->c.escaped = QSE_FALSE;
+
+ if (com->c.value == QSE_T('\\'))
+ {
+ qse_char_t c;
+
+ CHECK_END (builder);
+ c = *com->ptr++;
+
+ if (c == QSE_T('n')) c = QSE_T('\n');
+ else if (c == QSE_T('r')) c = QSE_T('\r');
+ else if (c == QSE_T('t')) c = QSE_T('\t');
+ else if (c == QSE_T('f')) c = QSE_T('\f');
+ else if (c == QSE_T('b')) c = QSE_T('\b');
+ else if (c == QSE_T('v')) c = QSE_T('\v');
+ else if (c == QSE_T('a')) c = QSE_T('\a');
+ else if (c >= QSE_T('0') && c <= QSE_T('7'))
+ {
+ qse_char_t cx;
+
+ c = c - QSE_T('0');
+
+ CHECK_END (builder);
+ cx = *com->ptr++;
+ if (cx >= QSE_T('0') && cx <= QSE_T('7'))
+ {
+ c = c * 8 + cx - QSE_T('0');
+
+ CHECK_END (builder);
+ cx = *com->ptr++;
+ if (cx >= QSE_T('0') && cx <= QSE_T('7'))
+ {
+ c = c * 8 + cx - QSE_T('0');
+ }
+ }
+ }
+ else if (c == QSE_T('x'))
+ {
+ qse_char_t cx;
+
+ CHECK_END (builder);
+ cx = *com->ptr++;
+ if (IS_HEX(cx))
+ {
+ c = HEX_TO_NUM(cx);
+
+ CHECK_END (builder);
+ cx = *com->ptr++;
+ if (IS_HEX(cx))
+ {
+ c = c * 16 + HEX_TO_NUM(cx);
+ }
+ }
+ }
+ #ifdef QSE_CHAR_IS_WCHAR
+ else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2)
+ {
+ qse_char_t cx;
+
+ CHECK_END (builder);
+ cx = *com->ptr++;
+ if (IS_HEX(cx))
+ {
+ qse_size_t i;
+
+ c = HEX_TO_NUM(cx);
+
+ for (i = 0; i < 3; i++)
+ {
+ CHECK_END (builder);
+ cx = *com->ptr++;
+
+ if (!IS_HEX(cx)) break;
+ c = c * 16 + HEX_TO_NUM(cx);
+ }
+ }
+ }
+ else if (c == QSE_T('U') && QSE_SIZEOF(qse_char_t) >= 4)
+ {
+ qse_char_t cx;
+
+ CHECK_END (builder);
+ cx = *com->ptr++;
+ if (IS_HEX(cx))
+ {
+ qse_size_t i;
+
+ c = HEX_TO_NUM(cx);
+
+ for (i = 0; i < 7; i++)
+ {
+ CHECK_END (builder);
+ cx = *com->ptr++;
+
+ if (!IS_HEX(cx)) break;
+ c = c * 16 + HEX_TO_NUM(cx);
+ }
+ }
+ }
+ #endif
+
+ com->c.value = c;
+ com->c.escaped = QSE_TRUE;
+
+ return 0;
+ }
+ else
+ {
+ if (level == LEVEL_TOP)
+ {
+ if (com->c.value == QSE_T('[') ||
+ com->c.value == QSE_T('|') ||
+ com->c.value == QSE_T('^') ||
+ com->c.value == QSE_T('$') ||
+ (!(com->option & QSE_REX_BUILD_NOBOUND) &&
+ com->c.value == QSE_T('{')) ||
+ com->c.value == QSE_T('+') ||
+ com->c.value == QSE_T('?') ||
+ com->c.value == QSE_T('*') ||
+ com->c.value == QSE_T('.') ||
+ com->c.value == QSE_T('(') ||
+ com->c.value == QSE_T(')'))
+ {
+ com->c.type = CT_SPECIAL;
+ }
+ }
+ else if (level == LEVEL_CHARSET)
+ {
+ if (com->c.value == QSE_T(']'))
+ {
+ com->c.type = CT_SPECIAL;
+ }
+ }
+ else if (level == LEVEL_RANGE)
+ {
+ if (com->c.value == QSE_T(',') ||
+ com->c.value == QSE_T('}'))
+ {
+ com->c.type = CT_SPECIAL;
+ }
+ }
+ }
+#if 0
+ com->c = (com->ptr < com->end)? *com->ptr++: QSE_CHAR_EOF;
+if (com->c == QSE_CHAR_EOF)
qse_printf (QSE_T("getc => \n"));
-else qse_printf (QSE_T("getc => %c\n"), c->c);
+else qse_printf (QSE_T("getc => %c\n"), com->c);
+#endif
return 0;
}
@@ -226,7 +446,7 @@ static qse_rex_node_t* comp2 (comp_t* c)
{
qse_rex_node_t* n;
- switch (c->c)
+ switch (c->c.value)
{
case QSE_T('('):
{
@@ -249,7 +469,7 @@ static qse_rex_node_t* comp2 (comp_t* c)
return QSE_NULL;
}
- c->grouplvl++;
+ c->gdepth++;
x = comp0 (c, ge);
if (x == QSE_NULL)
{
@@ -258,7 +478,7 @@ static qse_rex_node_t* comp2 (comp_t* c)
return QSE_NULL;
}
- if (c->c != QSE_T(')'))
+ if (c->c.value != QSE_T(')'))
{
qse_printf (QSE_T("expecting )\n"));
// UNBALANCED PAREN.
@@ -267,7 +487,7 @@ qse_printf (QSE_T("expecting )\n"));
return QSE_NULL;
}
- c->grouplvl--;
+ c->gdepth--;
if (getc(c) <= -1)
{
// freere (x);
@@ -289,6 +509,27 @@ qse_printf (QSE_T("expecting )\n"));
}
break;
+ case QSE_T('^'):
+ n = newnode (c, QSE_REX_NODE_BOL);
+ if (n == QSE_NULL) return QSE_NULL;
+ if (getc(c) <= -1)
+ {
+ // TODO: error handling..
+ return QSE_NULL;
+ }
+ break;
+
+ case QSE_T('$'):
+ n = newnode (c, QSE_REX_NODE_EOL);
+ if (n == QSE_NULL) return QSE_NULL;
+ if (getc(c) <= -1)
+ {
+ // TODO: error handling..
+ return QSE_NULL;
+ }
+ break;
+
+
/*
case QSE_T('['):
....
@@ -296,7 +537,7 @@ qse_printf (QSE_T("expecting )\n"));
default:
/* normal character */
- n = newcharnode (c, c->c);
+ n = newcharnode (c, c->c.value);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1)
{
@@ -363,8 +604,8 @@ static qse_rex_node_t* comp1 (comp_t* c, pair_t* pair)
pair->tail = pair->head;
- while (c->c != QSE_T('|') && c->c != QSE_CHAR_EOF &&
- !(c->grouplvl >= 0 && c->c == QSE_T(')')))
+ while (c->c.value != QSE_T('|') && c->c.value != QSE_CHAR_EOF &&
+ !(c->gdepth >= 0 && c->c.value == QSE_T(')')))
{
qse_rex_node_t* tmp = comp2 (c);
if (tmp == QSE_NULL)
@@ -389,7 +630,7 @@ static qse_rex_node_t* comp0 (comp_t* c, qse_rex_node_t* ge)
if (left == QSE_NULL) return QSE_NULL;
xpair.tail->next = ge;
- while (c->c == QSE_T('|'))
+ while (c->c.value == QSE_T('|'))
{
if (getc (c) <= -1)
{
@@ -435,8 +676,10 @@ qse_rex_node_t* qse_rex_comp (
c.ptr = ptr;
c.end = ptr + len;
- c.c = QSE_CHAR_EOF;
- c.grouplvl = 0;
+
+ c.c.value = QSE_CHAR_EOF;
+
+ c.gdepth = 0;
c.start = QSE_NULL;
if (getc(&c) <= -1) return QSE_NULL;
@@ -454,11 +697,11 @@ qse_rex_node_t* qse_rex_comp (
else
{
qse_rex_node_t* tmp;
- //tmp = comp0 (&c, QSE_NULL);
+ /*tmp = comp0 (&c, QSE_NULL);*/
tmp = comp0 (&c, end);
if (tmp == QSE_NULL)
{
- //freenode (c.start, c.rex->mmgr);
+ /*freenode (c.start, c.rex->mmgr);*/
freeallnodes (c.start);
c.start = QSE_NULL;
}
@@ -474,29 +717,95 @@ qse_printf (QSE_T("start has tmp...\n"));
return rex->code;
}
-static group_t* pushgroup (exec_t* e, group_t* pg, qse_rex_node_t* gn)
+static group_t* dupgroups (exec_t* e, group_t* g)
{
- group_t* g;
- QSE_ASSERT (gn->id == QSE_REX_NODE_GROUP);
+ group_t* yg, * xg = QSE_NULL;
- g = (group_t*) QSE_MMGR_ALLOC (e->rex->mmgr, QSE_SIZEOF(*g));
- if (g == QSE_NULL)
+ QSE_ASSERT (g != QSE_NULL);
+
+ if (g->next != QSE_NULL)
+ {
+ /* TODO: make it non recursive or
+ * implement stack overflow protection */
+ xg = dupgroups (e, g->next);
+ if (xg == QSE_NULL) return QSE_NULL;
+ }
+
+ yg = (group_t*) QSE_MMGR_ALLOC (e->rex->mmgr, QSE_SIZEOF(*g));
+ if (yg == QSE_NULL)
+ {
+ /* TODO: freegroups (xg); */
+ /* TODO: set error info */
+ return QSE_NULL;
+ }
+
+ QSE_MEMCPY (yg, g, QSE_SIZEOF(*yg));
+ yg->next = xg;
+
+ return yg;
+}
+
+static void freegroup (exec_t* e, group_t* group)
+{
+ QSE_ASSERT (group != QSE_NULL);
+ QSE_MMGR_FREE (e->rex->mmgr, group);
+}
+
+static void freegroups (exec_t* e, group_t* group)
+{
+ group_t* next;
+
+ while (group != QSE_NULL)
+ {
+ next = group->next;
+ freegroup (e, group);
+ group = next;
+ }
+}
+
+static group_t* pushgroup (exec_t* e, group_t* group, qse_rex_node_t* newgn)
+{
+ group_t* newg;
+
+ QSE_ASSERT (newgn->id == QSE_REX_NODE_GROUP);
+
+ newg = (group_t*) QSE_MMGR_ALLOC (e->rex->mmgr, QSE_SIZEOF(*newg));
+ if (newg == QSE_NULL)
{
/* TODO: set error info */
return QSE_NULL;
}
- g->node = gn;
- g->occ = 0;
- g->next = pg;
+ newg->node = newgn;
+ newg->occ = 0;
+ newg->next = group;
- return g;
+ return newg;
+}
+
+static group_t* pushgroupdup (exec_t* e, group_t* pg, qse_rex_node_t* gn)
+{
+ group_t* gs = QSE_NULL;
+
+ /* duplicate the group stack if necessary */
+ if (pg != QSE_NULL)
+ {
+ gs = dupgroups (e, pg);
+ if (gs == QSE_NULL) return QSE_NULL;
+ }
+
+ /* and push a new group to the stack */
+ return pushgroup (e, gs, gn);
}
static int addsimplecand (
- exec_t* e, cand_t* pcand, qse_rex_node_t* node, qse_size_t occ)
+ exec_t* e, group_t* group, qse_rex_node_t* node,
+ qse_size_t occ, const qse_char_t* mptr)
{
QSE_ASSERT (
+ node->id == QSE_REX_NODE_BOL ||
+ node->id == QSE_REX_NODE_EOL ||
+ node->id == QSE_REX_NODE_ANYCHAR ||
node->id == QSE_REX_NODE_CHAR ||
node->id == QSE_REX_NODE_CHARSET
);
@@ -505,16 +814,17 @@ static int addsimplecand (
cand.node = node;
cand.occ = occ;
- cand.group = pcand->group;
+ cand.group = group;
+ cand.mptr = mptr;
-if (node->id == QSE_REX_NODE_CHAR)
+/*if (node->id == QSE_REX_NODE_CHAR)
qse_printf (QSE_T("adding %d %c\n"), node->id, node->u.c);
else
-qse_printf (QSE_T("adding %d NA\n"), node->id);
+qse_printf (QSE_T("adding %d NA\n"), node->id);*/
if (qse_lda_insert (
- &e->cand[e->xxx],
- QSE_LDA_SIZE(&e->cand[e->xxx]),
+ &e->cand.set[e->cand.pending],
+ QSE_LDA_SIZE(&e->cand.set[e->cand.pending]),
&cand, 1) == (qse_size_t)-1)
{
/* TODO: set error code: ENOERR */
@@ -524,116 +834,188 @@ qse_printf (QSE_T("adding %d NA\n"), node->id);
return 0;
}
-static int addnextcands (exec_t* e, group_t* group, qse_rex_node_t* cur)
+static int addcands (
+ exec_t* e, group_t* group, qse_rex_node_t* prevnode,
+ qse_rex_node_t* candnode, const qse_char_t* mptr)
{
/* skip all NOP nodes */
- while (cur && cur->id == QSE_REX_NODE_NOP) cur = cur->next;
+ while (candnode != QSE_NULL && candnode->id == QSE_REX_NODE_NOP)
+ candnode = candnode->next;
/* nothing to add */
- if (cur == QSE_NULL) return 0;
+ if (candnode == QSE_NULL) return 0;
- if (cur->id == QSE_REX_NODE_END)
+ if (candnode->id == QSE_REX_NODE_END)
{
qse_printf (QSE_T("== ADDING THE END(MATCH) NODE MEANING MATCH FOUND == \n"));
- e->matched++;
+ if (e->matchend == QSE_NULL || mptr >= e->matchend)
+ e->matchend = mptr;
+ e->nmatches++;
}
- else if (cur->id == QSE_REX_NODE_BRANCH)
+ else if (candnode->id == QSE_REX_NODE_BRANCH)
{
- #if 0
- QSE_ASSERT (cur->next == QSE_NULL);
- if (addnextcands (e, group, cur->u.b.left) <= -1) return -1;
- if (addnextcands (e, group, cur->u.b.right) <= -1) return -1;
- #endif
+ group_t* groupdup;
+
+ QSE_ASSERT (candnode->next == QSE_NULL);
+
+ groupdup = dupgroups (e, group);
+ if (groupdup == QSE_NULL) return -1;
+
+ if (addcands (e, group, prevnode, candnode->u.b.left, mptr) <= -1) return -1;
+ if (addcands (e, groupdup, prevnode, candnode->u.b.right, mptr) <= -1) return -1;
}
- else if (cur->id == QSE_REX_NODE_GROUP)
+ else if (candnode->id == QSE_REX_NODE_GROUP)
{
- group_t* g = pushgroup (e, group, cur);
- if (g == QSE_NULL) return -1;
+ group_t* groupdup;
- /* add the first node in the group */
- if (addnextcands (e, g, cur->u.g.head) <= -1) return -1;
-
- if (cur->occ.min <= 0)
+ if (candnode->occ.min <= 0)
{
/* if the group node is optional,
- * add the next node to the candidate array.
- * branch case => dup group */
- if (addnextcands (e, group, cur->next) <= -1) return -1;
+ * add the next node to the candidate array. */
+ if (addcands (e, group, prevnode, candnode->next, mptr) <= -1) return -1;
}
+
+ /* push the current group node (candnode) onto a duplicate
+ * of the group stack. */
+ groupdup = pushgroupdup (e, group, candnode);
+ if (groupdup == QSE_NULL) return -1;
+
+ /* add the first node in the group */
+ if (addcands (e, groupdup, candnode, candnode->u.g.head, mptr) <= -1) return -1;
+
}
- else if (cur->id == QSE_REX_NODE_GROUPEND)
+ else if (candnode->id == QSE_REX_NODE_GROUPEND)
{
- group_t* group;
qse_rex_node_t* node;
+ qse_size_t occ;
- group = cand->group;
- QSE_ASSERT (group != QSE_NULL);
+ QSE_ASSERTX (group != QSE_NULL,
+ "GROUPEND reached must be paired up with a GROUP");
- node = group->node;
- QSE_ASSERT (node == cur->u.ge.group);
-
- if (group->occ < node->occ.max)
+ if (prevnode != candnode)
+ /*if (prevnode == QSE_NULL || prevnode->id != QSE_REX_NODE_GROUPEND)*/
{
- /* need to repeat itself */
group->occ++;
- if (addnextcands (e, cand, node->u.g.head) <= -1) return -1;
- }
- if (group->occ >= node->occ.min)
- {
- /* take the next atom as a candidate.
- * it is actually a branch case. */
-
- cand = dupgrouppoppingtop (cand);
-
- if (addnextcands (e, pg, node->next) <= -1) return -1;
+ occ = group->occ;
+ node = group->node;
+ QSE_ASSERT (node == candnode->u.ge.group);
+
+ if (occ >= node->occ.min)
+ {
+ group_t* gx = group->next;
+
+ /* take the next atom as a candidate.
+ * it is actually a branch case. move on. */
+
+ if (occ < node->occ.max)
+ {
+ /* check if the group will be repeated.
+ * if so, duplicate the group stack excluding
+ * the top. it goes along a different path and
+ * hence requires a duplicated group stack. */
+ if (group->next != QSE_NULL)
+ {
+ gx = dupgroups (e, group->next);
+ if (gx == QSE_NULL) return -1;
+ }
+ }
+
+ if (addcands (e, gx, candnode, node->next, mptr) <= -1) return -1;
+ }
+
+ if (occ < node->occ.max)
+ {
+ /* need to repeat itself. */
+ if (addcands (e, group, candnode, node->u.g.head, mptr) <= -1) return -1;
+ }
}
}
else
{
- if (addsimplecand (e, cand, cur, 1) <= -1) return -1;
- if (cur->occ.min <= 0)
+ group_t* gx = group;
+
+ if (candnode->occ.min <= 0)
{
/* if the node is optional,
- * add the next node to the candidate array */
- if (addnextcands (e, pg, cur->next) <= -1) return -1;
+ * add the next node to the candidate array */
+ if (addcands (e, group, prevnode, candnode->next, mptr) <= -1) return -1;
+
+ if (group != QSE_NULL)
+ {
+ gx = dupgroups (e, group);
+ if (gx == QSE_NULL) return -1;
+ }
}
+
+ if (addsimplecand (e, gx, candnode, 1, mptr) <= -1) return -1;
}
return 0;
}
-static int match (exec_t* e, const qse_char_t* curp)
+static int match (exec_t* e)
{
qse_size_t i;
- qse_char_t curc = *curp;
- for (i = 0; i < QSE_LDA_SIZE(&e->cand[e->yyy]); i++)
+ QSE_ASSERT (QSE_LDA_SIZE(&e->cand.set[e->cand.active]) > 0);
+
+ for (i = 0; i < QSE_LDA_SIZE(&e->cand.set[e->cand.active]); i++)
{
- cand_t* cand = QSE_LDA_DPTR(&e->cand[e->yyy],i);
+ cand_t* cand = QSE_LDA_DPTR(&e->cand.set[e->cand.active],i);
qse_rex_node_t* node = cand->node;
+ const qse_char_t* nmptr = QSE_NULL;
- if (node->id == QSE_REX_NODE_CHAR)
+ switch (node->id)
{
- if (node->u.c == curc)
- {
- qse_printf (QSE_T("matched %c\n"), node->u.c);
+ case QSE_REX_NODE_BOL:
+ if (cand->mptr == e->str.ptr) nmptr = cand->mptr;
+ break;
+ case QSE_REX_NODE_EOL:
+ if (cand->mptr >= e->str.end) nmptr = cand->mptr;
+ break;
+
+ case QSE_REX_NODE_ANYCHAR:
+ if (cand->mptr < e->sub.end) nmptr = cand->mptr + 1;
+ break;
+
+ case QSE_REX_NODE_CHAR:
+ if (cand->mptr < e->sub.end && node->u.c == *cand->mptr) nmptr = cand->mptr + 1;
+ //qse_printf (QSE_T("matched %c\n"), node->u.c);
+ break;
+
+ case QSE_REX_NODE_CHARSET:
+ qse_printf (QSE_T("charset not implemented...\n"));
+ break;
+
+ default:
+ // TODO: set error code -> internal error. this should not happen
+ return -1;
+ }
+
+ if (nmptr != QSE_NULL)
+ {
+ if (cand->occ >= node->occ.min)
+ {
+ group_t* gx = cand->group;
if (cand->occ < node->occ.max)
{
- if (addsimplecand (e, cand, node, cand->occ+1) <= -1) return -1;
- }
- if (cand->occ >= node->occ.min)
- {
-
- if (addnextcands (e, cand, node->next) <= -1) return -1;
+ if (cand->group != QSE_NULL)
+ {
+ gx = dupgroups (e, cand->group);
+ if (gx == QSE_NULL) return -1;
+ }
}
+
+ /* move on to the next candidate */
+ if (addcands (e, gx, node, node->next, nmptr) <= -1) return -1;
+ }
+ if (cand->occ < node->occ.max)
+ {
+ /* repeat itself more */
+ if (addsimplecand (e, cand->group, node, cand->occ+1, nmptr) <= -1) return -1;
}
- }
- else
- {
- QSE_ASSERT (node->id == QSE_REX_NODE_CHARSET);
- qse_printf (QSE_T("charset not implemented...\n"));
}
}
@@ -642,73 +1024,110 @@ static int match (exec_t* e, const qse_char_t* curp)
static int exec (exec_t* e)
{
- const qse_char_t* ptr = e->sub.ptr;
- const qse_char_t* end = e->sub.ptr + e->sub.len;
+ int n;
- e->matched = 0;
- e->xxx = 0;
- e->yyy = 1;
+ e->nmatches = 0;
+ e->matchend = QSE_NULL;
- /* collect the initial candidates to cand[xxx] */
- qse_lda_clear (&e->cand[e->xxx]);
+ e->cand.pending = 0;
+ e->cand.active = 1;
- if (addnextcands (e, QSE_NULL, e->rex->code->next) <= -1) return -1;
+ /* empty the pending set to collect the initial candidates */
+ qse_lda_clear (&e->cand.set[e->cand.pending]);
- while (ptr < end)
+ /* the first node must be the START node */
+ QSE_ASSERT (e->rex->code->id == QSE_REX_NODE_START);
+
+ /* addcands() collects a set of candidates into the pending set */
+ n = addcands (
+ e, /* execution structure */
+ QSE_NULL, /* doesn't belong to any groups yet */
+ e->rex->code, /* dummy previous node, the start node */
+ e->rex->code->next, /* start from the second node */
+ e->sub.ptr /* current match pointer */
+ );
+ if (n <= -1) return -1;
+
+ do
{
- /* kind of swap cand[xxx] and cand[yyy] by swapping indices */
- int tmp = e->xxx;
- e->xxx = e->yyy;
- e->yyy = tmp;
+ /* swap the pending set and the active set by swapping their indices */
+ int tmp = e->cand.pending;
+ e->cand.pending = e->cand.active;
+ e->cand.active = tmp;
/* check if there are any next candidates */
- if (QSE_LDA_SIZE(&e->cand[e->yyy]) <= 0)
+ if (QSE_LDA_SIZE(&e->cand.set[e->cand.active]) <= 0)
{
- /* if none, break */
+ /* if no more candidates, break */
break;
}
+{
+int i;
+qse_printf (QSE_T("SET="));
+for (i = 0; i < QSE_LDA_SIZE(&e->cand.set[e->cand.active]); i++)
+{
+ cand_t* cand = QSE_LDA_DPTR(&e->cand.set[e->cand.active],i);
+ qse_rex_node_t* node = cand->node;
+
+ if (node->id == QSE_REX_NODE_CHAR)
+ qse_printf (QSE_T("%c "), node->u.c);
+ else if (node->id == QSE_REX_NODE_ANYCHAR)
+ qse_printf (QSE_T(". "), node->u.c);
+ else if (node->id == QSE_REX_NODE_BOL)
+ qse_printf (QSE_T("^ "));
+ else if (node->id == QSE_REX_NODE_EOL)
+ qse_printf (QSE_T("$ "));
+}
+qse_printf (QSE_T("\n"));
+}
+
/* clear the array to hold the next candidates */
- qse_lda_clear (&e->cand[e->xxx]);
+ qse_lda_clear (&e->cand.set[e->cand.pending]);
-qse_printf (QSE_T("MATCHING %c\n"), *ptr);
- if (match (e, ptr) <= -1) return -1;
-
- ptr++;
+ if (match (e) <= -1) return -1;
}
+ while (1);
- qse_printf (QSE_T("TOTAL MATCHES FOUND... %d\n"), e->matched);
+if (e->nmatches > 0)
+{
+ qse_printf (QSE_T("MATCH: %d [%.*s]\n"),
+ (int)(e->matchend - e->sub.ptr),
+ (int)(e->matchend - e->sub.ptr), e->sub.ptr);
+}
+
+ qse_printf (QSE_T("TOTAL MATCHES FOUND... %d\n"), e->nmatches);
return 0;
}
static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr)
{
/* initializes dynamic data structures */
- if (qse_lda_init (&e->cand[0], mmgr, 100) == QSE_NULL)
+ if (qse_lda_init (&e->cand.set[0], mmgr, 100) == QSE_NULL)
{
/* TOOD: set error */
return -1;
}
- if (qse_lda_init (&e->cand[1], mmgr, 100) == QSE_NULL)
+ if (qse_lda_init (&e->cand.set[1], mmgr, 100) == QSE_NULL)
{
/* TOOD: set error */
- qse_lda_fini (&e->cand[0]);
+ qse_lda_fini (&e->cand.set[0]);
return -1;
}
- qse_lda_setscale (&e->cand[0], QSE_SIZEOF(cand_t));
- qse_lda_setscale (&e->cand[1], QSE_SIZEOF(cand_t));
+ qse_lda_setscale (&e->cand.set[0], QSE_SIZEOF(cand_t));
+ qse_lda_setscale (&e->cand.set[1], QSE_SIZEOF(cand_t));
- qse_lda_setcopier (&e->cand[0], QSE_LDA_COPIER_INLINE);
- qse_lda_setcopier (&e->cand[1], QSE_LDA_COPIER_INLINE);
+ qse_lda_setcopier (&e->cand.set[0], QSE_LDA_COPIER_INLINE);
+ qse_lda_setcopier (&e->cand.set[1], QSE_LDA_COPIER_INLINE);
return 0;
}
static void fini_exec_dds (exec_t* e)
{
- qse_lda_fini (&e->cand[1]);
- qse_lda_fini (&e->cand[0]);
+ qse_lda_fini (&e->cand.set[1]);
+ qse_lda_fini (&e->cand.set[0]);
}
int qse_rex_exec (qse_rex_t* rex,
@@ -727,14 +1146,13 @@ int qse_rex_exec (qse_rex_t* rex,
QSE_MEMSET (&e, 0, QSE_SIZEOF(e));
e.rex = rex;
e.str.ptr = str;
- e.str.len = len;
+ e.str.end = str + len;
e.sub.ptr = substr;
- e.sub.len = sublen;
+ e.sub.end = substr + sublen;
if (init_exec_dds (&e, rex->mmgr) <= -1) return -1;
-// TOOD: may have to execute exec in case sublen is 0.
- while (e.sub.len > 0)
+ while (e.sub.ptr <= e.sub.end)
{
n = exec (&e);
if (n <= -1)
@@ -743,10 +1161,9 @@ int qse_rex_exec (qse_rex_t* rex,
break;
}
- if (e.matched > 0) break;
+ if (e.nmatches > 0) break;
e.sub.ptr++;
- e.sub.len--;
}
fini_exec_dds (&e);
diff --git a/qse/samples/cmn/rex.cpp b/qse/samples/cmn/rex.cpp
index 00aa7dc5..eb89ff34 100644
--- a/qse/samples/cmn/rex.cpp
+++ b/qse/samples/cmn/rex.cpp
@@ -248,6 +248,18 @@ void MyFrame::drawNode (wxDC& dc, qse_rex_node_t* n)
{
dc.DrawText (_T("
"), nodex, nodey);
}
+ else if (n->id == QSE_REX_NODE_BOL)
+ {
+ dc.DrawText (_T("<^>"), nodex, nodey);
+ }
+ else if (n->id == QSE_REX_NODE_EOL)
+ {
+ dc.DrawText (_T("<$>"), nodex, nodey);
+ }
+ else if (n->id == QSE_REX_NODE_ANYCHAR)
+ {
+ dc.DrawText (_T(""), nodex, nodey);
+ }
else if (n->id == QSE_REX_NODE_CHAR)
{
qse_char_t x[2];
@@ -274,7 +286,7 @@ void MyFrame::drawNode (wxDC& dc, qse_rex_node_t* n)
}
else if (n->id == QSE_REX_NODE_NOP)
{
- dc.DrawText (_T(""), nodex, nodey);
+ dc.DrawText (_T(""), nodex, nodey);
}
}
@@ -287,19 +299,19 @@ void MyFrame::drawChain (wxDC& dc, qse_rex_node_t* n)
if (t->id == QSE_REX_NODE_BRANCH)
{
drawNode (dc, t);
- nodex += 50;
+ nodex += 40;
int oldx = nodex;
drawChain (dc, t->u.b.left);
nodex = oldx;
- nodey += 50;
+ nodey += 40;
drawChain (dc, t->u.b.right);
}
else
{
drawNode (dc, t);
- nodex += 50;
+ nodex += 40;
}
if (t->id == QSE_REX_NODE_GROUP)