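fixed a minor bug in calling tre_add_tags(): the tag-counting first pass is now selected by an explicit first_pass argument instead of being inferred from a NULL mem/tnfa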

added qse_tre_open()/qse_tre_close()/qse_tre_geterrnum()/qse_tre_geterrmsg()
added a parameter to qse_tre_compx()/qse_tre_comp() that returns the number of submatches
This commit is contained in:
hyung-hwan 2011-09-02 08:45:06 +00:00
parent 6aba3f8f89
commit 1efa41052e
7 changed files with 177 additions and 50 deletions

View File

@ -46,7 +46,7 @@ typedef enum qse_tre_errnum_t qse_tre_errnum_t;
typedef struct qse_tre_t qse_tre_t; typedef struct qse_tre_t qse_tre_t;
struct qse_tre_t struct qse_tre_t
{ {
qse_mmgr_t* mmgr; QSE_DEFINE_COMMON_FIELDS (tre)
qse_tre_errnum_t errnum; qse_tre_errnum_t errnum;
qse_size_t re_nsub; /* Number of parenthesized subexpressions. */ qse_size_t re_nsub; /* Number of parenthesized subexpressions. */
@ -95,6 +95,17 @@ struct qse_tre_strsrc_t
extern "C" { extern "C" {
#endif #endif
QSE_DEFINE_COMMON_FUNCTIONS (tre)
/*
 * Allocates a qse_tre_t object followed by xtnsize extra bytes through
 * mmgr and initializes it with qse_tre_init().
 * Returns the new object, or QSE_NULL if the allocation or the
 * initialization fails (the allocated memory is released in that case).
 * NOTE(review): the definition appears to take a fallback path when mmgr
 * is QSE_NULL - confirm against the implementation.
 */
qse_tre_t* qse_tre_open (
qse_mmgr_t* mmgr,
qse_size_t xtnsize
);
/*
 * Finalizes tre with qse_tre_fini() and then frees the object itself
 * through the memory manager stored in tre->mmgr.
 */
void qse_tre_close (
qse_tre_t* tre
);
int qse_tre_init ( int qse_tre_init (
qse_tre_t* tre, qse_tre_t* tre,
qse_mmgr_t* mmgr qse_mmgr_t* mmgr
@ -104,17 +115,26 @@ void qse_tre_fini (
qse_tre_t* tre qse_tre_t* tre
); );
/*
 * Returns the error number currently stored in tre->errnum.
 */
qse_tre_errnum_t qse_tre_geterrnum (
qse_tre_t* tre
);
/*
 * Returns a statically allocated message string describing the error
 * number stored in tre->errnum, or "unknown error" when that number
 * falls outside the message table.
 */
const qse_char_t* qse_tre_geterrmsg (
qse_tre_t* tre
);
int qse_tre_compx ( int qse_tre_compx (
qse_tre_t* tre, qse_tre_t* tre,
const qse_char_t* regex, const qse_char_t* regex,
qse_size_t n, qse_size_t n,
unsigned int* nsubmat,
int cflags int cflags
); );
int qse_tre_comp ( int qse_tre_comp (
qse_tre_t* tre, qse_tre_t* tre,
const qse_char_t* regex, const qse_char_t* regex,
unsigned int* nsubmat,
int cflags int cflags
); );
@ -122,8 +142,16 @@ int qse_tre_execx (
qse_tre_t* tre, qse_tre_t* tre,
const qse_char_t* str, const qse_char_t* str,
qse_size_t len, qse_size_t len,
qse_tre_match_t* pmatch,
qse_size_t nmatch,
int eflags
);
int qse_tre_exec (
qse_tre_t* tre,
const qse_char_t* str,
qse_tre_match_t* pmatch,
qse_size_t nmatch, qse_size_t nmatch,
qse_tre_match_t pmatch[],
int eflags int eflags
); );

View File

@ -187,14 +187,14 @@ tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
subexpressions marked for submatch addressing can be traced. */ subexpressions marked for submatch addressing can be traced. */
static reg_errcode_t static reg_errcode_t
tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree, tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
tre_tnfa_t *tnfa) tre_tnfa_t *tnfa, int first_pass)
{ {
reg_errcode_t status = REG_OK; reg_errcode_t status = REG_OK;
tre_addtags_symbol_t symbol; tre_addtags_symbol_t symbol;
tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */ tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
int bottom = tre_stack_num_objects(stack); int bottom = tre_stack_num_objects(stack);
/* True for first pass (counting number of needed tags) */ /* True for first pass (counting number of needed tags) */
int first_pass = (mem == NULL || tnfa == NULL); /*int first_pass = (mem == NULL || tnfa == NULL);*/
int *regset, *orig_regset; int *regset, *orig_regset;
int num_tags = 0; /* Total number of tags. */ int num_tags = 0; /* Total number of tags. */
int num_minimals = 0; /* Number of special minimal tags. */ int num_minimals = 0; /* Number of special minimal tags. */
@ -1972,7 +1972,8 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
DPRINT(("tre_compile: setting up tags\n")); DPRINT(("tre_compile: setting up tags\n"));
/* Figure out how many tags we will need. */ /* Figure out how many tags we will need. */
errcode = tre_add_tags(NULL, stack, tree, tnfa); /*errcode = tre_add_tags(NULL, stack, tree, tnfa); */
errcode = tre_add_tags(mem, stack, tree, tnfa, 1);
if (errcode != REG_OK) if (errcode != REG_OK)
ERROR_EXIT(errcode); ERROR_EXIT(errcode);
#ifdef TRE_DEBUG #ifdef TRE_DEBUG
@ -2000,7 +2001,7 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
ERROR_EXIT(REG_ESPACE); ERROR_EXIT(REG_ESPACE);
tnfa->submatch_data = submatch_data; tnfa->submatch_data = submatch_data;
errcode = tre_add_tags(mem, stack, tree, tnfa); errcode = tre_add_tags(mem, stack, tree, tnfa, 0);
if (errcode != REG_OK) if (errcode != REG_OK)
ERROR_EXIT(errcode); ERROR_EXIT(errcode);

View File

@ -22,10 +22,9 @@
#include "tre-compile.h" #include "tre-compile.h"
#include <qse/cmn/str.h> #include <qse/cmn/str.h>
#if 0
QSE_IMPLEMENT_COMMON_FUNCTIONS (tre) QSE_IMPLEMENT_COMMON_FUNCTIONS (tre)
qse_tre_t* qse_tre_open (qse_mmgr_t* mmgr, qse_size_t xtn, qse_tre_code_t* code) qse_tre_t* qse_tre_open (qse_mmgr_t* mmgr, qse_size_t xtnsize)
{ {
qse_tre_t* tre; qse_tre_t* tre;
@ -39,10 +38,10 @@ qse_tre_t* qse_tre_open (qse_mmgr_t* mmgr, qse_size_t xtn, qse_tre_code_t* code)
if (mmgr == QSE_NULL) return QSE_NULL; if (mmgr == QSE_NULL) return QSE_NULL;
} }
tre = (qse_tre_t*) QSE_MMGR_ALLOC (mmgr, QSE_SIZEOF(qse_tre_t) + xtn); tre = (qse_tre_t*) QSE_MMGR_ALLOC (mmgr, QSE_SIZEOF(qse_tre_t) + xtnsize);
if (tre == QSE_NULL) return QSE_NULL; if (tre == QSE_NULL) return QSE_NULL;
if (qse_tre_init (tre, mmgr, code) <= -1) if (qse_tre_init (tre, mmgr) <= -1)
{ {
QSE_MMGR_FREE (mmgr, tre); QSE_MMGR_FREE (mmgr, tre);
return QSE_NULL; return QSE_NULL;
@ -56,15 +55,6 @@ void qse_tre_close (qse_tre_t* tre)
qse_tre_fini (tre); qse_tre_fini (tre);
QSE_MMGR_FREE (tre->mmgr, tre); QSE_MMGR_FREE (tre->mmgr, tre);
} }
#endif
/*
tre_regcomp.c - TRE POSIX compatible regex compilation functions.
This software is released under a BSD-style license.
See the file LICENSE for details and copyright.
*/
int qse_tre_init (qse_tre_t* tre, qse_mmgr_t* mmgr) int qse_tre_init (qse_tre_t* tre, qse_mmgr_t* mmgr)
{ {
@ -78,38 +68,48 @@ int qse_tre_init (qse_tre_t* tre, qse_mmgr_t* mmgr)
void qse_tre_fini (qse_tre_t* tre) void qse_tre_fini (qse_tre_t* tre)
{ {
if (tre->value) if (tre->TRE_REGEX_T_FIELD)
{ {
tre_free (tre); tre_free (tre);
tre->value = QSE_NULL; tre->TRE_REGEX_T_FIELD = QSE_NULL;
} }
} }
int qse_tre_compx (
int qse_tre_compx (qse_tre_t* tre, const qse_char_t* regex, qse_size_t n, int cflags) qse_tre_t* tre, const qse_char_t* regex, qse_size_t n,
unsigned int* nsubmat, int cflags)
{ {
int ret; int ret;
if (tre->value) if (tre->TRE_REGEX_T_FIELD)
{ {
tre_free (tre); tre_free (tre);
tre->value = QSE_NULL; tre->TRE_REGEX_T_FIELD = QSE_NULL;
} }
ret = tre_compile (tre, regex, n, cflags); ret = tre_compile (tre, regex, n, cflags);
if (ret > 0) if (ret > 0)
{ {
tre->value = QSE_NULL; /* just to make sure */ tre->TRE_REGEX_T_FIELD = QSE_NULL; /* just to make sure */
tre->errnum = ret; tre->errnum = ret;
return -1; return -1;
} }
if (nsubmat)
{
*nsubmat = ((struct tnfa*)tre->TRE_REGEX_T_FIELD)->num_submatches;
}
return 0; return 0;
} }
int qse_tre_comp (qse_tre_t* tre, const qse_char_t* regex, int cflags) int qse_tre_comp (
qse_tre_t* tre, const qse_char_t* regex,
unsigned int* nsubmat, int cflags)
{ {
return qse_tre_compx (tre, regex, (regex? qse_strlen(regex):0), cflags); return qse_tre_compx (
tre, regex, (regex? qse_strlen(regex):0),
nsubmat, cflags
);
} }
/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
@ -238,11 +238,11 @@ static int tre_match(
int qse_tre_execx ( int qse_tre_execx (
qse_tre_t* tre, const qse_char_t *str, qse_size_t len, qse_tre_t* tre, const qse_char_t *str, qse_size_t len,
qse_size_t nmatch, regmatch_t pmatch[], int eflags) regmatch_t* pmatch, qse_size_t nmatch, int eflags)
{ {
int ret; int ret;
if (tre->value == QSE_NULL) if (tre->TRE_REGEX_T_FIELD == QSE_NULL)
{ {
/* regular expression is bad as none is compiled yet */ /* regular expression is bad as none is compiled yet */
tre->errnum = QSE_TRE_EBADPAT; tre->errnum = QSE_TRE_EBADPAT;
@ -264,9 +264,9 @@ int qse_tre_execx (
int qse_tre_exec ( int qse_tre_exec (
qse_tre_t* tre, const qse_char_t* str, qse_tre_t* tre, const qse_char_t* str,
qse_size_t nmatch, regmatch_t pmatch[], int eflags) regmatch_t* pmatch, qse_size_t nmatch, int eflags)
{ {
return qse_tre_execx (tre, str, (unsigned)-1, nmatch, pmatch, eflags); return qse_tre_execx (tre, str, (qse_size_t)-1, pmatch, nmatch, eflags);
} }
#if 0 #if 0
@ -277,3 +277,33 @@ int qse_tre_execsrc (
return tre_match (preg, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags); return tre_match (preg, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
} }
#endif #endif
/* Reports the error number currently held in the given regex object. */
qse_tre_errnum_t qse_tre_geterrnum (qse_tre_t* tre)
{
	qse_tre_errnum_t current = tre->errnum;

	return current;
}
/*
 * Maps the error number held in tre->errnum to a static message string.
 * Numbers outside the message table yield "unknown error".
 */
const qse_char_t* qse_tre_geterrmsg (qse_tre_t* tre)
{
	/* Indexed directly by the error number; presumably ordered to match
	 * the qse_tre_errnum_t enumerators - confirm against the enum. */
	static const qse_char_t* msgtab[] =
	{
		QSE_T("no error"),
		QSE_T("no sufficient memory available"),
		QSE_T("no match"),
		QSE_T("invalid regular expression"),
		QSE_T("unknown collating element"),
		QSE_T("unknown character class name"),
		QSE_T("trailing backslash"),
		QSE_T("invalid backreference"),
		QSE_T("bracket imbalance"),
		QSE_T("parenthesis imbalance"),
		QSE_T("brace imbalance"),
		QSE_T("invalid bracket content"),
		QSE_T("invalid use of range operator"),
		QSE_T("invalid use of repetition operators")
	};

	/* Reject anything that cannot index the table before looking it up. */
	if (tre->errnum < 0 || tre->errnum >= QSE_COUNTOF(msgtab))
	{
		return QSE_T("unknown error");
	}

	return msgtab[tre->errnum];
}

View File

@ -94,7 +94,7 @@ static qse_htb_walk_t walk_headers (qse_htb_t* htb, qse_htb_pair_t* pair, void*
hwctx->ret = -1; hwctx->ret = -1;
return QSE_HTB_WALK_STOP; return QSE_HTB_WALK_STOP;
} }
return QSE_HTB_WALK_FORWARD; return QSE_HTB_WALK_FORWARD;
} }
int qse_htre_walkheaders ( int qse_htre_walkheaders (

View File

@ -1,5 +1,5 @@
/* /*
* $Id: sed.c 556 2011-08-31 15:43:46Z hyunghwan.chung $ * $Id: sed.c 557 2011-09-01 14:45:06Z hyunghwan.chung $
* *
Copyright 2006-2011 Chung, Hyung-Hwan. Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -261,7 +261,7 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
case QSE_SED_CMD_APPEND: case QSE_SED_CMD_APPEND:
case QSE_SED_CMD_INSERT: case QSE_SED_CMD_INSERT:
case QSE_SED_CMD_CHANGE: case QSE_SED_CMD_CHANGE:
if (cmd->u.text.ptr != QSE_NULL) if (cmd->u.text.ptr)
QSE_MMGR_FREE (sed->mmgr, cmd->u.text.ptr); QSE_MMGR_FREE (sed->mmgr, cmd->u.text.ptr);
break; break;
@ -269,27 +269,27 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
case QSE_SED_CMD_READ_FILELN: case QSE_SED_CMD_READ_FILELN:
case QSE_SED_CMD_WRITE_FILE: case QSE_SED_CMD_WRITE_FILE:
case QSE_SED_CMD_WRITE_FILELN: case QSE_SED_CMD_WRITE_FILELN:
if (cmd->u.file.ptr != QSE_NULL) if (cmd->u.file.ptr)
QSE_MMGR_FREE (sed->mmgr, cmd->u.file.ptr); QSE_MMGR_FREE (sed->mmgr, cmd->u.file.ptr);
break; break;
case QSE_SED_CMD_BRANCH: case QSE_SED_CMD_BRANCH:
case QSE_SED_CMD_BRANCH_COND: case QSE_SED_CMD_BRANCH_COND:
if (cmd->u.branch.label.ptr != QSE_NULL) if (cmd->u.branch.label.ptr)
QSE_MMGR_FREE (sed->mmgr, cmd->u.branch.label.ptr); QSE_MMGR_FREE (sed->mmgr, cmd->u.branch.label.ptr);
break; break;
case QSE_SED_CMD_SUBSTITUTE: case QSE_SED_CMD_SUBSTITUTE:
if (cmd->u.subst.file.ptr != QSE_NULL) if (cmd->u.subst.file.ptr)
QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.file.ptr); QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.file.ptr);
if (cmd->u.subst.rpl.ptr != QSE_NULL) if (cmd->u.subst.rpl.ptr)
QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.rpl.ptr); QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.rpl.ptr);
if (cmd->u.subst.rex != QSE_NULL) if (cmd->u.subst.rex)
qse_freerex (sed->mmgr, cmd->u.subst.rex); qse_freerex (sed->mmgr, cmd->u.subst.rex);
break; break;
case QSE_SED_CMD_TRANSLATE: case QSE_SED_CMD_TRANSLATE:
if (cmd->u.transet.ptr != QSE_NULL) if (cmd->u.transet.ptr)
QSE_MMGR_FREE (sed->mmgr, cmd->u.transet.ptr); QSE_MMGR_FREE (sed->mmgr, cmd->u.transet.ptr);
break; break;
@ -346,6 +346,39 @@ static void* compile_rex (qse_sed_t* sed, qse_char_t rxend)
} }
} }
#if 0
{
qse_tre_t* tre;
tre = qse_tre_open (sed->mmgr, 0);
if (tre)
{
if (qse_tre_comp (tre,
QSE_STR_PTR(&sed->tmp.rex),
QSE_STR_LEN(&sed->tmp.rex),
QSE_NULL,
QSE_TRE_EXTENDED) <= -1)
{
qse_tre_close (tre);
goto fail:
}
return tre;
}
else
{
SETERR1 (
sed, QSE_SED_EREXBL,
QSE_STR_PTR(&sed->tmp.rex),
QSE_STR_LEN(&sed->tmp.rex),
&sed->src.loc
);
return QSE_NULL;
}
}
#endif
code = qse_buildrex ( code = qse_buildrex (
sed->mmgr, sed->mmgr,
sed->depth.rex.build, sed->depth.rex.build,

View File

@ -40,7 +40,8 @@ qse_rex_setoption (rex, QSE_REX_STRICT);
str.ptr = argv[2]; str.ptr = argv[2];
str.len = qse_strlen(argv[2]); str.len = qse_strlen(argv[2]);
qse_printf (QSE_T("compile ok\n")); qse_printf (QSE_T("compile ok\n"));
n = qse_rex_exec (rex, &str, &str, &matstr); n = qse_rex_exec (rex, &str, &str, &matstr);
if (n <= -1) if (n <= -1)
{ {

View File

@ -1,35 +1,69 @@
#include <qse/cmn/tre.h>
#include <qse/cmn/main.h> #include <qse/cmn/main.h>
#include <qse/cmn/tre.h>
#include <qse/cmn/mem.h>
#include <qse/cmn/misc.h>
#include <qse/cmn/stdio.h> #include <qse/cmn/stdio.h>
static int test_main (int argc, qse_char_t* argv[], qse_char_t* envp[]) static int test_main (int argc, qse_char_t* argv[], qse_char_t* envp[])
{ {
qse_tre_t tre; qse_tre_t tre;
unsigned int nsubmat;
qse_tre_match_t* mat = QSE_NULL;
if (argc != 3)
{
qse_printf (QSE_T("USAGE: %s pattern string\n"),
qse_basename(argv[0]));
return -1;
}
qse_tre_init (&tre, QSE_NULL); qse_tre_init (&tre, QSE_NULL);
if (qse_tre_comp (&tre, argv[1], QSE_TRE_EXTENDED|QSE_TRE_NOSUBREG) <= -1) if (qse_tre_comp (&tre, argv[1], &nsubmat, QSE_TRE_EXTENDED) <= -1)
{ {
qse_printf (QSE_T("Cannot compile pattern [%s] - %d\n"), argv[1], QSE_TRE_ERRNUM(&tre)); qse_printf (QSE_T("ERROR: Cannot compile pattern [%s] - %s\n"), argv[1], qse_tre_geterrmsg(&tre));
goto oops; goto oops;
} }
if (qse_tre_exec(&tre, argv[2], (size_t) 0, NULL, 0) <= -1) if (nsubmat > 0)
{ {
if (QSE_TRE_ERRNUM(&tre) == QSE_TRE_ENOMATCH) qse_printf (QSE_T("no match\n")); mat = QSE_MMGR_ALLOC (qse_tre_getmmgr(&tre), QSE_SIZEOF(*mat) * nsubmat);
else qse_printf (QSE_T("ERROR %d\n"), QSE_TRE_ERRNUM(&tre)); if (mat == QSE_NULL)
goto oops; {
qse_printf (QSE_T("ERROR: Cannot allocate submatch array\n"));
goto oops;
}
}
if (qse_tre_exec(&tre, argv[2], mat, nsubmat, 0) <= -1)
{
if (QSE_TRE_ERRNUM(&tre) == QSE_TRE_ENOMATCH) qse_printf (QSE_T("Match: NO\n"));
else
{
qse_printf (QSE_T("ERROR: Cannot not match pattern - %s\n"), qse_tre_geterrmsg(&tre));
goto oops;
}
} }
else else
{ {
qse_printf (QSE_T("match...\n")); unsigned int i;
qse_printf (QSE_T("Match: YES\n"));
for (i = 0; i < nsubmat; i++)
{
if (mat[i].rm_so == -1) break;
qse_printf (QSE_T("SUBMATCH[%u] = [%.*s]\n"), i,
(int)(mat[i].rm_eo - mat[i].rm_so), &argv[2][mat[i].rm_so]);
}
} }
if (mat) QSE_MMGR_FREE (qse_tre_getmmgr(&tre), mat);
qse_tre_fini (&tre); qse_tre_fini (&tre);
return 0; return 0;
oops: oops:
if (mat) QSE_MMGR_FREE (qse_tre_getmmgr(&tre), mat);
qse_tre_fini (&tre); qse_tre_fini (&tre);
return -1; return -1;
} }