qse/lib/cmn/tre.c

288 lines
7.4 KiB
C
Raw Normal View History

2011-09-01 09:43:46 +00:00
/*
* $Id$
*
Copyright (c) 2006-2019 Chung, Hyung-Hwan. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "tre.h"
#include "tre-compile.h"
#include <qse/cmn/str.h>
/*
 * Allocates a qse_tre_t object followed by `xtnsize' bytes of extension
 * space from `mmgr', initializes the object with qse_tre_init() and
 * zero-fills the extension area.
 *
 * Returns the new object, or QSE_NULL if the total size is not
 * representable, the allocation fails, or qse_tre_init() fails.
 */
qse_tre_t* qse_tre_open (qse_mmgr_t* mmgr, qse_size_t xtnsize)
{
	qse_tre_t* tre;

	/* QSE_SIZEOF(qse_tre_t) + xtnsize must not wrap around: a wrapped sum
	 * would allocate a short block and the QSE_MEMSET below would then
	 * write xtnsize bytes past it.
	 * NOTE(review): relies on qse_size_t being unsigned so that
	 * (qse_size_t)-1 is its largest value - confirm. */
	if (xtnsize > (qse_size_t)-1 - QSE_SIZEOF(qse_tre_t)) return QSE_NULL;

	tre = (qse_tre_t*) QSE_MMGR_ALLOC (mmgr, QSE_SIZEOF(qse_tre_t) + xtnsize);
	if (tre == QSE_NULL) return QSE_NULL;

	if (qse_tre_init (tre, mmgr) <= -1)
	{
		/* initialization failed; give the raw block back */
		QSE_MMGR_FREE (mmgr, tre);
		return QSE_NULL;
	}

	/* qse_tre_init() only clears the object itself, not the extension */
	QSE_MEMSET (QSE_XTN(tre), 0, xtnsize);
	return tre;
}
/*
 * Finalizes `tre' with qse_tre_fini() and then releases the object through
 * the memory manager recorded inside it.
 */
void qse_tre_close (qse_tre_t* tre)
{
	qse_mmgr_t* owner;

	qse_tre_fini (tre);

	owner = tre->mmgr;
	QSE_MMGR_FREE (owner, tre);
}
/*
 * Prepares a caller-provided qse_tre_t: clears the whole object and
 * remembers `mmgr' in it. Always returns 0.
 */
int qse_tre_init (qse_tre_t* tre, qse_mmgr_t* mmgr)
{
	/* start from an all-zero object, then record the memory manager */
	QSE_MEMSET (tre, 0, QSE_SIZEOF(qse_tre_t));
	tre->mmgr = mmgr;

	return 0;
}
/*
 * Releases the compiled expression held by `tre', if there is one, and
 * clears the field that referenced it. The object itself is not freed.
 */
void qse_tre_fini (qse_tre_t* tre)
{
	/* nothing compiled, nothing to release */
	if (tre->TRE_REGEX_T_FIELD == QSE_NULL) return;

	tre_free (tre);
	tre->TRE_REGEX_T_FIELD = QSE_NULL;
}
/* Returns the memory manager stored in `tre'. */
qse_mmgr_t* qse_tre_getmmgr (qse_tre_t* tre)
{
	qse_mmgr_t* owner = tre->mmgr;
	return owner;
}
/* Returns the extension area of `tre' as computed by QSE_XTN(). */
void* qse_tre_getxtn (qse_tre_t* tre)
{
	void* xtn = QSE_XTN (tre);
	return xtn;
}
/*
 * Compiles the first `n' characters of `regex' into `tre' using `cflags'.
 * A previously compiled expression, if any, is released first.
 *
 * On success 0 is returned and, when `nsubmat' is not null, the number of
 * submatches of the compiled expression is stored through it.
 * When tre_compile() returns a positive code, that code is stored in
 * tre->errnum and -1 is returned.
 */
int qse_tre_compx (qse_tre_t* tre, const qse_char_t* regex, qse_size_t n, unsigned int* nsubmat, int cflags)
{
	int rc;

	/* discard the result of any earlier compilation */
	if (tre->TRE_REGEX_T_FIELD != QSE_NULL)
	{
		tre_free (tre);
		tre->TRE_REGEX_T_FIELD = QSE_NULL;
	}

	rc = tre_compile(tre, regex, n, cflags);
	if (rc <= 0)
	{
		/* any non-positive return is treated as success */
		if (nsubmat != QSE_NULL)
		{
			struct tnfa* compiled = (struct tnfa*)tre->TRE_REGEX_T_FIELD;
			*nsubmat = compiled->num_submatches;
		}
		return 0;
	}

	tre->TRE_REGEX_T_FIELD = QSE_NULL; /* just to make sure */
	tre->errnum = rc;
	return -1;
}
/*
 * Convenience form of qse_tre_compx() that measures `regex' with
 * qse_strlen(). A null `regex' is passed on with a length of 0.
 */
int qse_tre_comp (qse_tre_t* tre, const qse_char_t* regex, unsigned int* nsubmat, int cflags)
{
	qse_size_t len = 0;

	if (regex) len = qse_strlen(regex);

	return qse_tre_compx(tre, regex, len, nsubmat, cflags);
}
/* Fills the POSIX.2 regmatch_t array `pmatch' (at most `nmatch' entries)
   from the TNFA tag values in `tags' and the match end offset `match_eo'.

   Offsets are only derived when `match_eo' is non-negative and REG_NOSUB
   is not set in `cflags'. Every entry that is not derived from a
   submatch is set to {-1, -1}. */
void tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
	const tre_tnfa_t *tnfa, int *tags, int match_eo)
{
	unsigned int idx = 0;

	if (match_eo >= 0 && !(cflags & REG_NOSUB))
	{
		tre_submatch_data_t *subs;
		unsigned int k;

		/* Pass 1: construct submatch offsets from the tags. A tag equal
		   to the end tag stands for the end of the whole match. */
		DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo));
		subs = tnfa->submatch_data;
		for (idx = 0; idx < tnfa->num_submatches && idx < nmatch; idx++)
		{
			pmatch[idx].rm_so = (subs[idx].so_tag == tnfa->end_tag)?
				match_eo: tags[subs[idx].so_tag];
			pmatch[idx].rm_eo = (subs[idx].eo_tag == tnfa->end_tag)?
				match_eo: tags[subs[idx].eo_tag];

			/* If either of the endpoints were not used, this submatch
			   was not part of the match. */
			if (pmatch[idx].rm_so == -1 || pmatch[idx].rm_eo == -1)
			{
				pmatch[idx].rm_so = -1;
				pmatch[idx].rm_eo = -1;
			}

			DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", idx,
				subs[idx].so_tag, pmatch[idx].rm_so,
				subs[idx].eo_tag, pmatch[idx].rm_eo));
		}

		/* Pass 2: reset all submatches that are not within all of their
		   parent submatches. */
		for (idx = 0; idx < tnfa->num_submatches && idx < nmatch; idx++)
		{
			int *up;

			if (pmatch[idx].rm_eo == -1)
				assert(pmatch[idx].rm_so == -1);
			assert(pmatch[idx].rm_so <= pmatch[idx].rm_eo);

			up = subs[idx].parents;
			if (up == QSE_NULL) continue;

			/* the parent list is terminated by a negative entry */
			for (k = 0; up[k] >= 0; k++)
			{
				DPRINT(("pmatch[%d] parent %d\n", idx, up[k]));
				if (pmatch[idx].rm_so < pmatch[up[k]].rm_so
				    || pmatch[idx].rm_eo > pmatch[up[k]].rm_eo)
				{
					pmatch[idx].rm_so = -1;
					pmatch[idx].rm_eo = -1;
				}
			}
		}
	}

	/* Whatever was not filled above is marked as unused. */
	for (; idx < nmatch; idx++)
	{
		pmatch[idx].rm_so = -1;
		pmatch[idx].rm_eo = -1;
	}
}
/*
Wrapper functions for POSIX compatible regexp matching.
*/
/* Returns the have_backrefs member of the TNFA stored in `preg'. */
int tre_have_backrefs(const regex_t *preg)
{
	tre_tnfa_t *compiled;

	compiled = (void *)preg->TRE_REGEX_T_FIELD;
	return compiled->have_backrefs;
}
/*
 * Runs the compiled expression in `preg' against `string' and, on REG_OK,
 * fills `pmatch' (up to `nmatch' entries) via tre_fill_pmatch().
 *
 * `len' is narrowed to int before it is handed to the matcher.
 * A temporary tag array is allocated from preg->mmgr only when the TNFA
 * has tags and the caller asked for at least one match slot; REG_ESPACE
 * is returned if that allocation fails. Otherwise the matcher's status is
 * returned unchanged.
 */
static int tre_match(
	const regex_t* preg, const void *string, qse_size_t len,
	tre_str_type_t type, qse_size_t nmatch, regmatch_t pmatch[],
	int eflags)
{
	tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
	reg_errcode_t rc;
	int *tagbuf = QSE_NULL;
	int match_end;

	if (tnfa->num_tags > 0 && nmatch > 0)
	{
		tagbuf = xmalloc (preg->mmgr, sizeof(*tagbuf) * tnfa->num_tags);
		if (tagbuf == QSE_NULL) return REG_ESPACE;
	}

	/* Dispatch to the appropriate matcher. */
	if (!tnfa->have_backrefs && !(eflags & REG_BACKTRACKING_MATCHER))
	{
		/* Exact matching, no back references, use the parallel matcher. */
		rc = tre_tnfa_run_parallel (
			preg->mmgr, tnfa, string, (int)len, type,
			tagbuf, eflags, &match_end);
	}
	else
	{
		/* The regex has back references (or the caller asked for it),
		   use the backtracking matcher. */
		rc = tre_tnfa_run_backtrack (
			preg->mmgr, tnfa, string, (int)len, type,
			tagbuf, eflags, &match_end);
	}

	if (rc == REG_OK)
	{
		/* A match was found, so fill the submatch registers. */
		tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tagbuf, match_end);
	}

	if (tagbuf != QSE_NULL) xfree (preg->mmgr, tagbuf);
	return rc;
}
/*
 * Matches `str' (length `len') against the expression compiled into `tre'.
 *
 * Returns 0 when tre_match() reports a non-positive status. Returns -1
 * with tre->errnum set to QSE_TRE_EBADPAT if nothing has been compiled
 * yet, or set to the positive status returned by tre_match().
 */
int qse_tre_execx (
	qse_tre_t* tre, const qse_char_t *str, qse_size_t len,
	regmatch_t* pmatch, qse_size_t nmatch, int eflags)
{
	tre_str_type_t strtype;
	int rc;

	if (tre->TRE_REGEX_T_FIELD == QSE_NULL)
	{
		/* regular expression is bad as none is compiled yet */
		tre->errnum = QSE_TRE_EBADPAT;
		return -1;
	}

	/* the string type follows the build-time character type */
#if defined(QSE_CHAR_IS_WCHAR)
	strtype = STR_WIDE;
#else
	strtype = STR_BYTE;
#endif

	rc = tre_match (tre, str, len, strtype, nmatch, pmatch, eflags);
	if (rc <= 0) return 0;

	tre->errnum = rc;
	return -1;
}
/*
 * Convenience form of qse_tre_execx() that passes (qse_size_t)-1 as the
 * length.
 * NOTE(review): presumably that value tells the matcher the string is
 * NUL-terminated - confirm against the matcher implementation.
 */
int qse_tre_exec (
	qse_tre_t* tre, const qse_char_t* str,
	regmatch_t* pmatch, qse_size_t nmatch, int eflags)
{
	const qse_size_t unspecified_len = (qse_size_t)-1;

	return qse_tre_execx (tre, str, unspecified_len, pmatch, nmatch, eflags);
}
/* Returns the error number currently stored in `tre'. */
qse_tre_errnum_t qse_tre_geterrnum (qse_tre_t* tre)
{
	qse_tre_errnum_t code = tre->errnum;
	return code;
}
/*
 * Returns a static message describing tre->errnum. The error number is
 * used as an index into the table below; a value outside the table yields
 * "unknown error".
 */
const qse_char_t* qse_tre_geterrmsg (qse_tre_t* tre)
{
	static const qse_char_t* errstr[] =
	{
		QSE_T("no error"),
		QSE_T("no sufficient memory available"),
		QSE_T("no match"),
		QSE_T("invalid regular expression"),
		QSE_T("unknown collating element"),
		QSE_T("unknown character class name"),
		QSE_T("trailing backslash"),
		QSE_T("invalid backreference"),
		QSE_T("bracket imbalance"),
		QSE_T("parenthesis imbalance"),
		QSE_T("brace imbalance"),
		QSE_T("invalid bracket content"),
		QSE_T("invalid use of range operator"),
		QSE_T("invalid use of repetition operators")
	};

	/* reject anything that does not index into the table */
	if (!(tre->errnum >= 0 && tre->errnum < QSE_COUNTOF(errstr)))
	{
		return QSE_T("unknown error");
	}

	return errstr[tre->errnum];
}