2011-09-01 09:43:46 +00:00
|
|
|
/*
 * $Id$
 *
    Copyright (c) 2006-2019 Chung, Hyung-Hwan. All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:
    1. Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.
    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.

    THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
    IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
    NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
    THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
|
|
|
|
|
|
|
#include "tre.h"
|
|
|
|
#include "tre-compile.h"
|
|
|
|
#include <qse/cmn/str.h>
|
|
|
|
|
2011-09-02 08:45:06 +00:00
|
|
|
/*
 * Creates a new regex object.
 *
 * A single allocation of QSE_SIZEOF(qse_tre_t) + xtnsize bytes is requested
 * from mmgr. The object is then set up with qse_tre_init() and xtnsize bytes
 * at QSE_XTN(tre) are cleared to zero.
 *
 * Returns the new object, or QSE_NULL if the allocation or the
 * initialization fails (the allocation is released in the latter case).
 */
qse_tre_t* qse_tre_open (qse_mmgr_t* mmgr, qse_size_t xtnsize)
{
	qse_tre_t* obj = (qse_tre_t*) QSE_MMGR_ALLOC (mmgr, QSE_SIZEOF(qse_tre_t) + xtnsize);

	if (obj != QSE_NULL)
	{
		if (qse_tre_init (obj, mmgr) <= -1)
		{
			/* initialization failed - give the memory back */
			QSE_MMGR_FREE (mmgr, obj);
			obj = QSE_NULL;
		}
		else
		{
			QSE_MEMSET (QSE_XTN(obj), 0, xtnsize);
		}
	}

	return obj;
}
|
|
|
|
|
|
|
|
/*
 * Destroys a regex object: finalizes it with qse_tre_fini() and then
 * releases the object itself through the memory manager recorded in
 * tre->mmgr.
 */
void qse_tre_close (qse_tre_t* tre)
{
	qse_mmgr_t* owner;

	qse_tre_fini (tre);

	owner = tre->mmgr;
	QSE_MMGR_FREE (owner, tre);
}
|
|
|
|
|
|
|
|
/*
 * Initializes a caller-provided regex object: every byte of the structure
 * is cleared and the memory manager is recorded in tre->mmgr.
 *
 * Always returns 0.
 */
int qse_tre_init (qse_tre_t* tre, qse_mmgr_t* mmgr)
{
	QSE_MEMSET (tre, 0, QSE_SIZEOF(qse_tre_t));
	tre->mmgr = mmgr;

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Finalizes a regex object. If the compiled-pattern field is set,
 * tre_free() is called on the object and the field is reset to QSE_NULL;
 * otherwise nothing happens.
 */
void qse_tre_fini (qse_tre_t* tre)
{
	/* nothing compiled - nothing to release */
	if (tre->TRE_REGEX_T_FIELD == QSE_NULL) return;

	tre_free (tre);
	tre->TRE_REGEX_T_FIELD = QSE_NULL;
}
|
|
|
|
|
2012-12-01 13:13:13 +00:00
|
|
|
/* Returns the memory manager recorded in the regex object. */
qse_mmgr_t* qse_tre_getmmgr (qse_tre_t* tre)
{
	qse_mmgr_t* owner = tre->mmgr;
	return owner;
}
|
|
|
|
|
|
|
|
/* Returns the extension area of the regex object as given by QSE_XTN(). */
void* qse_tre_getxtn (qse_tre_t* tre)
{
	void* xtn = QSE_XTN (tre);
	return xtn;
}
|
|
|
|
|
2019-04-26 16:55:57 +00:00
|
|
|
/*
 * Compiles the first n characters of regex into tre.
 *
 * Any pattern already held by tre is released with tre_free() first.
 * The pattern is then compiled with tre_compile() using cflags.
 *
 * On success 0 is returned and, if nsubmat is not null, the number of
 * submatches of the compiled pattern is stored in *nsubmat.
 * When tre_compile() reports a positive code, that code is stored in
 * tre->errnum, the compiled-pattern field is cleared and -1 is returned.
 */
int qse_tre_compx (qse_tre_t* tre, const qse_char_t* regex, qse_size_t n, unsigned int* nsubmat, int cflags)
{
	int rc;

	/* drop whatever was compiled before */
	if (tre->TRE_REGEX_T_FIELD != QSE_NULL)
	{
		tre_free (tre);
		tre->TRE_REGEX_T_FIELD = QSE_NULL;
	}

	rc = tre_compile(tre, regex, n, cflags);
	if (rc <= 0)
	{
		/* compiled successfully - report the submatch count on request */
		if (nsubmat != QSE_NULL)
		{
			*nsubmat = ((struct tnfa*)tre->TRE_REGEX_T_FIELD)->num_submatches;
		}
		return 0;
	}

	tre->TRE_REGEX_T_FIELD = QSE_NULL; /* just to make sure */
	tre->errnum = rc;
	return -1;
}
|
|
|
|
|
2019-04-26 16:55:57 +00:00
|
|
|
/*
 * Compiles a pattern whose length is measured with qse_strlen().
 * A null regex is forwarded to qse_tre_compx() with a length of 0.
 * The return value is that of qse_tre_compx().
 */
int qse_tre_comp (qse_tre_t* tre, const qse_char_t* regex, unsigned int* nsubmat, int cflags)
{
	qse_size_t len = 0;

	if (regex) len = qse_strlen(regex);

	return qse_tre_compx(tre, regex, len, nsubmat, cflags);
}
|
|
|
|
|
|
|
|
/* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
|
|
|
|
endpoint values. */
|
|
|
|
void tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
|
|
|
|
const tre_tnfa_t *tnfa, int *tags, int match_eo)
|
|
|
|
{
|
|
|
|
tre_submatch_data_t *submatch_data;
|
|
|
|
unsigned int i, j;
|
|
|
|
int *parents;
|
|
|
|
|
|
|
|
i = 0;
|
|
|
|
if (match_eo >= 0 && !(cflags & REG_NOSUB))
|
|
|
|
{
|
|
|
|
/* Construct submatch offsets from the tags. */
|
|
|
|
DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo));
|
|
|
|
submatch_data = tnfa->submatch_data;
|
|
|
|
while (i < tnfa->num_submatches && i < nmatch)
|
|
|
|
{
|
|
|
|
if (submatch_data[i].so_tag == tnfa->end_tag)
|
|
|
|
pmatch[i].rm_so = match_eo;
|
|
|
|
else
|
|
|
|
pmatch[i].rm_so = tags[submatch_data[i].so_tag];
|
|
|
|
|
|
|
|
if (submatch_data[i].eo_tag == tnfa->end_tag)
|
|
|
|
pmatch[i].rm_eo = match_eo;
|
|
|
|
else
|
|
|
|
pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
|
|
|
|
|
|
|
|
/* If either of the endpoints were not used, this submatch
|
|
|
|
was not part of the match. */
|
|
|
|
if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
|
|
|
|
pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
|
|
|
|
|
|
|
DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i,
|
|
|
|
submatch_data[i].so_tag, pmatch[i].rm_so,
|
|
|
|
submatch_data[i].eo_tag, pmatch[i].rm_eo));
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
/* Reset all submatches that are not within all of their parent
|
|
|
|
submatches. */
|
|
|
|
i = 0;
|
|
|
|
while (i < tnfa->num_submatches && i < nmatch)
|
|
|
|
{
|
|
|
|
if (pmatch[i].rm_eo == -1)
|
|
|
|
assert(pmatch[i].rm_so == -1);
|
|
|
|
assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
|
|
|
|
|
|
|
|
parents = submatch_data[i].parents;
|
|
|
|
if (parents != QSE_NULL)
|
|
|
|
for (j = 0; parents[j] >= 0; j++)
|
|
|
|
{
|
|
|
|
DPRINT(("pmatch[%d] parent %d\n", i, parents[j]));
|
|
|
|
if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
|
|
|
|
|| pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
|
|
|
|
pmatch[i].rm_so = pmatch[i].rm_eo = -1;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
while (i < nmatch)
|
|
|
|
{
|
|
|
|
pmatch[i].rm_so = -1;
|
|
|
|
pmatch[i].rm_eo = -1;
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Wrapper functions for POSIX compatible regexp matching.
*/
|
|
|
|
|
|
|
|
int tre_have_backrefs(const regex_t *preg)
|
|
|
|
{
|
|
|
|
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
|
|
|
|
return tnfa->have_backrefs;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int tre_match(
|
|
|
|
const regex_t* preg, const void *string, qse_size_t len,
|
|
|
|
tre_str_type_t type, qse_size_t nmatch, regmatch_t pmatch[],
|
|
|
|
int eflags)
|
|
|
|
{
|
|
|
|
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
|
|
|
|
reg_errcode_t status;
|
|
|
|
int *tags = QSE_NULL, eo;
|
|
|
|
if (tnfa->num_tags > 0 && nmatch > 0)
|
|
|
|
{
|
|
|
|
tags = xmalloc (preg->mmgr, sizeof(*tags) * tnfa->num_tags);
|
|
|
|
if (tags == QSE_NULL) return REG_ESPACE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Dispatch to the appropriate matcher. */
|
2013-04-06 13:39:56 +00:00
|
|
|
if (tnfa->have_backrefs || (eflags & REG_BACKTRACKING_MATCHER))
|
2011-09-01 09:43:46 +00:00
|
|
|
{
|
|
|
|
/* The regex has back references, use the backtracking matcher. */
|
|
|
|
status = tre_tnfa_run_backtrack (
|
|
|
|
preg->mmgr, tnfa, string, (int)len, type,
|
|
|
|
tags, eflags, &eo);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Exact matching, no back references, use the parallel matcher. */
|
|
|
|
status = tre_tnfa_run_parallel (
|
|
|
|
preg->mmgr, tnfa, string, (int)len, type,
|
|
|
|
tags, eflags, &eo);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (status == REG_OK)
|
|
|
|
/* A match was found, so fill the submatch registers. */
|
|
|
|
tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
|
|
|
|
if (tags) xfree (preg->mmgr, tags);
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
|
|
int qse_tre_execx (
|
|
|
|
qse_tre_t* tre, const qse_char_t *str, qse_size_t len,
|
2011-09-02 08:45:06 +00:00
|
|
|
regmatch_t* pmatch, qse_size_t nmatch, int eflags)
|
2011-09-01 09:43:46 +00:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
2011-09-02 08:45:06 +00:00
|
|
|
if (tre->TRE_REGEX_T_FIELD == QSE_NULL)
|
2011-09-01 09:43:46 +00:00
|
|
|
{
|
|
|
|
/* regular expression is bad as none is compiled yet */
|
|
|
|
tre->errnum = QSE_TRE_EBADPAT;
|
|
|
|
return -1;
|
|
|
|
}
|
2014-11-14 02:44:20 +00:00
|
|
|
#if defined(QSE_CHAR_IS_WCHAR)
|
2011-09-01 09:43:46 +00:00
|
|
|
ret = tre_match (tre, str, len, STR_WIDE, nmatch, pmatch, eflags);
|
|
|
|
#else
|
|
|
|
ret = tre_match (tre, str, len, STR_BYTE, nmatch, pmatch, eflags);
|
|
|
|
#endif
|
|
|
|
if (ret > 0)
|
|
|
|
{
|
|
|
|
tre->errnum = ret;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int qse_tre_exec (
|
|
|
|
qse_tre_t* tre, const qse_char_t* str,
|
2011-09-02 08:45:06 +00:00
|
|
|
regmatch_t* pmatch, qse_size_t nmatch, int eflags)
|
2011-09-01 09:43:46 +00:00
|
|
|
{
|
2011-09-02 08:45:06 +00:00
|
|
|
return qse_tre_execx (tre, str, (qse_size_t)-1, pmatch, nmatch, eflags);
|
2011-09-01 09:43:46 +00:00
|
|
|
}
|
|
|
|
|
2011-09-02 08:45:06 +00:00
|
|
|
/* Returns the error number currently stored in the regex object. */
qse_tre_errnum_t qse_tre_geterrnum (qse_tre_t* tre)
{
	qse_tre_errnum_t code = tre->errnum;
	return code;
}
|
|
|
|
|
|
|
|
/*
 * Returns a message describing the error number stored in the regex
 * object. The message table is indexed directly by tre->errnum; a value
 * outside the table yields "unknown error".
 */
const qse_char_t* qse_tre_geterrmsg (qse_tre_t* tre)
{
	static const qse_char_t* messages[] =
	{
		QSE_T("no error"),
		QSE_T("no sufficient memory available"),
		QSE_T("no match"),
		QSE_T("invalid regular expression"),
		QSE_T("unknown collating element"),
		QSE_T("unknown character class name"),
		QSE_T("trailing backslash"),
		QSE_T("invalid backreference"),
		QSE_T("bracket imbalance"),
		QSE_T("parenthesis imbalance"),
		QSE_T("brace imbalance"),
		QSE_T("invalid bracket content"),
		QSE_T("invalid use of range operator"),
		QSE_T("invalid use of repetition operators")
	};

	if (tre->errnum >= 0 && tre->errnum < QSE_COUNTOF(messages))
	{
		return messages[tre->errnum];
	}

	return QSE_T("unknown error");
}
|
|
|
|
|