hawk/lib/tre.c

290 lines
7.8 KiB
C
Raw Permalink Normal View History

2019-12-13 04:29:58 +00:00
/*
Copyright (c) 2006-2020 Chung, Hyung-Hwan. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "tre-prv.h"
#include "tre-compile.h"
2019-12-18 08:34:44 +00:00
/*
 * Allocates a hawk_tre_t followed by xtnsize bytes of extension space
 * through the given gem, initializes the object with hawk_tre_init(),
 * and zero-fills the extension area that starts right after the object.
 *
 * Returns the new object, or HAWK_NULL if the allocation or the
 * initialization fails (the memory is released in the latter case).
 */
hawk_tre_t* hawk_tre_open (hawk_gem_t* gem, hawk_oow_t xtnsize)
{
	hawk_tre_t* obj;

	obj = (hawk_tre_t*)hawk_gem_allocmem(gem, HAWK_SIZEOF(hawk_tre_t) + xtnsize);
	if (obj == HAWK_NULL) return HAWK_NULL;

	if (hawk_tre_init(obj, gem) >= 0)
	{
		/* the extension area lives immediately behind the object */
		HAWK_MEMSET (obj + 1, 0, xtnsize);
		return obj;
	}

	/* initialization failed - give the memory back */
	hawk_gem_freemem (gem, obj);
	return HAWK_NULL;
}
/*
 * Finalizes the object with hawk_tre_fini() and then releases its memory
 * through the gem stored in the object.
 */
void hawk_tre_close (hawk_tre_t* tre)
{
	hawk_gem_t* owner;

	hawk_tre_fini (tre);

	/* the gem is read only after finalization, as in the original order */
	owner = tre->gem;
	hawk_gem_freemem (owner, tre);
}
2019-12-18 08:34:44 +00:00
/*
 * Clears the whole hawk_tre_t object and records the gem to use for
 * later memory and error operations. Always returns 0.
 */
int hawk_tre_init (hawk_tre_t* tre, hawk_gem_t* gem)
{
	HAWK_MEMSET (tre, 0, HAWK_SIZEOF(hawk_tre_t));
	tre->gem = gem;
	return 0;
}
/*
 * Releases the compiled expression held by the object, if there is one,
 * and clears the field so that the object reads as "nothing compiled".
 */
void hawk_tre_fini (hawk_tre_t* tre)
{
	/* nothing compiled - nothing to release */
	if (tre->TRE_REGEX_T_FIELD == HAWK_NULL) return;

	tre_free (tre);
	tre->TRE_REGEX_T_FIELD = HAWK_NULL;
}
/*
 * Compiles the first n characters of regex into the object, replacing
 * any expression compiled earlier.
 *
 * On success, returns 0 and, when nsubmat is not null, stores the
 * num_submatches value of the compiled expression in *nsubmat.
 * When tre_compile() yields a positive code, that code is recorded on
 * tre->gem with hawk_gem_seterrnum() and -1 is returned.
 */
int hawk_tre_compx (hawk_tre_t* tre, const hawk_ooch_t* regex, hawk_oow_t n, unsigned int* nsubmat, int cflags)
{
	int rc;

	/* drop the previously compiled expression before compiling anew */
	if (tre->TRE_REGEX_T_FIELD != HAWK_NULL)
	{
		tre_free (tre);
		tre->TRE_REGEX_T_FIELD = HAWK_NULL;
	}

	rc = tre_compile(tre, regex, n, cflags);
	if (rc <= 0)
	{
		if (nsubmat != HAWK_NULL)
		{
			*nsubmat = ((struct tnfa*)tre->TRE_REGEX_T_FIELD)->num_submatches;
		}
		return 0;
	}

	/* compilation failed */
	tre->TRE_REGEX_T_FIELD = HAWK_NULL; /* just to make sure */
	hawk_gem_seterrnum (tre->gem, HAWK_NULL, rc);
	return -1;
}
/*
 * Convenience form of hawk_tre_compx(): the pattern length is obtained
 * with hawk_count_oocstr(), or taken as 0 when regex is null.
 */
int hawk_tre_comp (hawk_tre_t* tre, const hawk_ooch_t* regex, unsigned int* nsubmat, int cflags)
{
	hawk_oow_t patlen = 0;

	if (regex) patlen = hawk_count_oocstr(regex);
	return hawk_tre_compx(tre, regex, patlen, nsubmat, cflags);
}
/* Converts the TNFA tag values and the match end offset into the
   POSIX.2 regmatch_t array.

   When match_eo is non-negative and REG_NOSUB is not set in cflags, the
   first min(tnfa->num_submatches, nmatch) entries are derived from the
   tags. Every remaining entry up to nmatch is set to {-1, -1}. */
void tre_fill_pmatch (size_t nmatch, regmatch_t pmatch[], int cflags,
	const tre_tnfa_t *tnfa, int *tags, int match_eo)
{
	unsigned int idx = 0;

	if (match_eo >= 0 && !(cflags & REG_NOSUB))
	{
		tre_submatch_data_t *sub = tnfa->submatch_data;
		unsigned int k;
		int *ancestors;

		DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo));

		/* Pass 1: derive the submatch offsets from the tags. The end tag
		   has no slot in the tags array; its value is match_eo. */
		for (idx = 0; idx < tnfa->num_submatches && idx < nmatch; idx++)
		{
			pmatch[idx].rm_so = (sub[idx].so_tag == tnfa->end_tag)?
				match_eo: tags[sub[idx].so_tag];
			pmatch[idx].rm_eo = (sub[idx].eo_tag == tnfa->end_tag)?
				match_eo: tags[sub[idx].eo_tag];

			/* An unused endpoint means that this submatch did not
			   take part in the match. */
			if (pmatch[idx].rm_so == -1 || pmatch[idx].rm_eo == -1)
			{
				pmatch[idx].rm_eo = -1;
				pmatch[idx].rm_so = -1;
			}

			DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", idx,
				sub[idx].so_tag, pmatch[idx].rm_so,
				sub[idx].eo_tag, pmatch[idx].rm_eo));
		}

		/* Pass 2: invalidate every submatch that does not lie within
		   all of its parent submatches. */
		for (idx = 0; idx < tnfa->num_submatches && idx < nmatch; idx++)
		{
			if (pmatch[idx].rm_eo == -1)
				assert(pmatch[idx].rm_so == -1);
			assert(pmatch[idx].rm_so <= pmatch[idx].rm_eo);

			ancestors = sub[idx].parents;
			if (ancestors == HAWK_NULL) continue;

			/* the parent list ends at the first negative entry */
			for (k = 0; ancestors[k] >= 0; k++)
			{
				DPRINT(("pmatch[%d] parent %d\n", idx, ancestors[k]));
				if (!(pmatch[idx].rm_so >= pmatch[ancestors[k]].rm_so
				      && pmatch[idx].rm_eo <= pmatch[ancestors[k]].rm_eo))
				{
					pmatch[idx].rm_eo = -1;
					pmatch[idx].rm_so = -1;
				}
			}
		}
	}

	/* Mark whatever is left of the caller's array as unmatched. */
	for (; idx < nmatch; idx++)
	{
		pmatch[idx].rm_so = -1;
		pmatch[idx].rm_eo = -1;
	}
}
/*
Wrapper functions for POSIX compatible regexp matching.
*/
int tre_have_backrefs (const regex_t *preg)
2019-12-13 04:29:58 +00:00
{
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
return tnfa->have_backrefs;
}
static int tre_match (
2019-12-13 04:29:58 +00:00
const regex_t* preg, const void *string, hawk_oow_t len,
tre_str_type_t type, hawk_oow_t nmatch, regmatch_t pmatch[],
int eflags)
{
tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
reg_errcode_t status;
int *tags = HAWK_NULL, eo;
if (tnfa->num_tags > 0 && nmatch > 0)
{
tags = xmalloc(preg->gem, sizeof(*tags) * tnfa->num_tags);
2019-12-13 04:29:58 +00:00
if (tags == HAWK_NULL) return REG_ESPACE;
}
/* Dispatch to the appropriate matcher. */
if (tnfa->have_backrefs || (eflags & REG_BACKTRACKING_MATCHER))
{
/* The regex has back references, use the backtracking matcher. */
status = tre_tnfa_run_backtrack (
2019-12-18 08:34:44 +00:00
preg->gem, tnfa, string, (int)len, type,
2019-12-13 04:29:58 +00:00
tags, eflags, &eo);
}
else
{
/* Exact matching, no back references, use the parallel matcher. */
status = tre_tnfa_run_parallel (
2019-12-18 08:34:44 +00:00
preg->gem, tnfa, string, (int)len, type,
2019-12-13 04:29:58 +00:00
tags, eflags, &eo);
}
if (status == REG_OK)
/* A match was found, so fill the submatch registers. */
tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
2019-12-18 08:34:44 +00:00
if (tags) xfree (preg->gem, tags);
2019-12-13 04:29:58 +00:00
return status;
}
/*
 * Matches the compiled expression against str of the given length.
 * The string type handed to tre_match() is STR_WIDE when
 * HAWK_OOCH_IS_UCH is defined and STR_BYTE otherwise.
 *
 * Returns 0 when tre_match() yields a non-positive status. Returns -1
 * when nothing has been compiled (HAWK_EREXBADPAT) or when tre_match()
 * yields a positive status; in both cases the code is recorded with
 * hawk_gem_seterrnum() on errgem if given, or on tre->gem otherwise.
 */
int hawk_tre_execx (
	hawk_tre_t* tre, const hawk_ooch_t *str, hawk_oow_t len,
	regmatch_t* pmatch, hawk_oow_t nmatch, int eflags, hawk_gem_t* errgem)
{
#if defined(HAWK_OOCH_IS_UCH)
	const tre_str_type_t strtype = STR_WIDE;
#else
	const tre_str_type_t strtype = STR_BYTE;
#endif
	hawk_gem_t* eg = (errgem? errgem: tre->gem);
	int rc;

	if (tre->TRE_REGEX_T_FIELD != HAWK_NULL)
	{
		rc = tre_match(tre, str, len, strtype, nmatch, pmatch, eflags);
		if (rc <= 0) return 0;
	}
	else
	{
		/* regular expression is bad as none is compiled yet */
		rc = HAWK_EREXBADPAT;
	}

	hawk_gem_seterrnum (eg, HAWK_NULL, rc);
	return -1;
}
2019-12-13 04:29:58 +00:00
/*
 * Same as hawk_tre_execx() but without an explicit length: the length
 * argument is passed as (hawk_oow_t)-1.
 * NOTE(review): presumably this makes the matcher treat str as
 * null-terminated - confirm against the matcher implementation.
 */
int hawk_tre_exec (
	hawk_tre_t* tre, const hawk_ooch_t* str,
	regmatch_t* pmatch, hawk_oow_t nmatch, int eflags, hawk_gem_t* errgem)
{
	const hawk_oow_t unspecified = (hawk_oow_t)-1;

	return hawk_tre_execx(tre, str, unspecified, pmatch, nmatch, eflags, errgem);
}
/*
 * Matches the compiled expression against a hawk_uch_t string of the
 * given length, always using the STR_WIDE string type.
 *
 * Returns 0 when tre_match() yields a non-positive status. Returns -1
 * when nothing has been compiled (HAWK_EREXBADPAT) or when tre_match()
 * yields a positive status; in both cases the code is recorded with
 * hawk_gem_seterrnum() on errgem if given, or on tre->gem otherwise.
 */
int hawk_tre_execuchars (
	hawk_tre_t* tre, const hawk_uch_t* str, hawk_oow_t len,
	regmatch_t* pmatch, hawk_oow_t nmatch, int eflags, hawk_gem_t* errgem)
{
	hawk_gem_t* eg = (errgem? errgem: tre->gem);
	int rc;

	if (tre->TRE_REGEX_T_FIELD != HAWK_NULL)
	{
		rc = tre_match(tre, str, len, STR_WIDE, nmatch, pmatch, eflags);
		if (rc <= 0) return 0;
	}
	else
	{
		/* regular expression is bad as none is compiled yet */
		rc = HAWK_EREXBADPAT;
	}

	hawk_gem_seterrnum (eg, HAWK_NULL, rc);
	return -1;
}
/*
 * Matches the compiled expression against a hawk_bch_t string of the
 * given length, always using the STR_BYTE string type.
 *
 * Returns 0 when tre_match() yields a non-positive status. Returns -1
 * when nothing has been compiled (HAWK_EREXBADPAT) or when tre_match()
 * yields a positive status; in both cases the code is recorded with
 * hawk_gem_seterrnum() on errgem if given, or on tre->gem otherwise.
 */
int hawk_tre_execbchars (
	hawk_tre_t* tre, const hawk_bch_t* str, hawk_oow_t len,
	regmatch_t* pmatch, hawk_oow_t nmatch, int eflags, hawk_gem_t* errgem)
{
	hawk_gem_t* eg = (errgem? errgem: tre->gem);
	int rc;

	if (tre->TRE_REGEX_T_FIELD != HAWK_NULL)
	{
		rc = tre_match(tre, str, len, STR_BYTE, nmatch, pmatch, eflags);
		if (rc <= 0) return 0;
	}
	else
	{
		/* regular expression is bad as none is compiled yet */
		rc = HAWK_EREXBADPAT;
	}

	hawk_gem_seterrnum (eg, HAWK_NULL, rc);
	return -1;
}