/*
 * $Id$
 *
    Copyright (c) 2006-2019 Chung, Hyung-Hwan. All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:
    1. Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.
    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.

    THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
    IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.
 */

#include "tre.h"
#include "tre-compile.h"
/* NOTE(review): the operand of this #include was missing in the reviewed
 * text. <qse/cmn/str.h> is restored on the assumption that it is the header
 * declaring qse_strlen(), which qse_tre_comp() uses - confirm. */
#include <qse/cmn/str.h>

/*
 * Allocates a regex object followed by 'xtnsize' bytes of extension area
 * from 'mmgr' and initializes it.
 *
 * Returns the new object, or QSE_NULL if the allocation or the
 * initialization fails. The extension area is zero-filled.
 */
qse_tre_t* qse_tre_open (qse_mmgr_t* mmgr, qse_size_t xtnsize)
{
	qse_tre_t* tre;

	tre = (qse_tre_t*) QSE_MMGR_ALLOC (mmgr, QSE_SIZEOF(qse_tre_t) + xtnsize);
	if (tre == QSE_NULL) return QSE_NULL;

	if (qse_tre_init (tre, mmgr) <= -1)
	{
		QSE_MMGR_FREE (mmgr, tre);
		return QSE_NULL;
	}

	QSE_MEMSET (QSE_XTN(tre), 0, xtnsize);
	return tre;
}

/*
 * Finalizes 'tre' and returns its memory to the memory manager stored
 * in the object.
 */
void qse_tre_close (qse_tre_t* tre)
{
	qse_tre_fini (tre);
	QSE_MMGR_FREE (tre->mmgr, tre);
}

/*
 * Zero-fills the whole object and records 'mmgr' in it.
 * Always returns 0.
 */
int qse_tre_init (qse_tre_t* tre, qse_mmgr_t* mmgr)
{
	QSE_MEMSET (tre, 0, QSE_SIZEOF(*tre));
	tre->mmgr = mmgr;
	return 0;
}

/*
 * Releases the compiled expression held by 'tre', if any, and clears the
 * field so that a repeated call does nothing.
 */
void qse_tre_fini (qse_tre_t* tre)
{
	if (tre->TRE_REGEX_T_FIELD)
	{
		tre_free (tre);
		tre->TRE_REGEX_T_FIELD = QSE_NULL;
	}
}

/* Returns the memory manager recorded in 'tre'. */
qse_mmgr_t* qse_tre_getmmgr (qse_tre_t* tre)
{
	return tre->mmgr;
}

/* Returns the extension area of 'tre' as computed by QSE_XTN(). */
void* qse_tre_getxtn (qse_tre_t* tre)
{
	return QSE_XTN (tre);
}

/*
 * Compiles the first 'n' characters of 'regex' into 'tre', replacing any
 * expression compiled earlier.
 *
 * On success returns 0 and, if 'nsubmat' is not null, stores the number of
 * submatches of the compiled expression in it. If tre_compile() returns a
 * positive value, that value is stored in tre->errnum and -1 is returned.
 */
int qse_tre_compx (
	qse_tre_t* tre, const qse_char_t* regex, qse_size_t n,
	unsigned int* nsubmat, int cflags)
{
	int ret;

	/* drop the previously compiled expression, if any */
	qse_tre_fini (tre);

	ret = tre_compile(tre, regex, n, cflags);
	if (ret > 0)
	{
		tre->TRE_REGEX_T_FIELD = QSE_NULL; /* just to make sure */
		tre->errnum = ret;
		return -1;
	}

	if (nsubmat)
	{
		*nsubmat = ((struct tnfa*)tre->TRE_REGEX_T_FIELD)->num_submatches;
	}
	return 0;
}

/*
 * Same as qse_tre_compx() with the length obtained by qse_strlen().
 * A null 'regex' is passed on with a length of 0.
 */
int qse_tre_comp (
	qse_tre_t* tre, const qse_char_t* regex,
	unsigned int* nsubmat, int cflags)
{
	return qse_tre_compx(tre, regex, (regex? qse_strlen(regex):0), nsubmat, cflags);
}

/*
 * Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
 * endpoint values.
 *
 * Entries are derived from 'tags' only when 'match_eo' is non-negative and
 * REG_NOSUB is not set in 'cflags'. Every entry of 'pmatch' (up to 'nmatch')
 * that is not derived this way is set to { -1, -1 }.
 */
void tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
		const tre_tnfa_t *tnfa, int *tags, int match_eo)
{
	tre_submatch_data_t *submatch_data;
	unsigned int i, j;
	int *parents;

	i = 0;
	if (match_eo >= 0 && !(cflags & REG_NOSUB))
	{
		/* Construct submatch offsets from the tags. */
		DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo));
		submatch_data = tnfa->submatch_data;
		while (i < tnfa->num_submatches && i < nmatch)
		{
			/* the end tag is not looked up in 'tags'; the match end
			 * offset is used for it instead */
			if (submatch_data[i].so_tag == tnfa->end_tag)
				pmatch[i].rm_so = match_eo;
			else
				pmatch[i].rm_so = tags[submatch_data[i].so_tag];

			if (submatch_data[i].eo_tag == tnfa->end_tag)
				pmatch[i].rm_eo = match_eo;
			else
				pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];

			/* If either of the endpoints were not used, this submatch
			   was not part of the match. */
			if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
				pmatch[i].rm_so = pmatch[i].rm_eo = -1;

			DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i,
				submatch_data[i].so_tag, pmatch[i].rm_so,
				submatch_data[i].eo_tag, pmatch[i].rm_eo));
			i++;
		}

		/* Reset all submatches that are not within all of their parent
		   submatches. */
		i = 0;
		while (i < tnfa->num_submatches && i < nmatch)
		{
			if (pmatch[i].rm_eo == -1)
			{
				assert(pmatch[i].rm_so == -1);
			}
			assert(pmatch[i].rm_so <= pmatch[i].rm_eo);

			/* 'parents' is a list terminated by a negative value */
			parents = submatch_data[i].parents;
			if (parents != QSE_NULL)
			{
				for (j = 0; parents[j] >= 0; j++)
				{
					DPRINT(("pmatch[%d] parent %d\n", i, parents[j]));
					if (pmatch[i].rm_so < pmatch[parents[j]].rm_so ||
					    pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
					{
						pmatch[i].rm_so = pmatch[i].rm_eo = -1;
					}
				}
			}
			i++;
		}
	}

	/* 'i' is still 0 here if the branch above was skipped, so the whole
	 * array is reset in that case; otherwise only the tail is. */
	while (i < nmatch)
	{
		pmatch[i].rm_so = -1;
		pmatch[i].rm_eo = -1;
		i++;
	}
}

/*
 * Wrapper functions for POSIX compatible regexp matching.
 */

/* Returns the have_backrefs field of the TNFA compiled into 'preg'. */
int tre_have_backrefs(const regex_t *preg)
{
	tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
	return tnfa->have_backrefs;
}

/*
 * Runs the compiled expression in 'preg' against 'string' and, when the
 * matcher reports REG_OK, fills 'pmatch' through tre_fill_pmatch().
 *
 * A temporary tag array is allocated only when the TNFA has tags and
 * 'nmatch' is greater than 0; REG_ESPACE is returned if that allocation
 * fails. Otherwise the status returned by the matcher is returned.
 */
static int tre_match(
	const regex_t* preg, const void *string, qse_size_t len,
	tre_str_type_t type, qse_size_t nmatch, regmatch_t pmatch[],
	int eflags)
{
	tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
	reg_errcode_t status;
	int *tags = QSE_NULL, eo;

	if (tnfa->num_tags > 0 && nmatch > 0)
	{
		tags = xmalloc (preg->mmgr, sizeof(*tags) * tnfa->num_tags);
		if (tags == QSE_NULL) return REG_ESPACE;
	}

	/* Dispatch to the appropriate matcher. */
	if (tnfa->have_backrefs || (eflags & REG_BACKTRACKING_MATCHER))
	{
		/* The regex has back references, use the backtracking matcher. */
		status = tre_tnfa_run_backtrack (
			preg->mmgr, tnfa, string, (int)len, type,
			tags, eflags, &eo);
	}
	else
	{
		/* Exact matching, no back references, use the parallel matcher. */
		status = tre_tnfa_run_parallel (
			preg->mmgr, tnfa, string, (int)len, type,
			tags, eflags, &eo);
	}

	if (status == REG_OK)
	{
		/* A match was found, so fill the submatch registers. */
		tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
	}

	if (tags) xfree (preg->mmgr, tags);
	return status;
}

/*
 * Matches 'str' of length 'len' against the expression compiled into 'tre'.
 *
 * Returns 0 when tre_match() returns a non-positive status. Returns -1 with
 * tre->errnum set to QSE_TRE_EBADPAT if nothing has been compiled yet, or
 * with tre->errnum set to the positive status returned by tre_match().
 */
int qse_tre_execx (
	qse_tre_t* tre, const qse_char_t *str, qse_size_t len,
	regmatch_t* pmatch, qse_size_t nmatch, int eflags)
{
	int ret;

	if (tre->TRE_REGEX_T_FIELD == QSE_NULL)
	{
		/* regular expression is bad as none is compiled yet */
		tre->errnum = QSE_TRE_EBADPAT;
		return -1;
	}

#if defined(QSE_CHAR_IS_WCHAR)
	ret = tre_match (tre, str, len, STR_WIDE, nmatch, pmatch, eflags);
#else
	ret = tre_match (tre, str, len, STR_BYTE, nmatch, pmatch, eflags);
#endif
	if (ret > 0)
	{
		tre->errnum = ret;
		return -1;
	}

	return 0;
}

/*
 * Same as qse_tre_execx() with (qse_size_t)-1 passed as the length.
 * NOTE(review): presumably this tells the matcher that 'str' is
 * null-terminated - confirm against the matcher implementation.
 */
int qse_tre_exec (
	qse_tre_t* tre, const qse_char_t* str,
	regmatch_t* pmatch, qse_size_t nmatch, int eflags)
{
	return qse_tre_execx (tre, str, (qse_size_t)-1, pmatch, nmatch, eflags);
}

/* Returns the error number recorded in 'tre'. */
qse_tre_errnum_t qse_tre_geterrnum (qse_tre_t* tre)
{
	return tre->errnum;
}

/*
 * Returns the message for the error number recorded in 'tre', or
 * "unknown error" if the number lies outside the message table.
 */
const qse_char_t* qse_tre_geterrmsg (qse_tre_t* tre)
{
	/* indexed by tre->errnum */
	static const qse_char_t* errstr[] =
	{
		QSE_T("no error"),
		QSE_T("no sufficient memory available"),
		QSE_T("no match"),
		QSE_T("invalid regular expression"),
		QSE_T("unknown collating element"),
		QSE_T("unknown character class name"),
		QSE_T("trailing backslash"),
		QSE_T("invalid backreference"),
		QSE_T("bracket imbalance"),
		QSE_T("parenthesis imbalance"),
		QSE_T("brace imbalance"),
		QSE_T("invalid bracket content"),
		QSE_T("invalid use of range operator"),
		QSE_T("invalid use of repetition operators")
	};

	return (tre->errnum >= 0 && tre->errnum < QSE_COUNTOF(errstr))?
		errstr[tre->errnum]: QSE_T("unknown error");
}