hawk/lib/misc.c

259 lines
7.2 KiB
C
Raw Normal View History

/*
Copyright (c) 2006-2020 Chung, Hyung-Hwan. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "hawk-prv.h"
/*
 * misc-imp.h is included twice below with a different set of helper
 * macros each time: first mapped to the hawk_bch_t/hawk_bcs_t names and
 * then to the hawk_uch_t/hawk_ucs_t names.
 *
 * Every macro that is defined for an inclusion is undefined first, so the
 * #undef list must name exactly the same macros as the #define list that
 * follows it.
 */

/* ---- first inclusion: hawk_bch_t / hawk_bcs_t ---- */
#undef char_t
#undef xcs_t
#undef is_xch_space
#undef to_xch_upper
#undef match_rex_with_xcs
#undef split_xchars_to_fields
#undef tokenize_xchars
#undef tokenize_xchars_by_rex

#define char_t hawk_bch_t
#define xcs_t hawk_bcs_t
#define is_xch_space hawk_is_bch_space
#define to_xch_upper hawk_to_bch_upper
#define match_rex_with_xcs hawk_rtx_matchrexwithbcs
#define split_xchars_to_fields hawk_rtx_fldbchars
#define tokenize_xchars hawk_rtx_tokbcharswithbchars
#define tokenize_xchars_by_rex hawk_rtx_tokbcharsbyrex

#include "misc-imp.h"

/* ---- second inclusion: hawk_uch_t / hawk_ucs_t ---- */
#undef char_t
#undef xcs_t
#undef is_xch_space
#undef to_xch_upper
#undef match_rex_with_xcs
#undef split_xchars_to_fields
#undef tokenize_xchars
#undef tokenize_xchars_by_rex

#define char_t hawk_uch_t
#define xcs_t hawk_ucs_t
#define is_xch_space hawk_is_uch_space
#define to_xch_upper hawk_to_uch_upper
#define match_rex_with_xcs hawk_rtx_matchrexwithucs
#define split_xchars_to_fields hawk_rtx_flduchars
#define tokenize_xchars hawk_rtx_tokucharswithuchars
#define tokenize_xchars_by_rex hawk_rtx_tokucharsbyrex

#include "misc-imp.h"
2019-12-13 04:29:58 +00:00
/*
 * Runs the compiled expression 'tre' over the hawk_ucs_t string 'str'.
 *
 * On a match, 'mat' (when not null) receives the span of the whole match.
 * When 'submat' is not null, slot k-1 receives the span of capture group k
 * (k = 1..9) for every group whose reported start offset is not -1. Slots
 * of the remaining groups are not written at all, so the caller must
 * initialize 'submat' before the call.
 *
 * Returns 1 on a match, 0 when the engine fails with HAWK_EREXNOMAT
 * recorded in 'errgem', and -1 on any other failure.
 */
static int matchtre_ucs (hawk_tre_t* tre, int opt, const hawk_ucs_t* str, hawk_ucs_t* mat, hawk_ucs_t submat[9], hawk_gem_t* errgem)
{
	hawk_tre_match_t caps[10]; /* [0] is the whole match, [1]..[9] the groups */
	int rc;

	HAWK_MEMSET (caps, 0, HAWK_SIZEOF(caps));

	rc = hawk_tre_execuchars(tre, str->ptr, str->len, caps, HAWK_COUNTOF(caps), opt, errgem);
	if (rc <= -1)
	{
		/* "no match" is reported as 0 rather than as an error */
		return (hawk_gem_geterrnum(errgem) == HAWK_EREXNOMAT)? 0: -1;
	}

	HAWK_ASSERT (caps[0].rm_so != -1);

	if (mat)
	{
		mat->ptr = str->ptr + caps[0].rm_so;
		mat->len = caps[0].rm_eo - caps[0].rm_so;
	}

	if (submat)
	{
		int g;

		/* you must initialize submat before you pass it into this
		 * function because groups without a start offset are skipped */
		for (g = 1; g < HAWK_COUNTOF(caps); g++)
		{
			if (caps[g].rm_so == -1) continue;

			submat[g - 1].ptr = str->ptr + caps[g].rm_so;
			submat[g - 1].len = caps[g].rm_eo - caps[g].rm_so;
		}
	}

	return 1;
}
/*
 * Runs the compiled expression 'tre' over the hawk_bcs_t string 'str'.
 *
 * On a match, 'mat' (when not null) receives the span of the whole match.
 * When 'submat' is not null, slot k-1 receives the span of capture group k
 * (k = 1..9) for every group whose reported start offset is not -1. Slots
 * of the remaining groups are not written at all, so the caller must
 * initialize 'submat' before the call.
 *
 * Returns 1 on a match, 0 when the engine fails with HAWK_EREXNOMAT
 * recorded in 'errgem', and -1 on any other failure.
 */
static int matchtre_bcs (hawk_tre_t* tre, int opt, const hawk_bcs_t* str, hawk_bcs_t* mat, hawk_bcs_t submat[9], hawk_gem_t* errgem)
{
	hawk_tre_match_t caps[10]; /* [0] is the whole match, [1]..[9] the groups */
	int rc;

	HAWK_MEMSET (caps, 0, HAWK_SIZEOF(caps));

	rc = hawk_tre_execbchars(tre, str->ptr, str->len, caps, HAWK_COUNTOF(caps), opt, errgem);
	if (rc <= -1)
	{
		/* "no match" is reported as 0 rather than as an error */
		return (hawk_gem_geterrnum(errgem) == HAWK_EREXNOMAT)? 0: -1;
	}

	HAWK_ASSERT (caps[0].rm_so != -1);

	if (mat)
	{
		mat->ptr = str->ptr + caps[0].rm_so;
		mat->len = caps[0].rm_eo - caps[0].rm_so;
	}

	if (submat)
	{
		int g;

		/* you must initialize submat before you pass it into this
		 * function because groups without a start offset are skipped */
		for (g = 1; g < HAWK_COUNTOF(caps); g++)
		{
			if (caps[g].rm_so == -1) continue;

			submat[g - 1].ptr = str->ptr + caps[g].rm_so;
			submat[g - 1].len = caps[g].rm_eo - caps[g].rm_so;
		}
	}

	return 1;
}
/*
 * Matches 'substr' against the regular expression given by 'val'.
 *
 * If 'val' is of the type HAWK_VAL_REX, the expression already stored in
 * the value is used, selected by rtx->gbl.ignorecase. Otherwise the value
 * is converted to a string and built into a temporary expression, which
 * is closed again before this function returns.
 *
 * HAWK_TRE_NOTBOL is added to the match options whenever 'substr' does
 * not start at the same address as 'str'.
 *
 * Returns -1 if the value cannot be converted to a string or built into
 * an expression; otherwise returns whatever matchtre_ucs() returns.
 */
int hawk_rtx_matchvalwithucs (hawk_rtx_t* rtx, hawk_val_t* val, const hawk_ucs_t* str, const hawk_ucs_t* substr, hawk_ucs_t* match, hawk_ucs_t submat[9])
{
	hawk_tre_t* code;
	hawk_val_type_t v_type;
	int ignorecase, opt, rc;

	ignorecase = rtx->gbl.ignorecase;
	v_type = HAWK_RTX_GETVALTYPE(rtx, val);

	if (v_type == HAWK_VAL_REX)
	{
		/* borrow the expression owned by the value */
		code = ((hawk_val_rex_t*)val)->code[ignorecase];
	}
	else
	{
		/* convert to a string and build a regular expression */
		hawk_oocs_t text;

		text.ptr = hawk_rtx_getvaloocstr(rtx, val, &text.len);
		if (!text.ptr) return -1;

		if (ignorecase)
			rc = hawk_rtx_buildrex(rtx, text.ptr, text.len, HAWK_NULL, &code);
		else
			rc = hawk_rtx_buildrex(rtx, text.ptr, text.len, &code, HAWK_NULL);

		/* the string is released whether or not the build succeeded */
		hawk_rtx_freevaloocstr (rtx, val, text.ptr);
		if (rc <= -1) return -1;
	}

	opt = HAWK_TRE_BACKTRACKING; /* TODO: option... HAWK_TRE_BACKTRACKING ??? */
	if (str->ptr != substr->ptr) opt |= HAWK_TRE_NOTBOL;

	rc = matchtre_ucs(code, opt, substr, match, submat, hawk_rtx_getgem(rtx));

	/* only an expression built in this function is closed here */
	if (v_type != HAWK_VAL_REX) hawk_tre_close (code);

	return rc;
}
/*
 * Matches 'substr' against the regular expression given by 'val'.
 *
 * If 'val' is of the type HAWK_VAL_REX, the expression already stored in
 * the value is used, selected by rtx->gbl.ignorecase. Otherwise the value
 * is converted to a string and built into a temporary expression, which
 * is closed again before this function returns.
 *
 * HAWK_TRE_NOTBOL is added to the match options whenever 'substr' does
 * not start at the same address as 'str'.
 *
 * Returns -1 if the value cannot be converted to a string or built into
 * an expression; otherwise returns whatever matchtre_bcs() returns.
 */
int hawk_rtx_matchvalwithbcs (hawk_rtx_t* rtx, hawk_val_t* val, const hawk_bcs_t* str, const hawk_bcs_t* substr, hawk_bcs_t* match, hawk_bcs_t submat[9])
{
	hawk_tre_t* code;
	hawk_val_type_t v_type;
	int ignorecase, opt, rc;

	ignorecase = rtx->gbl.ignorecase;
	v_type = HAWK_RTX_GETVALTYPE(rtx, val);

	if (v_type == HAWK_VAL_REX)
	{
		/* borrow the expression owned by the value */
		code = ((hawk_val_rex_t*)val)->code[ignorecase];
	}
	else
	{
		/* convert to a string and build a regular expression */
		hawk_oocs_t text;

		text.ptr = hawk_rtx_getvaloocstr(rtx, val, &text.len);
		if (HAWK_UNLIKELY(!text.ptr)) return -1;

		if (ignorecase)
			rc = hawk_rtx_buildrex(rtx, text.ptr, text.len, HAWK_NULL, &code);
		else
			rc = hawk_rtx_buildrex(rtx, text.ptr, text.len, &code, HAWK_NULL);

		/* the string is released whether or not the build succeeded */
		hawk_rtx_freevaloocstr (rtx, val, text.ptr);
		if (rc <= -1) return -1;
	}

	opt = HAWK_TRE_BACKTRACKING; /* TODO: option... HAWK_TRE_BACKTRACKING ??? */
	if (str->ptr != substr->ptr) opt |= HAWK_TRE_NOTBOL;

	rc = matchtre_bcs(code, opt, substr, match, submat, hawk_rtx_getgem(rtx));

	/* only an expression built in this function is closed here */
	if (v_type != HAWK_VAL_REX) hawk_tre_close (code);

	return rc;
}
/*
 * Matches 'substr' against the already compiled expression 'code'.
 *
 * HAWK_TRE_NOTBOL is added to the match options whenever 'substr' does
 * not start at the same address as 'str'. 'match' and 'submat' are
 * passed through to matchtre_ucs() and its return value is returned
 * unchanged.
 */
int hawk_rtx_matchrexwithucs (hawk_rtx_t* rtx, hawk_tre_t* code, const hawk_ucs_t* str, const hawk_ucs_t* substr, hawk_ucs_t* match, hawk_ucs_t submat[9])
{
	int flags = HAWK_TRE_BACKTRACKING; /* TODO: option... HAWK_TRE_BACKTRACKING or others??? */

	if (str->ptr != substr->ptr) flags |= HAWK_TRE_NOTBOL;

	return matchtre_ucs(code, flags, substr, match, submat, hawk_rtx_getgem(rtx));
}
/*
 * Matches 'substr' against the already compiled expression 'code'.
 *
 * HAWK_TRE_NOTBOL is added to the match options whenever 'substr' does
 * not start at the same address as 'str'. 'match' and 'submat' are
 * passed through to matchtre_bcs() and its return value is returned
 * unchanged.
 */
int hawk_rtx_matchrexwithbcs (hawk_rtx_t* rtx, hawk_tre_t* code, const hawk_bcs_t* str, const hawk_bcs_t* substr, hawk_bcs_t* match, hawk_bcs_t submat[9])
{
	int flags = HAWK_TRE_BACKTRACKING; /* TODO: option... HAWK_TRE_BACKTRACKING or others??? */

	if (str->ptr != substr->ptr) flags |= HAWK_TRE_NOTBOL;

	return matchtre_bcs(code, flags, substr, match, submat, hawk_rtx_getgem(rtx));
}