266 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			266 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|     Copyright (c) 2006-2020 Chung, Hyung-Hwan. All rights reserved.
 | |
| 
 | |
|     Redistribution and use in source and binary forms, with or without
 | |
|     modification, are permitted provided that the following conditions
 | |
|     are met:
 | |
|     1. Redistributions of source code must retain the above copyright
 | |
|        notice, this list of conditions and the following disclaimer.
 | |
|     2. Redistributions in binary form must reproduce the above copyright
 | |
|        notice, this list of conditions and the following disclaimer in the
 | |
|        documentation and/or other materials provided with the distribution.
 | |
| 
 | |
|     THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
 | |
|     IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | |
|     OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | |
|     IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 | |
|     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | |
|     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | |
|     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | |
|     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | |
|     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | |
|     THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
|  */
 | |
| 
 | |
| #include "hawk-prv.h"
 | |
| 
 | |
| 
 | |
| #undef char_t
 | |
| #undef xcs_t
 | |
| #undef is_xch_space
 | |
| #undef to_xch_space
 | |
| #undef match_rex_with_xcs
 | |
| #undef split_xchars_to_fields
 | |
| #undef tokenize_xchars
 | |
| #undef tokenize_xchars_by_rex
 | |
| 
 | |
| #define char_t hawk_bch_t
 | |
| #define xcs_t hawk_bcs_t
 | |
| #define is_xch_space hawk_is_bch_space
 | |
| #define to_xch_upper hawk_to_bch_upper
 | |
| #define match_rex_with_xcs hawk_rtx_matchrexwithbcs
 | |
| 
 | |
| #define split_xchars_to_fields hawk_rtx_fldbchars
 | |
| #define tokenize_xchars hawk_rtx_tokbcharswithbchars
 | |
| #define tokenize_xchars_by_rex hawk_rtx_tokbcharsbyrex
 | |
| 
 | |
| #include "misc-imp.h"
 | |
| 
 | |
| #undef char_t
 | |
| #undef xcs_t
 | |
| #undef is_xch_space
 | |
| #undef to_xch_upper
 | |
| #undef match_rex_with_xcs
 | |
| #undef split_xchars_to_fields
 | |
| #undef tokenize_xchars
 | |
| #undef tokenize_xchars_by_rex
 | |
| 
 | |
| #define char_t hawk_uch_t
 | |
| #define xcs_t hawk_ucs_t
 | |
| #define is_xch_space hawk_is_uch_space
 | |
| #define to_xch_upper hawk_to_uch_upper
 | |
| #define match_rex_with_xcs hawk_rtx_matchrexwithucs
 | |
| 
 | |
| #define split_xchars_to_fields hawk_rtx_flduchars
 | |
| #define tokenize_xchars hawk_rtx_tokucharswithuchars
 | |
| #define tokenize_xchars_by_rex hawk_rtx_tokucharsbyrex
 | |
| 
 | |
| #include "misc-imp.h"
 | |
| 
 | |
| 
 | |
| static int matchtre_ucs (hawk_tre_t* tre, int opt, const hawk_ucs_t* str, hawk_ucs_t* mat, hawk_ucs_t submat[9], hawk_gem_t* errgem)
 | |
| {
 | |
| 	int n;
 | |
| 	/*hawk_tre_match_t match[10] = { { 0, 0 }, };*/
 | |
| 	hawk_tre_match_t match[10];
 | |
| 
 | |
| 	HAWK_MEMSET (match, 0, HAWK_SIZEOF(match));
 | |
| 	n = hawk_tre_execuchars(tre, str->ptr, str->len, match, HAWK_COUNTOF(match), opt, errgem);
 | |
| 	if (n <= -1)
 | |
| 	{
 | |
| 		if (hawk_gem_geterrnum(errgem) == HAWK_EREXNOMAT) return 0;
 | |
| 		return -1;
 | |
| 	}
 | |
| 
 | |
| 	HAWK_ASSERT (match[0].rm_so != -1);
 | |
| 	if (mat)
 | |
| 	{
 | |
| 		mat->ptr = &str->ptr[match[0].rm_so];
 | |
| 		mat->len = match[0].rm_eo - match[0].rm_so;
 | |
| 	}
 | |
| 
 | |
| 	if (submat)
 | |
| 	{
 | |
| 		int i;
 | |
| 
 | |
| 		/* you must intialize submat before you pass into this
 | |
| 		 * function because it can abort filling */
 | |
| 		for (i = 1; i < HAWK_COUNTOF(match); i++)
 | |
| 		{
 | |
| 			if (match[i].rm_so != -1)
 | |
| 			{
 | |
| 				submat[i - 1].ptr = &str->ptr[match[i].rm_so];
 | |
| 				submat[i - 1].len = match[i].rm_eo - match[i].rm_so;
 | |
| 			}
 | |
| 			else
 | |
| 			{
 | |
| 				submat[i - 1].ptr = HAWK_NULL;
 | |
| 				submat[i - 1].len = 0;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	return 1;
 | |
| }
 | |
| 
 | |
| static int matchtre_bcs (hawk_tre_t* tre, int opt, const hawk_bcs_t* str, hawk_bcs_t* mat, hawk_bcs_t submat[9], hawk_gem_t* errgem)
 | |
| {
 | |
| 	int n;
 | |
| 	/*hawk_tre_match_t match[10] = { { 0, 0 }, };*/
 | |
| 	hawk_tre_match_t match[10];
 | |
| 
 | |
| 	HAWK_MEMSET (match, 0, HAWK_SIZEOF(match));
 | |
| 	n = hawk_tre_execbchars(tre, str->ptr, str->len, match, HAWK_COUNTOF(match), opt, errgem);
 | |
| 	if (n <= -1)
 | |
| 	{
 | |
| 		if (hawk_gem_geterrnum(errgem) == HAWK_EREXNOMAT) return 0;
 | |
| 		return -1;
 | |
| 	}
 | |
| 
 | |
| 	HAWK_ASSERT (match[0].rm_so != -1);
 | |
| 	if (mat)
 | |
| 	{
 | |
| 		mat->ptr = &str->ptr[match[0].rm_so];
 | |
| 		mat->len = match[0].rm_eo - match[0].rm_so;
 | |
| 	}
 | |
| 
 | |
| 	if (submat)
 | |
| 	{
 | |
| 		int i;
 | |
| 
 | |
| 		for (i = 1; i < HAWK_COUNTOF(match); i++)
 | |
| 		{
 | |
| 			if (match[i].rm_so != -1)
 | |
| 			{
 | |
| 				submat[i - 1].ptr = &str->ptr[match[i].rm_so];
 | |
| 				submat[i - 1].len = match[i].rm_eo - match[i].rm_so;
 | |
| 			}
 | |
| 			else
 | |
| 			{
 | |
| 				submat[i - 1].ptr = HAWK_NULL;
 | |
| 				submat[i - 1].len = 0;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	return 1;
 | |
| }
 | |
| 
 | |
| int hawk_rtx_matchvalwithucs (hawk_rtx_t* rtx, hawk_val_t* val, const hawk_ucs_t* str, const hawk_ucs_t* substr, hawk_ucs_t* match, hawk_ucs_t submat[9])
 | |
| {
 | |
| 	int ignorecase, x;
 | |
| 	int opt = HAWK_TRE_BACKTRACKING; /* TODO: option... HAWK_TRE_BACKTRACKING ??? */
 | |
| 	hawk_tre_t* code;
 | |
| 	hawk_val_type_t v_type;
 | |
| 
 | |
| 	ignorecase = rtx->gbl.ignorecase;
 | |
| 
 | |
| 	v_type = HAWK_RTX_GETVALTYPE(rtx, val);
 | |
| 	if (v_type == HAWK_VAL_REX)
 | |
| 	{
 | |
| 		code = ((hawk_val_rex_t*)val)->code[ignorecase];
 | |
| 	}
 | |
| 	else
 | |
| 	{
 | |
| 		/* convert to a string and build a regular expression */
 | |
| 		hawk_oocs_t tmp;
 | |
| 
 | |
| 		tmp.ptr = hawk_rtx_getvaloocstr(rtx, val, &tmp.len);
 | |
| 		if (tmp.ptr == HAWK_NULL) return -1;
 | |
| 
 | |
| 		x = ignorecase? hawk_rtx_buildrex(rtx, tmp.ptr, tmp.len, HAWK_NULL, &code):
 | |
| 		                hawk_rtx_buildrex(rtx, tmp.ptr, tmp.len, &code, HAWK_NULL);
 | |
| 		hawk_rtx_freevaloocstr (rtx, val, tmp.ptr);
 | |
| 		if (x <= -1) return -1;
 | |
| 	}
 | |
| 
 | |
| 	x = matchtre_ucs(
 | |
| 		code, ((str->ptr == substr->ptr)? opt: (opt | HAWK_TRE_NOTBOL)),
 | |
| 		substr, match, submat, hawk_rtx_getgem(rtx)
 | |
| 	);
 | |
| 
 | |
| 	if (v_type == HAWK_VAL_REX)
 | |
| 	{
 | |
| 		/* nothing to free */
 | |
| 	}
 | |
| 	else
 | |
| 	{
 | |
| 		hawk_tre_close (code);
 | |
| 	}
 | |
| 
 | |
| 	return x;
 | |
| }
 | |
| 
 | |
| 
 | |
| int hawk_rtx_matchvalwithbcs (hawk_rtx_t* rtx, hawk_val_t* val, const hawk_bcs_t* str, const hawk_bcs_t* substr, hawk_bcs_t* match, hawk_bcs_t submat[9])
 | |
| {
 | |
| 	int ignorecase, x;
 | |
| 	int opt = HAWK_TRE_BACKTRACKING; /* TODO: option... HAWK_TRE_BACKTRACKING ??? */
 | |
| 	hawk_tre_t* code;
 | |
| 	hawk_val_type_t v_type;
 | |
| 
 | |
| 	ignorecase = rtx->gbl.ignorecase;
 | |
| 
 | |
| 	v_type = HAWK_RTX_GETVALTYPE(rtx, val);
 | |
| 	if (v_type == HAWK_VAL_REX)
 | |
| 	{
 | |
| 		code = ((hawk_val_rex_t*)val)->code[ignorecase];
 | |
| 	}
 | |
| 	else
 | |
| 	{
 | |
| 		/* convert to a string and build a regular expression */
 | |
| 		hawk_oocs_t tmp;
 | |
| 
 | |
| 		tmp.ptr = hawk_rtx_getvaloocstr(rtx, val, &tmp.len);
 | |
| 		if (HAWK_UNLIKELY(!tmp.ptr)) return -1;
 | |
| 
 | |
| 		x = ignorecase? hawk_rtx_buildrex(rtx, tmp.ptr, tmp.len, HAWK_NULL, &code):
 | |
| 		                hawk_rtx_buildrex(rtx, tmp.ptr, tmp.len, &code, HAWK_NULL);
 | |
| 		hawk_rtx_freevaloocstr (rtx, val, tmp.ptr);
 | |
| 		if (x <= -1) return -1;
 | |
| 	}
 | |
| 
 | |
| 	x = matchtre_bcs(
 | |
| 		code, ((str->ptr == substr->ptr)? opt: (opt | HAWK_TRE_NOTBOL)),
 | |
| 		substr, match, submat, hawk_rtx_getgem(rtx)
 | |
| 	);
 | |
| 
 | |
| 	if (v_type == HAWK_VAL_REX)
 | |
| 	{
 | |
| 		/* nothing to free */
 | |
| 	}
 | |
| 	else
 | |
| 	{
 | |
| 		hawk_tre_close (code);
 | |
| 	}
 | |
| 
 | |
| 	return x;
 | |
| }
 | |
| 
 | |
| 
 | |
| int hawk_rtx_matchrexwithucs (hawk_rtx_t* rtx, hawk_tre_t* code, const hawk_ucs_t* str, const hawk_ucs_t* substr, hawk_ucs_t* match, hawk_ucs_t submat[9])
 | |
| {
 | |
| 	int opt = HAWK_TRE_BACKTRACKING; /* TODO: option... HAWK_TRE_BACKTRACKING or others??? */
 | |
| 	return matchtre_ucs(
 | |
| 		code, ((str->ptr == substr->ptr)? opt: (opt | HAWK_TRE_NOTBOL)),
 | |
| 		substr, match, submat, hawk_rtx_getgem(rtx)
 | |
| 	);
 | |
| }
 | |
| 
 | |
| int hawk_rtx_matchrexwithbcs (hawk_rtx_t* rtx, hawk_tre_t* code, const hawk_bcs_t* str, const hawk_bcs_t* substr, hawk_bcs_t* match, hawk_bcs_t submat[9])
 | |
| {
 | |
| 	int opt = HAWK_TRE_BACKTRACKING; /* TODO: option... HAWK_TRE_BACKTRACKING or others??? */
 | |
| 	return matchtre_bcs(
 | |
| 		code, ((str->ptr == substr->ptr)? opt: (opt | HAWK_TRE_NOTBOL)),
 | |
| 		substr, match, submat, hawk_rtx_getgem(rtx)
 | |
| 	);
 | |
| }
 |