239 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			239 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|     Copyright (c) 2006-2020 Chung, Hyung-Hwan. All rights reserved.
 | |
| 
 | |
|     Redistribution and use in source and binary forms, with or without
 | |
|     modification, are permitted provided that the following conditions
 | |
|     are met:
 | |
|     1. Redistributions of source code must retain the above copyright
 | |
|        notice, this list of conditions and the following disclaimer.
 | |
|     2. Redistributions in binary form must reproduce the above copyright
 | |
|        notice, this list of conditions and the following disclaimer in the
 | |
|        documentation and/or other materials provided with the distribution.
 | |
| 
 | |
|     THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
 | |
|     IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | |
|     OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | |
|     IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 | |
|     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | |
|     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | |
|     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | |
|     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | |
|     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | |
|     THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
|  */
 | |
| 
 | |
| /*
 | |
|   tre-match-ut.h - TRE matcher helper definitions
 | |
| 
 | |
| This is the license, copyright notice, and disclaimer for TRE, a regex
 | |
| matching package (library and tools) with support for approximate
 | |
| matching.
 | |
| 
 | |
| Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
 | |
| All rights reserved.
 | |
| 
 | |
| Redistribution and use in source and binary forms, with or without
 | |
| modification, are permitted provided that the following conditions
 | |
| are met:
 | |
| 
 | |
|   1. Redistributions of source code must retain the above copyright
 | |
|      notice, this list of conditions and the following disclaimer.
 | |
| 
 | |
|   2. Redistributions in binary form must reproduce the above copyright
 | |
|      notice, this list of conditions and the following disclaimer in the
 | |
|      documentation and/or other materials provided with the distribution.
 | |
| 
 | |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
 | |
| ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 | |
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 | |
| A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 | |
| HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 | |
| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 | |
| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | |
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | |
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | |
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | |
| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| */
 | |
| 
 | |
| #ifdef TRE_WCHAR
 | |
| 
 | |
| #ifdef TRE_MULTIBYTE
 | |
| 
 | |
| /* Wide character and multibyte support. */
 | |
| 
 | |
| #define GET_NEXT_WCHAR() \
 | |
| 	do { \
 | |
| 		prev_c = next_c; \
 | |
| 		if (type == STR_BYTE) \
 | |
| 		{ \
 | |
| 			pos++; \
 | |
| 			if (len >= 0 && pos >= len) \
 | |
| 				next_c = '\0'; \
 | |
| 			else \
 | |
| 				next_c = (unsigned char)(*str_byte++); \
 | |
| 		} \
 | |
| 		else if (type == STR_WIDE) \
 | |
| 		{ \
 | |
| 			pos++; \
 | |
| 			if (len >= 0 && pos >= len) \
 | |
| 				next_c = '\0'; \
 | |
| 			else \
 | |
| 				next_c = *str_wide++; \
 | |
| 		} \
 | |
| 		else if (type == STR_MBS) \
 | |
| 		{ \
 | |
| 			pos += pos_add_next; \
 | |
| 			if (str_byte == NULL) \
 | |
| 				next_c = '\0'; \
 | |
| 			else \
 | |
| 			{  \
 | |
| 				size_t w;  \
 | |
| 				int max;  \
 | |
| 				if (len >= 0)  \
 | |
| 					max = len - pos;  \
 | |
| 				else  \
 | |
| 					max = 32;  \
 | |
| 				if (max <= 0)  \
 | |
| 				{  \
 | |
| 					next_c = '\0';  \
 | |
| 					pos_add_next = 1;  \
 | |
| 				}  \
 | |
| 				else  \
 | |
| 				{  \
 | |
| 					w = hawk_mbrtowc(str_byte, (size_t)max, &next_c, &mbstate); \
 | |
| 					if (w <= 0 || w > max) \
 | |
| 						return REG_NOMATCH;  \
 | |
| 					if (next_c == '\0' && len >= 0) \
 | |
| 					{ \
 | |
| 						pos_add_next = 1; \
 | |
| 						next_c = 0; \
 | |
| 						str_byte++; \
 | |
| 					} \
 | |
| 					else \
 | |
| 					{ \
 | |
| 						pos_add_next = w; \
 | |
| 						str_byte += w; \
 | |
| 					} \
 | |
| 				} \
 | |
| 			} \
 | |
| 		} \
 | |
| 	} while(/*CONSTCOND*/0)
 | |
| 
 | |
| #else /* !TRE_MULTIBYTE */
 | |
| 
 | |
| /* Wide character support, no multibyte support. */
 | |
| 
 | |
| #define GET_NEXT_WCHAR() \
 | |
| do { \
 | |
| 	prev_c = next_c;  \
 | |
| 	if (type == STR_BYTE) \
 | |
| 	{ \
 | |
| 		pos++; \
 | |
| 		if (len >= 0 && pos >= len) next_c = '\0'; \
 | |
| 		else	next_c = (unsigned char)(*str_byte++);   \
 | |
| 	}  \
 | |
| 	else if (type == STR_WIDE) \
 | |
| 	{ \
 | |
| 		pos++; \
 | |
| 		if (len >= 0 && pos >= len) next_c = '\0'; \
 | |
| 		else next_c = *str_wide++; \
 | |
| 	} \
 | |
| } while(/*CONSTCOND*/0)
 | |
| 
 | |
| #endif /* !TRE_MULTIBYTE */
 | |
| 
 | |
| #else /* !TRE_WCHAR */
 | |
| 
 | |
| /* No wide character or multibyte support. */
 | |
| 
 | |
| #define GET_NEXT_WCHAR()	\
 | |
| 	do { \
 | |
| 		prev_c = next_c; \
 | |
| 		if (type == STR_BYTE) \
 | |
| 		{ \
 | |
| 			pos++; \
 | |
| 			if (len >= 0 && pos >= len) next_c = '\0'; \
 | |
| 			else	next_c = (unsigned char)(*str_byte++); \
 | |
| 		} \
 | |
| 	} while(/*CONSTCOND*/0)
 | |
| 
 | |
| #endif /* !TRE_WCHAR */
 | |
| 
 | |
| 
 | |
| 
 | |
| #define IS_WORD_CHAR(c)	 ((c) == HAWK_T('_') || tre_isalnum(c))
 | |
| 
 | |
| #define CHECK_ASSERTIONS(assertions) \
 | |
|   (((assertions & ASSERT_AT_BOL) \
 | |
|     && (pos > 0 || reg_notbol) \
 | |
|     && (prev_c != HAWK_T('\n') || !reg_newline)) \
 | |
|    || ((assertions & ASSERT_AT_EOL) \
 | |
|        && (next_c != HAWK_T('\0') || reg_noteol) \
 | |
|        && (next_c != HAWK_T('\n') || !reg_newline)) \
 | |
|    || ((assertions & ASSERT_AT_BOW) \
 | |
|        && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
 | |
|    || ((assertions & ASSERT_AT_EOW) \
 | |
|        && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
 | |
|    || ((assertions & ASSERT_AT_WB) \
 | |
|        && (pos != 0 && next_c != HAWK_T('\0') \
 | |
| 	   && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
 | |
|    || ((assertions & ASSERT_AT_WB_NEG) \
 | |
|        && (pos == 0 || next_c == HAWK_T('\0') \
 | |
| 	   || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
 | |
| 
 | |
| #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
 | |
|   (((trans_i->assertions & ASSERT_CHAR_CLASS)                                 \
 | |
|        && !(tnfa->cflags & REG_ICASE)                                         \
 | |
|        && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class))                 \
 | |
|     || ((trans_i->assertions & ASSERT_CHAR_CLASS)                             \
 | |
|         && (tnfa->cflags & REG_ICASE)                                         \
 | |
|         && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class)     \
 | |
|         && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class))    \
 | |
|     || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG)                         \
 | |
|         && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
 | |
|                                       tnfa->cflags & REG_ICASE)))
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| /* Returns 1 if `t1' wins `t2', 0 otherwise. */
 | |
| HAWK_INLINE static int
 | |
| tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions, int *t1, int *t2)
 | |
| {
 | |
| 	int i;
 | |
| 	for (i = 0; i < num_tags; i++)
 | |
| 	{
 | |
| 		if (tag_directions[i] == TRE_TAG_MINIMIZE)
 | |
| 		{
 | |
| 			if (t1[i] < t2[i])
 | |
| 				return 1;
 | |
| 			if (t1[i] > t2[i])
 | |
| 				return 0;
 | |
| 		}
 | |
| 		else
 | |
| 		{
 | |
| 			if (t1[i] > t2[i])
 | |
| 				return 1;
 | |
| 			if (t1[i] < t2[i])
 | |
| 				return 0;
 | |
| 		}
 | |
| 	}
 | |
| 	/*  assert(0);*/
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| HAWK_INLINE static int
 | |
| tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
 | |
| {
 | |
| 	DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
 | |
| 	while (*classes != (tre_ctype_t)0)
 | |
| 		if ((!icase && tre_isctype(wc, *classes))
 | |
| 		        || (icase && (tre_isctype(tre_toupper(wc), *classes)
 | |
| 		                      || tre_isctype(tre_tolower(wc), *classes))))
 | |
| 			return 1; /* Match. */
 | |
| 		else
 | |
| 			classes++;
 | |
| 	return 0; /* No match. */
 | |
| }
 |