| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2020-04-16 03:42:30 +00:00
										 |  |  |     Copyright (c) 2006-2020 Chung, Hyung-Hwan. All rights reserved. | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     Redistribution and use in source and binary forms, with or without | 
					
						
							|  |  |  |     modification, are permitted provided that the following conditions | 
					
						
							|  |  |  |     are met: | 
					
						
							|  |  |  |     1. Redistributions of source code must retain the above copyright | 
					
						
							|  |  |  |        notice, this list of conditions and the following disclaimer. | 
					
						
							|  |  |  |     2. Redistributions in binary form must reproduce the above copyright | 
					
						
							|  |  |  |        notice, this list of conditions and the following disclaimer in the | 
					
						
							|  |  |  |        documentation and/or other materials provided with the distribution. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR | 
					
						
							|  |  |  |     IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | 
					
						
							|  |  |  |     OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | 
					
						
							|  |  |  |     IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | 
					
						
							|  |  |  |     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | 
					
						
							|  |  |  |     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
					
						
							|  |  |  |     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
					
						
							|  |  |  |     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
					
						
							|  |  |  |     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 
					
						
							|  |  |  |     THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |   tre-parse.c - Regexp parser | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This is the license, copyright notice, and disclaimer for TRE, a regex | 
					
						
							|  |  |  | matching package (library and tools) with support for approximate | 
					
						
							|  |  |  | matching. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> | 
					
						
							|  |  |  | All rights reserved. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Redistribution and use in source and binary forms, with or without | 
					
						
							|  |  |  | modification, are permitted provided that the following conditions | 
					
						
							|  |  |  | are met: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   1. Redistributions of source code must retain the above copyright | 
					
						
							|  |  |  |      notice, this list of conditions and the following disclaimer. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   2. Redistributions in binary form must reproduce the above copyright | 
					
						
							|  |  |  |      notice, this list of conditions and the following disclaimer in the | 
					
						
							|  |  |  |      documentation and/or other materials provided with the distribution. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS | 
					
						
							|  |  |  | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
					
						
							|  |  |  | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
					
						
							|  |  |  | A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT | 
					
						
							|  |  |  | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
					
						
							|  |  |  | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
					
						
							|  |  |  | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
					
						
							|  |  |  | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
					
						
							|  |  |  | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
					
						
							|  |  |  | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
					
						
							|  |  |  | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |   This parser is just a simple recursive descent parser for POSIX.2 | 
					
						
							|  |  |  |   regexps.  The parser supports both the obsolete default syntax and | 
					
						
							|  |  |  |   the "extended" syntax, and some nonstandard extensions. | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "tre-prv.h"
 | 
					
						
							|  |  |  | #include "tre-ast.h"
 | 
					
						
							|  |  |  | #include "tre-stack.h"
 | 
					
						
							|  |  |  | #include "tre-parse.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							/* Characters that carry a special meaning in regexp syntax.  Each one is
   wrapped in HAWK_T() so the literal is spelled through the project's
   character-literal macro. */
#define CHAR_PIPE          HAWK_T('|')
#define CHAR_LPAREN        HAWK_T('(')
#define CHAR_RPAREN        HAWK_T(')')
#define CHAR_LBRACE        HAWK_T('{')
#define CHAR_RBRACE        HAWK_T('}')
#define CHAR_LBRACKET      HAWK_T('[')
#define CHAR_RBRACKET      HAWK_T(']')
#define CHAR_MINUS         HAWK_T('-')
#define CHAR_STAR          HAWK_T('*')
#define CHAR_QUESTIONMARK  HAWK_T('?')
#define CHAR_PLUS          HAWK_T('+')
#define CHAR_PERIOD        HAWK_T('.')
#define CHAR_COLON         HAWK_T(':')
#define CHAR_EQUAL         HAWK_T('=')
#define CHAR_COMMA         HAWK_T(',')
#define CHAR_CARET         HAWK_T('^')
#define CHAR_DOLLAR        HAWK_T('$')
#define CHAR_BACKSLASH     HAWK_T('\\')
#define CHAR_HASH          HAWK_T('#')
#define CHAR_TILDE         HAWK_T('~')
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							/* Backslash shorthands and their expansions: the single-character escapes
   (\t, \n, ...) followed by the class macros (\w, \s, \d and their negated
   forms).  The entry whose `expansion' is NULL terminates the table. */
static const struct tre_macro_struct
{
	const char c;            /* character following the backslash */
	const char *expansion;   /* text the macro expands to */
} tre_macros[] =
{
	/* control characters */
	{'t', "\t"},
	{'n', "\n"},
	{'r', "\r"},
	{'f', "\f"},
	{'a', "\a"},
	{'e', "\033"},
	/* character-class shorthands */
	{'w', "[[:alnum:]_]"},
	{'W', "[^[:alnum:]_]"},
	{'s', "[[:space:]]"},
	{'S', "[^[:space:]]"},
	{'d', "[[:digit:]]"},
	{'D', "[^[:digit:]]"},
	/* terminator */
	{ 0, NULL }
};
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static HAWK_INLINE int xdigit_to_num (hawk_ooch_t c) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	return (c >= '0' && c <= '9')? (c - '0'): | 
					
						
							|  |  |  | 	       (c >= 'A' && c <= 'F')? (c - 'A' + 10): | 
					
						
							|  |  |  | 	       (c >= 'a' && c <= 'f')? (c - 'a' + 10): -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
 | 
					
						
							|  |  |  |    must have at least `len' items.  Sets buf[0] to zero if the there | 
					
						
							|  |  |  |    is no match in `tre_macros'. */ | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end, | 
					
						
							|  |  |  |                  tre_char_t *buf, size_t buf_len) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	buf[0] = 0; | 
					
						
							|  |  |  | 	if (regex >= regex_end) | 
					
						
							|  |  |  | 		return; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for (i = 0; tre_macros[i].expansion; i++) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (tre_macros[i].c == *regex) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			unsigned int j; | 
					
						
							|  |  |  | 			DPRINT(("Expanding macro '%c' => '%s'\n", | 
					
						
							|  |  |  | 			        tre_macros[i].c, tre_macros[i].expansion)); | 
					
						
							| 
									
										
										
										
											2020-12-02 16:07:06 +00:00
										 |  |  | 			/* HAWK */ | 
					
						
							|  |  |  | 			/*for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)*/ | 
					
						
							|  |  |  | 			for (j = 0; tre_macros[i].expansion[j] && j < buf_len - 1; j++) | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				buf[j] = tre_macros[i].expansion[j]; | 
					
						
							| 
									
										
										
										
											2020-12-02 16:07:06 +00:00
										 |  |  | 			/* END HAWK */ | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			buf[j] = 0; | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static reg_errcode_t | 
					
						
							|  |  |  | tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i, tre_ast_node_t ***items) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	reg_errcode_t status; | 
					
						
							|  |  |  | 	tre_ast_node_t **array = *items; | 
					
						
							|  |  |  | 	/* Allocate more space if necessary. */ | 
					
						
							|  |  |  | 	if (*i >= *max_i) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		tre_ast_node_t **new_items; | 
					
						
							|  |  |  | 		DPRINT(("out of array space, i = %d\n", *i)); | 
					
						
							|  |  |  | 		/* If the array is already 1024 items large, give up -- there's
 | 
					
						
							|  |  |  | 		 probably an error in the regexp (e.g. not a '\0' terminated | 
					
						
							|  |  |  | 		 string and missing ']') */ | 
					
						
							|  |  |  | 		if (*max_i > 1024) | 
					
						
							|  |  |  | 			return REG_ESPACE; | 
					
						
							|  |  |  | 		*max_i *= 2; | 
					
						
							| 
									
										
										
										
											2019-12-18 08:34:44 +00:00
										 |  |  | 		new_items = xrealloc(mem->gem, array, sizeof(*items) * *max_i); | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		if (new_items == NULL) | 
					
						
							|  |  |  | 			return REG_ESPACE; | 
					
						
							|  |  |  | 		*items = array = new_items; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	array[*i] = tre_ast_new_literal(mem, min, max, -1); | 
					
						
							|  |  |  | 	status = array[*i] == NULL ? REG_ESPACE : REG_OK; | 
					
						
							|  |  |  | 	(*i)++; | 
					
						
							|  |  |  | 	return status; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if defined(HAWK_OOCH_IS_BCH)
 | 
					
						
							|  |  |  | /* Expands a character class to character ranges. */ | 
					
						
							|  |  |  | static reg_errcode_t | 
					
						
							|  |  |  | tre_expand_ctype(tre_mem_t mem, tre_ctype_t class, tre_ast_node_t ***items, | 
					
						
							|  |  |  |                  int *i, int *max_i, int cflags) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	reg_errcode_t status = REG_OK; | 
					
						
							|  |  |  | 	tre_cint_t c; | 
					
						
							|  |  |  | 	int j, min = -1, max = 0; | 
					
						
							|  |  |  | 	/* HAWK: deleted */ | 
					
						
							|  |  |  | 	/*assert(TRE_MB_CUR_MAX == 1);*/ | 
					
						
							|  |  |  | 	/* END HAWK */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	DPRINT(("  expanding class to character ranges\n")); | 
					
						
							|  |  |  | 	for (j = 0; (j < 256) && (status == REG_OK); j++) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		c = j; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 		if (tre_isctype(c, class) || | 
					
						
							|  |  |  | 		    ((cflags & REG_ICASE) && (tre_isctype(tre_tolower(c), class) || | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		                              tre_isctype(tre_toupper(c), class)))) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			if (min < 0) min = c; | 
					
						
							|  |  |  | 			max = c; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else if (min >= 0) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			DPRINT(("  range %c (%d) to %c (%d)\n", min, min, max, max)); | 
					
						
							|  |  |  | 			status = tre_new_item(mem, min, max, i, max_i, items); | 
					
						
							|  |  |  | 			min = -1; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if (min >= 0 && status == REG_OK) | 
					
						
							|  |  |  | 		status = tre_new_item(mem, min, max, i, max_i, items); | 
					
						
							|  |  |  | 	return status; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | tre_compare_items(const void *a, const void *b, void* ctx) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a; | 
					
						
							|  |  |  | 	const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b; | 
					
						
							|  |  |  | 	tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj; | 
					
						
							| 
									
										
										
										
											2020-12-02 16:07:06 +00:00
										 |  |  | 	/* HAWK: changed int to long */ | 
					
						
							|  |  |  | 	/*int a_min = l_a->code_min, b_min = l_b->code_min;*/ | 
					
						
							|  |  |  | 	long a_min = l_a->code_min, b_min = l_b->code_min; | 
					
						
							|  |  |  | 	/* END HAWK */ | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	if (a_min < b_min) | 
					
						
							|  |  |  | 		return -1; | 
					
						
							|  |  |  | 	else if (a_min > b_min) | 
					
						
							|  |  |  | 		return 1; | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 		return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							/* Maximum number of character classes that can occur in a negated bracket
   expression. */
#define MAX_NEG_CLASSES 64

/* Maximum length of character class names.
   NOTE(review): this is defined with an empty replacement list; the limit
   visible in tre_parse_bracket_items() is the literal 63 -- confirm whether
   this macro is still needed. */
#define MAX_CLASS_NAME

/* Expands to two call arguments -- the number of characters from `re' up to
   ctx->re_end, followed by `re' itself -- for a "%.*" conversion in DPRINT().
   Requires a variable named `ctx' to be in scope. */
#define REST(re) (int)(ctx->re_end - (re)), (re)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static reg_errcode_t | 
					
						
							|  |  |  | tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate, | 
					
						
							|  |  |  |                         tre_ctype_t neg_classes[], int *num_neg_classes, | 
					
						
							|  |  |  |                         tre_ast_node_t ***items, int *num_items, | 
					
						
							|  |  |  |                         int *items_size) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const tre_char_t *re = ctx->re; | 
					
						
							|  |  |  | 	reg_errcode_t status = REG_OK; | 
					
						
							|  |  |  | 	tre_ctype_t class = (tre_ctype_t)0; | 
					
						
							|  |  |  | 	int i = *num_items; | 
					
						
							|  |  |  | 	int max_i = *items_size; | 
					
						
							|  |  |  | 	int skip; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Build an array of the items in the bracket expression. */ | 
					
						
							|  |  |  | 	while (status == REG_OK) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		skip = 0; | 
					
						
							|  |  |  | 		if (re == ctx->re_end) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			status = REG_EBRACK; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else if (*re == CHAR_RBRACKET && re > ctx->re) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			DPRINT(("tre_parse_bracket:	done: '%.*" STRF "'\n", REST(re))); | 
					
						
							|  |  |  | 			re++; | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			tre_cint_t min = 0, max = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			class = (tre_ctype_t)0; | 
					
						
							|  |  |  | 			if (re + 2 < ctx->re_end | 
					
						
							|  |  |  | 			        && *(re + 1) == CHAR_MINUS && *(re + 2) != CHAR_RBRACKET) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				DPRINT(("tre_parse_bracket:  range: '%.*" STRF "'\n", REST(re))); | 
					
						
							|  |  |  | 				min = *re; | 
					
						
							|  |  |  | 				max = *(re + 2); | 
					
						
							|  |  |  | 				re += 3; | 
					
						
							|  |  |  | 				/* XXX - Should use collation order instead of encoding values
 | 
					
						
							|  |  |  | 				 in character ranges. */ | 
					
						
							|  |  |  | 				if (min > max) | 
					
						
							|  |  |  | 					status = REG_ERANGE; | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2020-03-09 15:36:01 +00:00
										 |  |  | 		/* HAWK: handle \ as an escaper  */ | 
					
						
							| 
									
										
										
										
											2020-03-09 15:29:31 +00:00
										 |  |  | 			else if (re + 1 < ctx->re_end && *re == CHAR_BACKSLASH) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				/* escaped character inside [] */ | 
					
						
							|  |  |  | 				min = max = *(re + 1); | 
					
						
							|  |  |  | 				re += 2; | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2020-03-09 15:36:01 +00:00
										 |  |  | 		/* END HAWK */ | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			else if (re + 1 < ctx->re_end | 
					
						
							|  |  |  | 			         && *re == CHAR_LBRACKET && *(re + 1) == CHAR_PERIOD) | 
					
						
							|  |  |  | 				status = REG_ECOLLATE; | 
					
						
							|  |  |  | 			else if (re + 1 < ctx->re_end | 
					
						
							|  |  |  | 			         && *re == CHAR_LBRACKET && *(re + 1) == CHAR_EQUAL) | 
					
						
							|  |  |  | 				status = REG_ECOLLATE; | 
					
						
							|  |  |  | 			else if (re + 1 < ctx->re_end | 
					
						
							|  |  |  | 			         && *re == CHAR_LBRACKET && *(re + 1) == CHAR_COLON) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				const tre_char_t *endptr = re + 2; | 
					
						
							| 
									
										
										
										
											2020-12-02 16:07:06 +00:00
										 |  |  | 				/* HAWK: changed int to hawk_oow_t */ | 
					
						
							|  |  |  | 				/*int len;*/ | 
					
						
							|  |  |  | 				hawk_oow_t len; | 
					
						
							|  |  |  | 				/* END HAWK */ | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				DPRINT(("tre_parse_bracket:  class: '%.*" STRF "'\n", REST(re))); | 
					
						
							|  |  |  | 				while (endptr < ctx->re_end && *endptr != CHAR_COLON) endptr++; | 
					
						
							|  |  |  | 				if (endptr != ctx->re_end) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					/* HAWK: bug fix of not checking ending ] */ | 
					
						
							|  |  |  | 					if (*(endptr + 1) != CHAR_RBRACKET) status = REG_ECTYPE; | 
					
						
							|  |  |  | 					else | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 					/* END HAWK */ | 
					
						
							|  |  |  | 						len = MIN(endptr - re - 2, 63); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 						if (hawk_oochars_to_ooch_prop(re + 2, len, &class) <= -1) status = REG_ECTYPE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 						/* Optimize character classes for 8 bit character sets. */ | 
					
						
							|  |  |  | #if defined(HAWK_OOCH_IS_BCH)
 | 
					
						
							|  |  |  | 						/* HAWK: not possible to count on MB_CUR_MAX since
 | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 						 *      this library is designed to support per-object | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 						 *      or per-context character encoding using hawk_cmgr_t */ | 
					
						
							|  |  |  | 						/* if (status == REG_OK && TRE_MB_CUR_MAX == 1) */ | 
					
						
							|  |  |  | 						/* END HAWK */ | 
					
						
							| 
									
										
										
										
											2020-03-09 15:29:31 +00:00
										 |  |  | 						if (status == REG_OK) | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 						{ | 
					
						
							|  |  |  | 							status = tre_expand_ctype(ctx->mem, class, items, &i, &max_i, ctx->cflags); | 
					
						
							|  |  |  | 							class = (tre_ctype_t)0; | 
					
						
							|  |  |  | 							skip = 1; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 						re = endptr + 2; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else status = REG_ECTYPE; | 
					
						
							|  |  |  | 				min = 0; | 
					
						
							|  |  |  | 				max = TRE_CHAR_MAX; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re))); | 
					
						
							|  |  |  | 				if (*re == CHAR_MINUS && *(re + 1) != CHAR_RBRACKET && ctx->re != re) | 
					
						
							|  |  |  | 					/* Two ranges are not allowed to share and endpoint. */ | 
					
						
							|  |  |  | 					status = REG_ERANGE; | 
					
						
							|  |  |  | 				min = max = *re++; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			if (status != REG_OK) break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			if (class && negate) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				if (*num_neg_classes >= MAX_NEG_CLASSES) | 
					
						
							|  |  |  | 					status = REG_ESPACE; | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 					neg_classes[(*num_neg_classes)++] = class; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else if (!skip) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				status = tre_new_item(ctx->mem, min, max, &i, &max_i, items); | 
					
						
							|  |  |  | 				if (status != REG_OK) break; | 
					
						
							|  |  |  | 				((tre_literal_t*)((*items)[i-1])->obj)->u.class = class; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			/* Add opposite-case counterpoints if REG_ICASE is present.
 | 
					
						
							|  |  |  | 			   This is broken if there are more than two "same" characters. */ | 
					
						
							|  |  |  | 			if ((ctx->cflags & REG_ICASE) && !class && status == REG_OK && !skip) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				tre_cint_t cmin, ccurr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				DPRINT(("adding opposite-case counterpoints\n")); | 
					
						
							|  |  |  | 				while (min <= max) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					if (tre_islower(min)) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						cmin = ccurr = tre_toupper(min++); | 
					
						
							|  |  |  | 						while (tre_islower(min) && tre_toupper(min) == ccurr + 1 && min <= max) | 
					
						
							|  |  |  | 							ccurr = tre_toupper(min++); | 
					
						
							|  |  |  | 						status = tre_new_item(ctx->mem, cmin, ccurr, &i, &max_i, items); | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					else if (tre_isupper(min)) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						cmin = ccurr = tre_tolower(min++); | 
					
						
							|  |  |  | 						while (tre_isupper(min) && tre_tolower(min) == ccurr + 1 && min <= max) | 
					
						
							|  |  |  | 							ccurr = tre_tolower(min++); | 
					
						
							|  |  |  | 						status = tre_new_item(ctx->mem, cmin, ccurr, &i, &max_i, items); | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					else min++; | 
					
						
							|  |  |  | 					if (status != REG_OK) break; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				if (status != REG_OK) break; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	*num_items = i; | 
					
						
							|  |  |  | 	*items_size = max_i; | 
					
						
							|  |  |  | 	ctx->re = re; | 
					
						
							|  |  |  | 	return status; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static reg_errcode_t | 
					
						
							|  |  |  | tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	tre_ast_node_t *node = NULL; | 
					
						
							|  |  |  | 	int negate = 0; | 
					
						
							|  |  |  | 	reg_errcode_t status = REG_OK; | 
					
						
							|  |  |  | 	tre_ast_node_t **items, *u, *n; | 
					
						
							|  |  |  | 	int i = 0, j, max_i = 32, curr_max, curr_min; | 
					
						
							|  |  |  | 	tre_ctype_t neg_classes[MAX_NEG_CLASSES]; | 
					
						
							|  |  |  | 	int num_neg_classes = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Start off with an array of `max_i' elements. */ | 
					
						
							| 
									
										
										
										
											2019-12-18 08:34:44 +00:00
										 |  |  | 	items = xmalloc(ctx->mem->gem, sizeof(*items) * max_i); | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 	if (items == NULL) return REG_ESPACE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (*ctx->re == CHAR_CARET) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		DPRINT(("tre_parse_bracket: negate: '%.*" STRF "'\n", REST(ctx->re))); | 
					
						
							|  |  |  | 		negate = 1; | 
					
						
							|  |  |  | 		ctx->re++; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes, &items, &i, &max_i); | 
					
						
							|  |  |  | 	if (status != REG_OK) goto parse_bracket_done; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Sort the array if we need to negate it. */ | 
					
						
							|  |  |  | 	if (negate) hawk_qsort(items, (unsigned)i, sizeof(*items), tre_compare_items, HAWK_NULL); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	curr_max = curr_min = 0; | 
					
						
							|  |  |  | 	/* Build a union of the items in the array, negated if necessary. */ | 
					
						
							|  |  |  | 	for (j = 0; j < i && status == REG_OK; j++) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		int min, max; | 
					
						
							|  |  |  | 		tre_literal_t *l = items[j]->obj; | 
					
						
							|  |  |  | 		min = l->code_min; | 
					
						
							|  |  |  | 		max = l->code_max; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		DPRINT(("item: %d - %d, class %ld, curr_max = %d\n", | 
					
						
							|  |  |  | 		        (int)l->code_min, (int)l->code_max, (long)l->u.class, curr_max)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (negate) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			if (min < curr_max) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				/* Overlap. */ | 
					
						
							|  |  |  | 				curr_max = MAX(max + 1, curr_max); | 
					
						
							|  |  |  | 				DPRINT(("overlap, curr_max = %d\n", curr_max)); | 
					
						
							|  |  |  | 				l = NULL; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				/* No overlap. */ | 
					
						
							|  |  |  | 				curr_max = min - 1; | 
					
						
							|  |  |  | 				if (curr_max >= curr_min) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					DPRINT(("no overlap\n")); | 
					
						
							|  |  |  | 					l->code_min = curr_min; | 
					
						
							|  |  |  | 					l->code_max = curr_max; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					DPRINT(("no overlap, zero room\n")); | 
					
						
							|  |  |  | 					l = NULL; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				curr_min = curr_max = max + 1; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (l != NULL) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			int k; | 
					
						
							|  |  |  | 			DPRINT(("creating %d - %d\n", (int)l->code_min, (int)l->code_max)); | 
					
						
							|  |  |  | 			l->position = ctx->position; | 
					
						
							|  |  |  | 			if (num_neg_classes > 0) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				l->neg_classes = tre_mem_alloc(ctx->mem, (sizeof(l->neg_classes) * (num_neg_classes + 1))); | 
					
						
							|  |  |  | 				if (l->neg_classes == NULL) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					status = REG_ESPACE; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				for (k = 0; k < num_neg_classes; k++) l->neg_classes[k] = neg_classes[k]; | 
					
						
							|  |  |  | 				l->neg_classes[k] = (tre_ctype_t)0; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				l->neg_classes = NULL; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if (node == NULL) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				node = items[j]; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				u = tre_ast_new_union(ctx->mem, node, items[j]); | 
					
						
							|  |  |  | 				if (u == NULL) | 
					
						
							|  |  |  | 					status = REG_ESPACE; | 
					
						
							|  |  |  | 				node = u; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (status != REG_OK) goto parse_bracket_done; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (negate) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		int k; | 
					
						
							|  |  |  | 		DPRINT(("final: creating %d - %d\n", curr_min, (int)TRE_CHAR_MAX)); | 
					
						
							|  |  |  | 		n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position); | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 		if (n == NULL) | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		{ | 
					
						
							|  |  |  | 			status = REG_ESPACE; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			tre_literal_t *l = n->obj; | 
					
						
							|  |  |  | 			if (num_neg_classes > 0) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				l->neg_classes = tre_mem_alloc(ctx->mem, | 
					
						
							|  |  |  | 				                               (sizeof(l->neg_classes) | 
					
						
							|  |  |  | 				                                * (num_neg_classes + 1))); | 
					
						
							|  |  |  | 				if (l->neg_classes == NULL) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					status = REG_ESPACE; | 
					
						
							|  |  |  | 					goto parse_bracket_done; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				for (k = 0; k < num_neg_classes; k++) | 
					
						
							|  |  |  | 					l->neg_classes[k] = neg_classes[k]; | 
					
						
							|  |  |  | 				l->neg_classes[k] = (tre_ctype_t)0; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				l->neg_classes = NULL; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if (node == NULL) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				node = n; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				u = tre_ast_new_union(ctx->mem, node, n); | 
					
						
							|  |  |  | 				if (u == NULL) status = REG_ESPACE; | 
					
						
							|  |  |  | 				node = u; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (status != REG_OK) goto parse_bracket_done; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef TRE_DEBUG
 | 
					
						
							|  |  |  | 	tre_ast_print(node); | 
					
						
							|  |  |  | #endif /* TRE_DEBUG */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | parse_bracket_done: | 
					
						
							| 
									
										
										
										
											2019-12-18 08:34:44 +00:00
										 |  |  | 	xfree(ctx->mem->gem, items); | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 	ctx->position++; | 
					
						
							|  |  |  | 	*result = node; | 
					
						
							|  |  |  | 	return status; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Parses a positive decimal integer.  Returns -1 if the string does not
 | 
					
						
							|  |  |  |    contain a valid number. */ | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2020-12-02 16:07:06 +00:00
										 |  |  | 	/* HAWK : added overflow check with other code optimizations */ | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 	int num = -1; | 
					
						
							|  |  |  | 	const tre_char_t *r = *regex; | 
					
						
							| 
									
										
										
										
											2020-12-02 16:07:06 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	if (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9')) | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 	{ | 
					
						
							| 
									
										
										
										
											2020-12-02 16:07:06 +00:00
										 |  |  | 		int ever_overflowed = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		num = 0; | 
					
						
							|  |  |  | 		do | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			if (num > (HAWK_TYPE_MAX(int) - 9) / 10) ever_overflowed = 1; | 
					
						
							|  |  |  | 			num = num * 10 + *r - HAWK_T('0'); | 
					
						
							|  |  |  | 			r++; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		while (r < regex_end && *r >= HAWK_T('0') && *r <= HAWK_T('9')); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (ever_overflowed) num = -1; | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	*regex = r; | 
					
						
							|  |  |  | 	return num; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static reg_errcode_t | 
					
						
							|  |  |  | tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int min, max, i; | 
					
						
							|  |  |  | 	int cost_ins, cost_del, cost_subst, cost_max; | 
					
						
							|  |  |  | 	int limit_ins, limit_del, limit_subst, limit_err; | 
					
						
							|  |  |  | 	const tre_char_t *r = ctx->re; | 
					
						
							|  |  |  | 	const tre_char_t *start; | 
					
						
							|  |  |  | 	int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0; | 
					
						
							|  |  |  | 	int approx = 0; | 
					
						
							|  |  |  | 	int costs_set = 0; | 
					
						
							|  |  |  | 	int counts_set = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	cost_ins = cost_del = cost_subst = cost_max = TRE_PARAM_UNSET; | 
					
						
							|  |  |  | 	limit_ins = limit_del = limit_subst = limit_err = TRE_PARAM_UNSET; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Parse number (minimum repetition count). */ | 
					
						
							|  |  |  | 	min = -1; | 
					
						
							|  |  |  | 	if (r < ctx->re_end && *r >= HAWK_T('0') && *r <= HAWK_T('9')) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		DPRINT(("tre_parse:	  min count: '%.*" STRF "'\n", REST(r))); | 
					
						
							|  |  |  | 		min = tre_parse_int(&r, ctx->re_end); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Parse comma and second number (maximum repetition count). */ | 
					
						
							|  |  |  | 	max = min; | 
					
						
							|  |  |  | 	if (r < ctx->re_end && *r == CHAR_COMMA) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		r++; | 
					
						
							|  |  |  | 		DPRINT(("tre_parse:   max count: '%.*" STRF "'\n", REST(r))); | 
					
						
							|  |  |  | 		max = tre_parse_int(&r, ctx->re_end); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Check that the repeat counts are sane. */ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 	/*if ((max >= 0 && min > max) || max > RE_DUP_MAX) return REG_BADBR;
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	hyunghwan.chung: | 
					
						
							|  |  |  | 	this original check still allows something like {100000,} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 	while it does not allow {1,256}. Why is RE_DUP_MAX necessary? | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 	*/ | 
					
						
							|  |  |  | 	if ((max >= 0 && min > max)) return REG_BADBR; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/*
 | 
					
						
							|  |  |  | 	 '{' | 
					
						
							|  |  |  | 	   optionally followed immediately by a number == minimum repcount | 
					
						
							|  |  |  | 	   optionally followed by , then a number == maximum repcount | 
					
						
							|  |  |  | 	    + then a number == maximum insertion count | 
					
						
							|  |  |  | 	    - then a number == maximum deletion count | 
					
						
							|  |  |  | 	    # then a number == maximum substitution count
 | 
					
						
							|  |  |  | 	    ~ then a number == maximum number of errors | 
					
						
							|  |  |  | 	    Any of +, -, # or ~ without followed by a number means that | 
					
						
							|  |  |  | 	    the maximum count/number of errors is infinite. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	    An equation of the form | 
					
						
							|  |  |  | 	Xi + Yd + Zs < C | 
					
						
							|  |  |  | 	    can be specified to set costs and the cost limit to a value | 
					
						
							|  |  |  | 	    different from the default value: | 
					
						
							|  |  |  | 	- X is the cost of an insertion | 
					
						
							|  |  |  | 	- Y is the cost of a deletion | 
					
						
							|  |  |  | 	- Z is the cost of a substitution | 
					
						
							|  |  |  | 	- C is the maximum cost | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	    If no count limit or cost is set for an operation, the operation | 
					
						
							|  |  |  | 	    is not allowed at all. | 
					
						
							|  |  |  | 	*/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	do | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		int done; | 
					
						
							|  |  |  | 		start = r; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		/* Parse count limit settings */ | 
					
						
							|  |  |  | 		done = 0; | 
					
						
							|  |  |  | 		if (!counts_set) | 
					
						
							|  |  |  | 			while (r + 1 < ctx->re_end && !done) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				switch (*r) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 				case CHAR_PLUS:  /* Insert limit */ | 
					
						
							|  |  |  | 					DPRINT(("tre_parse:   ins limit: '%.*" STRF "'\n", REST(r))); | 
					
						
							|  |  |  | 					r++; | 
					
						
							|  |  |  | 					limit_ins = tre_parse_int(&r, ctx->re_end); | 
					
						
							|  |  |  | 					if (limit_ins < 0) | 
					
						
							|  |  |  | 						limit_ins = HAWK_TYPE_MAX(int); | 
					
						
							|  |  |  | 					counts_set = 1; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				case CHAR_MINUS: /* Delete limit */ | 
					
						
							|  |  |  | 					DPRINT(("tre_parse:   del limit: '%.*" STRF "'\n", REST(r))); | 
					
						
							|  |  |  | 					r++; | 
					
						
							|  |  |  | 					limit_del = tre_parse_int(&r, ctx->re_end); | 
					
						
							|  |  |  | 					if (limit_del < 0) | 
					
						
							|  |  |  | 						limit_del = HAWK_TYPE_MAX(int); | 
					
						
							|  |  |  | 					counts_set = 1; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				case CHAR_HASH:  /* Substitute limit */ | 
					
						
							|  |  |  | 					DPRINT(("tre_parse: subst limit: '%.*" STRF "'\n", REST(r))); | 
					
						
							|  |  |  | 					r++; | 
					
						
							|  |  |  | 					limit_subst = tre_parse_int(&r, ctx->re_end); | 
					
						
							|  |  |  | 					if (limit_subst < 0) | 
					
						
							|  |  |  | 						limit_subst = HAWK_TYPE_MAX(int); | 
					
						
							|  |  |  | 					counts_set = 1; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				case CHAR_TILDE: /* Maximum number of changes */ | 
					
						
							|  |  |  | 					DPRINT(("tre_parse: count limit: '%.*" STRF "'\n", REST(r))); | 
					
						
							|  |  |  | 					r++; | 
					
						
							|  |  |  | 					limit_err = tre_parse_int(&r, ctx->re_end); | 
					
						
							|  |  |  | 					if (limit_err < 0) | 
					
						
							|  |  |  | 						limit_err = HAWK_TYPE_MAX(int); | 
					
						
							|  |  |  | 					approx = 1; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				case CHAR_COMMA: | 
					
						
							|  |  |  | 					r++; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				case HAWK_T(' '): | 
					
						
							|  |  |  | 					r++; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				case HAWK_T('}'): | 
					
						
							|  |  |  | 					done = 1; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				default: | 
					
						
							|  |  |  | 					done = 1; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		/* Parse cost restriction equation. */ | 
					
						
							|  |  |  | 		done = 0; | 
					
						
							|  |  |  | 		if (!costs_set) | 
					
						
							|  |  |  | 			while (r + 1 < ctx->re_end && !done) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				switch (*r) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 				case CHAR_PLUS: | 
					
						
							|  |  |  | 				case HAWK_T(' '): | 
					
						
							|  |  |  | 					r++; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				case HAWK_T('<'): | 
					
						
							|  |  |  | 					DPRINT(("tre_parse:    max cost: '%.*" STRF "'\n", REST(r))); | 
					
						
							|  |  |  | 					r++; | 
					
						
							|  |  |  | 					while (*r == HAWK_T(' ')) | 
					
						
							|  |  |  | 						r++; | 
					
						
							|  |  |  | 					cost_max = tre_parse_int(&r, ctx->re_end); | 
					
						
							|  |  |  | 					if (cost_max < 0) | 
					
						
							|  |  |  | 						cost_max = HAWK_TYPE_MAX(int); | 
					
						
							|  |  |  | 					else | 
					
						
							|  |  |  | 						cost_max--; | 
					
						
							|  |  |  | 					approx = 1; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				case CHAR_COMMA: | 
					
						
							|  |  |  | 					r++; | 
					
						
							|  |  |  | 					done = 1; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				default: | 
					
						
							|  |  |  | 					if (*r >= HAWK_T('0') && *r <= HAWK_T('9')) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | #ifdef TRE_DEBUG
 | 
					
						
							|  |  |  | 						const tre_char_t *sr = r; | 
					
						
							|  |  |  | #endif /* TRE_DEBUG */
 | 
					
						
							|  |  |  | 						int cost = tre_parse_int(&r, ctx->re_end); | 
					
						
							|  |  |  | 						/* XXX - make sure r is not past end. */ | 
					
						
							|  |  |  | 						switch (*r) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 						case HAWK_T('i'):	/* Insert cost */ | 
					
						
							|  |  |  | 							DPRINT(("tre_parse:    ins cost: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 							        REST(sr))); | 
					
						
							|  |  |  | 							r++; | 
					
						
							|  |  |  | 							cost_ins = cost; | 
					
						
							|  |  |  | 							costs_set = 1; | 
					
						
							|  |  |  | 							break; | 
					
						
							|  |  |  | 						case HAWK_T('d'):	/* Delete cost */ | 
					
						
							|  |  |  | 							DPRINT(("tre_parse:    del cost: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 							        REST(sr))); | 
					
						
							|  |  |  | 							r++; | 
					
						
							|  |  |  | 							cost_del = cost; | 
					
						
							|  |  |  | 							costs_set = 1; | 
					
						
							|  |  |  | 							break; | 
					
						
							|  |  |  | 						case HAWK_T('s'):	/* Substitute cost */ | 
					
						
							|  |  |  | 							DPRINT(("tre_parse:  subst cost: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 							        REST(sr))); | 
					
						
							|  |  |  | 							r++; | 
					
						
							|  |  |  | 							cost_subst = cost; | 
					
						
							|  |  |  | 							costs_set = 1; | 
					
						
							|  |  |  | 							break; | 
					
						
							|  |  |  | 						default: | 
					
						
							|  |  |  | 							return REG_BADBR; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					else | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						done = 1; | 
					
						
							|  |  |  | 						break; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	while (start != r); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Missing }. */ | 
					
						
							|  |  |  | 	if (r >= ctx->re_end) | 
					
						
							|  |  |  | 		return REG_EBRACE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Empty contents of {}. */ | 
					
						
							|  |  |  | 	if (r == ctx->re) | 
					
						
							|  |  |  | 		return REG_BADBR; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Parse the ending '}' or '\}'.*/ | 
					
						
							|  |  |  | 	if (ctx->cflags & REG_EXTENDED) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (r >= ctx->re_end || *r != CHAR_RBRACE) | 
					
						
							|  |  |  | 			return REG_BADBR; | 
					
						
							|  |  |  | 		r++; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (r + 1 >= ctx->re_end | 
					
						
							|  |  |  | 		        || *r != CHAR_BACKSLASH | 
					
						
							|  |  |  | 		        || *(r + 1) != CHAR_RBRACE) | 
					
						
							|  |  |  | 			return REG_BADBR; | 
					
						
							|  |  |  | 		r += 2; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Parse trailing '?' marking minimal repetition. */ | 
					
						
							|  |  |  | 	if (r < ctx->re_end) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (*r == CHAR_QUESTIONMARK) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			minimal = !(ctx->cflags & REG_UNGREEDY); | 
					
						
							|  |  |  | 			r++; | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | /* HAWK - commented out for minimal impact on backward compatibility.
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  |  *       X{x,y}* X{x,y}+ */ | 
					
						
							|  |  |  | #if 0
 | 
					
						
							|  |  |  | 		else if (*r == CHAR_STAR || *r == CHAR_PLUS) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			/* These are reserved for future extensions. */ | 
					
						
							|  |  |  | 			return REG_BADRPT; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Create the AST node(s). */ | 
					
						
							|  |  |  | 	if (min == 0 && max == 0) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		*result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); | 
					
						
							|  |  |  | 		if (*result == NULL) return REG_ESPACE; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (min < 0 && max < 0) | 
					
						
							|  |  |  | 			/* Only approximate parameters set, no repetitions. */ | 
					
						
							|  |  |  | 			min = max = 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		*result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal); | 
					
						
							|  |  |  | 		if (!*result) | 
					
						
							|  |  |  | 			return REG_ESPACE; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		/* If approximate matching parameters are set, add them to the
 | 
					
						
							|  |  |  | 		 iteration node. */ | 
					
						
							|  |  |  | 		if (approx || costs_set || counts_set) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			int *params; | 
					
						
							|  |  |  | 			tre_iteration_t *iter = (*result)->obj; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			if (costs_set || counts_set) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				if (limit_ins == TRE_PARAM_UNSET) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					if (cost_ins == TRE_PARAM_UNSET) | 
					
						
							|  |  |  | 						limit_ins = 0; | 
					
						
							|  |  |  | 					else | 
					
						
							|  |  |  | 						limit_ins = HAWK_TYPE_MAX(int); | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				if (limit_del == TRE_PARAM_UNSET) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					if (cost_del == TRE_PARAM_UNSET) | 
					
						
							|  |  |  | 						limit_del = 0; | 
					
						
							|  |  |  | 					else | 
					
						
							|  |  |  | 						limit_del = HAWK_TYPE_MAX(int); | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				if (limit_subst == TRE_PARAM_UNSET) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					if (cost_subst == TRE_PARAM_UNSET) | 
					
						
							|  |  |  | 						limit_subst = 0; | 
					
						
							|  |  |  | 					else | 
					
						
							|  |  |  | 						limit_subst = HAWK_TYPE_MAX(int); | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			if (cost_max == TRE_PARAM_UNSET) | 
					
						
							|  |  |  | 				cost_max = HAWK_TYPE_MAX(int); | 
					
						
							|  |  |  | 			if (limit_err == TRE_PARAM_UNSET) | 
					
						
							|  |  |  | 				limit_err = HAWK_TYPE_MAX(int); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			ctx->have_approx = 1; | 
					
						
							|  |  |  | 			params = tre_mem_alloc(ctx->mem, sizeof(*params) * TRE_PARAM_LAST); | 
					
						
							|  |  |  | 			if (!params) | 
					
						
							|  |  |  | 				return REG_ESPACE; | 
					
						
							|  |  |  | 			for (i = 0; i < TRE_PARAM_LAST; i++) | 
					
						
							|  |  |  | 				params[i] = TRE_PARAM_UNSET; | 
					
						
							|  |  |  | 			params[TRE_PARAM_COST_INS] = cost_ins; | 
					
						
							|  |  |  | 			params[TRE_PARAM_COST_DEL] = cost_del; | 
					
						
							|  |  |  | 			params[TRE_PARAM_COST_SUBST] = cost_subst; | 
					
						
							|  |  |  | 			params[TRE_PARAM_COST_MAX] = cost_max; | 
					
						
							|  |  |  | 			params[TRE_PARAM_MAX_INS] = limit_ins; | 
					
						
							|  |  |  | 			params[TRE_PARAM_MAX_DEL] = limit_del; | 
					
						
							|  |  |  | 			params[TRE_PARAM_MAX_SUBST] = limit_subst; | 
					
						
							|  |  |  | 			params[TRE_PARAM_MAX_ERR] = limit_err; | 
					
						
							|  |  |  | 			iter->params = params; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	DPRINT(("tre_parse_bound: min %d, max %d, costs [%d,%d,%d, total %d], " | 
					
						
							|  |  |  | 	        "limits [%d,%d,%d, total %d]\n", | 
					
						
							|  |  |  | 	        min, max, cost_ins, cost_del, cost_subst, cost_max, | 
					
						
							|  |  |  | 	        limit_ins, limit_del, limit_subst, limit_err)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	ctx->re = r; | 
					
						
							|  |  |  | 	return REG_OK; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef enum | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	PARSE_RE = 0, | 
					
						
							|  |  |  | 	PARSE_ATOM, | 
					
						
							|  |  |  | 	PARSE_MARK_FOR_SUBMATCH, | 
					
						
							|  |  |  | 	PARSE_BRANCH, | 
					
						
							|  |  |  | 	PARSE_PIECE, | 
					
						
							|  |  |  | 	PARSE_CATENATION, | 
					
						
							|  |  |  | 	PARSE_POST_CATENATION, | 
					
						
							|  |  |  | 	PARSE_UNION, | 
					
						
							|  |  |  | 	PARSE_POST_UNION, | 
					
						
							|  |  |  | 	PARSE_POSTFIX, | 
					
						
							|  |  |  | 	PARSE_RESTORE_CFLAGS | 
					
						
							|  |  |  | } tre_parse_re_stack_symbol_t; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | reg_errcode_t tre_parse(tre_parse_ctx_t *ctx) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	tre_ast_node_t *result = NULL; | 
					
						
							|  |  |  | 	tre_parse_re_stack_symbol_t symbol; | 
					
						
							|  |  |  | 	reg_errcode_t status = REG_OK; | 
					
						
							|  |  |  | 	tre_stack_t *stack = ctx->stack; | 
					
						
							|  |  |  | 	int bottom = tre_stack_num_objects(stack); | 
					
						
							|  |  |  | 	int depth = 0; | 
					
						
							|  |  |  | 	int temporary_cflags = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	DPRINT(("tre_parse: parsing '%.*" STRF "', len = %d\n", | 
					
						
							|  |  |  | 	        ctx->len, ctx->re, ctx->len)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (!ctx->nofirstsub) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		STACK_PUSH(stack, int, ctx->submatch_id); | 
					
						
							|  |  |  | 		STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH); | 
					
						
							|  |  |  | 		ctx->submatch_id++; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	STACK_PUSH(stack, int, PARSE_RE); | 
					
						
							|  |  |  | 	ctx->re_start = ctx->re; | 
					
						
							|  |  |  | 	ctx->re_end = ctx->re + ctx->len; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* The following is basically just a recursive descent parser.  I use
 | 
					
						
							|  |  |  | 	   an explicit stack instead of recursive functions mostly because of | 
					
						
							|  |  |  | 	   two reasons: compatibility with systems which have an overflowable | 
					
						
							|  |  |  | 	   call stack, and efficiency (both in lines of code and speed).  */ | 
					
						
							|  |  |  | 	while (tre_stack_num_objects(stack) > bottom && status == REG_OK) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (status != REG_OK) break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		symbol = tre_stack_pop_int(stack); | 
					
						
							|  |  |  | 		switch (symbol) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 		case PARSE_RE: | 
					
						
							|  |  |  | 			/* Parse a full regexp.  A regexp is one or more branches,
 | 
					
						
							|  |  |  | 			   separated by the union operator `|'. */ | 
					
						
							|  |  |  | #ifdef REG_LITERAL
 | 
					
						
							|  |  |  | 			if (!(ctx->cflags & REG_LITERAL) | 
					
						
							|  |  |  | 			        && ctx->cflags & REG_EXTENDED) | 
					
						
							|  |  |  | #endif /* REG_LITERAL */
 | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_UNION); | 
					
						
							|  |  |  | 			STACK_PUSHX(stack, int, PARSE_BRANCH); | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		case PARSE_BRANCH: | 
					
						
							|  |  |  | 			/* Parse a branch.  A branch is one or more pieces, concatenated.
 | 
					
						
							|  |  |  | 			   A piece is an atom possibly followed by a postfix operator. */ | 
					
						
							|  |  |  | 			STACK_PUSHX(stack, int, PARSE_CATENATION); | 
					
						
							|  |  |  | 			STACK_PUSHX(stack, int, PARSE_PIECE); | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		case PARSE_PIECE: | 
					
						
							|  |  |  | 			/* Parse a piece.  A piece is an atom possibly followed by one
 | 
					
						
							|  |  |  | 			   or more postfix operators. */ | 
					
						
							|  |  |  | #ifdef REG_LITERAL
 | 
					
						
							|  |  |  | 			if (!(ctx->cflags & REG_LITERAL)) | 
					
						
							|  |  |  | #endif /* REG_LITERAL */
 | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_POSTFIX); | 
					
						
							|  |  |  | 			STACK_PUSHX(stack, int, PARSE_ATOM); | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		case PARSE_CATENATION: | 
					
						
							|  |  |  | 			/* If the expression has not ended, parse another piece. */ | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			tre_char_t c; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			if (ctx->re >= ctx->re_end) break; | 
					
						
							|  |  |  | 			c = *ctx->re; | 
					
						
							|  |  |  | #ifdef REG_LITERAL
 | 
					
						
							|  |  |  | 			if (!(ctx->cflags & REG_LITERAL)) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | #endif /* REG_LITERAL */
 | 
					
						
							|  |  |  | 				if (ctx->cflags & REG_EXTENDED && c == CHAR_PIPE) | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				if ((ctx->cflags & REG_EXTENDED | 
					
						
							|  |  |  | 				        && c == CHAR_RPAREN && depth > 0) | 
					
						
							|  |  |  | 				        || (!(ctx->cflags & REG_EXTENDED) | 
					
						
							|  |  |  | 				            && (c == CHAR_BACKSLASH | 
					
						
							|  |  |  | 				                && *(ctx->re + 1) == CHAR_RPAREN))) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					if (!(ctx->cflags & REG_EXTENDED) && depth == 0) | 
					
						
							|  |  |  | 						status = REG_EPAREN; | 
					
						
							|  |  |  | 					DPRINT(("tre_parse:	  group end: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 					        REST(ctx->re))); | 
					
						
							|  |  |  | 					depth--; | 
					
						
							|  |  |  | 					if (!(ctx->cflags & REG_EXTENDED)) | 
					
						
							|  |  |  | 						ctx->re += 2; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | #ifdef REG_LITERAL
 | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | #endif /* REG_LITERAL */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef REG_RIGHT_ASSOC
 | 
					
						
							|  |  |  | 			if (ctx->cflags & REG_RIGHT_ASSOC) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				/* Right associative concatenation. */ | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, voidptr, result); | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_POST_CATENATION); | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_CATENATION); | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_PIECE); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			{ /* REG_RIGHT_ASSOC */ | 
					
						
							|  |  |  | 				/* Default case, left associative concatenation. */ | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_CATENATION); | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, voidptr, result); | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_POST_CATENATION); | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_PIECE); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		case PARSE_POST_CATENATION: | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			tre_ast_node_t *tree = tre_stack_pop_voidptr(stack); | 
					
						
							|  |  |  | 			tre_ast_node_t *tmp_node; | 
					
						
							|  |  |  | 			tmp_node = tre_ast_new_catenation(ctx->mem, tree, result); | 
					
						
							|  |  |  | 			if (!tmp_node) | 
					
						
							|  |  |  | 				return REG_ESPACE; | 
					
						
							|  |  |  | 			result = tmp_node; | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		case PARSE_UNION: | 
					
						
							|  |  |  | 			if (ctx->re >= ctx->re_end) break; | 
					
						
							|  |  |  | 	#ifdef REG_LITERAL
 | 
					
						
							|  |  |  | 			if (ctx->cflags & REG_LITERAL) break; | 
					
						
							|  |  |  | 	#endif /* REG_LITERAL */
 | 
					
						
							|  |  |  | 			switch (*ctx->re) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 			case CHAR_PIPE: | 
					
						
							|  |  |  | 				DPRINT(("tre_parse:	union: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 				        REST(ctx->re))); | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_UNION); | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, voidptr, result); | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_POST_UNION); | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_BRANCH); | 
					
						
							|  |  |  | 				ctx->re++; | 
					
						
							|  |  |  | 				break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			case CHAR_RPAREN: | 
					
						
							|  |  |  | 				ctx->re++; | 
					
						
							|  |  |  | 				break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			default: | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		case PARSE_POST_UNION: | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			tre_ast_node_t *tmp_node; | 
					
						
							|  |  |  | 			tre_ast_node_t *tree = tre_stack_pop_voidptr(stack); | 
					
						
							|  |  |  | 			tmp_node = tre_ast_new_union(ctx->mem, tree, result); | 
					
						
							|  |  |  | 			if (!tmp_node) | 
					
						
							|  |  |  | 				return REG_ESPACE; | 
					
						
							|  |  |  | 			result = tmp_node; | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		case PARSE_POSTFIX: | 
					
						
							|  |  |  | 			/* Parse postfix operators. */ | 
					
						
							|  |  |  | 			if (ctx->re >= ctx->re_end) | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 	#ifdef REG_LITERAL
 | 
					
						
							|  |  |  | 			if (ctx->cflags & REG_LITERAL) | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 	#endif /* REG_LITERAL */
 | 
					
						
							|  |  |  | 			switch (*ctx->re) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 			case CHAR_PLUS: | 
					
						
							|  |  |  | 			case CHAR_QUESTIONMARK: | 
					
						
							|  |  |  | 				if (!(ctx->cflags & REG_EXTENDED)) break; | 
					
						
							|  |  |  | 				/*FALLTHROUGH*/ | 
					
						
							|  |  |  | 			case CHAR_STAR: | 
					
						
							|  |  |  | 	/* HAWK - added this label */ | 
					
						
							|  |  |  | 	parse_star: | 
					
						
							|  |  |  | 	/* END HAWK */ | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				tre_ast_node_t *tmp_node; | 
					
						
							|  |  |  | 				int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0; | 
					
						
							|  |  |  | 				int rep_min = 0; | 
					
						
							|  |  |  | 				int rep_max = -1; | 
					
						
							|  |  |  | 	#ifdef TRE_DEBUG
 | 
					
						
							|  |  |  | 				const tre_char_t *tmp_re; | 
					
						
							|  |  |  | 	#endif
 | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				if (*ctx->re == CHAR_PLUS) /* HAWK: case CHAR_PLUS fell through down here */ | 
					
						
							|  |  |  | 					rep_min = 1; | 
					
						
							|  |  |  | 				if (*ctx->re == CHAR_QUESTIONMARK) /* HAWK: case CHAR_QUESTIONMARK fell though down here */ | 
					
						
							|  |  |  | 					rep_max = 1; | 
					
						
							|  |  |  | 	#ifdef TRE_DEBUG
 | 
					
						
							|  |  |  | 				tmp_re = ctx->re; | 
					
						
							|  |  |  | 	#endif
 | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				if (ctx->re + 1 < ctx->re_end) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					if (*(ctx->re + 1) == CHAR_QUESTIONMARK) /* HAWK: +?, ??, *? */ | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						minimal = !(ctx->cflags & REG_UNGREEDY); | 
					
						
							|  |  |  | 						ctx->re++; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 	/* HAWK - TRE has provisions for ** or *+ as a special repetition operator.
 | 
					
						
							|  |  |  | 	 *       however, that seems to break backward compatibility. | 
					
						
							|  |  |  | 	 *       '+' in 'a*+' is not treated as a normal character with the | 
					
						
							|  |  |  | 	 *       following block enabled. So let me comment it out */ | 
					
						
							|  |  |  | 	#if 0
 | 
					
						
							|  |  |  | 					else if (*(ctx->re + 1) == CHAR_STAR | 
					
						
							|  |  |  | 					         || *(ctx->re + 1) == CHAR_PLUS) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						/* These are reserved for future extensions. */ | 
					
						
							|  |  |  | 						return REG_BADRPT; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 	#endif
 | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				DPRINT(("tre_parse: %s star: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 				        minimal ? "  minimal" : "greedy", REST(tmp_re))); | 
					
						
							|  |  |  | 				ctx->re++; | 
					
						
							|  |  |  | 				tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max, | 
					
						
							|  |  |  | 				                            minimal); | 
					
						
							|  |  |  | 				if (tmp_node == NULL) | 
					
						
							|  |  |  | 					return REG_ESPACE; | 
					
						
							|  |  |  | 				result = tmp_node; | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_POSTFIX); | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				break; | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			case CHAR_BACKSLASH: | 
					
						
							|  |  |  | 				/* "\{" is special without REG_EXTENDED */ | 
					
						
							|  |  |  | 				/* HAWK  - also handle \+ and \? */ | 
					
						
							|  |  |  | 				/*
 | 
					
						
							|  |  |  | 				if (!(ctx->cflags & REG_EXTENDED) | 
					
						
							|  |  |  | 				        && ctx->re + 1 < ctx->re_end | 
					
						
							|  |  |  | 				        && *(ctx->re + 1) == CHAR_LBRACE) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					ctx->re++; | 
					
						
							|  |  |  | 					goto parse_brace; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				*/ | 
					
						
							|  |  |  | 				if (!(ctx->cflags & REG_EXTENDED) && ctx->re + 1 < ctx->re_end) | 
					
						
							|  |  |  | 				{ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 					if (*(ctx->re + 1) == CHAR_LBRACE) | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 					{ | 
					
						
							|  |  |  | 						ctx->re++; | 
					
						
							|  |  |  | 						goto parse_brace; | 
					
						
							|  |  |  | 					} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 					else if (*(ctx->re + 1) == CHAR_PLUS || | 
					
						
							|  |  |  | 					         *(ctx->re + 1) == CHAR_QUESTIONMARK) | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 					{ | 
					
						
							|  |  |  | 						ctx->re++; | 
					
						
							|  |  |  | 						goto parse_star; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 				/* END HAWK */ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			case CHAR_LBRACE: | 
					
						
							|  |  |  | 				/* "{" is literal without REG_EXTENDED */ | 
					
						
							|  |  |  | 				if (!(ctx->cflags & REG_EXTENDED)) break; | 
					
						
							|  |  |  | 				/* HAWK */ | 
					
						
							|  |  |  | 				if (ctx->cflags & REG_NOBOUND) break; | 
					
						
							|  |  |  | 				/* END HAWK */ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 	parse_brace: | 
					
						
							|  |  |  | 				DPRINT(("tre_parse:	bound: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 				        REST(ctx->re))); | 
					
						
							|  |  |  | 				ctx->re++; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				status = tre_parse_bound(ctx, &result); | 
					
						
							|  |  |  | 				if (status != REG_OK) | 
					
						
							|  |  |  | 					return status; | 
					
						
							|  |  |  | 				STACK_PUSHX(stack, int, PARSE_POSTFIX); | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		case PARSE_ATOM: | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			/* Parse an atom.  An atom is a regular expression enclosed in `()',
 | 
					
						
							|  |  |  | 			   an empty set of `()', a bracket expression, `.', `^', `$', | 
					
						
							|  |  |  | 			   a `\' followed by a character, or a single character. */ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			/* End of regexp? (empty string). */ | 
					
						
							|  |  |  | 			if (ctx->re >= ctx->re_end) goto parse_literal; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 	#ifdef REG_LITERAL
 | 
					
						
							|  |  |  | 			if (ctx->cflags & REG_LITERAL) goto parse_literal; | 
					
						
							|  |  |  | 	#endif /* REG_LITERAL */
 | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			switch (*ctx->re) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 			case CHAR_LPAREN:  /* parenthesized subexpression */ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				/* Handle "(?...)" extensions.  They work in a way similar
 | 
					
						
							|  |  |  | 				 to Perls corresponding extensions. */ | 
					
						
							|  |  |  | 				/* HAWK: added ctx->cflags & REG_NONSTDEXT */ | 
					
						
							|  |  |  | 				if ((ctx->cflags & REG_NONSTDEXT) && | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 				    (ctx->cflags & REG_EXTENDED) && | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				    *(ctx->re + 1) == CHAR_QUESTIONMARK) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					int new_cflags = ctx->cflags; | 
					
						
							|  |  |  | 					int bit = 1; | 
					
						
							|  |  |  | 					DPRINT(("tre_parse:	extension: '%.*" STRF "\n", REST(ctx->re))); | 
					
						
							|  |  |  | 					ctx->re += 2; | 
					
						
							|  |  |  | 					while (/*CONSTCOND*/1) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						if (*ctx->re == HAWK_T('i')) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							DPRINT(("tre_parse:	    icase: '%.*" STRF "\n", REST(ctx->re))); | 
					
						
							|  |  |  | 							if (bit) | 
					
						
							|  |  |  | 								new_cflags |= REG_ICASE; | 
					
						
							|  |  |  | 							else | 
					
						
							|  |  |  | 								new_cflags &= ~REG_ICASE; | 
					
						
							|  |  |  | 							ctx->re++; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 						else if (*ctx->re == HAWK_T('n')) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							DPRINT(("tre_parse:	  newline: '%.*" STRF "\n", REST(ctx->re))); | 
					
						
							|  |  |  | 							if (bit) | 
					
						
							|  |  |  | 								new_cflags |= REG_NEWLINE; | 
					
						
							|  |  |  | 							else | 
					
						
							|  |  |  | 								new_cflags &= ~REG_NEWLINE; | 
					
						
							|  |  |  | 							ctx->re++; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 	#ifdef REG_RIGHT_ASSOC
 | 
					
						
							|  |  |  | 						else if (*ctx->re == HAWK_T('r')) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							DPRINT(("tre_parse: right assoc: '%.*" STRF "\n", REST(ctx->re))); | 
					
						
							|  |  |  | 							if (bit) | 
					
						
							|  |  |  | 								new_cflags |= REG_RIGHT_ASSOC; | 
					
						
							|  |  |  | 							else | 
					
						
							|  |  |  | 								new_cflags &= ~REG_RIGHT_ASSOC; | 
					
						
							|  |  |  | 							ctx->re++; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 	#endif /* REG_RIGHT_ASSOC */
 | 
					
						
							|  |  |  | 	#ifdef REG_UNGREEDY
 | 
					
						
							|  |  |  | 						else if (*ctx->re == HAWK_T('U')) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							DPRINT(("tre_parse:    ungreedy: '%.*" STRF "\n", REST(ctx->re))); | 
					
						
							|  |  |  | 							if (bit) | 
					
						
							|  |  |  | 								new_cflags |= REG_UNGREEDY; | 
					
						
							|  |  |  | 							else | 
					
						
							|  |  |  | 								new_cflags &= ~REG_UNGREEDY; | 
					
						
							|  |  |  | 							ctx->re++; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 	#endif /* REG_UNGREEDY */
 | 
					
						
							|  |  |  | 						else if (*ctx->re == CHAR_MINUS) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							DPRINT(("tre_parse:	 turn off: '%.*" STRF "\n", | 
					
						
							|  |  |  | 							        REST(ctx->re))); | 
					
						
							|  |  |  | 							ctx->re++; | 
					
						
							|  |  |  | 							bit = 0; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 						else if (*ctx->re == CHAR_COLON) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							DPRINT(("tre_parse:	 no group: '%.*" STRF "\n", | 
					
						
							|  |  |  | 							        REST(ctx->re))); | 
					
						
							|  |  |  | 							ctx->re++; | 
					
						
							|  |  |  | 							depth++; | 
					
						
							|  |  |  | 							break; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 						else if (*ctx->re == CHAR_HASH) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							DPRINT(("tre_parse:    comment: '%.*" STRF "\n", | 
					
						
							|  |  |  | 							        REST(ctx->re))); | 
					
						
							|  |  |  | 							/* A comment can contain any character except a
 | 
					
						
							|  |  |  | 							   right parenthesis */ | 
					
						
							|  |  |  | 							while (*ctx->re != CHAR_RPAREN | 
					
						
							|  |  |  | 							        && ctx->re < ctx->re_end) | 
					
						
							|  |  |  | 								ctx->re++; | 
					
						
							|  |  |  | 							if (*ctx->re == CHAR_RPAREN && ctx->re < ctx->re_end) | 
					
						
							|  |  |  | 							{ | 
					
						
							|  |  |  | 								ctx->re++; | 
					
						
							|  |  |  | 								break; | 
					
						
							|  |  |  | 							} | 
					
						
							|  |  |  | 							else | 
					
						
							|  |  |  | 								return REG_BADPAT; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 						else if (*ctx->re == CHAR_RPAREN) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							ctx->re++; | 
					
						
							|  |  |  | 							break; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 						else | 
					
						
							|  |  |  | 							return REG_BADPAT; | 
					
						
							|  |  |  | 					} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 					/* Turn on the cflags changes for the rest of the
 | 
					
						
							|  |  |  | 					   enclosing group. */ | 
					
						
							|  |  |  | 					STACK_PUSHX(stack, int, ctx->cflags); | 
					
						
							|  |  |  | 					STACK_PUSHX(stack, int, PARSE_RESTORE_CFLAGS); | 
					
						
							|  |  |  | 					STACK_PUSHX(stack, int, PARSE_RE); | 
					
						
							|  |  |  | 					ctx->cflags = new_cflags; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				if (ctx->cflags & REG_EXTENDED | 
					
						
							|  |  |  | 				        || (ctx->re > ctx->re_start | 
					
						
							|  |  |  | 				            && *(ctx->re - 1) == CHAR_BACKSLASH)) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					depth++; | 
					
						
							|  |  |  | 					/* HAWK: added ctx->cflags & REG_NONSTDEXT */ | 
					
						
							|  |  |  | 					if ((ctx->cflags & REG_NONSTDEXT) && | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 					     ctx->re + 2 < ctx->re_end && | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 					     *(ctx->re + 1) == CHAR_QUESTIONMARK && | 
					
						
							|  |  |  | 					     *(ctx->re + 2) == CHAR_COLON) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						/* HAWK: \(?: or (?: depending on REG_EXTENDED */ | 
					
						
							|  |  |  | 						DPRINT(("tre_parse: group begin: '%.*" STRF | 
					
						
							|  |  |  | 						        "', no submatch\n", REST(ctx->re))); | 
					
						
							|  |  |  | 						/* Don't mark for submatching. */ | 
					
						
							|  |  |  | 						ctx->re += 3; | 
					
						
							|  |  |  | 						STACK_PUSHX(stack, int, PARSE_RE); | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					else | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						DPRINT(("tre_parse: group begin: '%.*" STRF | 
					
						
							|  |  |  | 						        "', submatch %d\n", REST(ctx->re), | 
					
						
							|  |  |  | 						        ctx->submatch_id)); | 
					
						
							|  |  |  | 						ctx->re++; | 
					
						
							|  |  |  | 						/* First parse a whole RE, then mark the resulting tree
 | 
					
						
							|  |  |  | 						 for submatching. */ | 
					
						
							|  |  |  | 						STACK_PUSHX(stack, int, ctx->submatch_id); | 
					
						
							|  |  |  | 						STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH); | 
					
						
							|  |  |  | 						STACK_PUSHX(stack, int, PARSE_RE); | 
					
						
							|  |  |  | 						ctx->submatch_id++; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 					goto parse_literal; | 
					
						
							|  |  |  | 				break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			case CHAR_RPAREN:  /* end of current subexpression */ | 
					
						
							| 
									
										
										
										
											2020-12-02 16:07:06 +00:00
										 |  |  | 				/* HAWK: fixed the condition */ | 
					
						
							|  |  |  | 				/* if ((ctx->cflags & REG_EXTENDED && depth > 0)
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				        || (ctx->re > ctx->re_start | 
					
						
							| 
									
										
										
										
											2020-12-02 16:07:06 +00:00
										 |  |  | 				            && *(ctx->re - 1) == CHAR_BACKSLASH)) */ | 
					
						
							|  |  |  | 				if (((ctx->cflags & REG_EXTENDED) && depth > 0) || | 
					
						
							|  |  |  | 				    (!(ctx->cflags & REG_EXTENDED) && ctx->re > ctx->re_start && *(ctx->re - 1) == CHAR_BACKSLASH)) | 
					
						
							|  |  |  | 				/* END HAWK */ | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				{ | 
					
						
							|  |  |  | 					DPRINT(("tre_parse:	    empty: '%.*" STRF "'\n", REST(ctx->re))); | 
					
						
							|  |  |  | 					/* We were expecting an atom, but instead the current
 | 
					
						
							|  |  |  | 					   subexpression was closed.	POSIX leaves the meaning of | 
					
						
							|  |  |  | 					   this to be implementation-defined.	 We interpret this as | 
					
						
							|  |  |  | 					   an empty expression (which matches an empty string).  */ | 
					
						
							|  |  |  | 					result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); | 
					
						
							|  |  |  | 					if (result == NULL) return REG_ESPACE; | 
					
						
							|  |  |  | 					if (!(ctx->cflags & REG_EXTENDED)) ctx->re--; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 					goto parse_literal; | 
					
						
							|  |  |  | 				break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			case CHAR_LBRACKET: /* bracket expression */ | 
					
						
							|  |  |  | 				DPRINT(("tre_parse:     bracket: '%.*" STRF "'\n", REST(ctx->re))); | 
					
						
							|  |  |  | 				ctx->re++; | 
					
						
							|  |  |  | 				status = tre_parse_bracket(ctx, &result); | 
					
						
							|  |  |  | 				if (status != REG_OK) return status; | 
					
						
							|  |  |  | 				break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			case CHAR_BACKSLASH: | 
					
						
							|  |  |  | 				/* If this is "\(" or "\)" chew off the backslash and
 | 
					
						
							|  |  |  | 				 try again. */ | 
					
						
							|  |  |  | 				if (!(ctx->cflags & REG_EXTENDED) | 
					
						
							|  |  |  | 				        && ctx->re + 1 < ctx->re_end | 
					
						
							|  |  |  | 				        && (*(ctx->re + 1) == CHAR_LPAREN | 
					
						
							|  |  |  | 				            || *(ctx->re + 1) == CHAR_RPAREN)) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					ctx->re++; | 
					
						
							|  |  |  | 					STACK_PUSHX(stack, int, PARSE_ATOM); | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				/* If a macro is used, parse the expanded macro recursively. */ | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					tre_char_t buf[64]; | 
					
						
							|  |  |  | 					tre_expand_macro(ctx->re + 1, ctx->re_end, buf, HAWK_COUNTOF(buf)); | 
					
						
							|  |  |  | 					if (buf[0] != 0) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						tre_parse_ctx_t subctx; | 
					
						
							|  |  |  | 						HAWK_MEMCPY (&subctx, ctx, sizeof(subctx)); | 
					
						
							|  |  |  | 						subctx.re = buf; | 
					
						
							|  |  |  | 						subctx.len = tre_strlen(buf); | 
					
						
							|  |  |  | 						subctx.nofirstsub = 1; | 
					
						
							|  |  |  | 						status = tre_parse(&subctx); | 
					
						
							|  |  |  | 						if (status != REG_OK) return status; | 
					
						
							|  |  |  | 						ctx->re += 2; | 
					
						
							|  |  |  | 						ctx->position = subctx.position; | 
					
						
							|  |  |  | 						result = subctx.result; | 
					
						
							|  |  |  | 						break; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				if (ctx->re + 1 >= ctx->re_end) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					/* Trailing backslash. */ | 
					
						
							|  |  |  | 					return REG_EESCAPE; | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 	#ifdef REG_LITERAL
 | 
					
						
							|  |  |  | 				if (*(ctx->re + 1) == HAWK_T('Q')) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 					        REST(ctx->re))); | 
					
						
							|  |  |  | 					ctx->cflags |= REG_LITERAL; | 
					
						
							|  |  |  | 					temporary_cflags |= REG_LITERAL; | 
					
						
							|  |  |  | 					ctx->re += 2; | 
					
						
							|  |  |  | 					STACK_PUSHX(stack, int, PARSE_ATOM); | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 	#endif /* REG_LITERAL */
 | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				DPRINT(("tre_parse:  bleep: '%.*" STRF "'\n", REST(ctx->re))); | 
					
						
							|  |  |  | 				ctx->re++; | 
					
						
							|  |  |  | 				switch (*ctx->re) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 				case HAWK_T('b'): | 
					
						
							|  |  |  | 					result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1); | 
					
						
							|  |  |  | 					ctx->re++; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				case HAWK_T('B'): | 
					
						
							|  |  |  | 					result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1); | 
					
						
							|  |  |  | 					ctx->re++; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				case HAWK_T('<'): | 
					
						
							|  |  |  | 					result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1); | 
					
						
							|  |  |  | 					ctx->re++; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				case HAWK_T('>'): | 
					
						
							|  |  |  | 					result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1); | 
					
						
							|  |  |  | 					ctx->re++; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				case HAWK_T('x'): | 
					
						
							|  |  |  | 					ctx->re++; | 
					
						
							|  |  |  | 					if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 					/* HAWK */ | 
					
						
							|  |  |  | 					#if 0
 | 
					
						
							|  |  |  | 						/* 8 bit hex char. */ | 
					
						
							|  |  |  | 						char tmp[3] = {0, 0, 0}; | 
					
						
							|  |  |  | 						long val; | 
					
						
							|  |  |  | 						DPRINT(("tre_parse:  8 bit hex: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 						        REST(ctx->re - 2))); | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 						if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							tmp[0] = (char)ctx->re[0]; | 
					
						
							|  |  |  | 							ctx->re++; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 						if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							tmp[1] = (char)ctx->re[0]; | 
					
						
							|  |  |  | 							ctx->re++; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 						val = strtol(tmp, NULL, 16); | 
					
						
							|  |  |  | 					#endif
 | 
					
						
							|  |  |  | 						long val = 0; | 
					
						
							|  |  |  | 						int tmp; | 
					
						
							|  |  |  | 						if ((tmp = xdigit_to_num(ctx->re[0])) >= 0 && ctx->re < ctx->re_end) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							val = val * 16 + tmp; | 
					
						
							|  |  |  | 							ctx->re++; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 						if ((tmp = xdigit_to_num(ctx->re[1])) >= 0 && ctx->re < ctx->re_end) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							val = val * 16 + tmp; | 
					
						
							|  |  |  | 							ctx->re++; | 
					
						
							|  |  |  | 						} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 						result = tre_ast_new_literal(ctx->mem, (int)val, (int)val, ctx->position); | 
					
						
							|  |  |  | 						ctx->position++; | 
					
						
							|  |  |  | 						break; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					else if (ctx->re < ctx->re_end) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						/* Wide char. */ | 
					
						
							|  |  |  | 					/* HAWK */ | 
					
						
							|  |  |  | 					#if 0
 | 
					
						
							|  |  |  | 						char tmp[32]; | 
					
						
							|  |  |  | 						long val; | 
					
						
							|  |  |  | 						int i = 0; | 
					
						
							|  |  |  | 						ctx->re++; | 
					
						
							|  |  |  | 						while (ctx->re_end - ctx->re >= 0) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							if (ctx->re[0] == CHAR_RBRACE) | 
					
						
							|  |  |  | 								break; | 
					
						
							|  |  |  | 							if (tre_isxdigit(ctx->re[0])) | 
					
						
							|  |  |  | 							{ | 
					
						
							|  |  |  | 								tmp[i] = (char)ctx->re[0]; | 
					
						
							|  |  |  | 								i++; | 
					
						
							|  |  |  | 								ctx->re++; | 
					
						
							|  |  |  | 								continue; | 
					
						
							|  |  |  | 							} | 
					
						
							|  |  |  | 							return REG_EBRACE; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 						ctx->re++; | 
					
						
							|  |  |  | 						tmp[i] = 0; | 
					
						
							|  |  |  | 						val = strtol(tmp, NULL, 16); | 
					
						
							|  |  |  | 					#endif
 | 
					
						
							|  |  |  | 						long val = 0; | 
					
						
							|  |  |  | 						int tmp; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 						ctx->re++; | 
					
						
							|  |  |  | 						while (ctx->re_end - ctx->re >= 0) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							if (ctx->re[0] == CHAR_RBRACE) | 
					
						
							|  |  |  | 								break; | 
					
						
							|  |  |  | 							tmp = xdigit_to_num(ctx->re[0]); | 
					
						
							|  |  |  | 							if (tmp >= 0) | 
					
						
							|  |  |  | 							{ | 
					
						
							|  |  |  | 								val = val * 16 + tmp; | 
					
						
							|  |  |  | 								ctx->re++; | 
					
						
							|  |  |  | 								continue; | 
					
						
							|  |  |  | 							} | 
					
						
							|  |  |  | 							return REG_EBRACE; | 
					
						
							|  |  |  | 						} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 						result = tre_ast_new_literal(ctx->mem, (int)val, (int)val, ctx->position); | 
					
						
							|  |  |  | 						ctx->position++; | 
					
						
							|  |  |  | 						break; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					/*FALLTHROUGH*/ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				default: | 
					
						
							|  |  |  | 					if (tre_isdigit(*ctx->re)) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						/* Back reference. */ | 
					
						
							|  |  |  | 						int val = *ctx->re - HAWK_T('0'); | 
					
						
							|  |  |  | 						DPRINT(("tre_parse:     backref: '%.*" STRF "'\n", REST(ctx->re - 1))); | 
					
						
							|  |  |  | 						result = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position); | 
					
						
							|  |  |  | 						if (result == NULL) return REG_ESPACE; | 
					
						
							|  |  |  | 						ctx->position++; | 
					
						
							|  |  |  | 						ctx->max_backref = MAX(val, ctx->max_backref); | 
					
						
							|  |  |  | 						ctx->re++; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					else | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						/* Escaped character. */ | 
					
						
							|  |  |  | 						DPRINT(("tre_parse:     escaped: '%.*" STRF "'\n", REST(ctx->re - 1))); | 
					
						
							|  |  |  | 						result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position); | 
					
						
							|  |  |  | 						ctx->position++; | 
					
						
							|  |  |  | 						ctx->re++; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				if (result == NULL) | 
					
						
							|  |  |  | 					return REG_ESPACE; | 
					
						
							|  |  |  | 				break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			case CHAR_PERIOD:	 /* the any-symbol */ | 
					
						
							|  |  |  | 				DPRINT(("tre_parse:	  any: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 				        REST(ctx->re))); | 
					
						
							|  |  |  | 				if (ctx->cflags & REG_NEWLINE) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					tre_ast_node_t *tmp1; | 
					
						
							|  |  |  | 					tre_ast_node_t *tmp2; | 
					
						
							|  |  |  | 					/* exclude new line */ | 
					
						
							|  |  |  | 					tmp1 = tre_ast_new_literal(ctx->mem, 0, HAWK_T('\n') - 1, ctx->position); | 
					
						
							|  |  |  | 					if (!tmp1) return REG_ESPACE; | 
					
						
							|  |  |  | 					tmp2 = tre_ast_new_literal(ctx->mem, HAWK_T('\n') + 1, TRE_CHAR_MAX, ctx->position + 1); | 
					
						
							|  |  |  | 					if (!tmp2) return REG_ESPACE; | 
					
						
							|  |  |  | 					result = tre_ast_new_union(ctx->mem, tmp1, tmp2); | 
					
						
							|  |  |  | 					if (!result) return REG_ESPACE; | 
					
						
							|  |  |  | 					ctx->position += 2; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					/* all characters */ | 
					
						
							|  |  |  | 					result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position); | 
					
						
							|  |  |  | 					if (!result) return REG_ESPACE; | 
					
						
							|  |  |  | 					ctx->position++; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				ctx->re++; | 
					
						
							|  |  |  | 				break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			case CHAR_CARET:	 /* beginning of line assertion */ | 
					
						
							|  |  |  | 				/* '^' has a special meaning everywhere in EREs, and in the
 | 
					
						
							|  |  |  | 				 beginning of the RE and after \( is BREs. */ | 
					
						
							|  |  |  | 				if (ctx->cflags & REG_EXTENDED | 
					
						
							|  |  |  | 				        || (ctx->re - 2 >= ctx->re_start | 
					
						
							|  |  |  | 				            && *(ctx->re - 2) == CHAR_BACKSLASH | 
					
						
							|  |  |  | 				            && *(ctx->re - 1) == CHAR_LPAREN) | 
					
						
							|  |  |  | 				        || ctx->re == ctx->re_start) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					DPRINT(("tre_parse:	      BOL: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 					        REST(ctx->re))); | 
					
						
							|  |  |  | 					result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1); | 
					
						
							|  |  |  | 					if (result == NULL) return REG_ESPACE; | 
					
						
							|  |  |  | 					ctx->re++; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 					goto parse_literal; | 
					
						
							|  |  |  | 				break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			case CHAR_DOLLAR:	 /* end of line assertion. */ | 
					
						
							|  |  |  | 				/* '$' is special everywhere in EREs, and in the end of the
 | 
					
						
							|  |  |  | 				 string and before \) is BREs. */ | 
					
						
							|  |  |  | 				if (ctx->cflags & REG_EXTENDED | 
					
						
							|  |  |  | 				        || (ctx->re + 2 < ctx->re_end | 
					
						
							|  |  |  | 				            && *(ctx->re + 1) == CHAR_BACKSLASH | 
					
						
							|  |  |  | 				            && *(ctx->re + 2) == CHAR_RPAREN) | 
					
						
							|  |  |  | 				        || ctx->re + 1 == ctx->re_end) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					DPRINT(("tre_parse:	      EOL: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 					        REST(ctx->re))); | 
					
						
							|  |  |  | 					result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1); | 
					
						
							|  |  |  | 					if (result == NULL) | 
					
						
							|  |  |  | 						return REG_ESPACE; | 
					
						
							|  |  |  | 					ctx->re++; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 					goto parse_literal; | 
					
						
							|  |  |  | 				break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			default: | 
					
						
							|  |  |  | 	parse_literal: | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				if (temporary_cflags && ctx->re + 1 < ctx->re_end | 
					
						
							|  |  |  | 				        && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == HAWK_T('E')) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					DPRINT(("tre_parse:	 end tmps: '%.*" STRF "'\n", REST(ctx->re))); | 
					
						
							|  |  |  | 					ctx->cflags &= ~temporary_cflags; | 
					
						
							|  |  |  | 					temporary_cflags = 0; | 
					
						
							|  |  |  | 					ctx->re += 2; | 
					
						
							|  |  |  | 					STACK_PUSHX(stack, int, PARSE_PIECE); | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				/* We are expecting an atom.  If the subexpression (or the whole
 | 
					
						
							| 
									
										
										
										
											2020-12-02 16:07:06 +00:00
										 |  |  | 				 regexp) ends here, we interpret it as an empty expression | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				 (which matches an empty string).  */ | 
					
						
							|  |  |  | 				if ( | 
					
						
							|  |  |  | 	#ifdef REG_LITERAL
 | 
					
						
							|  |  |  | 				    !(ctx->cflags & REG_LITERAL) && | 
					
						
							|  |  |  | 	#endif /* REG_LITERAL */
 | 
					
						
							|  |  |  | 				    (ctx->re >= ctx->re_end | 
					
						
							|  |  |  | 				     || *ctx->re == CHAR_STAR | 
					
						
							|  |  |  | 				     || (ctx->cflags & REG_EXTENDED | 
					
						
							|  |  |  | 				         && (*ctx->re == CHAR_PIPE | 
					
						
							|  |  |  | 					/* HAWK */ | 
					
						
							|  |  |  | 				             /*|| *ctx->re == CHAR_LBRACE*/ | 
					
						
							|  |  |  | 				             || (*ctx->re == CHAR_LBRACE && !(ctx->cflags & REG_NOBOUND)) | 
					
						
							|  |  |  | 					/* END HAWK */ | 
					
						
							|  |  |  | 				             || *ctx->re == CHAR_PLUS | 
					
						
							|  |  |  | 				             || *ctx->re == CHAR_QUESTIONMARK)) | 
					
						
							|  |  |  | 				     /* Test for "\)" in BRE mode. */ | 
					
						
							|  |  |  | 				     || (!(ctx->cflags & REG_EXTENDED) | 
					
						
							|  |  |  | 				         && ctx->re + 1 < ctx->re_end | 
					
						
							|  |  |  | 				         && *ctx->re == CHAR_BACKSLASH | 
					
						
							|  |  |  | 				         && *(ctx->re + 1) == CHAR_LBRACE))) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					DPRINT(("tre_parse:	    empty: '%.*" STRF "'\n", REST(ctx->re))); | 
					
						
							|  |  |  | 					result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); | 
					
						
							|  |  |  | 					if (!result) return REG_ESPACE; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 				DPRINT(("tre_parse:     literal: '%.*" STRF "'\n", | 
					
						
							|  |  |  | 				        REST(ctx->re))); | 
					
						
							|  |  |  | 				/* Note that we can't use an tre_isalpha() test here, since there
 | 
					
						
							|  |  |  | 				 may be characters which are alphabetic but neither upper or | 
					
						
							|  |  |  | 				 lower case. */ | 
					
						
							|  |  |  | 				if (ctx->cflags & REG_ICASE && (tre_isupper(*ctx->re) || tre_islower(*ctx->re))) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					tre_ast_node_t *tmp1; | 
					
						
							|  |  |  | 					tre_ast_node_t *tmp2; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 					/* XXX - Can there be more than one opposite-case
 | 
					
						
							|  |  |  | 					   counterpoints for some character in some locale?  Or | 
					
						
							|  |  |  | 					   more than two characters which all should be regarded | 
					
						
							|  |  |  | 					   the same character if case is ignored?  If yes, there | 
					
						
							|  |  |  | 					   does not seem to be a portable way to detect it.  I guess | 
					
						
							|  |  |  | 					   that at least for multi-character collating elements there | 
					
						
							|  |  |  | 					   could be several opposite-case counterpoints, but they | 
					
						
							|  |  |  | 					   cannot be supported portably anyway. */ | 
					
						
							|  |  |  | 					tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re), tre_toupper(*ctx->re), ctx->position); | 
					
						
							|  |  |  | 					if (!tmp1) return REG_ESPACE; | 
					
						
							|  |  |  | 					tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re), tre_tolower(*ctx->re), ctx->position); | 
					
						
							|  |  |  | 					if (!tmp2) return REG_ESPACE; | 
					
						
							|  |  |  | 					result = tre_ast_new_union(ctx->mem, tmp1, tmp2); | 
					
						
							|  |  |  | 					if (!result) return REG_ESPACE; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position); | 
					
						
							|  |  |  | 					if (!result) return REG_ESPACE; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				ctx->position++; | 
					
						
							|  |  |  | 				ctx->re++; | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		case PARSE_MARK_FOR_SUBMATCH: | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			int submatch_id = tre_stack_pop_int(stack); | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			if (result->submatch_id >= 0) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				tre_ast_node_t *n, *tmp_node; | 
					
						
							|  |  |  | 				n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); | 
					
						
							|  |  |  | 				if (n == NULL) return REG_ESPACE; | 
					
						
							|  |  |  | 				tmp_node = tre_ast_new_catenation(ctx->mem, n, result); | 
					
						
							|  |  |  | 				if (tmp_node == NULL) return REG_ESPACE; | 
					
						
							|  |  |  | 				tmp_node->num_submatches = result->num_submatches; | 
					
						
							|  |  |  | 				result = tmp_node; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			result->submatch_id = submatch_id; | 
					
						
							|  |  |  | 			result->num_submatches++; | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		case PARSE_RESTORE_CFLAGS: | 
					
						
							|  |  |  | 			ctx->cflags = tre_stack_pop_int(stack); | 
					
						
							|  |  |  | 			break; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		default: | 
					
						
							|  |  |  | 			assert(0); | 
					
						
							|  |  |  | 			break; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 	/* Check for missing closing parentheses. */ | 
					
						
							|  |  |  | 	if (depth > 0) | 
					
						
							|  |  |  | 		return REG_EPAREN; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 	if (status == REG_OK) | 
					
						
							|  |  |  | 		ctx->result = result; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 	return status; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* EOF */ |