| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2020-04-16 03:42:30 +00:00
										 |  |  |     Copyright (c) 2006-2020 Chung, Hyung-Hwan. All rights reserved. | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     Redistribution and use in source and binary forms, with or without | 
					
						
							|  |  |  |     modification, are permitted provided that the following conditions | 
					
						
							|  |  |  |     are met: | 
					
						
							|  |  |  |     1. Redistributions of source code must retain the above copyright | 
					
						
							|  |  |  |        notice, this list of conditions and the following disclaimer. | 
					
						
							|  |  |  |     2. Redistributions in binary form must reproduce the above copyright | 
					
						
							|  |  |  |        notice, this list of conditions and the following disclaimer in the | 
					
						
							|  |  |  |        documentation and/or other materials provided with the distribution. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR | 
					
						
							|  |  |  |     IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | 
					
						
							|  |  |  |     OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | 
					
						
							|  |  |  |     IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | 
					
						
							|  |  |  |     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | 
					
						
							|  |  |  |     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
					
						
							|  |  |  |     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
					
						
							|  |  |  |     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
					
						
							|  |  |  |     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 
					
						
							|  |  |  |     THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |   tre-match-parallel.c - TRE parallel regex matching engine | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This is the license, copyright notice, and disclaimer for TRE, a regex | 
					
						
							|  |  |  | matching package (library and tools) with support for approximate | 
					
						
							|  |  |  | matching. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> | 
					
						
							|  |  |  | All rights reserved. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Redistribution and use in source and binary forms, with or without | 
					
						
							|  |  |  | modification, are permitted provided that the following conditions | 
					
						
							|  |  |  | are met: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   1. Redistributions of source code must retain the above copyright | 
					
						
							|  |  |  |      notice, this list of conditions and the following disclaimer. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   2. Redistributions in binary form must reproduce the above copyright | 
					
						
							|  |  |  |      notice, this list of conditions and the following disclaimer in the | 
					
						
							|  |  |  |      documentation and/or other materials provided with the distribution. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS | 
					
						
							|  |  |  | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
					
						
							|  |  |  | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
					
						
							|  |  |  | A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT | 
					
						
							|  |  |  | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
					
						
							|  |  |  | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
					
						
							|  |  |  | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
					
						
							|  |  |  | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
					
						
							|  |  |  | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
					
						
							|  |  |  | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
					
						
							|  |  |  | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
/*
  This algorithm searches for matches basically by reading characters
  in the searched string one by one, starting at the beginning.  All
  matching paths in the TNFA are traversed in parallel.  When two or
  more paths reach the same state, exactly one is chosen according to
  tag ordering rules; if returning submatches is not required it does
  not matter which path is chosen.

  The worst case time required for finding the leftmost and longest
  match, or determining that there is no match, is always linearly
  dependent on the length of the text being searched.

  This algorithm cannot handle TNFAs with back referencing nodes.
  See `tre-match-backtrack.c'.
*/
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "tre-prv.h"
 | 
					
						
							|  |  |  | #include "tre-match-ut.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	tre_tnfa_transition_t *state; | 
					
						
							|  |  |  | 	int *tags; | 
					
						
							|  |  |  | } tre_tnfa_reach_t; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int pos; | 
					
						
							|  |  |  | 	int **tags; | 
					
						
							|  |  |  | } tre_reach_pos_t; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef TRE_DEBUG
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_reach_t *reach, int num_tags) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	int i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	while (reach->state != NULL) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		DPRINT((" %p", (void *)reach->state)); | 
					
						
							|  |  |  | 		if (num_tags > 0) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			DPRINT(("/")); | 
					
						
							|  |  |  | 			for (i = 0; i < num_tags; i++) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				DPRINT(("%d:%d", i, reach->tags[i])); | 
					
						
							|  |  |  | 				if (i < (num_tags-1)) | 
					
						
							|  |  |  | 					DPRINT((",")); | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		reach++; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	DPRINT(("\n")); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #endif /* TRE_DEBUG */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | reg_errcode_t | 
					
						
							| 
									
										
										
										
											2019-12-18 08:34:44 +00:00
										 |  |  | tre_tnfa_run_parallel(hawk_gem_t* gem, const tre_tnfa_t *tnfa, const void *string, int len, | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  |                       tre_str_type_t type, int *match_tags, int eflags, | 
					
						
							|  |  |  |                       int *match_end_ofs) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	/* State variables required by GET_NEXT_WCHAR. */ | 
					
						
							|  |  |  | 	tre_char_t prev_c = 0, next_c = 0; | 
					
						
							|  |  |  | 	const char *str_byte = string; | 
					
						
							|  |  |  | 	int pos = -1; | 
					
						
							|  |  |  | 	unsigned int pos_add_next = 1; | 
					
						
							|  |  |  | #ifdef TRE_WCHAR
 | 
					
						
							|  |  |  | 	const hawk_uch_t *str_wide = string; | 
					
						
							|  |  |  | #ifdef TRE_MBSTATE
 | 
					
						
							|  |  |  | 	hawk_mbstate_t mbstate; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | #endif /* TRE_WCHAR */
 | 
					
						
							|  |  |  | 	int reg_notbol = eflags & REG_NOTBOL; | 
					
						
							|  |  |  | 	int reg_noteol = eflags & REG_NOTEOL; | 
					
						
							|  |  |  | 	int reg_newline = tnfa->cflags & REG_NEWLINE; | 
					
						
							|  |  |  | 	int str_user_end = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	char *buf; | 
					
						
							|  |  |  | 	tre_tnfa_transition_t *trans_i; | 
					
						
							|  |  |  | 	tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i; | 
					
						
							|  |  |  | 	tre_reach_pos_t *reach_pos; | 
					
						
							|  |  |  | 	int *tag_i; | 
					
						
							|  |  |  | 	int num_tags, i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	int match_eo = -1;	   /* end offset of match (-1 if no match found yet) */ | 
					
						
							|  |  |  | 	int new_match = 0; | 
					
						
							|  |  |  | 	int *tmp_tags = NULL; | 
					
						
							|  |  |  | 	int *tmp_iptr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef TRE_MBSTATE
 | 
					
						
							|  |  |  | 	HAWK_MEMSET(&mbstate, '\0', sizeof(mbstate)); | 
					
						
							|  |  |  | #endif /* TRE_MBSTATE */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	DPRINT(("tre_tnfa_run_parallel, input type %d\n", type)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (!match_tags) | 
					
						
							|  |  |  | 		num_tags = 0; | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 		num_tags = tnfa->num_tags; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* Allocate memory for temporary data required for matching.	This needs to
 | 
					
						
							|  |  |  | 	   be done for every matching operation to be thread safe.  This allocates | 
					
						
							|  |  |  | 	   everything in a single large block from the stack frame using alloca() | 
					
						
							|  |  |  | 	   or with malloc() if alloca is unavailable. */ | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		int tbytes, rbytes, pbytes, xbytes, total_bytes; | 
					
						
							|  |  |  | 		char *tmp_buf; | 
					
						
							|  |  |  | 		/* Compute the length of the block we need. */ | 
					
						
							|  |  |  | 		tbytes = sizeof(*tmp_tags) * num_tags; | 
					
						
							|  |  |  | 		rbytes = sizeof(*reach_next) * (tnfa->num_states + 1); | 
					
						
							|  |  |  | 		pbytes = sizeof(*reach_pos) * tnfa->num_states; | 
					
						
							|  |  |  | 		xbytes = sizeof(int) * num_tags; | 
					
						
							|  |  |  | 		total_bytes = | 
					
						
							|  |  |  | 		    (sizeof(long) - 1) * 4 /* for alignment paddings */ | 
					
						
							|  |  |  | 		    + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		/* Allocate the memory. */ | 
					
						
							| 
									
										
										
										
											2019-12-18 08:34:44 +00:00
										 |  |  | 		buf = xmalloc(gem, (unsigned)total_bytes); | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		if (buf == NULL) return REG_ESPACE; | 
					
						
							|  |  |  | 		HAWK_MEMSET(buf, 0, (size_t)total_bytes); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		/* Get the various pointers within tmp_buf (properly aligned). */ | 
					
						
							|  |  |  | 		tmp_tags = (void *)buf; | 
					
						
							|  |  |  | 		tmp_buf = buf + tbytes; | 
					
						
							|  |  |  | 		tmp_buf += ALIGN(tmp_buf, hawk_uintptr_t); | 
					
						
							|  |  |  | 		reach_next = (void *)tmp_buf; | 
					
						
							|  |  |  | 		tmp_buf += rbytes; | 
					
						
							|  |  |  | 		tmp_buf += ALIGN(tmp_buf, hawk_uintptr_t); | 
					
						
							|  |  |  | 		reach = (void *)tmp_buf; | 
					
						
							|  |  |  | 		tmp_buf += rbytes; | 
					
						
							|  |  |  | 		tmp_buf += ALIGN(tmp_buf, hawk_uintptr_t); | 
					
						
							|  |  |  | 		reach_pos = (void *)tmp_buf; | 
					
						
							|  |  |  | 		tmp_buf += pbytes; | 
					
						
							|  |  |  | 		tmp_buf += ALIGN(tmp_buf, hawk_uintptr_t); | 
					
						
							|  |  |  | 		for (i = 0; i < tnfa->num_states; i++) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			reach[i].tags = (void *)tmp_buf; | 
					
						
							|  |  |  | 			tmp_buf += xbytes; | 
					
						
							|  |  |  | 			reach_next[i].tags = (void *)tmp_buf; | 
					
						
							|  |  |  | 			tmp_buf += xbytes; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for (i = 0; i < tnfa->num_states; i++) | 
					
						
							|  |  |  | 		reach_pos[i].pos = -1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* If only one character can start a match, find it first. */ | 
					
						
							|  |  |  | 	if (tnfa->first_char >= 0 && type == STR_BYTE && str_byte) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		const char *orig_str = str_byte; | 
					
						
							|  |  |  | 		int first = tnfa->first_char; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (len >= 0) | 
					
						
							| 
									
										
										
										
											2020-12-16 15:13:29 +00:00
										 |  |  | 			str_byte = hawk_find_bchar_in_bchars(orig_str, (size_t)len, first); | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 		else | 
					
						
							|  |  |  | 			str_byte = hawk_find_bchar_in_bcstr(orig_str, first); | 
					
						
							|  |  |  | 		if (str_byte == NULL) | 
					
						
							|  |  |  | 		{ | 
					
						
							| 
									
										
										
										
											2019-12-18 08:34:44 +00:00
										 |  |  | 			if (buf) xfree(gem, buf); | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 			return REG_NOMATCH; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		DPRINT(("skipped %lu chars\n", (unsigned long)(str_byte - orig_str))); | 
					
						
							|  |  |  | 		if (str_byte >= orig_str + 1) | 
					
						
							|  |  |  | 			prev_c = (unsigned char)*(str_byte - 1); | 
					
						
							|  |  |  | 		next_c = (unsigned char)*str_byte; | 
					
						
							|  |  |  | 		pos = str_byte - orig_str; | 
					
						
							|  |  |  | 		if (len < 0 || pos < len) | 
					
						
							|  |  |  | 			str_byte++; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		GET_NEXT_WCHAR(); | 
					
						
							|  |  |  | 		pos = 0; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if 0
 | 
					
						
							|  |  |  | 	/* Skip over characters that cannot possibly be the first character
 | 
					
						
							|  |  |  | 	   of a match. */ | 
					
						
							|  |  |  | 	if (tnfa->firstpos_chars != NULL) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		char *chars = tnfa->firstpos_chars; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (len < 0) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			const char *orig_str = str_byte; | 
					
						
							|  |  |  | 			/* XXX - use strpbrk() and wcspbrk() because they might be
 | 
					
						
							|  |  |  | 			   optimized for the target architecture.  Try also strcspn() | 
					
						
							|  |  |  | 			   and wcscspn() and compare the speeds. */ | 
					
						
							|  |  |  | 			while (next_c != HAWK_T('\0') && !chars[next_c]) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				next_c = *str_byte++; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			prev_c = *(str_byte - 2); | 
					
						
							|  |  |  | 			pos += str_byte - orig_str; | 
					
						
							|  |  |  | 			DPRINT(("skipped %d chars\n", str_byte - orig_str)); | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			while (pos <= len && !chars[next_c]) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				prev_c = next_c; | 
					
						
							|  |  |  | 				next_c = (unsigned char)(*str_byte++); | 
					
						
							|  |  |  | 				pos++; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	DPRINT(("length: %d\n", len)); | 
					
						
							|  |  |  | 	DPRINT(("pos:chr/code | states and tags\n")); | 
					
						
							|  |  |  | 	DPRINT(("-------------+------------------------------------------------\n")); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	reach_next_i = reach_next; | 
					
						
							|  |  |  | 	while (/*CONSTCOND*/1) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		/* If no match found yet, add the initial states to `reach_next'. */ | 
					
						
							|  |  |  | 		if (match_eo < 0) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			DPRINT((" init >")); | 
					
						
							|  |  |  | 			trans_i = tnfa->initial; | 
					
						
							|  |  |  | 			while (trans_i->state != NULL) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				if (reach_pos[trans_i->state_id].pos < pos) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					if (trans_i->assertions | 
					
						
							|  |  |  | 					        && CHECK_ASSERTIONS(trans_i->assertions)) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						DPRINT(("assertion failed\n")); | 
					
						
							|  |  |  | 						trans_i++; | 
					
						
							|  |  |  | 						continue; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					DPRINT((" %p", (void *)trans_i->state)); | 
					
						
							|  |  |  | 					reach_next_i->state = trans_i->state; | 
					
						
							|  |  |  | 					for (i = 0; i < num_tags; i++) | 
					
						
							|  |  |  | 						reach_next_i->tags[i] = -1; | 
					
						
							|  |  |  | 					tag_i = trans_i->tags; | 
					
						
							|  |  |  | 					if (tag_i) | 
					
						
							|  |  |  | 						while (*tag_i >= 0) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							if (*tag_i < num_tags) | 
					
						
							|  |  |  | 								reach_next_i->tags[*tag_i] = pos; | 
					
						
							|  |  |  | 							tag_i++; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 					if (reach_next_i->state == tnfa->final) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						DPRINT(("	 found empty match\n")); | 
					
						
							|  |  |  | 						match_eo = pos; | 
					
						
							|  |  |  | 						new_match = 1; | 
					
						
							|  |  |  | 						for (i = 0; i < num_tags; i++) | 
					
						
							|  |  |  | 							match_tags[i] = reach_next_i->tags[i]; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					reach_pos[trans_i->state_id].pos = pos; | 
					
						
							|  |  |  | 					reach_pos[trans_i->state_id].tags = &reach_next_i->tags; | 
					
						
							|  |  |  | 					reach_next_i++; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				trans_i++; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			DPRINT(("\n")); | 
					
						
							|  |  |  | 			reach_next_i->state = NULL; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			if (num_tags == 0 || reach_next_i == reach_next) | 
					
						
							|  |  |  | 				/*?We have found a match. */ | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		/* Check for end of string. */ | 
					
						
							|  |  |  | 		if (len < 0) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			if (next_c == HAWK_T('\0')) | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			if (pos >= len) | 
					
						
							|  |  |  | 				break; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		GET_NEXT_WCHAR(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef TRE_DEBUG
 | 
					
						
							|  |  |  | 		DPRINT(("%3d:%2lc/%05d |", pos - 1, (tre_cint_t)prev_c, (int)prev_c)); | 
					
						
							|  |  |  | 		tre_print_reach(tnfa, reach_next, num_tags); | 
					
						
							|  |  |  | 		DPRINT(("%3d:%2lc/%05d |", pos, (tre_cint_t)next_c, (int)next_c)); | 
					
						
							|  |  |  | 		tre_print_reach(tnfa, reach_next, num_tags); | 
					
						
							|  |  |  | #endif /* TRE_DEBUG */
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		/* Swap `reach' and `reach_next'. */ | 
					
						
							|  |  |  | 		reach_i = reach; | 
					
						
							|  |  |  | 		reach = reach_next; | 
					
						
							|  |  |  | 		reach_next = reach_i; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		/* For each state in `reach', weed out states that don't fulfill the
 | 
					
						
							|  |  |  | 			 minimal matching conditions. */ | 
					
						
							|  |  |  | 		if (tnfa->num_minimals && new_match) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			new_match = 0; | 
					
						
							|  |  |  | 			reach_next_i = reach_next; | 
					
						
							|  |  |  | 			for (reach_i = reach; reach_i->state; reach_i++) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				int skip = 0; | 
					
						
							|  |  |  | 				for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					int end = tnfa->minimal_tags[i]; | 
					
						
							|  |  |  | 					int start = tnfa->minimal_tags[i + 1]; | 
					
						
							|  |  |  | 					DPRINT(("  Minimal start %d, end %d\n", start, end)); | 
					
						
							|  |  |  | 					if (end >= num_tags) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						DPRINT(("	 Throwing %p out.\n", reach_i->state)); | 
					
						
							|  |  |  | 						skip = 1; | 
					
						
							|  |  |  | 						break; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					else if (reach_i->tags[start] == match_tags[start] | 
					
						
							|  |  |  | 					         && reach_i->tags[end] < match_tags[end]) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						DPRINT(("	 Throwing %p out because t%d < %d\n", | 
					
						
							|  |  |  | 						        reach_i->state, end, match_tags[end])); | 
					
						
							|  |  |  | 						skip = 1; | 
					
						
							|  |  |  | 						break; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				if (!skip) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					reach_next_i->state = reach_i->state; | 
					
						
							|  |  |  | 					tmp_iptr = reach_next_i->tags; | 
					
						
							|  |  |  | 					reach_next_i->tags = reach_i->tags; | 
					
						
							|  |  |  | 					reach_i->tags = tmp_iptr; | 
					
						
							|  |  |  | 					reach_next_i++; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			reach_next_i->state = NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			/* Swap `reach' and `reach_next'. */ | 
					
						
							|  |  |  | 			reach_i = reach; | 
					
						
							|  |  |  | 			reach = reach_next; | 
					
						
							|  |  |  | 			reach_next = reach_i; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		/* For each state in `reach' see if there is a transition leaving with
 | 
					
						
							|  |  |  | 			 the current input symbol to a state not yet in `reach_next', and | 
					
						
							|  |  |  | 			 add the destination states to `reach_next'. */ | 
					
						
							|  |  |  | 		reach_next_i = reach_next; | 
					
						
							|  |  |  | 		for (reach_i = reach; reach_i->state; reach_i++) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			for (trans_i = reach_i->state; trans_i->state; trans_i++) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				/* Does this transition match the input symbol? */ | 
					
						
							|  |  |  | 				if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c) | 
					
						
							|  |  |  | 				{ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 					if (trans_i->assertions && | 
					
						
							|  |  |  | 					    (CHECK_ASSERTIONS(trans_i->assertions) || | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 					     CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						DPRINT(("assertion failed\n")); | 
					
						
							|  |  |  | 						continue; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					/* Compute the tags after this transition. */ | 
					
						
							|  |  |  | 					for (i = 0; i < num_tags; i++) tmp_tags[i] = reach_i->tags[i]; | 
					
						
							|  |  |  | 					tag_i = trans_i->tags; | 
					
						
							|  |  |  | 					if (tag_i != NULL) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						while (*tag_i >= 0) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							if (*tag_i < num_tags) | 
					
						
							|  |  |  | 								tmp_tags[*tag_i] = pos; | 
					
						
							|  |  |  | 							tag_i++; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					if (reach_pos[trans_i->state_id].pos < pos) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						/* Found an unvisited node. */ | 
					
						
							|  |  |  | 						reach_next_i->state = trans_i->state; | 
					
						
							|  |  |  | 						tmp_iptr = reach_next_i->tags; | 
					
						
							|  |  |  | 						reach_next_i->tags = tmp_tags; | 
					
						
							|  |  |  | 						tmp_tags = tmp_iptr; | 
					
						
							|  |  |  | 						reach_pos[trans_i->state_id].pos = pos; | 
					
						
							|  |  |  | 						reach_pos[trans_i->state_id].tags = &reach_next_i->tags; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 						if (reach_next_i->state == tnfa->final | 
					
						
							|  |  |  | 						        && (match_eo == -1 || (num_tags > 0 && reach_next_i->tags[0] <= match_tags[0]))) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							DPRINT(("  found match %p\n", trans_i->state)); | 
					
						
							|  |  |  | 							match_eo = pos; | 
					
						
							|  |  |  | 							new_match = 1; | 
					
						
							|  |  |  | 							for (i = 0; i < num_tags; i++) match_tags[i] = reach_next_i->tags[i]; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 						reach_next_i++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 					else | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						assert(reach_pos[trans_i->state_id].pos == pos); | 
					
						
							|  |  |  | 						/* Another path has also reached this state.  We choose
 | 
					
						
							|  |  |  | 									 the winner by examining the tag values for both | 
					
						
							|  |  |  | 									 paths. */ | 
					
						
							|  |  |  | 						if (tre_tag_order(num_tags, tnfa->tag_directions, | 
					
						
							|  |  |  | 						                  tmp_tags, | 
					
						
							|  |  |  | 						                  *reach_pos[trans_i->state_id].tags)) | 
					
						
							|  |  |  | 						{ | 
					
						
							|  |  |  | 							/* The new path wins. */ | 
					
						
							|  |  |  | 							tmp_iptr = *reach_pos[trans_i->state_id].tags; | 
					
						
							|  |  |  | 							*reach_pos[trans_i->state_id].tags = tmp_tags; | 
					
						
							|  |  |  | 							if (trans_i->state == tnfa->final) | 
					
						
							|  |  |  | 							{ | 
					
						
							|  |  |  | 								DPRINT(("	 found better match\n")); | 
					
						
							|  |  |  | 								match_eo = pos; | 
					
						
							|  |  |  | 								new_match = 1; | 
					
						
							|  |  |  | 								for (i = 0; i < num_tags; i++) | 
					
						
							|  |  |  | 									match_tags[i] = tmp_tags[i]; | 
					
						
							|  |  |  | 							} | 
					
						
							|  |  |  | 							tmp_tags = tmp_iptr; | 
					
						
							|  |  |  | 						} | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		reach_next_i->state = NULL; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	DPRINT(("match end offset = %d\n", match_eo)); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-18 08:34:44 +00:00
										 |  |  | 	if (buf) xfree(gem, buf); | 
					
						
							| 
									
										
										
										
											2019-12-13 04:29:58 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	*match_end_ofs = match_eo; | 
					
						
							|  |  |  | 	return match_eo >= 0 ? REG_OK : REG_NOMATCH; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* EOF */ |