| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | /*
 | 
					
						
							|  |  |  |     Copyright (c) 2006-2020 Chung, Hyung-Hwan. All rights reserved. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Redistribution and use in source and binary forms, with or without | 
					
						
							|  |  |  |     modification, are permitted provided that the following conditions | 
					
						
							|  |  |  |     are met: | 
					
						
							|  |  |  |     1. Redistributions of source code must retain the above copyright | 
					
						
							|  |  |  |        notice, this list of conditions and the following disclaimer. | 
					
						
							|  |  |  |     2. Redistributions in binary form must reproduce the above copyright | 
					
						
							|  |  |  |        notice, this list of conditions and the following disclaimer in the | 
					
						
							|  |  |  |        documentation and/or other materials provided with the distribution. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR | 
					
						
							|  |  |  |     IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | 
					
						
							|  |  |  |     OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | 
					
						
							|  |  |  |     IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | 
					
						
							|  |  |  |     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | 
					
						
							|  |  |  |     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
					
						
							|  |  |  |     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
					
						
							|  |  |  |     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
					
						
							|  |  |  |     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 
					
						
							|  |  |  |     THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | char_t* split_xchars_to_fields (hawk_rtx_t* rtx, char_t* str, hawk_oow_t len, char_t fs, char_t ec, char_t lq, char_t rq, xcs_t* tok) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	char_t* p = str; | 
					
						
							|  |  |  | 	char_t* end = str + len; | 
					
						
							|  |  |  | 	int escaped = 0, quoted = 0; | 
					
						
							|  |  |  | 	char_t* ts; /* token start */ | 
					
						
							|  |  |  | 	char_t* tp; /* points to one char past the last token char */ | 
					
						
							|  |  |  | 	char_t* xp; /* points to one char past the last effective char */ | 
					
						
							| 
									
										
										
										
											2025-09-27 11:37:25 +09:00
										 |  |  | 	int escape_doubling; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* to extract "abc""def" as abc"def */ | 
					
						
							|  |  |  | 	escape_doubling = (ec == lq && ec == rq); | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	/* skip leading spaces */ | 
					
						
							|  |  |  | 	while (p < end && is_xch_space(*p)) p++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* initialize token pointers */ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 	ts = tp = xp = p; | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	while (p < end) | 
					
						
							|  |  |  | 	{ | 
					
						
							| 
									
										
										
										
											2025-10-01 23:48:42 +09:00
										 |  |  | 		char_t c = *p; | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		if (escaped) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			*tp++ = c; xp = tp; p++; | 
					
						
							|  |  |  | 			escaped = 0; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							| 
									
										
										
										
											2025-09-27 11:37:25 +09:00
										 |  |  | 			if (!escape_doubling && c == ec) | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 			{ | 
					
						
							| 
									
										
										
										
											2025-09-27 11:37:25 +09:00
										 |  |  | 				/* normal escaping is never activated if escaping with two repeated characters is on */ | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 				escaped = 1; | 
					
						
							|  |  |  | 				p++; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else if (quoted) | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				if (c == rq) | 
					
						
							|  |  |  | 				{ | 
					
						
							| 
									
										
										
										
											2025-09-27 11:37:25 +09:00
										 |  |  | 					if (escape_doubling && (p + 1) < end && *(p + 1) == rq) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						p++; | 
					
						
							|  |  |  | 						goto not_rq; | 
					
						
							|  |  |  | 					} | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 					quoted = 0; | 
					
						
							|  |  |  | 					p++; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 				{ | 
					
						
							| 
									
										
										
										
											2025-09-27 11:37:25 +09:00
										 |  |  | 				not_rq: | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 					*tp++ = c; xp = tp; p++; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 			else | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 			{ | 
					
						
							|  |  |  | 				if (c == fs) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					tok->ptr = ts; | 
					
						
							|  |  |  | 					tok->len = xp - ts; | 
					
						
							|  |  |  | 					p++; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					if (is_xch_space(fs)) | 
					
						
							|  |  |  | 					{ | 
					
						
							|  |  |  | 						while (p < end && *p == fs) p++; | 
					
						
							|  |  |  | 						if (p >= end) return HAWK_NULL; | 
					
						
							|  |  |  | 					} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 					return p; | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 				if (c == lq) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					quoted = 1; | 
					
						
							|  |  |  | 					p++; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				else | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					*tp++ = c; p++; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 					if (!is_xch_space(c)) xp = tp; | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 	if (escaped) | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 	{ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 		/* if it is still escaped, the last character must be
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 		 * the escaper itself. treat it as a normal character */ | 
					
						
							|  |  |  | 		*xp++ = ec; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	tok->ptr = ts; | 
					
						
							|  |  |  | 	tok->len = xp - ts; | 
					
						
							|  |  |  | 	return HAWK_NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | char_t* tokenize_xchars (hawk_rtx_t* rtx, const char_t* s, hawk_oow_t len, const char_t* delim, hawk_oow_t delim_len, xcs_t* tok) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	const char_t* p = s, *d; | 
					
						
							|  |  |  | 	const char_t* end = s + len; | 
					
						
							|  |  |  | 	const char_t* sp = HAWK_NULL, * ep = HAWK_NULL; | 
					
						
							|  |  |  | 	const char_t* delim_end = delim + delim_len; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 	char_t c; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 	int delim_mode; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define __DELIM_NULL      0
 | 
					
						
							|  |  |  | #define __DELIM_EMPTY     1
 | 
					
						
							|  |  |  | #define __DELIM_SPACES    2
 | 
					
						
							|  |  |  | #define __DELIM_NOSPACES  3
 | 
					
						
							|  |  |  | #define __DELIM_COMPOSITE 4
 | 
					
						
							|  |  |  | 	if (delim == HAWK_NULL) delim_mode = __DELIM_NULL; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 	else | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 	{ | 
					
						
							|  |  |  | 		delim_mode = __DELIM_EMPTY; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 		for (d = delim; d < delim_end; d++) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 		{ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 			if (is_xch_space(*d)) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 			{ | 
					
						
							|  |  |  | 				if (delim_mode == __DELIM_EMPTY) | 
					
						
							|  |  |  | 					delim_mode = __DELIM_SPACES; | 
					
						
							|  |  |  | 				else if (delim_mode == __DELIM_NOSPACES) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					delim_mode = __DELIM_COMPOSITE; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			else | 
					
						
							|  |  |  | 			{ | 
					
						
							|  |  |  | 				if (delim_mode == __DELIM_EMPTY) | 
					
						
							|  |  |  | 					delim_mode = __DELIM_NOSPACES; | 
					
						
							|  |  |  | 				else if (delim_mode == __DELIM_SPACES) | 
					
						
							|  |  |  | 				{ | 
					
						
							|  |  |  | 					delim_mode = __DELIM_COMPOSITE; | 
					
						
							|  |  |  | 					break; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		/* TODO: verify the following statement... */ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 		if (delim_mode == __DELIM_SPACES && | 
					
						
							|  |  |  | 		    delim_len == 1 && | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 		    delim[0] != ' ') delim_mode = __DELIM_NOSPACES; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 	if (delim_mode == __DELIM_NULL) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		/* when HAWK_NULL is given as "delim", it trims off the
 | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 		 * leading and trailing spaces characters off the source | 
					
						
							|  |  |  | 		 * string "s" eventually. */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		while (p < end && is_xch_space(*p)) p++; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 		while (p < end) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 		{ | 
					
						
							|  |  |  | 			c = *p; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 			if (!is_xch_space(c)) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 			{ | 
					
						
							|  |  |  | 				if (sp == HAWK_NULL) sp = p; | 
					
						
							|  |  |  | 				ep = p; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			p++; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	else if (delim_mode == __DELIM_EMPTY) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		/* each character in the source string "s" becomes a token. */ | 
					
						
							|  |  |  | 		if (p < end) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			c = *p; | 
					
						
							|  |  |  | 			sp = p; | 
					
						
							|  |  |  | 			ep = p++; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 	else if (delim_mode == __DELIM_SPACES) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 	{ | 
					
						
							|  |  |  | 		/* each token is delimited by space characters. all leading
 | 
					
						
							|  |  |  | 		 * and trailing spaces are removed. */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		while (p < end && is_xch_space(*p)) p++; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 		while (p < end) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 		{ | 
					
						
							|  |  |  | 			c = *p; | 
					
						
							|  |  |  | 			if (is_xch_space(c)) break; | 
					
						
							|  |  |  | 			if (sp == HAWK_NULL) sp = p; | 
					
						
							|  |  |  | 			ep = p++; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		while (p < end && is_xch_space(*p)) p++; | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	else if (delim_mode == __DELIM_NOSPACES) | 
					
						
							|  |  |  | 	{ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 		/* each token is delimited by one of charaters
 | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 		 * in the delimeter set "delim". */ | 
					
						
							|  |  |  | 		if (rtx->gbl.ignorecase) | 
					
						
							|  |  |  | 		{ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 			while (p < end) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 			{ | 
					
						
							| 
									
										
										
										
											2020-12-18 17:16:58 +00:00
										 |  |  | 				c = to_xch_upper(*p); | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 				for (d = delim; d < delim_end; d++) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 				{ | 
					
						
							| 
									
										
										
										
											2020-12-18 17:16:58 +00:00
										 |  |  | 					if (c == to_xch_upper(*d)) goto exit_loop; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 				} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				if (sp == HAWK_NULL) sp = p; | 
					
						
							|  |  |  | 				ep = p++; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 			while (p < end) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 			{ | 
					
						
							|  |  |  | 				c = *p; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 				for (d = delim; d < delim_end; d++) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 				{ | 
					
						
							|  |  |  | 					if (c == *d) goto exit_loop; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				if (sp == HAWK_NULL) sp = p; | 
					
						
							|  |  |  | 				ep = p++; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 	else /* if (delim_mode == __DELIM_COMPOSITE) */ | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 	{ | 
					
						
							|  |  |  | 		/* each token is delimited by one of non-space charaters
 | 
					
						
							|  |  |  | 		 * in the delimeter set "delim". however, all space characters | 
					
						
							|  |  |  | 		 * surrounding the token are removed */ | 
					
						
							|  |  |  | 		while (p < end && is_xch_space(*p)) p++; | 
					
						
							|  |  |  | 		if (rtx->gbl.ignorecase) | 
					
						
							|  |  |  | 		{ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 			while (p < end) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 			{ | 
					
						
							| 
									
										
										
										
											2020-12-18 17:16:58 +00:00
										 |  |  | 				c = to_xch_upper(*p); | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 				if (is_xch_space(c)) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 				{ | 
					
						
							|  |  |  | 					p++; | 
					
						
							|  |  |  | 					continue; | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 				for (d = delim; d < delim_end; d++) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 				{ | 
					
						
							| 
									
										
										
										
											2020-12-18 17:16:58 +00:00
										 |  |  | 					if (c == to_xch_upper(*d)) goto exit_loop; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 				} | 
					
						
							|  |  |  | 				if (sp == HAWK_NULL) sp = p; | 
					
						
							|  |  |  | 				ep = p++; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else | 
					
						
							|  |  |  | 		{ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 			while (p < end) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 			{ | 
					
						
							|  |  |  | 				c = *p; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 				if (is_xch_space(c)) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 				{ | 
					
						
							|  |  |  | 					p++; | 
					
						
							|  |  |  | 					continue; | 
					
						
							|  |  |  | 				} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 				for (d = delim; d < delim_end; d++) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 				{ | 
					
						
							|  |  |  | 					if (c == *d) goto exit_loop; | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 				if (sp == HAWK_NULL) sp = p; | 
					
						
							|  |  |  | 				ep = p++; | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | exit_loop: | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 	if (sp == HAWK_NULL) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 	{ | 
					
						
							|  |  |  | 		tok->ptr = HAWK_NULL; | 
					
						
							|  |  |  | 		tok->len = (hawk_oow_t)0; | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 	else | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 	{ | 
					
						
							|  |  |  | 		tok->ptr = (char_t*)sp; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 		tok->len = ep - sp + 1; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* if HAWK_NULL is returned, this function should not be called again */ | 
					
						
							|  |  |  | 	if (p >= end) return HAWK_NULL; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 	if (delim_mode == __DELIM_EMPTY || | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 	    delim_mode == __DELIM_SPACES) return (char_t*)p; | 
					
						
							|  |  |  | 	return (char_t*)++p; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | char_t* tokenize_xchars_by_rex (hawk_rtx_t* rtx, const char_t* str, hawk_oow_t len, const char_t* substr, hawk_oow_t sublen, hawk_tre_t* rex, xcs_t* tok) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 	int n; | 
					
						
							|  |  |  | 	hawk_oow_t i; | 
					
						
							|  |  |  | 	xcs_t match, s, cursub, realsub; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 	s.ptr = (char_t*)str; | 
					
						
							|  |  |  | 	s.len = len; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 	cursub.ptr = (char_t*)substr; | 
					
						
							|  |  |  | 	cursub.len = sublen; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 	realsub.ptr = (char_t*)substr; | 
					
						
							|  |  |  | 	realsub.len = sublen; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	while (cursub.len > 0) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 	{ | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 		n = match_rex_with_xcs(rtx, rex, &s, &cursub, &match, HAWK_NULL); | 
					
						
							|  |  |  | 		if (n <= -1) return HAWK_NULL; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 		if (n == 0) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 		{ | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 			/* no match has been found. return the entire string as a token */ | 
					
						
							|  |  |  | 			hawk_rtx_seterrnum (rtx, HAWK_NULL, HAWK_ENOERR); /* reset HAWK_EREXNOMAT to no error */ | 
					
						
							|  |  |  | 			tok->ptr = realsub.ptr; | 
					
						
							|  |  |  | 			tok->len = realsub.len; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 			return HAWK_NULL; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		HAWK_ASSERT (n == 1); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if (match.len == 0) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 		{ | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 			/* the match length is zero. */ | 
					
						
							|  |  |  | 			cursub.ptr++; | 
					
						
							|  |  |  | 			cursub.len--; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		else if (HAWK_RTX_IS_STRIPRECSPC_ON(rtx)) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			/* match at the beginning of the input string */ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 			if (match.ptr == substr) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 			{ | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 				for (i = 0; i < match.len; i++) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 				{ | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 					if (!is_xch_space(match.ptr[i])) goto exit_loop; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 				} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 				/* the match that is all spaces at the
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 				 * beginning of the input string is skipped */ | 
					
						
							|  |  |  | 				cursub.ptr += match.len; | 
					
						
							|  |  |  | 				cursub.len -= match.len; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 				/* adjust the substring by skipping the leading
 | 
					
						
							|  |  |  | 				 * spaces and retry matching */ | 
					
						
							|  |  |  | 				realsub.ptr = (char_t*)substr + match.len; | 
					
						
							|  |  |  | 				realsub.len -= match.len; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 			} | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 			else break; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 		} | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 		else break; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | exit_loop: | 
					
						
							|  |  |  | 	hawk_rtx_seterrnum (rtx, HAWK_NULL, HAWK_ENOERR); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if (cursub.len <= 0) | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 	{ | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 		tok->ptr = realsub.ptr; | 
					
						
							|  |  |  | 		tok->len = realsub.len; | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 		return HAWK_NULL; | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 	tok->ptr = realsub.ptr; | 
					
						
							|  |  |  | 	tok->len = match.ptr - realsub.ptr; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for (i = 0; i < match.len; i++) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		if (!is_xch_space(match.ptr[i])) | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			/* the match contains a non-space character. */ | 
					
						
							|  |  |  | 			return (char_t*)match.ptr+match.len; | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	/* the match is all spaces */ | 
					
						
							|  |  |  | 	if (HAWK_RTX_IS_STRIPRECSPC_ON(rtx)) | 
					
						
							|  |  |  | 	{ | 
					
						
							|  |  |  | 		/* if the match reached the last character in the input string,
 | 
					
						
							|  |  |  | 		 * it returns HAWK_NULL to terminate tokenization. */ | 
					
						
							|  |  |  | 		return (match.ptr+match.len >= substr+sublen)? HAWK_NULL: ((char_t*)match.ptr+match.len); | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	else | 
					
						
							|  |  |  | 	{ | 
					
						
							| 
									
										
										
										
											2024-05-02 22:47:30 +09:00
										 |  |  | 		/* if the match went beyond the the last character in the input
 | 
					
						
							| 
									
										
										
										
											2020-11-13 14:56:15 +00:00
										 |  |  | 		 * string, it returns HAWK_NULL to terminate tokenization. */ | 
					
						
							|  |  |  | 		return (match.ptr+match.len > substr+sublen)? HAWK_NULL: ((char_t*)match.ptr+match.len); | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2020-11-13 02:50:20 +00:00
										 |  |  | } |