implementing hcl_feed()
This commit is contained in:
		
							
								
								
									
										26
									
								
								bin/main.c
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								bin/main.c
									
									
									
									
									
								
							| @ -855,16 +855,6 @@ int main (int argc, char* argv[]) | ||||
| 	 * -- instead of returning -1 immediately. --*/ | ||||
| 	set_signal (SIGINT, handle_sigint); | ||||
|  | ||||
| #if 0 | ||||
| hcl_prbfmt (hcl, "this is good %s %10hs %hs\n", "whole new world. 1234567890 from this point onward, any failure leasd to jumping to oops label",  "as이거 좋은거잖아dkfjsdakfjsadklfjasd", "1111"); | ||||
| { | ||||
| 	hcl_uch_t fmt[] = {'G','G','%','l','s', 'a','b','c','-','-','%','0','2','0','x','\0'}; | ||||
| hcl_uch_t ustr[] = {'A','B','C', 'X','Y','Z','Q','Q','\0'}; | ||||
| hcl_prufmt (hcl, fmt, ustr, 0x6789); | ||||
| hcl_logufmt (hcl, HCL_LOG_WARN, fmt, ustr, 0x6789); | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #if 0 | ||||
| // TODO: change the option name | ||||
| // in the INTERACTIVE mode, the compiler generates MAKE_FUNCTION for lambda functions. | ||||
| @ -879,6 +869,22 @@ hcl_logufmt (hcl, HCL_LOG_WARN, fmt, ustr, 0x6789); | ||||
| 	cflags = 0; | ||||
| 	if (xtn->reader_istty) cflags = HCL_COMPILE_CLEAR_CODE | HCL_COMPILE_CLEAR_FNBLK; | ||||
|  | ||||
| #if 0 | ||||
| { | ||||
| hcl_oow_t slen; | ||||
| hcl_ooch_t* scr = hcl_dupbtooocstr(hcl, "(:::::..##..|,:{{}}..\n....)(#(#[", &slen); | ||||
| if (hcl_feed (hcl, scr, slen) <= -1) | ||||
| { | ||||
| 	if (hcl->errnum == HCL_ESYNERR) print_synerr (hcl); | ||||
| 	else hcl_logbfmt (hcl, HCL_LOG_STDERR, "ERROR: cannot feed - [%d] %js\n", hcl_geterrnum(hcl), hcl_geterrmsg(hcl)); | ||||
| } | ||||
|  | ||||
| hcl_endfeed (hcl); | ||||
| } | ||||
| hcl_close (hcl); | ||||
| return 0; | ||||
| #endif | ||||
|  | ||||
| 	while (1) | ||||
| 	{ | ||||
| 		hcl_cnode_t* obj; | ||||
|  | ||||
| @ -149,12 +149,14 @@ enum hcl_iotok_type_t | ||||
|  | ||||
| 	HCL_IOTOK_IDENT, | ||||
| 	HCL_IOTOK_IDENT_DOTTED, | ||||
| 	HCL_IOTOK_DOT, | ||||
| 	HCL_IOTOK_ELLIPSIS, | ||||
| 	HCL_IOTOK_COLON, | ||||
| 	HCL_IOTOK_DOT,       /* . */ | ||||
| 	HCL_IOTOK_DBLDOTS,   /* .. */ | ||||
| 	HCL_IOTOK_ELLIPSIS,  /* ... */ | ||||
| 	HCL_IOTOK_COLON,     /* : */ | ||||
| 	HCL_IOTOK_DBLCOLONS, /* :: */ | ||||
| 	HCL_IOTOK_TRPCOLONS, /* ::: */ | ||||
| 	HCL_IOTOK_DCSTAR,    /* ::* */ | ||||
| 	HCL_IOTOK_COMMA, | ||||
| 	HCL_IOTOK_COMMA,     /* , */ | ||||
| 	HCL_IOTOK_LPAREN,    /* ( */ | ||||
| 	HCL_IOTOK_RPAREN,    /* ) */ | ||||
| 	HCL_IOTOK_LPARCOLON, /* (: */ | ||||
| @ -496,6 +498,23 @@ struct hcl_rstl_t | ||||
| 	hcl_rstl_t* prev; | ||||
| }; | ||||
|  | ||||
| typedef struct hcl_feed_dt_t hcl_feed_dt_t; | ||||
| struct hcl_feed_dt_t | ||||
| { | ||||
| 	int row_start; | ||||
| 	int row_end; | ||||
| 	int col_next; | ||||
| }; | ||||
|  | ||||
| enum hcl_feed_lx_state_t | ||||
| { | ||||
| 	HCL_FEED_LX_START, | ||||
| 	HCL_FEED_LX_DELIM_TOKEN, | ||||
| 	HCL_FEED_LX_COMMENT, | ||||
| 	HCL_FEED_LX_SHARP_TOKEN | ||||
| }; | ||||
| typedef enum hcl_feed_lx_state_t hcl_feed_lx_state_t; | ||||
|  | ||||
| struct hcl_compiler_t | ||||
| { | ||||
| 	/* output handler */ | ||||
| @ -541,6 +560,33 @@ struct hcl_compiler_t | ||||
| 	} r; /* reading */ | ||||
| 	/* == END READER == */ | ||||
|  | ||||
| 	struct | ||||
| 	{ | ||||
| 		struct | ||||
| 		{ | ||||
| 			hcl_feed_lx_state_t state; | ||||
| 			hcl_ioloc_t loc; | ||||
| 		} lx; | ||||
| 		hcl_feed_dt_t dt; /* delimiter token */ | ||||
|  | ||||
| 		struct | ||||
| 		{ | ||||
| 			int code; | ||||
| 			union | ||||
| 			{ | ||||
| 				struct | ||||
| 				{ | ||||
| 					int x; | ||||
| 				} xxx; | ||||
| 				struct | ||||
| 				{ | ||||
| 					int x; | ||||
| 				} yyy; | ||||
| 			} u; | ||||
| 		} st[100]; | ||||
| 		hcl_ooi_t top; | ||||
| 	} feed; | ||||
|  | ||||
| 	/* == COMPILER STACK == */ | ||||
| 	struct | ||||
| 	{ | ||||
| @ -577,6 +623,8 @@ struct hcl_compiler_t | ||||
| 		hcl_clsblk_info_t* info; | ||||
| 		hcl_oow_t info_capa; | ||||
| 	} clsblk; /* class block */ | ||||
|  | ||||
| 	 | ||||
| }; | ||||
| #endif | ||||
|  | ||||
|  | ||||
| @ -2192,6 +2192,15 @@ HCL_EXPORT hcl_ooi_t hcl_proutufmt ( | ||||
| ); | ||||
|  | ||||
| #if defined(HCL_INCLUDE_COMPILER) | ||||
|  | ||||
| HCL_EXPORT int hcl_feed ( | ||||
| 	hcl_t*            hcl, | ||||
| 	const hcl_ooch_t* data, | ||||
| 	hcl_oow_t         len | ||||
| ); | ||||
|  | ||||
| #define hcl_endfeed(hcl) (hcl_feed((hcl), HCL_NULL, 0)) | ||||
|  | ||||
| HCL_EXPORT int hcl_compile ( | ||||
| 	hcl_t*       hcl, | ||||
| 	hcl_cnode_t* obj, | ||||
|  | ||||
							
								
								
									
										668
									
								
								lib/read.c
									
									
									
									
									
								
							
							
						
						
									
										668
									
								
								lib/read.c
									
									
									
									
									
								
							| @ -289,6 +289,12 @@ static HCL_INLINE int is_spacechar (hcl_ooci_t c) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| static HCL_INLINE int is_linebreak (hcl_ooci_t c) | ||||
| { | ||||
| 	/* TODO: different line end conventions? */ | ||||
| 	return c == '\n'; | ||||
| } | ||||
|  | ||||
| static HCL_INLINE int is_alphachar (hcl_ooci_t c) | ||||
| { | ||||
| /* TODO: support full unicode */ | ||||
| @ -313,11 +319,12 @@ static HCL_INLINE int is_alnumchar (hcl_ooci_t c) | ||||
| 	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'); | ||||
| } | ||||
|  | ||||
| static HCL_INLINE int is_delimiter (hcl_ooci_t c) | ||||
| static HCL_INLINE int is_delimchar (hcl_ooci_t c) | ||||
| { | ||||
| 	return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}' || | ||||
| 	       c == '\"' || c == '\'' || c == '#' || c == ';' || c == '|' || c == '.' || | ||||
| 	       c == ',' || c == ':' || is_spacechar(c) || c == HCL_UCI_EOF; | ||||
| 	       c == ';' || c == '|' || c == ',' || c == '.' || c == ':' ||  | ||||
| 	       /* the first characters of tokens in delim_token_tab up to this point */ | ||||
| 	       c == '#'  || c == '\"' || c == '\'' || is_spacechar(c) || c == HCL_UCI_EOF; | ||||
| } | ||||
|  | ||||
| static int copy_string_to (hcl_t* hcl, const hcl_oocs_t* src, hcl_oocs_t* dst, hcl_oow_t* dst_capa, int append, hcl_ooch_t add_delim) | ||||
| @ -733,14 +740,14 @@ static int get_radix_number (hcl_t* hcl, hcl_ooci_t rc, int radix) | ||||
| 	} | ||||
| 	while (CHAR_TO_NUM(c, radix) < radix); | ||||
|  | ||||
| 	if (!is_delimiter(c)) | ||||
| 	if (!is_delimchar(c)) | ||||
| 	{ | ||||
| 		do | ||||
| 		{ | ||||
| 			ADD_TOKEN_CHAR(hcl, c); | ||||
| 			GET_CHAR_TO (hcl, c); | ||||
| 		} | ||||
| 		while (!is_delimiter(c)); | ||||
| 		while (!is_delimchar(c)); | ||||
|  | ||||
| 		hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), | ||||
| 			"invalid digit in radixed number in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); | ||||
| @ -763,19 +770,26 @@ static int get_sharp_token (hcl_t* hcl) | ||||
| 	GET_CHAR_TO (hcl, c); | ||||
|  | ||||
| 	/* | ||||
| 	 * #bBBBB binary | ||||
| 	 * #oOOOO octal | ||||
| 	 * #xXXXX hexadecimal | ||||
| 	 * #oOOOO octal | ||||
| 	 * #bBBBB binary | ||||
| 	 * #eDDD   error | ||||
| 	 * #pHHH   smptr | ||||
| 	 * #nil | ||||
| 	 * #true | ||||
| 	 * #false | ||||
| 	 * #include | ||||
| 	 * #\C      character | ||||
| 	 * #\xHHHH  unicode character | ||||
| 	 * #\UHHHH  unicode character | ||||
| 	 * #\uHHHH  unicode character | ||||
| 	 * #\backspace | ||||
| 	 * #\linefeed | ||||
| 	 * #\newline | ||||
| 	 * #\nul | ||||
| 	 * #\page | ||||
| 	 * #\return | ||||
| 	 * #\rubout | ||||
| 	 * #\space | ||||
| 	 * #\tab | ||||
| 	 * #\vtab | ||||
| 	 * #include | ||||
| 	 * #[ ]     byte array | ||||
| 	 * #( )     qlist | ||||
| 	 */ | ||||
| @ -809,7 +823,7 @@ static int get_sharp_token (hcl_t* hcl) | ||||
| 			ADD_TOKEN_CHAR (hcl, '\\'); | ||||
|  | ||||
| 			GET_CHAR_TO (hcl, c); | ||||
| 			if (is_delimiter(c)) | ||||
| 			if (is_delimchar(c)) | ||||
| 			{ | ||||
| 				hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), | ||||
| 					"no valid character after #\\ in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); | ||||
| @ -822,7 +836,7 @@ static int get_sharp_token (hcl_t* hcl) | ||||
| 				ADD_TOKEN_CHAR (hcl, c); | ||||
| 				GET_CHAR_TO (hcl, c); | ||||
| 			} | ||||
| 			while (!is_delimiter(c)); | ||||
| 			while (!is_delimchar(c)); | ||||
|  | ||||
| 			if (TOKEN_NAME_LEN(hcl) >= 4) | ||||
| 			{ | ||||
| @ -849,10 +863,8 @@ static int get_sharp_token (hcl_t* hcl) | ||||
| 								"invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); | ||||
| 							return -1; | ||||
| 						} | ||||
|  | ||||
| 						c = c * 16 + CHAR_TO_NUM(hcl->c->tok.name.ptr[i], 16); /* don't care if it is for 'p' */ | ||||
| 					} | ||||
|  | ||||
| 				} | ||||
| 			#if (HCL_SIZEOF_OOCH_T >= 2) | ||||
| 				else if (TOKEN_NAME_CHAR(hcl, 2) == 'u') | ||||
| @ -868,26 +880,22 @@ static int get_sharp_token (hcl_t* hcl) | ||||
| 					goto hexcharlit; | ||||
| 				} | ||||
| 			#endif | ||||
| 				else if (does_token_name_match(hcl, VOCA_SPACE)) | ||||
| 				else if (does_token_name_match(hcl, VOCA_BACKSPACE)) | ||||
| 				{ | ||||
| 					c = ' '; | ||||
| 					c = '\b'; | ||||
| 				} | ||||
| 				else if (does_token_name_match(hcl, VOCA_LINEFEED)) | ||||
| 				{ | ||||
| 					c = '\n'; | ||||
| 				} | ||||
| 				else if (does_token_name_match(hcl, VOCA_NEWLINE)) | ||||
| 				{ | ||||
| 					/* TODO: convert it to host newline convention. how to handle if it's composed of 2 letters like \r\n? */ | ||||
| 					c = '\n'; | ||||
| 				} | ||||
| 				else if (does_token_name_match(hcl, VOCA_BACKSPACE)) | ||||
| 				else if (does_token_name_match(hcl, VOCA_NUL)) /* null character. not #nil */ | ||||
| 				{ | ||||
| 					c = '\b'; | ||||
| 				} | ||||
| 				else if (does_token_name_match(hcl, VOCA_TAB)) | ||||
| 				{ | ||||
| 					c = '\t'; | ||||
| 				} | ||||
| 				else if (does_token_name_match(hcl, VOCA_LINEFEED)) | ||||
| 				{ | ||||
| 					c = '\n'; | ||||
| 					c = '\0'; | ||||
| 				} | ||||
| 				else if (does_token_name_match(hcl, VOCA_PAGE)) | ||||
| 				{ | ||||
| @ -897,18 +905,22 @@ static int get_sharp_token (hcl_t* hcl) | ||||
| 				{ | ||||
| 					c = '\r'; | ||||
| 				} | ||||
| 				else if (does_token_name_match(hcl, VOCA_NUL)) /* null character. not #nil */ | ||||
| 				else if (does_token_name_match(hcl, VOCA_RUBOUT)) | ||||
| 				{ | ||||
| 					c = '\0'; | ||||
| 					c = '\x7F'; /* DEL */ | ||||
| 				} | ||||
| 				else if (does_token_name_match(hcl, VOCA_SPACE)) | ||||
| 				{ | ||||
| 					c = ' '; | ||||
| 				} | ||||
| 				else if (does_token_name_match(hcl, VOCA_TAB)) | ||||
| 				{ | ||||
| 					c = '\t'; | ||||
| 				} | ||||
| 				else if (does_token_name_match(hcl, VOCA_VTAB)) | ||||
| 				{ | ||||
| 					c = '\v'; | ||||
| 				} | ||||
| 				else if (does_token_name_match(hcl, VOCA_RUBOUT)) | ||||
| 				{ | ||||
| 					c = '\x7F'; /* DEL */ | ||||
| 				} | ||||
| 				else | ||||
| 				{ | ||||
| 					hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), | ||||
| @ -945,7 +957,7 @@ static int get_sharp_token (hcl_t* hcl) | ||||
| 			break; | ||||
|  | ||||
| 		default: | ||||
| 			if (is_delimiter(c)) | ||||
| 			if (is_delimchar(c)) | ||||
| 			{ | ||||
| 				/* EOF, whitespace, etc */ | ||||
| 				hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), | ||||
| @ -954,13 +966,12 @@ static int get_sharp_token (hcl_t* hcl) | ||||
| 			} | ||||
|  | ||||
| 			ADD_TOKEN_CHAR (hcl, '#'); | ||||
| 		long_name: | ||||
| 			do | ||||
| 			{ | ||||
| 				ADD_TOKEN_CHAR (hcl, c); | ||||
| 				GET_CHAR_TO (hcl, c); | ||||
| 			} | ||||
| 			while (!is_delimiter(c)); | ||||
| 			while (!is_delimchar(c)); | ||||
|  | ||||
| 			if (does_token_name_match(hcl, VOCA_INCLUDE)) | ||||
| 			{ | ||||
| @ -1275,7 +1286,7 @@ retry: | ||||
|  | ||||
| 		default: | ||||
| 		ident: | ||||
| 			if (is_delimiter(c)) | ||||
| 			if (is_delimchar(c)) | ||||
| 			{ | ||||
| 				hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILCHR, TOKEN_LOC(hcl), HCL_NULL, "illegal character %jc encountered", c); | ||||
| 				return -1; | ||||
| @ -1304,7 +1315,7 @@ retry: | ||||
|  | ||||
| 				read_more_seg: | ||||
| 					GET_CHAR_TO (hcl, c); | ||||
| 					if (!is_delimiter(c)) | ||||
| 					if (!is_delimchar(c)) | ||||
| 					{ | ||||
| 						hcl_oow_t start; | ||||
| 						hcl_oocs_t seg; | ||||
| @ -1318,7 +1329,7 @@ retry: | ||||
| 							ADD_TOKEN_CHAR (hcl, c); | ||||
| 							GET_CHAR_TO (hcl, c); | ||||
| 						} | ||||
| 						while (!is_delimiter(c)); | ||||
| 						while (!is_delimchar(c)); | ||||
|  | ||||
| 						seg.ptr = &TOKEN_NAME_CHAR(hcl,start); | ||||
| 						seg.len = TOKEN_NAME_LEN(hcl) - start; | ||||
| @ -1340,7 +1351,7 @@ retry: | ||||
| 					} | ||||
| 					break; | ||||
| 				} | ||||
| 				else if (is_delimiter(c)) | ||||
| 				else if (is_delimchar(c)) | ||||
| 				{ | ||||
| 					unget_char (hcl, &hcl->c->lxc); | ||||
| 					break; | ||||
| @ -2412,3 +2423,580 @@ void hcl_detachio (hcl_t* hcl) | ||||
| 	} | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| /* ---------------------------------------------------------------------- */ | ||||
|  | ||||
| static void init_feed (hcl_t* hcl) | ||||
| { | ||||
| 	hcl->c->feed.lx.state = HCL_FEED_LX_START; | ||||
| 	hcl->c->feed.lx.loc.line = 1; | ||||
| 	hcl->c->feed.lx.loc.colm = 1; | ||||
| 	hcl->c->feed.lx.loc.file = HCL_NULL; | ||||
|  | ||||
| 	hcl->c->feed.top = -1; | ||||
| } | ||||
|  | ||||
| static int push_feed_state (hcl_t* hcl, int code) | ||||
| { | ||||
| 	if (hcl->c->feed.top >= HCL_COUNTOF(hcl->c->feed.st) - 1) /* TODO: use a dynamically allocated stack? */ | ||||
| 	{ | ||||
| 		hcl_seterrbfmt (hcl, HCL_EBUFFULL, "feed state stack full");  | ||||
| 		return -1; | ||||
| 	} | ||||
|  | ||||
| 	hcl->c->feed.top++; | ||||
| 	HCL_MEMSET (&hcl->c->feed.st[hcl->c->feed.top], 0, HCL_SIZEOF(hcl->c->feed.st[hcl->c->feed.top])); | ||||
| 	hcl->c->feed.st[hcl->c->feed.top].code = code; | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| static void pop_feed_state (hcl_t* hcl) | ||||
| { | ||||
| 	HCL_ASSERT (hcl, hcl->c->feed.top >= 0); | ||||
| 	hcl->c->feed.top--; | ||||
| } | ||||
|  | ||||
| struct delim_token_t | ||||
| { | ||||
| 	const char*      t_value; | ||||
| 	hcl_oow_t        t_len; | ||||
| 	hcl_iotok_type_t t_type; | ||||
| }; | ||||
| typedef struct delim_token_t delim_token_t; | ||||
|  | ||||
| static delim_token_t delim_token_tab[] = | ||||
| { | ||||
| 	/* [NOTE 1]  | ||||
| 	 *  if you add a new token, ensure the first character is listed in is_delimchar()  | ||||
| 	 *  | ||||
| 	 * [NOTE 2] | ||||
| 	 *  for the implementation limitation in find_delim_token_char(), | ||||
| 	 *  the entries in this table must be laid out in a certain way. | ||||
| 	 *  | ||||
| 	 *    Group the items with the same prefix together. | ||||
| 	 *    List the shorter before the longer items in the same group. | ||||
| 	 *    The length must not differ by greater than 1 between 2 items in the same group. | ||||
| 	 */ | ||||
|  | ||||
| 	{ "(",        1, HCL_IOTOK_LPAREN }, | ||||
| 	{ "(:",       2, HCL_IOTOK_LPARCOLON }, | ||||
| 	{ ")",        1, HCL_IOTOK_RPAREN }, | ||||
|  | ||||
| 	{ "[",        1, HCL_IOTOK_LBRACK }, | ||||
| 	{ "]",        1, HCL_IOTOK_RBRACK }, | ||||
| 	 | ||||
| 	{ "{",        1, HCL_IOTOK_LBRACE }, | ||||
| 	{ "}",        1, HCL_IOTOK_RBRACE }, | ||||
|  | ||||
| 	{ "|",        1, HCL_IOTOK_VBAR }, | ||||
| 	{ ",",        1, HCL_IOTOK_COMMA }, | ||||
|  | ||||
| 	{ ".",        1, HCL_IOTOK_DOT }, | ||||
| 	{ "..",       2, HCL_IOTOK_DBLDOTS }, | ||||
| 	{ "...",      3, HCL_IOTOK_ELLIPSIS }, | ||||
|  | ||||
| 	{ ":",        1, HCL_IOTOK_COLON }, | ||||
| 	{ "::",       2, HCL_IOTOK_DBLCOLONS }, | ||||
| 	{ "::*",      3, HCL_IOTOK_DCSTAR }, | ||||
| 	{ ":::",      3, HCL_IOTOK_TRPCOLONS  } | ||||
| }; | ||||
|  | ||||
| static int find_delim_token_char (hcl_t* hcl, const hcl_ooci_t c, int row_start, int row_end, int col, hcl_feed_dt_t* dt) | ||||
| { | ||||
| 	int found = 0, i; | ||||
|  | ||||
| 	for (i = row_start; i <= row_end; i++) | ||||
| 	{ | ||||
| //printf (">>> %d %d %d  col=>%d  c=>%jc\n", i, row_start, row_end, col, c); | ||||
| 		if (col < delim_token_tab[i].t_len && c == delim_token_tab[i].t_value[col])  | ||||
| 		{ | ||||
| //printf ("MATCH [%jc] [%jc]\n", c, delim_token_tab[i].t_value[col]); | ||||
| 			if (!found) dt->row_start = i; | ||||
| 			dt->row_end = i; | ||||
| 			found = 1; | ||||
| 		} | ||||
| 		else if (found) break; | ||||
| 	} | ||||
|  | ||||
| 	if (found) dt->col_next = col + 1; | ||||
| //printf ("**** return %d %d\n", dt->row_start, dt->row_end); | ||||
| 	return found; | ||||
| } | ||||
|  | ||||
| static HCL_INLINE int feed_wrap_up (hcl_t* hcl, hcl_iotok_type_t type) | ||||
| { | ||||
| 	SET_TOKEN_TYPE (hcl, type); | ||||
|  | ||||
| HCL_DEBUG4 (hcl, "TOKEN LEN %zu=>[%.*js] %d\n", TOKEN_NAME_LEN(hcl), TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), TOKEN_TYPE(hcl)); | ||||
| /* TOOD: fire token callback or something */ | ||||
|  | ||||
| 	hcl->c->feed.lx.state = HCL_FEED_LX_START; | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| static int feed_wrap_up_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_iotok_type_t type) | ||||
| { | ||||
| 	ADD_TOKEN_CHAR (hcl, c); | ||||
| 	return feed_wrap_up(hcl, type); | ||||
| } | ||||
|  | ||||
| static int feed_wrap_up_with_str (hcl_t* hcl, const hcl_ooch_t* str, hcl_oow_t len, hcl_iotok_type_t type) | ||||
| { | ||||
| 	ADD_TOKEN_STR (hcl, str, len); | ||||
| 	return feed_wrap_up(hcl, type); | ||||
| } | ||||
|  | ||||
| static int feed_continue (hcl_t* hcl, hcl_feed_lx_state_t state) | ||||
| { | ||||
| 	hcl->c->feed.lx.state = state; | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_feed_lx_state_t state) | ||||
| { | ||||
| 	ADD_TOKEN_CHAR (hcl, c); | ||||
| 	hcl->c->feed.lx.state = state; | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| #define FEED_WRAP_UP(hcl, type) do { if (feed_wrap_up(hcl, type) <= -1) return -1; } while(0) | ||||
| #define FEED_WRAP_UP_WITH_CHAR(hcl, c, type) do { if (feed_wrap_up_with_char(hcl, c, type) <= -1) return -1; } while(0) | ||||
| #define FEED_WRAP_UP_WITH_CHARS(hcl, str, len, type) do { if (feed_wrap_up_with_str(hcl, str, len, type) <= -1) return -1; } while(0) | ||||
| #define FEED_CONTINUE(hcl, state) do { if (feed_continue(hcl, state) <= -1) return -1; } while(0) | ||||
| #define FEED_CONTINUE_WITH_CHAR(hcl, c, state) do { if (feed_continue_with_char(hcl, c, state) <= -1) return -1; } while(0) | ||||
|  | ||||
| #define FEED_LX_STATE(hcl) ((hcl)->c->feed.lx.state) | ||||
| #define FEED_LX_LOC(hcl) (&((hcl)->c->feed.lx.loc)) | ||||
|  | ||||
| static int feed_lx_start (hcl_t* hcl, hcl_ooci_t c) | ||||
| { | ||||
| 	HCL_ASSERT (hcl, FEED_LX_STATE(hcl) == HCL_FEED_LX_START); | ||||
|  | ||||
| 	/* clear the token name, reset its location */ | ||||
| 	SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); /* is it correct? */ | ||||
| 	CLEAR_TOKEN_NAME (hcl); | ||||
|  | ||||
| //HCL_DEBUG1 (hcl, "XXX[%jc]\n", c); | ||||
| 	if (find_delim_token_char(hcl, c, 0, HCL_COUNTOF(delim_token_tab) - 1, 0, &hcl->c->feed.dt))  | ||||
| 	{ | ||||
| 		/* the character is one of the first character of a delimiter token */ | ||||
| 		if (hcl->c->feed.dt.row_start == hcl->c->feed.dt.row_end && hcl->c->feed.dt.col_next == delim_token_tab[hcl->c->feed.dt.row_start].t_len) | ||||
| 		{ | ||||
| 			FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[hcl->c->feed.dt.row_start].t_type); | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_DELIM_TOKEN); /* consume c and move to HCL_FEED_LX_DELIM_TOKEN state */ | ||||
| 		} | ||||
| 		goto consumed; | ||||
| 	} | ||||
|  | ||||
| 	switch (c) | ||||
| 	{ | ||||
| 		case HCL_OOCI_EOF: | ||||
| 		{ | ||||
| 			int n; | ||||
| #if 0 | ||||
| 			n = end_include(hcl); | ||||
| 			if (n <= -1) return -1; | ||||
| 			if (n >= 1) goto retry; | ||||
| #endif | ||||
| 			FEED_WRAP_UP_WITH_CHARS (hcl, vocas[VOCA_EOF].str, vocas[VOCA_EOF].len, HCL_IOTOK_EOF); | ||||
| 			break; | ||||
| 		} | ||||
|  | ||||
| 		case ';': | ||||
| 			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_COMMENT); | ||||
| 			break; | ||||
|  | ||||
| 		case '#': | ||||
| 			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_SHARP_TOKEN); | ||||
| 			break; | ||||
|  | ||||
| #if 0 | ||||
| 		case '\"': | ||||
| 			if (get_string(hcl, '\"', '\\', 0, 0) <= -1) return -1; | ||||
| 			break; | ||||
|  | ||||
| 		case '\'': | ||||
| 			if (get_string(hcl, '\'', '\\', 0, 0) <= -1) return -1; | ||||
| 			if (hcl->c->tok.name.len != 1) | ||||
| 			{ | ||||
| 				hcl_setsynerr (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); | ||||
| 				return -1; | ||||
| 			} | ||||
| 			SET_TOKEN_TYPE (hcl, HCL_IOTOK_CHARLIT); | ||||
| 			break; | ||||
|  | ||||
| 		case '#': | ||||
| 			if (get_sharp_token(hcl) <= -1) return -1; | ||||
| 			break; | ||||
|  | ||||
| 		case '+': | ||||
| 		case '-': | ||||
| 			oldc = c; | ||||
| 			GET_CHAR_TO (hcl, c); | ||||
| 			if(is_digitchar(c)) | ||||
| 			{ | ||||
| 				unget_char (hcl, &hcl->c->lxc); | ||||
| 				c = oldc; | ||||
| 				goto numlit; | ||||
| 			} | ||||
| 			else if (c == '#') | ||||
| 			{ | ||||
| 				int radix; | ||||
| 				hcl_iolxc_t sharp; | ||||
|  | ||||
| 				sharp = hcl->c->lxc; /* back up '#' */ | ||||
|  | ||||
| 				GET_CHAR_TO (hcl, c); | ||||
| 				switch (c) | ||||
| 				{ | ||||
| 					case 'b': | ||||
| 						radix = 2; | ||||
| 						goto radnumlit; | ||||
| 					case 'o': | ||||
| 						radix = 8; | ||||
| 						goto radnumlit; | ||||
| 					case 'x': | ||||
| 						radix = 16; | ||||
| 					radnumlit: | ||||
| 						ADD_TOKEN_CHAR (hcl, oldc); | ||||
| 						if (get_radix_number(hcl, c, radix) <= -1) return -1; | ||||
| 						break; | ||||
|  | ||||
| 					default: | ||||
| 						unget_char (hcl, &hcl->c->lxc); | ||||
| 						unget_char (hcl, &sharp); | ||||
| 						c = oldc; | ||||
| 						goto ident; | ||||
| 				} | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				unget_char (hcl, &hcl->c->lxc); | ||||
| 				c = oldc; | ||||
| 				goto ident; | ||||
| 			} | ||||
| 			break; | ||||
|  | ||||
| 		case '0': case '1': case '2': case '3': case '4': | ||||
| 		case '5': case '6': case '7': case '8': case '9': | ||||
| 		numlit: | ||||
| 			SET_TOKEN_TYPE (hcl, HCL_IOTOK_NUMLIT); | ||||
| 			while (1) | ||||
| 			{ | ||||
| 				ADD_TOKEN_CHAR (hcl, c); | ||||
| 				GET_CHAR_TO (hcl, c); | ||||
| 				if (TOKEN_TYPE(hcl) == HCL_IOTOK_NUMLIT && c == '.') | ||||
| 				{ | ||||
| 					SET_TOKEN_TYPE (hcl, HCL_IOTOK_FPDECLIT); | ||||
| 					ADD_TOKEN_CHAR (hcl, c); | ||||
| 					GET_CHAR_TO (hcl, c); | ||||
| 					if (!is_digitchar(c)) | ||||
| 					{ | ||||
| 						/* the first character after the decimal point is not a decimal digit */ | ||||
| 						hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "invalid numeric literal with no digit after decimal point"); | ||||
| 						return -1; | ||||
| 					} | ||||
| 				} | ||||
|  | ||||
| 				if (!is_digitchar(c)) | ||||
| 				{ | ||||
| 					unget_char (hcl, &hcl->c->lxc); | ||||
| 					break; | ||||
| 				} | ||||
| 			} | ||||
|  | ||||
| 			break; | ||||
|  | ||||
| 		default: | ||||
| 		ident: | ||||
| 			if (is_delimchar(c)) | ||||
| 			{ | ||||
| 				hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILCHR, TOKEN_LOC(hcl), HCL_NULL, "illegal character %jc encountered", c); | ||||
| 				return -1; | ||||
| 			} | ||||
|  | ||||
| 			SET_TOKEN_TYPE (hcl, HCL_IOTOK_IDENT); | ||||
| 			while (1) | ||||
| 			{ | ||||
| 				ADD_TOKEN_CHAR (hcl, c); | ||||
| 				GET_CHAR_TO (hcl, c); | ||||
|  | ||||
| 				if (c == '.') | ||||
| 				{ | ||||
| 					hcl_iolxc_t period; | ||||
| 					hcl_iotok_type_t type; | ||||
|  | ||||
| 					type = classify_ident_token(hcl, TOKEN_NAME(hcl)); | ||||
| 					if (type != HCL_IOTOK_IDENT) | ||||
| 					{ | ||||
| 						SET_TOKEN_TYPE (hcl, type); | ||||
| 						unget_char (hcl, &hcl->c->lxc); | ||||
| 						break; | ||||
| 					} | ||||
|  | ||||
| 					period = hcl->c->lxc; | ||||
|  | ||||
| 				read_more_seg: | ||||
| 					GET_CHAR_TO (hcl, c); | ||||
| 					if (!is_delimchar(c)) | ||||
| 					{ | ||||
| 						hcl_oow_t start; | ||||
| 						hcl_oocs_t seg; | ||||
|  | ||||
| 						SET_TOKEN_TYPE (hcl, HCL_IOTOK_IDENT_DOTTED); | ||||
| 						ADD_TOKEN_CHAR (hcl, '.'); | ||||
|  | ||||
| 						start = TOKEN_NAME_LEN(hcl); | ||||
| 						do | ||||
| 						{ | ||||
| 							ADD_TOKEN_CHAR (hcl, c); | ||||
| 							GET_CHAR_TO (hcl, c); | ||||
| 						} | ||||
| 						while (!is_delimchar(c)); | ||||
|  | ||||
| 						seg.ptr = &TOKEN_NAME_CHAR(hcl,start); | ||||
| 						seg.len = TOKEN_NAME_LEN(hcl) - start; | ||||
| 						if (classify_ident_token(hcl, &seg) != HCL_IOTOK_IDENT) | ||||
| 						{ | ||||
| 							hcl_setsynerr (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); | ||||
| 							return -1; | ||||
| 						} | ||||
|  | ||||
| 						if (c == '.') goto read_more_seg; | ||||
|  | ||||
| 						unget_char (hcl, &hcl->c->lxc); | ||||
| 						break; | ||||
| 					} | ||||
| 					else | ||||
| 					{ | ||||
| 						unget_char (hcl, &hcl->c->lxc); | ||||
| 						unget_char (hcl, &period); | ||||
| 					} | ||||
| 					break; | ||||
| 				} | ||||
| 				else if (is_delimchar(c)) | ||||
| 				{ | ||||
| 					unget_char (hcl, &hcl->c->lxc); | ||||
| 					break; | ||||
| 				} | ||||
| 			} | ||||
|  | ||||
| 			if (TOKEN_TYPE(hcl) == HCL_IOTOK_IDENT) | ||||
| 			{ | ||||
| 				hcl_iotok_type_t type; | ||||
| 				type = classify_ident_token(hcl, TOKEN_NAME(hcl)); | ||||
| 				SET_TOKEN_TYPE (hcl, type); | ||||
| 			} | ||||
| 			break; | ||||
| #endif | ||||
| 	} | ||||
|  | ||||
| consumed: | ||||
| 	return 1; | ||||
|  | ||||
| not_consumed: | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| static int feed_lx_delim_token (hcl_t* hcl, hcl_ooci_t c) | ||||
| { | ||||
| 	if (find_delim_token_char(hcl, c, hcl->c->feed.dt.row_start, hcl->c->feed.dt.row_end, hcl->c->feed.dt.col_next, &hcl->c->feed.dt))  | ||||
| 	{ | ||||
| 		if (hcl->c->feed.dt.row_start == hcl->c->feed.dt.row_end && hcl->c->feed.dt.col_next == delim_token_tab[hcl->c->feed.dt.row_start].t_len) | ||||
| 		{ | ||||
| 			/* complete token and switch to the HCL_FEED_LX_START state */ | ||||
| 			FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[hcl->c->feed.dt.row_start].t_type);  | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			ADD_TOKEN_CHAR(hcl, c); | ||||
| 		} | ||||
| 		goto consumed; | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 		/* the longest match so far */ | ||||
| 		FEED_WRAP_UP(hcl, delim_token_tab[hcl->c->feed.dt.row_start].t_type);  | ||||
| 		goto not_consumed; | ||||
| 	} | ||||
|  | ||||
| consumed: | ||||
| 	return 1; | ||||
|  | ||||
| not_consumed: | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| static int feed_lx_comment (hcl_t* hcl, hcl_ooci_t c) | ||||
| { | ||||
| 	if (is_linebreak(c)) FEED_CONTINUE (hcl, HCL_FEED_LX_START); | ||||
| 	return 1; /* consumed */ | ||||
| } | ||||
|  | ||||
| static int feed_lx_sharp_token (hcl_t* hcl, hcl_ooci_t c) | ||||
| { | ||||
| 	/* | ||||
| 	 * #xXXXX hexadecimal | ||||
| 	 * #oOOOO octal | ||||
| 	 * #bBBBB binary | ||||
| 	 * #eDDD   error | ||||
| 	 * #pHHH   smptr | ||||
| 	 * #\C      character | ||||
| 	 * #\xHHHH  unicode character | ||||
| 	 * #\UHHHH  unicode character | ||||
| 	 * #\uHHHH  unicode character | ||||
| 	 * #\backspace | ||||
| 	 * #\linefeed | ||||
| 	 * #\newline | ||||
| 	 * #\nul | ||||
| 	 * #\page | ||||
| 	 * #\return | ||||
| 	 * #\rubout | ||||
| 	 * #\space | ||||
| 	 * #\tab | ||||
| 	 * #\vtab | ||||
| 	 * #include | ||||
| 	 * #[ ]     byte array | ||||
| 	 * #( )     qlist | ||||
| 	 */ | ||||
|  | ||||
| 	switch (c) | ||||
| 	{ | ||||
| 		case '#': | ||||
| 		case '!': | ||||
| 			/* ## comment start | ||||
| 			 * #! also comment start. | ||||
| 			 * ; comment start */ | ||||
| 			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_COMMENT); | ||||
| 			goto consumed; | ||||
|  | ||||
| 		case '[': | ||||
| 			FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_BAPAREN); | ||||
| 			goto consumed; | ||||
|  | ||||
| 		case '(': | ||||
| 			FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_QLPAREN); | ||||
| 			goto consumed; | ||||
|  | ||||
| 		default: | ||||
| // TODO: fix this part | ||||
| 			if (is_spacechar(c) || c == HCL_UCI_EOF) | ||||
| 				hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FEED_LX_LOC(hcl), HCL_NULL, | ||||
| 					"no character after the hash sign"); | ||||
| 			else | ||||
| 				hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FEED_LX_LOC(hcl), HCL_NULL, | ||||
| 					"invalid character after the hash sign - %jc", c); | ||||
| 			return -1; | ||||
| 	} | ||||
|  | ||||
| consumed: | ||||
| 	return 1; | ||||
|  | ||||
| not_consumed: | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| static int feed_char (hcl_t* hcl, hcl_ooci_t c) | ||||
| { | ||||
| /* TODO: track line number and column number? */ | ||||
| 	switch (FEED_LX_STATE(hcl)) | ||||
| 	{ | ||||
| 		case HCL_FEED_LX_START:       return feed_lx_start(hcl, c); | ||||
| 		case HCL_FEED_LX_DELIM_TOKEN: return feed_lx_delim_token(hcl, c); | ||||
| 		case HCL_FEED_LX_COMMENT:     return feed_lx_comment(hcl, c); | ||||
| 		case HCL_FEED_LX_SHARP_TOKEN: return feed_lx_sharp_token(hcl, c); | ||||
|  | ||||
| /* | ||||
| 		case HCL_FEED_LX_DQSTR: | ||||
|  			return feed_lx_dqstr(hcl, c); | ||||
|  | ||||
| 		case HCL_FEED_LX_SQSTR: | ||||
| 			return feed_lxsqstr(hcl, c); | ||||
|  | ||||
| 		case HCL_FEED_LX_COMMENT: | ||||
| 			break; | ||||
|  | ||||
| 		case HCL_FEED_LX_CSTR: | ||||
| 			break; | ||||
|  | ||||
| 		case HCL_FEED_LX_DIRECTIVE: | ||||
| 			break; | ||||
| */ | ||||
|  | ||||
| 		default: | ||||
| 			/* INVALID STATE */ | ||||
| 			break; | ||||
| 	} | ||||
|  | ||||
|  | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
| int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len) | ||||
| { | ||||
| /* TODO: need to return the number of processed characters? | ||||
|  *       need to stop after the first complete expression? */ | ||||
|  | ||||
| 	hcl_oow_t i; | ||||
| 	int x; | ||||
|  | ||||
| 	if (data)  | ||||
| 	{ | ||||
| 		for (i = 0; i < len; )  | ||||
| 		{ | ||||
| 			x = feed_char(hcl, data[i]); | ||||
| 			if (x <= -1) return -1; | ||||
| 			i += x; | ||||
| 			if (x > 0) | ||||
| 			{ | ||||
| 				if (is_linebreak(data[i])) | ||||
| 				{ | ||||
| 					hcl->c->feed.lx.loc.line++; | ||||
| 					hcl->c->feed.lx.loc.colm = 1; | ||||
| 				} | ||||
| 				else | ||||
| 				{ | ||||
| 					hcl->c->feed.lx.loc.colm++; | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 		for (i = 0; i < 1;) | ||||
| 		{ | ||||
| 			x = feed_char(hcl, HCL_OOCI_EOF); | ||||
| 			if (x <= -1) return -1; | ||||
| 			i += x; | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return 0; | ||||
| } | ||||
|  | ||||
|  | ||||
| /* | ||||
| hcl_setopt (ON_EXPRESSION CALLBACK??? ); | ||||
|  | ||||
|  | ||||
|  | ||||
| hcl_feed (hcl, "(hello) (10)", 12); | ||||
| 	> on_token | ||||
| 	> on_expression | ||||
| 	> on_eof | ||||
|  | ||||
| default callback for on_expression? | ||||
| 	compile | ||||
| 	execute??/ if in the interactive mode? (say it's used as a network protocol. execute each expression when received....) | ||||
|  | ||||
| default callback for on_eof? | ||||
|  	execute or terminate? | ||||
|  | ||||
|  | ||||
| */ | ||||
|  | ||||
		Reference in New Issue
	
	Block a user