implementing hcl_feed()

2022-05-25 14:23:43 +00:00
parent 982a7c1592
commit 60c9fe8ac9
4 changed files with 706 additions and 55 deletions
--- a/lib/read.c
+++ b/lib/read.c
@ -289,6 +289,12 @@ static HCL_INLINE int is_spacechar (hcl_ooci_t c)
 	}
 }

+/* tells whether c ends a line. only the line feed character is recognized. */
+static HCL_INLINE int is_linebreak (hcl_ooci_t c)
+{
+	/* TODO: different line end conventions? */
+	return ('\n' == c);
+}
+
 static HCL_INLINE int is_alphachar (hcl_ooci_t c)
 {
 /* TODO: support full unicode */
@ -313,11 +319,12 @@ static HCL_INLINE int is_alnumchar (hcl_ooci_t c)
 	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
 }

-static HCL_INLINE int is_delimiter (hcl_ooci_t c)
+static HCL_INLINE int is_delimchar (hcl_ooci_t c)
 {
 	return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}' ||
-	       c == '\"' || c == '\'' || c == '#' || c == ';' || c == '|' || c == '.' ||
-	       c == ',' || c == ':' || is_spacechar(c) || c == HCL_UCI_EOF;
+	       c == ';' || c == '|' || c == ',' || c == '.' || c == ':' || 
+	       /* the first characters of tokens in delim_token_tab up to this point */
+	       c == '#'  || c == '\"' || c == '\'' || is_spacechar(c) || c == HCL_UCI_EOF;
 }

 static int copy_string_to (hcl_t* hcl, const hcl_oocs_t* src, hcl_oocs_t* dst, hcl_oow_t* dst_capa, int append, hcl_ooch_t add_delim)
@ -733,14 +740,14 @@ static int get_radix_number (hcl_t* hcl, hcl_ooci_t rc, int radix)
 	}
 	while (CHAR_TO_NUM(c, radix) < radix);

-	if (!is_delimiter(c))
+	if (!is_delimchar(c))
 	{
 		do
 		{
 			ADD_TOKEN_CHAR(hcl, c);
 			GET_CHAR_TO (hcl, c);
 		}
-		while (!is_delimiter(c));
+		while (!is_delimchar(c));

 		hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
 			"invalid digit in radixed number in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr);
@ -763,19 +770,26 @@ static int get_sharp_token (hcl_t* hcl)
 	GET_CHAR_TO (hcl, c);

 	/*
-	 * #bBBBB binary
-	 * #oOOOO octal
 	 * #xXXXX hexadecimal
+	 * #oOOOO octal
+	 * #bBBBB binary
 	 * #eDDD   error
 	 * #pHHH   smptr
-	 * #nil
-	 * #true
-	 * #false
-	 * #include
 	 * #\C      character
 	 * #\xHHHH  unicode character
 	 * #\UHHHH  unicode character
 	 * #\uHHHH  unicode character
+	 * #\backspace
+	 * #\linefeed
+	 * #\newline
+	 * #\nul
+	 * #\page
+	 * #\return
+	 * #\rubout
+	 * #\space
+	 * #\tab
+	 * #\vtab
+	 * #include
 	 * #[ ]     byte array
 	 * #( )     qlist
 	 */
@ -809,7 +823,7 @@ static int get_sharp_token (hcl_t* hcl)
 			ADD_TOKEN_CHAR (hcl, '\\');

 			GET_CHAR_TO (hcl, c);
-			if (is_delimiter(c))
+			if (is_delimchar(c))
 			{
 				hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
 					"no valid character after #\\ in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr);
@ -822,7 +836,7 @@ static int get_sharp_token (hcl_t* hcl)
 				ADD_TOKEN_CHAR (hcl, c);
 				GET_CHAR_TO (hcl, c);
 			}
-			while (!is_delimiter(c));
+			while (!is_delimchar(c));

 			if (TOKEN_NAME_LEN(hcl) >= 4)
 			{
@ -849,10 +863,8 @@ static int get_sharp_token (hcl_t* hcl)
 								"invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
 							return -1;
 						}
-
 						c = c * 16 + CHAR_TO_NUM(hcl->c->tok.name.ptr[i], 16); /* don't care if it is for 'p' */
 					}
-
 				}
 			#if (HCL_SIZEOF_OOCH_T >= 2)
 				else if (TOKEN_NAME_CHAR(hcl, 2) == 'u')
@ -868,26 +880,22 @@ static int get_sharp_token (hcl_t* hcl)
 					goto hexcharlit;
 				}
 			#endif
-				else if (does_token_name_match(hcl, VOCA_SPACE))
+				else if (does_token_name_match(hcl, VOCA_BACKSPACE))
 				{
-					c = ' ';
+					c = '\b';
+				}
+				else if (does_token_name_match(hcl, VOCA_LINEFEED))
+				{
+					c = '\n';
 				}
 				else if (does_token_name_match(hcl, VOCA_NEWLINE))
 				{
 					/* TODO: convert it to host newline convention. how to handle if it's composed of 2 letters like \r\n? */
 					c = '\n';
 				}
-				else if (does_token_name_match(hcl, VOCA_BACKSPACE))
+				else if (does_token_name_match(hcl, VOCA_NUL)) /* null character. not #nil */
 				{
-					c = '\b';
-				}
-				else if (does_token_name_match(hcl, VOCA_TAB))
-				{
-					c = '\t';
-				}
-				else if (does_token_name_match(hcl, VOCA_LINEFEED))
-				{
-					c = '\n';
+					c = '\0';
 				}
 				else if (does_token_name_match(hcl, VOCA_PAGE))
 				{
@ -897,18 +905,22 @@ static int get_sharp_token (hcl_t* hcl)
 				{
 					c = '\r';
 				}
-				else if (does_token_name_match(hcl, VOCA_NUL)) /* null character. not #nil */
+				else if (does_token_name_match(hcl, VOCA_RUBOUT))
 				{
-					c = '\0';
+					c = '\x7F'; /* DEL */
+				}
+				else if (does_token_name_match(hcl, VOCA_SPACE))
+				{
+					c = ' ';
+				}
+				else if (does_token_name_match(hcl, VOCA_TAB))
+				{
+					c = '\t';
 				}
 				else if (does_token_name_match(hcl, VOCA_VTAB))
 				{
 					c = '\v';
 				}
-				else if (does_token_name_match(hcl, VOCA_RUBOUT))
-				{
-					c = '\x7F'; /* DEL */
-				}
 				else
 				{
 					hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
@ -945,7 +957,7 @@ static int get_sharp_token (hcl_t* hcl)
 			break;

 		default:
-			if (is_delimiter(c))
+			if (is_delimchar(c))
 			{
 				/* EOF, whitespace, etc */
 				hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
@ -954,15 +966,14 @@ static int get_sharp_token (hcl_t* hcl)
 			}

 			ADD_TOKEN_CHAR (hcl, '#');
-		long_name:
 			do
 			{
 				ADD_TOKEN_CHAR (hcl, c);
 				GET_CHAR_TO (hcl, c);
 			}
-			while (!is_delimiter(c));
+			while (!is_delimchar(c));

-			if (does_token_name_match (hcl, VOCA_INCLUDE))
+			if (does_token_name_match(hcl, VOCA_INCLUDE))
 			{
 				SET_TOKEN_TYPE (hcl, HCL_IOTOK_INCLUDE);
 			}
@ -1275,7 +1286,7 @@ retry:

 		default:
 		ident:
-			if (is_delimiter(c))
+			if (is_delimchar(c))
 			{
 				hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILCHR, TOKEN_LOC(hcl), HCL_NULL, "illegal character %jc encountered", c);
 				return -1;
@ -1304,7 +1315,7 @@ retry:

 				read_more_seg:
 					GET_CHAR_TO (hcl, c);
-					if (!is_delimiter(c))
+					if (!is_delimchar(c))
 					{
 						hcl_oow_t start;
 						hcl_oocs_t seg;
@ -1318,7 +1329,7 @@ retry:
 							ADD_TOKEN_CHAR (hcl, c);
 							GET_CHAR_TO (hcl, c);
 						}
-						while (!is_delimiter(c));
+						while (!is_delimchar(c));

 						seg.ptr = &TOKEN_NAME_CHAR(hcl,start);
 						seg.len = TOKEN_NAME_LEN(hcl) - start;
@ -1340,7 +1351,7 @@ retry:
 					}
 					break;
 				}
-				else if (is_delimiter(c))
+				else if (is_delimchar(c))
 				{
 					unget_char (hcl, &hcl->c->lxc);
 					break;
@ -2412,3 +2423,580 @@ void hcl_detachio (hcl_t* hcl)
 	}
 }

+
+
+/* ---------------------------------------------------------------------- */
+
+/* resets the feed-based reader: the state stack becomes empty and the
+ * lexer goes back to the start state at line 1, column 1 with no file name. */
+static void init_feed (hcl_t* hcl)
+{
+	/* push_feed_state() increments 'top' before storing, so -1 means empty */
+	hcl->c->feed.top = -1;
+
+	hcl->c->feed.lx.loc.file = HCL_NULL;
+	hcl->c->feed.lx.loc.line = 1;
+	hcl->c->feed.lx.loc.colm = 1;
+	hcl->c->feed.lx.state = HCL_FEED_LX_START;
+}
+
+/* pushes a zero-filled entry tagged with 'code' onto the feed state stack.
+ * returns 0 on success. returns -1 after setting HCL_EBUFFULL if the
+ * fixed-size stack has no room left. */
+static int push_feed_state (hcl_t* hcl, int code)
+{
+	/* 'top' is -1 while the stack is empty. comparing it directly with
+	 * HCL_COUNTOF() - 1 (presumably an unsigned, sizeof-based value) converts
+	 * -1 to a huge unsigned number and reports a full stack on the very first
+	 * push. compare the index of the next slot instead; it is never negative. */
+	if ((hcl_oow_t)(hcl->c->feed.top + 1) >= HCL_COUNTOF(hcl->c->feed.st)) /* TODO: use a dynamically allocated stack? */
+	{
+		hcl_seterrbfmt (hcl, HCL_EBUFFULL, "feed state stack full");
+		return -1;
+	}
+
+	hcl->c->feed.top++;
+	HCL_MEMSET (&hcl->c->feed.st[hcl->c->feed.top], 0, HCL_SIZEOF(hcl->c->feed.st[hcl->c->feed.top]));
+	hcl->c->feed.st[hcl->c->feed.top].code = code;
+	return 0;
+}
+
+/* drops the topmost entry of the feed state stack.
+ * the caller must ensure that the stack is not empty. */
+static void pop_feed_state (hcl_t* hcl)
+{
+	HCL_ASSERT (hcl, hcl->c->feed.top >= 0);
+	hcl->c->feed.top -= 1;
+}
+
+/* an entry of the delimiter token table */
+typedef struct delim_token_t
+{
+	const char*      t_value; /* text of the token */
+	hcl_oow_t        t_len;   /* number of characters in t_value */
+	hcl_iotok_type_t t_type;  /* token type assigned when the text is matched */
+} delim_token_t;
+
+/* delimiter tokens recognized by the feed-based lexer.
+ * the table is only ever read, hence it is declared const. */
+static const delim_token_t delim_token_tab[] =
+{
+	/* [NOTE 1]
+	 *  if you add a new token, ensure the first character is listed in is_delimchar()
+	 *
+	 * [NOTE 2]
+	 *  for the implementation limitation in find_delim_token_char(),
+	 *  the entries in this table must be laid out in a certain way.
+	 *
+	 *    Group the items with the same prefix together.
+	 *    List the shorter before the longer items in the same group.
+	 *    The length must not differ by greater than 1 between 2 items in the same group.
+	 */
+
+	{ "(",        1, HCL_IOTOK_LPAREN },
+	{ "(:",       2, HCL_IOTOK_LPARCOLON },
+	{ ")",        1, HCL_IOTOK_RPAREN },
+
+	{ "[",        1, HCL_IOTOK_LBRACK },
+	{ "]",        1, HCL_IOTOK_RBRACK },
+
+	{ "{",        1, HCL_IOTOK_LBRACE },
+	{ "}",        1, HCL_IOTOK_RBRACE },
+
+	{ "|",        1, HCL_IOTOK_VBAR },
+	{ ",",        1, HCL_IOTOK_COMMA },
+
+	{ ".",        1, HCL_IOTOK_DOT },
+	{ "..",       2, HCL_IOTOK_DBLDOTS },
+	{ "...",      3, HCL_IOTOK_ELLIPSIS },
+
+	{ ":",        1, HCL_IOTOK_COLON },
+	{ "::",       2, HCL_IOTOK_DBLCOLONS },
+	{ "::*",      3, HCL_IOTOK_DCSTAR },
+	{ ":::",      3, HCL_IOTOK_TRPCOLONS  }
+};
+
+/* looks for rows of delim_token_tab between row_start and row_end (inclusive)
+ * whose character at column 'col' equals c.
+ *
+ * on a match, dt->row_start and dt->row_end receive the first and the last
+ * row of the matching run, dt->col_next receives col + 1 and 1 is returned.
+ * if nothing matches, dt is left untouched and 0 is returned.
+ *
+ * the scan stops at the first non-matching row that follows a match. that is
+ * why tokens sharing a prefix must sit next to each other in the table. */
+static int find_delim_token_char (hcl_t* hcl, const hcl_ooci_t c, int row_start, int row_end, int col, hcl_feed_dt_t* dt)
+{
+	int row, matched = 0;
+
+	for (row = row_start; row <= row_end; row++)
+	{
+		const delim_token_t* ent = &delim_token_tab[row];
+
+		if (col >= ent->t_len || c != ent->t_value[col])
+		{
+			/* a mismatch after a match ends the run of candidates */
+			if (matched) break;
+			continue;
+		}
+
+		if (!matched)
+		{
+			dt->row_start = row;
+			matched = 1;
+		}
+		dt->row_end = row;
+	}
+
+	if (matched) dt->col_next = col + 1;
+	return matched;
+}
+
+/* completes the token collected so far: sets its type to 'type', logs it
+ * and puts the lexer back into the HCL_FEED_LX_START state. always returns 0. */
+static HCL_INLINE int feed_wrap_up (hcl_t* hcl, hcl_iotok_type_t type)
+{
+	SET_TOKEN_TYPE (hcl, type);
+
+HCL_DEBUG4 (hcl, "TOKEN LEN %zu=>[%.*js] %d\n", TOKEN_NAME_LEN(hcl), TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), TOKEN_TYPE(hcl));
+/* TODO: fire token callback or something */
+
+	hcl->c->feed.lx.state = HCL_FEED_LX_START;
+	return 0;
+}
+
+/* appends c to the token name and then completes the token with 'type'.
+ * NOTE(review): ADD_TOKEN_CHAR is defined outside this view; presumably it
+ * returns -1 from this function when the character cannot be added - confirm. */
+static int feed_wrap_up_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_iotok_type_t type)
+{
+	ADD_TOKEN_CHAR (hcl, c);
+	return feed_wrap_up(hcl, type);
+}
+
+/* appends 'len' characters at 'str' to the token name and then completes
+ * the token with 'type'.
+ * NOTE(review): ADD_TOKEN_STR is defined outside this view; presumably it
+ * returns -1 from this function when the string cannot be added - confirm. */
+static int feed_wrap_up_with_str (hcl_t* hcl, const hcl_ooch_t* str, hcl_oow_t len, hcl_iotok_type_t type)
+{
+	ADD_TOKEN_STR (hcl, str, len);
+	return feed_wrap_up(hcl, type);
+}
+
+/* switches the feed lexer to 'state' without touching the token collected
+ * so far. always returns 0. */
+static int feed_continue (hcl_t* hcl, hcl_feed_lx_state_t state)
+{
+	hcl->c->feed.lx.state = state;
+	return 0;
+}
+
+/* appends c to the token name and switches the feed lexer to 'state'.
+ * returns 0 once the state has been switched. */
+static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_feed_lx_state_t state)
+{
+	ADD_TOKEN_CHAR (hcl, c);
+	return feed_continue(hcl, state);
+}
+
+
+
+/* statement wrappers around the feed_xxx() helpers. each of them makes the
+ * enclosing function return -1 if the helper reports a failure, so they can
+ * be used only inside functions returning int. */
+#define FEED_WRAP_UP(hcl, type) do { if (feed_wrap_up(hcl, type) <= -1) return -1; } while(0)
+#define FEED_WRAP_UP_WITH_CHAR(hcl, c, type) do { if (feed_wrap_up_with_char(hcl, c, type) <= -1) return -1; } while(0)
+#define FEED_WRAP_UP_WITH_CHARS(hcl, str, len, type) do { if (feed_wrap_up_with_str(hcl, str, len, type) <= -1) return -1; } while(0)
+#define FEED_CONTINUE(hcl, state) do { if (feed_continue(hcl, state) <= -1) return -1; } while(0)
+#define FEED_CONTINUE_WITH_CHAR(hcl, c, state) do { if (feed_continue_with_char(hcl, c, state) <= -1) return -1; } while(0)
+
+/* shorthands for the current state and location of the feed lexer */
+#define FEED_LX_STATE(hcl) ((hcl)->c->feed.lx.state)
+#define FEED_LX_LOC(hcl) (&((hcl)->c->feed.lx.loc))
+
+/* handles a character fed while the lexer is in the HCL_FEED_LX_START state,
+ * that is, at the beginning of a new token.
+ *
+ * returns 1 if c has been consumed and -1 on failure. the 'not_consumed'
+ * exit returning 0 exists but nothing jumps to it at the moment.
+ *
+ * NOTE(review): a character that neither begins a delimiter token nor is
+ * handled by an active case below (whitespace, digits, identifier characters,
+ * quotes, ...) falls out of the switch and is reported as consumed without
+ * starting any token. presumably this lasts only until the disabled code
+ * under '#if 0' is ported to the feed-based lexer - confirm. */
+static int feed_lx_start (hcl_t* hcl, hcl_ooci_t c)
+{
+	HCL_ASSERT (hcl, FEED_LX_STATE(hcl) == HCL_FEED_LX_START);
+
+	/* clear the token name, reset its location */
+	SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); /* is it correct? */
+	CLEAR_TOKEN_NAME (hcl);
+
+//HCL_DEBUG1 (hcl, "XXX[%jc]\n", c);
+	/* search the whole delimiter token table for entries beginning with c */
+	if (find_delim_token_char(hcl, c, 0, HCL_COUNTOF(delim_token_tab) - 1, 0, &hcl->c->feed.dt)) 
+	{
+		/* the character is one of the first character of a delimiter token */
+		if (hcl->c->feed.dt.row_start == hcl->c->feed.dt.row_end && hcl->c->feed.dt.col_next == delim_token_tab[hcl->c->feed.dt.row_start].t_len)
+		{
+			/* only one candidate remains and it is fully matched */
+			FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[hcl->c->feed.dt.row_start].t_type);
+		}
+		else
+		{
+			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_DELIM_TOKEN); /* consume c and move to HCL_FEED_LX_DELIM_TOKEN state */
+		}
+		goto consumed;
+	}
+
+	switch (c)
+	{
+		case HCL_OOCI_EOF:
+		{
+			/* 'n' is referenced only by the disabled code below */
+			int n;
+#if 0
+			n = end_include(hcl);
+			if (n <= -1) return -1;
+			if (n >= 1) goto retry;
+#endif
+			FEED_WRAP_UP_WITH_CHARS (hcl, vocas[VOCA_EOF].str, vocas[VOCA_EOF].len, HCL_IOTOK_EOF);
+			break;
+		}
+
+		case ';':
+			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_COMMENT);
+			break;
+
+		case '#':
+			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_SHARP_TOKEN);
+			break;
+
+/* the rest of the switch is disabled. it still reads characters with
+ * GET_CHAR_TO()/unget_char() instead of receiving them one at a time. */
+#if 0
+		case '\"':
+			if (get_string(hcl, '\"', '\\', 0, 0) <= -1) return -1;
+			break;
+
+		case '\'':
+			if (get_string(hcl, '\'', '\\', 0, 0) <= -1) return -1;
+			if (hcl->c->tok.name.len != 1)
+			{
+				hcl_setsynerr (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
+				return -1;
+			}
+			SET_TOKEN_TYPE (hcl, HCL_IOTOK_CHARLIT);
+			break;
+
+		case '#':
+			if (get_sharp_token(hcl) <= -1) return -1;
+			break;
+
+		case '+':
+		case '-':
+			oldc = c;
+			GET_CHAR_TO (hcl, c);
+			if(is_digitchar(c))
+			{
+				unget_char (hcl, &hcl->c->lxc);
+				c = oldc;
+				goto numlit;
+			}
+			else if (c == '#')
+			{
+				int radix;
+				hcl_iolxc_t sharp;
+
+				sharp = hcl->c->lxc; /* back up '#' */
+
+				GET_CHAR_TO (hcl, c);
+				switch (c)
+				{
+					case 'b':
+						radix = 2;
+						goto radnumlit;
+					case 'o':
+						radix = 8;
+						goto radnumlit;
+					case 'x':
+						radix = 16;
+					radnumlit:
+						ADD_TOKEN_CHAR (hcl, oldc);
+						if (get_radix_number(hcl, c, radix) <= -1) return -1;
+						break;
+
+					default:
+						unget_char (hcl, &hcl->c->lxc);
+						unget_char (hcl, &sharp);
+						c = oldc;
+						goto ident;
+				}
+			}
+			else
+			{
+				unget_char (hcl, &hcl->c->lxc);
+				c = oldc;
+				goto ident;
+			}
+			break;
+
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+		numlit:
+			SET_TOKEN_TYPE (hcl, HCL_IOTOK_NUMLIT);
+			while (1)
+			{
+				ADD_TOKEN_CHAR (hcl, c);
+				GET_CHAR_TO (hcl, c);
+				if (TOKEN_TYPE(hcl) == HCL_IOTOK_NUMLIT && c == '.')
+				{
+					SET_TOKEN_TYPE (hcl, HCL_IOTOK_FPDECLIT);
+					ADD_TOKEN_CHAR (hcl, c);
+					GET_CHAR_TO (hcl, c);
+					if (!is_digitchar(c))
+					{
+						/* the first character after the decimal point is not a decimal digit */
+						hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "invalid numeric literal with no digit after decimal point");
+						return -1;
+					}
+				}
+
+				if (!is_digitchar(c))
+				{
+					unget_char (hcl, &hcl->c->lxc);
+					break;
+				}
+			}
+
+			break;
+
+		default:
+		ident:
+			if (is_delimchar(c))
+			{
+				hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILCHR, TOKEN_LOC(hcl), HCL_NULL, "illegal character %jc encountered", c);
+				return -1;
+			}
+
+			SET_TOKEN_TYPE (hcl, HCL_IOTOK_IDENT);
+			while (1)
+			{
+				ADD_TOKEN_CHAR (hcl, c);
+				GET_CHAR_TO (hcl, c);
+
+				if (c == '.')
+				{
+					hcl_iolxc_t period;
+					hcl_iotok_type_t type;
+
+					type = classify_ident_token(hcl, TOKEN_NAME(hcl));
+					if (type != HCL_IOTOK_IDENT)
+					{
+						SET_TOKEN_TYPE (hcl, type);
+						unget_char (hcl, &hcl->c->lxc);
+						break;
+					}
+
+					period = hcl->c->lxc;
+
+				read_more_seg:
+					GET_CHAR_TO (hcl, c);
+					if (!is_delimchar(c))
+					{
+						hcl_oow_t start;
+						hcl_oocs_t seg;
+
+						SET_TOKEN_TYPE (hcl, HCL_IOTOK_IDENT_DOTTED);
+						ADD_TOKEN_CHAR (hcl, '.');
+
+						start = TOKEN_NAME_LEN(hcl);
+						do
+						{
+							ADD_TOKEN_CHAR (hcl, c);
+							GET_CHAR_TO (hcl, c);
+						}
+						while (!is_delimchar(c));
+
+						seg.ptr = &TOKEN_NAME_CHAR(hcl,start);
+						seg.len = TOKEN_NAME_LEN(hcl) - start;
+						if (classify_ident_token(hcl, &seg) != HCL_IOTOK_IDENT)
+						{
+							hcl_setsynerr (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
+							return -1;
+						}
+
+						if (c == '.') goto read_more_seg;
+
+						unget_char (hcl, &hcl->c->lxc);
+						break;
+					}
+					else
+					{
+						unget_char (hcl, &hcl->c->lxc);
+						unget_char (hcl, &period);
+					}
+					break;
+				}
+				else if (is_delimchar(c))
+				{
+					unget_char (hcl, &hcl->c->lxc);
+					break;
+				}
+			}
+
+			if (TOKEN_TYPE(hcl) == HCL_IOTOK_IDENT)
+			{
+				hcl_iotok_type_t type;
+				type = classify_ident_token(hcl, TOKEN_NAME(hcl));
+				SET_TOKEN_TYPE (hcl, type);
+			}
+			break;
+#endif
+	}
+
+consumed:
+	return 1;
+
+/* currently unreferenced */
+not_consumed:
+	return 0;
+}
+
+/* handles a character fed while a multi-character delimiter token is being
+ * matched against delim_token_tab.
+ *
+ * returns 1 if c extends the token and has been consumed. returns 0 if c
+ * does not belong to the token; the token matched so far is completed and
+ * c must be fed again. returns -1 on failure. */
+static int feed_lx_delim_token (hcl_t* hcl, hcl_ooci_t c)
+{
+	hcl_feed_dt_t* dt = &hcl->c->feed.dt;
+
+	if (!find_delim_token_char(hcl, c, dt->row_start, dt->row_end, dt->col_next, dt))
+	{
+		/* c matches none of the candidates. dt still describes the previous
+		 * match, whose first row is the longest match so far */
+		FEED_WRAP_UP (hcl, delim_token_tab[dt->row_start].t_type);
+		return 0; /* not consumed */
+	}
+
+	if (dt->row_start == dt->row_end && dt->col_next == delim_token_tab[dt->row_start].t_len)
+	{
+		/* a single, fully matched candidate remains.
+		 * complete the token and switch to the HCL_FEED_LX_START state */
+		FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[dt->row_start].t_type);
+	}
+	else
+	{
+		/* still ambiguous or incomplete. collect c and stay in this state */
+		ADD_TOKEN_CHAR(hcl, c);
+	}
+
+	return 1; /* consumed */
+}
+
+/* handles a character fed while the lexer is inside a comment running to
+ * the end of the line.
+ *
+ * returns 1 if c has been consumed, 0 if c must be fed again and -1 on failure. */
+static int feed_lx_comment (hcl_t* hcl, hcl_ooci_t c)
+{
+	if (c == HCL_OOCI_EOF)
+	{
+		/* the comment on the last line may lack a trailing line break.
+		 * leave the comment state without consuming the EOF so that the
+		 * start state gets the chance to produce the EOF token. */
+		FEED_CONTINUE (hcl, HCL_FEED_LX_START);
+		return 0; /* not consumed */
+	}
+
+	/* the line break ends the comment. anything else is discarded */
+	if (is_linebreak(c)) FEED_CONTINUE (hcl, HCL_FEED_LX_START);
+	return 1; /* consumed */
+}
+
+/* handles the character fed right after a hash sign.
+ *
+ * returns 1 if c has been consumed. any character other than '#', '!',
+ * '[' and '(' is reported as a syntax error with -1 for now. */
+static int feed_lx_sharp_token (hcl_t* hcl, hcl_ooci_t c)
+{
+	/*
+	 * the forms beginning with the hash sign:
+	 *
+	 * #xXXXX hexadecimal
+	 * #oOOOO octal
+	 * #bBBBB binary
+	 * #eDDD   error
+	 * #pHHH   smptr
+	 * #\C      character
+	 * #\xHHHH  unicode character
+	 * #\UHHHH  unicode character
+	 * #\uHHHH  unicode character
+	 * #\backspace
+	 * #\linefeed
+	 * #\newline
+	 * #\nul
+	 * #\page
+	 * #\return
+	 * #\rubout
+	 * #\space
+	 * #\tab
+	 * #\vtab
+	 * #include
+	 * #[ ]     byte array
+	 * #( )     qlist
+	 */
+
+	if (c == '#' || c == '!')
+	{
+		/* ## comment start
+		 * #! also comment start.
+		 * ; comment start */
+		FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_COMMENT);
+		return 1; /* consumed */
+	}
+
+	if (c == '[')
+	{
+		FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_BAPAREN);
+		return 1; /* consumed */
+	}
+
+	if (c == '(')
+	{
+		FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_QLPAREN);
+		return 1; /* consumed */
+	}
+
+	/* TODO: fix this part */
+	if (is_spacechar(c) || c == HCL_UCI_EOF)
+	{
+		hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FEED_LX_LOC(hcl), HCL_NULL,
+			"no character after the hash sign");
+	}
+	else
+	{
+		hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FEED_LX_LOC(hcl), HCL_NULL,
+			"invalid character after the hash sign - %jc", c);
+	}
+	return -1;
+}
+
+/* passes c to the handler of the current feed lexer state.
+ *
+ * returns 1 if c has been consumed, 0 if c must be fed again after a state
+ * change and -1 on failure. */
+static int feed_char (hcl_t* hcl, hcl_ooci_t c)
+{
+/* TODO: track line number and column number? */
+	switch (FEED_LX_STATE(hcl))
+	{
+		case HCL_FEED_LX_START:       return feed_lx_start(hcl, c);
+		case HCL_FEED_LX_DELIM_TOKEN: return feed_lx_delim_token(hcl, c);
+		case HCL_FEED_LX_COMMENT:     return feed_lx_comment(hcl, c);
+		case HCL_FEED_LX_SHARP_TOKEN: return feed_lx_sharp_token(hcl, c);
+
+		/* TODO: planned states without a handler yet:
+		 *   HCL_FEED_LX_DQSTR     -> feed_lx_dqstr()
+		 *   HCL_FEED_LX_SQSTR     -> feed_lx_sqstr()
+		 *   HCL_FEED_LX_CSTR
+		 *   HCL_FEED_LX_DIRECTIVE
+		 */
+
+		default:
+			break;
+	}
+
+	/* INVALID STATE - returning 0 here would tell the caller that c has not
+	 * been consumed and make it feed the same character again forever.
+	 * fail instead. */
+	hcl_seterrbfmt (hcl, HCL_EINTERN, "internal error - invalid feed lexer state %d", (int)FEED_LX_STATE(hcl));
+	return -1;
+}
+
+/* feeds 'len' characters at 'data' to the lexer, one character at a time.
+ * a character that a state handler does not consume is fed again after the
+ * handler has changed the state. passing HCL_NULL for 'data' feeds the
+ * end of input.
+ *
+ * returns 0 on success and -1 on failure. */
+int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len)
+{
+/* TODO: need to return the number of processed characters?
+ *       need to stop after the first complete expression? */
+
+	hcl_oow_t i;
+	int x;
+
+	if (data)
+	{
+		for (i = 0; i < len; )
+		{
+			/* remember the character being fed. the location must be updated
+			 * for the character just consumed, not for the one that follows
+			 * it, which may even lie beyond the end of the data. */
+			const hcl_ooch_t c = data[i];
+
+			x = feed_char(hcl, c);
+			if (x <= -1) return -1;
+
+			if (x > 0)
+			{
+				i += x;
+				if (is_linebreak(c))
+				{
+					hcl->c->feed.lx.loc.line++;
+					hcl->c->feed.lx.loc.colm = 1;
+				}
+				else
+				{
+					hcl->c->feed.lx.loc.colm++;
+				}
+			}
+		}
+	}
+	else
+	{
+		/* keep feeding the end of input until it is consumed */
+		for (i = 0; i < 1;)
+		{
+			x = feed_char(hcl, HCL_OOCI_EOF);
+			if (x <= -1) return -1;
+			i += x;
+		}
+	}
+
+	return 0;
+}
+
+
+/*
+hcl_setopt (ON_EXPRESSION CALLBACK??? );
+
+
+
+hcl_feed (hcl, "(hello) (10)", 12);
+	> on_token
+	> on_expression
+	> on_eof
+
+default callback for on_expression?
+	compile
+	execute? only in the interactive mode? (say it's used as a network protocol: execute each expression as it is received)
+
+default callback for on_eof?
+	execute or terminate?
+
+
+*/