implementing hcl_feed()
This commit is contained in:
parent
982a7c1592
commit
60c9fe8ac9
26
bin/main.c
26
bin/main.c
@ -855,16 +855,6 @@ int main (int argc, char* argv[])
|
||||
* -- instead of returning -1 immediately. --*/
|
||||
set_signal (SIGINT, handle_sigint);
|
||||
|
||||
#if 0
|
||||
hcl_prbfmt (hcl, "this is good %s %10hs %hs\n", "whole new world. 1234567890 from this point onward, any failure leasd to jumping to oops label", "as이거 좋은거잖아dkfjsdakfjsadklfjasd", "1111");
|
||||
{
|
||||
hcl_uch_t fmt[] = {'G','G','%','l','s', 'a','b','c','-','-','%','0','2','0','x','\0'};
|
||||
hcl_uch_t ustr[] = {'A','B','C', 'X','Y','Z','Q','Q','\0'};
|
||||
hcl_prufmt (hcl, fmt, ustr, 0x6789);
|
||||
hcl_logufmt (hcl, HCL_LOG_WARN, fmt, ustr, 0x6789);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
// TODO: change the option name
|
||||
// in the INTERACTIVE mode, the compiler generates MAKE_FUNCTION for lambda functions.
|
||||
@ -879,6 +869,22 @@ hcl_logufmt (hcl, HCL_LOG_WARN, fmt, ustr, 0x6789);
|
||||
cflags = 0;
|
||||
if (xtn->reader_istty) cflags = HCL_COMPILE_CLEAR_CODE | HCL_COMPILE_CLEAR_FNBLK;
|
||||
|
||||
#if 0
|
||||
{
|
||||
hcl_oow_t slen;
|
||||
hcl_ooch_t* scr = hcl_dupbtooocstr(hcl, "(:::::..##..|,:{{}}..\n....)(#(#[", &slen);
|
||||
if (hcl_feed (hcl, scr, slen) <= -1)
|
||||
{
|
||||
if (hcl->errnum == HCL_ESYNERR) print_synerr (hcl);
|
||||
else hcl_logbfmt (hcl, HCL_LOG_STDERR, "ERROR: cannot feed - [%d] %js\n", hcl_geterrnum(hcl), hcl_geterrmsg(hcl));
|
||||
}
|
||||
|
||||
hcl_endfeed (hcl);
|
||||
}
|
||||
hcl_close (hcl);
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
while (1)
|
||||
{
|
||||
hcl_cnode_t* obj;
|
||||
|
@ -149,12 +149,14 @@ enum hcl_iotok_type_t
|
||||
|
||||
HCL_IOTOK_IDENT,
|
||||
HCL_IOTOK_IDENT_DOTTED,
|
||||
HCL_IOTOK_DOT,
|
||||
HCL_IOTOK_ELLIPSIS,
|
||||
HCL_IOTOK_COLON,
|
||||
HCL_IOTOK_DOT, /* . */
|
||||
HCL_IOTOK_DBLDOTS, /* .. */
|
||||
HCL_IOTOK_ELLIPSIS, /* ... */
|
||||
HCL_IOTOK_COLON, /* : */
|
||||
HCL_IOTOK_DBLCOLONS, /* :: */
|
||||
HCL_IOTOK_TRPCOLONS, /* ::: */
|
||||
HCL_IOTOK_DCSTAR, /* ::* */
|
||||
HCL_IOTOK_COMMA,
|
||||
HCL_IOTOK_COMMA, /* , */
|
||||
HCL_IOTOK_LPAREN, /* ( */
|
||||
HCL_IOTOK_RPAREN, /* ) */
|
||||
HCL_IOTOK_LPARCOLON, /* (: */
|
||||
@ -496,6 +498,23 @@ struct hcl_rstl_t
|
||||
hcl_rstl_t* prev;
|
||||
};
|
||||
|
||||
/* Progress of matching the input against the delimiter token table.
 * It remembers which rows of the table are still candidates and which
 * character position of those rows is to be compared next. */
struct hcl_feed_dt_t
{
	int row_start; /* first table row that matched so far */
	int row_end;   /* last table row that matched so far */
	int col_next;  /* index of the next character to compare */
};
typedef struct hcl_feed_dt_t hcl_feed_dt_t;
|
||||
|
||||
/* States of the feed-driven (push-style) lexer. */
enum hcl_feed_lx_state_t
{
	HCL_FEED_LX_START       = 0, /* between tokens */
	HCL_FEED_LX_DELIM_TOKEN = 1, /* inside a multi-character delimiter token */
	HCL_FEED_LX_COMMENT     = 2, /* inside a comment running to the end of the line */
	HCL_FEED_LX_SHARP_TOKEN = 3  /* right after a hash sign */
};
typedef enum hcl_feed_lx_state_t hcl_feed_lx_state_t;
|
||||
|
||||
struct hcl_compiler_t
|
||||
{
|
||||
/* output handler */
|
||||
@ -541,6 +560,33 @@ struct hcl_compiler_t
|
||||
} r; /* reading */
|
||||
/* == END READER == */
|
||||
|
||||
struct
|
||||
{
|
||||
struct
|
||||
{
|
||||
hcl_feed_lx_state_t state;
|
||||
hcl_ioloc_t loc;
|
||||
} lx;
|
||||
hcl_feed_dt_t dt; /* delimiter token */
|
||||
|
||||
struct
|
||||
{
|
||||
int code;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
int x;
|
||||
} xxx;
|
||||
struct
|
||||
{
|
||||
int x;
|
||||
} yyy;
|
||||
} u;
|
||||
} st[100];
|
||||
hcl_ooi_t top;
|
||||
} feed;
|
||||
|
||||
/* == COMPILER STACK == */
|
||||
struct
|
||||
{
|
||||
@ -577,6 +623,8 @@ struct hcl_compiler_t
|
||||
hcl_clsblk_info_t* info;
|
||||
hcl_oow_t info_capa;
|
||||
} clsblk; /* class block */
|
||||
|
||||
|
||||
};
|
||||
#endif
|
||||
|
||||
|
@ -2192,6 +2192,15 @@ HCL_EXPORT hcl_ooi_t hcl_proutufmt (
|
||||
);
|
||||
|
||||
#if defined(HCL_INCLUDE_COMPILER)
|
||||
|
||||
HCL_EXPORT int hcl_feed (
|
||||
hcl_t* hcl,
|
||||
const hcl_ooch_t* data,
|
||||
hcl_oow_t len
|
||||
);
|
||||
|
||||
#define hcl_endfeed(hcl) (hcl_feed((hcl), HCL_NULL, 0))
|
||||
|
||||
HCL_EXPORT int hcl_compile (
|
||||
hcl_t* hcl,
|
||||
hcl_cnode_t* obj,
|
||||
|
670
lib/read.c
670
lib/read.c
@ -289,6 +289,12 @@ static HCL_INLINE int is_spacechar (hcl_ooci_t c)
|
||||
}
|
||||
}
|
||||
|
||||
/* Tells whether 'c' terminates a line. Only the line feed character
 * is recognized at the moment. */
static HCL_INLINE int is_linebreak (hcl_ooci_t c)
{
	/* TODO: different line end conventions? */
	if (c == '\n') return 1;
	return 0;
}
|
||||
|
||||
static HCL_INLINE int is_alphachar (hcl_ooci_t c)
|
||||
{
|
||||
/* TODO: support full unicode */
|
||||
@ -313,11 +319,12 @@ static HCL_INLINE int is_alnumchar (hcl_ooci_t c)
|
||||
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
|
||||
}
|
||||
|
||||
/* Tells whether 'c' ends an identifier-like token.
 * Besides the punctuation below, white space and the end of input
 * count as delimiters as well. */
static HCL_INLINE int is_delimchar (hcl_ooci_t c)
{
	return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}' ||
	       c == '|' || c == ',' || c == '.' || c == ':' ||
	       /* the characters up to this point are the first characters of the tokens in delim_token_tab */
	       c == ';' || c == '#' || c == '\"' || c == '\'' || is_spacechar(c) || c == HCL_UCI_EOF;
}
|
||||
|
||||
static int copy_string_to (hcl_t* hcl, const hcl_oocs_t* src, hcl_oocs_t* dst, hcl_oow_t* dst_capa, int append, hcl_ooch_t add_delim)
|
||||
@ -733,14 +740,14 @@ static int get_radix_number (hcl_t* hcl, hcl_ooci_t rc, int radix)
|
||||
}
|
||||
while (CHAR_TO_NUM(c, radix) < radix);
|
||||
|
||||
if (!is_delimiter(c))
|
||||
if (!is_delimchar(c))
|
||||
{
|
||||
do
|
||||
{
|
||||
ADD_TOKEN_CHAR(hcl, c);
|
||||
GET_CHAR_TO (hcl, c);
|
||||
}
|
||||
while (!is_delimiter(c));
|
||||
while (!is_delimchar(c));
|
||||
|
||||
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
|
||||
"invalid digit in radixed number in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr);
|
||||
@ -763,19 +770,26 @@ static int get_sharp_token (hcl_t* hcl)
|
||||
GET_CHAR_TO (hcl, c);
|
||||
|
||||
/*
|
||||
* #bBBBB binary
|
||||
* #oOOOO octal
|
||||
* #xXXXX hexadecimal
|
||||
* #oOOOO octal
|
||||
* #bBBBB binary
|
||||
* #eDDD error
|
||||
* #pHHH smptr
|
||||
* #nil
|
||||
* #true
|
||||
* #false
|
||||
* #include
|
||||
* #\C character
|
||||
* #\xHHHH unicode character
|
||||
* #\UHHHH unicode character
|
||||
* #\uHHHH unicode character
|
||||
* #\backspace
|
||||
* #\linefeed
|
||||
* #\newline
|
||||
* #\nul
|
||||
* #\page
|
||||
* #\return
|
||||
* #\rubout
|
||||
* #\space
|
||||
* #\tab
|
||||
* #\vtab
|
||||
* #include
|
||||
* #[ ] byte array
|
||||
* #( ) qlist
|
||||
*/
|
||||
@ -809,7 +823,7 @@ static int get_sharp_token (hcl_t* hcl)
|
||||
ADD_TOKEN_CHAR (hcl, '\\');
|
||||
|
||||
GET_CHAR_TO (hcl, c);
|
||||
if (is_delimiter(c))
|
||||
if (is_delimchar(c))
|
||||
{
|
||||
hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
|
||||
"no valid character after #\\ in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr);
|
||||
@ -822,7 +836,7 @@ static int get_sharp_token (hcl_t* hcl)
|
||||
ADD_TOKEN_CHAR (hcl, c);
|
||||
GET_CHAR_TO (hcl, c);
|
||||
}
|
||||
while (!is_delimiter(c));
|
||||
while (!is_delimchar(c));
|
||||
|
||||
if (TOKEN_NAME_LEN(hcl) >= 4)
|
||||
{
|
||||
@ -849,10 +863,8 @@ static int get_sharp_token (hcl_t* hcl)
|
||||
"invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
|
||||
return -1;
|
||||
}
|
||||
|
||||
c = c * 16 + CHAR_TO_NUM(hcl->c->tok.name.ptr[i], 16); /* don't care if it is for 'p' */
|
||||
}
|
||||
|
||||
}
|
||||
#if (HCL_SIZEOF_OOCH_T >= 2)
|
||||
else if (TOKEN_NAME_CHAR(hcl, 2) == 'u')
|
||||
@ -868,26 +880,22 @@ static int get_sharp_token (hcl_t* hcl)
|
||||
goto hexcharlit;
|
||||
}
|
||||
#endif
|
||||
else if (does_token_name_match(hcl, VOCA_SPACE))
|
||||
else if (does_token_name_match(hcl, VOCA_BACKSPACE))
|
||||
{
|
||||
c = ' ';
|
||||
c = '\b';
|
||||
}
|
||||
else if (does_token_name_match(hcl, VOCA_LINEFEED))
|
||||
{
|
||||
c = '\n';
|
||||
}
|
||||
else if (does_token_name_match(hcl, VOCA_NEWLINE))
|
||||
{
|
||||
/* TODO: convert it to host newline convention. how to handle if it's composed of 2 letters like \r\n? */
|
||||
c = '\n';
|
||||
}
|
||||
else if (does_token_name_match(hcl, VOCA_BACKSPACE))
|
||||
else if (does_token_name_match(hcl, VOCA_NUL)) /* null character. not #nil */
|
||||
{
|
||||
c = '\b';
|
||||
}
|
||||
else if (does_token_name_match(hcl, VOCA_TAB))
|
||||
{
|
||||
c = '\t';
|
||||
}
|
||||
else if (does_token_name_match(hcl, VOCA_LINEFEED))
|
||||
{
|
||||
c = '\n';
|
||||
c = '\0';
|
||||
}
|
||||
else if (does_token_name_match(hcl, VOCA_PAGE))
|
||||
{
|
||||
@ -897,18 +905,22 @@ static int get_sharp_token (hcl_t* hcl)
|
||||
{
|
||||
c = '\r';
|
||||
}
|
||||
else if (does_token_name_match(hcl, VOCA_NUL)) /* null character. not #nil */
|
||||
else if (does_token_name_match(hcl, VOCA_RUBOUT))
|
||||
{
|
||||
c = '\0';
|
||||
c = '\x7F'; /* DEL */
|
||||
}
|
||||
else if (does_token_name_match(hcl, VOCA_SPACE))
|
||||
{
|
||||
c = ' ';
|
||||
}
|
||||
else if (does_token_name_match(hcl, VOCA_TAB))
|
||||
{
|
||||
c = '\t';
|
||||
}
|
||||
else if (does_token_name_match(hcl, VOCA_VTAB))
|
||||
{
|
||||
c = '\v';
|
||||
}
|
||||
else if (does_token_name_match(hcl, VOCA_RUBOUT))
|
||||
{
|
||||
c = '\x7F'; /* DEL */
|
||||
}
|
||||
else
|
||||
{
|
||||
hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
|
||||
@ -945,7 +957,7 @@ static int get_sharp_token (hcl_t* hcl)
|
||||
break;
|
||||
|
||||
default:
|
||||
if (is_delimiter(c))
|
||||
if (is_delimchar(c))
|
||||
{
|
||||
/* EOF, whitespace, etc */
|
||||
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
|
||||
@ -954,15 +966,14 @@ static int get_sharp_token (hcl_t* hcl)
|
||||
}
|
||||
|
||||
ADD_TOKEN_CHAR (hcl, '#');
|
||||
long_name:
|
||||
do
|
||||
{
|
||||
ADD_TOKEN_CHAR (hcl, c);
|
||||
GET_CHAR_TO (hcl, c);
|
||||
}
|
||||
while (!is_delimiter(c));
|
||||
while (!is_delimchar(c));
|
||||
|
||||
if (does_token_name_match (hcl, VOCA_INCLUDE))
|
||||
if (does_token_name_match(hcl, VOCA_INCLUDE))
|
||||
{
|
||||
SET_TOKEN_TYPE (hcl, HCL_IOTOK_INCLUDE);
|
||||
}
|
||||
@ -1275,7 +1286,7 @@ retry:
|
||||
|
||||
default:
|
||||
ident:
|
||||
if (is_delimiter(c))
|
||||
if (is_delimchar(c))
|
||||
{
|
||||
hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILCHR, TOKEN_LOC(hcl), HCL_NULL, "illegal character %jc encountered", c);
|
||||
return -1;
|
||||
@ -1304,7 +1315,7 @@ retry:
|
||||
|
||||
read_more_seg:
|
||||
GET_CHAR_TO (hcl, c);
|
||||
if (!is_delimiter(c))
|
||||
if (!is_delimchar(c))
|
||||
{
|
||||
hcl_oow_t start;
|
||||
hcl_oocs_t seg;
|
||||
@ -1318,7 +1329,7 @@ retry:
|
||||
ADD_TOKEN_CHAR (hcl, c);
|
||||
GET_CHAR_TO (hcl, c);
|
||||
}
|
||||
while (!is_delimiter(c));
|
||||
while (!is_delimchar(c));
|
||||
|
||||
seg.ptr = &TOKEN_NAME_CHAR(hcl,start);
|
||||
seg.len = TOKEN_NAME_LEN(hcl) - start;
|
||||
@ -1340,7 +1351,7 @@ retry:
|
||||
}
|
||||
break;
|
||||
}
|
||||
else if (is_delimiter(c))
|
||||
else if (is_delimchar(c))
|
||||
{
|
||||
unget_char (hcl, &hcl->c->lxc);
|
||||
break;
|
||||
@ -2412,3 +2423,580 @@ void hcl_detachio (hcl_t* hcl)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* Puts the feed-driven lexer into its initial condition:
 * the START state, location line 1/column 1 with no file attached,
 * and an empty feed state stack. */
static void init_feed (hcl_t* hcl)
{
	hcl_ioloc_t* loc = &hcl->c->feed.lx.loc;

	hcl->c->feed.lx.state = HCL_FEED_LX_START;

	loc->line = 1;
	loc->colm = 1;
	loc->file = HCL_NULL;

	hcl->c->feed.top = -1; /* -1 denotes the empty stack */
}
|
||||
|
||||
/* Pushes a zero-filled entry tagged with 'code' onto the feed state stack.
 * Returns 0 on success. Returns -1 with HCL_EBUFFULL set if the fixed-size
 * stack has no room left. */
static int push_feed_state (hcl_t* hcl, int code)
{
	/* 'top' is signed and holds -1 while the stack is empty (see init_feed()).
	 * convert the capacity to the same signed type before comparing. if the
	 * capacity were unsigned, -1 would be converted to a huge value and the
	 * very first push would be rejected as 'stack full'.
	 * NOTE(review): assumes HCL_COUNTOF() is sizeof-based - confirm */
	const hcl_ooi_t capa = (hcl_ooi_t)HCL_COUNTOF(hcl->c->feed.st);

	if (hcl->c->feed.top >= capa - 1) /* TODO: use a dynamically allocated stack? */
	{
		hcl_seterrbfmt (hcl, HCL_EBUFFULL, "feed state stack full");
		return -1;
	}

	hcl->c->feed.top++;
	HCL_MEMSET (&hcl->c->feed.st[hcl->c->feed.top], 0, HCL_SIZEOF(hcl->c->feed.st[hcl->c->feed.top]));
	hcl->c->feed.st[hcl->c->feed.top].code = code;
	return 0;
}
|
||||
|
||||
/* Discards the topmost entry of the feed state stack.
 * The stack must not be empty. */
static void pop_feed_state (hcl_t* hcl)
{
	HCL_ASSERT (hcl, hcl->c->feed.top >= 0);
	hcl->c->feed.top -= 1;
}
|
||||
|
||||
struct delim_token_t
|
||||
{
|
||||
const char* t_value;
|
||||
hcl_oow_t t_len;
|
||||
hcl_iotok_type_t t_type;
|
||||
};
|
||||
typedef struct delim_token_t delim_token_t;
|
||||
|
||||
static delim_token_t delim_token_tab[] =
|
||||
{
|
||||
/* [NOTE 1]
|
||||
* if you add a new token, ensure the first character is listed in is_delimchar()
|
||||
*
|
||||
* [NOTE 2]
|
||||
* for the implementation limitation in find_delim_token_char(),
|
||||
* the entries in this table must be laid out in a certain way.
|
||||
*
|
||||
* Group the items with the same prefix together.
|
||||
* List the shorter before the longer items in the same group.
|
||||
* The length must not differ by greater than 1 between 2 items in the same group.
|
||||
*/
|
||||
|
||||
{ "(", 1, HCL_IOTOK_LPAREN },
|
||||
{ "(:", 2, HCL_IOTOK_LPARCOLON },
|
||||
{ ")", 1, HCL_IOTOK_RPAREN },
|
||||
|
||||
{ "[", 1, HCL_IOTOK_LBRACK },
|
||||
{ "]", 1, HCL_IOTOK_RBRACK },
|
||||
|
||||
{ "{", 1, HCL_IOTOK_LBRACE },
|
||||
{ "}", 1, HCL_IOTOK_RBRACE },
|
||||
|
||||
{ "|", 1, HCL_IOTOK_VBAR },
|
||||
{ ",", 1, HCL_IOTOK_COMMA },
|
||||
|
||||
{ ".", 1, HCL_IOTOK_DOT },
|
||||
{ "..", 2, HCL_IOTOK_DBLDOTS },
|
||||
{ "...", 3, HCL_IOTOK_ELLIPSIS },
|
||||
|
||||
{ ":", 1, HCL_IOTOK_COLON },
|
||||
{ "::", 2, HCL_IOTOK_DBLCOLONS },
|
||||
{ "::*", 3, HCL_IOTOK_DCSTAR },
|
||||
{ ":::", 3, HCL_IOTOK_TRPCOLONS }
|
||||
};
|
||||
|
||||
/* Scans the rows row_start..row_end of delim_token_tab for entries whose
 * character at index 'col' equals 'c'.
 *
 * On a match, dt->row_start and dt->row_end are set to the first and the
 * last row of the first run of consecutive matching rows, dt->col_next is
 * set to col + 1, and 1 is returned. Scanning stops at the first row that
 * fails to match after a match has been seen.
 *
 * If no row matches, 'dt' is left untouched and 0 is returned. */
static int find_delim_token_char (hcl_t* hcl, const hcl_ooci_t c, int row_start, int row_end, int col, hcl_feed_dt_t* dt)
{
	int row;
	int matched = 0;

	for (row = row_start; row <= row_end; row++)
	{
		if (col >= delim_token_tab[row].t_len || c != delim_token_tab[row].t_value[col])
		{
			/* a mismatch after a match closes the run */
			if (matched) break;
			continue;
		}

		if (!matched)
		{
			dt->row_start = row;
			matched = 1;
		}
		dt->row_end = row;
	}

	if (matched) dt->col_next = col + 1;
	return matched;
}
|
||||
|
||||
/* Completes the token collected so far: stamps it with 'type', logs it
 * and puts the lexer back to the START state. Always returns 0. */
static HCL_INLINE int feed_wrap_up (hcl_t* hcl, hcl_iotok_type_t type)
{
	SET_TOKEN_TYPE (hcl, type);
	HCL_DEBUG4 (hcl, "TOKEN LEN %zu=>[%.*js] %d\n", TOKEN_NAME_LEN(hcl), TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), TOKEN_TYPE(hcl));

	/* TODO: fire token callback or something */

	hcl->c->feed.lx.state = HCL_FEED_LX_START;
	return 0;
}
|
||||
|
||||
/* Appends 'c' to the token name and then completes the token as 'type'.
 * Returns what feed_wrap_up() returns. */
static int feed_wrap_up_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_iotok_type_t type)
{
	int n;

	ADD_TOKEN_CHAR (hcl, c);
	n = feed_wrap_up(hcl, type);
	return n;
}
|
||||
|
||||
/* Appends the 'len' characters at 'str' to the token name and then
 * completes the token as 'type'. Returns what feed_wrap_up() returns. */
static int feed_wrap_up_with_str (hcl_t* hcl, const hcl_ooch_t* str, hcl_oow_t len, hcl_iotok_type_t type)
{
	int n;

	ADD_TOKEN_STR (hcl, str, len);
	n = feed_wrap_up(hcl, type);
	return n;
}
|
||||
|
||||
/* Moves the lexer to 'state' without touching the token being collected.
 * Always returns 0. */
static int feed_continue (hcl_t* hcl, hcl_feed_lx_state_t state)
{
	hcl_feed_lx_state_t* cur = &hcl->c->feed.lx.state;
	*cur = state;
	return 0;
}
|
||||
|
||||
/* Appends 'c' to the token name and moves the lexer to 'state'.
 * Returns what feed_continue() returns. */
static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_feed_lx_state_t state)
{
	ADD_TOKEN_CHAR (hcl, c);
	return feed_continue(hcl, state);
}
|
||||
|
||||
|
||||
|
||||
/* Each of the statement macros below calls the helper of the matching name
 * and makes the *enclosing* function return -1 if the helper fails.
 * Use them only inside functions that return int. */
#define FEED_WRAP_UP(hcl, type) \
	do { if (feed_wrap_up((hcl), (type)) <= -1) return -1; } while(0)

#define FEED_WRAP_UP_WITH_CHAR(hcl, c, type) \
	do { if (feed_wrap_up_with_char((hcl), (c), (type)) <= -1) return -1; } while(0)

#define FEED_WRAP_UP_WITH_CHARS(hcl, str, len, type) \
	do { if (feed_wrap_up_with_str((hcl), (str), (len), (type)) <= -1) return -1; } while(0)

#define FEED_CONTINUE(hcl, state) \
	do { if (feed_continue((hcl), (state)) <= -1) return -1; } while(0)

#define FEED_CONTINUE_WITH_CHAR(hcl, c, state) \
	do { if (feed_continue_with_char((hcl), (c), (state)) <= -1) return -1; } while(0)

/* current state of the feed-driven lexer */
#define FEED_LX_STATE(hcl) ((hcl)->c->feed.lx.state)
/* pointer to the current location of the feed-driven lexer */
#define FEED_LX_LOC(hcl) (&((hcl)->c->feed.lx.loc))
|
||||
|
||||
/* Handles the character 'c' while the lexer is in the START state,
 * that is, at the beginning of a new token.
 *
 *  - a character opening a delimiter token either completes the token at
 *    once or moves the lexer to the DELIM_TOKEN state.
 *  - the end of input produces the EOF token.
 *  - ';' opens a comment and '#' opens a hash token.
 *  - any other character is consumed without further action for now
 *    because the handlers for the remaining token kinds are not ported yet.
 *    The disabled reference code below is kept for that work.
 *
 * Returns 1 if 'c' has been consumed, -1 on failure. */
static int feed_lx_start (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_feed_dt_t* dt = &hcl->c->feed.dt;

	HCL_ASSERT (hcl, FEED_LX_STATE(hcl) == HCL_FEED_LX_START);

	/* clear the token name, reset its location */
	SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); /* is it correct? */
	CLEAR_TOKEN_NAME (hcl);

	if (find_delim_token_char(hcl, c, 0, HCL_COUNTOF(delim_token_tab) - 1, 0, dt))
	{
		/* the character is one of the first character of a delimiter token */
		if (dt->row_start == dt->row_end && dt->col_next == delim_token_tab[dt->row_start].t_len)
		{
			/* a single candidate that is matched in full */
			FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[dt->row_start].t_type);
		}
		else
		{
			/* consume c and move to HCL_FEED_LX_DELIM_TOKEN state */
			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_DELIM_TOKEN);
		}
		return 1; /* consumed */
	}

	switch (c)
	{
		case HCL_OOCI_EOF:
		{
		#if 0
			int n;
			n = end_include(hcl);
			if (n <= -1) return -1;
			if (n >= 1) goto retry;
		#endif
			FEED_WRAP_UP_WITH_CHARS (hcl, vocas[VOCA_EOF].str, vocas[VOCA_EOF].len, HCL_IOTOK_EOF);
			break;
		}

		case ';':
			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_COMMENT);
			break;

		case '#':
			FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_SHARP_TOKEN);
			break;

#if 0
		case '\"':
			if (get_string(hcl, '\"', '\\', 0, 0) <= -1) return -1;
			break;

		case '\'':
			if (get_string(hcl, '\'', '\\', 0, 0) <= -1) return -1;
			if (hcl->c->tok.name.len != 1)
			{
				hcl_setsynerr (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
				return -1;
			}
			SET_TOKEN_TYPE (hcl, HCL_IOTOK_CHARLIT);
			break;

		case '#':
			if (get_sharp_token(hcl) <= -1) return -1;
			break;

		case '+':
		case '-':
			oldc = c;
			GET_CHAR_TO (hcl, c);
			if(is_digitchar(c))
			{
				unget_char (hcl, &hcl->c->lxc);
				c = oldc;
				goto numlit;
			}
			else if (c == '#')
			{
				int radix;
				hcl_iolxc_t sharp;

				sharp = hcl->c->lxc; /* back up '#' */

				GET_CHAR_TO (hcl, c);
				switch (c)
				{
					case 'b':
						radix = 2;
						goto radnumlit;
					case 'o':
						radix = 8;
						goto radnumlit;
					case 'x':
						radix = 16;
					radnumlit:
						ADD_TOKEN_CHAR (hcl, oldc);
						if (get_radix_number(hcl, c, radix) <= -1) return -1;
						break;

					default:
						unget_char (hcl, &hcl->c->lxc);
						unget_char (hcl, &sharp);
						c = oldc;
						goto ident;
				}
			}
			else
			{
				unget_char (hcl, &hcl->c->lxc);
				c = oldc;
				goto ident;
			}
			break;

		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
		numlit:
			SET_TOKEN_TYPE (hcl, HCL_IOTOK_NUMLIT);
			while (1)
			{
				ADD_TOKEN_CHAR (hcl, c);
				GET_CHAR_TO (hcl, c);
				if (TOKEN_TYPE(hcl) == HCL_IOTOK_NUMLIT && c == '.')
				{
					SET_TOKEN_TYPE (hcl, HCL_IOTOK_FPDECLIT);
					ADD_TOKEN_CHAR (hcl, c);
					GET_CHAR_TO (hcl, c);
					if (!is_digitchar(c))
					{
						/* the first character after the decimal point is not a decimal digit */
						hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "invalid numeric literal with no digit after decimal point");
						return -1;
					}
				}

				if (!is_digitchar(c))
				{
					unget_char (hcl, &hcl->c->lxc);
					break;
				}
			}

			break;

		default:
		ident:
			if (is_delimchar(c))
			{
				hcl_setsynerrbfmt (hcl, HCL_SYNERR_ILCHR, TOKEN_LOC(hcl), HCL_NULL, "illegal character %jc encountered", c);
				return -1;
			}

			SET_TOKEN_TYPE (hcl, HCL_IOTOK_IDENT);
			while (1)
			{
				ADD_TOKEN_CHAR (hcl, c);
				GET_CHAR_TO (hcl, c);

				if (c == '.')
				{
					hcl_iolxc_t period;
					hcl_iotok_type_t type;

					type = classify_ident_token(hcl, TOKEN_NAME(hcl));
					if (type != HCL_IOTOK_IDENT)
					{
						SET_TOKEN_TYPE (hcl, type);
						unget_char (hcl, &hcl->c->lxc);
						break;
					}

					period = hcl->c->lxc;

				read_more_seg:
					GET_CHAR_TO (hcl, c);
					if (!is_delimchar(c))
					{
						hcl_oow_t start;
						hcl_oocs_t seg;

						SET_TOKEN_TYPE (hcl, HCL_IOTOK_IDENT_DOTTED);
						ADD_TOKEN_CHAR (hcl, '.');

						start = TOKEN_NAME_LEN(hcl);
						do
						{
							ADD_TOKEN_CHAR (hcl, c);
							GET_CHAR_TO (hcl, c);
						}
						while (!is_delimchar(c));

						seg.ptr = &TOKEN_NAME_CHAR(hcl,start);
						seg.len = TOKEN_NAME_LEN(hcl) - start;
						if (classify_ident_token(hcl, &seg) != HCL_IOTOK_IDENT)
						{
							hcl_setsynerr (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
							return -1;
						}

						if (c == '.') goto read_more_seg;

						unget_char (hcl, &hcl->c->lxc);
						break;
					}
					else
					{
						unget_char (hcl, &hcl->c->lxc);
						unget_char (hcl, &period);
					}
					break;
				}
				else if (is_delimchar(c))
				{
					unget_char (hcl, &hcl->c->lxc);
					break;
				}
			}

			if (TOKEN_TYPE(hcl) == HCL_IOTOK_IDENT)
			{
				hcl_iotok_type_t type;
				type = classify_ident_token(hcl, TOKEN_NAME(hcl));
				SET_TOKEN_TYPE (hcl, type);
			}
			break;
#endif
	}

	/* every character reaching this point counts as consumed */
	return 1;
}
|
||||
|
||||
/* Handles the character 'c' while the lexer is inside a delimiter token
 * that may still grow.
 *
 * If 'c' extends one of the remaining candidates, it is added to the token.
 * The token is completed when a single candidate is left and matched in full.
 * Otherwise, the token is completed with the type of the first remaining
 * candidate and 'c' is left for the START state.
 *
 * Returns 1 if 'c' has been consumed, 0 if not, -1 on failure. */
static int feed_lx_delim_token (hcl_t* hcl, hcl_ooci_t c)
{
	hcl_feed_dt_t* dt = &hcl->c->feed.dt;

	if (!find_delim_token_char(hcl, c, dt->row_start, dt->row_end, dt->col_next, dt))
	{
		/* the longest match so far */
		FEED_WRAP_UP (hcl, delim_token_tab[dt->row_start].t_type);
		return 0; /* not consumed */
	}

	if (dt->row_start == dt->row_end && dt->col_next == delim_token_tab[dt->row_start].t_len)
	{
		/* complete token and switch to the HCL_FEED_LX_START state */
		FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[dt->row_start].t_type);
	}
	else
	{
		/* more than one candidate or more characters to come */
		ADD_TOKEN_CHAR(hcl, c);
	}

	return 1; /* consumed */
}
|
||||
|
||||
/* Handles the character 'c' while the lexer is inside a comment.
 *
 * A line break ends the comment and is consumed together with it.
 * The end of input ends the comment as well. It is handed back unconsumed
 * so that the START state can produce the EOF token for it.
 * Any other character is part of the comment text and is dropped.
 *
 * Returns 1 if 'c' has been consumed, 0 if not, -1 on failure. */
static int feed_lx_comment (hcl_t* hcl, hcl_ooci_t c)
{
	if (c == HCL_OOCI_EOF)
	{
		/* swallowing the end of input here would leave the input
		 * without the EOF token if it ends with a comment */
		FEED_CONTINUE (hcl, HCL_FEED_LX_START);
		return 0; /* not consumed */
	}

	if (is_linebreak(c)) FEED_CONTINUE (hcl, HCL_FEED_LX_START);
	return 1; /* consumed */
}
|
||||
|
||||
/* Handles the character 'c' that follows a hash sign.
 *
 * The forms planned for the hash token are listed below.
 * Only the comment openers, #[ and #( are handled at the moment.
 * Everything else is reported as a syntax error.
 *
 *  #xXXXX   hexadecimal
 *  #oOOOO   octal
 *  #bBBBB   binary
 *  #eDDD    error
 *  #pHHH    smptr
 *  #\C      character
 *  #\xHHHH  unicode character
 *  #\UHHHH  unicode character
 *  #\uHHHH  unicode character
 *  #\backspace  #\linefeed  #\newline  #\nul  #\page
 *  #\return     #\rubout    #\space    #\tab  #\vtab
 *  #include
 *  #[ ]     byte array
 *  #( )     qlist
 *
 * Returns 1 if 'c' has been consumed, -1 on failure. */
static int feed_lx_sharp_token (hcl_t* hcl, hcl_ooci_t c)
{
	if (c == '#' || c == '!')
	{
		/* ## comment start
		 * #! also comment start.
		 * ; comment start */
		FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_COMMENT);
		return 1;
	}

	if (c == '[')
	{
		FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_BAPAREN);
		return 1;
	}

	if (c == '(')
	{
		FEED_WRAP_UP_WITH_CHAR (hcl, c, HCL_IOTOK_QLPAREN);
		return 1;
	}

	/* TODO: fix this part */
	if (is_spacechar(c) || c == HCL_UCI_EOF)
	{
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FEED_LX_LOC(hcl), HCL_NULL,
			"no character after the hash sign");
	}
	else
	{
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FEED_LX_LOC(hcl), HCL_NULL,
			"invalid character after the hash sign - %jc", c);
	}
	return -1;
}
|
||||
|
||||
/* Offers the character 'c' to the handler of the current lexer state.
 *
 * Returns 1 if the handler has consumed 'c', 0 if the handler has changed
 * the state and 'c' must be offered again, -1 on failure. */
static int feed_char (hcl_t* hcl, hcl_ooci_t c)
{
	switch (FEED_LX_STATE(hcl))
	{
		case HCL_FEED_LX_START:       return feed_lx_start(hcl, c);
		case HCL_FEED_LX_DELIM_TOKEN: return feed_lx_delim_token(hcl, c);
		case HCL_FEED_LX_COMMENT:     return feed_lx_comment(hcl, c);
		case HCL_FEED_LX_SHARP_TOKEN: return feed_lx_sharp_token(hcl, c);

		/* TODO: states yet to be added
		 *   HCL_FEED_LX_DQSTR, HCL_FEED_LX_SQSTR,
		 *   HCL_FEED_LX_CSTR, HCL_FEED_LX_DIRECTIVE */

		default:
			break;
	}

	/* INVALID STATE
	 * it must not be reported as 'not consumed'. the caller would offer
	 * the same character over and over again and never terminate.
	 * NOTE(review): assumes HCL_EINTERN is the error number for internal errors - confirm */
	hcl_seterrbfmt (hcl, HCL_EINTERN, "internal error - invalid feed lexer state %d", (int)FEED_LX_STATE(hcl));
	return -1;
}
|
||||
|
||||
/* Pushes input into the feed-driven lexer.
 *
 * If 'data' is not HCL_NULL, the 'len' characters at 'data' are offered to
 * the lexer one by one. A character that the lexer does not consume is
 * offered again after the lexer has changed its state. The line and column
 * of the lexer location are advanced for every consumed character.
 *
 * If 'data' is HCL_NULL, the end of input is offered until it is consumed.
 *
 * Returns 0 on success, -1 on failure. */
int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len)
{
	/* TODO: need to return the number of processed characters?
	 *       need to stop after the first complete expression? */

	hcl_oow_t i;
	int x;

	if (data)
	{
		for (i = 0; i < len; )
		{
			/* remember the character being offered. the location must be
			 * advanced according to this character, not to the one that
			 * follows it. data[i] after the increment is a different
			 * character and lies outside the buffer for the last one. */
			const hcl_ooch_t c = data[i];

			x = feed_char(hcl, c);
			if (x <= -1) return -1;

			if (x > 0)
			{
				i += x;

				if (is_linebreak(c))
				{
					hcl->c->feed.lx.loc.line++;
					hcl->c->feed.lx.loc.colm = 1;
				}
				else
				{
					hcl->c->feed.lx.loc.colm++;
				}
			}
		}
	}
	else
	{
		/* offer the end of input until some state consumes it */
		for (i = 0; i < 1;)
		{
			x = feed_char(hcl, HCL_OOCI_EOF);
			if (x <= -1) return -1;
			i += x;
		}
	}

	return 0;
}
|
||||
|
||||
|
||||
/*
|
||||
hcl_setopt (ON_EXPRESSION CALLBACK??? );
|
||||
|
||||
|
||||
|
||||
hcl_feed (hcl, "(hello) (10)", 12);
|
||||
> on_token
|
||||
> on_expression
|
||||
> on_eof
|
||||
|
||||
default callback for on_expression?
|
||||
compile
|
||||
execute?? if in the interactive mode? (say it's used as a network protocol. execute each expression when received....)
|
||||
|
||||
default callback for on_eof?
|
||||
execute or terminate?
|
||||
|
||||
|
||||
*/
|
||||
|
Loading…
x
Reference in New Issue
Block a user