From 65aacaaf4f5b65234aa6f8872a98211294ac6155 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Fri, 22 Jul 2022 08:02:14 +0000 Subject: [PATCH] some work done for the feed based reader --- bin/main.c | 7 +- lib/err.c | 4 +- lib/hcl-prv.h | 44 +++- lib/hcl.h | 4 +- lib/read.c | 717 +++++++++++++++++++++++++++++++------------------- 5 files changed, 488 insertions(+), 288 deletions(-) diff --git a/bin/main.c b/bin/main.c index 3a48810..df697b1 100644 --- a/bin/main.c +++ b/bin/main.c @@ -821,11 +821,14 @@ static int feed_loop (hcl_t* hcl, xtn_t* xtn, int cflags, int verbose) return -1; } + /*(setvbuf (fp, NULL, _IONBF, 0);*/ + while (1) { hcl_oow_t n; - n = fread(&buf[len], 1, HCL_COUNTOF(buf) - len, fp); + /*n = fread(&buf[len], 1, HCL_COUNTOF(buf) - len, fp);*/ + n = read(fileno(fp), &buf[len], HCL_COUNTOF(buf) - len); if (n > 0) { int x; @@ -861,7 +864,7 @@ static int feed_loop (hcl_t* hcl, xtn_t* xtn, int cflags, int verbose) } } - if (feof(fp)) + if (n == 0 || feof(fp)) { if (len > 0) { diff --git a/lib/err.c b/lib/err.c index 206da98..9f38015 100644 --- a/lib/err.c +++ b/lib/err.c @@ -101,9 +101,9 @@ static char* synerrstr[] = "illegal character", "illegal token", "comment not closed", - "string/character not closed", - "invalid hashed literal", "wrong character literal", + "wrong string literal", + "invalid hashed literal", "invalid numeric literal", "out of integer range", "wrong error literal", diff --git a/lib/hcl-prv.h b/lib/hcl-prv.h index 3c23912..8aedb17 100644 --- a/lib/hcl-prv.h +++ b/lib/hcl-prv.h @@ -524,22 +524,41 @@ struct hcl_rstl_t hcl_rstl_t* prev; }; -typedef struct hcl_feed_dt_t hcl_feed_dt_t; -struct hcl_feed_dt_t +typedef struct hcl_flx_dt_t hcl_flx_dt_t; /* delimiter token */ +struct hcl_flx_dt_t { int row_start; int row_end; int col_next; }; -enum hcl_feed_lx_state_t +typedef struct hcl_flx_qt_t hcl_flx_qt_t; /* quoted token */ +struct hcl_flx_qt_t { - HCL_FEED_LX_START, - HCL_FEED_LX_DELIM_TOKEN, - HCL_FEED_LX_COMMENT, - 
HCL_FEED_LX_SHARP_TOKEN + /* input data */ + hcl_ooch_t end_char; + hcl_ooch_t esc_char; + hcl_oow_t min_len; + hcl_oow_t max_len; + hcl_iotok_type_t tok_type; + hcl_synerrnum_t synerr_code; + int regex; + + /* state data */ + int escaped; + int digit_count; + hcl_ooci_t c_acc; }; -typedef enum hcl_feed_lx_state_t hcl_feed_lx_state_t; + +enum hcl_flx_state_t +{ + HCL_FLX_START, + HCL_FLX_DELIM_TOKEN, + HCL_FLX_COMMENT, + HCL_FLX_SHARP_TOKEN, + HCL_FLX_QUOTED_TOKEN +}; +typedef enum hcl_flx_state_t hcl_flx_state_t; struct hcl_compiler_t { @@ -590,10 +609,15 @@ struct hcl_compiler_t { struct { - hcl_feed_lx_state_t state; + hcl_flx_state_t state; hcl_ioloc_t loc; + + union + { + hcl_flx_dt_t dt; /* delimiter token */ + hcl_flx_qt_t qt; /* quoted token */ + } u; } lx; - hcl_feed_dt_t dt; /* delimiter token */ struct { diff --git a/lib/hcl.h b/lib/hcl.h index 722272c..2aca05a 100644 --- a/lib/hcl.h +++ b/lib/hcl.h @@ -105,9 +105,9 @@ enum hcl_synerrnum_t HCL_SYNERR_ILCHR, /* illegal character */ HCL_SYNERR_ILTOK, /* invalid token */ HCL_SYNERR_CMTNC, /* comment not closed */ - HCL_SYNERR_STRCHRNC, /* string/character not closed */ - HCL_SYNERR_HASHLIT, /* wrong hashed literal */ HCL_SYNERR_CHARLIT, /* wrong character literal */ + HCL_SYNERR_STRLIT, /* wrong string literal */ + HCL_SYNERR_HASHLIT, /* wrong hashed literal */ HCL_SYNERR_NUMLIT , /* invalid numeric literal */ HCL_SYNERR_NUMRANGE, /* number range error */ HCL_SYNERR_ERRLIT, /* wrong error literal */ diff --git a/lib/read.c b/lib/read.c index 8a1b4f4..4a9f286 100644 --- a/lib/read.c +++ b/lib/read.c @@ -391,6 +391,7 @@ static int copy_string_to (hcl_t* hcl, const hcl_oocs_t* src, hcl_oocs_t* dst, h #define CLEAR_TOKEN_NAME(hcl) ((hcl)->c->tok.name.len = 0) #define SET_TOKEN_TYPE(hcl,tv) ((hcl)->c->tok.type = (tv)) +#define SET_TOKEN_LOC(hcl,locv) ((hcl)->c->tok.loc = *(locv)) #define TOKEN_TYPE(hcl) ((hcl)->c->tok.type) #define TOKEN_NAME(hcl) (&(hcl)->c->tok.name) @@ -552,7 +553,7 @@ static int skip_comment 
(hcl_t* hcl) return 0; } -static int get_string (hcl_t* hcl, hcl_ooch_t end_char, hcl_ooch_t esc_char, int regex, hcl_oow_t preescaped) +static int get_string (hcl_t* hcl, hcl_ooch_t end_char, hcl_ooch_t esc_char, int regex, hcl_oow_t preescaped, hcl_synerrnum_t synerr_code) { hcl_ooci_t c; hcl_oow_t escaped = preescaped; @@ -567,7 +568,7 @@ static int get_string (hcl_t* hcl, hcl_ooch_t end_char, hcl_ooch_t esc_char, int if (c == HCL_OOCI_EOF) { - hcl_setsynerr (hcl, HCL_SYNERR_STRCHRNC, TOKEN_LOC(hcl) /*LEXER_LOC(hcl)*/, HCL_NULL); + hcl_setsynerr (hcl, synerr_code, TOKEN_LOC(hcl) /*LEXER_LOC(hcl)*/, HCL_NULL); return -1; } @@ -1036,7 +1037,7 @@ retry: /* clear the token name, reset its location */ SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); /* is it correct? */ CLEAR_TOKEN_NAME (hcl); - hcl->c->tok.loc = hcl->c->lxc.l; /* set token location */ + SET_TOKEN_LOC (hcl, LEXER_LOC(hcl)); c = hcl->c->lxc.c; @@ -1187,14 +1188,13 @@ retry: ADD_TOKEN_CHAR (hcl, c); break; - case '\"': - if (get_string(hcl, '\"', '\\', 0, 0) <= -1) return -1; + if (get_string(hcl, '\"', '\\', 0, 0, HCL_SYNERR_STRLIT) <= -1) return -1; break; case '\'': - if (get_string(hcl, '\'', '\\', 0, 0) <= -1) return -1; - if (hcl->c->tok.name.len != 1) + if (get_string(hcl, '\'', '\\', 0, 0, HCL_SYNERR_CHARLIT) <= -1) return -1; + if (TOKEN_NAME_LEN(hcl) != 1) { hcl_setsynerr (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); return -1; @@ -2218,218 +2218,11 @@ hcl_cnodetoobj (hcl_t* hcl, hcl_cnode_t* x) } */ -/* ------------------------------------------------------------------------ */ - -/* TODO: rename compiler to something else that can include reader, printer, and compiler - * move compiler intialization/finalization here to more common place */ - -static void gc_compiler (hcl_t* hcl) -{ - hcl->c->r.s = hcl_moveoop(hcl, hcl->c->r.s); - hcl->c->r.e = hcl_moveoop(hcl, hcl->c->r.e); -} - -static void fini_compiler (hcl_t* hcl) -{ - /* called before the hcl object is closed */ - if (hcl->c) - { - 
if (hcl->c->cfs.ptr) - { - hcl_freemem (hcl, hcl->c->cfs.ptr); - hcl->c->cfs.ptr = HCL_NULL; - hcl->c->cfs.top = -1; - hcl->c->cfs.capa = 0; - } - - if (hcl->c->tv.s.ptr) - { - hcl_freemem (hcl, hcl->c->tv.s.ptr); - hcl->c->tv.s.ptr = HCL_NULL; - hcl->c->tv.s.len = 0; - hcl->c->tv.capa = 0; - hcl->c->tv.wcount = 0; - } - HCL_ASSERT (hcl, hcl->c->tv.capa == 0); - HCL_ASSERT (hcl, hcl->c->tv.wcount == 0); - - if (hcl->c->cblk.info) - { - hcl_freemem (hcl, hcl->c->cblk.info); - hcl->c->cblk.info = HCL_NULL; - hcl->c->cblk.info_capa = 0; - hcl->c->cblk.depth = -1; - } - - if (hcl->c->clsblk.info) - { - hcl_freemem (hcl, hcl->c->clsblk.info); - hcl->c->clsblk.info = HCL_NULL; - hcl->c->clsblk.info_capa = 0; - hcl->c->clsblk.depth = -1; - } - - if (hcl->c->fnblk.info) - { - hcl_freemem (hcl, hcl->c->fnblk.info); - hcl->c->fnblk.info = HCL_NULL; - hcl->c->fnblk.info_capa = 0; - hcl->c->fnblk.depth = -1; - } - - clear_io_names (hcl); - if (hcl->c->tok.name.ptr) hcl_freemem (hcl, hcl->c->tok.name.ptr); - - hcl_detachio (hcl); - - hcl_freemem (hcl, hcl->c); - hcl->c = HCL_NULL; - } -} - -int hcl_attachio (hcl_t* hcl, hcl_ioimpl_t reader, hcl_ioimpl_t printer) -{ - int n; - hcl_cb_t* cbp = HCL_NULL; - - if (!reader || !printer) - { - hcl_seterrbfmt (hcl, HCL_EINVAL, "reader and/or printer not supplied"); - return -1; - } - - if (!hcl->c) - { - hcl_cb_t cb; - - HCL_MEMSET (&cb, 0, HCL_SIZEOF(cb)); - cb.gc = gc_compiler; - cb.fini = fini_compiler; - cbp = hcl_regcb(hcl, &cb); - if (!cbp) return -1; - - hcl->c = (hcl_compiler_t*)hcl_callocmem(hcl, HCL_SIZEOF(*hcl->c)); - if (HCL_UNLIKELY(!hcl->c)) - { - hcl_deregcb (hcl, cbp); - return -1; - } - - hcl->c->ilchr_ucs.ptr = &hcl->c->ilchr; - hcl->c->ilchr_ucs.len = 1; - - hcl->c->r.s = hcl->_nil; - hcl->c->r.e = hcl->_nil; - - hcl->c->cfs.top = -1; - hcl->c->cblk.depth = -1; - hcl->c->clsblk.depth = -1; - hcl->c->fnblk.depth = -1; - } - else if (hcl->c->reader || hcl->c->printer) - { - hcl_seterrnum (hcl, HCL_EPERM); /* TODO: 
change this error code */ - return -1; - } - - /* Some IO names could have been stored in earlier calls to this function. - * I clear such names before i begin this function. i don't clear it - * at the end of this function because i may be referenced as an error - * location */ - clear_io_names (hcl); - - /* initialize some key fields */ - hcl->c->printer = printer; - hcl->c->reader = reader; - hcl->c->nungots = 0; - - /* The name field and the includer field are HCL_NULL - * for the main stream */ - HCL_MEMSET (&hcl->c->inarg, 0, HCL_SIZEOF(hcl->c->inarg)); - hcl->c->inarg.line = 1; - hcl->c->inarg.colm = 1; - - /* open the top-level stream */ - n = hcl->c->reader(hcl, HCL_IO_OPEN, &hcl->c->inarg); - if (n <= -1) goto oops; - - HCL_MEMSET (&hcl->c->outarg, 0, HCL_SIZEOF(hcl->c->outarg)); - n = hcl->c->printer(hcl, HCL_IO_OPEN, &hcl->c->outarg); - if (n <= -1) - { - hcl->c->reader (hcl, HCL_IO_CLOSE, &hcl->c->inarg); - goto oops; - } - - /* the stream is open. set it as the current input stream */ - hcl->c->curinp = &hcl->c->inarg; - return 0; - -oops: - if (cbp) - { - hcl_deregcb (hcl, cbp); - hcl_freemem (hcl, hcl->c); - hcl->c = HCL_NULL; - } - else - { - hcl->c->printer = HCL_NULL; - hcl->c->reader = HCL_NULL; - } - return -1; -} - -void hcl_flushio (hcl_t* hcl) -{ - if (hcl->c) - { - if (hcl->c->printer) hcl->c->printer (hcl, HCL_IO_FLUSH, &hcl->c->outarg); - } -} - -void hcl_detachio (hcl_t* hcl) -{ - /* an error occurred and control has reached here - * probably, some included files might not have been - * closed. 
close them */ - - if (hcl->c) - { - if (hcl->c->reader) - { - while (hcl->c->curinp != &hcl->c->inarg) - { - hcl_ioinarg_t* prev; - - /* nothing much to do about a close error */ - hcl->c->reader (hcl, HCL_IO_CLOSE, hcl->c->curinp); - - prev = hcl->c->curinp->includer; - HCL_ASSERT (hcl, hcl->c->curinp->name != HCL_NULL); - hcl_freemem (hcl, hcl->c->curinp); - hcl->c->curinp = prev; - } - - hcl->c->reader (hcl, HCL_IO_CLOSE, hcl->c->curinp); - hcl->c->reader = HCL_NULL; /* ready for another attachment */ - } - - if (hcl->c->printer) - { - hcl->c->printer (hcl, HCL_IO_CLOSE, &hcl->c->outarg); - hcl->c->printer = HCL_NULL; /* ready for another attachment */ - } - } -} - - - /* ---------------------------------------------------------------------- */ static void init_feed (hcl_t* hcl) { - hcl->c->feed.lx.state = HCL_FEED_LX_START; + hcl->c->feed.lx.state = HCL_FLX_START; hcl->c->feed.lx.loc.line = 1; hcl->c->feed.lx.loc.colm = 1; hcl->c->feed.lx.loc.file = HCL_NULL; @@ -2502,7 +2295,7 @@ static delim_token_t delim_token_tab[] = { ":::", 3, HCL_IOTOK_TRPCOLONS } }; -static int find_delim_token_char (hcl_t* hcl, const hcl_ooci_t c, int row_start, int row_end, int col, hcl_feed_dt_t* dt) +static int find_delim_token_char (hcl_t* hcl, const hcl_ooci_t c, int row_start, int row_end, int col, hcl_flx_dt_t* dt) { int found = 0, i; @@ -2528,10 +2321,10 @@ static HCL_INLINE int feed_wrap_up (hcl_t* hcl, hcl_iotok_type_t type) { SET_TOKEN_TYPE (hcl, type); -HCL_DEBUG4 (hcl, "TOKEN LEN %zu=>[%.*js] %d\n", TOKEN_NAME_LEN(hcl), TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), TOKEN_TYPE(hcl)); +HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl), TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), TOKEN_TYPE(hcl), (int)TOKEN_LOC(hcl)->line, (int)TOKEN_LOC(hcl)->colm); /* TOOD: fire token callback or something */ - hcl->c->feed.lx.state = HCL_FEED_LX_START; + hcl->c->feed.lx.state = HCL_FLX_START; return 0; } @@ -2547,49 +2340,56 @@ static int feed_wrap_up_with_str (hcl_t* 
hcl, const hcl_ooch_t* str, hcl_oow_t l return feed_wrap_up(hcl, type); } -static int feed_continue (hcl_t* hcl, hcl_feed_lx_state_t state) +static int feed_continue (hcl_t* hcl, hcl_flx_state_t state) { hcl->c->feed.lx.state = state; return 0; } -static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_feed_lx_state_t state) +static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_flx_state_t state) { ADD_TOKEN_CHAR (hcl, c); hcl->c->feed.lx.state = state; return 0; } - - #define FEED_WRAP_UP(hcl, type) do { if (feed_wrap_up(hcl, type) <= -1) return -1; } while(0) #define FEED_WRAP_UP_WITH_CHAR(hcl, c, type) do { if (feed_wrap_up_with_char(hcl, c, type) <= -1) return -1; } while(0) #define FEED_WRAP_UP_WITH_CHARS(hcl, str, len, type) do { if (feed_wrap_up_with_str(hcl, str, len, type) <= -1) return -1; } while(0) #define FEED_CONTINUE(hcl, state) do { if (feed_continue(hcl, state) <= -1) return -1; } while(0) #define FEED_CONTINUE_WITH_CHAR(hcl, c, state) do { if (feed_continue_with_char(hcl, c, state) <= -1) return -1; } while(0) -#define FEED_LX_STATE(hcl) ((hcl)->c->feed.lx.state) -#define FEED_LX_LOC(hcl) (&((hcl)->c->feed.lx.loc)) +#define FLX_STATE(hcl) ((hcl)->c->feed.lx.state) +#define FLX_LOC(hcl) (&((hcl)->c->feed.lx.loc)) -static int feed_lx_start (hcl_t* hcl, hcl_ooci_t c) +/* short-cuts to lexer state data */ +#define FLX_DT(hcl) (&((hcl)->c->feed.lx.u.dt)) +#define FLX_QT(hcl) (&((hcl)->c->feed.lx.u.qt)) + +static int flx_start (hcl_t* hcl, hcl_ooci_t c) { - HCL_ASSERT (hcl, FEED_LX_STATE(hcl) == HCL_FEED_LX_START); + HCL_ASSERT (hcl, FLX_STATE(hcl) == HCL_FLX_START); + + if (is_spacechar(c)) goto consumed; /* skip spaces */ /* clear the token name, reset its location */ SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); /* is it correct? 
*/ CLEAR_TOKEN_NAME (hcl); + SET_TOKEN_LOC (hcl, &hcl->c->feed.lx.loc); //HCL_DEBUG1 (hcl, "XXX[%jc]\n", c); - if (find_delim_token_char(hcl, c, 0, HCL_COUNTOF(delim_token_tab) - 1, 0, &hcl->c->feed.dt)) + if (find_delim_token_char(hcl, c, 0, HCL_COUNTOF(delim_token_tab) - 1, 0, FLX_DT(hcl))) { - /* the character is one of the first character of a delimiter token */ - if (hcl->c->feed.dt.row_start == hcl->c->feed.dt.row_end && hcl->c->feed.dt.col_next == delim_token_tab[hcl->c->feed.dt.row_start].t_len) + /* the character is one of the first character of a delimiter token such as (, [, :, etc */ + if (FLX_DT(hcl)->row_start == FLX_DT(hcl)->row_end && + FLX_DT(hcl)->col_next == delim_token_tab[FLX_DT(hcl)->row_start].t_len) { - FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[hcl->c->feed.dt.row_start].t_type); + /* single character delimiter token */ + FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[FLX_DT(hcl)->row_start].t_type); } else { - FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_DELIM_TOKEN); /* consume c and move to HCL_FEED_LX_DELIM_TOKEN state */ + FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_DELIM_TOKEN); /* consume c and move to HCL_FLX_DELIM_TOKEN state */ } goto consumed; } @@ -2609,28 +2409,36 @@ static int feed_lx_start (hcl_t* hcl, hcl_ooci_t c) } case ';': - FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_COMMENT); + FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_COMMENT); break; case '#': - FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_SHARP_TOKEN); + FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_SHARP_TOKEN); break; -#if 0 case '\"': - if (get_string(hcl, '\"', '\\', 0, 0) <= -1) return -1; + HCL_MEMSET (FLX_QT(hcl), 0, HCL_SIZEOF(*FLX_QT(hcl))); + FLX_QT(hcl)->end_char = c; + FLX_QT(hcl)->esc_char = '\\'; + FLX_QT(hcl)->min_len = 0; + FLX_QT(hcl)->max_len = HCL_TYPE_MAX(hcl_oow_t); + FLX_QT(hcl)->tok_type = HCL_IOTOK_STRLIT; + FLX_QT(hcl)->synerr_code = HCL_SYNERR_STRLIT; + FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. 
move on to the QUOTED_TOKEN state */ break; case '\'': - if (get_string(hcl, '\'', '\\', 0, 0) <= -1) return -1; - if (hcl->c->tok.name.len != 1) - { - hcl_setsynerr (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); - return -1; - } - SET_TOKEN_TYPE (hcl, HCL_IOTOK_CHARLIT); + HCL_MEMSET (FLX_QT(hcl), 0, HCL_SIZEOF(*FLX_QT(hcl))); + FLX_QT(hcl)->end_char = c; + FLX_QT(hcl)->esc_char = '\\'; + FLX_QT(hcl)->min_len = 1; + FLX_QT(hcl)->max_len = 1; + FLX_QT(hcl)->tok_type = HCL_IOTOK_CHARLIT; + FLX_QT(hcl)->synerr_code = HCL_SYNERR_CHARLIT; + FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on to the QUOTED_TOKEN state */ break; +#if 0 case '#': if (get_sharp_token(hcl) <= -1) return -1; break; @@ -2804,14 +2612,15 @@ not_consumed: return 0; } -static int feed_lx_delim_token (hcl_t* hcl, hcl_ooci_t c) +static int flx_delim_token (hcl_t* hcl, hcl_ooci_t c) { - if (find_delim_token_char(hcl, c, hcl->c->feed.dt.row_start, hcl->c->feed.dt.row_end, hcl->c->feed.dt.col_next, &hcl->c->feed.dt)) + if (find_delim_token_char(hcl, c, FLX_DT(hcl)->row_start, FLX_DT(hcl)->row_end, FLX_DT(hcl)->col_next, FLX_DT(hcl))) { - if (hcl->c->feed.dt.row_start == hcl->c->feed.dt.row_end && hcl->c->feed.dt.col_next == delim_token_tab[hcl->c->feed.dt.row_start].t_len) + if (FLX_DT(hcl)->row_start == FLX_DT(hcl)->row_end && + FLX_DT(hcl)->col_next == delim_token_tab[FLX_DT(hcl)->row_start].t_len) { - /* complete token and switch to the HCL_FEED_LX_START state */ - FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[hcl->c->feed.dt.row_start].t_type); + /* complete token and switch to the HCL_FLX_START state */ + FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[FLX_DT(hcl)->row_start].t_type); } else { @@ -2822,7 +2631,7 @@ static int feed_lx_delim_token (hcl_t* hcl, hcl_ooci_t c) else { /* the longest match so far */ - FEED_WRAP_UP(hcl, delim_token_tab[hcl->c->feed.dt.row_start].t_type); + FEED_WRAP_UP(hcl, delim_token_tab[FLX_DT(hcl)->row_start].t_type); 
goto not_consumed; } @@ -2833,13 +2642,13 @@ not_consumed: return 0; } -static int feed_lx_comment (hcl_t* hcl, hcl_ooci_t c) +static int flx_comment (hcl_t* hcl, hcl_ooci_t c) { - if (is_linebreak(c)) FEED_CONTINUE (hcl, HCL_FEED_LX_START); + if (is_linebreak(c)) FEED_CONTINUE (hcl, HCL_FLX_START); return 1; /* consumed */ } -static int feed_lx_sharp_token (hcl_t* hcl, hcl_ooci_t c) +static int flx_sharp_token (hcl_t* hcl, hcl_ooci_t c) { /* * #xXXXX hexadecimal @@ -2873,7 +2682,7 @@ static int feed_lx_sharp_token (hcl_t* hcl, hcl_ooci_t c) /* ## comment start * #! also comment start. * ; comment start */ - FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_COMMENT); + FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_COMMENT); goto consumed; case '[': @@ -2887,10 +2696,10 @@ static int feed_lx_sharp_token (hcl_t* hcl, hcl_ooci_t c) default: // TODO: fix this part if (is_spacechar(c) || c == HCL_UCI_EOF) - hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FEED_LX_LOC(hcl), HCL_NULL, + hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL, "no character after the hash sign"); else - hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FEED_LX_LOC(hcl), HCL_NULL, + hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL, "invalid character after the hash sign - %jc", c); return -1; } @@ -2902,30 +2711,186 @@ not_consumed: return 0; } +static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* double-quoted string */ +{ + hcl_flx_qt_t* qt = FLX_QT(hcl); + + if (c == HCL_OOCI_EOF) + { + hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl) /*hcl->c->feed.lx.loc?*/, HCL_NULL); + return -1; + } + + if (qt->escaped == 3) + { + if (c >= '0' && c <= '7') + { + /* more octal digits */ + qt->c_acc = qt->c_acc * 8 + c - '0'; + qt->digit_count++; + if (qt->digit_count >= qt->escaped) + { + /* should i limit the max to 0xFF/0377? 
+ * if (qt->c_acc > 0377) qt->c_acc = 0377;*/ + ADD_TOKEN_CHAR (hcl, qt->c_acc); + qt->escaped = 0; + } + goto consumed; + } + else + { + ADD_TOKEN_CHAR (hcl, qt->c_acc); + qt->escaped = 0; + } + } + else if (qt->escaped == 2 || qt->escaped == 4 || qt->escaped == 8) + { + if (c >= '0' && c <= '9') + { + qt->c_acc = qt->c_acc * 16 + c - '0'; + qt->digit_count++; + if (qt->digit_count >= qt->escaped) + { + ADD_TOKEN_CHAR (hcl, qt->c_acc); + qt->escaped = 0; + } + goto consumed; + } + else if (c >= 'A' && c <= 'F') + { + qt->c_acc = qt->c_acc * 16 + c - 'A' + 10; + qt->digit_count++; + if (qt->digit_count >= qt->escaped) + { + ADD_TOKEN_CHAR (hcl, qt->c_acc); + qt->escaped = 0; + } + goto consumed; + } + else if (c >= 'a' && c <= 'f') + { + qt->c_acc = qt->c_acc * 16 + c - 'a' + 10; + qt->digit_count++; + if (qt->digit_count >= qt->escaped) + { + ADD_TOKEN_CHAR (hcl, qt->c_acc); + qt->escaped = 0; + } + goto consumed; + } + else + { + hcl_ooch_t rc; + rc = (qt->escaped == 2)? 'x': + (qt->escaped == 4)? 'u': 'U'; + if (qt->digit_count == 0) + ADD_TOKEN_CHAR (hcl, rc); + else ADD_TOKEN_CHAR (hcl, qt->c_acc); + + qt->escaped = 0; + } + } + + if (qt->escaped == 0 && c == qt->end_char) + { + /* terminating quote */ + FEED_WRAP_UP (hcl, qt->tok_type); /* HCL_IOTOK_STRLIT or HCL_IOTOK_CHARLIT */ + if (TOKEN_NAME_LEN(hcl) < qt->min_len) + { + hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl), HCL_NULL); + return -1; + } + goto consumed; + } + + if (qt->escaped == 0 && c == qt->esc_char) + { + qt->escaped = 1; + goto consumed; + } + + if (qt->escaped == 1) + { + if (c == 'a') c = '\a'; + else if (c == 'b') c = '\b'; + else if (c == 'f') c = '\f'; + else if (c == 'n') c = '\n'; + else if (c == 'r') c = '\r'; + else if (c == 't') c = '\t'; + else if (c == 'v') c = '\v'; + else if (c >= '0' && c <= '7' && !qt->regex) + { + /* i don't support the octal notation for a regular expression. + * it conflicts with the backreference notation between \1 and \7 inclusive. 
*/ + qt->escaped = 3; + qt->digit_count = 1; + qt->c_acc = c - '0'; + goto consumed; + } + else if (c == 'x') + { + qt->escaped = 2; + qt->digit_count = 0; + qt->c_acc = 0; + goto consumed; + } + #if (HCL_SIZEOF_OOCH_T >= 2) + else if (c == 'u') + { + qt->escaped = 4; + qt->digit_count = 0; + qt->c_acc = 0; + goto consumed; + } + #endif + #if (HCL_SIZEOF_OOCH_T >= 4) + else if (c == 'U') + { + qt->escaped = 8; + qt->digit_count = 0; + qt->c_acc = 0; + goto consumed; + } + #endif + else if (qt->regex) + { + /* if the following character doesn't compose a proper + * escape sequence, keep the escape character. + * an unhandled escape sequence can be handled + * outside this function since the escape character + * is preserved.*/ + ADD_TOKEN_CHAR (hcl, qt->esc_char); + } + + qt->escaped = 0; + } + + ADD_TOKEN_CHAR (hcl, c); + +consumed: + if (TOKEN_NAME_LEN(hcl) > qt->max_len) + { + hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl), HCL_NULL); + return -1; + } + return 1; +} + static int feed_char (hcl_t* hcl, hcl_ooci_t c) { -/* TODO: track line number and column number? 
*/ - switch (FEED_LX_STATE(hcl)) + switch (FLX_STATE(hcl)) { - case HCL_FEED_LX_START: return feed_lx_start(hcl, c); - case HCL_FEED_LX_DELIM_TOKEN: return feed_lx_delim_token(hcl, c); - case HCL_FEED_LX_COMMENT: return feed_lx_comment(hcl, c); - case HCL_FEED_LX_SHARP_TOKEN: return feed_lx_sharp_token(hcl, c); + case HCL_FLX_START: return flx_start(hcl, c); + case HCL_FLX_DELIM_TOKEN: return flx_delim_token(hcl, c); + case HCL_FLX_COMMENT: return flx_comment(hcl, c); + case HCL_FLX_SHARP_TOKEN: return flx_sharp_token(hcl, c); + case HCL_FLX_QUOTED_TOKEN: return flx_quoted_token(hcl, c); /* - case HCL_FEED_LX_DQSTR: - return feed_lx_dqstr(hcl, c); + case HCL_FLX_SQSTR: + return flx_sqstr(hcl, c); - case HCL_FEED_LX_SQSTR: - return feed_lxsqstr(hcl, c); - - case HCL_FEED_LX_COMMENT: - break; - - case HCL_FEED_LX_CSTR: - break; - - case HCL_FEED_LX_DIRECTIVE: + case HCL_FLX_DIRECTIVE: break; */ @@ -3003,3 +2968,211 @@ default callback for on_eof? */ + + +/* ------------------------------------------------------------------------ */ + +/* TODO: rename compiler to something else that can include reader, printer, and compiler + * move compiler intialization/finalization here to more common place */ + +static void gc_compiler (hcl_t* hcl) +{ + hcl->c->r.s = hcl_moveoop(hcl, hcl->c->r.s); + hcl->c->r.e = hcl_moveoop(hcl, hcl->c->r.e); +} + +static void fini_compiler (hcl_t* hcl) +{ + /* called before the hcl object is closed */ + if (hcl->c) + { + if (hcl->c->cfs.ptr) + { + hcl_freemem (hcl, hcl->c->cfs.ptr); + hcl->c->cfs.ptr = HCL_NULL; + hcl->c->cfs.top = -1; + hcl->c->cfs.capa = 0; + } + + if (hcl->c->tv.s.ptr) + { + hcl_freemem (hcl, hcl->c->tv.s.ptr); + hcl->c->tv.s.ptr = HCL_NULL; + hcl->c->tv.s.len = 0; + hcl->c->tv.capa = 0; + hcl->c->tv.wcount = 0; + } + HCL_ASSERT (hcl, hcl->c->tv.capa == 0); + HCL_ASSERT (hcl, hcl->c->tv.wcount == 0); + + if (hcl->c->cblk.info) + { + hcl_freemem (hcl, hcl->c->cblk.info); + hcl->c->cblk.info = HCL_NULL; + hcl->c->cblk.info_capa 
= 0; + hcl->c->cblk.depth = -1; + } + + if (hcl->c->clsblk.info) + { + hcl_freemem (hcl, hcl->c->clsblk.info); + hcl->c->clsblk.info = HCL_NULL; + hcl->c->clsblk.info_capa = 0; + hcl->c->clsblk.depth = -1; + } + + if (hcl->c->fnblk.info) + { + hcl_freemem (hcl, hcl->c->fnblk.info); + hcl->c->fnblk.info = HCL_NULL; + hcl->c->fnblk.info_capa = 0; + hcl->c->fnblk.depth = -1; + } + + clear_io_names (hcl); + if (hcl->c->tok.name.ptr) hcl_freemem (hcl, hcl->c->tok.name.ptr); + + hcl_detachio (hcl); + + hcl_freemem (hcl, hcl->c); + hcl->c = HCL_NULL; + } +} + +int hcl_attachio (hcl_t* hcl, hcl_ioimpl_t reader, hcl_ioimpl_t printer) +{ + int n; + hcl_cb_t* cbp = HCL_NULL; + + if (!reader || !printer) + { + hcl_seterrbfmt (hcl, HCL_EINVAL, "reader and/or printer not supplied"); + return -1; + } + + if (!hcl->c) + { + hcl_cb_t cb; + + HCL_MEMSET (&cb, 0, HCL_SIZEOF(cb)); + cb.gc = gc_compiler; + cb.fini = fini_compiler; + cbp = hcl_regcb(hcl, &cb); + if (!cbp) return -1; + + hcl->c = (hcl_compiler_t*)hcl_callocmem(hcl, HCL_SIZEOF(*hcl->c)); + if (HCL_UNLIKELY(!hcl->c)) + { + hcl_deregcb (hcl, cbp); + return -1; + } + + hcl->c->ilchr_ucs.ptr = &hcl->c->ilchr; + hcl->c->ilchr_ucs.len = 1; + + hcl->c->r.s = hcl->_nil; + hcl->c->r.e = hcl->_nil; + + hcl->c->cfs.top = -1; + hcl->c->cblk.depth = -1; + hcl->c->clsblk.depth = -1; + hcl->c->fnblk.depth = -1; + + init_feed (hcl); + } + else if (hcl->c->reader || hcl->c->printer) + { + hcl_seterrnum (hcl, HCL_EPERM); /* TODO: change this error code */ + return -1; + } + + /* Some IO names could have been stored in earlier calls to this function. + * I clear such names before i begin this function. 
i don't clear it + * at the end of this function because i may be referenced as an error + * location */ + clear_io_names (hcl); + + /* initialize some key fields */ + hcl->c->printer = printer; + hcl->c->reader = reader; + hcl->c->nungots = 0; + + /* The name field and the includer field are HCL_NULL + * for the main stream */ + HCL_MEMSET (&hcl->c->inarg, 0, HCL_SIZEOF(hcl->c->inarg)); + hcl->c->inarg.line = 1; + hcl->c->inarg.colm = 1; + + /* open the top-level stream */ + n = hcl->c->reader(hcl, HCL_IO_OPEN, &hcl->c->inarg); + if (n <= -1) goto oops; + + HCL_MEMSET (&hcl->c->outarg, 0, HCL_SIZEOF(hcl->c->outarg)); + n = hcl->c->printer(hcl, HCL_IO_OPEN, &hcl->c->outarg); + if (n <= -1) + { + hcl->c->reader (hcl, HCL_IO_CLOSE, &hcl->c->inarg); + goto oops; + } + + /* the stream is open. set it as the current input stream */ + hcl->c->curinp = &hcl->c->inarg; + return 0; + +oops: + if (cbp) + { + hcl_deregcb (hcl, cbp); + hcl_freemem (hcl, hcl->c); + hcl->c = HCL_NULL; + } + else + { + hcl->c->printer = HCL_NULL; + hcl->c->reader = HCL_NULL; + } + return -1; +} + +void hcl_flushio (hcl_t* hcl) +{ + if (hcl->c) + { + if (hcl->c->printer) hcl->c->printer (hcl, HCL_IO_FLUSH, &hcl->c->outarg); + } +} + +void hcl_detachio (hcl_t* hcl) +{ + /* an error occurred and control has reached here + * probably, some included files might not have been + * closed. 
close them */ + + if (hcl->c) + { + if (hcl->c->reader) + { + while (hcl->c->curinp != &hcl->c->inarg) + { + hcl_ioinarg_t* prev; + + /* nothing much to do about a close error */ + hcl->c->reader (hcl, HCL_IO_CLOSE, hcl->c->curinp); + + prev = hcl->c->curinp->includer; + HCL_ASSERT (hcl, hcl->c->curinp->name != HCL_NULL); + hcl_freemem (hcl, hcl->c->curinp); + hcl->c->curinp = prev; + } + + hcl->c->reader (hcl, HCL_IO_CLOSE, hcl->c->curinp); + hcl->c->reader = HCL_NULL; /* ready for another attachment */ + } + + if (hcl->c->printer) + { + hcl->c->printer (hcl, HCL_IO_CLOSE, &hcl->c->outarg); + hcl->c->printer = HCL_NULL; /* ready for another attachment */ + } + } +}