some work done for the feed based reader

This commit is contained in:
hyung-hwan 2022-07-22 08:02:14 +00:00
parent 7f268293f0
commit 65aacaaf4f
5 changed files with 488 additions and 288 deletions

View File

@ -821,11 +821,14 @@ static int feed_loop (hcl_t* hcl, xtn_t* xtn, int cflags, int verbose)
return -1; return -1;
} }
/*(setvbuf (fp, NULL, _IONBF, 0);*/
while (1) while (1)
{ {
hcl_oow_t n; hcl_oow_t n;
n = fread(&buf[len], 1, HCL_COUNTOF(buf) - len, fp); /*n = fread(&buf[len], 1, HCL_COUNTOF(buf) - len, fp);*/
n = read(fileno(fp), &buf[len], HCL_COUNTOF(buf) - len);
if (n > 0) if (n > 0)
{ {
int x; int x;
@ -861,7 +864,7 @@ static int feed_loop (hcl_t* hcl, xtn_t* xtn, int cflags, int verbose)
} }
} }
if (feof(fp)) if (n == 0 || feof(fp))
{ {
if (len > 0) if (len > 0)
{ {

View File

@ -101,9 +101,9 @@ static char* synerrstr[] =
"illegal character", "illegal character",
"illegal token", "illegal token",
"comment not closed", "comment not closed",
"string/character not closed",
"invalid hashed literal",
"wrong character literal", "wrong character literal",
"wrong string literal",
"invalid hashed literal",
"invalid numeric literal", "invalid numeric literal",
"out of integer range", "out of integer range",
"wrong error literal", "wrong error literal",

View File

@ -524,22 +524,41 @@ struct hcl_rstl_t
hcl_rstl_t* prev; hcl_rstl_t* prev;
}; };
typedef struct hcl_feed_dt_t hcl_feed_dt_t; typedef struct hcl_flx_dt_t hcl_flx_dt_t; /* delemiter token */
struct hcl_feed_dt_t struct hcl_flx_dt_t
{ {
int row_start; int row_start;
int row_end; int row_end;
int col_next; int col_next;
}; };
enum hcl_feed_lx_state_t typedef struct hcl_flx_qt_t hcl_flx_qt_t; /* quoted token */
struct hcl_flx_qt_t
{ {
HCL_FEED_LX_START, /* input data */
HCL_FEED_LX_DELIM_TOKEN, hcl_ooch_t end_char;
HCL_FEED_LX_COMMENT, hcl_ooch_t esc_char;
HCL_FEED_LX_SHARP_TOKEN hcl_oow_t min_len;
hcl_oow_t max_len;
hcl_iotok_type_t tok_type;
hcl_synerrnum_t synerr_code;
int regex;
/* state data */
int escaped;
int digit_count;
hcl_ooci_t c_acc;
}; };
typedef enum hcl_feed_lx_state_t hcl_feed_lx_state_t;
enum hcl_flx_state_t
{
HCL_FLX_START,
HCL_FLX_DELIM_TOKEN,
HCL_FLX_COMMENT,
HCL_FLX_SHARP_TOKEN,
HCL_FLX_QUOTED_TOKEN
};
typedef enum hcl_flx_state_t hcl_flx_state_t;
struct hcl_compiler_t struct hcl_compiler_t
{ {
@ -590,10 +609,15 @@ struct hcl_compiler_t
{ {
struct struct
{ {
hcl_feed_lx_state_t state; hcl_flx_state_t state;
hcl_ioloc_t loc; hcl_ioloc_t loc;
union
{
hcl_flx_dt_t dt; /* delimiter token */
hcl_flx_qt_t qt; /* quoted token */
} u;
} lx; } lx;
hcl_feed_dt_t dt; /* delimiter token */
struct struct
{ {

View File

@ -105,9 +105,9 @@ enum hcl_synerrnum_t
HCL_SYNERR_ILCHR, /* illegal character */ HCL_SYNERR_ILCHR, /* illegal character */
HCL_SYNERR_ILTOK, /* invalid token */ HCL_SYNERR_ILTOK, /* invalid token */
HCL_SYNERR_CMTNC, /* comment not closed */ HCL_SYNERR_CMTNC, /* comment not closed */
HCL_SYNERR_STRCHRNC, /* string/character not closed */
HCL_SYNERR_HASHLIT, /* wrong hashed literal */
HCL_SYNERR_CHARLIT, /* wrong character literal */ HCL_SYNERR_CHARLIT, /* wrong character literal */
HCL_SYNERR_STRLIT, /* wrong string literal */
HCL_SYNERR_HASHLIT, /* wrong hashed literal */
HCL_SYNERR_NUMLIT , /* invalid numeric literal */ HCL_SYNERR_NUMLIT , /* invalid numeric literal */
HCL_SYNERR_NUMRANGE, /* number range error */ HCL_SYNERR_NUMRANGE, /* number range error */
HCL_SYNERR_ERRLIT, /* wrong error literal */ HCL_SYNERR_ERRLIT, /* wrong error literal */

View File

@ -391,6 +391,7 @@ static int copy_string_to (hcl_t* hcl, const hcl_oocs_t* src, hcl_oocs_t* dst, h
#define CLEAR_TOKEN_NAME(hcl) ((hcl)->c->tok.name.len = 0) #define CLEAR_TOKEN_NAME(hcl) ((hcl)->c->tok.name.len = 0)
#define SET_TOKEN_TYPE(hcl,tv) ((hcl)->c->tok.type = (tv)) #define SET_TOKEN_TYPE(hcl,tv) ((hcl)->c->tok.type = (tv))
#define SET_TOKEN_LOC(hcl,locv) ((hcl)->c->tok.loc = *(locv))
#define TOKEN_TYPE(hcl) ((hcl)->c->tok.type) #define TOKEN_TYPE(hcl) ((hcl)->c->tok.type)
#define TOKEN_NAME(hcl) (&(hcl)->c->tok.name) #define TOKEN_NAME(hcl) (&(hcl)->c->tok.name)
@ -552,7 +553,7 @@ static int skip_comment (hcl_t* hcl)
return 0; return 0;
} }
static int get_string (hcl_t* hcl, hcl_ooch_t end_char, hcl_ooch_t esc_char, int regex, hcl_oow_t preescaped) static int get_string (hcl_t* hcl, hcl_ooch_t end_char, hcl_ooch_t esc_char, int regex, hcl_oow_t preescaped, hcl_synerrnum_t synerr_code)
{ {
hcl_ooci_t c; hcl_ooci_t c;
hcl_oow_t escaped = preescaped; hcl_oow_t escaped = preescaped;
@ -567,7 +568,7 @@ static int get_string (hcl_t* hcl, hcl_ooch_t end_char, hcl_ooch_t esc_char, int
if (c == HCL_OOCI_EOF) if (c == HCL_OOCI_EOF)
{ {
hcl_setsynerr (hcl, HCL_SYNERR_STRCHRNC, TOKEN_LOC(hcl) /*LEXER_LOC(hcl)*/, HCL_NULL); hcl_setsynerr (hcl, synerr_code, TOKEN_LOC(hcl) /*LEXER_LOC(hcl)*/, HCL_NULL);
return -1; return -1;
} }
@ -1036,7 +1037,7 @@ retry:
/* clear the token name, reset its location */ /* clear the token name, reset its location */
SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); /* is it correct? */ SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); /* is it correct? */
CLEAR_TOKEN_NAME (hcl); CLEAR_TOKEN_NAME (hcl);
hcl->c->tok.loc = hcl->c->lxc.l; /* set token location */ SET_TOKEN_LOC (hcl, LEXER_LOC(hcl));
c = hcl->c->lxc.c; c = hcl->c->lxc.c;
@ -1187,14 +1188,13 @@ retry:
ADD_TOKEN_CHAR (hcl, c); ADD_TOKEN_CHAR (hcl, c);
break; break;
case '\"': case '\"':
if (get_string(hcl, '\"', '\\', 0, 0) <= -1) return -1; if (get_string(hcl, '\"', '\\', 0, 0, HCL_SYNERR_STRLIT) <= -1) return -1;
break; break;
case '\'': case '\'':
if (get_string(hcl, '\'', '\\', 0, 0) <= -1) return -1; if (get_string(hcl, '\'', '\\', 0, 0, HCL_SYNERR_CHARLIT) <= -1) return -1;
if (hcl->c->tok.name.len != 1) if (TOKEN_NAME_LEN(hcl) != 1)
{ {
hcl_setsynerr (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); hcl_setsynerr (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
return -1; return -1;
@ -2218,218 +2218,11 @@ hcl_cnodetoobj (hcl_t* hcl, hcl_cnode_t* x)
} }
*/ */
/* ------------------------------------------------------------------------ */
/* TODO: rename compiler to something else that can include reader, printer, and compiler
* move compiler intialization/finalization here to more common place */
static void gc_compiler (hcl_t* hcl)
{
hcl->c->r.s = hcl_moveoop(hcl, hcl->c->r.s);
hcl->c->r.e = hcl_moveoop(hcl, hcl->c->r.e);
}
static void fini_compiler (hcl_t* hcl)
{
/* called before the hcl object is closed */
if (hcl->c)
{
if (hcl->c->cfs.ptr)
{
hcl_freemem (hcl, hcl->c->cfs.ptr);
hcl->c->cfs.ptr = HCL_NULL;
hcl->c->cfs.top = -1;
hcl->c->cfs.capa = 0;
}
if (hcl->c->tv.s.ptr)
{
hcl_freemem (hcl, hcl->c->tv.s.ptr);
hcl->c->tv.s.ptr = HCL_NULL;
hcl->c->tv.s.len = 0;
hcl->c->tv.capa = 0;
hcl->c->tv.wcount = 0;
}
HCL_ASSERT (hcl, hcl->c->tv.capa == 0);
HCL_ASSERT (hcl, hcl->c->tv.wcount == 0);
if (hcl->c->cblk.info)
{
hcl_freemem (hcl, hcl->c->cblk.info);
hcl->c->cblk.info = HCL_NULL;
hcl->c->cblk.info_capa = 0;
hcl->c->cblk.depth = -1;
}
if (hcl->c->clsblk.info)
{
hcl_freemem (hcl, hcl->c->clsblk.info);
hcl->c->clsblk.info = HCL_NULL;
hcl->c->clsblk.info_capa = 0;
hcl->c->clsblk.depth = -1;
}
if (hcl->c->fnblk.info)
{
hcl_freemem (hcl, hcl->c->fnblk.info);
hcl->c->fnblk.info = HCL_NULL;
hcl->c->fnblk.info_capa = 0;
hcl->c->fnblk.depth = -1;
}
clear_io_names (hcl);
if (hcl->c->tok.name.ptr) hcl_freemem (hcl, hcl->c->tok.name.ptr);
hcl_detachio (hcl);
hcl_freemem (hcl, hcl->c);
hcl->c = HCL_NULL;
}
}
int hcl_attachio (hcl_t* hcl, hcl_ioimpl_t reader, hcl_ioimpl_t printer)
{
int n;
hcl_cb_t* cbp = HCL_NULL;
if (!reader || !printer)
{
hcl_seterrbfmt (hcl, HCL_EINVAL, "reader and/or printer not supplied");
return -1;
}
if (!hcl->c)
{
hcl_cb_t cb;
HCL_MEMSET (&cb, 0, HCL_SIZEOF(cb));
cb.gc = gc_compiler;
cb.fini = fini_compiler;
cbp = hcl_regcb(hcl, &cb);
if (!cbp) return -1;
hcl->c = (hcl_compiler_t*)hcl_callocmem(hcl, HCL_SIZEOF(*hcl->c));
if (HCL_UNLIKELY(!hcl->c))
{
hcl_deregcb (hcl, cbp);
return -1;
}
hcl->c->ilchr_ucs.ptr = &hcl->c->ilchr;
hcl->c->ilchr_ucs.len = 1;
hcl->c->r.s = hcl->_nil;
hcl->c->r.e = hcl->_nil;
hcl->c->cfs.top = -1;
hcl->c->cblk.depth = -1;
hcl->c->clsblk.depth = -1;
hcl->c->fnblk.depth = -1;
}
else if (hcl->c->reader || hcl->c->printer)
{
hcl_seterrnum (hcl, HCL_EPERM); /* TODO: change this error code */
return -1;
}
/* Some IO names could have been stored in earlier calls to this function.
* I clear such names before i begin this function. i don't clear it
* at the end of this function because i may be referenced as an error
* location */
clear_io_names (hcl);
/* initialize some key fields */
hcl->c->printer = printer;
hcl->c->reader = reader;
hcl->c->nungots = 0;
/* The name field and the includer field are HCL_NULL
* for the main stream */
HCL_MEMSET (&hcl->c->inarg, 0, HCL_SIZEOF(hcl->c->inarg));
hcl->c->inarg.line = 1;
hcl->c->inarg.colm = 1;
/* open the top-level stream */
n = hcl->c->reader(hcl, HCL_IO_OPEN, &hcl->c->inarg);
if (n <= -1) goto oops;
HCL_MEMSET (&hcl->c->outarg, 0, HCL_SIZEOF(hcl->c->outarg));
n = hcl->c->printer(hcl, HCL_IO_OPEN, &hcl->c->outarg);
if (n <= -1)
{
hcl->c->reader (hcl, HCL_IO_CLOSE, &hcl->c->inarg);
goto oops;
}
/* the stream is open. set it as the current input stream */
hcl->c->curinp = &hcl->c->inarg;
return 0;
oops:
if (cbp)
{
hcl_deregcb (hcl, cbp);
hcl_freemem (hcl, hcl->c);
hcl->c = HCL_NULL;
}
else
{
hcl->c->printer = HCL_NULL;
hcl->c->reader = HCL_NULL;
}
return -1;
}
void hcl_flushio (hcl_t* hcl)
{
if (hcl->c)
{
if (hcl->c->printer) hcl->c->printer (hcl, HCL_IO_FLUSH, &hcl->c->outarg);
}
}
void hcl_detachio (hcl_t* hcl)
{
/* an error occurred and control has reached here
* probably, some included files might not have been
* closed. close them */
if (hcl->c)
{
if (hcl->c->reader)
{
while (hcl->c->curinp != &hcl->c->inarg)
{
hcl_ioinarg_t* prev;
/* nothing much to do about a close error */
hcl->c->reader (hcl, HCL_IO_CLOSE, hcl->c->curinp);
prev = hcl->c->curinp->includer;
HCL_ASSERT (hcl, hcl->c->curinp->name != HCL_NULL);
hcl_freemem (hcl, hcl->c->curinp);
hcl->c->curinp = prev;
}
hcl->c->reader (hcl, HCL_IO_CLOSE, hcl->c->curinp);
hcl->c->reader = HCL_NULL; /* ready for another attachment */
}
if (hcl->c->printer)
{
hcl->c->printer (hcl, HCL_IO_CLOSE, &hcl->c->outarg);
hcl->c->printer = HCL_NULL; /* ready for another attachment */
}
}
}
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
static void init_feed (hcl_t* hcl) static void init_feed (hcl_t* hcl)
{ {
hcl->c->feed.lx.state = HCL_FEED_LX_START; hcl->c->feed.lx.state = HCL_FLX_START;
hcl->c->feed.lx.loc.line = 1; hcl->c->feed.lx.loc.line = 1;
hcl->c->feed.lx.loc.colm = 1; hcl->c->feed.lx.loc.colm = 1;
hcl->c->feed.lx.loc.file = HCL_NULL; hcl->c->feed.lx.loc.file = HCL_NULL;
@ -2502,7 +2295,7 @@ static delim_token_t delim_token_tab[] =
{ ":::", 3, HCL_IOTOK_TRPCOLONS } { ":::", 3, HCL_IOTOK_TRPCOLONS }
}; };
static int find_delim_token_char (hcl_t* hcl, const hcl_ooci_t c, int row_start, int row_end, int col, hcl_feed_dt_t* dt) static int find_delim_token_char (hcl_t* hcl, const hcl_ooci_t c, int row_start, int row_end, int col, hcl_flx_dt_t* dt)
{ {
int found = 0, i; int found = 0, i;
@ -2528,10 +2321,10 @@ static HCL_INLINE int feed_wrap_up (hcl_t* hcl, hcl_iotok_type_t type)
{ {
SET_TOKEN_TYPE (hcl, type); SET_TOKEN_TYPE (hcl, type);
HCL_DEBUG4 (hcl, "TOKEN LEN %zu=>[%.*js] %d\n", TOKEN_NAME_LEN(hcl), TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), TOKEN_TYPE(hcl)); HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl), TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), TOKEN_TYPE(hcl), (int)TOKEN_LOC(hcl)->line, (int)TOKEN_LOC(hcl)->colm);
/* TOOD: fire token callback or something */ /* TOOD: fire token callback or something */
hcl->c->feed.lx.state = HCL_FEED_LX_START; hcl->c->feed.lx.state = HCL_FLX_START;
return 0; return 0;
} }
@ -2547,49 +2340,56 @@ static int feed_wrap_up_with_str (hcl_t* hcl, const hcl_ooch_t* str, hcl_oow_t l
return feed_wrap_up(hcl, type); return feed_wrap_up(hcl, type);
} }
static int feed_continue (hcl_t* hcl, hcl_feed_lx_state_t state) static int feed_continue (hcl_t* hcl, hcl_flx_state_t state)
{ {
hcl->c->feed.lx.state = state; hcl->c->feed.lx.state = state;
return 0; return 0;
} }
static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_feed_lx_state_t state) static int feed_continue_with_char (hcl_t* hcl, hcl_ooci_t c, hcl_flx_state_t state)
{ {
ADD_TOKEN_CHAR (hcl, c); ADD_TOKEN_CHAR (hcl, c);
hcl->c->feed.lx.state = state; hcl->c->feed.lx.state = state;
return 0; return 0;
} }
#define FEED_WRAP_UP(hcl, type) do { if (feed_wrap_up(hcl, type) <= -1) return -1; } while(0) #define FEED_WRAP_UP(hcl, type) do { if (feed_wrap_up(hcl, type) <= -1) return -1; } while(0)
#define FEED_WRAP_UP_WITH_CHAR(hcl, c, type) do { if (feed_wrap_up_with_char(hcl, c, type) <= -1) return -1; } while(0) #define FEED_WRAP_UP_WITH_CHAR(hcl, c, type) do { if (feed_wrap_up_with_char(hcl, c, type) <= -1) return -1; } while(0)
#define FEED_WRAP_UP_WITH_CHARS(hcl, str, len, type) do { if (feed_wrap_up_with_str(hcl, str, len, type) <= -1) return -1; } while(0) #define FEED_WRAP_UP_WITH_CHARS(hcl, str, len, type) do { if (feed_wrap_up_with_str(hcl, str, len, type) <= -1) return -1; } while(0)
#define FEED_CONTINUE(hcl, state) do { if (feed_continue(hcl, state) <= -1) return -1; } while(0) #define FEED_CONTINUE(hcl, state) do { if (feed_continue(hcl, state) <= -1) return -1; } while(0)
#define FEED_CONTINUE_WITH_CHAR(hcl, c, state) do { if (feed_continue_with_char(hcl, c, state) <= -1) return -1; } while(0) #define FEED_CONTINUE_WITH_CHAR(hcl, c, state) do { if (feed_continue_with_char(hcl, c, state) <= -1) return -1; } while(0)
#define FEED_LX_STATE(hcl) ((hcl)->c->feed.lx.state) #define FLX_STATE(hcl) ((hcl)->c->feed.lx.state)
#define FEED_LX_LOC(hcl) (&((hcl)->c->feed.lx.loc)) #define FLX_LOC(hcl) (&((hcl)->c->feed.lx.loc))
static int feed_lx_start (hcl_t* hcl, hcl_ooci_t c) /* short-cuts to lexer state data */
#define FLX_DT(hcl) (&((hcl)->c->feed.lx.u.dt))
#define FLX_QT(hcl) (&((hcl)->c->feed.lx.u.qt))
static int flx_start (hcl_t* hcl, hcl_ooci_t c)
{ {
HCL_ASSERT (hcl, FEED_LX_STATE(hcl) == HCL_FEED_LX_START); HCL_ASSERT (hcl, FLX_STATE(hcl) == HCL_FLX_START);
if (is_spacechar(c)) goto consumed; /* skip spaces */
/* clear the token name, reset its location */ /* clear the token name, reset its location */
SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); /* is it correct? */ SET_TOKEN_TYPE (hcl, HCL_IOTOK_EOF); /* is it correct? */
CLEAR_TOKEN_NAME (hcl); CLEAR_TOKEN_NAME (hcl);
SET_TOKEN_LOC (hcl, &hcl->c->feed.lx.loc);
//HCL_DEBUG1 (hcl, "XXX[%jc]\n", c); //HCL_DEBUG1 (hcl, "XXX[%jc]\n", c);
if (find_delim_token_char(hcl, c, 0, HCL_COUNTOF(delim_token_tab) - 1, 0, &hcl->c->feed.dt)) if (find_delim_token_char(hcl, c, 0, HCL_COUNTOF(delim_token_tab) - 1, 0, FLX_DT(hcl)))
{ {
/* the character is one of the first character of a delimiter token */ /* the character is one of the first character of a delimiter token such as (, [, :, etc */
if (hcl->c->feed.dt.row_start == hcl->c->feed.dt.row_end && hcl->c->feed.dt.col_next == delim_token_tab[hcl->c->feed.dt.row_start].t_len) if (FLX_DT(hcl)->row_start == FLX_DT(hcl)->row_end &&
FLX_DT(hcl)->col_next == delim_token_tab[FLX_DT(hcl)->row_start].t_len)
{ {
FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[hcl->c->feed.dt.row_start].t_type); /* single character delimiter token */
FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[FLX_DT(hcl)->row_start].t_type);
} }
else else
{ {
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_DELIM_TOKEN); /* consume c and move to HCL_FEED_LX_DELIM_TOKEN state */ FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_DELIM_TOKEN); /* consume c and move to HCL_FLX_DELIM_TOKEN state */
} }
goto consumed; goto consumed;
} }
@ -2609,28 +2409,36 @@ static int feed_lx_start (hcl_t* hcl, hcl_ooci_t c)
} }
case ';': case ';':
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_COMMENT); FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_COMMENT);
break; break;
case '#': case '#':
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_SHARP_TOKEN); FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_SHARP_TOKEN);
break; break;
#if 0
case '\"': case '\"':
if (get_string(hcl, '\"', '\\', 0, 0) <= -1) return -1; HCL_MEMSET (FLX_QT(hcl), 0, HCL_SIZEOF(*FLX_QT(hcl)));
FLX_QT(hcl)->end_char = c;
FLX_QT(hcl)->esc_char = '\\';
FLX_QT(hcl)->min_len = 0;
FLX_QT(hcl)->max_len = HCL_TYPE_MAX(hcl_oow_t);
FLX_QT(hcl)->tok_type = HCL_IOTOK_STRLIT;
FLX_QT(hcl)->synerr_code = HCL_SYNERR_STRLIT;
FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on the the QUOTED_TOKEN state */
break; break;
case '\'': case '\'':
if (get_string(hcl, '\'', '\\', 0, 0) <= -1) return -1; HCL_MEMSET (FLX_QT(hcl), 0, HCL_SIZEOF(*FLX_QT(hcl)));
if (hcl->c->tok.name.len != 1) FLX_QT(hcl)->end_char = c;
{ FLX_QT(hcl)->esc_char = '\\';
hcl_setsynerr (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); FLX_QT(hcl)->min_len = 1;
return -1; FLX_QT(hcl)->max_len = 1;
} FLX_QT(hcl)->tok_type = HCL_IOTOK_CHARLIT;
SET_TOKEN_TYPE (hcl, HCL_IOTOK_CHARLIT); FLX_QT(hcl)->synerr_code = HCL_SYNERR_CHARLIT;
FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on the the QUOTED_TOKEN state */
break; break;
#if 0
case '#': case '#':
if (get_sharp_token(hcl) <= -1) return -1; if (get_sharp_token(hcl) <= -1) return -1;
break; break;
@ -2804,14 +2612,15 @@ not_consumed:
return 0; return 0;
} }
static int feed_lx_delim_token (hcl_t* hcl, hcl_ooci_t c) static int flx_delim_token (hcl_t* hcl, hcl_ooci_t c)
{ {
if (find_delim_token_char(hcl, c, hcl->c->feed.dt.row_start, hcl->c->feed.dt.row_end, hcl->c->feed.dt.col_next, &hcl->c->feed.dt)) if (find_delim_token_char(hcl, c, FLX_DT(hcl)->row_start, FLX_DT(hcl)->row_end, FLX_DT(hcl)->col_next, FLX_DT(hcl)))
{ {
if (hcl->c->feed.dt.row_start == hcl->c->feed.dt.row_end && hcl->c->feed.dt.col_next == delim_token_tab[hcl->c->feed.dt.row_start].t_len) if (FLX_DT(hcl)->row_start == FLX_DT(hcl)->row_end &&
FLX_DT(hcl)->col_next == delim_token_tab[FLX_DT(hcl)->row_start].t_len)
{ {
/* complete token and switch to the HCL_FEED_LX_START state */ /* complete token and switch to the HCL_FLX_START state */
FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[hcl->c->feed.dt.row_start].t_type); FEED_WRAP_UP_WITH_CHAR (hcl, c, delim_token_tab[FLX_DT(hcl)->row_start].t_type);
} }
else else
{ {
@ -2822,7 +2631,7 @@ static int feed_lx_delim_token (hcl_t* hcl, hcl_ooci_t c)
else else
{ {
/* the longest match so far */ /* the longest match so far */
FEED_WRAP_UP(hcl, delim_token_tab[hcl->c->feed.dt.row_start].t_type); FEED_WRAP_UP(hcl, delim_token_tab[FLX_DT(hcl)->row_start].t_type);
goto not_consumed; goto not_consumed;
} }
@ -2833,13 +2642,13 @@ not_consumed:
return 0; return 0;
} }
static int feed_lx_comment (hcl_t* hcl, hcl_ooci_t c) static int flx_comment (hcl_t* hcl, hcl_ooci_t c)
{ {
if (is_linebreak(c)) FEED_CONTINUE (hcl, HCL_FEED_LX_START); if (is_linebreak(c)) FEED_CONTINUE (hcl, HCL_FLX_START);
return 1; /* consumed */ return 1; /* consumed */
} }
static int feed_lx_sharp_token (hcl_t* hcl, hcl_ooci_t c) static int flx_sharp_token (hcl_t* hcl, hcl_ooci_t c)
{ {
/* /*
* #xXXXX hexadecimal * #xXXXX hexadecimal
@ -2873,7 +2682,7 @@ static int feed_lx_sharp_token (hcl_t* hcl, hcl_ooci_t c)
/* ## comment start /* ## comment start
* #! also comment start. * #! also comment start.
* ; comment start */ * ; comment start */
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FEED_LX_COMMENT); FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_COMMENT);
goto consumed; goto consumed;
case '[': case '[':
@ -2887,10 +2696,10 @@ static int feed_lx_sharp_token (hcl_t* hcl, hcl_ooci_t c)
default: default:
// TODO: fix this part // TODO: fix this part
if (is_spacechar(c) || c == HCL_UCI_EOF) if (is_spacechar(c) || c == HCL_UCI_EOF)
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FEED_LX_LOC(hcl), HCL_NULL, hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL,
"no character after the hash sign"); "no character after the hash sign");
else else
hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FEED_LX_LOC(hcl), HCL_NULL, hcl_setsynerrbfmt (hcl, HCL_SYNERR_HASHLIT, FLX_LOC(hcl), HCL_NULL,
"invalid character after the hash sign - %jc", c); "invalid character after the hash sign - %jc", c);
return -1; return -1;
} }
@ -2902,30 +2711,186 @@ not_consumed:
return 0; return 0;
} }
static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* double-quoted string */
{
hcl_flx_qt_t* qt = FLX_QT(hcl);
if (c == HCL_OOCI_EOF)
{
hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl) /*hcl->c->feed.lx.loc?*/, HCL_NULL);
return -1;
}
if (qt->escaped == 3)
{
if (c >= '0' && c <= '7')
{
/* more octal digits */
qt->c_acc = qt->c_acc * 8 + c - '0';
qt->digit_count++;
if (qt->digit_count >= qt->escaped)
{
/* should i limit the max to 0xFF/0377?
* if (qt->c_acc > 0377) qt->c_acc = 0377;*/
ADD_TOKEN_CHAR (hcl, qt->c_acc);
qt->escaped = 0;
}
goto consumed;
}
else
{
ADD_TOKEN_CHAR (hcl, qt->c_acc);
qt->escaped = 0;
}
}
else if (qt->escaped == 2 || qt->escaped == 4 || qt->escaped == 8)
{
if (c >= '0' && c <= '9')
{
qt->c_acc = qt->c_acc * 16 + c - '0';
qt->digit_count++;
if (qt->digit_count >= qt->escaped)
{
ADD_TOKEN_CHAR (hcl, qt->c_acc);
qt->escaped = 0;
}
goto consumed;
}
else if (c >= 'A' && c <= 'F')
{
qt->c_acc = qt->c_acc * 16 + c - 'A' + 10;
qt->digit_count++;
if (qt->digit_count >= qt->escaped)
{
ADD_TOKEN_CHAR (hcl, qt->c_acc);
qt->escaped = 0;
}
goto consumed;
}
else if (c >= 'a' && c <= 'f')
{
qt->c_acc = qt->c_acc * 16 + c - 'a' + 10;
qt->digit_count++;
if (qt->digit_count >= qt->escaped)
{
ADD_TOKEN_CHAR (hcl, qt->c_acc);
qt->escaped = 0;
}
goto consumed;
}
else
{
hcl_ooch_t rc;
rc = (qt->escaped == 2)? 'x':
(qt->escaped == 4)? 'u': 'U';
if (qt->digit_count == 0)
ADD_TOKEN_CHAR (hcl, rc);
else ADD_TOKEN_CHAR (hcl, qt->c_acc);
qt->escaped = 0;
}
}
if (qt->escaped == 0 && c == qt->end_char)
{
/* terminating quote */
FEED_WRAP_UP (hcl, qt->tok_type); /* HCL_IOTOK_STRLIT or HCL_IOTOK_CHARLIT */
if (TOKEN_NAME_LEN(hcl) < qt->min_len)
{
hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl), HCL_NULL);
return -1;
}
goto consumed;
}
if (qt->escaped == 0 && c == qt->esc_char)
{
qt->escaped = 1;
goto consumed;
}
if (qt->escaped == 1)
{
if (c == 'a') c = '\a';
else if (c == 'b') c = '\b';
else if (c == 'f') c = '\f';
else if (c == 'n') c = '\n';
else if (c == 'r') c = '\r';
else if (c == 't') c = '\t';
else if (c == 'v') c = '\v';
else if (c >= '0' && c <= '7' && !qt->regex)
{
/* i don't support the octal notation for a regular expression.
* it conflicts with the backreference notation between \1 and \7 inclusive. */
qt->escaped = 3;
qt->digit_count = 1;
qt->c_acc = c - '0';
goto consumed;
}
else if (c == 'x')
{
qt->escaped = 2;
qt->digit_count = 0;
qt->c_acc = 0;
goto consumed;
}
#if (HCL_SIZEOF_OOCH_T >= 2)
else if (c == 'u')
{
qt->escaped = 4;
qt->digit_count = 0;
qt->c_acc = 0;
goto consumed;
}
#endif
#if (HCL_SIZEOF_OOCH_T >= 4)
else if (c == 'U')
{
qt->escaped = 8;
qt->digit_count = 0;
qt->c_acc = 0;
goto consumed;
}
#endif
else if (qt->regex)
{
/* if the following character doesn't compose a proper
* escape sequence, keep the escape character.
* an unhandled escape sequence can be handled
* outside this function since the escape character
* is preserved.*/
ADD_TOKEN_CHAR (hcl, qt->esc_char);
}
qt->escaped = 0;
}
ADD_TOKEN_CHAR (hcl, c);
consumed:
if (TOKEN_NAME_LEN(hcl) > qt->max_len)
{
hcl_setsynerr (hcl, qt->synerr_code, TOKEN_LOC(hcl), HCL_NULL);
return -1;
}
return 1;
}
static int feed_char (hcl_t* hcl, hcl_ooci_t c) static int feed_char (hcl_t* hcl, hcl_ooci_t c)
{ {
/* TODO: track line number and column number? */ switch (FLX_STATE(hcl))
switch (FEED_LX_STATE(hcl))
{ {
case HCL_FEED_LX_START: return feed_lx_start(hcl, c); case HCL_FLX_START: return flx_start(hcl, c);
case HCL_FEED_LX_DELIM_TOKEN: return feed_lx_delim_token(hcl, c); case HCL_FLX_DELIM_TOKEN: return flx_delim_token(hcl, c);
case HCL_FEED_LX_COMMENT: return feed_lx_comment(hcl, c); case HCL_FLX_COMMENT: return flx_comment(hcl, c);
case HCL_FEED_LX_SHARP_TOKEN: return feed_lx_sharp_token(hcl, c); case HCL_FLX_SHARP_TOKEN: return flx_sharp_token(hcl, c);
case HCL_FLX_QUOTED_TOKEN: return flx_quoted_token(hcl, c);
/* /*
case HCL_FEED_LX_DQSTR: case HCL_FLX_SQSTR:
return feed_lx_dqstr(hcl, c); return flx_sqstr(hcl, c);
case HCL_FEED_LX_SQSTR: case HCL_FLX_DIRECTIVE:
return feed_lxsqstr(hcl, c);
case HCL_FEED_LX_COMMENT:
break;
case HCL_FEED_LX_CSTR:
break;
case HCL_FEED_LX_DIRECTIVE:
break; break;
*/ */
@ -3003,3 +2968,211 @@ default callback for on_eof?
*/ */
/* ------------------------------------------------------------------------ */
/* TODO: rename compiler to something else that can include reader, printer, and compiler
* move compiler intialization/finalization here to more common place */
static void gc_compiler (hcl_t* hcl)
{
hcl->c->r.s = hcl_moveoop(hcl, hcl->c->r.s);
hcl->c->r.e = hcl_moveoop(hcl, hcl->c->r.e);
}
static void fini_compiler (hcl_t* hcl)
{
/* called before the hcl object is closed */
if (hcl->c)
{
if (hcl->c->cfs.ptr)
{
hcl_freemem (hcl, hcl->c->cfs.ptr);
hcl->c->cfs.ptr = HCL_NULL;
hcl->c->cfs.top = -1;
hcl->c->cfs.capa = 0;
}
if (hcl->c->tv.s.ptr)
{
hcl_freemem (hcl, hcl->c->tv.s.ptr);
hcl->c->tv.s.ptr = HCL_NULL;
hcl->c->tv.s.len = 0;
hcl->c->tv.capa = 0;
hcl->c->tv.wcount = 0;
}
HCL_ASSERT (hcl, hcl->c->tv.capa == 0);
HCL_ASSERT (hcl, hcl->c->tv.wcount == 0);
if (hcl->c->cblk.info)
{
hcl_freemem (hcl, hcl->c->cblk.info);
hcl->c->cblk.info = HCL_NULL;
hcl->c->cblk.info_capa = 0;
hcl->c->cblk.depth = -1;
}
if (hcl->c->clsblk.info)
{
hcl_freemem (hcl, hcl->c->clsblk.info);
hcl->c->clsblk.info = HCL_NULL;
hcl->c->clsblk.info_capa = 0;
hcl->c->clsblk.depth = -1;
}
if (hcl->c->fnblk.info)
{
hcl_freemem (hcl, hcl->c->fnblk.info);
hcl->c->fnblk.info = HCL_NULL;
hcl->c->fnblk.info_capa = 0;
hcl->c->fnblk.depth = -1;
}
clear_io_names (hcl);
if (hcl->c->tok.name.ptr) hcl_freemem (hcl, hcl->c->tok.name.ptr);
hcl_detachio (hcl);
hcl_freemem (hcl, hcl->c);
hcl->c = HCL_NULL;
}
}
int hcl_attachio (hcl_t* hcl, hcl_ioimpl_t reader, hcl_ioimpl_t printer)
{
int n;
hcl_cb_t* cbp = HCL_NULL;
if (!reader || !printer)
{
hcl_seterrbfmt (hcl, HCL_EINVAL, "reader and/or printer not supplied");
return -1;
}
if (!hcl->c)
{
hcl_cb_t cb;
HCL_MEMSET (&cb, 0, HCL_SIZEOF(cb));
cb.gc = gc_compiler;
cb.fini = fini_compiler;
cbp = hcl_regcb(hcl, &cb);
if (!cbp) return -1;
hcl->c = (hcl_compiler_t*)hcl_callocmem(hcl, HCL_SIZEOF(*hcl->c));
if (HCL_UNLIKELY(!hcl->c))
{
hcl_deregcb (hcl, cbp);
return -1;
}
hcl->c->ilchr_ucs.ptr = &hcl->c->ilchr;
hcl->c->ilchr_ucs.len = 1;
hcl->c->r.s = hcl->_nil;
hcl->c->r.e = hcl->_nil;
hcl->c->cfs.top = -1;
hcl->c->cblk.depth = -1;
hcl->c->clsblk.depth = -1;
hcl->c->fnblk.depth = -1;
init_feed (hcl);
}
else if (hcl->c->reader || hcl->c->printer)
{
hcl_seterrnum (hcl, HCL_EPERM); /* TODO: change this error code */
return -1;
}
/* Some IO names could have been stored in earlier calls to this function.
* I clear such names before i begin this function. i don't clear it
* at the end of this function because i may be referenced as an error
* location */
clear_io_names (hcl);
/* initialize some key fields */
hcl->c->printer = printer;
hcl->c->reader = reader;
hcl->c->nungots = 0;
/* The name field and the includer field are HCL_NULL
* for the main stream */
HCL_MEMSET (&hcl->c->inarg, 0, HCL_SIZEOF(hcl->c->inarg));
hcl->c->inarg.line = 1;
hcl->c->inarg.colm = 1;
/* open the top-level stream */
n = hcl->c->reader(hcl, HCL_IO_OPEN, &hcl->c->inarg);
if (n <= -1) goto oops;
HCL_MEMSET (&hcl->c->outarg, 0, HCL_SIZEOF(hcl->c->outarg));
n = hcl->c->printer(hcl, HCL_IO_OPEN, &hcl->c->outarg);
if (n <= -1)
{
hcl->c->reader (hcl, HCL_IO_CLOSE, &hcl->c->inarg);
goto oops;
}
/* the stream is open. set it as the current input stream */
hcl->c->curinp = &hcl->c->inarg;
return 0;
oops:
if (cbp)
{
hcl_deregcb (hcl, cbp);
hcl_freemem (hcl, hcl->c);
hcl->c = HCL_NULL;
}
else
{
hcl->c->printer = HCL_NULL;
hcl->c->reader = HCL_NULL;
}
return -1;
}
void hcl_flushio (hcl_t* hcl)
{
if (hcl->c)
{
if (hcl->c->printer) hcl->c->printer (hcl, HCL_IO_FLUSH, &hcl->c->outarg);
}
}
void hcl_detachio (hcl_t* hcl)
{
/* an error occurred and control has reached here
* probably, some included files might not have been
* closed. close them */
if (hcl->c)
{
if (hcl->c->reader)
{
while (hcl->c->curinp != &hcl->c->inarg)
{
hcl_ioinarg_t* prev;
/* nothing much to do about a close error */
hcl->c->reader (hcl, HCL_IO_CLOSE, hcl->c->curinp);
prev = hcl->c->curinp->includer;
HCL_ASSERT (hcl, hcl->c->curinp->name != HCL_NULL);
hcl_freemem (hcl, hcl->c->curinp);
hcl->c->curinp = prev;
}
hcl->c->reader (hcl, HCL_IO_CLOSE, hcl->c->curinp);
hcl->c->reader = HCL_NULL; /* ready for another attachment */
}
if (hcl->c->printer)
{
hcl->c->printer (hcl, HCL_IO_CLOSE, &hcl->c->outarg);
hcl->c->printer = HCL_NULL; /* ready for another attachment */
}
}
}