coding include file handling in the feed-based reader

This commit is contained in:
hyung-hwan 2022-07-28 14:07:18 +00:00
parent aab704ac4b
commit 2543b0d634
4 changed files with 466 additions and 340 deletions

View File

@ -825,7 +825,7 @@ static int feed_loop (hcl_t* hcl, xtn_t* xtn, int cflags, int verbose)
while (1) while (1)
{ {
hcl_oow_t n; hcl_ooi_t n;
/*n = fread(&buf[len], 1, HCL_COUNTOF(buf) - len, fp);*/ /*n = fread(&buf[len], 1, HCL_COUNTOF(buf) - len, fp);*/
n = read(fileno(fp), &buf[len], HCL_COUNTOF(buf) - len); n = read(fileno(fp), &buf[len], HCL_COUNTOF(buf) - len);
@ -868,7 +868,7 @@ static int feed_loop (hcl_t* hcl, xtn_t* xtn, int cflags, int verbose)
} }
break; break;
} }
if (ferror(fp)) if (n <= -1 || ferror(fp))
{ {
hcl_logbfmt (hcl, HCL_LOG_STDERR, "ERROR: unable to read %hs - %hs\n", xtn->read_path, strerror(errno)); hcl_logbfmt (hcl, HCL_LOG_STDERR, "ERROR: unable to read %hs - %hs\n", xtn->read_path, strerror(errno));
goto oops; goto oops;

View File

@ -632,6 +632,7 @@ struct hcl_frd_t
int level; int level;
int array_level; int array_level;
int flagv; int flagv;
int expect_include_file;
hcl_cnode_t* obj; hcl_cnode_t* obj;
}; };

View File

@ -1257,7 +1257,8 @@ struct hcl_ioinarg_t
{ {
hcl_oow_t pos; hcl_oow_t pos;
hcl_oow_t len; hcl_oow_t len;
int state; /* <<B.STATE>>
int state;*/
} b; } b;
hcl_oow_t line; hcl_oow_t line;

View File

@ -461,6 +461,7 @@ static int get_char (hcl_t* hcl)
return 0; return 0;
} }
/* <<B.STATE>> -> probably not needed any more?
if (hcl->c->curinp->b.state == -1) if (hcl->c->curinp->b.state == -1)
{ {
hcl->c->curinp->b.state = 0; hcl->c->curinp->b.state = 0;
@ -471,6 +472,7 @@ static int get_char (hcl_t* hcl)
hcl->c->curinp->b.state = 0; hcl->c->curinp->b.state = 0;
goto return_eof; goto return_eof;
} }
*/
if (hcl->c->curinp->b.pos >= hcl->c->curinp->b.len) if (hcl->c->curinp->b.pos >= hcl->c->curinp->b.len)
{ {
@ -478,7 +480,9 @@ static int get_char (hcl_t* hcl)
if (hcl->c->curinp->xlen <= 0) if (hcl->c->curinp->xlen <= 0)
{ {
/* <<B.STATE>>
return_eof: return_eof:
*/
hcl->c->curinp->lxc.c = HCL_OOCI_EOF; hcl->c->curinp->lxc.c = HCL_OOCI_EOF;
hcl->c->curinp->lxc.l.line = hcl->c->curinp->line; hcl->c->curinp->lxc.l.line = hcl->c->curinp->line;
hcl->c->curinp->lxc.l.colm = hcl->c->curinp->colm; hcl->c->curinp->lxc.l.colm = hcl->c->curinp->colm;
@ -2277,24 +2281,96 @@ static void fst_pop (hcl_t* hcl)
hcl->c->feed.top--; hcl->c->feed.top--;
} }
/*
 * Starts reading from the stream named by the current token.
 *
 * The token name is passed through add_io_name() and the result is used as
 * the name of a freshly allocated input argument, which is then opened via
 * the reader callback with HCL_IO_OPEN. On success the new stream becomes
 * hcl->c->curinp and remembers the previous stream in its 'includer' field.
 *
 * Returns 0 on success and -1 on failure. If the open request fails, a
 * HCL_SYNERR_INCLUDE syntax error is set at the current token location.
 */
static int feed_begin_include (hcl_t* hcl)
{
	const hcl_ooch_t* incl_name;
	hcl_ioinarg_t* incl;

	incl_name = add_io_name(hcl, TOKEN_NAME(hcl));
	if (HCL_UNLIKELY(!incl_name)) return -1;

	/* zero-filled allocation; only the fields below need explicit values */
	incl = (hcl_ioinarg_t*)hcl_callocmem(hcl, HCL_SIZEOF(*incl));
	if (HCL_UNLIKELY(!incl)) return -1;

	incl->name = incl_name;
	incl->line = 1;
	incl->colm = 1;
	incl->includer = hcl->c->curinp; /* stream to go back to when this one ends */

	if (hcl->c->reader(hcl, HCL_IO_OPEN, incl) <= -1)
	{
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_INCLUDE, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "unable to include %js", incl_name);
		hcl_freemem (hcl, incl);
		return -1;
	}

	/* from now on, input comes from the includee.
	 * note that no include depth counter is maintained here. */
	hcl->c->curinp = incl;
	return 0;
}
/*
 * Finishes the current included stream and switches back to its includer.
 *
 * Returns 0 if the current input is the main input (nothing to end),
 * 1 if an included stream has been closed successfully, and -1 if the
 * reader failed to close it. Even on a close failure the stream structure
 * is released and hcl->c->curinp already points to the includer, so the
 * caller must not issue HCL_IO_CLOSE for it again.
 */
static int feed_end_include (hcl_t* hcl)
{
	hcl_ioinarg_t* ended;
	int close_rc;

	/* the main input is never closed here */
	if (hcl->c->curinp == &hcl->c->inarg) return 0;

	close_rc = hcl->c->reader(hcl, HCL_IO_CLOSE, hcl->c->curinp);

	/* unlink and destroy the included stream regardless of the close
	 * result. the result is reported only after the cleanup is done. */
	ended = hcl->c->curinp;
	hcl->c->curinp = ended->includer;

	HCL_ASSERT (hcl, ended->name != HCL_NULL);
	hcl_freemem (hcl, ended);

	if (close_rc != 0) return -1; /* the deferred close failure */

	/* restore the character state saved in the includer */
	hcl->c->lxc = hcl->c->curinp->lxc;
	return 1;
}
static int feed_process_token (hcl_t* hcl) static int feed_process_token (hcl_t* hcl)
{ {
hcl_frd_t* frd = &hcl->c->feed.rd; hcl_frd_t* frd = &hcl->c->feed.rd;
HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl), TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), TOKEN_TYPE(hcl), (int)TOKEN_LOC(hcl)->line, (int)TOKEN_LOC(hcl)->colm); /* this function composes an s-expression non-recursively
#if 0
/* this function read an s-expression non-recursively
* by manipulating its own stack. */ * by manipulating its own stack. */
int level = 0, array_level = 0, flagv = 0; /*hcl_logbfmt (hcl, HCL_LOG_STDERR, "TOKEN => [%.*js] type=%d\n", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), TOKEN_TYPE(hcl));*/
hcl_cnode_t* obj = HCL_NULL; if (frd->expect_include_file)
#endif
/*
while (1)
{ {
redo: if (TOKEN_TYPE(hcl) != HCL_IOTOK_STRLIT)
*/ {
hcl_setsynerr (hcl, HCL_SYNERR_STRING, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto oops;
}
frd->expect_include_file = 0;
if (feed_begin_include(hcl) <= -1) goto oops;
goto ok;
}
switch (TOKEN_TYPE(hcl)) switch (TOKEN_TYPE(hcl))
{ {
default: default:
@ -2302,23 +2378,15 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
goto oops; goto oops;
case HCL_IOTOK_EOF: case HCL_IOTOK_EOF:
/* TODO: change the code. not an error? */
hcl_setsynerr (hcl, HCL_SYNERR_EOF, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); hcl_setsynerr (hcl, HCL_SYNERR_EOF, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto oops; goto oops;
#if 0
/* this one is special?? */
case HCL_IOTOK_INCLUDE: case HCL_IOTOK_INCLUDE:
/* TODO: should i limit where #include can be specified? /* TODO: should i limit where #include can be specified?
* disallow it inside a list literal or an array literal? */ * disallow it inside a list literal or an array literal? */
GET_TOKEN_WITH_GOTO (hcl, oops); frd->expect_include_file = 1;
if (TOKEN_TYPE(hcl) != HCL_IOTOK_STRLIT) goto ok;
{
hcl_setsynerr (hcl, HCL_SYNERR_STRING, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto oops;
}
if (begin_include(hcl) <= -1) goto oops;
goto redo;
#endif
case HCL_IOTOK_LBRACK: /* [] */ case HCL_IOTOK_LBRACK: /* [] */
frd->flagv = 0; frd->flagv = 0;
@ -2363,8 +2431,7 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
if (LIST_FLAG_GET_CONCODE(frd->flagv) == HCL_CONCODE_ARRAY) frd->array_level++; if (LIST_FLAG_GET_CONCODE(frd->flagv) == HCL_CONCODE_ARRAY) frd->array_level++;
/* read the next token */ /* read the next token */
// GET_TOKEN_WITH_GOTO (hcl, oops); goto ok;
goto redo;
case HCL_IOTOK_DOT: case HCL_IOTOK_DOT:
if (frd->level <= 0 || !can_dot_list(hcl)) if (frd->level <= 0 || !can_dot_list(hcl))
@ -2377,8 +2444,7 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
goto oops; goto oops;
} }
// GET_TOKEN_WITH_GOTO (hcl, oops); goto ok;
goto redo;
case HCL_IOTOK_COLON: case HCL_IOTOK_COLON:
if (frd->level <= 0 || !can_colon_list(hcl)) if (frd->level <= 0 || !can_colon_list(hcl))
@ -2387,8 +2453,7 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
goto oops; goto oops;
} }
// GET_TOKEN_WITH_GOTO (hcl, oops); goto ok;
goto redo;
case HCL_IOTOK_COMMA: case HCL_IOTOK_COMMA:
if (frd->level <= 0 || !can_comma_list(hcl)) if (frd->level <= 0 || !can_comma_list(hcl))
@ -2397,8 +2462,7 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
goto oops; goto oops;
} }
// GET_TOKEN_WITH_GOTO (hcl, oops); goto ok;
goto redo;
case HCL_IOTOK_RPAREN: /* xlist (), qlist #() */ case HCL_IOTOK_RPAREN: /* xlist (), qlist #() */
case HCL_IOTOK_RBRACK: /* bytearray #[], array[] */ case HCL_IOTOK_RBRACK: /* bytearray #[], array[] */
@ -2586,7 +2650,6 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
case HCL_IOTOK_IDENT_DOTTED: case HCL_IOTOK_IDENT_DOTTED:
frd->obj = hcl_makecnodedsymbol(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); frd->obj = hcl_makecnodedsymbol(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break; break;
} }
if (!frd->obj) goto oops; if (!frd->obj) goto oops;
@ -2624,7 +2687,14 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
HCL_ASSERT (hcl, hcl->c->r.st == HCL_NULL); HCL_ASSERT (hcl, hcl->c->r.st == HCL_NULL);
HCL_ASSERT (hcl, frd->obj != HCL_NULL); HCL_ASSERT (hcl, frd->obj != HCL_NULL);
hcl_logbfmt (hcl, HCL_LOG_STDERR, "GOT CNODE %p\n", frd->obj); /* TODO: error handling, etc */
hcl_compile(hcl, frd->obj, HCL_COMPILE_CLEAR_CODE | HCL_COMPILE_CLEAR_FNBLK); /* flags 0 if non-interactive */
hcl_freecnode (hcl, frd->obj); /* not needed any more */
frd->obj = HCL_NULL;
hcl_decode (hcl, 0, hcl_getbclen(hcl));
hcl_execute (hcl);
hcl_flushio (hcl);
} }
else else
{ {
@ -2640,14 +2710,8 @@ hcl_logbfmt (hcl, HCL_LOG_STDERR, "GOT CNODE %p\n", frd->obj);
clear_comma_colon_flag (hcl); clear_comma_colon_flag (hcl);
} }
#if 0
/* read the next token */
GET_TOKEN_WITH_GOTO (hcl, oops);
}
#endif
ok:
redo:
return 0; return 0;
oops: oops:
@ -3270,10 +3334,7 @@ static int flx_plain_ident (hcl_t* hcl, hcl_ooci_t c) /* identifier */
if (pi->seg_len == 0) if (pi->seg_len == 0)
{ {
/* this must be the second segment if flx_plain_ident() has been scheduled hcl_setsynerrbfmt (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "blank segment");
* with a valid identifier character at first */
HCL_ASSERT (hcl, pi->seg_count >= 1);
hcl_setsynerr (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
return -1; return -1;
} }
@ -3588,7 +3649,14 @@ static int flx_signed_token (hcl_t* hcl, hcl_ooci_t c)
else else
{ {
init_flx_pi (FLX_PI(hcl)); init_flx_pi (FLX_PI(hcl));
FLX_PI(hcl)->char_count++; /* the sign becomes the part of the identifier. */
/* the sign is already in the token name buffer.
* adjust the state data for the sign. */
HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 1);
FLX_PI(hcl)->char_count++;
FLX_PI(hcl)->seg_len++;
/* let refeeding of 'c' happen at the next iteration */
FEED_CONTINUE (hcl, HCL_FLX_PLAIN_IDENT); FEED_CONTINUE (hcl, HCL_FLX_PLAIN_IDENT);
goto not_consumed; goto not_consumed;
} }
@ -3604,6 +3672,7 @@ not_consumed:
static int feed_char (hcl_t* hcl, hcl_ooci_t c) static int feed_char (hcl_t* hcl, hcl_ooci_t c)
{ {
/*hcl_logbfmt (hcl, HCL_LOG_STDERR, "FEED->[%jc] %d STATE->%d\n", c, c, FLX_STATE(hcl));*/
switch (FLX_STATE(hcl)) switch (FLX_STATE(hcl))
{ {
case HCL_FLX_START: return flx_start(hcl, c); case HCL_FLX_START: return flx_start(hcl, c);
@ -3628,6 +3697,54 @@ static int feed_char (hcl_t* hcl, hcl_ooci_t c)
return -1; return -1;
} }
/*
 * Feeds characters read from the included stream(s) to the lexer, one at a
 * time, until the current input becomes the main input (hcl->c->inarg) again.
 *
 * Must be called only while an included stream is current. Nested includes
 * are handled by the same loop since it always works on hcl->c->curinp.
 *
 * Returns 0 when control has returned to the main input, and -1 if reading
 * from a stream, closing a finished stream, or feeding a character fails.
 */
static int feed_from_included (hcl_t* hcl)
{
	int x;
	hcl_ooch_t lc;

	HCL_ASSERT (hcl, hcl->c->curinp != HCL_NULL && hcl->c->curinp != &hcl->c->inarg);

	do
	{
		if (hcl->c->curinp->b.pos >= hcl->c->curinp->b.len)
		{
			/* the buffer is exhausted. request more data from the reader */
			if (hcl->c->reader(hcl, HCL_IO_READ, hcl->c->curinp) <= -1)
			{
				return -1;
			}

			if (hcl->c->curinp->xlen <= 0)
			{
				/* got EOF. close this stream and go back to its includer.
				 * TODO: decide whether HCL_OOCI_EOF should be fed to the lexer here instead */
				x = feed_end_include(hcl);

				if (hcl->c->curinp != &hcl->c->inarg)
				{
					/* advance the pointer that should have been done when the include file name has been read */
					hcl->c->curinp->b.pos++;
				}

				/* feed_end_include() has already switched to the includer and
				 * released the ended stream. only the failure is left to report. */
				if (x <= -1) return -1;

				/* 'continue' in a do-while jumps to the condition check below,
				 * so the loop stops here if the includer is the main input */
				continue;
			}

			hcl->c->curinp->b.pos = 0;
			hcl->c->curinp->b.len = hcl->c->curinp->xlen;
		}

		lc = hcl->c->curinp->buf[hcl->c->curinp->b.pos];
		x = feed_char(hcl, lc);
		if (x <= -1) return -1;

		/* x is the number of characters consumed. 0 makes the same character
		 * fed again at the next iteration.
		 * NOTE(review): if feed_char() has switched hcl->c->curinp to a new
		 * includee, this advances the new stream rather than the includer,
		 * which appears to be why the EOF path above advances the includer
		 * - confirm */
		hcl->c->curinp->b.pos += x;
	}
	while (hcl->c->curinp != &hcl->c->inarg);

	return 0;
}
int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len) int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len)
{ {
/* TODO: need to return the number of processed characters? /* TODO: need to return the number of processed characters?
@ -3641,7 +3758,7 @@ int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len)
for (i = 0; i < len; ) for (i = 0; i < len; )
{ {
x = feed_char(hcl, data[i]); x = feed_char(hcl, data[i]);
if (x <= -1) return -1; if (x <= -1) return -1; /* TODO: return the number of processed characters via an argument? */
if (x > 0) if (x > 0)
{ {
@ -3655,9 +3772,16 @@ int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len)
{ {
hcl->c->feed.lx.loc.colm++; hcl->c->feed.lx.loc.colm++;
} }
i += x; i += x; /* x is supposed to be 1. otherwise, some characters may get skipped. */
} }
/* feed again if not consumed */
if (hcl->c->curinp != &hcl->c->inarg && feed_from_included(hcl) <= -1)
{
/* TODO: return the number of processed characters via an argument? */
return -1;
}
/* feed data[i] again if not consumed */
} }
} }
else else