coding include file handling in the feed-based reader

This commit is contained in:
hyung-hwan 2022-07-28 14:07:18 +00:00
parent aab704ac4b
commit 2543b0d634
4 changed files with 466 additions and 340 deletions

View File

@ -126,7 +126,7 @@ static HCL_INLINE int open_input (hcl_t* hcl, hcl_ioinarg_t* arg)
#if defined(HCL_OOCH_IS_UCH)
if (hcl_convootobcstr(hcl, arg->name, &ucslen, HCL_NULL, &bcslen) <= -1) goto oops;
#else
bcslen = hcl_count_bcstr (arg->name);
bcslen = hcl_count_bcstr(arg->name);
#endif
fn = ((bb_t*)arg->includer->handle)->fn;
@ -134,7 +134,7 @@ static HCL_INLINE int open_input (hcl_t* hcl, hcl_ioinarg_t* arg)
fb = get_base_name(fn);
parlen = fb - fn;
bb = (bb_t*)hcl_callocmem (hcl, HCL_SIZEOF(*bb) + (HCL_SIZEOF(hcl_bch_t) * (parlen + bcslen + 1)));
bb = (bb_t*)hcl_callocmem(hcl, HCL_SIZEOF(*bb) + (HCL_SIZEOF(hcl_bch_t) * (parlen + bcslen + 1)));
if (!bb) goto oops;
bb->fn = (hcl_bch_t*)(bb + 1);
@ -825,7 +825,7 @@ static int feed_loop (hcl_t* hcl, xtn_t* xtn, int cflags, int verbose)
while (1)
{
hcl_oow_t n;
hcl_ooi_t n;
/*n = fread(&buf[len], 1, HCL_COUNTOF(buf) - len, fp);*/
n = read(fileno(fp), &buf[len], HCL_COUNTOF(buf) - len);
@ -868,7 +868,7 @@ static int feed_loop (hcl_t* hcl, xtn_t* xtn, int cflags, int verbose)
}
break;
}
if (ferror(fp))
if (n <= -1 || ferror(fp))
{
hcl_logbfmt (hcl, HCL_LOG_STDERR, "ERROR: unable to read %hs - %hs\n", xtn->read_path, strerror(errno));
goto oops;

View File

@ -632,6 +632,7 @@ struct hcl_frd_t
int level;
int array_level;
int flagv;
int expect_include_file;
hcl_cnode_t* obj;
};

View File

@ -1257,7 +1257,8 @@ struct hcl_ioinarg_t
{
hcl_oow_t pos;
hcl_oow_t len;
int state;
/* <<B.STATE>>
int state;*/
} b;
hcl_oow_t line;

View File

@ -461,6 +461,7 @@ static int get_char (hcl_t* hcl)
return 0;
}
/* <<B.STATE>> -> probably not needed any more?
if (hcl->c->curinp->b.state == -1)
{
hcl->c->curinp->b.state = 0;
@ -471,6 +472,7 @@ static int get_char (hcl_t* hcl)
hcl->c->curinp->b.state = 0;
goto return_eof;
}
*/
if (hcl->c->curinp->b.pos >= hcl->c->curinp->b.len)
{
@ -478,7 +480,9 @@ static int get_char (hcl_t* hcl)
if (hcl->c->curinp->xlen <= 0)
{
/* <<B.STATE>>
return_eof:
*/
hcl->c->curinp->lxc.c = HCL_OOCI_EOF;
hcl->c->curinp->lxc.l.line = hcl->c->curinp->line;
hcl->c->curinp->lxc.l.colm = hcl->c->curinp->colm;
@ -2277,24 +2281,96 @@ static void fst_pop (hcl_t* hcl)
hcl->c->feed.top--;
}
/*
 * Opens the stream named by the current token and makes it the current
 * input of the feed-based reader.
 *
 * The token name is passed to add_io_name() and the result is stored as the
 * name of a newly allocated, zero-filled hcl_ioinarg_t. The new input starts
 * at line 1, column 1 and records the current input as its includer so that
 * it can be restored later.
 *
 * Returns 0 after switching hcl->c->curinp to the new input.
 * Returns -1 if add_io_name() or the allocation fails, or if the reader
 * callback rejects HCL_IO_OPEN. In the last case a HCL_SYNERR_INCLUDE
 * syntax error is set and the allocated input is released.
 */
static int feed_begin_include (hcl_t* hcl)
{
	const hcl_ooch_t* incl_name;
	hcl_ioinarg_t* incl;

	incl_name = add_io_name(hcl, TOKEN_NAME(hcl));
	if (HCL_UNLIKELY(!incl_name)) return -1;

	incl = (hcl_ioinarg_t*)hcl_callocmem(hcl, HCL_SIZEOF(*incl));
	if (HCL_UNLIKELY(!incl)) return -1; /* nothing to release yet */

	incl->name = incl_name;
	incl->line = 1;
	incl->colm = 1;
	/* remember who included this stream */
	incl->includer = hcl->c->curinp;

	if (hcl->c->reader(hcl, HCL_IO_OPEN, incl) <= -1)
	{
		hcl_setsynerrbfmt (hcl, HCL_SYNERR_INCLUDE, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "unable to include %js", incl_name);
		hcl_freemem (hcl, incl);
		return -1;
	}

	/* from now on, characters come from the includee's stream */
	hcl->c->curinp = incl;
	return 0;
}
/*
 * Closes the current included stream and resumes the includer.
 *
 * Returns 0 without doing anything if the current input is the main input
 * (hcl->c->inarg), i.e. no include is in progress.
 * Returns 1 if the included stream was closed successfully. In this case
 * hcl->c->lxc is reloaded from the resumed input.
 * Returns -1 if the reader callback reported a failure for HCL_IO_CLOSE.
 *
 * Whether or not closing succeeds, the input structure is freed and
 * hcl->c->curinp is moved back to the includer, so the caller must not
 * issue HCL_IO_CLOSE on it again.
 */
static int feed_end_include (hcl_t* hcl)
{
	hcl_ioinarg_t* ended;
	int close_rc;

	/* the main input is never closed here */
	if (hcl->c->curinp == &hcl->c->inarg) return 0;

	close_rc = hcl->c->reader(hcl, HCL_IO_CLOSE, hcl->c->curinp);

	/* unlink and destroy the structure before looking at the close
	 * result so that a failed close cannot be retried on it */
	ended = hcl->c->curinp;
	hcl->c->curinp = ended->includer;

	HCL_ASSERT (hcl, ended->name != HCL_NULL);
	hcl_freemem (hcl, ended);

	if (close_rc != 0) return -1; /* deferred close failure */

	hcl->c->lxc = hcl->c->curinp->lxc;
	return 1;
}
static int feed_process_token (hcl_t* hcl)
{
hcl_frd_t* frd = &hcl->c->feed.rd;
HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl), TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), TOKEN_TYPE(hcl), (int)TOKEN_LOC(hcl)->line, (int)TOKEN_LOC(hcl)->colm);
#if 0
/* this function read an s-expression non-recursively
/* this function composes an s-expression non-recursively
* by manipulating its own stack. */
int level = 0, array_level = 0, flagv = 0;
hcl_cnode_t* obj = HCL_NULL;
#endif
/*
while (1)
/*hcl_logbfmt (hcl, HCL_LOG_STDERR, "TOKEN => [%.*js] type=%d\n", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), TOKEN_TYPE(hcl));*/
if (frd->expect_include_file)
{
redo:
*/
if (TOKEN_TYPE(hcl) != HCL_IOTOK_STRLIT)
{
hcl_setsynerr (hcl, HCL_SYNERR_STRING, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto oops;
}
frd->expect_include_file = 0;
if (feed_begin_include(hcl) <= -1) goto oops;
goto ok;
}
switch (TOKEN_TYPE(hcl))
{
default:
@ -2302,23 +2378,15 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
goto oops;
case HCL_IOTOK_EOF:
/* TODO: change the code. not an error? */
hcl_setsynerr (hcl, HCL_SYNERR_EOF, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto oops;
#if 0
/* this one is special?? */
case HCL_IOTOK_INCLUDE:
/* TODO: should i limit where #include can be specified?
* disallow it inside a list literal or an array literal? */
GET_TOKEN_WITH_GOTO (hcl, oops);
if (TOKEN_TYPE(hcl) != HCL_IOTOK_STRLIT)
{
hcl_setsynerr (hcl, HCL_SYNERR_STRING, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto oops;
}
if (begin_include(hcl) <= -1) goto oops;
goto redo;
#endif
frd->expect_include_file = 1;
goto ok;
case HCL_IOTOK_LBRACK: /* [] */
frd->flagv = 0;
@ -2363,8 +2431,7 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
if (LIST_FLAG_GET_CONCODE(frd->flagv) == HCL_CONCODE_ARRAY) frd->array_level++;
/* read the next token */
// GET_TOKEN_WITH_GOTO (hcl, oops);
goto redo;
goto ok;
case HCL_IOTOK_DOT:
if (frd->level <= 0 || !can_dot_list(hcl))
@ -2377,8 +2444,7 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
goto oops;
}
// GET_TOKEN_WITH_GOTO (hcl, oops);
goto redo;
goto ok;
case HCL_IOTOK_COLON:
if (frd->level <= 0 || !can_colon_list(hcl))
@ -2387,8 +2453,7 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
goto oops;
}
// GET_TOKEN_WITH_GOTO (hcl, oops);
goto redo;
goto ok;
case HCL_IOTOK_COMMA:
if (frd->level <= 0 || !can_comma_list(hcl))
@ -2397,8 +2462,7 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
goto oops;
}
// GET_TOKEN_WITH_GOTO (hcl, oops);
goto redo;
goto ok;
case HCL_IOTOK_RPAREN: /* xlist (), qlist #() */
case HCL_IOTOK_RBRACK: /* bytearray #[], array[] */
@ -2459,7 +2523,7 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
goto oops;
}
#endif
frd->obj =leave_list(hcl, &frd->flagv, &oldflagv);
frd->obj = leave_list(hcl, &frd->flagv, &oldflagv);
frd->level--;
if (LIST_FLAG_GET_CONCODE(oldflagv) == HCL_CONCODE_ARRAY) frd->array_level--;
@ -2474,39 +2538,39 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
hcl_setsynerr (hcl, HCL_SYNERR_VBARBANNED, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
goto oops;
}
frd->obj =read_vlist(hcl);
frd->obj = read_vlist(hcl);
break;
case HCL_IOTOK_NIL:
frd->obj =hcl_makecnodenil(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodenil(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
case HCL_IOTOK_TRUE:
frd->obj =hcl_makecnodetrue(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodetrue(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
case HCL_IOTOK_FALSE:
frd->obj =hcl_makecnodefalse(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodefalse(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
case HCL_IOTOK_SELF:
frd->obj =hcl_makecnodeself(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodeself(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
case HCL_IOTOK_SUPER:
frd->obj =hcl_makecnodesuper(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodesuper(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
case HCL_IOTOK_ELLIPSIS:
frd->obj =hcl_makecnodeellipsis(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodeellipsis(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
case HCL_IOTOK_TRPCOLONS:
frd->obj =hcl_makecnodetrpcolons(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodetrpcolons(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
case HCL_IOTOK_DCSTAR:
frd->obj =hcl_makecnodedcstar(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodedcstar(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
case HCL_IOTOK_SMPTRLIT:
@ -2527,7 +2591,7 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
goto oops;
}
frd->obj =hcl_makecnodesmptrlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl), v);
frd->obj = hcl_makecnodesmptrlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl), v);
break;
}
@ -2549,44 +2613,43 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
}
}
frd->obj =hcl_makecnodeerrlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl), v);
frd->obj = hcl_makecnodeerrlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl), v);
break;
}
case HCL_IOTOK_CHARLIT:
frd->obj =hcl_makecnodecharlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl), TOKEN_NAME_CHAR(hcl, 0));
frd->obj = hcl_makecnodecharlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl), TOKEN_NAME_CHAR(hcl, 0));
break;
case HCL_IOTOK_NUMLIT:
frd->obj =hcl_makecnodenumlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodenumlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
case HCL_IOTOK_RADNUMLIT:
frd->obj =hcl_makecnoderadnumlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnoderadnumlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
case HCL_IOTOK_FPDECLIT:
frd->obj =hcl_makecnodefpdeclit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodefpdeclit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
/*
case HCL_IOTOK_REAL:
frd->obj =hcl_makerealnum(hcl, HCL_IOTOK_RVAL(hcl));
frd->obj = hcl_makerealnum(hcl, HCL_IOTOK_RVAL(hcl));
break;
*/
case HCL_IOTOK_STRLIT:
frd->obj =hcl_makecnodestrlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodestrlit(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
case HCL_IOTOK_IDENT:
frd->obj =hcl_makecnodesymbol(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodesymbol(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
case HCL_IOTOK_IDENT_DOTTED:
frd->obj =hcl_makecnodedsymbol(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
frd->obj = hcl_makecnodedsymbol(hcl, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
break;
}
if (!frd->obj) goto oops;
@ -2624,7 +2687,14 @@ HCL_DEBUG6 (hcl, "TOKEN LEN %zu=>[%.*js] %d LOC=%d,%d\n", TOKEN_NAME_LEN(hcl),
HCL_ASSERT (hcl, hcl->c->r.st == HCL_NULL);
HCL_ASSERT (hcl, frd->obj != HCL_NULL);
hcl_logbfmt (hcl, HCL_LOG_STDERR, "GOT CNODE %p\n", frd->obj);
/* TODO: error handling, etc */
hcl_compile(hcl, frd->obj, HCL_COMPILE_CLEAR_CODE | HCL_COMPILE_CLEAR_FNBLK); /* flags 0 if non-interactive */
hcl_freecnode (hcl, frd->obj); /* not needed any more */
frd->obj = HCL_NULL;
hcl_decode (hcl, 0, hcl_getbclen(hcl));
hcl_execute (hcl);
hcl_flushio (hcl);
}
else
{
@ -2640,14 +2710,8 @@ hcl_logbfmt (hcl, HCL_LOG_STDERR, "GOT CNODE %p\n", frd->obj);
clear_comma_colon_flag (hcl);
}
#if 0
/* read the next token */
GET_TOKEN_WITH_GOTO (hcl, oops);
}
#endif
redo:
ok:
return 0;
oops:
@ -3270,10 +3334,7 @@ static int flx_plain_ident (hcl_t* hcl, hcl_ooci_t c) /* identifier */
if (pi->seg_len == 0)
{
/* this must be the second segment if flx_plain_ident() has been scheduled
* with a valid identifier character at first */
HCL_ASSERT (hcl, pi->seg_count >= 1);
hcl_setsynerr (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
hcl_setsynerrbfmt (hcl, HCL_SYNERR_MSEGIDENT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "blank segment");
return -1;
}
@ -3588,7 +3649,14 @@ static int flx_signed_token (hcl_t* hcl, hcl_ooci_t c)
else
{
init_flx_pi (FLX_PI(hcl));
FLX_PI(hcl)->char_count++; /* the sign becomes the part of the identifier. */
/* the sign is already in the token name buffer.
* adjust the state data for the sign. */
HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 1);
FLX_PI(hcl)->char_count++;
FLX_PI(hcl)->seg_len++;
/* let refeeding of 'c' happen at the next iteration */
FEED_CONTINUE (hcl, HCL_FLX_PLAIN_IDENT);
goto not_consumed;
}
@ -3604,6 +3672,7 @@ not_consumed:
static int feed_char (hcl_t* hcl, hcl_ooci_t c)
{
/*hcl_logbfmt (hcl, HCL_LOG_STDERR, "FEED->[%jc] %d STATE->%d\n", c, c, FLX_STATE(hcl));*/
switch (FLX_STATE(hcl))
{
case HCL_FLX_START: return flx_start(hcl, c);
@ -3628,6 +3697,54 @@ static int feed_char (hcl_t* hcl, hcl_ooci_t c)
return -1;
}
/*
 * Feeds characters from included streams to the lexer until control
 * returns to the main input (hcl->c->inarg).
 *
 * Must be called only while an included stream is the current input.
 * Each iteration refills the current input's buffer via the reader
 * callback when it is exhausted, then passes one character to feed_char()
 * and advances the buffer position by the value feed_char() returns.
 * When a stream hits EOF it is closed with feed_end_include() and the
 * loop resumes on the includer.
 *
 * Returns 0 once the main input is current again.
 * Returns -1 if reading fails, if feed_char() fails, or if closing an
 * included stream fails.
 */
static int feed_from_included (hcl_t* hcl)
{
	int x;
	hcl_ooch_t lc;

	HCL_ASSERT (hcl, hcl->c->curinp != HCL_NULL && hcl->c->curinp != &hcl->c->inarg);

	do
	{
		if (hcl->c->curinp->b.pos >= hcl->c->curinp->b.len)
		{
			/* the buffer is exhausted. ask the reader for more */
			if (hcl->c->reader(hcl, HCL_IO_READ, hcl->c->curinp) <= -1)
			{
				return -1;
			}

			if (hcl->c->curinp->xlen <= 0)
			{
				/* got EOF. close the included stream and go back to
				 * the includer. feed_end_include() has already popped
				 * and freed the input even when it reports a close
				 * failure, so the failure only needs to be passed on */
				if (feed_end_include(hcl) <= -1) return -1;

				if (hcl->c->curinp != &hcl->c->inarg)
				{
					/* the includer is an included stream too. advance
					 * its buffer position, which should have been done
					 * when the include file name was read */
					hcl->c->curinp->b.pos++;
				}

				/* re-evaluate the loop condition with the resumed input */
				continue;
			}

			hcl->c->curinp->b.pos = 0;
			hcl->c->curinp->b.len = hcl->c->curinp->xlen;
		}

		lc = hcl->c->curinp->buf[hcl->c->curinp->b.pos];
		x = feed_char(hcl, lc);
		if (x <= -1) return -1;

		/* x is 0 when the character has not been consumed. it is then
		 * fed again at the next iteration */
		hcl->c->curinp->b.pos += x;
	}
	while (hcl->c->curinp != &hcl->c->inarg);

	return 0;
}
int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len)
{
/* TODO: need to return the number of processed characters?
@ -3641,7 +3758,7 @@ int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len)
for (i = 0; i < len; )
{
x = feed_char(hcl, data[i]);
if (x <= -1) return -1;
if (x <= -1) return -1; /* TODO: return the number of processed characters via an argument? */
if (x > 0)
{
@ -3655,9 +3772,16 @@ int hcl_feed (hcl_t* hcl, const hcl_ooch_t* data, hcl_oow_t len)
{
hcl->c->feed.lx.loc.colm++;
}
i += x;
i += x; /* x is supposed to be 1. otherwise, some characters may get skipped. */
}
/* feed again if not consumed */
if (hcl->c->curinp != &hcl->c->inarg && feed_from_included(hcl) <= -1)
{
/* TODO: return the number of processed characters via an argument? */
return -1;
}
/* feed data[i] again if not consumed */
}
}
else