From 20d8a81a3470dca48f921244773312b50a830f21 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Sun, 18 Feb 2024 10:12:56 +0900 Subject: [PATCH] wip - working on byte cci handler --- lib/hcl.h | 10 +++++-- lib/read.c | 85 ++++++++++++++++++++++++++++++++++++++++++++--------- pas/hcl.pas | 9 ++++-- 3 files changed, 86 insertions(+), 18 deletions(-) diff --git a/lib/hcl.h b/lib/hcl.h index 2755659..5d08aab 100644 --- a/lib/hcl.h +++ b/lib/hcl.h @@ -1230,7 +1230,7 @@ struct hcl_io_cciarg_t * [OUT] place data here for #HCL_IO_READ or #HCL_IO_READ_BYTES */ int is_bytes; /* set this to non-zero if the handler fills the buffer with bytes */ - struct + union { hcl_ooch_t c[2048]; /* TODO: resize this if necessary */ hcl_uint8_t b[2048 * HCL_SIZEOF(hcl_ooch_t)]; /* TODO: resize this if necessary */ @@ -1253,7 +1253,13 @@ struct hcl_io_cciarg_t { hcl_oow_t pos; hcl_oow_t len; - } b; + } b; /* buffer(buf.c or buf.b) usage status */ + + struct + { + hcl_uint8_t buf[HCL_MBLEN_MAX]; + hcl_oow_t len; + } rsd; /* residue bytes for HCL_IO_READ_BYTES */ hcl_oow_t line; hcl_oow_t colm; diff --git a/lib/read.c b/lib/read.c index 2346ff8..b4f8882 100644 --- a/lib/read.c +++ b/lib/read.c @@ -1023,13 +1023,18 @@ static int feed_begin_include (hcl_t* hcl) const hcl_ooch_t* io_name; io_name = add_sr_name(hcl, TOKEN_NAME(hcl)); - if (HCL_UNLIKELY(!io_name)) return -1; + if (HCL_UNLIKELY(!io_name)) + { + const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl); + hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "unable to include %.*js for name registration failure - %js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), orgmsg); + return -1; + } arg = (hcl_io_cciarg_t*)hcl_callocmem(hcl, HCL_SIZEOF(*arg)); if (HCL_UNLIKELY(!arg)) { const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl); - hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "failed to allocate source input structure - %js", orgmsg); + hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "unable to include %.*js for memory allocation failure - %js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), orgmsg); goto oops; } @@ -1037,6 +1042,7 @@ static int feed_begin_include (hcl_t* hcl) arg->line = 1; arg->colm = 1; /*arg->nl = '\0';*/ + /*arg->is_bytes = 0;*/ arg->includer = hcl->c->curinp; if (hcl->c->cci_rdr(hcl, HCL_IO_OPEN, arg) <= -1) @@ -1145,7 +1151,7 @@ static int auto_forge_xlist_if_at_block_beginning (hcl_t* hcl, hcl_frd_t* frd) { int forged_flagv; - /* both MLIST and ALIST begin as XLIST and get converted to MLIST + /* both MLIST and ALIST begin as XLIST and get converted to MLIST * or ALIST after more tokens are processed. so handling of MLIST * or ALIST is needed at this phase */ forged_flagv = AUTO_FORGED; @@ -2931,38 +2937,81 @@ static int feed_from_includee (hcl_t* hcl) if (curinp->is_bytes) { hcl_cmgr_t* cmgr; - hcl_oow_t avail, n; + hcl_oow_t avail, inplen, n; + hcl_oow_t saved_rsd_len; cmgr = HCL_CMGR(hcl); + + start_over: if (curinp->b.pos >= curinp->b.len) { x = hcl->c->cci_rdr(hcl, HCL_IO_READ_BYTES, curinp); - if (x <= -1) return -1; + if (x <= -1) + { + const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl); + hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "unable to read bytes from %js - %js", curinp->name, orgmsg); + return -1; + } if (curinp->xlen <= 0) { /* got EOF from an included stream */ -/* TODO: if there is residue bytes from the current stream. error... */ + if (curinp->rsd.len > 0) + { + hcl_seterrbfmt (hcl, HCL_EECERR, "incomplete byte sequence in %js", curinp->name); + return -1; + } feed_end_include (hcl); curinp = hcl->c->curinp; continue; } curinp->b.pos = 0; + curinp->b.len = curinp->xlen; } + avail = curinp->b.len - curinp->b.pos; /* available in the read buffer */ + saved_rsd_len = curinp->rsd.len; - avail = curinp->b.len - curinp->b.pos; - n = cmgr->bctouc(curinp->buf.b[curinp->b.pos], avail, &c); + if (curinp->rsd.len > 0) + { + hcl_oow_t cpl; /* number of bytes to copy to the residue buffer */ + HCL_ASSERT (hcl, curinp->b.pos == 0); + cpl = HCL_COUNTOF(curinp->rsd.buf) - curinp->rsd.len; + if (cpl > 0) + { + if (cpl > avail) cpl = avail; + HCL_MEMCPY(&curinp->rsd.buf[curinp->rsd.len], curinp->buf.b, cpl); + curinp->rsd.len += cpl; + curinp->b.pos += cpl; /* advance this because the bytes moved to the residue buffer */ + } + inplen = curinp->rsd.len; + n = cmgr->bctouc(curinp->rsd.buf, inplen, &c); + } + else + { + inplen = avail; + n = cmgr->bctouc(&curinp->buf.b[curinp->b.pos], inplen, &c); + } if (n == 0) /* invalid sequence */ { + /* TODO: more accurate locatin of the invalid byte sequence */ + hcl_seterrbfmt (hcl, HCL_EECERR, "invalid byte sequence in %js", curinp->name); + return -1; } - if (n > avail) + if (n > inplen) { /* incomplete sequence */ - HCL_ASSERT (hcl, avail < HCL_MBLEN_MAX); - /* TODO: move to the internal buffer and start over */ + HCL_ASSERT (hcl, avail < HCL_COUNTOF(curinp->rsd.buf)); + + /* TODO: wrong */ + HCL_MEMCPY (curinp->rsd.buf, &curinp->buf.b[curinp->b.pos], avail); + curinp->rsd.len = avail; + curinp->b.pos = curinp->b.len; + goto start_over; } - taken = n; + + /* how much taken from the read buffer as input */ + taken = n - saved_rsd_len; } else { @@ -2970,8 +3019,13 @@ static int feed_from_includee (hcl_t* hcl) if (curinp->b.pos >= curinp->b.len) { x = hcl->c->cci_rdr(hcl, HCL_IO_READ, curinp); - if (x <= -1) return -1; - + if (x <= -1) + { + /* TODO: more accurate locatin of failure */ + const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl); + hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "unable to read %js - %js", curinp->name, orgmsg); + return -1; + } if (curinp->xlen <= 0) { /* got EOF from an included stream */ @@ -2997,6 +3051,9 @@ static int feed_from_includee (hcl_t* hcl) /* consumed */ feed_update_lx_loc (hcl, c); curinp->b.pos += taken; + #if defined(HCL_OOCH_IS_UCH) + curinp->rsd.len = 0; /* needed for byte reading only */ + #endif } if (hcl->c->feed.rd.do_include_file) diff --git a/pas/hcl.pas b/pas/hcl.pas index 09b131e..a88d76f 100644 --- a/pas/hcl.pas +++ b/pas/hcl.pas @@ -3,9 +3,10 @@ unit HCL; {$mode objfpc}{$H+} {$linklib hcl} {$linklib c} +{$linklib dl} +{$linklib gcc} {$if defined(HCL_LIB_QUADMATH_REQUIRED)} -{$linklib gcc} {$linklib quadmath} {$endif} @@ -40,6 +41,7 @@ type IO_FLUSH ); +{$packrecords c} CciArgPtr = ^CciArg; CciArg = record (* this record must follow the public part of hcl_io_cciarg_t in hcl.h *) name: pwidechar; @@ -49,6 +51,7 @@ type xlen: System.SizeUint; includer: CciArgPtr; end; +{$packrecords normal} Interp = class protected @@ -217,16 +220,19 @@ var begin (* check if the main stream is requested. * it doesn't have to be handled because the main stream must be handled via feeding *) + if arg^.includer = nil then exit(0); case cmd of IO_OPEN: begin + (* TODO: remember the parent path and load from the parent directory if necessary*) f := SysUtils.FileOpen(arg^.name, SysUtils.fmOpenRead); if f <= -1 then begin // TODO: set error info.... exit(-1); end; arg^.handle := pointer(f); + arg^.is_bytes := 1; end; IO_CLOSE: begin @@ -254,7 +260,6 @@ begin ; (* the following operations are prohibited on the code input stream: - IO_READ: IO_WRITE: IO_WRITE_BYTES: *)