From ef3b2bd141dc47786d37870c4b88275d45baeac9 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Mon, 19 Feb 2024 20:48:14 +0900 Subject: [PATCH] enhanced the byte cci handler code --- lib/hcl-prv.h | 4 +++ lib/hcl.h | 2 -- lib/read.c | 92 +++++++++++++++++++++++++++++-------------------- lib/std.c | 4 +-- pas/Makefile.am | 2 +- pas/Makefile.in | 2 +- pas/hcl.pas | 81 +++++++++---------------------------------- 7 files changed, 79 insertions(+), 108 deletions(-) diff --git a/lib/hcl-prv.h b/lib/hcl-prv.h index 78bb7b3..632a158 100644 --- a/lib/hcl-prv.h +++ b/lib/hcl-prv.h @@ -1842,6 +1842,10 @@ hcl_pfrc_t hcl_pf_semaphore_group_add_semaphore (hcl_t* hcl, hcl_mod_t* mod, hcl hcl_pfrc_t hcl_pf_semaphore_group_remove_semaphore (hcl_t* hcl, hcl_mod_t* mod, hcl_ooi_t nargs); hcl_pfrc_t hcl_pf_semaphore_group_wait (hcl_t* hcl, hcl_mod_t* mod, hcl_ooi_t nargs); +/* ========================================================================= */ +/* std.c */ +/* ========================================================================= */ +hcl_errnum_t hcl_syserrstrb (hcl_t* hcl, int syserr_type, int syserr_code, hcl_bch_t* buf, hcl_oow_t len); #if defined(__cplusplus) } diff --git a/lib/hcl.h b/lib/hcl.h index e42fba4..4b7b5dd 100644 --- a/lib/hcl.h +++ b/lib/hcl.h @@ -1208,8 +1208,6 @@ struct hcl_lxc_t }; typedef struct hcl_lxc_t hcl_lxc_t; - -/*#define HCL_CCI_BUF_LEN (1)*/ #if !defined(HCL_CCI_BUF_LEN) #define HCL_CCI_BUF_LEN (2048) #endif diff --git a/lib/read.c b/lib/read.c index 6be17e5..64a6415 100644 --- a/lib/read.c +++ b/lib/read.c @@ -219,6 +219,10 @@ static HCL_INLINE int is_delimchar (hcl_ooci_t c) return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}' || c == '|' || c == ',' || c == '.' 
|| c == ':' || c == ';' || /* the first characters of tokens in delim_token_tab up to this point */ + +#if defined(HCL_OOCH_IS_UCH) + c == L'“' || c == L'”' || +#endif c == '#' || c == '\"' || c == '\'' || c == '\\' || is_spacechar(c) || c == HCL_OOCI_EOF; } @@ -1047,8 +1051,8 @@ static int feed_begin_include (hcl_t* hcl) if (hcl->c->cci_rdr(hcl, HCL_IO_OPEN, arg) <= -1) { - const hcl_ooch_t* org_errmsg = hcl_backuperrmsg(hcl); - hcl_setsynerrbfmt (hcl, HCL_SYNERR_INCLUDE, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "unable to include %js - %js", io_name, org_errmsg); + const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl); + hcl_setsynerrbfmt (hcl, HCL_SYNERR_INCLUDE, TOKEN_LOC(hcl), TOKEN_NAME(hcl), "unable to include %js - %js", io_name, orgmsg); goto oops; } @@ -2005,6 +2009,13 @@ static int flx_start (hcl_t* hcl, hcl_ooci_t c) FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on the the QUOTED_TOKEN state */ goto consumed; +#if defined(HCL_OOCH_IS_UCH) + case L'“': + init_flx_qt (FLX_QT(hcl), HCL_TOK_STRLIT, HCL_SYNERR_STRLIT, L'”', '\\', 0, HCL_TYPE_MAX(hcl_oow_t), 0); + FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. move on the the QUOTED_TOKEN state */ + goto consumed; +#endif + case '\'': init_flx_qt (FLX_QT(hcl), HCL_TOK_CHARLIT, HCL_SYNERR_CHARLIT, c, '\\', 1, 1, 0); FEED_CONTINUE (hcl, HCL_FLX_QUOTED_TOKEN); /* discard the quote itself. 
move on the the QUOTED_TOKEN state */ @@ -2920,6 +2931,23 @@ static void feed_update_lx_loc (hcl_t* hcl, hcl_ooci_t ch) } } +static hcl_oow_t move_cci_residue_bytes (hcl_io_cciarg_t* curinp) +{ + hcl_oow_t cpl; + + cpl = HCL_COUNTOF(curinp->rsd.buf) - curinp->rsd.len; + if (cpl > 0) + { + hcl_oow_t avail; + avail = curinp->b.len - curinp->b.pos; /* available in the read buffer */ + if (cpl > avail) cpl = avail; + HCL_MEMCPY(&curinp->rsd.buf[curinp->rsd.len], &curinp->buf.b[curinp->b.pos], cpl); + curinp->rsd.len += cpl; + curinp->b.pos += cpl; /* advance the position because the bytes moved to the residue buffer */ + } + return curinp->rsd.len; +} + static int feed_from_includee (hcl_t* hcl) { int x; @@ -2937,8 +2965,8 @@ static int feed_from_includee (hcl_t* hcl) if (curinp->is_bytes) { hcl_cmgr_t* cmgr; - hcl_oow_t avail, inplen, n; - hcl_oow_t saved_rsd_len; + const hcl_uint8_t* inpptr; + hcl_oow_t inplen, n; cmgr = HCL_CMGR(hcl); @@ -2968,57 +2996,48 @@ static int feed_from_includee (hcl_t* hcl) curinp->b.pos = 0; curinp->b.len = curinp->xlen; -//printf ("curinp->xlen = %d\n", (int)curinp->xlen); } - avail = curinp->b.len - curinp->b.pos; /* available in the read buffer */ - saved_rsd_len = curinp->rsd.len; -//printf ("saved_rsd_len = %d avail=%d\n", (int)saved_rsd_len, (int)avail); - if (saved_rsd_len > 0) + if (curinp->rsd.len > 0) { /* there is data in the residue buffer. 
use the residue buffer to * locate a proper multi-byte sequence */ - hcl_oow_t cpl; /* number of bytes to copy to the residue buffer */ HCL_ASSERT (hcl, curinp->b.pos == 0); - cpl = HCL_COUNTOF(curinp->rsd.buf) - curinp->rsd.len; - if (cpl > 0) - { - if (cpl > avail) cpl = avail; - HCL_MEMCPY(&curinp->rsd.buf[curinp->rsd.len], curinp->buf.b, cpl); - curinp->rsd.len += cpl; - curinp->b.pos += cpl; /* advance this because the bytes moved to the residue buffer */ - } - inplen = curinp->rsd.len; - n = cmgr->bctouc(&curinp->rsd.buf[0], inplen, &c); -//printf ("residue -> inplen = %d cpl = %d avail=%d\n", (int)inplen, (int)cpl, (int)avail); - if (n > 0 && n <= inplen) curinp->b.pos -= curinp->rsd.len - saved_rsd_len; + inplen = move_cci_residue_bytes(curinp); + inpptr = &curinp->rsd.buf[0]; } else { - inplen = avail; - n = cmgr->bctouc(&curinp->buf.b[curinp->b.pos], inplen, &c); + inplen = curinp->b.len - curinp->b.pos; + inpptr = &curinp->buf.b[curinp->b.pos]; } + + n = cmgr->bctouc((const hcl_bch_t*)inpptr, inplen, &c); if (n == 0) /* invalid sequence */ { - /* TODO: more accurate locatin of the invalid byte sequence */ + /* TODO: more accurate location of the invalid byte sequence */ hcl_seterrbfmt (hcl, HCL_EECERR, "invalid byte sequence in %js", curinp->name); return -1; } if (n > inplen) /* incomplete sequence */ { - hcl_oow_t cpl; HCL_ASSERT (hcl, curinp->rsd.len < HCL_COUNTOF(curinp->rsd.buf)); - cpl = HCL_COUNTOF(curinp->rsd.buf) - curinp->rsd.len; - if (cpl > avail) cpl = avail; - HCL_MEMCPY(&curinp->rsd.buf[curinp->rsd.len], &curinp->buf.b[curinp->b.pos], cpl); - curinp->rsd.len += cpl; - curinp->b.pos += cpl; + move_cci_residue_bytes (curinp); goto start_over; } - /* how much taken from the read buffer as input */ - HCL_ASSERT (hcl, n >= saved_rsd_len); - taken = n - saved_rsd_len; + if (curinp->rsd.len > 0) + { + /* move_cci_residue_bytes() advanced curinp->b.pos without checking + * the needed number of bytes to form a character. 
it must backoff by + * the number of excessive bytes moved to the residue buffer */ + curinp->b.pos -= curinp->rsd.len - n; + taken = 0; /* treat it as if no bytes are taken in this case */ + } + else + { + taken = n; + } } else { @@ -3028,7 +3047,7 @@ static int feed_from_includee (hcl_t* hcl) x = hcl->c->cci_rdr(hcl, HCL_IO_READ, curinp); if (x <= -1) { - /* TODO: more accurate locatin of failure */ + /* TODO: more accurate location of failure */ const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl); hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "unable to read %js - %js", curinp->name, orgmsg); return -1; @@ -3051,7 +3070,6 @@ static int feed_from_includee (hcl_t* hcl) } #endif -//hcl_logbfmt(hcl, HCL_LOG_STDERR, "[%jc]\n", c); x = feed_char(hcl, c); if (x <= -1) return -1; if (x >= 1) @@ -3060,7 +3078,7 @@ static int feed_from_includee (hcl_t* hcl) feed_update_lx_loc (hcl, c); curinp->b.pos += taken; #if defined(HCL_OOCH_IS_UCH) - curinp->rsd.len = 0; /* needed for byte reading only */ + curinp->rsd.len = 0; /* clear up the residue byte buffer. 
needed for byte reading only */ #endif } diff --git a/lib/std.c b/lib/std.c index d505e32..4183e19 100644 --- a/lib/std.c +++ b/lib/std.c @@ -850,7 +850,7 @@ static hcl_errnum_t macerr_to_errnum (int errcode) } #endif -static hcl_errnum_t _syserrstrb (hcl_t* hcl, int syserr_type, int syserr_code, hcl_bch_t* buf, hcl_oow_t len) +hcl_errnum_t hcl_syserrstrb (hcl_t* hcl, int syserr_type, int syserr_code, hcl_bch_t* buf, hcl_oow_t len) { switch (syserr_type) { @@ -3214,7 +3214,7 @@ hcl_t* hcl_openstdwithmmgr (hcl_mmgr_t* mmgr, hcl_oow_t xtnsize, hcl_errnum_t* e vmprim.alloc_heap = alloc_heap; vmprim.free_heap = free_heap; vmprim.log_write = log_write; - vmprim.syserrstrb = _syserrstrb; + vmprim.syserrstrb = hcl_syserrstrb; vmprim.assertfail = _assertfail; vmprim.dl_startup = dl_startup; vmprim.dl_cleanup = dl_cleanup; diff --git a/pas/Makefile.am b/pas/Makefile.am index 07e535e..c5b6a21 100644 --- a/pas/Makefile.am +++ b/pas/Makefile.am @@ -5,7 +5,7 @@ hcl_SOURCES = hcl.pas main.pas hcl_CPPFLAGS = hcl_DEPENDENCIES = hcl.bin -PASFLAGS = -Mobjfpc -FcUTF8 -g +PASFLAGS = -Mobjfpc -FcUTF8 -Sm -g if HCL_LIB_QUADMATH_REQUIRED PASFLAGS += -dHCL_LIB_QUADMATH_REQUIRED endif diff --git a/pas/Makefile.in b/pas/Makefile.in index a36a8e6..057375d 100644 --- a/pas/Makefile.in +++ b/pas/Makefile.in @@ -313,7 +313,7 @@ AUTOMAKE_OPTIONS = nostdinc hcl_SOURCES = hcl.pas main.pas hcl_CPPFLAGS = hcl_DEPENDENCIES = hcl.bin -PASFLAGS = -Mobjfpc -FcUTF8 -g $(am__append_1) +PASFLAGS = -Mobjfpc -FcUTF8 -Sm -g $(am__append_1) hcl_LINK = cp -pf hcl.bin $(builddir)/hcl$(EXEEXT) || echo all: all-am diff --git a/pas/hcl.pas b/pas/hcl.pas index dbdff5f..87a8f4d 100644 --- a/pas/hcl.pas +++ b/pas/hcl.pas @@ -41,13 +41,17 @@ type IO_FLUSH ); +{$ifndef HCL_CCI_BUF_LEN} +{$define HCL_CCI_BUF_LEN := 2048} +{$endif} + {$packrecords c} CciArgPtr = ^CciArg; CciArg = record (* this record must follow the public part of hcl_io_cciarg_t in hcl.h *) name: pwidechar; handle: pointer; is_bytes: integer; - buf: 
array[0..(2048 - 1)] of widechar; + buf: array[0..(HCL_CCI_BUF_LEN - 1)] of widechar; xlen: System.SizeUint; includer: CciArgPtr; end; @@ -108,7 +112,10 @@ function hcl_getoption(handle: pointer; option: Option; value: pointer): integer procedure hcl_seterrnum (handle: pointer; errnum: integer); cdecl; external; function hcl_geterrnum(handle: pointer): integer; cdecl; external; + +procedure hcl_seterrbmsg (handle: pointer; errnum: integer; errmsg: pansichar); cdecl; external; function hcl_geterrbmsg(handle: pointer): pansichar; cdecl; external; + function hcl_ignite(handle: pointer; heapsize: System.SizeUint): integer; cdecl; external; function hcl_addbuiltinprims(handle: pointer): integer; cdecl; external; function hcl_beginfeed(handle: pointer; on_cnode: pointer): integer; cdecl; external; @@ -126,6 +133,7 @@ function hcl_execute(handle: pointer): pointer; cdecl; external; procedure hcl_abort(handle: pointer) cdecl; external; procedure hcl_getsynerr(handle: pointer; synerr: SynerrPtr) cdecl; external; +function hcl_syserrstrb(handle: pointer; syserr_type: integer; syserr_code: integer; buf: pansichar; len: System.SizeUint): integer; cdecl; external; function hcl_count_ucstr(ptr: pwidechar): System.SizeUint; cdecl; external; (*----- end external hcl function -----*) @@ -212,11 +220,11 @@ begin end; end; -{$if 1} function cci_handler(handle: pointer; cmd: IoCmd; arg: CciArgPtr): integer; cdecl; var f: System.THandle; len: System.LongInt; + err: System.Integer; begin (* check if the main stream is requested. * it doesn't have to be handled because the main stream must be handled via feeding *) @@ -226,9 +234,10 @@ begin case cmd of IO_OPEN: begin (* TODO: remember the parent path and load from the parent directory if necessary*) - f := SysUtils.FileOpen(arg^.name, SysUtils.fmOpenRead); - if f <= -1 then begin - // TODO: set error info.... 
+ f := SysUtils.FileOpen(System.UTF8Encode(arg^.name), SysUtils.fmOpenRead); + if f = System.THandle(-1) then begin + err := SysUtils.GetLastOSError(); + hcl_seterrbmsg(handle, hcl_syserrstrb(handle, 0, err, nil, 0), pansichar(SysUtils.SysErrorMessage(err))); exit(-1); end; arg^.handle := pointer(f); @@ -248,8 +257,10 @@ begin IO_READ_BYTES: begin f := System.THandle(arg^.handle); len := SysUtils.FileRead(f, arg^.buf, System.SizeOf(arg^.buf)); + //len := SysUtils.FileRead(f, arg^.buf, 1); if len <= -1 then begin - // TODO: set error info + err := SysUtils.GetLastOSError(); (* fetch the OS error of the failed FileRead; err is otherwise unassigned on this path *) + hcl_seterrbmsg(handle, hcl_syserrstrb(handle, 0, err, nil, 0), pansichar(SysUtils.SysErrorMessage(err))); exit(-1); end; arg^.xlen := len; @@ -271,65 +282,6 @@ begin exit(0); end; -{$else} -function cci_handler(handle: pointer; cmd: IoCmd; arg: CciArgPtr): integer; cdecl; -var - f: Classes.TFileStream; - len: System.LongInt; -begin - (* check if the main stream is requested. - * it doesn't have to be handled because the main stream must be handled via feeding *) - if arg^.includer = nil then exit(0); - - try - case cmd of - IO_OPEN: begin - f := Classes.TFileStream.Create(arg^.name, SysUtils.fmOpenRead); - arg^.handle := pointer(f); - end; - - IO_CLOSE: begin - f := Classes.TFileStream(arg^.handle); - f.Destroy(); - end; - - IO_READ: begin - f := Classes.TFileStream(arg^.handle); - f.ReadBuffer(arg^.buf, System.SizeOf(arg^.buf)); - if len <= -1 then begin - // TODO: set error info - exit(-1); - end; - arg^.xlen := len; - end; - - IO_FLUSH: - (* no effect on an input stream *) - ; - - (* the following operations are prohibited on the code input stream: - IO_READ_BYTES: - IO_WRITE: - IO_WRITE_BYTES: - *) - else begin - hcl_seterrnum(handle, 999); (* TODO: change error code *) - exit(-1); - end; - end; - - except - on e: Exception do - writeln ('exception:', e.Message); - else - writeln ('unknonw exception'); - - exit(-1); - end; - - exit(0); -end; -{$endif} procedure Interp.CompileFile(filename: pansichar); var