From ac38c62b0d40fced50dbc2e9a4c39247eda2bde7 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Sat, 17 Feb 2024 09:16:44 +0900 Subject: [PATCH] wip - implementing byte cci handler --- lib/err.c | 8 ++-- lib/fmt.c | 16 +++---- lib/hcl.h | 24 +++++----- lib/read.c | 134 ++++++++++++++++++++++++++++------------------------ lib/std.c | 13 ++--- lib/utl.c | 8 ++-- pas/hcl.pas | 1 + 7 files changed, 110 insertions(+), 94 deletions(-) diff --git a/lib/err.c b/lib/err.c index f7127d8..7c07864 100644 --- a/lib/err.c +++ b/lib/err.c @@ -247,7 +247,7 @@ const hcl_bch_t* hcl_geterrbmsg (hcl_t* hcl) msg = (hcl->errmsg.len <= 0)? hcl_errnum_to_errstr(hcl->errnum): hcl->errmsg.buf; mbslen = HCL_COUNTOF(hcl->errmsg.xerrmsg); - hcl_conv_ucstr_to_bcstr_with_cmgr (msg, &wcslen, hcl->errmsg.xerrmsg, &mbslen, hcl_getcmgr(hcl)); + hcl_conv_ucstr_to_bcstr_with_cmgr (msg, &wcslen, hcl->errmsg.xerrmsg, &mbslen, HCL_CMGR(hcl)); return hcl->errmsg.xerrmsg; #endif @@ -262,7 +262,7 @@ const hcl_uch_t* hcl_geterrumsg (hcl_t* hcl) msg = (hcl->errmsg.len <= 0)? 
hcl_errnum_to_errstr(hcl->errnum): hcl->errmsg.buf; wcslen = HCL_COUNTOF(hcl->errmsg.xerrmsg); - hcl_conv_bcstr_to_ucstr_with_cmgr (msg, &mbslen, hcl->errmsg.xerrmsg, &wcslen, hcl_getcmgr(hcl), 1); + hcl_conv_bcstr_to_ucstr_with_cmgr (msg, &mbslen, hcl->errmsg.xerrmsg, &wcslen, HCL_CMGR(hcl), 1); return hcl->errmsg.xerrmsg; #else @@ -309,7 +309,7 @@ static int err_bcs (hcl_fmtout_t* fmtout, const hcl_bch_t* ptr, hcl_oow_t len) #if defined(HCL_OOCH_IS_UCH) if (max <= 0) return 1; - hcl_conv_bchars_to_uchars_with_cmgr (ptr, &len, &hcl->errmsg.buf[hcl->errmsg.len], &max, hcl_getcmgr(hcl), 1); + hcl_conv_bchars_to_uchars_with_cmgr (ptr, &len, &hcl->errmsg.buf[hcl->errmsg.len], &max, HCL_CMGR(hcl), 1); hcl->errmsg.len += max; #else if (len > max) len = max; @@ -337,7 +337,7 @@ static int err_ucs (hcl_fmtout_t* fmtout, const hcl_uch_t* ptr, hcl_oow_t len) hcl->errmsg.len += len; #else if (max <= 0) return 1; - hcl_conv_uchars_to_bchars_with_cmgr (ptr, &len, &hcl->errmsg.buf[hcl->errmsg.len], &max, hcl_getcmgr(hcl)); + hcl_conv_uchars_to_bchars_with_cmgr (ptr, &len, &hcl->errmsg.buf[hcl->errmsg.len], &max, HCL_CMGR(hcl)); hcl->errmsg.len += max; #endif hcl->errmsg.buf[hcl->errmsg.len] = '\0'; diff --git a/lib/fmt.c b/lib/fmt.c index f67ff8c..a3c6ff6 100644 --- a/lib/fmt.c +++ b/lib/fmt.c @@ -1639,7 +1639,7 @@ static int log_ucs (hcl_fmtout_t* fmtout, const hcl_uch_t* ptr, hcl_oow_t len) { len = rem; bcslen = HCL_COUNTOF(bcs); - hcl_conv_uchars_to_bchars_with_cmgr(ptr, &len, bcs, &bcslen, hcl_getcmgr(hcl)); + hcl_conv_uchars_to_bchars_with_cmgr(ptr, &len, bcs, &bcslen, HCL_CMGR(hcl)); log_bcs(fmtout, bcs, bcslen); rem -= len; ptr += len; @@ -1663,7 +1663,7 @@ static int log_bcs (hcl_fmtout_t* fmtout, const hcl_bch_t* ptr, hcl_oow_t len) { len = rem; ucslen = HCL_COUNTOF(ucs); - hcl_conv_bchars_to_uchars_with_cmgr(ptr, &len, ucs, &ucslen, hcl_getcmgr(hcl), 1); + hcl_conv_bchars_to_uchars_with_cmgr(ptr, &len, ucs, &ucslen, HCL_CMGR(hcl), 1); log_ucs(fmtout, ucs, ucslen); rem 
-= len; ptr += len; @@ -1826,7 +1826,7 @@ static int print_bcs (hcl_fmtout_t* fmtout, const hcl_bch_t* ptr, hcl_oow_t len) { bcslen = len; ucslen = HCL_COUNTOF(ucsbuf); - hcl_conv_bchars_to_uchars_with_cmgr(ptr, &bcslen, ucsbuf, &ucslen, hcl_getcmgr(hcl), 1); + hcl_conv_bchars_to_uchars_with_cmgr(ptr, &bcslen, ucsbuf, &ucslen, HCL_CMGR(hcl), 1); ucsptr = ucsbuf; while (ucslen > 0) @@ -1923,7 +1923,7 @@ static int print_ucs (hcl_fmtout_t* fmtout, const hcl_uch_t* ptr, hcl_oow_t len) { ucslen = len; bcslen = HCL_COUNTOF(bcsbuf); - hcl_conv_uchars_to_bchars_with_cmgr(ptr, &ucslen, bcsbuf, &bcslen, hcl_getcmgr(hcl)); + hcl_conv_uchars_to_bchars_with_cmgr(ptr, &ucslen, bcsbuf, &bcslen, HCL_CMGR(hcl)); bcsptr = bcsbuf; while (bcslen > 0) @@ -2026,7 +2026,7 @@ static int sprint_bcs (hcl_fmtout_t* fmtout, const hcl_bch_t* ptr, hcl_oow_t len #if defined(HCL_OOCH_IS_UCH) blen = len; - hcl_conv_bchars_to_uchars_with_cmgr (ptr, &blen, HCL_NULL, &oolen, hcl_getcmgr(hcl), 1); + hcl_conv_bchars_to_uchars_with_cmgr (ptr, &blen, HCL_NULL, &oolen, HCL_CMGR(hcl), 1); #else oolen = len; #endif @@ -2047,7 +2047,7 @@ static int sprint_bcs (hcl_fmtout_t* fmtout, const hcl_bch_t* ptr, hcl_oow_t len } #if defined(HCL_OOCH_IS_UCH) - hcl_conv_bchars_to_uchars_with_cmgr (ptr, &len, &hcl->sprintf.xbuf.ptr[hcl->sprintf.xbuf.len], &oolen, hcl_getcmgr(hcl), 1); + hcl_conv_bchars_to_uchars_with_cmgr (ptr, &len, &hcl->sprintf.xbuf.ptr[hcl->sprintf.xbuf.len], &oolen, HCL_CMGR(hcl), 1); #else HCL_MEMCPY (&hcl->sprintf.xbuf.ptr[hcl->sprintf.xbuf.len], ptr, len * HCL_SIZEOF(*ptr)); #endif @@ -2067,7 +2067,7 @@ static int sprint_ucs (hcl_fmtout_t* fmtout, const hcl_uch_t* ptr, hcl_oow_t len oolen = len; #else ulen = len; - hcl_conv_uchars_to_bchars_with_cmgr (ptr, &ulen, HCL_NULL, &oolen, hcl_getcmgr(hcl)); + hcl_conv_uchars_to_bchars_with_cmgr (ptr, &ulen, HCL_NULL, &oolen, HCL_CMGR(hcl)); #endif if (oolen > unused) @@ -2088,7 +2088,7 @@ static int sprint_ucs (hcl_fmtout_t* fmtout, const hcl_uch_t* 
ptr, hcl_oow_t len #if defined(HCL_OOCH_IS_UCH) HCL_MEMCPY (&hcl->sprintf.xbuf.ptr[hcl->sprintf.xbuf.len], ptr, len * HCL_SIZEOF(*ptr)); #else - hcl_conv_uchars_to_bchars_with_cmgr (ptr, &len, &hcl->sprintf.xbuf.ptr[hcl->sprintf.xbuf.len], &oolen, hcl_getcmgr(hcl)); + hcl_conv_uchars_to_bchars_with_cmgr (ptr, &len, &hcl->sprintf.xbuf.ptr[hcl->sprintf.xbuf.len], &oolen, HCL_CMGR(hcl)); #endif hcl->sprintf.xbuf.len += oolen; diff --git a/lib/hcl.h b/lib/hcl.h index d689401..2755659 100644 --- a/lib/hcl.h +++ b/lib/hcl.h @@ -1227,14 +1227,14 @@ struct hcl_io_cciarg_t void* handle; /** - * [OUT] place data here for #HCL_IO_READ + * [OUT] place data here for #HCL_IO_READ or #HCL_IO_READ_BYTES */ - hcl_ooch_t buf[2047]; /* TODO: resize this if necessary */ - hcl_ooch_t is_bytes; -#if defined(HCL_OOCH_IS_UCH) - hcl_bch_t b_int[10]; - hcl_bch_t bbuf[2037]; -#endif + int is_bytes; /* set this to non-zero if the handler fills the buffer with bytes */ + struct + { + hcl_ooch_t c[2048]; /* TODO: resize this if necessary */ + hcl_uint8_t b[2048 * HCL_SIZEOF(hcl_ooch_t)]; /* TODO: resize this if necessary */ + } buf; /** * [OUT] place the number of characters read here for #HCL_IO_READ @@ -2046,12 +2046,14 @@ static HCL_INLINE hcl_mmgr_t* hcl_getmmgr (hcl_t* hcl) { return hcl->_mmgr; } static HCL_INLINE hcl_cmgr_t* hcl_getcmgr (hcl_t* hcl) { return hcl->_cmgr; } static HCL_INLINE void hcl_setcmgr (hcl_t* hcl, hcl_cmgr_t* cmgr) { hcl->_cmgr = cmgr; } #else -# define hcl_getxtn(hcl) ((void*)((hcl_uint8_t*)hcl + ((hcl_t*)hcl)->_instsize)) -# define hcl_getmmgr(hcl) (((hcl_t*)(hcl))->_mmgr) -# define hcl_getcmgr(hcl) (((hcl_t*)(hcl))->_cmgr) -# define hcl_setcmgr(hcl,cmgr) (((hcl_t*)(hcl))->_cmgr = (cmgr)) +#define hcl_getxtn(hcl) ((void*)((hcl_uint8_t*)hcl + ((hcl_t*)hcl)->_instsize)) +#define hcl_getmmgr(hcl) (((hcl_t*)(hcl))->_mmgr) +#define hcl_getcmgr(hcl) (((hcl_t*)(hcl))->_cmgr) +#define hcl_setcmgr(hcl,cmgr) (((hcl_t*)(hcl))->_cmgr = (cmgr)) #endif +#define HCL_MMGR(hcl) 
(((hcl_t*)(hcl))->_mmgr) +#define HCL_CMGR(hcl) (((hcl_t*)(hcl))->_cmgr) #define HCL_ERRNUM(hcl) (((hcl_t*)(hcl))->errnum) HCL_EXPORT hcl_errnum_t hcl_geterrnum ( diff --git a/lib/read.c b/lib/read.c index 6cb86da..2346ff8 100644 --- a/lib/read.c +++ b/lib/read.c @@ -393,7 +393,7 @@ static int _get_char (hcl_t* hcl, hcl_io_cciarg_t* inp) } } - lc = inp->buf[inp->b.pos++]; + lc = inp->buf.c[inp->b.pos++]; inp->lxc.c = lc; inp->lxc.l.line = inp->line; @@ -2655,7 +2655,7 @@ static int flx_quoted_token (hcl_t* hcl, hcl_ooci_t c) /* string, character */ /* qt->tok_type + qt->is_byte assumes that the token types for * byte-string and byte-character literals are 1 greater than - * string and charcter literals. * see the definition of + * string and character literals. * see the definition of * hcl_tok_type_t in hcl-prv.h */ FEED_WRAP_UP (hcl, qt->tok_type + qt->is_byte); /* HCL_TOK_STRLIT or HCL_TOK_CHARLIT */ if (TOKEN_NAME_LEN(hcl) < qt->min_len) goto invalid_token; @@ -2914,78 +2914,89 @@ static void feed_update_lx_loc (hcl_t* hcl, hcl_ooci_t ch) } } -#if 0 -/*TODO: support the byte cci stream*/ - -static int read_cci_stream (hcl_t* hcl) -{ - int x; - hcl_io_cciarg_t* arg; - static hcl_io_cmd_t read_cmd[] = - { - HCL_IO_READ, - HCL_IO_READ_BYTES - }; - - arg = hcl->c->curinp; - - /*x = hcl->c->cci_rdr(hcl, HCL_IO_READ, hcl->c->curinp);*/ - x = hcl->c->cci_rdr(hcl, read_cmd[!!arg->is_bytes], hcl->c->curinp); - if (x <= -1) return -1; - -#if defined(HCL_OOCH_IS_UCH) - if (arg->is_bytes) - { - hcl_oow_t bcslen, ucslen, remlen; - - bcslen = arg->bytes.len; - ucslen = HCL_COUNTOF(arg->buf); - x = hcl_convbtooochars(hcl, arg->bytes.buf, &bcslen, arg->buf, &ucslen); - if (x <= -1 && ucslen <= 0) return -1; - - remlen = bb->len - bcslen; - if (remlen > 0) HCL_MEMMOVE (bb->buf, &bb->buf[bcslen], remlen); - bb->len = remlen; - - arg->xlen = ucslen; - } -#endif - - return 0; -} -#endif - static int feed_from_includee (hcl_t* hcl) { int x; + hcl_ooch_t c; + hcl_io_cciarg_t* 
curinp; HCL_ASSERT (hcl, hcl->c->curinp != HCL_NULL && hcl->c->curinp != &hcl->c->cci_arg); + curinp = hcl->c->curinp; do { - if (hcl->c->curinp->b.pos >= hcl->c->curinp->b.len) - { - x = hcl->c->cci_rdr(hcl, HCL_IO_READ, hcl->c->curinp); - if (x <= -1) return -1; + hcl_oow_t taken; - if (hcl->c->curinp->xlen <= 0) + #if defined(HCL_OOCH_IS_UCH) + if (curinp->is_bytes) + { + hcl_cmgr_t* cmgr; + hcl_oow_t avail, n; + + cmgr = HCL_CMGR(hcl); + if (curinp->b.pos >= curinp->b.len) { - /* got EOF from an included stream */ - feed_end_include (hcl); - continue; + x = hcl->c->cci_rdr(hcl, HCL_IO_READ_BYTES, curinp); + if (x <= -1) return -1; + + if (curinp->xlen <= 0) + { + /* got EOF from an included stream */ +/* TODO: if there are residue bytes left over from the current stream, report an error */ + feed_end_include (hcl); + curinp = hcl->c->curinp; + continue; + } + + curinp->b.pos = 0; } - hcl->c->curinp->b.pos = 0; - hcl->c->curinp->b.len = hcl->c->curinp->xlen; + avail = curinp->b.len - curinp->b.pos; + n = cmgr->bctouc(curinp->buf.b[curinp->b.pos], avail, &c); + if (n == 0) /* invalid sequence */ + { + } + if (n > avail) + { + /* incomplete sequence */ + HCL_ASSERT (hcl, avail < HCL_MBLEN_MAX); + /* TODO: move to the internal buffer and start over */ + } + taken = n; } + else + { + #endif + if (curinp->b.pos >= curinp->b.len) + { + x = hcl->c->cci_rdr(hcl, HCL_IO_READ, curinp); + if (x <= -1) return -1; - x = feed_char(hcl, hcl->c->curinp->buf[hcl->c->curinp->b.pos]); + if (curinp->xlen <= 0) + { + /* got EOF from an included stream */ + feed_end_include (hcl); + curinp = hcl->c->curinp; + continue; + } + + curinp->b.pos = 0; + curinp->b.len = curinp->xlen; + } + + c = curinp->buf.c[curinp->b.pos]; + taken = 1; + #if defined(HCL_OOCH_IS_UCH) + } + #endif + + x = feed_char(hcl, c); if (x <= -1) return -1; if (x >= 1) { /* consumed */ - feed_update_lx_loc (hcl, hcl->c->curinp->buf[hcl->c->curinp->b.pos]); - hcl->c->curinp->b.pos += 
taken; } if (hcl->c->feed.rd.do_include_file) @@ -2996,9 +3007,10 @@ static int feed_from_includee (hcl_t* hcl) * value of feed_char() is used to advance the hcl->c->curinp->b.pos pointer. */ hcl->c->feed.rd.do_include_file = 0; /* clear this regardless of inclusion result */ if (feed_begin_include(hcl) <= -1) return -1; + curinp = hcl->c->curinp; } } - while (hcl->c->curinp != &hcl->c->cci_arg); + while (curinp != &hcl->c->cci_arg); return 0; } @@ -3118,7 +3130,7 @@ int hcl_feedbchars (hcl_t* hcl, const hcl_bch_t* data, hcl_oow_t len) inpos = 0; - if (hcl->c->feed.rsd.len > 0) + if (hcl->c->feed.rsd.len > 0) /* residue length greater than 0 */ { hcl_oow_t rsdlen; @@ -3131,7 +3143,7 @@ int hcl_feedbchars (hcl_t* hcl, const hcl_bch_t* data, hcl_oow_t len) inlen = hcl->c->feed.rsd.len; outlen = 1; /* ensure that it can only convert 1 character */ - n = hcl_conv_bchars_to_uchars_with_cmgr(hcl->c->feed.rsd.buf, &inlen, outbuf, &outlen, hcl_getcmgr(hcl), 0); + n = hcl_conv_bchars_to_uchars_with_cmgr(hcl->c->feed.rsd.buf, &inlen, outbuf, &outlen, HCL_CMGR(hcl), 0); if (outlen > 0) { @@ -3184,9 +3196,9 @@ int hcl_feedbchars (hcl_t* hcl, const hcl_bch_t* data, hcl_oow_t len) inlen = len; outlen = HCL_COUNTOF(outbuf); - /* hcl_convbtouchars() does not differentiate between illegal charcter and incomplete sequence. + /* hcl_convbtouchars() does not differentiate between illegal character and incomplete sequence. 
* use a lower-level function that hcl_convbtouchars() uses */ - n = hcl_conv_bchars_to_uchars_with_cmgr(&data[inpos], &inlen, outbuf, &outlen, hcl_getcmgr(hcl), 0); + n = hcl_conv_bchars_to_uchars_with_cmgr(&data[inpos], &inlen, outbuf, &outlen, HCL_CMGR(hcl), 0); if (outlen > 0 && hcl_feed(hcl, outbuf, outlen) <= -1) return -1; if (n <= -1) diff --git a/lib/std.c b/lib/std.c index d90be35..d505e32 100644 --- a/lib/std.c +++ b/lib/std.c @@ -916,7 +916,8 @@ static hcl_errnum_t _syserrstrb (hcl_t* hcl, int syserr_type, int syserr_code, h * x > 1024 in case the XSI version before glibc 2.13 returns * a positive error code upon failure */ x = (hcl_oow_t)strerror_r(syserr_code, buf, len); - if (buf[0] == '\0' && x != 0 && x > 1024 && x != (hcl_oow_t)-1) hcl_copy_bcstr (buf, len, x); + if (x != (hcl_oow_t)buf && buf[0] == '\0' && x != 0 && x > 1024 && x != (hcl_oow_t)-1) + hcl_copy_bcstr (buf, len, (void*)x); } #else /* this may be thread unsafe */ @@ -3388,7 +3389,7 @@ static HCL_INLINE int close_cci_stream (hcl_t* hcl, hcl_io_cciarg_t* arg) if (!arg->includer && arg->name) { /* main stream closing */ - hcl_freemem (hcl, arg->name); + hcl_freemem (hcl, (hcl_ooch_t*)arg->name); arg->name = HCL_NULL; } /* END HACK */ @@ -3437,8 +3438,8 @@ static HCL_INLINE int read_cci_stream (hcl_t* hcl, hcl_io_cciarg_t* arg) #if defined(HCL_OOCH_IS_UCH) bcslen = bb->len; - ucslen = HCL_COUNTOF(arg->buf); - x = hcl_convbtooochars(hcl, bb->buf, &bcslen, arg->buf, &ucslen); + ucslen = HCL_COUNTOF(arg->buf.c); + x = hcl_convbtooochars(hcl, bb->buf, &bcslen, arg->buf.c, &ucslen); if (x <= -1 && ucslen <= 0) return -1; /* if ucslen is greater than 0, i assume that some characters have been * converted properly. as the loop above reads an entire line if not too @@ -3446,9 +3447,9 @@ static HCL_INLINE int read_cci_stream (hcl_t* hcl, hcl_io_cciarg_t* arg) * successful conversion of at least 1 ooch character. 
so no explicit * check for the incomplete sequence error is required */ #else - bcslen = (bb->len < HCL_COUNTOF(arg->buf))? bb->len: HCL_COUNTOF(arg->buf); + bcslen = (bb->len < HCL_COUNTOF(arg->buf.c))? bb->len: HCL_COUNTOF(arg->buf.c); ucslen = bcslen; - hcl_copy_bchars (arg->buf, bb->buf, bcslen); + hcl_copy_bchars (arg->buf.c, bb->buf, bcslen); #endif remlen = bb->len - bcslen; diff --git a/lib/utl.c b/lib/utl.c index 39a6942..3060512 100644 --- a/lib/utl.c +++ b/lib/utl.c @@ -994,7 +994,7 @@ int hcl_convbtouchars (hcl_t* hcl, const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_ /* length bound */ int n; - n = hcl_conv_bchars_to_uchars_with_cmgr(bcs, bcslen, ucs, ucslen, hcl_getcmgr(hcl), 0); + n = hcl_conv_bchars_to_uchars_with_cmgr(bcs, bcslen, ucs, ucslen, HCL_CMGR(hcl), 0); if (n <= -1) { @@ -1010,7 +1010,7 @@ int hcl_convutobchars (hcl_t* hcl, const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_ /* length bound */ int n; - n = hcl_conv_uchars_to_bchars_with_cmgr(ucs, ucslen, bcs, bcslen, hcl_getcmgr(hcl)); + n = hcl_conv_uchars_to_bchars_with_cmgr(ucs, ucslen, bcs, bcslen, HCL_CMGR(hcl)); if (n <= -1) { @@ -1025,7 +1025,7 @@ int hcl_convbtoucstr (hcl_t* hcl, const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_u /* null-terminated. 
*/ int n; - n = hcl_conv_bcstr_to_ucstr_with_cmgr(bcs, bcslen, ucs, ucslen, hcl_getcmgr(hcl), 0); + n = hcl_conv_bcstr_to_ucstr_with_cmgr(bcs, bcslen, ucs, ucslen, HCL_CMGR(hcl), 0); if (n <= -1) { @@ -1040,7 +1040,7 @@ int hcl_convutobcstr (hcl_t* hcl, const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_b /* null-terminated */ int n; - n = hcl_conv_ucstr_to_bcstr_with_cmgr(ucs, ucslen, bcs, bcslen, hcl_getcmgr(hcl)); + n = hcl_conv_ucstr_to_bcstr_with_cmgr(ucs, ucslen, bcs, bcslen, HCL_CMGR(hcl)); if (n <= -1) { diff --git a/pas/hcl.pas b/pas/hcl.pas index 3002d98..09b131e 100644 --- a/pas/hcl.pas +++ b/pas/hcl.pas @@ -44,6 +44,7 @@ type CciArg = record (* this record must follow the public part of hcl_io_cciarg_t in hcl.h *) name: pwidechar; handle: pointer; + is_bytes: integer; buf: array[0..2047] of widechar; xlen: System.SizeUint; includer: CciArgPtr;