more input stream handling code
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
hyung-hwan 2024-05-19 17:05:51 +09:00
parent 0bf106532d
commit 042ecc73d4
6 changed files with 235 additions and 57 deletions

View File

@ -187,7 +187,7 @@ func hcl_go_cci_handler(c *C.hcl_t, cmd C.hcl_io_cmd_t, arg unsafe.Pointer) C.in
return -1 return -1
} }
ioarg.is_bytes = 0 ioarg.byte_oriented = 0
if unsafe.Sizeof(buf[0]) == unsafe.Sizeof(dummy) { if unsafe.Sizeof(buf[0]) == unsafe.Sizeof(dummy) {
C.memcpy( C.memcpy(
unsafe.Pointer(&ioarg.buf[0]), unsafe.Pointer(&ioarg.buf[0]),

View File

@ -1268,7 +1268,7 @@ struct hcl_io_cciarg_t
* the caller issues HCL_IO_READ_BYTES if it's set to non-zero, expecting bytes. * the caller issues HCL_IO_READ_BYTES if it's set to non-zero, expecting bytes.
* otherwise it issues HCL_IO_READ expecting characters. * otherwise it issues HCL_IO_READ expecting characters.
*/ */
int is_bytes; int byte_oriented;
/** /**
* [OUT] place data here for #HCL_IO_READ or #HCL_IO_READ_BYTES * [OUT] place data here for #HCL_IO_READ or #HCL_IO_READ_BYTES
@ -1326,7 +1326,7 @@ struct hcl_io_udiarg_t
/** /**
* [OUT] indicates if HCL_IO_READ_BYTES is implemented * [OUT] indicates if HCL_IO_READ_BYTES is implemented
*/ */
int is_bytes; int byte_oriented;
/** /**
* [OUT] place data in c for #HCL_IO_READ and in d for #HCL_IO_READ_BYTES * [OUT] place data in c for #HCL_IO_READ and in d for #HCL_IO_READ_BYTES
@ -1342,10 +1342,20 @@ struct hcl_io_udiarg_t
*/ */
hcl_oow_t xlen; hcl_oow_t xlen;
/**
* Internal use only. Don't touch these. /*-----------------------------------------------------------------*/
*/ /*----------- from here down, internal use only -------------------*/
hcl_oow_t pos; struct
{
hcl_oow_t pos;
hcl_oow_t len;
} b; /* buffer(buf.c or buf.b) usage status */
struct
{
hcl_uint8_t buf[HCL_MBLEN_MAX];
hcl_oow_t len;
} rsd; /* residue bytes for HCL_IO_READ_BYTES */
}; };
typedef struct hcl_io_udoarg_t hcl_io_udoarg_t; typedef struct hcl_io_udoarg_t hcl_io_udoarg_t;

View File

@ -215,42 +215,182 @@ static hcl_pfrc_t pf_sprintf (hcl_t* hcl, hcl_mod_t* mod, hcl_ooi_t nargs)
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
/*
 * Tops up the residue buffer (curinp->rsd.buf) with unread bytes taken from
 * the byte read buffer (curinp->buf.b). It copies as many bytes as both fit
 * in the residue buffer and are still unread, then advances curinp->b.pos
 * past the copied bytes.
 *
 * Returns the number of bytes held in the residue buffer after the move.
 */
static hcl_oow_t move_udi_residue_bytes (hcl_io_udiarg_t* curinp)
{
	hcl_oow_t room;
	hcl_oow_t unread;
	hcl_oow_t ncopy;

	room = HCL_COUNTOF(curinp->rsd.buf) - curinp->rsd.len; /* free slots left in the residue buffer */
	if (!(room > 0)) return curinp->rsd.len; /* nothing can be added */

	unread = curinp->b.len - curinp->b.pos; /* bytes not yet consumed from the read buffer */
	ncopy = (room > unread)? unread: room;

	HCL_MEMCPY(&curinp->rsd.buf[curinp->rsd.len], &curinp->buf.b[curinp->b.pos], ncopy);
	curinp->b.pos += ncopy; /* the moved bytes now live in the residue buffer */
	curinp->rsd.len += ncopy;

	return curinp->rsd.len;
}
/*
 * Fetches the next character from the input described by hcl->io.udi_arg
 * and stores it in *ch.
 *
 * Returns 1 when a character has been stored, 0 when the reader reports no
 * more data (xlen <= 0), and -1 on failure (reader error, invalid byte
 * sequence, or a byte sequence left incomplete at EOF).
 *
 * NOTE(review): this span looks like a side-by-side diff rendering. Where a
 * line shows two statements, the left one appears to be the removed revision
 * and the right one the added revision; lines showing a single statement
 * belong to only one of the two revisions.
 */
static int get_udi_char (hcl_t* hcl, hcl_ooch_t* ch) static int get_udi_char (hcl_t* hcl, hcl_ooch_t* ch)
{ {
if (hcl->io.udi_arg.pos >= hcl->io.udi_arg.xlen) hcl_io_udiarg_t* curinp;
{ hcl_ooch_t c;
hcl->io.udi_arg.pos = 0; hcl_oow_t taken;
hcl->io.udi_arg.xlen = 0; int x;
if (hcl->io.udi_rdr(hcl, HCL_IO_READ, &hcl->io.udi_arg) <= -1) return -1;
if (hcl->io.udi_arg.xlen <= 0) return 0; /* EOF */
hcl->io.udi_arg.is_bytes = 0;
}
*ch = hcl->io.udi_arg.buf.c[hcl->io.udi_arg.pos++]; curinp = &hcl->io.udi_arg;
/* byte-oriented path: read raw bytes and decode one character with the
 * character manager. compiled only when HCL_OOCH_IS_UCH is defined */
#if defined(HCL_OOCH_IS_UCH)
if (curinp->byte_oriented)
{
hcl_cmgr_t* cmgr;
const hcl_uint8_t* inpptr;
hcl_oow_t inplen, n;
cmgr = HCL_CMGR(hcl);
start_over:
/* refill the read buffer once every byte in it has been consumed */
if (curinp->b.pos >= curinp->b.len)
{
x = hcl->io.udi_rdr(hcl, HCL_IO_READ_BYTES, curinp);
if (x <= -1)
{
const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl);
hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "unable to read bytes from input stream - %js", orgmsg);
return -1;
}
if (curinp->xlen <= 0)
{
/* got EOF from the input stream */
if (curinp->rsd.len > 0)
{
/* bytes saved from the previous read never formed a character */
hcl_seterrbfmt (hcl, HCL_EECERR, "incomplete byte sequence in input stream");
return -1;
}
return 0;
}
curinp->b.pos = 0;
curinp->b.len = curinp->xlen;
}
if (curinp->rsd.len > 0)
{
/* there is data in the residue buffer. use the residue buffer to
* locate a proper multi-byte sequence */
HCL_ASSERT (hcl, curinp->b.pos == 0);
inplen = move_udi_residue_bytes(curinp);
inpptr = &curinp->rsd.buf[0];
}
else
{
/* no residue. decode directly from the unread part of the read buffer */
inplen = curinp->b.len - curinp->b.pos;
inpptr = &curinp->buf.b[curinp->b.pos];
}
n = cmgr->bctouc((const hcl_bch_t*)inpptr, inplen, &c);
if (n == 0) /* invalid sequence */
{
/* TODO: more accurate location of the invalid byte sequence */
hcl_seterrbfmt (hcl, HCL_EECERR, "invalid byte sequence in input stream");
return -1;
}
if (n > inplen) /* incomplete sequence */
{
/* stash the unread bytes in the residue buffer and go back to
 * read more input */
HCL_ASSERT (hcl, curinp->rsd.len < HCL_COUNTOF(curinp->rsd.buf));
move_udi_residue_bytes (curinp);
goto start_over;
}
if (curinp->rsd.len > 0)
{
/* move_udi_residue_bytes() advanced curinp->b.pos without checking
* the needed number of bytes to form a character. it must back off by
* the number of excess bytes moved to the residue buffer */
curinp->b.pos -= curinp->rsd.len - n;
taken = 0; /* treat it as if no bytes are taken in this case */
}
else
{
taken = n;
}
}
else
{
#endif
/* character-oriented path: the reader fills buf.c with characters */
if (curinp->b.pos >= curinp->b.len)
{
x = hcl->io.udi_rdr(hcl, HCL_IO_READ, curinp);
if (x <= -1)
{
/* TODO: more accurate location of failure */
const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl);
hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "unable to read input stream - %js", orgmsg);
return -1;
}
if (curinp->xlen <= 0)
{
/* got EOF from the input stream */
return 0;
}
curinp->b.pos = 0;
curinp->b.len = curinp->xlen;
}
c = curinp->buf.c[curinp->b.pos];
taken = 1;
#if defined(HCL_OOCH_IS_UCH)
}
#endif
/* consume what was used from the read buffer. taken is 0 when the
 * character came from the residue buffer since b.pos was adjusted above */
curinp->b.pos += taken;
#if defined(HCL_OOCH_IS_UCH)
curinp->rsd.len = 0; /* clear up the residue byte buffer. needed for byte reading only */
#endif
*ch = c;
return 1; return 1;
} }
/*
 * Fetches the next byte from the input described by hcl->io.udi_arg and
 * stores it in *bt.
 *
 * Returns 1 when a byte has been stored, 0 when the reader reports no more
 * data (xlen <= 0), and -1 on failure. When HCL_OOCH_IS_UCH is defined, it
 * fails with HCL_EPERM if the stream is not flagged as byte-oriented.
 *
 * NOTE(review): this span looks like a side-by-side diff rendering. Where a
 * line shows two statements, the left one appears to be the removed revision
 * and the right one the added revision.
 * NOTE(review): the added revision reads only from buf.b and does not look
 * at the residue bytes kept in rsd - confirm that this is intended.
 */
static int get_udi_byte (hcl_t* hcl, hcl_uint8_t* bt) static int get_udi_byte (hcl_t* hcl, hcl_uint8_t* bt)
{ {
hcl_io_udiarg_t* curinp;
int x;
#if defined(HCL_OOCH_IS_UCH) #if defined(HCL_OOCH_IS_UCH)
if (!hcl->io.udi_arg.is_bytes) if (!hcl->io.udi_arg.byte_oriented)
{ {
hcl_seterrbfmt (hcl, HCL_EPERM, "prohibited byte-oriented input"); /* TODO: convert characters to bytes? but do we know the original encoding? */
hcl_seterrbfmt (hcl, HCL_EPERM, "byte-oriented input prohibited on character-oriented stream");
return -1; return -1;
} }
#endif #endif
if (hcl->io.udi_arg.pos >= hcl->io.udi_arg.xlen) curinp = &hcl->io.udi_arg;
/* refill the read buffer once every byte in it has been consumed */
if (curinp->b.pos >= curinp->b.len)
{ {
hcl->io.udi_arg.pos = 0; x = hcl->io.udi_rdr(hcl, HCL_IO_READ_BYTES, curinp);
hcl->io.udi_arg.xlen = 0; if (x <= -1)
if (hcl->io.udi_rdr(hcl, HCL_IO_READ_BYTES, &hcl->io.udi_arg) <= -1) return -1; {
if (hcl->io.udi_arg.xlen <= 0) return 0; /* EOF */ const hcl_ooch_t* orgmsg = hcl_backuperrmsg(hcl);
hcl->io.udi_arg.is_bytes = 1; hcl_seterrbfmt (hcl, HCL_ERRNUM(hcl), "unable to read input stream - %js", orgmsg);
return -1;
}
if (curinp->xlen <= 0)
{
/* got EOF from the input stream */
return 0;
}
curinp->b.pos = 0;
curinp->b.len = curinp->xlen;
} }
/* hand out one byte and step past it */
*bt = hcl->io.udi_arg.buf.b[hcl->io.udi_arg.pos++]; *bt = curinp->buf.b[curinp->b.pos++];
return 1; return 1;
} }
@ -990,6 +1130,9 @@ static hcl_pfrc_t pf_object_new (hcl_t* hcl, hcl_mod_t* mod, hcl_ooi_t nargs)
static pf_t builtin_prims[] = static pf_t builtin_prims[] =
{ {
/* TODO: move these primitives to modules... */
{ 0, 0, pf_getbyte, 7, { 'g','e','t','b','y','t','e' } },
{ 0, 0, pf_getch, 5, { 'g','e','t','c','h' } }, { 0, 0, pf_getch, 5, { 'g','e','t','c','h' } },
{ 0, HCL_TYPE_MAX(hcl_oow_t), pf_log, 3, { 'l','o','g' } }, { 0, HCL_TYPE_MAX(hcl_oow_t), pf_log, 3, { 'l','o','g' } },
{ 1, HCL_TYPE_MAX(hcl_oow_t), pf_logf, 4, { 'l','o','g','f' } }, { 1, HCL_TYPE_MAX(hcl_oow_t), pf_logf, 4, { 'l','o','g','f' } },

View File

@ -1109,7 +1109,7 @@ static int feed_begin_include (hcl_t* hcl)
arg->line = 1; arg->line = 1;
arg->colm = 1; arg->colm = 1;
/*arg->nl = '\0';*/ /*arg->nl = '\0';*/
/*arg->is_bytes = 0;*/ /*arg->byte_oriented = 0;*/
arg->includer = hcl->c->curinp; arg->includer = hcl->c->curinp;
if (hcl->c->cci_rdr(hcl, HCL_IO_OPEN, arg) <= -1) if (hcl->c->cci_rdr(hcl, HCL_IO_OPEN, arg) <= -1)
@ -3087,7 +3087,7 @@ static int feed_from_includee (hcl_t* hcl)
hcl_oow_t taken; hcl_oow_t taken;
#if defined(HCL_OOCH_IS_UCH) #if defined(HCL_OOCH_IS_UCH)
if (curinp->is_bytes) if (curinp->byte_oriented)
{ {
hcl_cmgr_t* cmgr; hcl_cmgr_t* cmgr;
const hcl_uint8_t* inpptr; const hcl_uint8_t* inpptr;

View File

@ -3534,6 +3534,7 @@ static HCL_INLINE int open_udi_stream (hcl_t* hcl, hcl_io_udiarg_t* arg)
goto oops; goto oops;
} }
arg->byte_oriented = 1;
arg->handle = bb; arg->handle = bb;
return 0; return 0;
@ -3567,36 +3568,56 @@ static HCL_INLINE int read_udi_stream (hcl_t* hcl, hcl_io_udiarg_t* arg)
bb_t* bb; bb_t* bb;
hcl_oow_t bcslen, ucslen, remlen; hcl_oow_t bcslen, ucslen, remlen;
int x; int x;
#if defined(HCL_OOCH_IS_UCH)
int fetched = 0;
#endif
bb = (bb_t*)arg->handle; bb = (bb_t*)arg->handle;
HCL_ASSERT (hcl, bb != HCL_NULL && bb->fp != HCL_NULL); HCL_ASSERT (hcl, bb != HCL_NULL && bb->fp != HCL_NULL);
do
{
x = fgetc(bb->fp);
if (x == EOF)
{
if (ferror((FILE*)bb->fp))
{
hcl_seterrbfmtwithsyserr (hcl, 0, errno, "unable to read udi stream");
return -1;
}
break;
}
bb->buf[bb->len++] = x; if (bb->len > 0)
{
#if defined(HCL_OOCH_IS_UCH)
real_fetch:
fetched = 1;
#endif
do
{
x = fgetc(bb->fp);
if (x == EOF)
{
if (ferror((FILE*)bb->fp))
{
hcl_seterrbfmtwithsyserr (hcl, 0, errno, "unable to read udi stream");
return -1;
}
break;
}
bb->buf[bb->len++] = x;
}
while (bb->len < HCL_COUNTOF(bb->buf) && x != '\r' && x != '\n');
} }
while (bb->len < HCL_COUNTOF(bb->buf) && x != '\r' && x != '\n');
#if defined(HCL_OOCH_IS_UCH) #if defined(HCL_OOCH_IS_UCH)
bcslen = bb->len; bcslen = bb->len;
ucslen = HCL_COUNTOF(arg->buf.c); ucslen = HCL_COUNTOF(arg->buf.c);
x = hcl_convbtooochars(hcl, bb->buf, &bcslen, arg->buf.c, &ucslen); x = hcl_convbtooochars(hcl, bb->buf, &bcslen, arg->buf.c, &ucslen);
#if 0
if (x <= -1 && ucslen <= 0) return -1; if (x <= -1 && ucslen <= 0) return -1;
/* if ucslen is greater than 0, i assume that some characters have been /* if ucslen is greater than 0, i assume that some characters have been
* converted properly. as the loop above reads an entire line if not too * converted properly. as the loop above reads an entire line if not too
* large, the incomplete sequence error (x == -3) must happen after * large, the incomplete sequence error (x == -3) must happen after
* successful conversion of at least 1 ooch character. so no explicit * successful conversion of at least 1 ooch character. so no explicit
* check for the incomplete sequence error is required */ * check for the incomplete sequence error is required */
#else
if (x <= -1) return -1;
if (ucslen < 0)
{
if (!fetched) goto real_fetch;
return -1;
}
#endif
#else #else
bcslen = (bb->len < HCL_COUNTOF(arg->buf.c))? bb->len: HCL_COUNTOF(arg->buf.c); bcslen = (bb->len < HCL_COUNTOF(arg->buf.c))? bb->len: HCL_COUNTOF(arg->buf.c);
ucslen = bcslen; ucslen = bcslen;
@ -3620,22 +3641,26 @@ static HCL_INLINE int read_udi_stream_bytes (hcl_t* hcl, hcl_io_udiarg_t* arg)
bb = (bb_t*)arg->handle; bb = (bb_t*)arg->handle;
HCL_ASSERT (hcl, bb != HCL_NULL && bb->fp != HCL_NULL); HCL_ASSERT (hcl, bb != HCL_NULL && bb->fp != HCL_NULL);
do
{
x = fgetc(bb->fp);
if (x == EOF)
{
if (ferror((FILE*)bb->fp))
{
hcl_seterrbfmtwithsyserr (hcl, 0, errno, "unable to read udi stream");
return -1;
}
break;
}
bb->buf[bb->len++] = x; if (bb->len <= 0)
{
do
{
x = fgetc(bb->fp);
if (x == EOF)
{
if (ferror((FILE*)bb->fp))
{
hcl_seterrbfmtwithsyserr (hcl, 0, errno, "unable to read udi stream");
return -1;
}
break;
}
bb->buf[bb->len++] = x;
}
while (bb->len < HCL_COUNTOF(bb->buf) && x != '\r' && x != '\n');
} }
while (bb->len < HCL_COUNTOF(bb->buf) && x != '\r' && x != '\n');
bcslen = (bb->len < HCL_COUNTOF(arg->buf.b))? bb->len: HCL_COUNTOF(arg->buf.b); bcslen = (bb->len < HCL_COUNTOF(arg->buf.b))? bb->len: HCL_COUNTOF(arg->buf.b);
ucslen = bcslen; ucslen = bcslen;

View File

@ -50,7 +50,7 @@ type
CciArg = record (* this record must follow the public part of hcl_io_cciarg_t in hcl.h *) CciArg = record (* this record must follow the public part of hcl_io_cciarg_t in hcl.h *)
name: pwidechar; name: pwidechar;
handle: pointer; handle: pointer;
is_bytes: integer; byte_oriented: integer;
buf: array[0..(HCL_CCI_BUF_LEN - 1)] of widechar; buf: array[0..(HCL_CCI_BUF_LEN - 1)] of widechar;
xlen: System.SizeUint; xlen: System.SizeUint;
includer: CciArgPtr; includer: CciArgPtr;
@ -296,7 +296,7 @@ begin
nf^.name := name; nf^.name := name;
arg^.handle := pointer(nf); arg^.handle := pointer(nf);
arg^.is_bytes := 1; arg^.byte_oriented := 1;
end; end;
IO_CLOSE: begin IO_CLOSE: begin