Added a new global variable, BRS, for byte reading.

Wrote some more functions to support byte reading.
This commit is contained in:
hyung-hwan 2020-01-14 14:55:34 +00:00
parent 32344f2c0c
commit b7fecc3172
6 changed files with 117 additions and 50 deletions

View File

@ -410,6 +410,7 @@ struct hawk_rtx_t
struct struct
{ {
void* brs[2];
void* rs[2]; void* rs[2];
void* fs[2]; void* fs[2];
int ignorecase; int ignorecase;

View File

@ -337,7 +337,7 @@ static hawk_rbt_walk_t unload_module (hawk_rbt_t* rbt, hawk_rbt_pair_t* pair, vo
hawk_t* awk = (hawk_t*)ctx; hawk_t* awk = (hawk_t*)ctx;
hawk_mod_data_t* md; hawk_mod_data_t* md;
md = HAWK_RBT_VPTR(pair); md = HAWK_RBT_VPTR(pair);
if (md->mod.unload) md->mod.unload (&md->mod, awk); if (md->mod.unload) md->mod.unload (&md->mod, awk);
if (md->handle) awk->prm.modclose (awk, md->handle); if (md->handle) awk->prm.modclose (awk, md->handle);

View File

@ -1288,6 +1288,7 @@ typedef const hawk_ooch_t* (*hawk_errstr_t) (
enum hawk_gbl_id_t enum hawk_gbl_id_t
{ {
/* this table should match gtab in parse.c. /* this table should match gtab in parse.c.
*
* in addition, hawk_rtx_setgbl also counts * in addition, hawk_rtx_setgbl also counts
* on the order of these values. * on the order of these values.
* *
@ -1298,6 +1299,7 @@ enum hawk_gbl_id_t
* but is this check really necessary??? * but is this check really necessary???
*/ */
HAWK_GBL_BRS,
HAWK_GBL_CONVFMT, HAWK_GBL_CONVFMT,
HAWK_GBL_FILENAME, HAWK_GBL_FILENAME,
HAWK_GBL_FNR, HAWK_GBL_FNR,
@ -1318,7 +1320,7 @@ enum hawk_gbl_id_t
/* these are not the actual IDs and are used internally only /* these are not the actual IDs and are used internally only
* Make sure you update these values properly if you add more * Make sure you update these values properly if you add more
* ID definitions, however */ * ID definitions, however */
HAWK_MIN_GBL_ID = HAWK_GBL_CONVFMT, HAWK_MIN_GBL_ID = HAWK_GBL_BRS,
HAWK_MAX_GBL_ID = HAWK_GBL_SUBSEP HAWK_MAX_GBL_ID = HAWK_GBL_SUBSEP
}; };
typedef enum hawk_gbl_id_t hawk_gbl_id_t; typedef enum hawk_gbl_id_t hawk_gbl_id_t;

View File

@ -324,6 +324,8 @@ static global_t gtab[] =
* this table must match the order of the hawk_gbl_id_t enumerators * this table must match the order of the hawk_gbl_id_t enumerators
*/ */
{ HAWK_T("BRS"), 3, 0 },
/* output real-to-str conversion format for other cases than 'print' */ /* output real-to-str conversion format for other cases than 'print' */
{ HAWK_T("CONVFMT"), 7, 0 }, { HAWK_T("CONVFMT"), 7, 0 },
@ -360,6 +362,7 @@ static global_t gtab[] =
{ HAWK_T("ORS"), 3, HAWK_RIO }, { HAWK_T("ORS"), 3, HAWK_RIO },
{ HAWK_T("RLENGTH"), 7, 0 }, { HAWK_T("RLENGTH"), 7, 0 },
{ HAWK_T("RS"), 2, 0 }, { HAWK_T("RS"), 2, 0 },
{ HAWK_T("RSTART"), 6, 0 }, { HAWK_T("RSTART"), 6, 0 },

View File

@ -196,7 +196,7 @@ static HAWK_INLINE int resolve_rs (hawk_rtx_t* rtx, hawk_val_t* rs, hawk_oocs_t*
hawk_val_type_t rs_vtype; hawk_val_type_t rs_vtype;
rs_vtype = HAWK_RTX_GETVALTYPE (rtx, rs); rs_vtype = HAWK_RTX_GETVALTYPE(rtx, rs);
switch (rs_vtype) switch (rs_vtype)
{ {
@ -219,6 +219,36 @@ static HAWK_INLINE int resolve_rs (hawk_rtx_t* rtx, hawk_val_t* rs, hawk_oocs_t*
return ret; return ret;
} }
/*
 * Resolve the BRS value (the record separator used for byte reading)
 * into a byte string stored in 'rrs'.
 *
 *  - HAWK_VAL_NIL: 'rrs' is set to a null pointer with length 0.
 *  - HAWK_VAL_MBS: 'rrs' points directly at the string held inside the
 *    value; nothing is allocated.
 *  - any other type: 'rrs' receives the result of hawk_rtx_valtobcstrdup().
 *    NOTE(review): the reader in this file frees rrs.ptr when the value
 *    type is not HAWK_VAL_MBS, so this result is presumably heap-allocated
 *    and owned by the caller - confirm against hawk_rtx_valtobcstrdup().
 *
 * Returns 0 on success, -1 if the conversion yields a null pointer.
 */
static HAWK_INLINE int resolve_brs (hawk_rtx_t* rtx, hawk_val_t* brs, hawk_bcs_t* rrs)
{
	hawk_val_type_t vtype = HAWK_RTX_GETVALTYPE(rtx, brs);

	if (vtype == HAWK_VAL_NIL)
	{
		/* no separator set */
		rrs->ptr = HAWK_NULL;
		rrs->len = 0;
		return 0;
	}

	if (vtype == HAWK_VAL_MBS)
	{
		/* borrow the byte string stored in the value itself */
		hawk_val_mbs_t* mbs = (hawk_val_mbs_t*)brs;
		rrs->ptr = mbs->val.ptr;
		rrs->len = mbs->val.len;
		return 0;
	}

	/* every other value type goes through the byte-string conversion */
	rrs->ptr = hawk_rtx_valtobcstrdup(rtx, brs, &rrs->len);
	return (rrs->ptr == HAWK_NULL)? -1: 0;
}
static HAWK_INLINE int match_long_rs (hawk_rtx_t* rtx, hawk_ooecs_t* buf, hawk_rio_arg_t* p) static HAWK_INLINE int match_long_rs (hawk_rtx_t* rtx, hawk_ooecs_t* buf, hawk_rio_arg_t* p)
{ {
hawk_oocs_t match; hawk_oocs_t match;
@ -276,18 +306,16 @@ static HAWK_INLINE int match_long_rs (hawk_rtx_t* rtx, hawk_ooecs_t* buf, hawk_r
return ret; return ret;
} }
static HAWK_INLINE int match_long_brs(hawk_rtx_t* rtx, hawk_becs_t* buf, hawk_rio_arg_t* p)
#if 0
static HAWK_INLINE int match_long_rs_bytes (hawk_rtx_t* rtx, hawk_becs_t* buf, hawk_rio_arg_t* p)
{ {
hawk_oocs_t match; hawk_bcs_t match;
int ret; int ret;
HAWK_ASSERT (rtx->gbl.rs[0] != HAWK_NULL); HAWK_ASSERT (rtx->gbl.brs[0] != HAWK_NULL);
HAWK_ASSERT (rtx->gbl.rs[1] != HAWK_NULL); HAWK_ASSERT (rtx->gbl.brs[1] != HAWK_NULL);
ret = hawk_rtx_matchrex(rtx, rtx->gbl.rs[rtx->gbl.ignorecase], HAWK_BECS_OOCS(buf), HAWK_BECS_OOCS(buf), &match, HAWK_NULL); /*TODO: mbs match rex */
ret = hawk_rtx_matchrex(rtx, rtx->gbl.brs[rtx->gbl.ignorecase], HAWK_BECS_OOCS(buf), HAWK_BECS_OOCS(buf), &match, HAWK_NULL);
if (ret >= 1) if (ret >= 1)
{ {
if (p->in.eof) if (p->in.eof)
@ -314,14 +342,14 @@ static HAWK_INLINE int match_long_rs_bytes (hawk_rtx_t* rtx, hawk_becs_t* buf, h
* of the buffer is indeterminate as we don't have the * of the buffer is indeterminate as we don't have the
* full input yet. * full input yet.
*/ */
const hawk_ooch_t* be = HAWK_BECS_PTR(buf) + HAWK_BECS_LEN(buf); const hawk_bch_t* be = HAWK_BECS_PTR(buf) + HAWK_BECS_LEN(buf);
const hawk_ooch_t* me = match.ptr + match.len; const hawk_bch_t* me = match.ptr + match.len;
if (me < be) if (me < be)
{ {
/* the match ends before the ending boundary. /* the match ends before the ending boundary.
* it must be the longest match. drop the RS part * it must be the longest match. drop the BRS part
* and the characters after RS. */ * and the characters after BRS. */
HAWK_BECS_LEN(buf) -= match.len + (be - me); HAWK_BECS_LEN(buf) -= match.len + (be - me);
p->in.pos -= (be - me); p->in.pos -= (be - me);
} }
@ -336,8 +364,6 @@ static HAWK_INLINE int match_long_rs_bytes (hawk_rtx_t* rtx, hawk_becs_t* buf, h
return ret; return ret;
} }
#endif
int hawk_rtx_readio (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, hawk_ooecs_t* buf) int hawk_rtx_readio (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, hawk_ooecs_t* buf)
{ {
hawk_rio_arg_t* p; hawk_rio_arg_t* p;
@ -650,7 +676,7 @@ int hawk_rtx_readio (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, hawk
} }
} }
if (rrs.ptr && HAWK_RTX_GETVALTYPE (rtx, rs) != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rrs.ptr); if (rrs.ptr && HAWK_RTX_GETVALTYPE(rtx, rs) != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rrs.ptr);
hawk_rtx_refdownval (rtx, rs); hawk_rtx_refdownval (rtx, rs);
return ret; return ret;
@ -664,8 +690,8 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
hawk_rio_impl_t handler; hawk_rio_impl_t handler;
int ret; int ret;
hawk_val_t* rs; hawk_val_t* brs;
hawk_oocs_t rrs; hawk_bcs_t rrs;
hawk_oow_t line_len = 0; hawk_oow_t line_len = 0;
hawk_bch_t c = '\0', pc; hawk_bch_t c = '\0', pc;
@ -677,12 +703,12 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
hawk_becs_clear (buf); hawk_becs_clear (buf);
/* get the record separator */ /* get the record separator */
rs = hawk_rtx_getgbl(rtx, HAWK_GBL_RS); brs = hawk_rtx_getgbl(rtx, HAWK_GBL_BRS);
hawk_rtx_refupval (rtx, rs); hawk_rtx_refupval (rtx, brs);
if (resolve_rs(rtx, rs, &rrs) <= -1) if (resolve_brs(rtx, brs, &rrs) <= -1)
{ {
hawk_rtx_refdownval (rtx, rs); hawk_rtx_refdownval (rtx, brs);
return -1; return -1;
} }
@ -740,7 +766,7 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
/* TODO: handle different line terminator */ /* TODO: handle different line terminator */
/* drop the line terminator from the record /* drop the line terminator from the record
* if RS is a blank line and EOF is reached. */ * if RS is a blank line and EOF is reached. */
if (HAWK_BECS_LASTCHAR(buf) == HAWK_T'\n') if (HAWK_BECS_LASTCHAR(buf) == '\n')
{ {
HAWK_BECS_LEN(buf) -= 1; HAWK_BECS_LEN(buf) -= 1;
if (rtx->awk->opt.trait & HAWK_CRLF) if (rtx->awk->opt.trait & HAWK_CRLF)
@ -760,7 +786,7 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
* At EOF, the match at the end is considered * At EOF, the match at the end is considered
* the longest as there are no more characters * the longest as there are no more characters
* left */ * left */
int n = match_long_rs_bytes(rtx, buf, p); int n = match_long_brs(rtx, buf, p);
if (n != 0) if (n != 0)
{ {
if (n <= -1) ret = -1; if (n <= -1) ret = -1;
@ -970,7 +996,7 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
} }
} }
if (rrs.ptr && HAWK_RTX_GETVALTYPE (rtx, rs) != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rrs.ptr); if (rrs.ptr && HAWK_RTX_GETVALTYPE(rtx, brs) != HAWK_VAL_MBS) hawk_rtx_freemem (rtx, rrs.ptr);
hawk_rtx_refdownval (rtx, rs); hawk_rtx_refdownval (rtx, rs);
return ret; return ret;

View File

@ -352,6 +352,45 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
/* perform actual assignment or assignment-like operation */ /* perform actual assignment or assignment-like operation */
switch (idx) switch (idx)
{ {
case HAWK_GBL_BRS:
{
hawk_bcs_t rss;
/* due to the expression evaluation rule, the
* regular expression can not be an assigned
* value */
HAWK_ASSERT (vtype != HAWK_VAL_REX);
rss.ptr = hawk_rtx_getvalbcstr(rtx, val, &rss.len);
if (!rss.ptr) return -1;
if (rtx->gbl.brs[0])
{
hawk_rtx_freerex (rtx, rtx->gbl.brs[0], rtx->gbl.brs[1]);
rtx->gbl.brs[0] = HAWK_NULL;
rtx->gbl.brs[1] = HAWK_NULL;
}
if (rss.len > 1)
{
hawk_tre_t* rex, * irex;
/* compile the regular expression */
/* TODO: mbs buildrex */
if (hawk_rtx_buildrex(rtx, rss.ptr, rss.len, &rex, &irex) <= -1)
{
hawk_rtx_freevalbcstr (rtx, val, rss.ptr);
return -1;
}
rtx->gbl.brs[0] = rex;
rtx->gbl.brs[1] = irex;
}
hawk_rtx_freevalbcstr (rtx, val, rss.ptr);
break;
}
case HAWK_GBL_CONVFMT: case HAWK_GBL_CONVFMT:
{ {
hawk_oow_t i; hawk_oow_t i;
@ -542,24 +581,13 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
{ {
hawk_oocs_t rss; hawk_oocs_t rss;
if (vtype == HAWK_VAL_STR) /* due to the expression evaluation rule, the
{ * regular expression can not be an assigned
rss = ((hawk_val_str_t*)val)->val; * value */
} HAWK_ASSERT (vtype != HAWK_VAL_REX);
else
{
hawk_rtx_valtostr_out_t out;
/* due to the expression evaluation rule, the rss.ptr = hawk_rtx_getvaloocstr(rtx, val, &rss.len);
* regular expression can not be an assigned if (!rss.ptr) return -1;
* value */
HAWK_ASSERT (vtype != HAWK_VAL_REX);
out.type = HAWK_RTX_VALTOSTR_CPLDUP;
if (hawk_rtx_valtostr(rtx, val, &out) <= -1) return -1;
rss = out.u.cpldup;
}
if (rtx->gbl.rs[0]) if (rtx->gbl.rs[0])
{ {
@ -575,7 +603,7 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
/* compile the regular expression */ /* compile the regular expression */
if (hawk_rtx_buildrex(rtx, rss.ptr, rss.len, &rex, &irex) <= -1) if (hawk_rtx_buildrex(rtx, rss.ptr, rss.len, &rex, &irex) <= -1)
{ {
if (vtype != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rss.ptr); hawk_rtx_freevaloocstr (rtx, val, rss.ptr);
return -1; return -1;
} }
@ -583,8 +611,7 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
rtx->gbl.rs[1] = irex; rtx->gbl.rs[1] = irex;
} }
if (vtype != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rss.ptr); hawk_rtx_freevaloocstr (rtx, val, rss.ptr);
break; break;
} }
@ -642,7 +669,7 @@ HAWK_INLINE void hawk_rtx_setretval (hawk_rtx_t* rtx, hawk_val_t* val)
HAWK_INLINE int hawk_rtx_setgbl (hawk_rtx_t* rtx, int id, hawk_val_t* val) HAWK_INLINE int hawk_rtx_setgbl (hawk_rtx_t* rtx, int id, hawk_val_t* val)
{ {
HAWK_ASSERT (id >= 0 && id < (int)HAWK_ARR_SIZE(rtx->awk->parse.gbls)); HAWK_ASSERT (id >= 0 && id < (int)HAWK_ARR_SIZE(rtx->awk->parse.gbls));
return set_global (rtx, id, HAWK_NULL, val, 0); return set_global(rtx, id, HAWK_NULL, val, 0);
} }
int hawk_rtx_setfilename (hawk_rtx_t* rtx, const hawk_ooch_t* name, hawk_oow_t len) int hawk_rtx_setfilename (hawk_rtx_t* rtx, const hawk_ooch_t* name, hawk_oow_t len)
@ -954,6 +981,8 @@ static int init_rtx (hawk_rtx_t* rtx, hawk_t* awk, hawk_rio_cbs_t* rio)
rtx->rio.chain = HAWK_NULL; rtx->rio.chain = HAWK_NULL;
} }
rtx->gbl.brs[0] = HAWK_NULL;
rtx->gbl.brs[1] = HAWK_NULL;
rtx->gbl.rs[0] = HAWK_NULL; rtx->gbl.rs[0] = HAWK_NULL;
rtx->gbl.rs[1] = HAWK_NULL; rtx->gbl.rs[1] = HAWK_NULL;
rtx->gbl.fs[0] = HAWK_NULL; rtx->gbl.fs[0] = HAWK_NULL;
@ -1001,6 +1030,12 @@ static void fini_rtx (hawk_rtx_t* rtx, int fini_globals)
hawk_rtx_cleario (rtx); hawk_rtx_cleario (rtx);
HAWK_ASSERT (rtx->rio.chain == HAWK_NULL); HAWK_ASSERT (rtx->rio.chain == HAWK_NULL);
if (rtx->gbl.brs[0])
{
hawk_rtx_freerex (rtx, rtx->gbl.brs[0], rtx->gbl.brs[1]);
rtx->gbl.brs[0] = HAWK_NULL;
rtx->gbl.brs[1] = HAWK_NULL;
}
if (rtx->gbl.rs[0]) if (rtx->gbl.rs[0])
{ {
hawk_rtx_freerex (rtx, rtx->gbl.rs[0], rtx->gbl.rs[1]); hawk_rtx_freerex (rtx, rtx->gbl.rs[0], rtx->gbl.rs[1]);
@ -1220,8 +1255,8 @@ static int defaultify_globals (hawk_rtx_t* rtx)
static struct gtab_t gtab[7] = static struct gtab_t gtab[7] =
{ {
{ HAWK_GBL_CONVFMT, { DEFAULT_CONVFMT, DEFAULT_CONVFMT } }, { HAWK_GBL_CONVFMT, { DEFAULT_CONVFMT, DEFAULT_CONVFMT } },
{ HAWK_GBL_FILENAME, { HAWK_NULL, HAWK_NULL } }, { HAWK_GBL_FILENAME, { HAWK_NULL, HAWK_NULL } },
{ HAWK_GBL_OFILENAME, { HAWK_NULL, HAWK_NULL } }, { HAWK_GBL_OFILENAME, { HAWK_NULL, HAWK_NULL } },
{ HAWK_GBL_OFMT, { DEFAULT_OFMT, DEFAULT_OFMT } }, { HAWK_GBL_OFMT, { DEFAULT_OFMT, DEFAULT_OFMT } },
{ HAWK_GBL_OFS, { DEFAULT_OFS, DEFAULT_OFS } }, { HAWK_GBL_OFS, { DEFAULT_OFS, DEFAULT_OFS } },
{ HAWK_GBL_ORS, { DEFAULT_ORS, DEFAULT_ORS_CRLF } }, { HAWK_GBL_ORS, { DEFAULT_ORS, DEFAULT_ORS_CRLF } },