From b7fecc317231305d086992636613298755ae08d7 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Tue, 14 Jan 2020 14:55:34 +0000 Subject: [PATCH] added a new global variable BRS for byte reading. wrote some more functions to support byte reading --- hawk/lib/hawk-prv.h | 1 + hawk/lib/hawk.c | 2 +- hawk/lib/hawk.h | 4 ++- hawk/lib/parse.c | 3 ++ hawk/lib/rio.c | 76 ++++++++++++++++++++++++++++-------------- hawk/lib/run.c | 81 ++++++++++++++++++++++++++++++++------------- 6 files changed, 117 insertions(+), 50 deletions(-) diff --git a/hawk/lib/hawk-prv.h b/hawk/lib/hawk-prv.h index 65e47267..91841d7e 100644 --- a/hawk/lib/hawk-prv.h +++ b/hawk/lib/hawk-prv.h @@ -410,6 +410,7 @@ struct hawk_rtx_t struct { + void* brs[2]; void* rs[2]; void* fs[2]; int ignorecase; diff --git a/hawk/lib/hawk.c b/hawk/lib/hawk.c index 5bda8ecb..0ca2e809 100644 --- a/hawk/lib/hawk.c +++ b/hawk/lib/hawk.c @@ -337,7 +337,7 @@ static hawk_rbt_walk_t unload_module (hawk_rbt_t* rbt, hawk_rbt_pair_t* pair, vo hawk_t* awk = (hawk_t*)ctx; hawk_mod_data_t* md; - md = HAWK_RBT_VPTR(pair); + md = HAWK_RBT_VPTR(pair); if (md->mod.unload) md->mod.unload (&md->mod, awk); if (md->handle) awk->prm.modclose (awk, md->handle); diff --git a/hawk/lib/hawk.h b/hawk/lib/hawk.h index e1769143..6217d1ef 100644 --- a/hawk/lib/hawk.h +++ b/hawk/lib/hawk.h @@ -1288,6 +1288,7 @@ typedef const hawk_ooch_t* (*hawk_errstr_t) ( enum hawk_gbl_id_t { /* this table should match gtab in parse.c. + * * in addition, hawk_rtx_setgbl also counts * on the order of these values. * @@ -1298,6 +1299,7 @@ enum hawk_gbl_id_t * but is this check really necessary??? 
*/ + HAWK_GBL_BRS, HAWK_GBL_CONVFMT, HAWK_GBL_FILENAME, HAWK_GBL_FNR, @@ -1318,7 +1320,7 @@ enum hawk_gbl_id_t /* these are not not the actual IDs and are used internally only * Make sure you update these values properly if you add more * ID definitions, however */ - HAWK_MIN_GBL_ID = HAWK_GBL_CONVFMT, + HAWK_MIN_GBL_ID = HAWK_GBL_BRS, HAWK_MAX_GBL_ID = HAWK_GBL_SUBSEP }; typedef enum hawk_gbl_id_t hawk_gbl_id_t; diff --git a/hawk/lib/parse.c b/hawk/lib/parse.c index 9cd26d48..a7840f41 100644 --- a/hawk/lib/parse.c +++ b/hawk/lib/parse.c @@ -324,6 +324,8 @@ static global_t gtab[] = * this table must match the order of the hawk_gbl_id_t enumerators */ + { HAWK_T("BRS"), 3, 0 }, + /* output real-to-str conversion format for other cases than 'print' */ { HAWK_T("CONVFMT"), 7, 0 }, @@ -360,6 +362,7 @@ static global_t gtab[] = { HAWK_T("ORS"), 3, HAWK_RIO }, { HAWK_T("RLENGTH"), 7, 0 }, + { HAWK_T("RS"), 2, 0 }, { HAWK_T("RSTART"), 6, 0 }, diff --git a/hawk/lib/rio.c b/hawk/lib/rio.c index 08f38e9b..508fe905 100644 --- a/hawk/lib/rio.c +++ b/hawk/lib/rio.c @@ -196,7 +196,7 @@ static HAWK_INLINE int resolve_rs (hawk_rtx_t* rtx, hawk_val_t* rs, hawk_oocs_t* hawk_val_type_t rs_vtype; - rs_vtype = HAWK_RTX_GETVALTYPE (rtx, rs); + rs_vtype = HAWK_RTX_GETVALTYPE(rtx, rs); switch (rs_vtype) { @@ -219,6 +219,36 @@ static HAWK_INLINE int resolve_rs (hawk_rtx_t* rtx, hawk_val_t* rs, hawk_oocs_t* return ret; } +static HAWK_INLINE int resolve_brs (hawk_rtx_t* rtx, hawk_val_t* brs, hawk_bcs_t* rrs) +{ + /* record separator for bytes reading */ + + int ret = 0; + hawk_val_type_t brs_vtype; + + brs_vtype = HAWK_RTX_GETVALTYPE(rtx, brs); + + switch (brs_vtype) + { + case HAWK_VAL_NIL: + rrs->ptr = HAWK_NULL; + rrs->len = 0; + break; + + case HAWK_VAL_MBS: + rrs->ptr = ((hawk_val_mbs_t*)brs)->val.ptr; + rrs->len = ((hawk_val_mbs_t*)brs)->val.len; + break; + + default: + rrs->ptr = hawk_rtx_valtobcstrdup(rtx, brs, &rrs->len); + if (rrs->ptr == HAWK_NULL) ret = -1; + break; + } + + return 
ret; +} + static HAWK_INLINE int match_long_rs (hawk_rtx_t* rtx, hawk_ooecs_t* buf, hawk_rio_arg_t* p) { hawk_oocs_t match; @@ -276,18 +306,16 @@ static HAWK_INLINE int match_long_rs (hawk_rtx_t* rtx, hawk_ooecs_t* buf, hawk_r return ret; } - -#if 0 - -static HAWK_INLINE int match_long_rs_bytes (hawk_rtx_t* rtx, hawk_becs_t* buf, hawk_rio_arg_t* p) +static HAWK_INLINE int match_long_brs(hawk_rtx_t* rtx, hawk_becs_t* buf, hawk_rio_arg_t* p) { - hawk_oocs_t match; + hawk_bcs_t match; int ret; - HAWK_ASSERT (rtx->gbl.rs[0] != HAWK_NULL); - HAWK_ASSERT (rtx->gbl.rs[1] != HAWK_NULL); + HAWK_ASSERT (rtx->gbl.brs[0] != HAWK_NULL); + HAWK_ASSERT (rtx->gbl.brs[1] != HAWK_NULL); - ret = hawk_rtx_matchrex(rtx, rtx->gbl.rs[rtx->gbl.ignorecase], HAWK_BECS_OOCS(buf), HAWK_BECS_OOCS(buf), &match, HAWK_NULL); +/*TODO: mbs match rex */ + ret = hawk_rtx_matchrex(rtx, rtx->gbl.brs[rtx->gbl.ignorecase], HAWK_BECS_OOCS(buf), HAWK_BECS_OOCS(buf), &match, HAWK_NULL); if (ret >= 1) { if (p->in.eof) @@ -314,14 +342,14 @@ static HAWK_INLINE int match_long_rs_bytes (hawk_rtx_t* rtx, hawk_becs_t* buf, h * of the buffer is not indeterministic as we don't have the * full input yet. */ - const hawk_ooch_t* be = HAWK_BECS_PTR(buf) + HAWK_BECS_LEN(buf); - const hawk_ooch_t* me = match.ptr + match.len; + const hawk_bch_t* be = HAWK_BECS_PTR(buf) + HAWK_BECS_LEN(buf); + const hawk_bch_t* me = match.ptr + match.len; if (me < be) { /* the match ends before the ending boundary. - * it must be the longest match. drop the RS part - * and the characters after RS. */ + * it must be the longest match. drop the BRS part + * and the characters after BRS. 
*/ HAWK_BECS_LEN(buf) -= match.len + (be - me); p->in.pos -= (be - me); } @@ -336,8 +364,6 @@ static HAWK_INLINE int match_long_rs_bytes (hawk_rtx_t* rtx, hawk_becs_t* buf, h return ret; } -#endif - int hawk_rtx_readio (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, hawk_ooecs_t* buf) { hawk_rio_arg_t* p; @@ -650,7 +676,7 @@ int hawk_rtx_readio (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, hawk } } - if (rrs.ptr && HAWK_RTX_GETVALTYPE (rtx, rs) != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rrs.ptr); + if (rrs.ptr && HAWK_RTX_GETVALTYPE(rtx, rs) != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rrs.ptr); hawk_rtx_refdownval (rtx, rs); return ret; @@ -664,8 +690,8 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, hawk_rio_impl_t handler; int ret; - hawk_val_t* rs; - hawk_oocs_t rrs; + hawk_val_t* brs; + hawk_bcs_t rrs; hawk_oow_t line_len = 0; hawk_bch_t c = '\0', pc; @@ -677,12 +703,12 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, hawk_becs_clear (buf); /* get the record separator */ - rs = hawk_rtx_getgbl(rtx, HAWK_GBL_RS); - hawk_rtx_refupval (rtx, rs); + brs = hawk_rtx_getgbl(rtx, HAWK_GBL_BRS); + hawk_rtx_refupval (rtx, brs); - if (resolve_rs(rtx, rs, &rrs) <= -1) + if (resolve_brs(rtx, brs, &rrs) <= -1) { - hawk_rtx_refdownval (rtx, rs); + hawk_rtx_refdownval (rtx, brs); return -1; } @@ -740,7 +766,7 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, /* TODO: handle different line terminator */ /* drop the line terminator from the record * if RS is a blank line and EOF is reached. 
*/ - if (HAWK_BECS_LASTCHAR(buf) == HAWK_T'\n') + if (HAWK_BECS_LASTCHAR(buf) == '\n') { HAWK_BECS_LEN(buf) -= 1; if (rtx->awk->opt.trait & HAWK_CRLF) @@ -760,7 +786,7 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, * At EOF, the match at the end is considered * the longest as there are no more characters * left */ - int n = match_long_rs_bytes(rtx, buf, p); + int n = match_long_brs(rtx, buf, p); if (n != 0) { if (n <= -1) ret = -1; @@ -970,7 +996,7 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, } } - if (rrs.ptr && HAWK_RTX_GETVALTYPE (rtx, rs) != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rrs.ptr); + if (rrs.ptr && HAWK_RTX_GETVALTYPE(rtx, brs) != HAWK_VAL_MBS) hawk_rtx_freemem (rtx, rrs.ptr); - hawk_rtx_refdownval (rtx, rs); + hawk_rtx_refdownval (rtx, brs); return ret; diff --git a/hawk/lib/run.c b/hawk/lib/run.c index 89b59e4d..9e8dcd67 100644 --- a/hawk/lib/run.c +++ b/hawk/lib/run.c @@ -352,6 +352,45 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t /* perform actual assignment or assignment-like operation */ switch (idx) { + case HAWK_GBL_BRS: + { + hawk_bcs_t rss; + + /* due to the expression evaluation rule, the + * regular expression can not be an assigned + * value */ + HAWK_ASSERT (vtype != HAWK_VAL_REX); + + rss.ptr = hawk_rtx_getvalbcstr(rtx, val, &rss.len); + if (!rss.ptr) return -1; + + if (rtx->gbl.brs[0]) + { + hawk_rtx_freerex (rtx, rtx->gbl.brs[0], rtx->gbl.brs[1]); + rtx->gbl.brs[0] = HAWK_NULL; + rtx->gbl.brs[1] = HAWK_NULL; + } + + if (rss.len > 1) + { + hawk_tre_t* rex, * irex; + + /* compile the regular expression */ +/* TODO: mbs buildrex */ + if (hawk_rtx_buildrex(rtx, rss.ptr, rss.len, &rex, &irex) <= -1) + { + hawk_rtx_freevalbcstr (rtx, val, rss.ptr); + return -1; + } + + rtx->gbl.brs[0] = rex; + rtx->gbl.brs[1] = irex; + } + + hawk_rtx_freevalbcstr (rtx, val, rss.ptr); + break; + } + case HAWK_GBL_CONVFMT: { hawk_oow_t i; @@ -542,24 +581,13 @@ static int set_global 
(hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t { hawk_oocs_t rss; - if (vtype == HAWK_VAL_STR) - { - rss = ((hawk_val_str_t*)val)->val; - } - else - { - hawk_rtx_valtostr_out_t out; + /* due to the expression evaluation rule, the + * regular expression can not be an assigned + * value */ + HAWK_ASSERT (vtype != HAWK_VAL_REX); - /* due to the expression evaluation rule, the - * regular expression can not be an assigned - * value */ - HAWK_ASSERT (vtype != HAWK_VAL_REX); - - out.type = HAWK_RTX_VALTOSTR_CPLDUP; - if (hawk_rtx_valtostr(rtx, val, &out) <= -1) return -1; - - rss = out.u.cpldup; - } + rss.ptr = hawk_rtx_getvaloocstr(rtx, val, &rss.len); + if (!rss.ptr) return -1; if (rtx->gbl.rs[0]) { @@ -575,7 +603,7 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t /* compile the regular expression */ if (hawk_rtx_buildrex(rtx, rss.ptr, rss.len, &rex, &irex) <= -1) { - if (vtype != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rss.ptr); + hawk_rtx_freevaloocstr (rtx, val, rss.ptr); return -1; } @@ -583,8 +611,7 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t rtx->gbl.rs[1] = irex; } - if (vtype != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rss.ptr); - + hawk_rtx_freevaloocstr (rtx, val, rss.ptr); break; } @@ -642,7 +669,7 @@ HAWK_INLINE void hawk_rtx_setretval (hawk_rtx_t* rtx, hawk_val_t* val) HAWK_INLINE int hawk_rtx_setgbl (hawk_rtx_t* rtx, int id, hawk_val_t* val) { HAWK_ASSERT (id >= 0 && id < (int)HAWK_ARR_SIZE(rtx->awk->parse.gbls)); - return set_global (rtx, id, HAWK_NULL, val, 0); + return set_global(rtx, id, HAWK_NULL, val, 0); } int hawk_rtx_setfilename (hawk_rtx_t* rtx, const hawk_ooch_t* name, hawk_oow_t len) @@ -954,6 +981,8 @@ static int init_rtx (hawk_rtx_t* rtx, hawk_t* awk, hawk_rio_cbs_t* rio) rtx->rio.chain = HAWK_NULL; } + rtx->gbl.brs[0] = HAWK_NULL; + rtx->gbl.brs[1] = HAWK_NULL; rtx->gbl.rs[0] = HAWK_NULL; rtx->gbl.rs[1] = HAWK_NULL; rtx->gbl.fs[0] = HAWK_NULL; @@ -1001,6 
+1030,12 @@ static void fini_rtx (hawk_rtx_t* rtx, int fini_globals) hawk_rtx_cleario (rtx); HAWK_ASSERT (rtx->rio.chain == HAWK_NULL); + if (rtx->gbl.brs[0]) + { + hawk_rtx_freerex (rtx, rtx->gbl.brs[0], rtx->gbl.brs[1]); + rtx->gbl.brs[0] = HAWK_NULL; + rtx->gbl.brs[1] = HAWK_NULL; + } if (rtx->gbl.rs[0]) { hawk_rtx_freerex (rtx, rtx->gbl.rs[0], rtx->gbl.rs[1]); @@ -1220,8 +1255,8 @@ static int defaultify_globals (hawk_rtx_t* rtx) static struct gtab_t gtab[7] = { { HAWK_GBL_CONVFMT, { DEFAULT_CONVFMT, DEFAULT_CONVFMT } }, - { HAWK_GBL_FILENAME, { HAWK_NULL, HAWK_NULL } }, - { HAWK_GBL_OFILENAME, { HAWK_NULL, HAWK_NULL } }, + { HAWK_GBL_FILENAME, { HAWK_NULL, HAWK_NULL } }, + { HAWK_GBL_OFILENAME, { HAWK_NULL, HAWK_NULL } }, { HAWK_GBL_OFMT, { DEFAULT_OFMT, DEFAULT_OFMT } }, { HAWK_GBL_OFS, { DEFAULT_OFS, DEFAULT_OFS } }, { HAWK_GBL_ORS, { DEFAULT_ORS, DEFAULT_ORS_CRLF } },