added a new global variable BRS for byte reading.

wrote some more functions to support byte reading
This commit is contained in:
hyung-hwan 2020-01-14 14:55:34 +00:00
parent 32344f2c0c
commit b7fecc3172
6 changed files with 117 additions and 50 deletions

View File

@ -410,6 +410,7 @@ struct hawk_rtx_t
struct
{
void* brs[2];
void* rs[2];
void* fs[2];
int ignorecase;

View File

@ -1288,6 +1288,7 @@ typedef const hawk_ooch_t* (*hawk_errstr_t) (
enum hawk_gbl_id_t
{
/* this table should match gtab in parse.c.
*
* in addition, hawk_rtx_setgbl also counts
* on the order of these values.
*
@ -1298,6 +1299,7 @@ enum hawk_gbl_id_t
* but is this check really necessary???
*/
HAWK_GBL_BRS,
HAWK_GBL_CONVFMT,
HAWK_GBL_FILENAME,
HAWK_GBL_FNR,
@ -1318,7 +1320,7 @@ enum hawk_gbl_id_t
/* these are not the actual IDs and are used internally only.
* Make sure you update these values properly if you add more
* ID definitions, however */
HAWK_MIN_GBL_ID = HAWK_GBL_CONVFMT,
HAWK_MIN_GBL_ID = HAWK_GBL_BRS,
HAWK_MAX_GBL_ID = HAWK_GBL_SUBSEP
};
typedef enum hawk_gbl_id_t hawk_gbl_id_t;

View File

@ -324,6 +324,8 @@ static global_t gtab[] =
* this table must match the order of the hawk_gbl_id_t enumerators
*/
{ HAWK_T("BRS"), 3, 0 },
/* output real-to-str conversion format for other cases than 'print' */
{ HAWK_T("CONVFMT"), 7, 0 },
@ -360,6 +362,7 @@ static global_t gtab[] =
{ HAWK_T("ORS"), 3, HAWK_RIO },
{ HAWK_T("RLENGTH"), 7, 0 },
{ HAWK_T("RS"), 2, 0 },
{ HAWK_T("RSTART"), 6, 0 },

View File

@ -196,7 +196,7 @@ static HAWK_INLINE int resolve_rs (hawk_rtx_t* rtx, hawk_val_t* rs, hawk_oocs_t*
hawk_val_type_t rs_vtype;
rs_vtype = HAWK_RTX_GETVALTYPE (rtx, rs);
rs_vtype = HAWK_RTX_GETVALTYPE(rtx, rs);
switch (rs_vtype)
{
@ -219,6 +219,36 @@ static HAWK_INLINE int resolve_rs (hawk_rtx_t* rtx, hawk_val_t* rs, hawk_oocs_t*
return ret;
}
static HAWK_INLINE int resolve_brs (hawk_rtx_t* rtx, hawk_val_t* brs, hawk_bcs_t* rrs)
{
	/*
	 * Resolve the BRS value (the record separator for bytes reading)
	 * into the byte string 'rrs'.
	 *
	 *  - HAWK_VAL_NIL: 'rrs' is set to a null pointer with zero length.
	 *  - HAWK_VAL_MBS: 'rrs' points directly at the bytes held by the
	 *    value. nothing is copied.
	 *  - any other type: 'rrs' receives the result of
	 *    hawk_rtx_valtobcstrdup().
	 *    NOTE(review): this result is presumably a duplicate that the
	 *    caller must release - confirm against the callers.
	 *
	 * returns 0 on success and -1 if hawk_rtx_valtobcstrdup() returned
	 * HAWK_NULL.
	 */
	hawk_val_type_t vtype;

	vtype = HAWK_RTX_GETVALTYPE(rtx, brs);

	if (vtype == HAWK_VAL_NIL)
	{
		/* no separator is set at all */
		rrs->ptr = HAWK_NULL;
		rrs->len = 0;
		return 0;
	}

	if (vtype == HAWK_VAL_MBS)
	{
		/* borrow the byte string stored inside the value */
		hawk_val_mbs_t* mbs = (hawk_val_mbs_t*)brs;
		rrs->ptr = mbs->val.ptr;
		rrs->len = mbs->val.len;
		return 0;
	}

	/* every other value type goes through a conversion to a byte string */
	rrs->ptr = hawk_rtx_valtobcstrdup(rtx, brs, &rrs->len);
	return (rrs->ptr == HAWK_NULL)? -1: 0;
}
static HAWK_INLINE int match_long_rs (hawk_rtx_t* rtx, hawk_ooecs_t* buf, hawk_rio_arg_t* p)
{
hawk_oocs_t match;
@ -276,18 +306,16 @@ static HAWK_INLINE int match_long_rs (hawk_rtx_t* rtx, hawk_ooecs_t* buf, hawk_r
return ret;
}
#if 0
static HAWK_INLINE int match_long_rs_bytes (hawk_rtx_t* rtx, hawk_becs_t* buf, hawk_rio_arg_t* p)
static HAWK_INLINE int match_long_brs(hawk_rtx_t* rtx, hawk_becs_t* buf, hawk_rio_arg_t* p)
{
hawk_oocs_t match;
hawk_bcs_t match;
int ret;
HAWK_ASSERT (rtx->gbl.rs[0] != HAWK_NULL);
HAWK_ASSERT (rtx->gbl.rs[1] != HAWK_NULL);
HAWK_ASSERT (rtx->gbl.brs[0] != HAWK_NULL);
HAWK_ASSERT (rtx->gbl.brs[1] != HAWK_NULL);
ret = hawk_rtx_matchrex(rtx, rtx->gbl.rs[rtx->gbl.ignorecase], HAWK_BECS_OOCS(buf), HAWK_BECS_OOCS(buf), &match, HAWK_NULL);
/*TODO: mbs match rex */
ret = hawk_rtx_matchrex(rtx, rtx->gbl.brs[rtx->gbl.ignorecase], HAWK_BECS_OOCS(buf), HAWK_BECS_OOCS(buf), &match, HAWK_NULL);
if (ret >= 1)
{
if (p->in.eof)
@ -314,14 +342,14 @@ static HAWK_INLINE int match_long_rs_bytes (hawk_rtx_t* rtx, hawk_becs_t* buf, h
* of the buffer is not deterministic as we don't have the
* full input yet.
*/
const hawk_ooch_t* be = HAWK_BECS_PTR(buf) + HAWK_BECS_LEN(buf);
const hawk_ooch_t* me = match.ptr + match.len;
const hawk_bch_t* be = HAWK_BECS_PTR(buf) + HAWK_BECS_LEN(buf);
const hawk_bch_t* me = match.ptr + match.len;
if (me < be)
{
/* the match ends before the ending boundary.
* it must be the longest match. drop the RS part
* and the characters after RS. */
* it must be the longest match. drop the BRS part
* and the characters after BRS. */
HAWK_BECS_LEN(buf) -= match.len + (be - me);
p->in.pos -= (be - me);
}
@ -336,8 +364,6 @@ static HAWK_INLINE int match_long_rs_bytes (hawk_rtx_t* rtx, hawk_becs_t* buf, h
return ret;
}
#endif
int hawk_rtx_readio (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, hawk_ooecs_t* buf)
{
hawk_rio_arg_t* p;
@ -650,7 +676,7 @@ int hawk_rtx_readio (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, hawk
}
}
if (rrs.ptr && HAWK_RTX_GETVALTYPE (rtx, rs) != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rrs.ptr);
if (rrs.ptr && HAWK_RTX_GETVALTYPE(rtx, rs) != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rrs.ptr);
hawk_rtx_refdownval (rtx, rs);
return ret;
@ -664,8 +690,8 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
hawk_rio_impl_t handler;
int ret;
hawk_val_t* rs;
hawk_oocs_t rrs;
hawk_val_t* brs;
hawk_bcs_t rrs;
hawk_oow_t line_len = 0;
hawk_bch_t c = '\0', pc;
@ -677,12 +703,12 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
hawk_becs_clear (buf);
/* get the record separator */
rs = hawk_rtx_getgbl(rtx, HAWK_GBL_RS);
hawk_rtx_refupval (rtx, rs);
brs = hawk_rtx_getgbl(rtx, HAWK_GBL_BRS);
hawk_rtx_refupval (rtx, brs);
if (resolve_rs(rtx, rs, &rrs) <= -1)
if (resolve_brs(rtx, brs, &rrs) <= -1)
{
hawk_rtx_refdownval (rtx, rs);
hawk_rtx_refdownval (rtx, brs);
return -1;
}
@ -740,7 +766,7 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
/* TODO: handle different line terminator */
/* drop the line terminator from the record
* if BRS is a blank line and EOF is reached. */
if (HAWK_BECS_LASTCHAR(buf) == HAWK_T'\n')
if (HAWK_BECS_LASTCHAR(buf) == '\n')
{
HAWK_BECS_LEN(buf) -= 1;
if (rtx->awk->opt.trait & HAWK_CRLF)
@ -760,7 +786,7 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
* At EOF, the match at the end is considered
* the longest as there are no more characters
* left */
int n = match_long_rs_bytes(rtx, buf, p);
int n = match_long_brs(rtx, buf, p);
if (n != 0)
{
if (n <= -1) ret = -1;
@ -970,7 +996,7 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
}
}
if (rrs.ptr && HAWK_RTX_GETVALTYPE (rtx, rs) != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rrs.ptr);
if (rrs.ptr && HAWK_RTX_GETVALTYPE(rtx, brs) != HAWK_VAL_MBS) hawk_rtx_freemem (rtx, rrs.ptr);
hawk_rtx_refdownval (rtx, rs);
return ret;

View File

@ -352,6 +352,45 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
/* perform actual assignment or assignment-like operation */
switch (idx)
{
case HAWK_GBL_BRS:
{
hawk_bcs_t rss;
/* due to the expression evaluation rule, the
* regular expression can not be an assigned
* value */
HAWK_ASSERT (vtype != HAWK_VAL_REX);
rss.ptr = hawk_rtx_getvalbcstr(rtx, val, &rss.len);
if (!rss.ptr) return -1;
if (rtx->gbl.brs[0])
{
hawk_rtx_freerex (rtx, rtx->gbl.brs[0], rtx->gbl.brs[1]);
rtx->gbl.brs[0] = HAWK_NULL;
rtx->gbl.brs[1] = HAWK_NULL;
}
if (rss.len > 1)
{
hawk_tre_t* rex, * irex;
/* compile the regular expression */
/* TODO: mbs buildrex */
if (hawk_rtx_buildrex(rtx, rss.ptr, rss.len, &rex, &irex) <= -1)
{
hawk_rtx_freevalbcstr (rtx, val, rss.ptr);
return -1;
}
rtx->gbl.brs[0] = rex;
rtx->gbl.brs[1] = irex;
}
hawk_rtx_freevalbcstr (rtx, val, rss.ptr);
break;
}
case HAWK_GBL_CONVFMT:
{
hawk_oow_t i;
@ -542,24 +581,13 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
{
hawk_oocs_t rss;
if (vtype == HAWK_VAL_STR)
{
rss = ((hawk_val_str_t*)val)->val;
}
else
{
hawk_rtx_valtostr_out_t out;
/* due to the expression evaluation rule, the
* regular expression can not be an assigned
* value */
HAWK_ASSERT (vtype != HAWK_VAL_REX);
out.type = HAWK_RTX_VALTOSTR_CPLDUP;
if (hawk_rtx_valtostr(rtx, val, &out) <= -1) return -1;
rss = out.u.cpldup;
}
rss.ptr = hawk_rtx_getvaloocstr(rtx, val, &rss.len);
if (!rss.ptr) return -1;
if (rtx->gbl.rs[0])
{
@ -575,7 +603,7 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
/* compile the regular expression */
if (hawk_rtx_buildrex(rtx, rss.ptr, rss.len, &rex, &irex) <= -1)
{
if (vtype != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rss.ptr);
hawk_rtx_freevaloocstr (rtx, val, rss.ptr);
return -1;
}
@ -583,8 +611,7 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
rtx->gbl.rs[1] = irex;
}
if (vtype != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rss.ptr);
hawk_rtx_freevaloocstr (rtx, val, rss.ptr);
break;
}
@ -642,7 +669,7 @@ HAWK_INLINE void hawk_rtx_setretval (hawk_rtx_t* rtx, hawk_val_t* val)
HAWK_INLINE int hawk_rtx_setgbl (hawk_rtx_t* rtx, int id, hawk_val_t* val)
{
	/*
	 * Set the global variable identified by 'id' to 'val'.
	 *
	 * 'id' must be a valid index into rtx->awk->parse.gbls; this is
	 * checked with an assertion only. The assignment itself is delegated
	 * to set_global() with no variable node (HAWK_NULL), and the value
	 * returned by set_global() is passed back to the caller unchanged.
	 */
	HAWK_ASSERT (id >= 0 && id < (int)HAWK_ARR_SIZE(rtx->awk->parse.gbls));

	/* a single call only - a second identical return statement that
	 * followed this one could never be reached and has been dropped */
	return set_global(rtx, id, HAWK_NULL, val, 0);
}
int hawk_rtx_setfilename (hawk_rtx_t* rtx, const hawk_ooch_t* name, hawk_oow_t len)
@ -954,6 +981,8 @@ static int init_rtx (hawk_rtx_t* rtx, hawk_t* awk, hawk_rio_cbs_t* rio)
rtx->rio.chain = HAWK_NULL;
}
rtx->gbl.brs[0] = HAWK_NULL;
rtx->gbl.brs[1] = HAWK_NULL;
rtx->gbl.rs[0] = HAWK_NULL;
rtx->gbl.rs[1] = HAWK_NULL;
rtx->gbl.fs[0] = HAWK_NULL;
@ -1001,6 +1030,12 @@ static void fini_rtx (hawk_rtx_t* rtx, int fini_globals)
hawk_rtx_cleario (rtx);
HAWK_ASSERT (rtx->rio.chain == HAWK_NULL);
if (rtx->gbl.brs[0])
{
hawk_rtx_freerex (rtx, rtx->gbl.brs[0], rtx->gbl.brs[1]);
rtx->gbl.brs[0] = HAWK_NULL;
rtx->gbl.brs[1] = HAWK_NULL;
}
if (rtx->gbl.rs[0])
{
hawk_rtx_freerex (rtx, rtx->gbl.rs[0], rtx->gbl.rs[1]);