added a new global variable BRS for byte reading.
wrote some more functions to support byte reading
This commit is contained in:
parent
32344f2c0c
commit
b7fecc3172
@ -410,6 +410,7 @@ struct hawk_rtx_t
|
||||
|
||||
struct
|
||||
{
|
||||
void* brs[2];
|
||||
void* rs[2];
|
||||
void* fs[2];
|
||||
int ignorecase;
|
||||
|
@ -1288,6 +1288,7 @@ typedef const hawk_ooch_t* (*hawk_errstr_t) (
|
||||
enum hawk_gbl_id_t
|
||||
{
|
||||
/* this table should match gtab in parse.c.
|
||||
*
|
||||
* in addition, hawk_rtx_setgbl also counts
|
||||
* on the order of these values.
|
||||
*
|
||||
@ -1298,6 +1299,7 @@ enum hawk_gbl_id_t
|
||||
* but is this check really necessary???
|
||||
*/
|
||||
|
||||
HAWK_GBL_BRS,
|
||||
HAWK_GBL_CONVFMT,
|
||||
HAWK_GBL_FILENAME,
|
||||
HAWK_GBL_FNR,
|
||||
@ -1318,7 +1320,7 @@ enum hawk_gbl_id_t
|
||||
/* these are not the actual IDs and are used internally only
|
||||
* Make sure you update these values properly if you add more
|
||||
* ID definitions, however */
|
||||
HAWK_MIN_GBL_ID = HAWK_GBL_CONVFMT,
|
||||
HAWK_MIN_GBL_ID = HAWK_GBL_BRS,
|
||||
HAWK_MAX_GBL_ID = HAWK_GBL_SUBSEP
|
||||
};
|
||||
typedef enum hawk_gbl_id_t hawk_gbl_id_t;
|
||||
|
@ -324,6 +324,8 @@ static global_t gtab[] =
|
||||
* this table must match the order of the hawk_gbl_id_t enumerators
|
||||
*/
|
||||
|
||||
{ HAWK_T("BRS"), 3, 0 },
|
||||
|
||||
/* output real-to-str conversion format for other cases than 'print' */
|
||||
{ HAWK_T("CONVFMT"), 7, 0 },
|
||||
|
||||
@ -360,6 +362,7 @@ static global_t gtab[] =
|
||||
{ HAWK_T("ORS"), 3, HAWK_RIO },
|
||||
|
||||
{ HAWK_T("RLENGTH"), 7, 0 },
|
||||
|
||||
{ HAWK_T("RS"), 2, 0 },
|
||||
|
||||
{ HAWK_T("RSTART"), 6, 0 },
|
||||
|
@ -196,7 +196,7 @@ static HAWK_INLINE int resolve_rs (hawk_rtx_t* rtx, hawk_val_t* rs, hawk_oocs_t*
|
||||
hawk_val_type_t rs_vtype;
|
||||
|
||||
|
||||
rs_vtype = HAWK_RTX_GETVALTYPE (rtx, rs);
|
||||
rs_vtype = HAWK_RTX_GETVALTYPE(rtx, rs);
|
||||
|
||||
switch (rs_vtype)
|
||||
{
|
||||
@ -219,6 +219,36 @@ static HAWK_INLINE int resolve_rs (hawk_rtx_t* rtx, hawk_val_t* rs, hawk_oocs_t*
|
||||
return ret;
|
||||
}
|
||||
|
||||
static HAWK_INLINE int resolve_brs (hawk_rtx_t* rtx, hawk_val_t* brs, hawk_bcs_t* rrs)
{
	/* resolve the record separator used for bytes reading into a
	 * byte string described by 'rrs'.
	 *
	 *  - a nil value yields a null pointer and a zero length.
	 *  - a byte-string value (HAWK_VAL_MBS) is referenced in place;
	 *    nothing is copied.
	 *  - a value of any other type is converted with
	 *    hawk_rtx_valtobcstrdup(). NOTE(review): judging by the name,
	 *    the result is presumably a duplicate that the caller must
	 *    release - confirm against the callers.
	 *
	 * returns 0 on success and -1 if the conversion fails. */
	hawk_val_type_t vtype = HAWK_RTX_GETVALTYPE(rtx, brs);

	if (vtype == HAWK_VAL_NIL)
	{
		/* no separator is set */
		rrs->ptr = HAWK_NULL;
		rrs->len = 0;
		return 0;
	}

	if (vtype == HAWK_VAL_MBS)
	{
		/* borrow the buffer held by the value itself */
		hawk_val_mbs_t* mbs = (hawk_val_mbs_t*)brs;
		rrs->ptr = mbs->val.ptr;
		rrs->len = mbs->val.len;
		return 0;
	}

	/* everything else goes through the byte-string conversion */
	rrs->ptr = hawk_rtx_valtobcstrdup(rtx, brs, &rrs->len);
	return (rrs->ptr == HAWK_NULL)? -1: 0;
}
|
||||
|
||||
static HAWK_INLINE int match_long_rs (hawk_rtx_t* rtx, hawk_ooecs_t* buf, hawk_rio_arg_t* p)
|
||||
{
|
||||
hawk_oocs_t match;
|
||||
@ -276,18 +306,16 @@ static HAWK_INLINE int match_long_rs (hawk_rtx_t* rtx, hawk_ooecs_t* buf, hawk_r
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
#if 0
|
||||
|
||||
static HAWK_INLINE int match_long_rs_bytes (hawk_rtx_t* rtx, hawk_becs_t* buf, hawk_rio_arg_t* p)
|
||||
static HAWK_INLINE int match_long_brs(hawk_rtx_t* rtx, hawk_becs_t* buf, hawk_rio_arg_t* p)
|
||||
{
|
||||
hawk_oocs_t match;
|
||||
hawk_bcs_t match;
|
||||
int ret;
|
||||
|
||||
HAWK_ASSERT (rtx->gbl.rs[0] != HAWK_NULL);
|
||||
HAWK_ASSERT (rtx->gbl.rs[1] != HAWK_NULL);
|
||||
HAWK_ASSERT (rtx->gbl.brs[0] != HAWK_NULL);
|
||||
HAWK_ASSERT (rtx->gbl.brs[1] != HAWK_NULL);
|
||||
|
||||
ret = hawk_rtx_matchrex(rtx, rtx->gbl.rs[rtx->gbl.ignorecase], HAWK_BECS_OOCS(buf), HAWK_BECS_OOCS(buf), &match, HAWK_NULL);
|
||||
/*TODO: mbs match rex */
|
||||
ret = hawk_rtx_matchrex(rtx, rtx->gbl.brs[rtx->gbl.ignorecase], HAWK_BECS_OOCS(buf), HAWK_BECS_OOCS(buf), &match, HAWK_NULL);
|
||||
if (ret >= 1)
|
||||
{
|
||||
if (p->in.eof)
|
||||
@ -314,14 +342,14 @@ static HAWK_INLINE int match_long_rs_bytes (hawk_rtx_t* rtx, hawk_becs_t* buf, h
|
||||
* of the buffer is not deterministic as we don't have the
|
||||
* full input yet.
|
||||
*/
|
||||
const hawk_ooch_t* be = HAWK_BECS_PTR(buf) + HAWK_BECS_LEN(buf);
|
||||
const hawk_ooch_t* me = match.ptr + match.len;
|
||||
const hawk_bch_t* be = HAWK_BECS_PTR(buf) + HAWK_BECS_LEN(buf);
|
||||
const hawk_bch_t* me = match.ptr + match.len;
|
||||
|
||||
if (me < be)
|
||||
{
|
||||
/* the match ends before the ending boundary.
|
||||
* it must be the longest match. drop the RS part
|
||||
* and the characters after RS. */
|
||||
* it must be the longest match. drop the BRS part
|
||||
* and the characters after BRS. */
|
||||
HAWK_BECS_LEN(buf) -= match.len + (be - me);
|
||||
p->in.pos -= (be - me);
|
||||
}
|
||||
@ -336,8 +364,6 @@ static HAWK_INLINE int match_long_rs_bytes (hawk_rtx_t* rtx, hawk_becs_t* buf, h
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int hawk_rtx_readio (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, hawk_ooecs_t* buf)
|
||||
{
|
||||
hawk_rio_arg_t* p;
|
||||
@ -650,7 +676,7 @@ int hawk_rtx_readio (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name, hawk
|
||||
}
|
||||
}
|
||||
|
||||
if (rrs.ptr && HAWK_RTX_GETVALTYPE (rtx, rs) != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rrs.ptr);
|
||||
if (rrs.ptr && HAWK_RTX_GETVALTYPE(rtx, rs) != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rrs.ptr);
|
||||
hawk_rtx_refdownval (rtx, rs);
|
||||
|
||||
return ret;
|
||||
@ -664,8 +690,8 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
|
||||
hawk_rio_impl_t handler;
|
||||
int ret;
|
||||
|
||||
hawk_val_t* rs;
|
||||
hawk_oocs_t rrs;
|
||||
hawk_val_t* brs;
|
||||
hawk_bcs_t rrs;
|
||||
|
||||
hawk_oow_t line_len = 0;
|
||||
hawk_bch_t c = '\0', pc;
|
||||
@ -677,12 +703,12 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
|
||||
hawk_becs_clear (buf);
|
||||
|
||||
/* get the record separator */
|
||||
rs = hawk_rtx_getgbl(rtx, HAWK_GBL_RS);
|
||||
hawk_rtx_refupval (rtx, rs);
|
||||
brs = hawk_rtx_getgbl(rtx, HAWK_GBL_BRS);
|
||||
hawk_rtx_refupval (rtx, brs);
|
||||
|
||||
if (resolve_rs(rtx, rs, &rrs) <= -1)
|
||||
if (resolve_brs(rtx, brs, &rrs) <= -1)
|
||||
{
|
||||
hawk_rtx_refdownval (rtx, rs);
|
||||
hawk_rtx_refdownval (rtx, brs);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -740,7 +766,7 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
|
||||
/* TODO: handle different line terminator */
|
||||
/* drop the line terminator from the record
|
||||
* if RS is a blank line and EOF is reached. */
|
||||
if (HAWK_BECS_LASTCHAR(buf) == HAWK_T'\n')
|
||||
if (HAWK_BECS_LASTCHAR(buf) == '\n')
|
||||
{
|
||||
HAWK_BECS_LEN(buf) -= 1;
|
||||
if (rtx->awk->opt.trait & HAWK_CRLF)
|
||||
@ -760,7 +786,7 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
|
||||
* At EOF, the match at the end is considered
|
||||
* the longest as there are no more characters
|
||||
* left */
|
||||
int n = match_long_rs_bytes(rtx, buf, p);
|
||||
int n = match_long_brs(rtx, buf, p);
|
||||
if (n != 0)
|
||||
{
|
||||
if (n <= -1) ret = -1;
|
||||
@ -970,7 +996,7 @@ int hawk_rtx_readiobytes (hawk_rtx_t* rtx, int in_type, const hawk_ooch_t* name,
|
||||
}
|
||||
}
|
||||
|
||||
if (rrs.ptr && HAWK_RTX_GETVALTYPE (rtx, rs) != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rrs.ptr);
|
||||
if (rrs.ptr && HAWK_RTX_GETVALTYPE(rtx, brs) != HAWK_VAL_MBS) hawk_rtx_freemem (rtx, rrs.ptr);
|
||||
hawk_rtx_refdownval (rtx, rs);
|
||||
|
||||
return ret;
|
||||
|
@ -352,6 +352,45 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
|
||||
/* perform actual assignment or assignment-like operation */
|
||||
switch (idx)
|
||||
{
|
||||
case HAWK_GBL_BRS:
|
||||
{
|
||||
hawk_bcs_t rss;
|
||||
|
||||
/* due to the expression evaluation rule, the
|
||||
* regular expression can not be an assigned
|
||||
* value */
|
||||
HAWK_ASSERT (vtype != HAWK_VAL_REX);
|
||||
|
||||
rss.ptr = hawk_rtx_getvalbcstr(rtx, val, &rss.len);
|
||||
if (!rss.ptr) return -1;
|
||||
|
||||
if (rtx->gbl.brs[0])
|
||||
{
|
||||
hawk_rtx_freerex (rtx, rtx->gbl.brs[0], rtx->gbl.brs[1]);
|
||||
rtx->gbl.brs[0] = HAWK_NULL;
|
||||
rtx->gbl.brs[1] = HAWK_NULL;
|
||||
}
|
||||
|
||||
if (rss.len > 1)
|
||||
{
|
||||
hawk_tre_t* rex, * irex;
|
||||
|
||||
/* compile the regular expression */
|
||||
/* TODO: mbs buildrex */
|
||||
if (hawk_rtx_buildrex(rtx, rss.ptr, rss.len, &rex, &irex) <= -1)
|
||||
{
|
||||
hawk_rtx_freevalbcstr (rtx, val, rss.ptr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
rtx->gbl.brs[0] = rex;
|
||||
rtx->gbl.brs[1] = irex;
|
||||
}
|
||||
|
||||
hawk_rtx_freevalbcstr (rtx, val, rss.ptr);
|
||||
break;
|
||||
}
|
||||
|
||||
case HAWK_GBL_CONVFMT:
|
||||
{
|
||||
hawk_oow_t i;
|
||||
@ -542,24 +581,13 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
|
||||
{
|
||||
hawk_oocs_t rss;
|
||||
|
||||
if (vtype == HAWK_VAL_STR)
|
||||
{
|
||||
rss = ((hawk_val_str_t*)val)->val;
|
||||
}
|
||||
else
|
||||
{
|
||||
hawk_rtx_valtostr_out_t out;
|
||||
|
||||
/* due to the expression evaluation rule, the
|
||||
* regular expression can not be an assigned
|
||||
* value */
|
||||
HAWK_ASSERT (vtype != HAWK_VAL_REX);
|
||||
|
||||
out.type = HAWK_RTX_VALTOSTR_CPLDUP;
|
||||
if (hawk_rtx_valtostr(rtx, val, &out) <= -1) return -1;
|
||||
|
||||
rss = out.u.cpldup;
|
||||
}
|
||||
rss.ptr = hawk_rtx_getvaloocstr(rtx, val, &rss.len);
|
||||
if (!rss.ptr) return -1;
|
||||
|
||||
if (rtx->gbl.rs[0])
|
||||
{
|
||||
@ -575,7 +603,7 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
|
||||
/* compile the regular expression */
|
||||
if (hawk_rtx_buildrex(rtx, rss.ptr, rss.len, &rex, &irex) <= -1)
|
||||
{
|
||||
if (vtype != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rss.ptr);
|
||||
hawk_rtx_freevaloocstr (rtx, val, rss.ptr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -583,8 +611,7 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
|
||||
rtx->gbl.rs[1] = irex;
|
||||
}
|
||||
|
||||
if (vtype != HAWK_VAL_STR) hawk_rtx_freemem (rtx, rss.ptr);
|
||||
|
||||
hawk_rtx_freevaloocstr (rtx, val, rss.ptr);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -642,7 +669,7 @@ HAWK_INLINE void hawk_rtx_setretval (hawk_rtx_t* rtx, hawk_val_t* val)
|
||||
HAWK_INLINE int hawk_rtx_setgbl (hawk_rtx_t* rtx, int id, hawk_val_t* val)
{
	/* assign 'val' to the global variable identified by 'id'.
	 * the result of set_global() is handed back to the caller as is. */

	/* the ID must fall within the bounds of rtx->awk->parse.gbls */
	HAWK_ASSERT (id >= 0 && id < (int)HAWK_ARR_SIZE(rtx->awk->parse.gbls));

	/* no variable node accompanies an assignment made through this call */
	return set_global(rtx, id, HAWK_NULL, val, 0);
}
|
||||
|
||||
int hawk_rtx_setfilename (hawk_rtx_t* rtx, const hawk_ooch_t* name, hawk_oow_t len)
|
||||
@ -954,6 +981,8 @@ static int init_rtx (hawk_rtx_t* rtx, hawk_t* awk, hawk_rio_cbs_t* rio)
|
||||
rtx->rio.chain = HAWK_NULL;
|
||||
}
|
||||
|
||||
rtx->gbl.brs[0] = HAWK_NULL;
|
||||
rtx->gbl.brs[1] = HAWK_NULL;
|
||||
rtx->gbl.rs[0] = HAWK_NULL;
|
||||
rtx->gbl.rs[1] = HAWK_NULL;
|
||||
rtx->gbl.fs[0] = HAWK_NULL;
|
||||
@ -1001,6 +1030,12 @@ static void fini_rtx (hawk_rtx_t* rtx, int fini_globals)
|
||||
hawk_rtx_cleario (rtx);
|
||||
HAWK_ASSERT (rtx->rio.chain == HAWK_NULL);
|
||||
|
||||
if (rtx->gbl.brs[0])
|
||||
{
|
||||
hawk_rtx_freerex (rtx, rtx->gbl.brs[0], rtx->gbl.brs[1]);
|
||||
rtx->gbl.brs[0] = HAWK_NULL;
|
||||
rtx->gbl.brs[1] = HAWK_NULL;
|
||||
}
|
||||
if (rtx->gbl.rs[0])
|
||||
{
|
||||
hawk_rtx_freerex (rtx, rtx->gbl.rs[0], rtx->gbl.rs[1]);
|
||||
|
Loading…
Reference in New Issue
Block a user