enhanced str::split() to handle byte strings better
This commit is contained in:
parent
4a60654b49
commit
166c18c7d0
351
hawk/lib/fnc.c
351
hawk/lib/fnc.c
@ -787,216 +787,23 @@ int hawk_fnc_substr (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*
 * Byte-string variant of split(): splits argument 0, taken as a byte
 * string, into fields and stores them in a freshly created array value
 * that is assigned to the reference passed as argument 1.
 *
 * The field separator comes from argument 2 when given, otherwise from
 * the FS global. A separator longer than one character (or a regular
 * expression value) selects regex-based tokenizing; otherwise the
 * delimiter-character tokenizer is used.
 *
 * On success the number of fields is set as the return value and 0 is
 * returned. On failure -1 is returned. fi is not used.
 */
static int split_mbs (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
{
	hawk_oow_t nargs;
	hawk_val_t* a0, * a2, * t1, * t2;
	hawk_val_type_t a2_vtype, t1_vtype;

	hawk_bcs_t str;
	hawk_bcs_t fs;
	hawk_bch_t* fs_free = HAWK_NULL;
	const hawk_bch_t* p;
	hawk_oow_t str_left, org_len;
	hawk_tre_t* fs_rex = HAWK_NULL;
	hawk_tre_t* fs_rex_free = HAWK_NULL;

	hawk_bcs_t tok;
	hawk_int_t nflds;
	int x, ret = -1;

	str.ptr = HAWK_NULL;
	str.len = 0;

	nargs = hawk_rtx_getnargs(rtx);
	HAWK_ASSERT (nargs >= 2 && nargs <= 3);

	a0 = hawk_rtx_getarg(rtx, 0);
	a2 = (nargs >= 3)? hawk_rtx_getarg(rtx, 2): HAWK_NULL;

	str.ptr = hawk_rtx_getvalbcstr(rtx, a0, &str.len);
	if (HAWK_UNLIKELY(!str.ptr)) goto done;

	if (!a2)
	{
		/* get the value from FS */
		t1 = hawk_rtx_getgbl(rtx, HAWK_GBL_FS);
		t1_vtype = HAWK_RTX_GETVALTYPE(rtx, t1);
		if (t1_vtype == HAWK_VAL_NIL)
		{
			fs.ptr = " ";
			fs.len = 1;
		}
		else if (t1_vtype == HAWK_VAL_MBS)
		{
			/* borrow the byte string held by the value. nothing to free */
			fs.ptr = ((hawk_val_mbs_t*)t1)->val.ptr;
			fs.len = ((hawk_val_mbs_t*)t1)->val.len;
		}
		else
		{
			fs.ptr = hawk_rtx_valtobcstrdup(rtx, t1, &fs.len);
			if (HAWK_UNLIKELY(!fs.ptr)) goto done;
			fs_free = (hawk_bch_t*)fs.ptr;
		}

		/* reuse the regular expression cached for FS. it is not freed here */
		if (fs.len > 1) fs_rex = rtx->gbl.fs[rtx->gbl.ignorecase];
	}
	else
	{
		a2_vtype = HAWK_RTX_GETVALTYPE(rtx, a2);

		if (a2_vtype == HAWK_VAL_REX)
		{
			/* the third parameter is a regular expression */
			fs_rex = ((hawk_val_rex_t*)a2)->code[rtx->gbl.ignorecase];

			/* make the loop below take fs_rex by
			 * setting fs.len greater than 1 */
			fs.ptr = HAWK_NULL;
			fs.len = 2;
		}
		else
		{
			if (a2_vtype == HAWK_VAL_MBS)
			{
				fs.ptr = ((hawk_val_mbs_t*)a2)->val.ptr;
				fs.len = ((hawk_val_mbs_t*)a2)->val.len;
			}
			else
			{
				fs.ptr = hawk_rtx_valtobcstrdup(rtx, a2, &fs.len);
				if (HAWK_UNLIKELY(!fs.ptr)) goto done;
				fs_free = (hawk_bch_t*)fs.ptr;
			}

			if (fs.len > 1)
			{
				/* NOTE(review): fs holds byte characters here. confirm that
				 * hawk_rtx_buildrex() accepts a byte string in builds where
				 * hawk_ooch_t is the wide character type */
				x = rtx->gbl.ignorecase?
					hawk_rtx_buildrex(rtx, fs.ptr, fs.len, HAWK_NULL, &fs_rex):
					hawk_rtx_buildrex(rtx, fs.ptr, fs.len, &fs_rex, HAWK_NULL);
				if (x <= -1) goto done;

				fs_rex_free = fs_rex;
			}
		}
	}

	t1 = hawk_rtx_makearrval(rtx);
	if (HAWK_UNLIKELY(!t1)) goto done;

	/* hold a reference while the value is being assigned to the
	 * variable referenced by the second argument */
	hawk_rtx_refupval (rtx, t1);
	x = hawk_rtx_setrefval(rtx, (hawk_val_ref_t*)hawk_rtx_getarg(rtx, 1), t1);
	hawk_rtx_refdownval (rtx, t1);
	if (HAWK_UNLIKELY(x <= -1)) goto done;

	/* fill the container with actual values */
	p = str.ptr; str_left = str.len; org_len = str.len;
	nflds = 0;

	while (p)
	{
		/* the key is produced by hawk_int_to_oocstr(), so the buffer
		 * must hold hawk_ooch_t regardless of the data being bytes */
		hawk_ooch_t key_buf[HAWK_SIZEOF(hawk_int_t)*8+2];
		hawk_oow_t key_len;

		/* str, fs and tok are byte strings. use the byte tokenizers */
		if (fs.len <= 1)
		{
			p = hawk_rtx_tokbcharswithbchars(rtx, p, str.len, fs.ptr, fs.len, &tok);
		}
		else
		{
			p = hawk_rtx_tokbcharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok);
			if (p == HAWK_NULL && hawk_rtx_geterrnum(rtx) != HAWK_ENOERR) goto done;
		}

		if (nflds == 0 && p == HAWK_NULL && tok.len == 0)
		{
			/* no field at all */
			break;
		}

		HAWK_ASSERT ((tok.ptr != HAWK_NULL && tok.len > 0) || tok.len == 0);

		/* create the field value - the split function must create
		 * a numeric value if the string is a number */
		t2 = hawk_rtx_makenumormbsvalwithbchars(rtx, tok.ptr, tok.len);
		if (HAWK_UNLIKELY(!t2)) goto done;

		/* store it under the 1-based field number */
		key_len = hawk_int_to_oocstr(++nflds, 10, HAWK_NULL, key_buf, HAWK_COUNTOF(key_buf));
		HAWK_ASSERT (key_len != (hawk_oow_t)-1);

		if (hawk_rtx_setarrvalfld(rtx, t1, key_buf, key_len, t2) == HAWK_NULL)
		{
			/* up-then-down releases the value that was never stored */
			hawk_rtx_refupval (rtx, t2);
			hawk_rtx_refdownval (rtx, t2);
			goto done;
		}

		/* the last token has been taken. don't do pointer
		 * arithmetic on a null pointer */
		if (!p) break;

		str.len = str_left - (p - str.ptr);
	}

	ret = 0;

done:
	if (str.ptr) hawk_rtx_freevalbcstr (rtx, a0, str.ptr);

	if (fs_free) hawk_rtx_freemem (rtx, fs_free);

	if (fs_rex_free)
	{
		/* free from the same slot that hawk_rtx_buildrex() filled */
		if (rtx->gbl.ignorecase)
			hawk_rtx_freerex (rtx, HAWK_NULL, fs_rex_free);
		else
			hawk_rtx_freerex (rtx, fs_rex_free, HAWK_NULL);
	}

	if (ret <= -1) return -1;

	t1 = hawk_rtx_makeintval(rtx, nflds);
	if (HAWK_UNLIKELY(!t1)) return -1;

	hawk_rtx_setretval (rtx, t1);
	return 0;
}
|
||||
#endif
|
||||
|
||||
static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
|
||||
{
|
||||
hawk_oow_t nargs;
|
||||
hawk_val_t* a0, * a2, * t1, * t2;
|
||||
hawk_val_type_t a2_vtype, t1_vtype;
|
||||
hawk_val_t* a0, * a2, * t0, * t1, * t2;
|
||||
|
||||
hawk_oocs_t str;
|
||||
hawk_oocs_t fs;
|
||||
hawk_ooch_t* fs_free = HAWK_NULL;
|
||||
const hawk_ooch_t* p;
|
||||
hawk_ooch_t* p;
|
||||
|
||||
hawk_oow_t str_left, org_len;
|
||||
hawk_tre_t* fs_rex = HAWK_NULL;
|
||||
hawk_tre_t* fs_rex_free = HAWK_NULL;
|
||||
|
||||
hawk_oocs_t tok;
|
||||
hawk_int_t nflds;
|
||||
int x;
|
||||
int x, byte_str, do_fld = 0;
|
||||
|
||||
str.ptr = HAWK_NULL;
|
||||
str.len = 0;
|
||||
@ -1007,65 +814,41 @@ static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
|
||||
a0 = hawk_rtx_getarg(rtx, 0);
|
||||
a2 = (nargs >= 3)? hawk_rtx_getarg (rtx, 2): HAWK_NULL;
|
||||
|
||||
str.ptr = hawk_rtx_getvaloocstr(rtx, a0, &str.len);
|
||||
if (HAWK_UNLIKELY(!str.ptr)) goto oops;
|
||||
str.ptr = HAWK_NULL;
|
||||
str.len = 0;
|
||||
|
||||
if (!a2)
|
||||
/* field seperator */
|
||||
t0 = a2? a2: hawk_rtx_getgbl(rtx, HAWK_GBL_FS); /* if a2 is not available, get the value from FS */
|
||||
|
||||
if (HAWK_RTX_GETVALTYPE(rtx, t0) == HAWK_VAL_NIL)
|
||||
{
|
||||
/* get the value from FS */
|
||||
t1 = hawk_rtx_getgbl(rtx, HAWK_GBL_FS);
|
||||
t1_vtype = HAWK_RTX_GETVALTYPE(rtx, t1);
|
||||
if (t1_vtype == HAWK_VAL_NIL)
|
||||
{
|
||||
fs.ptr = HAWK_T(" ");
|
||||
fs.len = 1;
|
||||
}
|
||||
else if (t1_vtype == HAWK_VAL_STR)
|
||||
{
|
||||
fs.ptr = ((hawk_val_str_t*)t1)->val.ptr;
|
||||
fs.len = ((hawk_val_str_t*)t1)->val.len;
|
||||
}
|
||||
else
|
||||
{
|
||||
fs.ptr = hawk_rtx_valtooocstrdup(rtx, t1, &fs.len);
|
||||
if (HAWK_UNLIKELY(!fs.ptr)) goto oops;
|
||||
fs_free = (hawk_ooch_t*)fs.ptr;
|
||||
}
|
||||
|
||||
if (fs.len > 1) fs_rex = rtx->gbl.fs[rtx->gbl.ignorecase];
|
||||
fs.ptr = HAWK_T(" ");
|
||||
fs.len = 1;
|
||||
}
|
||||
else
|
||||
else if (HAWK_RTX_GETVALTYPE(rtx, t0) == HAWK_VAL_REX)
|
||||
{
|
||||
a2_vtype = HAWK_RTX_GETVALTYPE (rtx, a2);
|
||||
/* regular expression */
|
||||
fs_rex = ((hawk_val_rex_t*)t0)->code[rtx->gbl.ignorecase];
|
||||
|
||||
if (a2_vtype == HAWK_VAL_REX)
|
||||
/* make the tokenizing loop below to take fs_rex by setting fs_len greater than 1*/
|
||||
fs.ptr = HAWK_NULL;
|
||||
fs.len = 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
fs.ptr = hawk_rtx_getvaloocstr(rtx, t0, &fs.len);
|
||||
if (HAWK_UNLIKELY(!fs.ptr)) goto oops;
|
||||
|
||||
fs_free = fs.ptr;
|
||||
|
||||
if (fs.len == 5 && fs.ptr[0] == '?')
|
||||
{
|
||||
/* the third parameter is a regular expression */
|
||||
fs_rex = ((hawk_val_rex_t*)a2)->code[rtx->gbl.ignorecase];
|
||||
|
||||
/* make the loop below to take fs_rex by
|
||||
* setting fs_len greater than 1*/
|
||||
fs.ptr = HAWK_NULL;
|
||||
fs.len = 2;
|
||||
do_fld = 1;
|
||||
}
|
||||
else
|
||||
else if (fs.len > 1)
|
||||
{
|
||||
if (a2_vtype == HAWK_VAL_STR)
|
||||
if (a2)
|
||||
{
|
||||
fs.ptr = ((hawk_val_str_t*)a2)->val.ptr;
|
||||
fs.len = ((hawk_val_str_t*)a2)->val.len;
|
||||
}
|
||||
else
|
||||
{
|
||||
fs.ptr = hawk_rtx_valtooocstrdup(rtx, a2, &fs.len);
|
||||
if (fs.ptr == HAWK_NULL) goto oops;
|
||||
fs_free = (hawk_ooch_t*)fs.ptr;
|
||||
}
|
||||
|
||||
if (fs.len > 1)
|
||||
{
|
||||
int x;
|
||||
|
||||
x = rtx->gbl.ignorecase?
|
||||
hawk_rtx_buildrex(rtx, fs.ptr, fs.len, HAWK_NULL, &fs_rex):
|
||||
hawk_rtx_buildrex(rtx, fs.ptr, fs.len, &fs_rex, HAWK_NULL);
|
||||
@ -1073,9 +856,28 @@ static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
|
||||
|
||||
fs_rex_free = fs_rex;
|
||||
}
|
||||
else
|
||||
{
|
||||
fs_rex = rtx->gbl.fs[rtx->gbl.ignorecase];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* the first parameter - string to split */
|
||||
if (HAWK_RTX_GETVALTYPE(rtx, a0) == HAWK_VAL_MBS)
|
||||
{
|
||||
byte_str = 1;
|
||||
str.ptr = do_fld? hawk_rtx_valtobcstrdup(rtx, a0, &str.len):
|
||||
hawk_rtx_getvalbcstr(rtx, a0, &str.len);
|
||||
}
|
||||
else
|
||||
{
|
||||
byte_str = 0;
|
||||
str.ptr = do_fld? hawk_rtx_valtooocstrdup(rtx, a0, &str.len):
|
||||
hawk_rtx_getvaloocstr(rtx, a0, &str.len);
|
||||
}
|
||||
if (HAWK_UNLIKELY(!str.ptr)) goto oops;
|
||||
|
||||
t1 = use_array? hawk_rtx_makearrval(rtx, 16): hawk_rtx_makemapval(rtx);
|
||||
if (HAWK_UNLIKELY(!t1)) goto oops;
|
||||
|
||||
@ -1090,20 +892,23 @@ static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
|
||||
|
||||
while (p)
|
||||
{
|
||||
hawk_ooch_t key_buf[HAWK_SIZEOF(hawk_int_t)*8+2];
|
||||
hawk_oow_t key_len;
|
||||
|
||||
if (fs.len <= 1)
|
||||
if (fs_rex)
|
||||
{
|
||||
p = hawk_rtx_tokoocharswithoochars(rtx, p, str.len, fs.ptr, fs.len, &tok);
|
||||
p = byte_str? hawk_rtx_tokbcharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok):
|
||||
hawk_rtx_tokoocharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok);
|
||||
if (p && hawk_rtx_geterrnum(rtx) != HAWK_ENOERR) goto oops;
|
||||
}
|
||||
else if (do_fld)
|
||||
{
|
||||
/* [NOTE] even if byte_str is true, the field seperator is of the ooch type.
|
||||
* there may be some data truncation and related issues */
|
||||
p = byte_str? hawk_rtx_fldbchars(rtx, p, str.len, fs.ptr[1], fs.ptr[2], fs.ptr[3], fs.ptr[4], &tok):
|
||||
hawk_rtx_fldoochars(rtx, p, str.len, fs.ptr[1], fs.ptr[2], fs.ptr[3], fs.ptr[4], &tok);
|
||||
}
|
||||
else
|
||||
{
|
||||
p = hawk_rtx_tokoocharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok);
|
||||
if (p == HAWK_NULL && hawk_rtx_geterrnum(rtx) != HAWK_ENOERR)
|
||||
{
|
||||
goto oops;
|
||||
}
|
||||
p = byte_str? hawk_rtx_tokbcharswithbchars(rtx, p, str.len, fs.ptr, fs.len, &tok):
|
||||
hawk_rtx_tokoocharswithoochars(rtx, p, str.len, fs.ptr, fs.len, &tok);
|
||||
}
|
||||
|
||||
if (nflds == 0 && p == HAWK_NULL && tok.len == 0)
|
||||
@ -1118,7 +923,8 @@ static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
|
||||
* create a numeric value if the string is a number */
|
||||
/*t2 = hawk_rtx_makestrvalwithoocs (rtx, &tok);*/
|
||||
/*t2 = hawk_rtx_makenstrvalwithoocs(rtx, &tok); */
|
||||
t2 = hawk_rtx_makenumorstrvalwithoochars(rtx, tok.ptr, tok.len);
|
||||
t2 = byte_str? hawk_rtx_makenumormbsvalwithbchars(rtx, tok.ptr, tok.len):
|
||||
hawk_rtx_makenumorstrvalwithoochars(rtx, tok.ptr, tok.len);
|
||||
if (HAWK_UNLIKELY(!t2)) goto oops;
|
||||
|
||||
if (use_array)
|
||||
@ -1133,6 +939,9 @@ static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
|
||||
else
|
||||
{
|
||||
/* put it into the map */
|
||||
hawk_ooch_t key_buf[HAWK_SIZEOF(hawk_int_t)*8+2];
|
||||
hawk_oow_t key_len;
|
||||
|
||||
key_len = hawk_int_to_oocstr(++nflds, 10, HAWK_NULL, key_buf, HAWK_COUNTOF(key_buf));
|
||||
HAWK_ASSERT (key_len != (hawk_oow_t)-1);
|
||||
|
||||
@ -1144,13 +953,17 @@ static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
|
||||
}
|
||||
}
|
||||
|
||||
str.len = str_left - (p - str.ptr);
|
||||
if (byte_str)
|
||||
str.len = str_left - ((p - str.ptr) * HAWK_SIZEOF_OOCH_T);
|
||||
else
|
||||
str.len = str_left - (p - str.ptr);
|
||||
}
|
||||
|
||||
/*if (str_free) hawk_rtx_freemem (rtx, str_free);*/
|
||||
hawk_rtx_freevaloocstr (rtx, a0, str.ptr);
|
||||
if (do_fld) hawk_rtx_freemem (rtx, str.ptr);
|
||||
else if (byte_str) hawk_rtx_freevalbcstr (rtx, a0, str.ptr);
|
||||
else hawk_rtx_freevaloocstr (rtx, a0, str.ptr);
|
||||
|
||||
if (fs_free) hawk_rtx_freemem (rtx, fs_free);
|
||||
if (fs_free) hawk_rtx_freevaloocstr (rtx, t0, fs_free);
|
||||
|
||||
if (fs_rex_free)
|
||||
{
|
||||
@ -1160,16 +973,19 @@ static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
|
||||
hawk_rtx_freerex (rtx, fs_rex_free, HAWK_NULL);
|
||||
}
|
||||
|
||||
/*nflds--;*/
|
||||
|
||||
t1 = hawk_rtx_makeintval (rtx, nflds);
|
||||
t1 = hawk_rtx_makeintval(rtx, nflds);
|
||||
if (HAWK_UNLIKELY(!t1)) return -1;
|
||||
|
||||
hawk_rtx_setretval (rtx, t1);
|
||||
return 0;
|
||||
|
||||
oops:
|
||||
if (str.ptr) hawk_rtx_freevaloocstr (rtx, a0, str.ptr);
|
||||
if (str.ptr)
|
||||
{
|
||||
if (do_fld) hawk_rtx_freemem (rtx, str.ptr);
|
||||
else if (byte_str) hawk_rtx_freevalbcstr (rtx, a0, str.ptr);
|
||||
else hawk_rtx_freevaloocstr (rtx, a0, str.ptr);
|
||||
}
|
||||
|
||||
if (fs_free) hawk_rtx_freemem (rtx, fs_free);
|
||||
|
||||
@ -1185,7 +1001,8 @@ oops:
|
||||
|
||||
/*
 * Entry point of the split() function. It delegates to fnc_split() with
 * use_array set to 0, which makes fnc_split() collect the fields in a
 * map value rather than an array value.
 */
int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
{
	return fnc_split(rtx, fi, 0);
}
|
||||
|
||||
int hawk_fnc_tolower (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
|
||||
|
@ -24,6 +24,92 @@
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
 * Extracts the next field from the len characters starting at str.
 *
 * fs is the field separator, ec the escape character, lq and rq the
 * opening and closing quote characters. Leading spaces are skipped.
 * The character following ec is taken literally. Between lq and rq only
 * ec and rq are special. The escape and quote characters themselves are
 * removed, which is done by compacting the field in place, so the input
 * buffer is modified. Trailing spaces that are neither quoted nor
 * escaped are not counted in the field length.
 *
 * The field is stored in tok. The return value points to the character
 * following the separator, or is HAWK_NULL when the input has been
 * exhausted. If fs is a space character, a run of consecutive separators
 * is consumed as one, and HAWK_NULL is returned when that run reaches
 * the end of the input.
 *
 * rtx is not used.
 */
char_t* split_xchars_to_fields (hawk_rtx_t* rtx, char_t* str, hawk_oow_t len, char_t fs, char_t ec, char_t lq, char_t rq, xcs_t* tok)
{
	char_t* p = str;
	char_t* end = str + len;
	int escaped = 0, quoted = 0;
	char_t* ts; /* token start */
	char_t* tp; /* points to one char past the last token char */
	char_t* xp; /* points to one char past the last effective char */

	/* skip leading spaces */
	while (p < end && is_xch_space(*p)) p++;

	/* initialize token pointers */
	ts = tp = xp = p;

	while (p < end)
	{
		/* this must be char_t, not char. a plain char would truncate
		 * the character when char_t is a wide character type and break
		 * both the comparisons and the copy below */
		char_t c = *p;

		if (escaped)
		{
			/* an escaped character is always effective */
			*tp++ = c; xp = tp; p++;
			escaped = 0;
		}
		else if (c == ec)
		{
			escaped = 1;
			p++;
		}
		else if (quoted)
		{
			if (c == rq)
			{
				quoted = 0;
				p++;
			}
			else
			{
				/* a quoted character, a space included, is effective */
				*tp++ = c; xp = tp; p++;
			}
		}
		else if (c == fs)
		{
			tok->ptr = ts;
			tok->len = xp - ts;
			p++;

			if (is_xch_space(fs))
			{
				/* collapse consecutive space-like separators */
				while (p < end && *p == fs) p++;
				if (p >= end) return HAWK_NULL;
			}

			return p;
		}
		else if (c == lq)
		{
			quoted = 1;
			p++;
		}
		else
		{
			*tp++ = c; p++;
			/* a plain space extends the token only if a
			 * non-space character follows it later */
			if (!is_xch_space(c)) xp = tp;
		}
	}

	if (escaped)
	{
		/* if it is still escaped, the last character must be
		 * the escaper itself. treat it as a normal character.
		 * NOTE(review): it is written at xp, not tp, so plain spaces
		 * between the last effective character and the escaper are
		 * dropped - confirm that this is intended */
		*xp++ = ec;
	}

	tok->ptr = ts;
	tok->len = xp - ts;
	return HAWK_NULL;
}
|
||||
|
||||
char_t* tokenize_xchars (hawk_rtx_t* rtx, const char_t* s, hawk_oow_t len, const char_t* delim, hawk_oow_t delim_len, xcs_t* tok)
|
||||
{
|
||||
const char_t* p = s, *d;
|
||||
@ -214,88 +300,102 @@ exit_loop:
|
||||
return (char_t*)++p;
|
||||
}
|
||||
|
||||
char_t* split_xchars_to_fields (hawk_rtx_t* rtx, char_t* str, hawk_oow_t len, char_t fs, char_t ec, char_t lq, char_t rq, xcs_t* tok)
|
||||
|
||||
char_t* tokenize_xchars_by_rex (hawk_rtx_t* rtx, const char_t* str, hawk_oow_t len, const char_t* substr, hawk_oow_t sublen, hawk_tre_t* rex, xcs_t* tok)
|
||||
{
|
||||
char_t* p = str;
|
||||
char_t* end = str + len;
|
||||
int escaped = 0, quoted = 0;
|
||||
char_t* ts; /* token start */
|
||||
char_t* tp; /* points to one char past the last token char */
|
||||
char_t* xp; /* points to one char past the last effective char */
|
||||
int n;
|
||||
hawk_oow_t i;
|
||||
xcs_t match, s, cursub, realsub;
|
||||
|
||||
/* skip leading spaces */
|
||||
while (p < end && is_xch_space(*p)) p++;
|
||||
s.ptr = (char_t*)str;
|
||||
s.len = len;
|
||||
|
||||
/* initialize token pointers */
|
||||
ts = tp = xp = p;
|
||||
cursub.ptr = (char_t*)substr;
|
||||
cursub.len = sublen;
|
||||
|
||||
while (p < end)
|
||||
realsub.ptr = (char_t*)substr;
|
||||
realsub.len = sublen;
|
||||
|
||||
while (cursub.len > 0)
|
||||
{
|
||||
char c = *p;
|
||||
n = match_rex_with_xcs(rtx, rex, &s, &cursub, &match, HAWK_NULL);
|
||||
if (n <= -1) return HAWK_NULL;
|
||||
|
||||
if (escaped)
|
||||
if (n == 0)
|
||||
{
|
||||
*tp++ = c; xp = tp; p++;
|
||||
escaped = 0;
|
||||
/* no match has been found. return the entire string as a token */
|
||||
hawk_rtx_seterrnum (rtx, HAWK_NULL, HAWK_ENOERR); /* reset HAWK_EREXNOMAT to no error */
|
||||
tok->ptr = realsub.ptr;
|
||||
tok->len = realsub.len;
|
||||
return HAWK_NULL;
|
||||
}
|
||||
else
|
||||
|
||||
HAWK_ASSERT (n == 1);
|
||||
|
||||
if (match.len == 0)
|
||||
{
|
||||
if (c == ec)
|
||||
/* the match length is zero. */
|
||||
cursub.ptr++;
|
||||
cursub.len--;
|
||||
}
|
||||
else if (HAWK_RTX_IS_STRIPRECSPC_ON(rtx))
|
||||
{
|
||||
/* match at the beginning of the input string */
|
||||
if (match.ptr == substr)
|
||||
{
|
||||
escaped = 1;
|
||||
p++;
|
||||
}
|
||||
else if (quoted)
|
||||
{
|
||||
if (c == rq)
|
||||
for (i = 0; i < match.len; i++)
|
||||
{
|
||||
quoted = 0;
|
||||
p++;
|
||||
if (!is_xch_space(match.ptr[i])) goto exit_loop;
|
||||
}
|
||||
else
|
||||
{
|
||||
*tp++ = c; xp = tp; p++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (c == fs)
|
||||
{
|
||||
tok->ptr = ts;
|
||||
tok->len = xp - ts;
|
||||
p++;
|
||||
|
||||
if (is_xch_space(fs))
|
||||
{
|
||||
while (p < end && *p == fs) p++;
|
||||
if (p >= end) return HAWK_NULL;
|
||||
}
|
||||
/* the match that is all spaces at the
|
||||
* beginning of the input string is skipped */
|
||||
cursub.ptr += match.len;
|
||||
cursub.len -= match.len;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
if (c == lq)
|
||||
{
|
||||
quoted = 1;
|
||||
p++;
|
||||
}
|
||||
else
|
||||
{
|
||||
*tp++ = c; p++;
|
||||
if (!is_xch_space(c)) xp = tp;
|
||||
}
|
||||
/* adjust the substring by skipping the leading
|
||||
* spaces and retry matching */
|
||||
realsub.ptr = (char_t*)substr + match.len;
|
||||
realsub.len -= match.len;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
exit_loop:
|
||||
hawk_rtx_seterrnum (rtx, HAWK_NULL, HAWK_ENOERR);
|
||||
|
||||
if (cursub.len <= 0)
|
||||
{
|
||||
tok->ptr = realsub.ptr;
|
||||
tok->len = realsub.len;
|
||||
return HAWK_NULL;
|
||||
}
|
||||
|
||||
tok->ptr = realsub.ptr;
|
||||
tok->len = match.ptr - realsub.ptr;
|
||||
|
||||
for (i = 0; i < match.len; i++)
|
||||
{
|
||||
if (!is_xch_space(match.ptr[i]))
|
||||
{
|
||||
/* the match contains a non-space character. */
|
||||
return (char_t*)match.ptr+match.len;
|
||||
}
|
||||
}
|
||||
|
||||
if (escaped)
|
||||
/* the match is all spaces */
|
||||
if (HAWK_RTX_IS_STRIPRECSPC_ON(rtx))
|
||||
{
|
||||
/* if it is still escaped, the last character must be
|
||||
* the escaper itself. treat it as a normal character */
|
||||
*xp++ = ec;
|
||||
/* if the match reached the last character in the input string,
|
||||
* it returns HAWK_NULL to terminate tokenization. */
|
||||
return (match.ptr+match.len >= substr+sublen)? HAWK_NULL: ((char_t*)match.ptr+match.len);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* if the match went beyond the the last character in the input
|
||||
* string, it returns HAWK_NULL to terminate tokenization. */
|
||||
return (match.ptr+match.len > substr+sublen)? HAWK_NULL: ((char_t*)match.ptr+match.len);
|
||||
}
|
||||
|
||||
tok->ptr = ts;
|
||||
tok->len = xp - ts;
|
||||
return HAWK_NULL;
|
||||
}
|
||||
|
@ -64,24 +64,37 @@ hawk_bch_t* hawk_rtx_tokbcharswithbchars (
|
||||
const hawk_bch_t* delim, hawk_oow_t delim_len, hawk_bcs_t* tok);
|
||||
|
||||
|
||||
hawk_uch_t* hawk_rtx_tokucharsbyrex (
|
||||
hawk_rtx_t* rtx,
|
||||
const hawk_uch_t* str,
|
||||
hawk_oow_t len,
|
||||
const hawk_uch_t* substr,
|
||||
hawk_oow_t sublen,
|
||||
hawk_tre_t* rex,
|
||||
hawk_ucs_t* tok
|
||||
);
|
||||
|
||||
hawk_bch_t* hawk_rtx_tokbcharsbyrex (
|
||||
hawk_rtx_t* rtx,
|
||||
const hawk_bch_t* str,
|
||||
hawk_oow_t len,
|
||||
const hawk_bch_t* substr,
|
||||
hawk_oow_t sublen,
|
||||
hawk_tre_t* rex,
|
||||
hawk_bcs_t* tok
|
||||
);
|
||||
|
||||
|
||||
#if defined(HAWK_OOCH_IS_UCH)
|
||||
# define hawk_rtx_fldoochars hawk_rtx_flduchars
|
||||
# define hawk_rtx_tokoocharswithoochars hawk_rtx_tokucharswithuchars
|
||||
# define hawk_rtx_tokoocharsbyrex hawk_rtx_tokucharsbyrex
|
||||
#else
|
||||
# define hawk_rtx_fldoochars hawk_rtx_fldbchars
|
||||
# define hawk_rtx_tokoocharswithoochars hawk_rtx_tokbcharswithbchars
|
||||
# define hawk_rtx_tokoocharsbyrex hawk_rtx_tokbcharsbyrex
|
||||
#endif
|
||||
|
||||
hawk_ooch_t* hawk_rtx_tokoocharsbyrex (
|
||||
hawk_rtx_t* rtx,
|
||||
const hawk_ooch_t* str,
|
||||
hawk_oow_t len,
|
||||
const hawk_ooch_t* substr,
|
||||
hawk_oow_t sublen,
|
||||
hawk_tre_t* rex,
|
||||
hawk_oocs_t* tok
|
||||
);
|
||||
|
||||
|
||||
int hawk_rtx_matchvalwithucs (
|
||||
hawk_rtx_t* rtx, hawk_val_t* val,
|
||||
|
215
hawk/lib/misc.c
215
hawk/lib/misc.c
@ -30,220 +30,41 @@
|
||||
#undef char_t
|
||||
#undef xcs_t
|
||||
#undef is_xch_space
|
||||
#undef tokenize_xchars
|
||||
#undef match_rex_with_xcs
|
||||
#undef split_xchars_to_fields
|
||||
#undef tokenize_xchars
|
||||
#undef tokenize_xchars_by_rex
|
||||
|
||||
#define char_t hawk_bch_t
|
||||
#define xcs_t hawk_bcs_t
|
||||
#define is_xch_space hawk_is_bch_space
|
||||
#define tokenize_xchars hawk_rtx_tokbcharswithbchars
|
||||
#define match_rex_with_xcs hawk_rtx_matchrexwithbcs
|
||||
|
||||
#define split_xchars_to_fields hawk_rtx_fldbchars
|
||||
#define tokenize_xchars hawk_rtx_tokbcharswithbchars
|
||||
#define tokenize_xchars_by_rex hawk_rtx_tokbcharsbyrex
|
||||
|
||||
#include "misc-imp.h"
|
||||
|
||||
#undef char_t
|
||||
#undef xcs_t
|
||||
#undef is_xch_space
|
||||
#undef tokenize_xchars
|
||||
#undef match_rex_with_xcs
|
||||
#undef split_xchars_to_fields
|
||||
#undef tokenize_xchars
|
||||
#undef tokenize_xchars_by_rex
|
||||
|
||||
#define char_t hawk_uch_t
|
||||
#define xcs_t hawk_ucs_t
|
||||
#define is_xch_space hawk_is_uch_space
|
||||
#define tokenize_xchars hawk_rtx_tokucharswithuchars
|
||||
#define match_rex_with_xcs hawk_rtx_matchrexwithucs
|
||||
|
||||
#define split_xchars_to_fields hawk_rtx_flduchars
|
||||
#define tokenize_xchars hawk_rtx_tokucharswithuchars
|
||||
#define tokenize_xchars_by_rex hawk_rtx_tokucharsbyrex
|
||||
|
||||
#include "misc-imp.h"
|
||||
|
||||
hawk_ooch_t* hawk_rtx_tokoocharsbyrex (
|
||||
hawk_rtx_t* rtx,
|
||||
const hawk_ooch_t* str, hawk_oow_t len,
|
||||
const hawk_ooch_t* substr, hawk_oow_t sublen,
|
||||
hawk_tre_t* rex, hawk_oocs_t* tok)
|
||||
{
|
||||
int n;
|
||||
hawk_oow_t i;
|
||||
hawk_oocs_t match, s, cursub, realsub;
|
||||
|
||||
s.ptr = (hawk_ooch_t*)str;
|
||||
s.len = len;
|
||||
|
||||
cursub.ptr = (hawk_ooch_t*)substr;
|
||||
cursub.len = sublen;
|
||||
|
||||
realsub.ptr = (hawk_ooch_t*)substr;
|
||||
realsub.len = sublen;
|
||||
|
||||
while (cursub.len > 0)
|
||||
{
|
||||
n = hawk_rtx_matchrexwithoocs(rtx, rex, &s, &cursub, &match, HAWK_NULL);
|
||||
if (n <= -1) return HAWK_NULL;
|
||||
|
||||
if (n == 0)
|
||||
{
|
||||
/* no match has been found. return the entire string as a token */
|
||||
hawk_rtx_seterrnum (rtx, HAWK_NULL, HAWK_ENOERR); /* reset HAWK_EREXNOMAT to no error */
|
||||
tok->ptr = realsub.ptr;
|
||||
tok->len = realsub.len;
|
||||
return HAWK_NULL;
|
||||
}
|
||||
|
||||
HAWK_ASSERT (n == 1);
|
||||
|
||||
if (match.len == 0)
|
||||
{
|
||||
/* the match length is zero. */
|
||||
cursub.ptr++;
|
||||
cursub.len--;
|
||||
}
|
||||
else if (HAWK_RTX_IS_STRIPRECSPC_ON(rtx))
|
||||
{
|
||||
/* match at the beginning of the input string */
|
||||
if (match.ptr == substr)
|
||||
{
|
||||
for (i = 0; i < match.len; i++)
|
||||
{
|
||||
if (!hawk_is_ooch_space(match.ptr[i])) goto exit_loop;
|
||||
}
|
||||
|
||||
/* the match that is all spaces at the
|
||||
* beginning of the input string is skipped */
|
||||
cursub.ptr += match.len;
|
||||
cursub.len -= match.len;
|
||||
|
||||
/* adjust the substring by skipping the leading
|
||||
* spaces and retry matching */
|
||||
realsub.ptr = (hawk_ooch_t*)substr + match.len;
|
||||
realsub.len -= match.len;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
exit_loop:
|
||||
hawk_rtx_seterrnum (rtx, HAWK_NULL, HAWK_ENOERR);
|
||||
|
||||
if (cursub.len <= 0)
|
||||
{
|
||||
tok->ptr = realsub.ptr;
|
||||
tok->len = realsub.len;
|
||||
return HAWK_NULL;
|
||||
}
|
||||
|
||||
tok->ptr = realsub.ptr;
|
||||
tok->len = match.ptr - realsub.ptr;
|
||||
|
||||
for (i = 0; i < match.len; i++)
|
||||
{
|
||||
if (!hawk_is_ooch_space(match.ptr[i]))
|
||||
{
|
||||
/* the match contains a non-space character. */
|
||||
return (hawk_ooch_t*)match.ptr+match.len;
|
||||
}
|
||||
}
|
||||
|
||||
/* the match is all spaces */
|
||||
if (HAWK_RTX_IS_STRIPRECSPC_ON(rtx))
|
||||
{
|
||||
/* if the match reached the last character in the input string,
|
||||
* it returns HAWK_NULL to terminate tokenization. */
|
||||
return (match.ptr+match.len >= substr+sublen)? HAWK_NULL: ((hawk_ooch_t*)match.ptr+match.len);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* if the match went beyond the the last character in the input
|
||||
* string, it returns HAWK_NULL to terminate tokenization. */
|
||||
return (match.ptr+match.len > substr+sublen)? HAWK_NULL: ((hawk_ooch_t*)match.ptr+match.len);
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
hawk_ooch_t* hawk_rtx_strxnfld (
|
||||
hawk_rtx_t* rtx, hawk_ooch_t* str, hawk_oow_t len,
|
||||
hawk_ooch_t fs, hawk_ooch_t ec, hawk_ooch_t lq, hawk_ooch_t rq,
|
||||
hawk_oocs_t* tok)
|
||||
{
|
||||
hawk_ooch_t* p = str;
|
||||
hawk_ooch_t* end = str + len;
|
||||
int escaped = 0, quoted = 0;
|
||||
hawk_ooch_t* ts; /* token start */
|
||||
hawk_ooch_t* tp; /* points to one char past the last token char */
|
||||
hawk_ooch_t* xp; /* points to one char past the last effective char */
|
||||
|
||||
/* skip leading spaces */
|
||||
while (p < end && hawk_is_ooch_space(*p)) p++;
|
||||
|
||||
/* initialize token pointers */
|
||||
ts = tp = xp = p;
|
||||
|
||||
while (p < end)
|
||||
{
|
||||
char c = *p;
|
||||
|
||||
if (escaped)
|
||||
{
|
||||
*tp++ = c; xp = tp; p++;
|
||||
escaped = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (c == ec)
|
||||
{
|
||||
escaped = 1;
|
||||
p++;
|
||||
}
|
||||
else if (quoted)
|
||||
{
|
||||
if (c == rq)
|
||||
{
|
||||
quoted = 0;
|
||||
p++;
|
||||
}
|
||||
else
|
||||
{
|
||||
*tp++ = c; xp = tp; p++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (c == fs)
|
||||
{
|
||||
tok->ptr = ts;
|
||||
tok->len = xp - ts;
|
||||
p++;
|
||||
|
||||
if (hawk_is_ooch_space(fs))
|
||||
{
|
||||
while (p < end && *p == fs) p++;
|
||||
if (p >= end) return HAWK_NULL;
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
if (c == lq)
|
||||
{
|
||||
quoted = 1;
|
||||
p++;
|
||||
}
|
||||
else
|
||||
{
|
||||
*tp++ = c; p++;
|
||||
if (!hawk_is_ooch_space(c)) xp = tp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (escaped)
|
||||
{
|
||||
/* if it is still escaped, the last character must be
|
||||
* the escaper itself. treat it as a normal character */
|
||||
*xp++ = ec;
|
||||
}
|
||||
|
||||
tok->ptr = ts;
|
||||
tok->len = xp - ts;
|
||||
return HAWK_NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int matchtre_ucs (hawk_tre_t* tre, int opt, const hawk_ucs_t* str, hawk_ucs_t* mat, hawk_ucs_t submat[9], hawk_gem_t* errgem)
|
||||
{
|
||||
|
@ -2579,8 +2579,8 @@ static hawk_nde_t* parse_while (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
if (get_token(hawk) <= -1) goto oops;
|
||||
|
||||
ploc = hawk->tok.loc;
|
||||
test = parse_expr_withdc (hawk, &ploc);
|
||||
if (test == HAWK_NULL) goto oops;
|
||||
test = parse_expr_withdc(hawk, &ploc);
|
||||
if (HAWK_UNLIKELY(!test)) goto oops;
|
||||
|
||||
if (!MATCH(hawk,TOK_RPAREN))
|
||||
{
|
||||
@ -2591,11 +2591,11 @@ static hawk_nde_t* parse_while (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
if (get_token(hawk) <= -1) goto oops;
|
||||
|
||||
ploc = hawk->tok.loc;
|
||||
body = parse_statement (hawk, &ploc);
|
||||
if (body == HAWK_NULL) goto oops;
|
||||
body = parse_statement(hawk, &ploc);
|
||||
if (HAWK_UNLIKELY(!body)) goto oops;
|
||||
|
||||
nde = (hawk_nde_while_t*) hawk_callocmem (hawk, HAWK_SIZEOF(*nde));
|
||||
if (nde == HAWK_NULL)
|
||||
nde = (hawk_nde_while_t*)hawk_callocmem(hawk, HAWK_SIZEOF(*nde));
|
||||
if (HAWK_UNLIKELY(!nde))
|
||||
{
|
||||
ADJERR_LOC (hawk, xloc);
|
||||
goto oops;
|
||||
@ -2628,7 +2628,7 @@ static hawk_nde_t* parse_for (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
return HAWK_NULL;
|
||||
}
|
||||
if (get_token(hawk) <= -1) return HAWK_NULL;
|
||||
|
||||
|
||||
if (!MATCH(hawk,TOK_SEMICOLON))
|
||||
{
|
||||
/* this line is very ugly. it checks the entire next
|
||||
@ -2694,8 +2694,8 @@ static hawk_nde_t* parse_for (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
if (!MATCH(hawk,TOK_SEMICOLON))
|
||||
{
|
||||
ploc = hawk->tok.loc;
|
||||
test = parse_expr_withdc (hawk, &ploc);
|
||||
if (test == HAWK_NULL) goto oops;
|
||||
test = parse_expr_withdc(hawk, &ploc);
|
||||
if (HAWK_UNLIKELY(!test)) goto oops;
|
||||
|
||||
if (!MATCH(hawk,TOK_SEMICOLON))
|
||||
{
|
||||
@ -2717,8 +2717,8 @@ static hawk_nde_t* parse_for (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
hawk_loc_t eloc;
|
||||
|
||||
eloc = hawk->tok.loc;
|
||||
incr = parse_expr_withdc (hawk, &eloc);
|
||||
if (incr == HAWK_NULL) goto oops;
|
||||
incr = parse_expr_withdc(hawk, &eloc);
|
||||
if (HAWK_UNLIKELY(!incr)) goto oops;
|
||||
}
|
||||
|
||||
if (!MATCH(hawk,TOK_RPAREN))
|
||||
@ -2734,8 +2734,8 @@ static hawk_nde_t* parse_for (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
body = parse_statement (hawk, &ploc);
|
||||
if (body == HAWK_NULL) goto oops;
|
||||
|
||||
nde_for = (hawk_nde_for_t*) hawk_callocmem (hawk, HAWK_SIZEOF(*nde_for));
|
||||
if (nde_for == HAWK_NULL)
|
||||
nde_for = (hawk_nde_for_t*)hawk_callocmem(hawk, HAWK_SIZEOF(*nde_for));
|
||||
if (HAWK_UNLIKELY(!nde_for))
|
||||
{
|
||||
ADJERR_LOC (hawk, xloc);
|
||||
goto oops;
|
||||
@ -2768,8 +2768,8 @@ static hawk_nde_t* parse_dowhile (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
HAWK_ASSERT (hawk->ptok.type == TOK_DO);
|
||||
|
||||
ploc = hawk->tok.loc;
|
||||
body = parse_statement (hawk, &ploc);
|
||||
if (body == HAWK_NULL) goto oops;
|
||||
body = parse_statement(hawk, &ploc);
|
||||
if (HAWK_UNLIKELY(!body)) goto oops;
|
||||
|
||||
while (MATCH(hawk,TOK_NEWLINE))
|
||||
{
|
||||
@ -2794,7 +2794,7 @@ static hawk_nde_t* parse_dowhile (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
|
||||
ploc = hawk->tok.loc;
|
||||
test = parse_expr_withdc (hawk, &ploc);
|
||||
if (test == HAWK_NULL) goto oops;
|
||||
if (HAWK_UNLIKELY(!test)) goto oops;
|
||||
|
||||
if (!MATCH(hawk,TOK_RPAREN))
|
||||
{
|
||||
@ -2803,9 +2803,9 @@ static hawk_nde_t* parse_dowhile (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
}
|
||||
|
||||
if (get_token(hawk) <= -1) goto oops;
|
||||
|
||||
nde = (hawk_nde_while_t*) hawk_callocmem (hawk, HAWK_SIZEOF(*nde));
|
||||
if (nde == HAWK_NULL)
|
||||
|
||||
nde = (hawk_nde_while_t*)hawk_callocmem(hawk, HAWK_SIZEOF(*nde));
|
||||
if (HAWK_UNLIKELY(!nde))
|
||||
{
|
||||
ADJERR_LOC (hawk, xloc);
|
||||
goto oops;
|
||||
@ -2836,8 +2836,8 @@ static hawk_nde_t* parse_break (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
return HAWK_NULL;
|
||||
}
|
||||
|
||||
nde = (hawk_nde_break_t*) hawk_callocmem (hawk, HAWK_SIZEOF(*nde));
|
||||
if (nde == HAWK_NULL)
|
||||
nde = (hawk_nde_break_t*)hawk_callocmem(hawk, HAWK_SIZEOF(*nde));
|
||||
if (HAWK_UNLIKELY(!nde))
|
||||
{
|
||||
ADJERR_LOC (hawk, xloc);
|
||||
return HAWK_NULL;
|
||||
@ -2845,7 +2845,7 @@ static hawk_nde_t* parse_break (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
|
||||
nde->type = HAWK_NDE_BREAK;
|
||||
nde->loc = *xloc;
|
||||
|
||||
|
||||
return (hawk_nde_t*)nde;
|
||||
}
|
||||
|
||||
@ -2860,8 +2860,8 @@ static hawk_nde_t* parse_continue (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
return HAWK_NULL;
|
||||
}
|
||||
|
||||
nde = (hawk_nde_continue_t*) hawk_callocmem (hawk, HAWK_SIZEOF(*nde));
|
||||
if (nde == HAWK_NULL)
|
||||
nde = (hawk_nde_continue_t*)hawk_callocmem(hawk, HAWK_SIZEOF(*nde));
|
||||
if (HAWK_UNLIKELY(!nde))
|
||||
{
|
||||
ADJERR_LOC (hawk, xloc);
|
||||
return HAWK_NULL;
|
||||
@ -2880,8 +2880,8 @@ static hawk_nde_t* parse_return (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
|
||||
HAWK_ASSERT (hawk->ptok.type == TOK_RETURN);
|
||||
|
||||
nde = (hawk_nde_return_t*) hawk_callocmem ( hawk, HAWK_SIZEOF(*nde));
|
||||
if (nde == HAWK_NULL)
|
||||
nde = (hawk_nde_return_t*)hawk_callocmem(hawk, HAWK_SIZEOF(*nde));
|
||||
if (HAWK_UNLIKELY(!nde))
|
||||
{
|
||||
ADJERR_LOC (hawk, xloc);
|
||||
return HAWK_NULL;
|
||||
@ -2900,8 +2900,8 @@ static hawk_nde_t* parse_return (hawk_t* hawk, const hawk_loc_t* xloc)
|
||||
hawk_loc_t eloc;
|
||||
|
||||
eloc = hawk->tok.loc;
|
||||
val = parse_expr_withdc (hawk, &eloc);
|
||||
if (val == HAWK_NULL)
|
||||
val = parse_expr_withdc(hawk, &eloc);
|
||||
if (HAWK_UNLIKELY(!val))
|
||||
{
|
||||
hawk_freemem (hawk, nde);
|
||||
return HAWK_NULL;
|
||||
|
@ -379,12 +379,12 @@ static int set_global (hawk_rtx_t* rtx, int idx, hawk_nde_var_t* var, hawk_val_t
|
||||
HAWK_ASSERT (vtype != HAWK_VAL_REX);
|
||||
|
||||
out.type = HAWK_RTX_VALTOSTR_CPLDUP;
|
||||
if (hawk_rtx_valtostr (rtx, val, &out) <= -1) return -1;
|
||||
if (hawk_rtx_valtostr(rtx, val, &out) <= -1) return -1;
|
||||
fs_ptr = out.u.cpldup.ptr;
|
||||
fs_len = out.u.cpldup.len;
|
||||
}
|
||||
|
||||
if (fs_len > 1 && !(fs_len == 5 && fs_ptr[0] == HAWK_T('?')))
|
||||
if (fs_len > 1 && !(fs_len == 5 && fs_ptr[0] == '?'))
|
||||
{
|
||||
/* it's a regular expression if FS contains multiple characters.
|
||||
* however, it's not a regular expression if it's 5 character
|
||||
|
@ -1869,14 +1869,14 @@ tre_ast_to_tnfa(hawk_gem_t* gem, tre_ast_node_t *node, tre_tnfa_transition_t *tr
|
||||
}
|
||||
|
||||
|
||||
#define ERROR_EXIT(err) \
|
||||
do \
|
||||
{ \
|
||||
errcode = err; \
|
||||
if (/*CONSTCOND*/1) \
|
||||
goto error_exit; \
|
||||
} \
|
||||
while (/*CONSTCOND*/0)
|
||||
/* ERROR_EXIT(err): store `err` in the variable `errcode` and jump to the
 * `error_exit` label. Both `errcode` and `error_exit` are taken from the
 * scope where the macro is expanded, so the macro is only usable inside a
 * function that provides them.
 * The do { ... } while (0) wrapper makes the expansion behave as a single
 * statement. The constant `if (1)` around the goto carries a CONSTCOND
 * marker; presumably it exists to silence lint/compiler diagnostics --
 * TODO confirm. */
#define ERROR_EXIT(err) \
|
||||
do \
|
||||
{ \
|
||||
errcode = err; \
|
||||
if (/*CONSTCOND*/1) \
|
||||
goto error_exit; \
|
||||
} \
|
||||
while (/*CONSTCOND*/0)
|
||||
|
||||
|
||||
int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
|
||||
@ -1901,11 +1901,10 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
|
||||
/* HAWK: deleted limit on the stack size
|
||||
stack = tre_stack_new(preg->gem, 512, 10240, 128); */
|
||||
stack = tre_stack_new(preg->gem, 512, -1, 128);
|
||||
if (!stack)
|
||||
return REG_ESPACE;
|
||||
if (HAWK_UNLIKELY(!stack)) return REG_ESPACE;
|
||||
/* Allocate a fast memory allocator. */
|
||||
mem = tre_mem_new(preg->gem);
|
||||
if (!mem)
|
||||
if (HAWK_UNLIKELY(!mem))
|
||||
{
|
||||
tre_stack_destroy(stack);
|
||||
return REG_ESPACE;
|
||||
@ -1921,8 +1920,7 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
|
||||
parse_ctx.max_backref = -1;
|
||||
DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
|
||||
errcode = tre_parse(&parse_ctx);
|
||||
if (errcode != REG_OK)
|
||||
ERROR_EXIT(errcode);
|
||||
if (errcode != REG_OK) ERROR_EXIT(errcode);
|
||||
preg->re_nsub = parse_ctx.submatch_id - 1;
|
||||
tree = parse_ctx.result;
|
||||
|
||||
@ -1941,8 +1939,8 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
|
||||
|
||||
/* Allocate the TNFA struct. */
|
||||
tnfa = xcalloc(preg->gem, 1, sizeof(tre_tnfa_t));
|
||||
if (tnfa == NULL)
|
||||
ERROR_EXIT(REG_ESPACE);
|
||||
if (HAWK_UNLIKELY(!tnfa)) ERROR_EXIT(REG_ESPACE);
|
||||
|
||||
tnfa->have_backrefs = parse_ctx.max_backref >= 0;
|
||||
tnfa->have_approx = parse_ctx.have_approx;
|
||||
tnfa->num_submatches = parse_ctx.submatch_id;
|
||||
@ -1966,26 +1964,21 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
|
||||
{
|
||||
tag_directions = xmalloc(preg->gem,sizeof(*tag_directions)
|
||||
* (tnfa->num_tags + 1));
|
||||
if (tag_directions == NULL)
|
||||
ERROR_EXIT(REG_ESPACE);
|
||||
if (tag_directions == NULL) ERROR_EXIT(REG_ESPACE);
|
||||
tnfa->tag_directions = tag_directions;
|
||||
HAWK_MEMSET(tag_directions, -1,
|
||||
sizeof(*tag_directions) * (tnfa->num_tags + 1));
|
||||
HAWK_MEMSET(tag_directions, -1, sizeof(*tag_directions) * (tnfa->num_tags + 1));
|
||||
}
|
||||
tnfa->minimal_tags = xcalloc(preg->gem, (unsigned)tnfa->num_tags * 2 + 1,
|
||||
sizeof(tnfa->minimal_tags));
|
||||
if (tnfa->minimal_tags == NULL)
|
||||
ERROR_EXIT(REG_ESPACE);
|
||||
|
||||
submatch_data = xcalloc(preg->gem,(unsigned)parse_ctx.submatch_id,
|
||||
sizeof(*submatch_data));
|
||||
if (submatch_data == NULL)
|
||||
ERROR_EXIT(REG_ESPACE);
|
||||
submatch_data = xcalloc(preg->gem,(unsigned)parse_ctx.submatch_id, sizeof(*submatch_data));
|
||||
if (HAWK_UNLIKELY(!submatch_data)) ERROR_EXIT(REG_ESPACE);
|
||||
tnfa->submatch_data = submatch_data;
|
||||
|
||||
errcode = tre_add_tags(mem, stack, tree, tnfa, 0);
|
||||
if (errcode != REG_OK)
|
||||
ERROR_EXIT(errcode);
|
||||
if (errcode != REG_OK) ERROR_EXIT(errcode);
|
||||
|
||||
#ifdef TRE_DEBUG
|
||||
for (i = 0; i < parse_ctx.submatch_id; i++)
|
||||
@ -1999,10 +1992,8 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
|
||||
}
|
||||
|
||||
/* Expand iteration nodes. */
|
||||
errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
|
||||
tag_directions, &tnfa->params_depth);
|
||||
if (errcode != REG_OK)
|
||||
ERROR_EXIT(errcode);
|
||||
errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position, tag_directions, &tnfa->params_depth);
|
||||
if (errcode != REG_OK) ERROR_EXIT(errcode);
|
||||
|
||||
/* Add a dummy node for the final state.
|
||||
XXX - For certain patterns this dummy node can be optimized away,
|
||||
@ -2010,12 +2001,10 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
|
||||
this possibility. */
|
||||
tmp_ast_l = tree;
|
||||
tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
|
||||
if (tmp_ast_r == NULL)
|
||||
ERROR_EXIT(REG_ESPACE);
|
||||
if (HAWK_UNLIKELY(!tmp_ast_r)) ERROR_EXIT(REG_ESPACE);
|
||||
|
||||
tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
|
||||
if (tree == NULL)
|
||||
ERROR_EXIT(REG_ESPACE);
|
||||
if (HAWK_UNLIKELY(!tree)) ERROR_EXIT(REG_ESPACE);
|
||||
|
||||
#ifdef TRE_DEBUG
|
||||
tre_ast_print(tree);
|
||||
@ -2023,16 +2012,13 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
|
||||
#endif /* TRE_DEBUG */
|
||||
|
||||
errcode = tre_compute_nfl(mem, stack, tree);
|
||||
if (errcode != REG_OK)
|
||||
ERROR_EXIT(errcode);
|
||||
if (errcode != REG_OK) ERROR_EXIT(errcode);
|
||||
|
||||
counts = xmalloc(preg->gem,sizeof(int) * parse_ctx.position);
|
||||
if (counts == NULL)
|
||||
ERROR_EXIT(REG_ESPACE);
|
||||
if (HAWK_UNLIKELY(!counts)) ERROR_EXIT(REG_ESPACE);
|
||||
|
||||
offs = xmalloc(preg->gem,sizeof(int) * parse_ctx.position);
|
||||
if (offs == NULL)
|
||||
ERROR_EXIT(REG_ESPACE);
|
||||
if (HAWK_UNLIKELY(!offs)) ERROR_EXIT(REG_ESPACE);
|
||||
|
||||
for (i = 0; i < parse_ctx.position; i++)
|
||||
counts[i] = 0;
|
||||
@ -2046,15 +2032,13 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
|
||||
counts[i] = 0;
|
||||
}
|
||||
transitions = xcalloc(preg->gem, (unsigned)add + 1, sizeof(*transitions));
|
||||
if (transitions == NULL)
|
||||
ERROR_EXIT(REG_ESPACE);
|
||||
if (HAWK_UNLIKELY(!transitions)) ERROR_EXIT(REG_ESPACE);
|
||||
tnfa->transitions = transitions;
|
||||
tnfa->num_transitions = add;
|
||||
|
||||
DPRINT(("Converting to TNFA:\n"));
|
||||
errcode = tre_ast_to_tnfa(preg->gem, tree, transitions, counts, offs);
|
||||
if (errcode != REG_OK)
|
||||
ERROR_EXIT(errcode);
|
||||
if (errcode != REG_OK) ERROR_EXIT(errcode);
|
||||
|
||||
/* If in eight bit mode, compute a table of characters that can be the
|
||||
first character of a match. */
|
||||
@ -2145,8 +2129,7 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
|
||||
}
|
||||
|
||||
initial = xcalloc(preg->gem, (unsigned)i + 1, sizeof(tre_tnfa_transition_t));
|
||||
if (initial == NULL)
|
||||
ERROR_EXIT(REG_ESPACE);
|
||||
if (HAWK_UNLIKELY(!initial)) ERROR_EXIT(REG_ESPACE);
|
||||
tnfa->initial = initial;
|
||||
|
||||
i = 0;
|
||||
@ -2162,18 +2145,15 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
|
||||
int j;
|
||||
for (j = 0; p->tags[j] >= 0; j++);
|
||||
initial[i].tags = xmalloc(preg->gem,sizeof(*p->tags) * (j + 1));
|
||||
if (!initial[i].tags)
|
||||
ERROR_EXIT(REG_ESPACE);
|
||||
if (HAWK_UNLIKELY(!initial[i].tags)) ERROR_EXIT(REG_ESPACE);
|
||||
HAWK_MEMCPY (initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
|
||||
}
|
||||
initial[i].params = NULL;
|
||||
if (p->params)
|
||||
{
|
||||
initial[i].params = xmalloc(preg->gem,sizeof(*p->params) * TRE_PARAM_LAST);
|
||||
if (!initial[i].params)
|
||||
ERROR_EXIT(REG_ESPACE);
|
||||
HAWK_MEMCPY (initial[i].params, p->params,
|
||||
sizeof(*p->params) * TRE_PARAM_LAST);
|
||||
if (HAWK_UNLIKELY(!initial[i].params)) ERROR_EXIT(REG_ESPACE);
|
||||
HAWK_MEMCPY (initial[i].params, p->params, sizeof(*p->params) * TRE_PARAM_LAST);
|
||||
}
|
||||
initial[i].assertions = p->assertions;
|
||||
i++;
|
||||
@ -2198,12 +2178,9 @@ int tre_compile (regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
|
||||
error_exit:
|
||||
/* Free everything that was allocated and return the error code. */
|
||||
tre_mem_destroy(mem);
|
||||
if (stack != NULL)
|
||||
tre_stack_destroy(stack);
|
||||
if (counts != NULL)
|
||||
xfree(preg->gem,counts);
|
||||
if (offs != NULL)
|
||||
xfree(preg->gem,offs);
|
||||
if (stack) tre_stack_destroy(stack);
|
||||
if (counts) xfree(preg->gem,counts);
|
||||
if (offs) xfree(preg->gem,offs);
|
||||
preg->TRE_REGEX_T_FIELD = (void *)tnfa;
|
||||
tre_free(preg);
|
||||
return errcode;
|
||||
|
@ -64,83 +64,83 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
/* Wide character and multibyte support. */
|
||||
|
||||
#define GET_NEXT_WCHAR() \
|
||||
do { \
|
||||
prev_c = next_c; \
|
||||
if (type == STR_BYTE) \
|
||||
{ \
|
||||
pos++; \
|
||||
if (len >= 0 && pos >= len) \
|
||||
next_c = '\0'; \
|
||||
else \
|
||||
next_c = (unsigned char)(*str_byte++); \
|
||||
} \
|
||||
else if (type == STR_WIDE) \
|
||||
{ \
|
||||
pos++; \
|
||||
if (len >= 0 && pos >= len) \
|
||||
next_c = HAWK_T('\0'); \
|
||||
else \
|
||||
next_c = *str_wide++; \
|
||||
} \
|
||||
else if (type == STR_MBS) \
|
||||
{ \
|
||||
pos += pos_add_next; \
|
||||
if (str_byte == NULL) \
|
||||
next_c = HAWK_T('\0'); \
|
||||
else \
|
||||
{ \
|
||||
size_t w; \
|
||||
int max; \
|
||||
if (len >= 0) \
|
||||
max = len - pos; \
|
||||
else \
|
||||
max = 32; \
|
||||
if (max <= 0) \
|
||||
{ \
|
||||
next_c = HAWK_T('\0'); \
|
||||
pos_add_next = 1; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
w = hawk_mbrtowc(str_byte, (size_t)max, &next_c, &mbstate); \
|
||||
if (w <= 0 || w > max) \
|
||||
return REG_NOMATCH; \
|
||||
if (next_c == HAWK_T('\0') && len >= 0) \
|
||||
{ \
|
||||
pos_add_next = 1; \
|
||||
next_c = 0; \
|
||||
str_byte++; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
pos_add_next = w; \
|
||||
str_byte += w; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} while(/*CONSTCOND*/0)
|
||||
/* GET_NEXT_WCHAR(): advance the matcher's input by one character.
 *
 * The macro takes no arguments; it reads and writes these names from the
 * scope where it is expanded: type, pos, len, prev_c, next_c, str_byte,
 * str_wide, pos_add_next and mbstate.
 *
 * Behaviour:
 *  - prev_c always receives the old next_c first.
 *  - STR_BYTE: pos is incremented; next_c becomes '\0' when len >= 0 and
 *    pos >= len, otherwise the next byte read through str_byte (cast to
 *    unsigned char so it is never negative).
 *  - STR_WIDE: same as STR_BYTE, but the character is read through str_wide.
 *  - STR_MBS: pos advances by pos_add_next (the byte count stored by the
 *    previous step). A NULL str_byte yields '\0'. Otherwise up to `max`
 *    bytes (len - pos, or 32 when len is negative) are handed to
 *    hawk_mbrtowc(); its result `w` is used as the number of bytes to
 *    consume. When max <= 0 the input is exhausted and next_c becomes '\0'.
 *    When the decoded character is '\0' and len >= 0, exactly one byte is
 *    consumed so that embedded NULs inside a counted string are stepped over.
 *
 * WARNING: in the STR_MBS branch the macro executes `return REG_NOMATCH;`
 * when hawk_mbrtowc() reports w <= 0 or w > max, i.e. it returns from the
 * ENCLOSING function.
 *
 * NOTE(review): `w` is declared size_t, so if that type is unsigned the
 * `w <= 0` test can only catch 0 and any error value would have to be caught
 * by `w > max` (a size_t vs. int comparison) -- confirm against
 * hawk_mbrtowc()'s return convention. */
#define GET_NEXT_WCHAR() \
|
||||
do { \
|
||||
prev_c = next_c; \
|
||||
if (type == STR_BYTE) \
|
||||
{ \
|
||||
pos++; \
|
||||
if (len >= 0 && pos >= len) \
|
||||
next_c = '\0'; \
|
||||
else \
|
||||
next_c = (unsigned char)(*str_byte++); \
|
||||
} \
|
||||
else if (type == STR_WIDE) \
|
||||
{ \
|
||||
pos++; \
|
||||
if (len >= 0 && pos >= len) \
|
||||
next_c = '\0'; \
|
||||
else \
|
||||
next_c = *str_wide++; \
|
||||
} \
|
||||
else if (type == STR_MBS) \
|
||||
{ \
|
||||
pos += pos_add_next; \
|
||||
if (str_byte == NULL) \
|
||||
next_c = '\0'; \
|
||||
else \
|
||||
{ \
|
||||
size_t w; \
|
||||
int max; \
|
||||
if (len >= 0) \
|
||||
max = len - pos; \
|
||||
else \
|
||||
max = 32; \
|
||||
if (max <= 0) \
|
||||
{ \
|
||||
next_c = '\0'; \
|
||||
pos_add_next = 1; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
w = hawk_mbrtowc(str_byte, (size_t)max, &next_c, &mbstate); \
|
||||
if (w <= 0 || w > max) \
|
||||
return REG_NOMATCH; \
|
||||
if (next_c == '\0' && len >= 0) \
|
||||
{ \
|
||||
pos_add_next = 1; \
|
||||
next_c = 0; \
|
||||
str_byte++; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
pos_add_next = w; \
|
||||
str_byte += w; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} while(/*CONSTCOND*/0)
|
||||
|
||||
#else /* !TRE_MULTIBYTE */
|
||||
|
||||
/* Wide character support, no multibyte support. */
|
||||
|
||||
#define GET_NEXT_WCHAR() \
|
||||
do { \
|
||||
prev_c = next_c; \
|
||||
if (type == STR_BYTE) \
|
||||
{ \
|
||||
pos++; \
|
||||
if (len >= 0 && pos >= len) next_c = HAWK_BT('\0'); \
|
||||
else next_c = (unsigned char)(*str_byte++); \
|
||||
} \
|
||||
else if (type == STR_WIDE) \
|
||||
{ \
|
||||
pos++; \
|
||||
if (len >= 0 && pos >= len) next_c = HAWK_T('\0'); \
|
||||
else next_c = *str_wide++; \
|
||||
} \
|
||||
/* GET_NEXT_WCHAR(), variant without multibyte decoding: advance the
 * matcher's input by one character.
 *
 * Uses type, pos, len, prev_c, next_c, str_byte and str_wide from the scope
 * where the macro is expanded.
 *  - prev_c receives the old next_c.
 *  - For STR_BYTE and STR_WIDE, pos is incremented; next_c becomes '\0'
 *    when len >= 0 and pos >= len, otherwise the next element is read
 *    through str_byte (cast to unsigned char) or str_wide respectively and
 *    that pointer is advanced.
 *  - Any other value of `type` leaves pos and next_c unchanged. */
#define GET_NEXT_WCHAR() \
|
||||
do { \
|
||||
prev_c = next_c; \
|
||||
if (type == STR_BYTE) \
|
||||
{ \
|
||||
pos++; \
|
||||
if (len >= 0 && pos >= len) next_c = '\0'; \
|
||||
else next_c = (unsigned char)(*str_byte++); \
|
||||
} \
|
||||
else if (type == STR_WIDE) \
|
||||
{ \
|
||||
pos++; \
|
||||
if (len >= 0 && pos >= len) next_c = '\0'; \
|
||||
else next_c = *str_wide++; \
|
||||
} \
|
||||
} while(/*CONSTCOND*/0)
|
||||
|
||||
#endif /* !TRE_MULTIBYTE */
|
||||
@ -166,22 +166,22 @@ do { \
|
||||
|
||||
/* IS_WORD_CHAR(c): nonzero when c is an underscore or tre_isalnum(c) is
 * true. Note that c is evaluated twice when it is not an underscore. */
#define IS_WORD_CHAR(c) ((c) == HAWK_T('_') || tre_isalnum(c))
|
||||
|
||||
#define CHECK_ASSERTIONS(assertions) \
|
||||
(((assertions & ASSERT_AT_BOL) \
|
||||
&& (pos > 0 || reg_notbol) \
|
||||
&& (prev_c != HAWK_T('\n') || !reg_newline)) \
|
||||
|| ((assertions & ASSERT_AT_EOL) \
|
||||
&& (next_c != HAWK_T('\0') || reg_noteol) \
|
||||
&& (next_c != HAWK_T('\n') || !reg_newline)) \
|
||||
|| ((assertions & ASSERT_AT_BOW) \
|
||||
&& (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
|
||||
|| ((assertions & ASSERT_AT_EOW) \
|
||||
&& (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
|
||||
|| ((assertions & ASSERT_AT_WB) \
|
||||
&& (pos != 0 && next_c != HAWK_T('\0') \
|
||||
&& IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
|
||||
|| ((assertions & ASSERT_AT_WB_NEG) \
|
||||
&& (pos == 0 || next_c == HAWK_T('\0') \
|
||||
/* CHECK_ASSERTIONS(assertions): nonzero when at least one assertion bit set
 * in `assertions` is NOT satisfied at the current position, i.e. a true
 * result means the assertion check failed.
 *
 * Reads pos, prev_c, next_c, reg_notbol, reg_noteol and reg_newline from the
 * scope where the macro is expanded. Failure conditions per bit:
 *  - ASSERT_AT_BOL:    (pos > 0 or reg_notbol) and not (prev_c is '\n' with
 *                      reg_newline set).
 *  - ASSERT_AT_EOL:    (next_c is not '\0' or reg_noteol) and not (next_c is
 *                      '\n' with reg_newline set).
 *  - ASSERT_AT_BOW:    prev_c is a word character, or next_c is not.
 *  - ASSERT_AT_EOW:    prev_c is not a word character, or next_c is.
 *  - ASSERT_AT_WB:     pos != 0, next_c is not '\0', and prev_c/next_c have
 *                      the same word-character status.
 *  - ASSERT_AT_WB_NEG: pos == 0, or next_c is '\0', or prev_c/next_c differ
 *                      in word-character status.
 *
 * The `assertions` argument is used unparenthesized inside `&` expressions
 * and is evaluated several times, so pass a simple expression. */
#define CHECK_ASSERTIONS(assertions) \
|
||||
(((assertions & ASSERT_AT_BOL) \
|
||||
&& (pos > 0 || reg_notbol) \
|
||||
&& (prev_c != HAWK_T('\n') || !reg_newline)) \
|
||||
|| ((assertions & ASSERT_AT_EOL) \
|
||||
&& (next_c != HAWK_T('\0') || reg_noteol) \
|
||||
&& (next_c != HAWK_T('\n') || !reg_newline)) \
|
||||
|| ((assertions & ASSERT_AT_BOW) \
|
||||
&& (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
|
||||
|| ((assertions & ASSERT_AT_EOW) \
|
||||
&& (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
|
||||
|| ((assertions & ASSERT_AT_WB) \
|
||||
&& (pos != 0 && next_c != HAWK_T('\0') \
|
||||
&& IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
|
||||
|| ((assertions & ASSERT_AT_WB_NEG) \
|
||||
&& (pos == 0 || next_c == HAWK_T('\0') \
|
||||
|| IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
|
||||
|
||||
#define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
|
||||
@ -191,7 +191,7 @@ do { \
|
||||
|| ((trans_i->assertions & ASSERT_CHAR_CLASS) \
|
||||
&& (tnfa->cflags & REG_ICASE) \
|
||||
&& !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class) \
|
||||
&& !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \
|
||||
&& !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class)) \
|
||||
|| ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG) \
|
||||
&& tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
|
||||
tnfa->cflags & REG_ICASE)))
|
||||
@ -201,8 +201,7 @@ do { \
|
||||
|
||||
/* Returns 1 if `t1' wins `t2', 0 otherwise. */
|
||||
HAWK_INLINE static int
|
||||
tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
|
||||
int *t1, int *t2)
|
||||
tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions, int *t1, int *t2)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < num_tags; i++)
|
||||
|
@ -169,11 +169,7 @@ SUBMATCH[4] = [defg]
|
||||
#define tre_tolower(c) hawk_to_ooch_lower(c)
|
||||
#define tre_toupper(c) hawk_to_ooch_upper(c)
|
||||
|
||||
#if defined(HAWK_OOCH_IS_BCH) && (HAWK_SIZEOF_MCHAR_T == HAWK_SIZEOF_CHAR)
|
||||
typedef unsigned char tre_char_t;
|
||||
#else
|
||||
typedef hawk_ooch_t tre_char_t;
|
||||
#endif
|
||||
typedef hawk_ooch_t tre_char_t;
|
||||
typedef hawk_ooci_t tre_cint_t;
|
||||
|
||||
#define size_t hawk_oow_t
|
||||
|
@ -291,6 +291,33 @@ function main()
|
||||
ensure (a[2] === @b"Is", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[3] === @b"Some", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[4] === @b"Data", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
|
||||
ensure (split(@b"Here===Is=Some=====Data", a, /=+/), 4, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[1] === @b"Here", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[2] === @b"Is", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[3] === @b"Some", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[4] === @b"Data", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
|
||||
ensure (split("[Here] : [Is] : [So\\]me] :[Da:ta]", a, "?:\\[]"), 4, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[1] === "Here", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[2] === "Is", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[3] === "So]me", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[4] === "Da:ta", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
|
||||
ensure (split(@b"[Here] : [Is] : [So\\]me] :[Da:ta]", a, "?:\\[]"), 4, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[1] === @b"Here", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[2] === @b"Is", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[3] === @b"So]me", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[4] === @b"Da:ta", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
|
||||
ensure (split("Here===Is=Some=====Data", a, ""), 23, @SCRIPTNAME, @SCRIPTLINE);
|
||||
|
||||
ensure (split("Here Is Some Data", a, / /), 7, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (split("Here Is Some Data", a, " "), 4, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[1] === "Here", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[2] === "Is", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[3] === "Some", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[4] === "Data", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
}
|
||||
|
||||
print "SUCCESS";
|
||||
|
Loading…
Reference in New Issue
Block a user