From 4a60654b49adc0cd481da68d9c8243b3e7315a61 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Fri, 13 Nov 2020 02:50:20 +0000 Subject: [PATCH] added code to prepare byte string support in split(). still a long way to go --- hawk/lib/fnc.c | 240 ++++++++++++++++++++++++++++++++--- hawk/lib/misc-imp.h | 301 ++++++++++++++++++++++++++++++++++++++++++++ hawk/lib/misc-prv.h | 59 +++++---- hawk/lib/misc.c | 237 ++++------------------------------ hawk/lib/rec.c | 10 +- hawk/t/h-002.hawk | 54 +++++++- 6 files changed, 640 insertions(+), 261 deletions(-) create mode 100644 hawk/lib/misc-imp.h diff --git a/hawk/lib/fnc.c b/hawk/lib/fnc.c index af8da7f1..938860aa 100644 --- a/hawk/lib/fnc.c +++ b/hawk/lib/fnc.c @@ -787,7 +787,200 @@ int hawk_fnc_substr (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) return 0; } -int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) +#if 0 +static int split_mbs (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) +{ + hawk_oow_t nargs; + hawk_val_t* a0, * a2, * t1, * t2; + hawk_val_type_t a2_vtype, t1_vtype; + + hawk_bcs_t str; + hawk_bcs_t fs; + hawk_bch_t* fs_free = HAWK_NULL; + const hawk_bch_t* p; + hawk_oow_t str_left, org_len; + hawk_tre_t* fs_rex = HAWK_NULL; + hawk_tre_t* fs_rex_free = HAWK_NULL; + + hawk_bcs_t tok; + hawk_int_t nflds; + int x; + + str.ptr = HAWK_NULL; + str.len = 0; + + nargs = hawk_rtx_getnargs(rtx); + HAWK_ASSERT (nargs >= 2 && nargs <= 3); + + a0 = hawk_rtx_getarg(rtx, 0); + a2 = (nargs >= 3)? 
hawk_rtx_getarg(rtx, 2): HAWK_NULL; + + str.ptr = hawk_rtx_getvalbcstr(rtx, a0, &str.len); + if (HAWK_UNLIKELY(!str.ptr)) goto oops; + + if (!a2) + { + /* get the value from FS */ + t1 = hawk_rtx_getgbl(rtx, HAWK_GBL_FS); + t1_vtype = HAWK_RTX_GETVALTYPE(rtx, t1); + if (t1_vtype == HAWK_VAL_NIL) + { + fs.ptr = " "; + fs.len = 1; + } + else if (t1_vtype == HAWK_VAL_MBS) + { + fs.ptr = ((hawk_val_mbs_t*)t1)->val.ptr; + fs.len = ((hawk_val_mbs_t*)t1)->val.len; + } + else + { + fs.ptr = hawk_rtx_valtobcstrdup(rtx, t1, &fs.len); + if (HAWK_UNLIKELY(!fs.ptr)) goto oops; + fs_free = (hawk_bch_t*)fs.ptr; + } + + if (fs.len > 1) fs_rex = rtx->gbl.fs[rtx->gbl.ignorecase]; + } + else + { + a2_vtype = HAWK_RTX_GETVALTYPE(rtx, a2); + + if (a2_vtype == HAWK_VAL_REX) + { + /* the third parameter is a regular expression */ + fs_rex = ((hawk_val_rex_t*)a2)->code[rtx->gbl.ignorecase]; + + /* make the loop below to take fs_rex by + * setting fs_len greater than 1*/ + fs.ptr = HAWK_NULL; + fs.len = 2; + } + else + { + if (a2_vtype == HAWK_VAL_MBS) + { + fs.ptr = ((hawk_val_mbs_t*)a2)->val.ptr; + fs.len = ((hawk_val_mbs_t*)a2)->val.len; + } + else + { + fs.ptr = hawk_rtx_valtobcstrdup(rtx, a2, &fs.len); + if (fs.ptr == HAWK_NULL) goto oops; + fs_free = (hawk_bch_t*)fs.ptr; + } + + if (fs.len > 1) + { + int x; + + x = rtx->gbl.ignorecase? 
+ hawk_rtx_buildrex(rtx, fs.ptr, fs.len, HAWK_NULL, &fs_rex): + hawk_rtx_buildrex(rtx, fs.ptr, fs.len, &fs_rex, HAWK_NULL); + if (x <= -1) goto oops; + + fs_rex_free = fs_rex; + } + } + } + + t1 = hawk_rtx_makearrval(rtx); + if (HAWK_UNLIKELY(!t1)) goto oops; + + hawk_rtx_refupval (rtx, t1); + x = hawk_rtx_setrefval(rtx, (hawk_val_ref_t*)hawk_rtx_getarg(rtx, 1), t1); + hawk_rtx_refdownval (rtx, t1); + if (HAWK_UNLIKELY(x <= -1)) goto oops; + + /* fill the map with actual values */ + p = str.ptr; str_left = str.len; org_len = str.len; + nflds = 0; + + while (p) + { + hawk_bch_t key_buf[HAWK_SIZEOF(hawk_int_t)*8+2]; + hawk_oow_t key_len; + + if (fs.len <= 1) + { + p = hawk_rtx_tokoocharswithoochars(rtx, p, str.len, fs.ptr, fs.len, &tok); + } + else + { + p = hawk_rtx_tokoocharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok); + if (p == HAWK_NULL && hawk_rtx_geterrnum(rtx) != HAWK_ENOERR) + { + goto oops; + } + } + + if (nflds == 0 && p == HAWK_NULL && tok.len == 0) + { + /* no field at all*/ + break; + } + + HAWK_ASSERT ((tok.ptr != HAWK_NULL && tok.len > 0) || tok.len == 0); + + /* create the field string - however, the split function must + * create a numeric value if the string is a number */ + /*t2 = hawk_rtx_makembsvalwithbcs (rtx, &tok);*/ + /*t2 = hawk_rtx_makenmbsvalwithbcs(rtx, &tok); */ + t2 = hawk_rtx_makenumormbsvalwithbchars(rtx, tok.ptr, tok.len); + if (HAWK_UNLIKELY(!t2)) goto oops; + + /* put it into the map */ + key_len = hawk_int_to_oocstr(++nflds, 10, HAWK_NULL, key_buf, HAWK_COUNTOF(key_buf)); + HAWK_ASSERT (key_len != (hawk_oow_t)-1); + + if (hawk_rtx_setarrvalfld(rtx, t1, key_buf, key_len, t2) == HAWK_NULL) + { + hawk_rtx_refupval (rtx, t2); + hawk_rtx_refdownval (rtx, t2); + goto oops; + } + + str.len = str_left - (p - str.ptr); + } + + /*if (str_free) hawk_rtx_freemem (rtx, str_free);*/ + hawk_rtx_freevalbcstr (rtx, a0, str.ptr); + + if (fs_free) hawk_rtx_freemem (rtx, fs_free); + + if (fs_rex_free) + { + if (rtx->gbl.ignorecase) + 
hawk_rtx_freerex (rtx, HAWK_NULL, fs_rex_free); + else + hawk_rtx_freerex (rtx, fs_rex_free, HAWK_NULL); + } + + /*nflds--;*/ + + t1 = hawk_rtx_makeintval(rtx, nflds); + if (HAWK_UNLIKELY(!t1)) return -1; + + hawk_rtx_setretval (rtx, t1); + return 0; + +oops: + if (str.ptr) hawk_rtx_freevalbcstr (rtx, a0, str.ptr); + + if (fs_free) hawk_rtx_freemem (rtx, fs_free); + + if (fs_rex_free) + { + if (rtx->gbl.ignorecase) + hawk_rtx_freerex (rtx, HAWK_NULL, fs_rex_free); + else + hawk_rtx_freerex (rtx, fs_rex_free, HAWK_NULL); + } + return -1; +} +#endif + +static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array) { hawk_oow_t nargs; hawk_val_t* a0, * a2, * t1, * t2; @@ -817,7 +1010,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) str.ptr = hawk_rtx_getvaloocstr(rtx, a0, &str.len); if (HAWK_UNLIKELY(!str.ptr)) goto oops; - if (a2 == HAWK_NULL) + if (!a2) { /* get the value from FS */ t1 = hawk_rtx_getgbl(rtx, HAWK_GBL_FS); @@ -835,7 +1028,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) else { fs.ptr = hawk_rtx_valtooocstrdup(rtx, t1, &fs.len); - if (fs.ptr == HAWK_NULL) goto oops; + if (HAWK_UNLIKELY(!fs.ptr)) goto oops; fs_free = (hawk_ooch_t*)fs.ptr; } @@ -883,7 +1076,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) } } - t1 = hawk_rtx_makemapval(rtx); + t1 = use_array? 
hawk_rtx_makearrval(rtx, 16): hawk_rtx_makemapval(rtx); if (HAWK_UNLIKELY(!t1)) goto oops; hawk_rtx_refupval (rtx, t1); @@ -902,11 +1095,11 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) if (fs.len <= 1) { - p = hawk_rtx_strxntok(rtx, p, str.len, fs.ptr, fs.len, &tok); + p = hawk_rtx_tokoocharswithoochars(rtx, p, str.len, fs.ptr, fs.len, &tok); } else { - p = hawk_rtx_strxntokbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok); + p = hawk_rtx_tokoocharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok); if (p == HAWK_NULL && hawk_rtx_geterrnum(rtx) != HAWK_ENOERR) { goto oops; @@ -928,15 +1121,27 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) t2 = hawk_rtx_makenumorstrvalwithoochars(rtx, tok.ptr, tok.len); if (HAWK_UNLIKELY(!t2)) goto oops; - /* put it into the map */ - key_len = hawk_int_to_oocstr(++nflds, 10, HAWK_NULL, key_buf, HAWK_COUNTOF(key_buf)); - HAWK_ASSERT (key_len != (hawk_oow_t)-1); - - if (hawk_rtx_setmapvalfld(rtx, t1, key_buf, key_len, t2) == HAWK_NULL) + if (use_array) { - hawk_rtx_refupval (rtx, t2); - hawk_rtx_refdownval (rtx, t2); - goto oops; + if (hawk_rtx_setarrvalfld(rtx, t1, ++nflds, t2) == HAWK_NULL) + { + hawk_rtx_refupval (rtx, t2); + hawk_rtx_refdownval (rtx, t2); + goto oops; + } + } + else + { + /* put it into the map */ + key_len = hawk_int_to_oocstr(++nflds, 10, HAWK_NULL, key_buf, HAWK_COUNTOF(key_buf)); + HAWK_ASSERT (key_len != (hawk_oow_t)-1); + + if (hawk_rtx_setmapvalfld(rtx, t1, key_buf, key_len, t2) == HAWK_NULL) + { + hawk_rtx_refupval (rtx, t2); + hawk_rtx_refdownval (rtx, t2); + goto oops; + } } str.len = str_left - (p - str.ptr); @@ -958,7 +1163,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) /*nflds--;*/ t1 = hawk_rtx_makeintval (rtx, nflds); - if (t1 == HAWK_NULL) return -1; + if (HAWK_UNLIKELY(!t1)) return -1; hawk_rtx_setretval (rtx, t1); return 0; @@ -978,6 +1183,11 @@ oops: return -1; } +int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* 
fi) +{ + return fnc_split(rtx, fi, 1); +} + int hawk_fnc_tolower (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) { hawk_oow_t i; diff --git a/hawk/lib/misc-imp.h b/hawk/lib/misc-imp.h new file mode 100644 index 00000000..f6644688 --- /dev/null +++ b/hawk/lib/misc-imp.h @@ -0,0 +1,301 @@ +/* + * $Id$ + * + Copyright (c) 2006-2020 Chung, Hyung-Hwan. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +char_t* tokenize_xchars (hawk_rtx_t* rtx, const char_t* s, hawk_oow_t len, const char_t* delim, hawk_oow_t delim_len, xcs_t* tok) +{ + const char_t* p = s, *d; + const char_t* end = s + len; + const char_t* sp = HAWK_NULL, * ep = HAWK_NULL; + const char_t* delim_end = delim + delim_len; + char_t c; + int delim_mode; + +#define __DELIM_NULL 0 +#define __DELIM_EMPTY 1 +#define __DELIM_SPACES 2 +#define __DELIM_NOSPACES 3 +#define __DELIM_COMPOSITE 4 + if (delim == HAWK_NULL) delim_mode = __DELIM_NULL; + else + { + delim_mode = __DELIM_EMPTY; + + for (d = delim; d < delim_end; d++) + { + if (is_xch_space(*d)) + { + if (delim_mode == __DELIM_EMPTY) + delim_mode = __DELIM_SPACES; + else if (delim_mode == __DELIM_NOSPACES) + { + delim_mode = __DELIM_COMPOSITE; + break; + } + } + else + { + if (delim_mode == __DELIM_EMPTY) + delim_mode = __DELIM_NOSPACES; + else if (delim_mode == __DELIM_SPACES) + { + delim_mode = __DELIM_COMPOSITE; + break; + } + } + } + + /* TODO: verify the following statement... */ + if (delim_mode == __DELIM_SPACES && + delim_len == 1 && + delim[0] != ' ') delim_mode = __DELIM_NOSPACES; + } + + if (delim_mode == __DELIM_NULL) + { + /* when HAWK_NULL is given as "delim", it trims off the + * leading and trailing spaces characters off the source + * string "s" eventually. */ + + while (p < end && is_xch_space(*p)) p++; + while (p < end) + { + c = *p; + + if (!is_xch_space(c)) + { + if (sp == HAWK_NULL) sp = p; + ep = p; + } + p++; + } + } + else if (delim_mode == __DELIM_EMPTY) + { + /* each character in the source string "s" becomes a token. */ + if (p < end) + { + c = *p; + sp = p; + ep = p++; + } + } + else if (delim_mode == __DELIM_SPACES) + { + /* each token is delimited by space characters. all leading + * and trailing spaces are removed. 
*/ + + while (p < end && is_xch_space(*p)) p++; + while (p < end) + { + c = *p; + if (is_xch_space(c)) break; + if (sp == HAWK_NULL) sp = p; + ep = p++; + } + while (p < end && is_xch_space(*p)) p++; + } + else if (delim_mode == __DELIM_NOSPACES) + { + /* each token is delimited by one of characters + * in the delimiter set "delim". */ + if (rtx->gbl.ignorecase) + { + while (p < end) + { + c = hawk_to_ooch_upper(*p); + for (d = delim; d < delim_end; d++) + { + if (c == hawk_to_ooch_upper(*d)) goto exit_loop; + } + + if (sp == HAWK_NULL) sp = p; + ep = p++; + } + } + else + { + while (p < end) + { + c = *p; + for (d = delim; d < delim_end; d++) + { + if (c == *d) goto exit_loop; + } + + if (sp == HAWK_NULL) sp = p; + ep = p++; + } + } + } + else /* if (delim_mode == __DELIM_COMPOSITE) */ + { + /* each token is delimited by one of non-space characters + * in the delimiter set "delim". however, all space characters + * surrounding the token are removed */ + while (p < end && is_xch_space(*p)) p++; + if (rtx->gbl.ignorecase) + { + while (p < end) + { + c = hawk_to_ooch_upper(*p); + if (is_xch_space(c)) + { + p++; + continue; + } + for (d = delim; d < delim_end; d++) + { + if (c == hawk_to_ooch_upper(*d)) + goto exit_loop; + } + if (sp == HAWK_NULL) sp = p; + ep = p++; + } + } + else + { + while (p < end) + { + c = *p; + if (is_xch_space(c)) + { + p++; + continue; + } + for (d = delim; d < delim_end; d++) + { + if (c == *d) goto exit_loop; + } + if (sp == HAWK_NULL) sp = p; + ep = p++; + } + } + } + +exit_loop: + if (sp == HAWK_NULL) + { + tok->ptr = HAWK_NULL; + tok->len = (hawk_oow_t)0; + } + else + { + tok->ptr = (char_t*)sp; + tok->len = ep - sp + 1; + } + + /* if HAWK_NULL is returned, this function should not be called again */ + if (p >= end) return HAWK_NULL; + if (delim_mode == __DELIM_EMPTY || + delim_mode == __DELIM_SPACES) return (char_t*)p; + return (char_t*)++p; +} + +char_t* split_xchars_to_fields (hawk_rtx_t* rtx, char_t* str, hawk_oow_t len, char_t fs, 
char_t ec, char_t lq, char_t rq, xcs_t* tok) +{ + char_t* p = str; + char_t* end = str + len; + int escaped = 0, quoted = 0; + char_t* ts; /* token start */ + char_t* tp; /* points to one char past the last token char */ + char_t* xp; /* points to one char past the last effective char */ + + /* skip leading spaces */ + while (p < end && is_xch_space(*p)) p++; + + /* initialize token pointers */ + ts = tp = xp = p; + + while (p < end) + { + char_t c = *p; /* keep the full width of char_t; a plain char could truncate it before the compares and the write-back below */ + + if (escaped) + { + *tp++ = c; xp = tp; p++; + escaped = 0; + } + else + { + if (c == ec) + { + escaped = 1; + p++; + } + else if (quoted) + { + if (c == rq) + { + quoted = 0; + p++; + } + else + { + *tp++ = c; xp = tp; p++; + } + } + else + { + if (c == fs) + { + tok->ptr = ts; + tok->len = xp - ts; + p++; + + if (is_xch_space(fs)) + { + while (p < end && *p == fs) p++; + if (p >= end) return HAWK_NULL; + } + + return p; + } + + if (c == lq) + { + quoted = 1; + p++; + } + else + { + *tp++ = c; p++; + if (!is_xch_space(c)) xp = tp; + } + } + } + } + + if (escaped) + { + /* if it is still escaped, the last character must be + * the escaper itself. 
treat it as a normal character */ + *xp++ = ec; + } + + tok->ptr = ts; + tok->len = xp - ts; + return HAWK_NULL; +} diff --git a/hawk/lib/misc-prv.h b/hawk/lib/misc-prv.h index 099965db..8dc93d06 100644 --- a/hawk/lib/misc-prv.h +++ b/hawk/lib/misc-prv.h @@ -33,23 +33,46 @@ extern "C" { #endif -hawk_ooch_t* hawk_rtx_strtok ( - hawk_rtx_t* rtx, const hawk_ooch_t* s, - const hawk_ooch_t* delim, hawk_oocs_t* tok); +hawk_uch_t* hawk_rtx_flduchars ( + hawk_rtx_t* rtx, + hawk_uch_t* str, + hawk_oow_t len, + hawk_uch_t fs, /* field separator */ + hawk_uch_t ec, /* escaper */ + hawk_uch_t lq, /* opens a quoted span */ + hawk_uch_t rq, /* closes a quoted span */ + hawk_ucs_t* tok +); -hawk_ooch_t* hawk_rtx_strxtok ( - hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len, - const hawk_ooch_t* delim, hawk_oocs_t* tok); +hawk_bch_t* hawk_rtx_fldbchars ( + hawk_rtx_t* rtx, + hawk_bch_t* str, + hawk_oow_t len, + hawk_bch_t fs, /* field separator */ + hawk_bch_t ec, /* escaper */ + hawk_bch_t lq, /* opens a quoted span */ + hawk_bch_t rq, /* closes a quoted span */ + hawk_bcs_t* tok +); -hawk_ooch_t* hawk_rtx_strntok ( - hawk_rtx_t* rtx, const hawk_ooch_t* s, - const hawk_ooch_t* delim, hawk_oow_t delim_len, hawk_oocs_t* tok); +hawk_uch_t* hawk_rtx_tokucharswithuchars ( + hawk_rtx_t* rtx, const hawk_uch_t* s, hawk_oow_t len, + const hawk_uch_t* delim, hawk_oow_t delim_len, hawk_ucs_t* tok); -hawk_ooch_t* hawk_rtx_strxntok ( - hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len, - const hawk_ooch_t* delim, hawk_oow_t delim_len, hawk_oocs_t* tok); +hawk_bch_t* hawk_rtx_tokbcharswithbchars ( + hawk_rtx_t* rtx, const hawk_bch_t* s, hawk_oow_t len, + const hawk_bch_t* delim, hawk_oow_t delim_len, hawk_bcs_t* tok); -hawk_ooch_t* hawk_rtx_strxntokbyrex ( + +#if defined(HAWK_OOCH_IS_UCH) +# define hawk_rtx_fldoochars hawk_rtx_flduchars +# define hawk_rtx_tokoocharswithoochars hawk_rtx_tokucharswithuchars +#else +# define hawk_rtx_fldoochars hawk_rtx_fldbchars +# define hawk_rtx_tokoocharswithoochars 
hawk_rtx_tokbcharswithbchars +#endif + +hawk_ooch_t* hawk_rtx_tokoocharsbyrex ( hawk_rtx_t* rtx, const hawk_ooch_t* str, hawk_oow_t len, @@ -59,16 +82,6 @@ 
hawk_ooch_t* hawk_rtx_strxntokbyrex ( hawk_oocs_t* tok ); -hawk_ooch_t* hawk_rtx_strxnfld ( - hawk_rtx_t* rtx, - hawk_ooch_t* str, - hawk_oow_t len, - hawk_ooch_t fs, - hawk_ooch_t lq, - hawk_ooch_t rq, - hawk_ooch_t ec, - hawk_oocs_t* tok -); int hawk_rtx_matchvalwithucs ( hawk_rtx_t* rtx, hawk_val_t* val, diff --git a/hawk/lib/misc.c b/hawk/lib/misc.c index 6654ab08..42688e66 100644 --- a/hawk/lib/misc.c +++ b/hawk/lib/misc.c @@ -26,221 +26,32 @@ #include "hawk-prv.h" -hawk_ooch_t* hawk_rtx_strtok ( - hawk_rtx_t* rtx, const hawk_ooch_t* s, - const hawk_ooch_t* delim, hawk_oocs_t* tok) -{ - return hawk_rtx_strxntok(rtx, s, hawk_count_oocstr(s), delim, hawk_count_oocstr(delim), tok); -} -hawk_ooch_t* hawk_rtx_strxtok ( - hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len, - const hawk_ooch_t* delim, hawk_oocs_t* tok) -{ - return hawk_rtx_strxntok(rtx, s, len, delim, hawk_count_oocstr(delim), tok); -} +#undef char_t +#undef xcs_t +#undef is_xch_space +#undef tokenize_xchars +#undef split_xchars_to_fields +#define char_t hawk_bch_t +#define xcs_t hawk_bcs_t +#define is_xch_space hawk_is_bch_space +#define tokenize_xchars hawk_rtx_tokbcharswithbchars +#define split_xchars_to_fields hawk_rtx_fldbchars +#include "misc-imp.h" -hawk_ooch_t* hawk_rtx_strntok ( - hawk_rtx_t* rtx, const hawk_ooch_t* s, - const hawk_ooch_t* delim, hawk_oow_t delim_len, - hawk_oocs_t* tok) -{ - return hawk_rtx_strxntok(rtx, s, hawk_count_oocstr(s), delim, delim_len, tok); -} +#undef char_t +#undef xcs_t +#undef is_xch_space +#undef tokenize_xchars +#undef split_xchars_to_fields +#define char_t hawk_uch_t +#define xcs_t hawk_ucs_t +#define is_xch_space hawk_is_uch_space +#define tokenize_xchars hawk_rtx_tokucharswithuchars +#define split_xchars_to_fields hawk_rtx_flduchars +#include "misc-imp.h" -hawk_ooch_t* hawk_rtx_strxntok ( - hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len, - const hawk_ooch_t* delim, hawk_oow_t delim_len, hawk_oocs_t* tok) -{ - const hawk_ooch_t* p = s, *d; - 
const hawk_ooch_t* end = s + len; - const hawk_ooch_t* sp = HAWK_NULL, * ep = HAWK_NULL; - const hawk_ooch_t* delim_end = delim + delim_len; - hawk_ooch_t c; - int delim_mode; - -#define __DELIM_NULL 0 -#define __DELIM_EMPTY 1 -#define __DELIM_SPACES 2 -#define __DELIM_NOSPACES 3 -#define __DELIM_COMPOSITE 4 - if (delim == HAWK_NULL) delim_mode = __DELIM_NULL; - else - { - delim_mode = __DELIM_EMPTY; - - for (d = delim; d < delim_end; d++) - { - if (hawk_is_ooch_space(*d)) - { - if (delim_mode == __DELIM_EMPTY) - delim_mode = __DELIM_SPACES; - else if (delim_mode == __DELIM_NOSPACES) - { - delim_mode = __DELIM_COMPOSITE; - break; - } - } - else - { - if (delim_mode == __DELIM_EMPTY) - delim_mode = __DELIM_NOSPACES; - else if (delim_mode == __DELIM_SPACES) - { - delim_mode = __DELIM_COMPOSITE; - break; - } - } - } - - /* TODO: verify the following statement... */ - if (delim_mode == __DELIM_SPACES && - delim_len == 1 && - delim[0] != HAWK_T(' ')) delim_mode = __DELIM_NOSPACES; - } - - if (delim_mode == __DELIM_NULL) - { - /* when HAWK_NULL is given as "delim", it trims off the - * leading and trailing spaces characters off the source - * string "s" eventually. */ - - while (p < end && hawk_is_ooch_space(*p)) p++; - while (p < end) - { - c = *p; - - if (!hawk_is_ooch_space(c)) - { - if (sp == HAWK_NULL) sp = p; - ep = p; - } - p++; - } - } - else if (delim_mode == __DELIM_EMPTY) - { - /* each character in the source string "s" becomes a token. */ - if (p < end) - { - c = *p; - sp = p; - ep = p++; - } - } - else if (delim_mode == __DELIM_SPACES) - { - /* each token is delimited by space characters. all leading - * and trailing spaces are removed. 
*/ - - while (p < end && hawk_is_ooch_space(*p)) p++; - while (p < end) - { - c = *p; - if (hawk_is_ooch_space(c)) break; - if (sp == HAWK_NULL) sp = p; - ep = p++; - } - while (p < end && hawk_is_ooch_space(*p)) p++; - } - else if (delim_mode == __DELIM_NOSPACES) - { - /* each token is delimited by one of charaters - * in the delimeter set "delim". */ - if (rtx->gbl.ignorecase) - { - while (p < end) - { - c = hawk_to_ooch_upper(*p); - for (d = delim; d < delim_end; d++) - { - if (c == hawk_to_ooch_upper(*d)) goto exit_loop; - } - - if (sp == HAWK_NULL) sp = p; - ep = p++; - } - } - else - { - while (p < end) - { - c = *p; - for (d = delim; d < delim_end; d++) - { - if (c == *d) goto exit_loop; - } - - if (sp == HAWK_NULL) sp = p; - ep = p++; - } - } - } - else /* if (delim_mode == __DELIM_COMPOSITE) */ - { - /* each token is delimited by one of non-space charaters - * in the delimeter set "delim". however, all space characters - * surrounding the token are removed */ - while (p < end && hawk_is_ooch_space(*p)) p++; - if (rtx->gbl.ignorecase) - { - while (p < end) - { - c = hawk_to_ooch_upper(*p); - if (hawk_is_ooch_space(c)) - { - p++; - continue; - } - for (d = delim; d < delim_end; d++) - { - if (c == hawk_to_ooch_upper(*d)) - goto exit_loop; - } - if (sp == HAWK_NULL) sp = p; - ep = p++; - } - } - else - { - while (p < end) - { - c = *p; - if (hawk_is_ooch_space(c)) - { - p++; - continue; - } - for (d = delim; d < delim_end; d++) - { - if (c == *d) goto exit_loop; - } - if (sp == HAWK_NULL) sp = p; - ep = p++; - } - } - } - -exit_loop: - if (sp == HAWK_NULL) - { - tok->ptr = HAWK_NULL; - tok->len = (hawk_oow_t)0; - } - else - { - tok->ptr = (hawk_ooch_t*)sp; - tok->len = ep - sp + 1; - } - - /* if HAWK_NULL is returned, this function should not be called again */ - if (p >= end) return HAWK_NULL; - if (delim_mode == __DELIM_EMPTY || - delim_mode == __DELIM_SPACES) return (hawk_ooch_t*)p; - return (hawk_ooch_t*)++p; -} - -hawk_ooch_t* hawk_rtx_strxntokbyrex ( 
+hawk_ooch_t* hawk_rtx_tokoocharsbyrex ( hawk_rtx_t* rtx, const hawk_ooch_t* str, hawk_oow_t len, const hawk_ooch_t* substr, hawk_oow_t sublen, @@ -343,6 +154,7 @@ exit_loop: } } +#if 0 hawk_ooch_t* hawk_rtx_strxnfld ( hawk_rtx_t* rtx, hawk_ooch_t* str, hawk_oow_t len, hawk_ooch_t fs, hawk_ooch_t ec, hawk_ooch_t lq, hawk_ooch_t rq, @@ -431,6 +243,7 @@ hawk_ooch_t* hawk_rtx_strxnfld ( tok->len = xp - ts; return HAWK_NULL; } +#endif static int matchtre_ucs (hawk_tre_t* tre, int opt, const hawk_ucs_t* str, hawk_ucs_t* mat, hawk_ucs_t submat[9], hawk_gem_t* errgem) { diff --git a/hawk/lib/rec.c b/hawk/lib/rec.c index 785f84d8..c4781d57 100644 --- a/hawk/lib/rec.c +++ b/hawk/lib/rec.c @@ -166,14 +166,14 @@ static int split_record (hawk_rtx_t* rtx, int prefer_number) switch (how) { case 0: - p = hawk_rtx_strxntok (rtx, p, len, fs_ptr, fs_len, &tok); + p = hawk_rtx_tokoocharswithoochars (rtx, p, len, fs_ptr, fs_len, &tok); break; case 1: break; default: - p = hawk_rtx_strxntokbyrex( + p = hawk_rtx_tokoocharsbyrex( rtx, HAWK_OOECS_PTR(&rtx->inrec.line), HAWK_OOECS_LEN(&rtx->inrec.line), @@ -241,17 +241,17 @@ static int split_record (hawk_rtx_t* rtx, int prefer_number) { case 0: /* 1 character FS */ - p = hawk_rtx_strxntok(rtx, p, len, fs_ptr, fs_len, &tok); + p = hawk_rtx_tokoocharswithoochars(rtx, p, len, fs_ptr, fs_len, &tok); break; case 1: /* 5 character FS beginning with ? 
*/ - p = hawk_rtx_strxnfld(rtx, p, len, fs_ptr[1], fs_ptr[2], fs_ptr[3], fs_ptr[4], &tok); + p = hawk_rtx_fldoochars(rtx, p, len, fs_ptr[1], fs_ptr[2], fs_ptr[3], fs_ptr[4], &tok); break; default: /* all other cases */ - p = hawk_rtx_strxntokbyrex( + p = hawk_rtx_tokoocharsbyrex( rtx, HAWK_OOECS_PTR(&rtx->inrec.line), HAWK_OOECS_LEN(&rtx->inrec.line), diff --git a/hawk/t/h-002.hawk b/hawk/t/h-002.hawk index f016715a..6973bcb3 100644 --- a/hawk/t/h-002.hawk +++ b/hawk/t/h-002.hawk @@ -233,12 +233,15 @@ function main() ensure (sprintf("%*.*s", 20, 0, "hello"), " ", @SCRIPTNAME, @SCRIPTLINE); ensure (sprintf("%*.*s", 20, 2, "hello"), " he", @SCRIPTNAME, @SCRIPTLINE); - ensure (sprintf(@b"%0s", "hello"), @b"hello", @SCRIPTNAME, @SCRIPTLINE); - ensure (sprintf(@b"%.0s", "hello"), @b"", @SCRIPTNAME, @SCRIPTLINE); - ensure (sprintf(@b"%0.0s", "hello"), @b"", @SCRIPTNAME, @SCRIPTLINE); - ensure (sprintf(@b"%1.0s", "hello"), @b" ", @SCRIPTNAME, @SCRIPTLINE); - ensure (sprintf(@b"%*.*s", 20, 0, "hello"), @b" ", @SCRIPTNAME, @SCRIPTLINE); - ensure (sprintf(@b"%*.*s", 20, 2, "hello"), @b" he", @SCRIPTNAME, @SCRIPTLINE); + ensure (sprintf(@b"%0s", "hello"), @b"hello", @SCRIPTNAME, @SCRIPTLINE); + ensure (sprintf(@b"%.0s", "hello"), @b"", @SCRIPTNAME, @SCRIPTLINE); + ensure (sprintf(@b"%0.0s", "hello"), @b"", @SCRIPTNAME, @SCRIPTLINE); + ensure (sprintf(@b"%1.0s", "hello"), @b" ", @SCRIPTNAME, @SCRIPTLINE); + ensure (sprintf(@b"%*.*s", 20, 0, "hello"), @b" ", @SCRIPTNAME, @SCRIPTLINE); + ensure (sprintf(@b"%*.*s", 20, 2, "hello"), @b" he", @SCRIPTNAME, @SCRIPTLINE); + + ensure (sprintf("%+d %d", 3, 4), "+3 4", @SCRIPTNAME, @SCRIPTLINE); + ensure (sprintf(@b"%+d %d", 3, 4), @b"+3 4", @SCRIPTNAME, @SCRIPTLINE); } { @@ -251,6 +254,45 @@ function main() ensure (b, 1, @SCRIPTNAME, @SCRIPTLINE); } + + { + ensure (substr(1000+"5000", 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (substr(1000+"10000", 2) === "1000", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (substr(1000+"5000", 2) 
=== "000", 1, @SCRIPTNAME, @SCRIPTLINE); + + ensure (substr("5000" + 1000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (substr("10000" + 1000, 2) === "1000", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (substr("5000" + 1000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE); + + ensure (substr(@b"5000" + 1000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (substr(@b"10000" + 1000, 2) === "1000", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (substr(@b"5000" + 1000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE); + + ensure (substr(@b"5000", 2) === @b"000", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (substr(@b"10000", 2) === @b"0000", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (substr(@b"5000", 2) === @b"000", 1, @SCRIPTNAME, @SCRIPTLINE); + + ensure (substr(1000+5000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (substr(1000+10000, 2) === "1000", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (substr(1000+5000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE); + } + + + { + @local a; + ensure (split("Here===Is=Some=====Data", a, "=+"), 4, @SCRIPTNAME, @SCRIPTLINE); + ensure (a[1] === "Here", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (a[2] === "Is", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (a[3] === "Some", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (a[4] === "Data", 1, @SCRIPTNAME, @SCRIPTLINE); + + ensure (split(@b"Here===Is=Some=====Data", a, @b"=+"), 4, @SCRIPTNAME, @SCRIPTLINE); + ensure (a[1] === @b"Here", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (a[2] === @b"Is", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (a[3] === @b"Some", 1, @SCRIPTNAME, @SCRIPTLINE); + ensure (a[4] === @b"Data", 1, @SCRIPTNAME, @SCRIPTLINE); + } + print "SUCCESS"; }