added code to prepare byte string support in split(). still a long way to go
This commit is contained in:
parent
afe27f4eec
commit
4a60654b49
224
hawk/lib/fnc.c
224
hawk/lib/fnc.c
@ -787,7 +787,200 @@ int hawk_fnc_substr (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
|
||||
#if 0
|
||||
/*
 * split_mbs() - byte-string flavour of the split() builtin (work in progress;
 * the surrounding "#if 0" keeps it out of the build).
 *
 * Arguments are read from the runtime context:
 *   arg 0 - the value to split, obtained as a byte string
 *   arg 1 - a reference that receives the freshly created array
 *   arg 2 - optional separator: a regular expression value, a byte string,
 *           or any other value converted to a byte string. When absent,
 *           the FS global is used instead.
 *
 * On success the number of fields is set as the return value and 0 is
 * returned. On failure every temporary acquired here is released and -1 is
 * returned.
 */
static int split_mbs (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
{
	hawk_oow_t nargs;
	hawk_val_t* a0, * a2, * t1, * t2;
	hawk_val_type_t a2_vtype, t1_vtype;

	hawk_bcs_t str;
	hawk_bcs_t fs;
	hawk_bch_t* fs_free = HAWK_NULL;       /* separator text to free, if duplicated here */
	const hawk_bch_t* p;
	hawk_oow_t str_left, org_len;
	hawk_tre_t* fs_rex = HAWK_NULL;        /* regular expression used when fs.len > 1 */
	hawk_tre_t* fs_rex_free = HAWK_NULL;   /* set only when the expression was built here */

	hawk_bcs_t tok;
	hawk_int_t nflds;
	int x;

	str.ptr = HAWK_NULL;
	str.len = 0;

	nargs = hawk_rtx_getnargs(rtx);
	HAWK_ASSERT (nargs >= 2 && nargs <= 3);

	a0 = hawk_rtx_getarg(rtx, 0);
	a2 = (nargs >= 3)? hawk_rtx_getarg(rtx, 2): HAWK_NULL;

	str.ptr = hawk_rtx_getvalbcstr(rtx, a0, &str.len);
	if (HAWK_UNLIKELY(!str.ptr)) goto oops;

	if (!a2)
	{
		/* no explicit separator - get the value from FS */
		t1 = hawk_rtx_getgbl(rtx, HAWK_GBL_FS);
		t1_vtype = HAWK_RTX_GETVALTYPE(rtx, t1);
		if (t1_vtype == HAWK_VAL_NIL)
		{
			fs.ptr = " ";
			fs.len = 1;
		}
		else if (t1_vtype == HAWK_VAL_MBS)
		{
			/* borrow the byte string held by the value; nothing to free */
			fs.ptr = ((hawk_val_mbs_t*)t1)->val.ptr;
			fs.len = ((hawk_val_mbs_t*)t1)->val.len;
		}
		else
		{
			fs.ptr = hawk_rtx_valtobcstrdup(rtx, t1, &fs.len);
			if (HAWK_UNLIKELY(!fs.ptr)) goto oops;
			fs_free = (hawk_bch_t*)fs.ptr;
		}

		/* the expression cached for FS is borrowed, so fs_rex_free stays null */
		if (fs.len > 1) fs_rex = rtx->gbl.fs[rtx->gbl.ignorecase];
	}
	else
	{
		a2_vtype = HAWK_RTX_GETVALTYPE(rtx, a2);

		if (a2_vtype == HAWK_VAL_REX)
		{
			/* the third parameter is a regular expression */
			fs_rex = ((hawk_val_rex_t*)a2)->code[rtx->gbl.ignorecase];

			/* force the loop below onto the fs_rex path by
			 * setting fs.len greater than 1 */
			fs.ptr = HAWK_NULL;
			fs.len = 2;
		}
		else
		{
			if (a2_vtype == HAWK_VAL_MBS)
			{
				fs.ptr = ((hawk_val_mbs_t*)a2)->val.ptr;
				fs.len = ((hawk_val_mbs_t*)a2)->val.len;
			}
			else
			{
				fs.ptr = hawk_rtx_valtobcstrdup(rtx, a2, &fs.len);
				if (HAWK_UNLIKELY(!fs.ptr)) goto oops;
				fs_free = (hawk_bch_t*)fs.ptr;
			}

			if (fs.len > 1)
			{
				/* TODO(review): fs is a byte string here; confirm that
				 * hawk_rtx_buildrex() accepts byte characters in every
				 * build before enabling this function. */
				x = rtx->gbl.ignorecase?
					hawk_rtx_buildrex(rtx, fs.ptr, fs.len, HAWK_NULL, &fs_rex):
					hawk_rtx_buildrex(rtx, fs.ptr, fs.len, &fs_rex, HAWK_NULL);
				if (x <= -1) goto oops;

				fs_rex_free = fs_rex;
			}
		}
	}

	/* same array constructor call as the use_array path of fnc_split() */
	t1 = hawk_rtx_makearrval(rtx, 16);
	if (HAWK_UNLIKELY(!t1)) goto oops;

	hawk_rtx_refupval (rtx, t1);
	x = hawk_rtx_setrefval(rtx, (hawk_val_ref_t*)hawk_rtx_getarg(rtx, 1), t1);
	hawk_rtx_refdownval (rtx, t1);
	if (HAWK_UNLIKELY(x <= -1)) goto oops;

	/* fill the array with actual values */
	p = str.ptr; str_left = str.len; org_len = str.len;
	nflds = 0;

	while (p)
	{
		if (fs.len <= 1)
		{
			/* str, fs and tok are all byte-typed, so call the byte
			 * tokenizer directly instead of the build-dependent alias */
			p = hawk_rtx_tokbcharswithbchars(rtx, p, str.len, fs.ptr, fs.len, &tok);
		}
		else
		{
			/* TODO(review): no byte-specific regex tokenizer is visible
			 * yet; this call still goes through the ooch variant. */
			p = hawk_rtx_tokoocharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok);
			if (p == HAWK_NULL && hawk_rtx_geterrnum(rtx) != HAWK_ENOERR)
			{
				goto oops;
			}
		}

		if (nflds == 0 && p == HAWK_NULL && tok.len == 0)
		{
			/* no field at all */
			break;
		}

		HAWK_ASSERT ((tok.ptr != HAWK_NULL && tok.len > 0) || tok.len == 0);

		/* create the field string - however, the split function must
		 * create a numeric value if the string is a number */
		t2 = hawk_rtx_makenumormbsvalwithbchars(rtx, tok.ptr, tok.len);
		if (HAWK_UNLIKELY(!t2)) goto oops;

		/* store it at the next index, as the array branch of fnc_split() does */
		if (hawk_rtx_setarrvalfld(rtx, t1, ++nflds, t2) == HAWK_NULL)
		{
			/* up/down the reference count so the orphaned value is released */
			hawk_rtx_refupval (rtx, t2);
			hawk_rtx_refdownval (rtx, t2);
			goto oops;
		}

		/* the tokenizer reported the end of input; do not do pointer
		 * arithmetic on a null pointer */
		if (!p) break;

		str.len = str_left - (p - str.ptr);
	}

	hawk_rtx_freevalbcstr (rtx, a0, str.ptr);

	if (fs_free) hawk_rtx_freemem (rtx, fs_free);

	if (fs_rex_free)
	{
		if (rtx->gbl.ignorecase)
			hawk_rtx_freerex (rtx, HAWK_NULL, fs_rex_free);
		else
			hawk_rtx_freerex (rtx, fs_rex_free, HAWK_NULL);
	}

	t1 = hawk_rtx_makeintval(rtx, nflds);
	if (HAWK_UNLIKELY(!t1)) return -1;

	hawk_rtx_setretval (rtx, t1);
	return 0;

oops:
	if (str.ptr) hawk_rtx_freevalbcstr (rtx, a0, str.ptr);

	if (fs_free) hawk_rtx_freemem (rtx, fs_free);

	if (fs_rex_free)
	{
		if (rtx->gbl.ignorecase)
			hawk_rtx_freerex (rtx, HAWK_NULL, fs_rex_free);
		else
			hawk_rtx_freerex (rtx, fs_rex_free, HAWK_NULL);
	}
	return -1;
}
|
||||
#endif
|
||||
|
||||
static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
|
||||
{
|
||||
hawk_oow_t nargs;
|
||||
hawk_val_t* a0, * a2, * t1, * t2;
|
||||
@ -817,7 +1010,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
|
||||
str.ptr = hawk_rtx_getvaloocstr(rtx, a0, &str.len);
|
||||
if (HAWK_UNLIKELY(!str.ptr)) goto oops;
|
||||
|
||||
if (a2 == HAWK_NULL)
|
||||
if (!a2)
|
||||
{
|
||||
/* get the value from FS */
|
||||
t1 = hawk_rtx_getgbl(rtx, HAWK_GBL_FS);
|
||||
@ -835,7 +1028,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
|
||||
else
|
||||
{
|
||||
fs.ptr = hawk_rtx_valtooocstrdup(rtx, t1, &fs.len);
|
||||
if (fs.ptr == HAWK_NULL) goto oops;
|
||||
if (HAWK_UNLIKELY(!fs.ptr)) goto oops;
|
||||
fs_free = (hawk_ooch_t*)fs.ptr;
|
||||
}
|
||||
|
||||
@ -883,7 +1076,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
|
||||
}
|
||||
}
|
||||
|
||||
t1 = hawk_rtx_makemapval(rtx);
|
||||
t1 = use_array? hawk_rtx_makearrval(rtx, 16): hawk_rtx_makemapval(rtx);
|
||||
if (HAWK_UNLIKELY(!t1)) goto oops;
|
||||
|
||||
hawk_rtx_refupval (rtx, t1);
|
||||
@ -902,11 +1095,11 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
|
||||
|
||||
if (fs.len <= 1)
|
||||
{
|
||||
p = hawk_rtx_strxntok(rtx, p, str.len, fs.ptr, fs.len, &tok);
|
||||
p = hawk_rtx_tokoocharswithoochars(rtx, p, str.len, fs.ptr, fs.len, &tok);
|
||||
}
|
||||
else
|
||||
{
|
||||
p = hawk_rtx_strxntokbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok);
|
||||
p = hawk_rtx_tokoocharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok);
|
||||
if (p == HAWK_NULL && hawk_rtx_geterrnum(rtx) != HAWK_ENOERR)
|
||||
{
|
||||
goto oops;
|
||||
@ -928,6 +1121,17 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
|
||||
t2 = hawk_rtx_makenumorstrvalwithoochars(rtx, tok.ptr, tok.len);
|
||||
if (HAWK_UNLIKELY(!t2)) goto oops;
|
||||
|
||||
if (use_array)
|
||||
{
|
||||
if (hawk_rtx_setarrvalfld(rtx, t1, ++nflds, t2) == HAWK_NULL)
|
||||
{
|
||||
hawk_rtx_refupval (rtx, t2);
|
||||
hawk_rtx_refdownval (rtx, t2);
|
||||
goto oops;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* put it into the map */
|
||||
key_len = hawk_int_to_oocstr(++nflds, 10, HAWK_NULL, key_buf, HAWK_COUNTOF(key_buf));
|
||||
HAWK_ASSERT (key_len != (hawk_oow_t)-1);
|
||||
@ -938,6 +1142,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
|
||||
hawk_rtx_refdownval (rtx, t2);
|
||||
goto oops;
|
||||
}
|
||||
}
|
||||
|
||||
str.len = str_left - (p - str.ptr);
|
||||
}
|
||||
@ -958,7 +1163,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
|
||||
/*nflds--;*/
|
||||
|
||||
t1 = hawk_rtx_makeintval (rtx, nflds);
|
||||
if (t1 == HAWK_NULL) return -1;
|
||||
if (HAWK_UNLIKELY(!t1)) return -1;
|
||||
|
||||
hawk_rtx_setretval (rtx, t1);
|
||||
return 0;
|
||||
@ -978,6 +1183,11 @@ oops:
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
 * hawk_fnc_split() - entry point of the split() builtin. All the work is
 * delegated to fnc_split(); the third argument asks it to store the fields
 * in an array value rather than a map value.
 */
int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
{
	const int use_array = 1;

	return fnc_split(rtx, fi, use_array);
}
|
||||
|
||||
int hawk_fnc_tolower (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
|
||||
{
|
||||
hawk_oow_t i;
|
||||
|
301
hawk/lib/misc-imp.h
Normal file
301
hawk/lib/misc-imp.h
Normal file
@ -0,0 +1,301 @@
|
||||
/*
|
||||
* $Id$
|
||||
*
|
||||
Copyright (c) 2006-2020 Chung, Hyung-Hwan. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
 * tokenize_xchars() - template body; the including file supplies char_t,
 * xcs_t, is_xch_space and the real function name through macros.
 *
 * Extracts one token from the character run [s, s + len) and stores it in
 * *tok (ptr is HAWK_NULL and len is 0 when no token character was found).
 * The delimiter set decides how the run is scanned:
 *
 *   delim is HAWK_NULL      the whole run is one token with leading and
 *                           trailing space characters trimmed off
 *   delim_len is 0          every character is a token of its own
 *   only space characters   tokens are separated by runs of spaces
 *   no space characters     any character of the set ends the token
 *   a mixture               any non-space character of the set ends the
 *                           token; spaces around the token are dropped
 *
 * A one-character set holding a space-class character other than ' ' is
 * handled like the "no space characters" case.
 *
 * Returns the position at which the next call should resume, or HAWK_NULL
 * once the run has been used up; do not call again after HAWK_NULL.
 *
 * NOTE(review): case folding goes through hawk_to_ooch_upper() whatever
 * char_t is - confirm this is intended for the instantiation whose char_t
 * differs from the ooch type.
 */
char_t* tokenize_xchars (hawk_rtx_t* rtx, const char_t* s, hawk_oow_t len, const char_t* delim, hawk_oow_t delim_len, xcs_t* tok)
{
	const char_t* cur = s;
	const char_t* const limit = s + len;
	const char_t* first = HAWK_NULL;   /* first character of the token */
	const char_t* last = HAWK_NULL;    /* last character of the token */
	const char_t* dp;
	const char_t* const dlimit = delim + delim_len;
	char_t ch;
	int mode;

#define __DELIM_NULL 0
#define __DELIM_EMPTY 1
#define __DELIM_SPACES 2
#define __DELIM_NOSPACES 3
#define __DELIM_COMPOSITE 4

	/* work out which kind of delimiter set was given */
	if (delim == HAWK_NULL)
	{
		mode = __DELIM_NULL;
	}
	else
	{
		mode = __DELIM_EMPTY;

		for (dp = delim; dp < dlimit; dp++)
		{
			int kind = is_xch_space(*dp)? __DELIM_SPACES: __DELIM_NOSPACES;

			if (mode == __DELIM_EMPTY)
			{
				mode = kind;
			}
			else if (mode != kind)
			{
				/* both space and non-space characters are present */
				mode = __DELIM_COMPOSITE;
				break;
			}
		}

		/* TODO: verify the following statement... */
		if (mode == __DELIM_SPACES && delim_len == 1 && delim[0] != ' ')
		{
			mode = __DELIM_NOSPACES;
		}
	}

	switch (mode)
	{
		case __DELIM_NULL:
			/* trim the spaces at both ends of the source run */
			while (cur < limit && is_xch_space(*cur)) cur++;
			for (; cur < limit; cur++)
			{
				if (is_xch_space(*cur)) continue;
				if (first == HAWK_NULL) first = cur;
				last = cur;
			}
			break;

		case __DELIM_EMPTY:
			/* a single character makes up the token */
			if (cur < limit)
			{
				first = last = cur;
				cur++;
			}
			break;

		case __DELIM_SPACES:
			/* skip leading spaces, collect up to the next space,
			 * then swallow the spaces that follow the token */
			while (cur < limit && is_xch_space(*cur)) cur++;
			for (; cur < limit && !is_xch_space(*cur); cur++)
			{
				if (first == HAWK_NULL) first = cur;
				last = cur;
			}
			while (cur < limit && is_xch_space(*cur)) cur++;
			break;

		case __DELIM_NOSPACES:
			/* the token ends at the first character found in the
			 * delimiter set */
			if (rtx->gbl.ignorecase)
			{
				for (; cur < limit; cur++)
				{
					ch = hawk_to_ooch_upper(*cur);
					for (dp = delim; dp < dlimit; dp++)
					{
						if (ch == hawk_to_ooch_upper(*dp)) goto done;
					}

					if (first == HAWK_NULL) first = cur;
					last = cur;
				}
			}
			else
			{
				for (; cur < limit; cur++)
				{
					ch = *cur;
					for (dp = delim; dp < dlimit; dp++)
					{
						if (ch == *dp) goto done;
					}

					if (first == HAWK_NULL) first = cur;
					last = cur;
				}
			}
			break;

		default: /* __DELIM_COMPOSITE */
			/* the token ends at a non-space character of the delimiter
			 * set; spaces never start or finish the token */
			while (cur < limit && is_xch_space(*cur)) cur++;
			if (rtx->gbl.ignorecase)
			{
				for (; cur < limit; cur++)
				{
					ch = hawk_to_ooch_upper(*cur);
					if (is_xch_space(ch)) continue;

					for (dp = delim; dp < dlimit; dp++)
					{
						if (ch == hawk_to_ooch_upper(*dp)) goto done;
					}

					if (first == HAWK_NULL) first = cur;
					last = cur;
				}
			}
			else
			{
				for (; cur < limit; cur++)
				{
					ch = *cur;
					if (is_xch_space(ch)) continue;

					for (dp = delim; dp < dlimit; dp++)
					{
						if (ch == *dp) goto done;
					}

					if (first == HAWK_NULL) first = cur;
					last = cur;
				}
			}
			break;
	}

done:
	if (first == HAWK_NULL)
	{
		tok->ptr = HAWK_NULL;
		tok->len = (hawk_oow_t)0;
	}
	else
	{
		tok->ptr = (char_t*)first;
		tok->len = last - first + 1;
	}

	/* if HAWK_NULL is returned, this function should not be called again */
	if (cur >= limit) return HAWK_NULL;

	/* in the empty and spaces modes the cursor already sits on the next
	 * token; otherwise it sits on the delimiter, which must be stepped over */
	return (char_t*)((mode == __DELIM_EMPTY || mode == __DELIM_SPACES)? cur: cur + 1);
}
|
||||
|
||||
/*
 * split_xchars_to_fields() - template body; the including file supplies
 * char_t, xcs_t, is_xch_space and the real function name through macros.
 *
 * Extracts the next field from the writable run [str, str + len). The field
 * text is compacted in place: escape and quote characters are dropped while
 * the characters they protect are copied down toward the token start.
 *
 *   fs  field separator; when it is a space character, a run of
 *       consecutive separators is consumed as one
 *   ec  escape character; the character after it is taken literally
 *   lq  opens a quoted section in which fs does not end the field
 *   rq  closes a quoted section
 *
 * Leading spaces are skipped. Trailing spaces that are neither quoted nor
 * escaped are left out of tok->len.
 *
 * Returns the position at which the next call should resume, or HAWK_NULL
 * when the input has been used up. *tok is set in both cases.
 *
 * NOTE(review): the prototypes for the functions this body is instantiated
 * as name the last three character parameters lq, rq, ec, while this
 * definition names them ec, lq, rq - confirm which order callers intend.
 */
char_t* split_xchars_to_fields (hawk_rtx_t* rtx, char_t* str, hawk_oow_t len, char_t fs, char_t ec, char_t lq, char_t rq, xcs_t* tok)
{
	char_t* p = str;
	char_t* end = str + len;
	int escaped = 0, quoted = 0;
	char_t* ts; /* token start */
	char_t* tp; /* points to one char past the last token char */
	char_t* xp; /* points to one char past the last effective char */

	/* skip leading spaces */
	while (p < end && is_xch_space(*p)) p++;

	/* initialize token pointers */
	ts = tp = xp = p;

	while (p < end)
	{
		/* must be char_t: a plain char would truncate the character
		 * when this body is built for a character type wider than char */
		char_t c = *p;

		if (escaped)
		{
			/* the previous character was the escaper - take this one as is */
			*tp++ = c; xp = tp; p++;
			escaped = 0;
		}
		else if (c == ec)
		{
			escaped = 1;
			p++;
		}
		else if (quoted)
		{
			if (c == rq)
			{
				quoted = 0;
				p++;
			}
			else
			{
				*tp++ = c; xp = tp; p++;
			}
		}
		else if (c == fs)
		{
			/* end of the field */
			tok->ptr = ts;
			tok->len = xp - ts;
			p++;

			if (is_xch_space(fs))
			{
				/* a space-like separator swallows its repetitions */
				while (p < end && *p == fs) p++;
				if (p >= end) return HAWK_NULL;
			}

			return p;
		}
		else if (c == lq)
		{
			quoted = 1;
			p++;
		}
		else
		{
			*tp++ = c; p++;
			/* a bare space extends the token only if something
			 * effective follows it */
			if (!is_xch_space(c)) xp = tp;
		}
	}

	if (escaped)
	{
		/* if it is still escaped, the last character must be
		 * the escaper itself. treat it as a normal character */
		*xp++ = ec;
	}

	tok->ptr = ts;
	tok->len = xp - ts;
	return HAWK_NULL;
}
|
@ -33,23 +33,46 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
hawk_ooch_t* hawk_rtx_strtok (
|
||||
hawk_rtx_t* rtx, const hawk_ooch_t* s,
|
||||
const hawk_ooch_t* delim, hawk_oocs_t* tok);
|
||||
hawk_uch_t* hawk_rtx_flduchars (
|
||||
hawk_rtx_t* rtx,
|
||||
hawk_uch_t* str,
|
||||
hawk_oow_t len,
|
||||
hawk_uch_t fs,
|
||||
hawk_uch_t lq,
|
||||
hawk_uch_t rq,
|
||||
hawk_uch_t ec,
|
||||
hawk_ucs_t* tok
|
||||
);
|
||||
|
||||
hawk_ooch_t* hawk_rtx_strxtok (
|
||||
hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len,
|
||||
const hawk_ooch_t* delim, hawk_oocs_t* tok);
|
||||
hawk_bch_t* hawk_rtx_fldbchars (
|
||||
hawk_rtx_t* rtx,
|
||||
hawk_bch_t* str,
|
||||
hawk_oow_t len,
|
||||
hawk_bch_t fs,
|
||||
hawk_bch_t lq,
|
||||
hawk_bch_t rq,
|
||||
hawk_bch_t ec,
|
||||
hawk_bcs_t* tok
|
||||
);
|
||||
|
||||
hawk_ooch_t* hawk_rtx_strntok (
|
||||
hawk_rtx_t* rtx, const hawk_ooch_t* s,
|
||||
const hawk_ooch_t* delim, hawk_oow_t delim_len, hawk_oocs_t* tok);
|
||||
hawk_uch_t* hawk_rtx_tokucharswithuchars (
|
||||
hawk_rtx_t* rtx, const hawk_uch_t* s, hawk_oow_t len,
|
||||
const hawk_uch_t* delim, hawk_oow_t delim_len, hawk_ucs_t* tok);
|
||||
|
||||
hawk_ooch_t* hawk_rtx_strxntok (
|
||||
hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len,
|
||||
const hawk_ooch_t* delim, hawk_oow_t delim_len, hawk_oocs_t* tok);
|
||||
hawk_bch_t* hawk_rtx_tokbcharswithbchars (
|
||||
hawk_rtx_t* rtx, const hawk_bch_t* s, hawk_oow_t len,
|
||||
const hawk_bch_t* delim, hawk_oow_t delim_len, hawk_bcs_t* tok);
|
||||
|
||||
hawk_ooch_t* hawk_rtx_strxntokbyrex (
|
||||
|
||||
#if defined(HAWK_OOCH_IS_UCH)
|
||||
# define hawk_rtx_fldoochars hawk_rtx_flduchars
|
||||
# define hawk_rtx_tokoocharswithoochars hawk_rtx_tokucharswithuchars
|
||||
#else
|
||||
# define hawk_rtx_fldoochars hawk_rtx_fldbchars
|
||||
# define hawk_rtx_tokoocharswithoochars hawk_rtx_tokbcharswithbchars
|
||||
#endif
|
||||
|
||||
hawk_ooch_t* hawk_rtx_tokoocharsbyrex (
|
||||
hawk_rtx_t* rtx,
|
||||
const hawk_ooch_t* str,
|
||||
hawk_oow_t len,
|
||||
@ -59,16 +82,6 @@ hawk_ooch_t* hawk_rtx_strxntokbyrex (
|
||||
hawk_oocs_t* tok
|
||||
);
|
||||
|
||||
hawk_ooch_t* hawk_rtx_strxnfld (
|
||||
hawk_rtx_t* rtx,
|
||||
hawk_ooch_t* str,
|
||||
hawk_oow_t len,
|
||||
hawk_ooch_t fs,
|
||||
hawk_ooch_t lq,
|
||||
hawk_ooch_t rq,
|
||||
hawk_ooch_t ec,
|
||||
hawk_oocs_t* tok
|
||||
);
|
||||
|
||||
int hawk_rtx_matchvalwithucs (
|
||||
hawk_rtx_t* rtx, hawk_val_t* val,
|
||||
|
237
hawk/lib/misc.c
237
hawk/lib/misc.c
@ -26,221 +26,32 @@
|
||||
|
||||
#include "hawk-prv.h"
|
||||
|
||||
hawk_ooch_t* hawk_rtx_strtok (
|
||||
hawk_rtx_t* rtx, const hawk_ooch_t* s,
|
||||
const hawk_ooch_t* delim, hawk_oocs_t* tok)
|
||||
{
|
||||
return hawk_rtx_strxntok(rtx, s, hawk_count_oocstr(s), delim, hawk_count_oocstr(delim), tok);
|
||||
}
|
||||
|
||||
hawk_ooch_t* hawk_rtx_strxtok (
|
||||
hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len,
|
||||
const hawk_ooch_t* delim, hawk_oocs_t* tok)
|
||||
{
|
||||
return hawk_rtx_strxntok(rtx, s, len, delim, hawk_count_oocstr(delim), tok);
|
||||
}
|
||||
#undef char_t
|
||||
#undef xcs_t
|
||||
#undef is_xch_space
|
||||
#undef tokenize_xchars
|
||||
#undef split_xchars_to_fields
|
||||
#define char_t hawk_bch_t
|
||||
#define xcs_t hawk_bcs_t
|
||||
#define is_xch_space hawk_is_bch_space
|
||||
#define tokenize_xchars hawk_rtx_tokbcharswithbchars
|
||||
#define split_xchars_to_fields hawk_rtx_fldbchars
|
||||
#include "misc-imp.h"
|
||||
|
||||
hawk_ooch_t* hawk_rtx_strntok (
|
||||
hawk_rtx_t* rtx, const hawk_ooch_t* s,
|
||||
const hawk_ooch_t* delim, hawk_oow_t delim_len,
|
||||
hawk_oocs_t* tok)
|
||||
{
|
||||
return hawk_rtx_strxntok(rtx, s, hawk_count_oocstr(s), delim, delim_len, tok);
|
||||
}
|
||||
#undef char_t
|
||||
#undef xcs_t
|
||||
#undef is_xch_space
|
||||
#undef tokenize_xchars
|
||||
#undef split_xchars_to_fields
|
||||
#define char_t hawk_uch_t
|
||||
#define xcs_t hawk_ucs_t
|
||||
#define is_xch_space hawk_is_uch_space
|
||||
#define tokenize_xchars hawk_rtx_tokucharswithuchars
|
||||
#define split_xchars_to_fields hawk_rtx_flduchars
|
||||
#include "misc-imp.h"
|
||||
|
||||
hawk_ooch_t* hawk_rtx_strxntok (
|
||||
hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len,
|
||||
const hawk_ooch_t* delim, hawk_oow_t delim_len, hawk_oocs_t* tok)
|
||||
{
|
||||
const hawk_ooch_t* p = s, *d;
|
||||
const hawk_ooch_t* end = s + len;
|
||||
const hawk_ooch_t* sp = HAWK_NULL, * ep = HAWK_NULL;
|
||||
const hawk_ooch_t* delim_end = delim + delim_len;
|
||||
hawk_ooch_t c;
|
||||
int delim_mode;
|
||||
|
||||
#define __DELIM_NULL 0
|
||||
#define __DELIM_EMPTY 1
|
||||
#define __DELIM_SPACES 2
|
||||
#define __DELIM_NOSPACES 3
|
||||
#define __DELIM_COMPOSITE 4
|
||||
if (delim == HAWK_NULL) delim_mode = __DELIM_NULL;
|
||||
else
|
||||
{
|
||||
delim_mode = __DELIM_EMPTY;
|
||||
|
||||
for (d = delim; d < delim_end; d++)
|
||||
{
|
||||
if (hawk_is_ooch_space(*d))
|
||||
{
|
||||
if (delim_mode == __DELIM_EMPTY)
|
||||
delim_mode = __DELIM_SPACES;
|
||||
else if (delim_mode == __DELIM_NOSPACES)
|
||||
{
|
||||
delim_mode = __DELIM_COMPOSITE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (delim_mode == __DELIM_EMPTY)
|
||||
delim_mode = __DELIM_NOSPACES;
|
||||
else if (delim_mode == __DELIM_SPACES)
|
||||
{
|
||||
delim_mode = __DELIM_COMPOSITE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: verify the following statement... */
|
||||
if (delim_mode == __DELIM_SPACES &&
|
||||
delim_len == 1 &&
|
||||
delim[0] != HAWK_T(' ')) delim_mode = __DELIM_NOSPACES;
|
||||
}
|
||||
|
||||
if (delim_mode == __DELIM_NULL)
|
||||
{
|
||||
/* when HAWK_NULL is given as "delim", it trims off the
|
||||
* leading and trailing spaces characters off the source
|
||||
* string "s" eventually. */
|
||||
|
||||
while (p < end && hawk_is_ooch_space(*p)) p++;
|
||||
while (p < end)
|
||||
{
|
||||
c = *p;
|
||||
|
||||
if (!hawk_is_ooch_space(c))
|
||||
{
|
||||
if (sp == HAWK_NULL) sp = p;
|
||||
ep = p;
|
||||
}
|
||||
p++;
|
||||
}
|
||||
}
|
||||
else if (delim_mode == __DELIM_EMPTY)
|
||||
{
|
||||
/* each character in the source string "s" becomes a token. */
|
||||
if (p < end)
|
||||
{
|
||||
c = *p;
|
||||
sp = p;
|
||||
ep = p++;
|
||||
}
|
||||
}
|
||||
else if (delim_mode == __DELIM_SPACES)
|
||||
{
|
||||
/* each token is delimited by space characters. all leading
|
||||
* and trailing spaces are removed. */
|
||||
|
||||
while (p < end && hawk_is_ooch_space(*p)) p++;
|
||||
while (p < end)
|
||||
{
|
||||
c = *p;
|
||||
if (hawk_is_ooch_space(c)) break;
|
||||
if (sp == HAWK_NULL) sp = p;
|
||||
ep = p++;
|
||||
}
|
||||
while (p < end && hawk_is_ooch_space(*p)) p++;
|
||||
}
|
||||
else if (delim_mode == __DELIM_NOSPACES)
|
||||
{
|
||||
/* each token is delimited by one of charaters
|
||||
* in the delimeter set "delim". */
|
||||
if (rtx->gbl.ignorecase)
|
||||
{
|
||||
while (p < end)
|
||||
{
|
||||
c = hawk_to_ooch_upper(*p);
|
||||
for (d = delim; d < delim_end; d++)
|
||||
{
|
||||
if (c == hawk_to_ooch_upper(*d)) goto exit_loop;
|
||||
}
|
||||
|
||||
if (sp == HAWK_NULL) sp = p;
|
||||
ep = p++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (p < end)
|
||||
{
|
||||
c = *p;
|
||||
for (d = delim; d < delim_end; d++)
|
||||
{
|
||||
if (c == *d) goto exit_loop;
|
||||
}
|
||||
|
||||
if (sp == HAWK_NULL) sp = p;
|
||||
ep = p++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else /* if (delim_mode == __DELIM_COMPOSITE) */
|
||||
{
|
||||
/* each token is delimited by one of non-space charaters
|
||||
* in the delimeter set "delim". however, all space characters
|
||||
* surrounding the token are removed */
|
||||
while (p < end && hawk_is_ooch_space(*p)) p++;
|
||||
if (rtx->gbl.ignorecase)
|
||||
{
|
||||
while (p < end)
|
||||
{
|
||||
c = hawk_to_ooch_upper(*p);
|
||||
if (hawk_is_ooch_space(c))
|
||||
{
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
for (d = delim; d < delim_end; d++)
|
||||
{
|
||||
if (c == hawk_to_ooch_upper(*d))
|
||||
goto exit_loop;
|
||||
}
|
||||
if (sp == HAWK_NULL) sp = p;
|
||||
ep = p++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (p < end)
|
||||
{
|
||||
c = *p;
|
||||
if (hawk_is_ooch_space(c))
|
||||
{
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
for (d = delim; d < delim_end; d++)
|
||||
{
|
||||
if (c == *d) goto exit_loop;
|
||||
}
|
||||
if (sp == HAWK_NULL) sp = p;
|
||||
ep = p++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
exit_loop:
|
||||
if (sp == HAWK_NULL)
|
||||
{
|
||||
tok->ptr = HAWK_NULL;
|
||||
tok->len = (hawk_oow_t)0;
|
||||
}
|
||||
else
|
||||
{
|
||||
tok->ptr = (hawk_ooch_t*)sp;
|
||||
tok->len = ep - sp + 1;
|
||||
}
|
||||
|
||||
/* if HAWK_NULL is returned, this function should not be called again */
|
||||
if (p >= end) return HAWK_NULL;
|
||||
if (delim_mode == __DELIM_EMPTY ||
|
||||
delim_mode == __DELIM_SPACES) return (hawk_ooch_t*)p;
|
||||
return (hawk_ooch_t*)++p;
|
||||
}
|
||||
|
||||
hawk_ooch_t* hawk_rtx_strxntokbyrex (
|
||||
hawk_ooch_t* hawk_rtx_tokoocharsbyrex (
|
||||
hawk_rtx_t* rtx,
|
||||
const hawk_ooch_t* str, hawk_oow_t len,
|
||||
const hawk_ooch_t* substr, hawk_oow_t sublen,
|
||||
@ -343,6 +154,7 @@ exit_loop:
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
hawk_ooch_t* hawk_rtx_strxnfld (
|
||||
hawk_rtx_t* rtx, hawk_ooch_t* str, hawk_oow_t len,
|
||||
hawk_ooch_t fs, hawk_ooch_t ec, hawk_ooch_t lq, hawk_ooch_t rq,
|
||||
@ -431,6 +243,7 @@ hawk_ooch_t* hawk_rtx_strxnfld (
|
||||
tok->len = xp - ts;
|
||||
return HAWK_NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int matchtre_ucs (hawk_tre_t* tre, int opt, const hawk_ucs_t* str, hawk_ucs_t* mat, hawk_ucs_t submat[9], hawk_gem_t* errgem)
|
||||
{
|
||||
|
@ -166,14 +166,14 @@ static int split_record (hawk_rtx_t* rtx, int prefer_number)
|
||||
switch (how)
|
||||
{
|
||||
case 0:
|
||||
p = hawk_rtx_strxntok (rtx, p, len, fs_ptr, fs_len, &tok);
|
||||
p = hawk_rtx_tokoocharswithoochars (rtx, p, len, fs_ptr, fs_len, &tok);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
break;
|
||||
|
||||
default:
|
||||
p = hawk_rtx_strxntokbyrex(
|
||||
p = hawk_rtx_tokoocharsbyrex(
|
||||
rtx,
|
||||
HAWK_OOECS_PTR(&rtx->inrec.line),
|
||||
HAWK_OOECS_LEN(&rtx->inrec.line),
|
||||
@ -241,17 +241,17 @@ static int split_record (hawk_rtx_t* rtx, int prefer_number)
|
||||
{
|
||||
case 0:
|
||||
/* 1 character FS */
|
||||
p = hawk_rtx_strxntok(rtx, p, len, fs_ptr, fs_len, &tok);
|
||||
p = hawk_rtx_tokoocharswithoochars(rtx, p, len, fs_ptr, fs_len, &tok);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
/* 5 character FS beginning with ? */
|
||||
p = hawk_rtx_strxnfld(rtx, p, len, fs_ptr[1], fs_ptr[2], fs_ptr[3], fs_ptr[4], &tok);
|
||||
p = hawk_rtx_fldoochars(rtx, p, len, fs_ptr[1], fs_ptr[2], fs_ptr[3], fs_ptr[4], &tok);
|
||||
break;
|
||||
|
||||
default:
|
||||
/* all other cases */
|
||||
p = hawk_rtx_strxntokbyrex(
|
||||
p = hawk_rtx_tokoocharsbyrex(
|
||||
rtx,
|
||||
HAWK_OOECS_PTR(&rtx->inrec.line),
|
||||
HAWK_OOECS_LEN(&rtx->inrec.line),
|
||||
|
@ -239,6 +239,9 @@ function main()
|
||||
ensure (sprintf(@b"%1.0s", "hello"), @b" ", @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (sprintf(@b"%*.*s", 20, 0, "hello"), @b" ", @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (sprintf(@b"%*.*s", 20, 2, "hello"), @b" he", @SCRIPTNAME, @SCRIPTLINE);
|
||||
|
||||
ensure (sprintf("%+d %d", 3, 4), "+3 4", @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (sprintf(@b"%+d %d", 3, 4), @b"+3 4", @SCRIPTNAME, @SCRIPTLINE);
|
||||
}
|
||||
|
||||
{
|
||||
@ -251,6 +254,45 @@ function main()
|
||||
ensure (b, 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
ensure (substr(1000+"5000", 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (substr(1000+"10000", 2) === "1000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (substr(1000+"5000", 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
|
||||
ensure (substr("5000" + 1000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (substr("10000" + 1000, 2) === "1000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (substr("5000" + 1000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
|
||||
ensure (substr(@b"5000" + 1000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (substr(@b"10000" + 1000, 2) === "1000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (substr(@b"5000" + 1000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
|
||||
ensure (substr(@b"5000", 2) === @b"000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (substr(@b"10000", 2) === @b"0000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (substr(@b"5000", 2) === @b"000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
|
||||
ensure (substr(1000+5000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (substr(1000+10000, 2) === "1000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (substr(1000+5000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
@local a;
|
||||
ensure (split("Here===Is=Some=====Data", a, "=+"), 4, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[1] === "Here", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[2] === "Is", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[3] === "Some", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[4] === "Data", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
|
||||
ensure (split(@b"Here===Is=Some=====Data", a, @b"=+"), 4, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[1] === @b"Here", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[2] === @b"Is", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[3] === @b"Some", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
ensure (a[4] === @b"Data", 1, @SCRIPTNAME, @SCRIPTLINE);
|
||||
}
|
||||
|
||||
print "SUCCESS";
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user