Added code to prepare byte string support in split(). Still a long way to go.

This commit is contained in:
hyung-hwan 2020-11-13 02:50:20 +00:00
parent afe27f4eec
commit 4a60654b49
6 changed files with 640 additions and 261 deletions

View File

@ -787,7 +787,200 @@ int hawk_fnc_substr (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
return 0;
}
int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
#if 0
/*
 * Work-in-progress byte-string variant of split(): splits argument 0, read
 * as a byte string, into fields and stores them in a fresh array that is
 * assigned to the reference passed as argument 1.
 *
 * The separator is taken from argument 2 when given (a regular expression
 * value, a byte string, or anything convertible to a byte string) and from
 * the FS global otherwise. A separator of at most one character is handled
 * by the plain character tokenizer; anything longer is treated as a regular
 * expression.
 *
 * On success the number of fields is set as the return value and 0 is
 * returned. On failure every resource acquired here is released and -1 is
 * returned.
 */
static int split_mbs (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
{
	hawk_oow_t nargs;
	hawk_val_t* a0, * a2, * t1, * t2;
	hawk_val_type_t a2_vtype, t1_vtype;

	hawk_bcs_t str;

	hawk_bcs_t fs;
	hawk_bch_t* fs_free = HAWK_NULL;
	const hawk_bch_t* p;

	hawk_oow_t str_left, org_len;
	hawk_tre_t* fs_rex = HAWK_NULL;
	hawk_tre_t* fs_rex_free = HAWK_NULL;

	hawk_bcs_t tok;
	hawk_int_t nflds;
	int x;

	/* keep str.ptr null until it is acquired so that the oops path
	 * knows whether there is anything to release */
	str.ptr = HAWK_NULL;
	str.len = 0;

	nargs = hawk_rtx_getnargs(rtx);
	HAWK_ASSERT (nargs >= 2 && nargs <= 3);

	a0 = hawk_rtx_getarg(rtx, 0);
	a2 = (nargs >= 3)? hawk_rtx_getarg(rtx, 2): HAWK_NULL;

	str.ptr = hawk_rtx_getvalbcstr(rtx, a0, &str.len);
	if (HAWK_UNLIKELY(!str.ptr)) goto oops;

	if (!a2)
	{
		/* get the value from FS */
		t1 = hawk_rtx_getgbl(rtx, HAWK_GBL_FS);
		t1_vtype = HAWK_RTX_GETVALTYPE(rtx, t1);
		if (t1_vtype == HAWK_VAL_NIL)
		{
			fs.ptr = " ";
			fs.len = 1;
		}
		else if (t1_vtype == HAWK_VAL_MBS)
		{
			/* borrow the buffer of the value; nothing to free later */
			fs.ptr = ((hawk_val_mbs_t*)t1)->val.ptr;
			fs.len = ((hawk_val_mbs_t*)t1)->val.len;
		}
		else
		{
			fs.ptr = hawk_rtx_valtobcstrdup(rtx, t1, &fs.len);
			if (HAWK_UNLIKELY(!fs.ptr)) goto oops;
			fs_free = (hawk_bch_t*)fs.ptr;
		}

		/* a multi-character FS uses the regular expression kept in rtx;
		 * it is not owned by this function, so fs_rex_free stays null */
		if (fs.len > 1) fs_rex = rtx->gbl.fs[rtx->gbl.ignorecase];
	}
	else
	{
		a2_vtype = HAWK_RTX_GETVALTYPE(rtx, a2);

		if (a2_vtype == HAWK_VAL_REX)
		{
			/* the third parameter is a regular expression */
			fs_rex = ((hawk_val_rex_t*)a2)->code[rtx->gbl.ignorecase];

			/* make the loop below take the fs_rex branch by
			 * setting fs.len greater than 1 */
			fs.ptr = HAWK_NULL;
			fs.len = 2;
		}
		else
		{
			if (a2_vtype == HAWK_VAL_MBS)
			{
				fs.ptr = ((hawk_val_mbs_t*)a2)->val.ptr;
				fs.len = ((hawk_val_mbs_t*)a2)->val.len;
			}
			else
			{
				fs.ptr = hawk_rtx_valtobcstrdup(rtx, a2, &fs.len);
				if (HAWK_UNLIKELY(!fs.ptr)) goto oops;
				fs_free = (hawk_bch_t*)fs.ptr;
			}

			if (fs.len > 1)
			{
				/* NOTE(review): hawk_rtx_buildrex() is handed byte
				 * characters here; confirm it accepts them or switch
				 * to a byte-string builder once one exists. */
				x = rtx->gbl.ignorecase?
					hawk_rtx_buildrex(rtx, fs.ptr, fs.len, HAWK_NULL, &fs_rex):
					hawk_rtx_buildrex(rtx, fs.ptr, fs.len, &fs_rex, HAWK_NULL);
				if (x <= -1) goto oops;

				/* this one is built here and must be freed here */
				fs_rex_free = fs_rex;
			}
		}
	}

	/* create the array with the same arguments fnc_split() passes */
	t1 = hawk_rtx_makearrval(rtx, 16);
	if (HAWK_UNLIKELY(!t1)) goto oops;

	/* hold a reference across the assignment so that the array is
	 * released if hawk_rtx_setrefval() fails */
	hawk_rtx_refupval (rtx, t1);
	x = hawk_rtx_setrefval(rtx, (hawk_val_ref_t*)hawk_rtx_getarg(rtx, 1), t1);
	hawk_rtx_refdownval (rtx, t1);
	if (HAWK_UNLIKELY(x <= -1)) goto oops;

	/* fill the array with actual values */
	p = str.ptr; str_left = str.len; org_len = str.len;
	nflds = 0;

	while (p)
	{
		if (fs.len <= 1)
		{
			/* the data is a byte string; use the byte-character tokenizer */
			p = hawk_rtx_tokbcharswithbchars(rtx, p, str.len, fs.ptr, fs.len, &tok);
		}
		else
		{
			/* NOTE(review): no byte-string regex tokenizer is visible in
			 * this change; this call still goes through the ooch variant
			 * and needs a byte counterpart before the function is enabled. */
			p = hawk_rtx_tokoocharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok);
			if (p == HAWK_NULL && hawk_rtx_geterrnum(rtx) != HAWK_ENOERR)
			{
				goto oops;
			}
		}

		if (nflds == 0 && p == HAWK_NULL && tok.len == 0)
		{
			/* no field at all */
			break;
		}

		HAWK_ASSERT ((tok.ptr != HAWK_NULL && tok.len > 0) || tok.len == 0);

		/* create the field value - the split function must create
		 * a numeric value if the string is a number */
		t2 = hawk_rtx_makenumormbsvalwithbchars(rtx, tok.ptr, tok.len);
		if (HAWK_UNLIKELY(!t2)) goto oops;

		/* store it under the next index, the way fnc_split() does */
		if (hawk_rtx_setarrvalfld(rtx, t1, ++nflds, t2) == HAWK_NULL)
		{
			/* bump and drop the reference to dispose of the unstored value */
			hawk_rtx_refupval (rtx, t2);
			hawk_rtx_refdownval (rtx, t2);
			goto oops;
		}

		str.len = str_left - (p - str.ptr);
	}

	hawk_rtx_freevalbcstr (rtx, a0, str.ptr);

	if (fs_free) hawk_rtx_freemem (rtx, fs_free);

	if (fs_rex_free)
	{
		if (rtx->gbl.ignorecase)
			hawk_rtx_freerex (rtx, HAWK_NULL, fs_rex_free);
		else
			hawk_rtx_freerex (rtx, fs_rex_free, HAWK_NULL);
	}

	t1 = hawk_rtx_makeintval(rtx, nflds);
	if (HAWK_UNLIKELY(!t1)) return -1;

	hawk_rtx_setretval (rtx, t1);
	return 0;

oops:
	if (str.ptr) hawk_rtx_freevalbcstr (rtx, a0, str.ptr);

	if (fs_free) hawk_rtx_freemem (rtx, fs_free);

	if (fs_rex_free)
	{
		if (rtx->gbl.ignorecase)
			hawk_rtx_freerex (rtx, HAWK_NULL, fs_rex_free);
		else
			hawk_rtx_freerex (rtx, fs_rex_free, HAWK_NULL);
	}
	return -1;
}
#endif
static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
{
hawk_oow_t nargs;
hawk_val_t* a0, * a2, * t1, * t2;
@ -817,7 +1010,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
str.ptr = hawk_rtx_getvaloocstr(rtx, a0, &str.len);
if (HAWK_UNLIKELY(!str.ptr)) goto oops;
if (a2 == HAWK_NULL)
if (!a2)
{
/* get the value from FS */
t1 = hawk_rtx_getgbl(rtx, HAWK_GBL_FS);
@ -835,7 +1028,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
else
{
fs.ptr = hawk_rtx_valtooocstrdup(rtx, t1, &fs.len);
if (fs.ptr == HAWK_NULL) goto oops;
if (HAWK_UNLIKELY(!fs.ptr)) goto oops;
fs_free = (hawk_ooch_t*)fs.ptr;
}
@ -883,7 +1076,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
}
}
t1 = hawk_rtx_makemapval(rtx);
t1 = use_array? hawk_rtx_makearrval(rtx, 16): hawk_rtx_makemapval(rtx);
if (HAWK_UNLIKELY(!t1)) goto oops;
hawk_rtx_refupval (rtx, t1);
@ -902,11 +1095,11 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
if (fs.len <= 1)
{
p = hawk_rtx_strxntok(rtx, p, str.len, fs.ptr, fs.len, &tok);
p = hawk_rtx_tokoocharswithoochars(rtx, p, str.len, fs.ptr, fs.len, &tok);
}
else
{
p = hawk_rtx_strxntokbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok);
p = hawk_rtx_tokoocharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok);
if (p == HAWK_NULL && hawk_rtx_geterrnum(rtx) != HAWK_ENOERR)
{
goto oops;
@ -928,15 +1121,27 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
t2 = hawk_rtx_makenumorstrvalwithoochars(rtx, tok.ptr, tok.len);
if (HAWK_UNLIKELY(!t2)) goto oops;
/* put it into the map */
key_len = hawk_int_to_oocstr(++nflds, 10, HAWK_NULL, key_buf, HAWK_COUNTOF(key_buf));
HAWK_ASSERT (key_len != (hawk_oow_t)-1);
if (hawk_rtx_setmapvalfld(rtx, t1, key_buf, key_len, t2) == HAWK_NULL)
if (use_array)
{
hawk_rtx_refupval (rtx, t2);
hawk_rtx_refdownval (rtx, t2);
goto oops;
if (hawk_rtx_setarrvalfld(rtx, t1, ++nflds, t2) == HAWK_NULL)
{
hawk_rtx_refupval (rtx, t2);
hawk_rtx_refdownval (rtx, t2);
goto oops;
}
}
else
{
/* put it into the map */
key_len = hawk_int_to_oocstr(++nflds, 10, HAWK_NULL, key_buf, HAWK_COUNTOF(key_buf));
HAWK_ASSERT (key_len != (hawk_oow_t)-1);
if (hawk_rtx_setmapvalfld(rtx, t1, key_buf, key_len, t2) == HAWK_NULL)
{
hawk_rtx_refupval (rtx, t2);
hawk_rtx_refdownval (rtx, t2);
goto oops;
}
}
str.len = str_left - (p - str.ptr);
@ -958,7 +1163,7 @@ int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
/*nflds--;*/
t1 = hawk_rtx_makeintval (rtx, nflds);
if (t1 == HAWK_NULL) return -1;
if (HAWK_UNLIKELY(!t1)) return -1;
hawk_rtx_setretval (rtx, t1);
return 0;
@ -978,6 +1183,11 @@ oops:
return -1;
}
/* Entry point of the split() built-in function. It delegates to fnc_split()
 * with use_array set to 1, which makes fnc_split() store the fields in an
 * array value rather than a map value. Returns whatever fnc_split() returns. */
int hawk_fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
{
return fnc_split(rtx, fi, 1);
}
int hawk_fnc_tolower (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
{
hawk_oow_t i;

301
hawk/lib/misc-imp.h Normal file
View File

@ -0,0 +1,301 @@
/*
* $Id$
*
Copyright (c) 2006-2020 Chung, Hyung-Hwan. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Template body: the including file defines char_t, xcs_t, is_xch_space and
 * the function name tokenize_xchars before including this file.
 *
 * Extracts the next token from the 'len' characters starting at 's'.
 *
 * How the input is cut depends on the delimiter set:
 *  - delim is HAWK_NULL: the whole input, with leading and trailing space
 *    characters trimmed, is a single token.
 *  - delim_len is 0: each character is a token.
 *  - delim holds space characters only: tokens are separated by runs of
 *    space characters. A single space-class delimiter other than ' '
 *    is treated as an ordinary (non-space) delimiter instead.
 *  - delim holds non-space characters only: a token ends at the first
 *    character found in delim.
 *  - delim mixes both kinds: a token ends at the first character found in
 *    delim that is not a space; space characters around the token are
 *    dropped.
 * Delimiter matching ignores case when rtx->gbl.ignorecase is set.
 *
 * The token is reported through 'tok' as a pointer into 's' plus a length;
 * tok->ptr is HAWK_NULL and tok->len is 0 when no token character was found.
 *
 * Returns the position to pass as 's' on the next call, or HAWK_NULL when
 * the input has been consumed, in which case the function must not be
 * called again.
 */
char_t* tokenize_xchars (hawk_rtx_t* rtx, const char_t* s, hawk_oow_t len, const char_t* delim, hawk_oow_t delim_len, xcs_t* tok)
{
	const char_t* p = s, *d;
	const char_t* end = s + len;
	const char_t* sp = HAWK_NULL, * ep = HAWK_NULL;
	/* arithmetic on a null pointer is undefined, so the end of the
	 * delimiter set is computed only when a set is actually given.
	 * it is never read when delim is HAWK_NULL. */
	const char_t* delim_end = (delim == HAWK_NULL)? HAWK_NULL: (delim + delim_len);
	char_t c;
	int delim_mode;

#define __DELIM_NULL      0
#define __DELIM_EMPTY     1
#define __DELIM_SPACES    2
#define __DELIM_NOSPACES  3
#define __DELIM_COMPOSITE 4

	if (delim == HAWK_NULL) delim_mode = __DELIM_NULL;
	else
	{
		/* classify the delimiter set by scanning it once */
		delim_mode = __DELIM_EMPTY;

		for (d = delim; d < delim_end; d++)
		{
			if (is_xch_space(*d))
			{
				if (delim_mode == __DELIM_EMPTY)
					delim_mode = __DELIM_SPACES;
				else if (delim_mode == __DELIM_NOSPACES)
				{
					delim_mode = __DELIM_COMPOSITE;
					break;
				}
			}
			else
			{
				if (delim_mode == __DELIM_EMPTY)
					delim_mode = __DELIM_NOSPACES;
				else if (delim_mode == __DELIM_SPACES)
				{
					delim_mode = __DELIM_COMPOSITE;
					break;
				}
			}
		}

		/* TODO: verify the following statement... */
		if (delim_mode == __DELIM_SPACES &&
		    delim_len == 1 &&
		    delim[0] != ' ') delim_mode = __DELIM_NOSPACES;
	}

	if (delim_mode == __DELIM_NULL)
	{
		/* when HAWK_NULL is given as "delim", the leading and
		 * trailing space characters are trimmed off the source
		 * string "s" and the rest becomes the token. */
		while (p < end && is_xch_space(*p)) p++;
		while (p < end)
		{
			c = *p;

			if (!is_xch_space(c))
			{
				if (sp == HAWK_NULL) sp = p;
				ep = p;
			}
			p++;
		}
	}
	else if (delim_mode == __DELIM_EMPTY)
	{
		/* each character in the source string "s" becomes a token. */
		if (p < end)
		{
			sp = p;
			ep = p++;
		}
	}
	else if (delim_mode == __DELIM_SPACES)
	{
		/* each token is delimited by space characters. all leading
		 * and trailing spaces are removed. */
		while (p < end && is_xch_space(*p)) p++;
		while (p < end)
		{
			c = *p;
			if (is_xch_space(c)) break;
			if (sp == HAWK_NULL) sp = p;
			ep = p++;
		}
		while (p < end && is_xch_space(*p)) p++;
	}
	else if (delim_mode == __DELIM_NOSPACES)
	{
		/* each token is delimited by one of the characters
		 * in the delimiter set "delim". */
		/* NOTE(review): the case folding below always goes through
		 * hawk_to_ooch_upper although char_t is supplied by the
		 * including file - confirm this is right when char_t is
		 * not hawk_ooch_t. */
		if (rtx->gbl.ignorecase)
		{
			while (p < end)
			{
				c = hawk_to_ooch_upper(*p);
				for (d = delim; d < delim_end; d++)
				{
					if (c == hawk_to_ooch_upper(*d)) goto exit_loop;
				}

				if (sp == HAWK_NULL) sp = p;
				ep = p++;
			}
		}
		else
		{
			while (p < end)
			{
				c = *p;
				for (d = delim; d < delim_end; d++)
				{
					if (c == *d) goto exit_loop;
				}

				if (sp == HAWK_NULL) sp = p;
				ep = p++;
			}
		}
	}
	else /* if (delim_mode == __DELIM_COMPOSITE) */
	{
		/* each token is delimited by one of the non-space characters
		 * in the delimiter set "delim". however, all space characters
		 * surrounding the token are removed. */
		while (p < end && is_xch_space(*p)) p++;
		if (rtx->gbl.ignorecase)
		{
			while (p < end)
			{
				c = hawk_to_ooch_upper(*p);
				if (is_xch_space(c))
				{
					/* skipped without moving sp/ep, so a space is
					 * kept only when token characters follow it */
					p++;
					continue;
				}
				for (d = delim; d < delim_end; d++)
				{
					if (c == hawk_to_ooch_upper(*d))
						goto exit_loop;
				}
				if (sp == HAWK_NULL) sp = p;
				ep = p++;
			}
		}
		else
		{
			while (p < end)
			{
				c = *p;
				if (is_xch_space(c))
				{
					p++;
					continue;
				}
				for (d = delim; d < delim_end; d++)
				{
					if (c == *d) goto exit_loop;
				}
				if (sp == HAWK_NULL) sp = p;
				ep = p++;
			}
		}
	}

exit_loop:
	if (sp == HAWK_NULL)
	{
		tok->ptr = HAWK_NULL;
		tok->len = (hawk_oow_t)0;
	}
	else
	{
		/* ep points at the last token character, hence the + 1 */
		tok->ptr = (char_t*)sp;
		tok->len = ep - sp + 1;
	}

	/* if HAWK_NULL is returned, this function should not be called again */
	if (p >= end) return HAWK_NULL;

	/* in these two modes p already points at the start of the next token */
	if (delim_mode == __DELIM_EMPTY ||
	    delim_mode == __DELIM_SPACES) return (char_t*)p;

	/* otherwise p rests on the delimiter that ended the token; step over it */
	return (char_t*)++p;
}
/*
 * Template body: the including file defines char_t, xcs_t, is_xch_space and
 * the function name split_xchars_to_fields before including this file.
 *
 * Extracts the next field from the 'len' characters starting at 'str',
 * honoring an escape character and a pair of quote characters. The buffer
 * is modified in place: the escape and quote characters are dropped and the
 * remaining field characters are compacted towards the start of the field.
 *
 *  fs - field separator. when it is a space character, a run of
 *       consecutive separators counts as one.
 *  ec - escape character. the character following it is taken literally.
 *  lq - left quote. starts a quoted section in which fs has no effect.
 *  rq - right quote. ends a quoted section.
 *
 * Leading space characters are skipped. Trailing space characters are left
 * out of the field unless they are escaped or quoted. 'rtx' is not used by
 * this function.
 *
 * The field is reported through 'tok'. Returns the position at which the
 * next call should resume, or HAWK_NULL when the input has been consumed.
 */
char_t* split_xchars_to_fields (hawk_rtx_t* rtx, char_t* str, hawk_oow_t len, char_t fs, char_t ec, char_t lq, char_t rq, xcs_t* tok)
{
	char_t* p = str;
	char_t* end = str + len;
	int escaped = 0, quoted = 0;
	char_t* ts; /* token start */
	char_t* tp; /* points to one char past the last token char */
	char_t* xp; /* points to one char past the last effective char */

	/* skip leading spaces */
	while (p < end && is_xch_space(*p)) p++;

	/* initialize token pointers */
	ts = tp = xp = p;

	while (p < end)
	{
		/* must be char_t, not char: a plain char would truncate the
		 * character when char_t is wider than a byte and break every
		 * comparison and store below */
		char_t c = *p;

		if (escaped)
		{
			/* an escaped character is always effective */
			*tp++ = c; xp = tp; p++;
			escaped = 0;
		}
		else
		{
			if (c == ec)
			{
				escaped = 1;
				p++;
			}
			else if (quoted)
			{
				if (c == rq)
				{
					quoted = 0;
					p++;
				}
				else
				{
					/* a quoted character is always effective */
					*tp++ = c; xp = tp; p++;
				}
			}
			else
			{
				if (c == fs)
				{
					tok->ptr = ts;
					tok->len = xp - ts;
					p++;

					if (is_xch_space(fs))
					{
						/* collapse a run of space separators */
						while (p < end && *p == fs) p++;
						if (p >= end) return HAWK_NULL;
					}

					return p;
				}

				if (c == lq)
				{
					quoted = 1;
					p++;
				}
				else
				{
					/* a plain space is copied but extends the field
					 * only if an effective character follows it */
					*tp++ = c; p++;
					if (!is_xch_space(c)) xp = tp;
				}
			}
		}
	}

	if (escaped)
	{
		/* if it is still escaped, the last character must be
		 * the escaper itself. treat it as a normal character */
		*xp++ = ec;
	}

	tok->ptr = ts;
	tok->len = xp - ts;
	return HAWK_NULL;
}

View File

@ -33,23 +33,46 @@
extern "C" {
#endif
hawk_ooch_t* hawk_rtx_strtok (
hawk_rtx_t* rtx, const hawk_ooch_t* s,
const hawk_ooch_t* delim, hawk_oocs_t* tok);
hawk_uch_t* hawk_rtx_flduchars (
hawk_rtx_t* rtx,
hawk_uch_t* str,
hawk_oow_t len,
hawk_uch_t fs,
hawk_uch_t lq,
hawk_uch_t rq,
hawk_uch_t ec,
hawk_ucs_t* tok
);
hawk_ooch_t* hawk_rtx_strxtok (
hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len,
const hawk_ooch_t* delim, hawk_oocs_t* tok);
hawk_bch_t* hawk_rtx_fldbchars (
hawk_rtx_t* rtx,
hawk_bch_t* str,
hawk_oow_t len,
hawk_bch_t fs,
hawk_bch_t lq,
hawk_bch_t rq,
hawk_bch_t ec,
hawk_bcs_t* tok
);
hawk_ooch_t* hawk_rtx_strntok (
hawk_rtx_t* rtx, const hawk_ooch_t* s,
const hawk_ooch_t* delim, hawk_oow_t delim_len, hawk_oocs_t* tok);
hawk_uch_t* hawk_rtx_tokucharswithuchars (
hawk_rtx_t* rtx, const hawk_uch_t* s, hawk_oow_t len,
const hawk_uch_t* delim, hawk_oow_t delim_len, hawk_ucs_t* tok);
hawk_ooch_t* hawk_rtx_strxntok (
hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len,
const hawk_ooch_t* delim, hawk_oow_t delim_len, hawk_oocs_t* tok);
hawk_bch_t* hawk_rtx_tokbcharswithbchars (
hawk_rtx_t* rtx, const hawk_bch_t* s, hawk_oow_t len,
const hawk_bch_t* delim, hawk_oow_t delim_len, hawk_bcs_t* tok);
hawk_ooch_t* hawk_rtx_strxntokbyrex (
#if defined(HAWK_OOCH_IS_UCH)
# define hawk_rtx_fldoochars hawk_rtx_flduchars
# define hawk_rtx_tokoocharswithoochars hawk_rtx_tokucharswithuchars
#else
# define hawk_rtx_fldoochars hawk_rtx_fldbchars
# define hawk_rtx_tokoocharswithoochars hawk_rtx_tokbcharswithbchars
#endif
hawk_ooch_t* hawk_rtx_tokoocharsbyrex (
hawk_rtx_t* rtx,
const hawk_ooch_t* str,
hawk_oow_t len,
@ -59,16 +82,6 @@ hawk_ooch_t* hawk_rtx_strxntokbyrex (
hawk_oocs_t* tok
);
hawk_ooch_t* hawk_rtx_strxnfld (
hawk_rtx_t* rtx,
hawk_ooch_t* str,
hawk_oow_t len,
hawk_ooch_t fs,
hawk_ooch_t lq,
hawk_ooch_t rq,
hawk_ooch_t ec,
hawk_oocs_t* tok
);
int hawk_rtx_matchvalwithucs (
hawk_rtx_t* rtx, hawk_val_t* val,

View File

@ -26,221 +26,32 @@
#include "hawk-prv.h"
hawk_ooch_t* hawk_rtx_strtok (
hawk_rtx_t* rtx, const hawk_ooch_t* s,
const hawk_ooch_t* delim, hawk_oocs_t* tok)
{
return hawk_rtx_strxntok(rtx, s, hawk_count_oocstr(s), delim, hawk_count_oocstr(delim), tok);
}
hawk_ooch_t* hawk_rtx_strxtok (
hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len,
const hawk_ooch_t* delim, hawk_oocs_t* tok)
{
return hawk_rtx_strxntok(rtx, s, len, delim, hawk_count_oocstr(delim), tok);
}
#undef char_t
#undef xcs_t
#undef is_xch_space
#undef tokenize_xchars
#undef split_xchars_to_fields
#define char_t hawk_bch_t
#define xcs_t hawk_bcs_t
#define is_xch_space hawk_is_bch_space
#define tokenize_xchars hawk_rtx_tokbcharswithbchars
#define split_xchars_to_fields hawk_rtx_fldbchars
#include "misc-imp.h"
hawk_ooch_t* hawk_rtx_strntok (
hawk_rtx_t* rtx, const hawk_ooch_t* s,
const hawk_ooch_t* delim, hawk_oow_t delim_len,
hawk_oocs_t* tok)
{
return hawk_rtx_strxntok(rtx, s, hawk_count_oocstr(s), delim, delim_len, tok);
}
#undef char_t
#undef xcs_t
#undef is_xch_space
#undef tokenize_xchars
#undef split_xchars_to_fields
#define char_t hawk_uch_t
#define xcs_t hawk_ucs_t
#define is_xch_space hawk_is_uch_space
#define tokenize_xchars hawk_rtx_tokucharswithuchars
#define split_xchars_to_fields hawk_rtx_flduchars
#include "misc-imp.h"
hawk_ooch_t* hawk_rtx_strxntok (
hawk_rtx_t* rtx, const hawk_ooch_t* s, hawk_oow_t len,
const hawk_ooch_t* delim, hawk_oow_t delim_len, hawk_oocs_t* tok)
{
const hawk_ooch_t* p = s, *d;
const hawk_ooch_t* end = s + len;
const hawk_ooch_t* sp = HAWK_NULL, * ep = HAWK_NULL;
const hawk_ooch_t* delim_end = delim + delim_len;
hawk_ooch_t c;
int delim_mode;
#define __DELIM_NULL 0
#define __DELIM_EMPTY 1
#define __DELIM_SPACES 2
#define __DELIM_NOSPACES 3
#define __DELIM_COMPOSITE 4
if (delim == HAWK_NULL) delim_mode = __DELIM_NULL;
else
{
delim_mode = __DELIM_EMPTY;
for (d = delim; d < delim_end; d++)
{
if (hawk_is_ooch_space(*d))
{
if (delim_mode == __DELIM_EMPTY)
delim_mode = __DELIM_SPACES;
else if (delim_mode == __DELIM_NOSPACES)
{
delim_mode = __DELIM_COMPOSITE;
break;
}
}
else
{
if (delim_mode == __DELIM_EMPTY)
delim_mode = __DELIM_NOSPACES;
else if (delim_mode == __DELIM_SPACES)
{
delim_mode = __DELIM_COMPOSITE;
break;
}
}
}
/* TODO: verify the following statement... */
if (delim_mode == __DELIM_SPACES &&
delim_len == 1 &&
delim[0] != HAWK_T(' ')) delim_mode = __DELIM_NOSPACES;
}
if (delim_mode == __DELIM_NULL)
{
/* when HAWK_NULL is given as "delim", it trims off the
* leading and trailing spaces characters off the source
* string "s" eventually. */
while (p < end && hawk_is_ooch_space(*p)) p++;
while (p < end)
{
c = *p;
if (!hawk_is_ooch_space(c))
{
if (sp == HAWK_NULL) sp = p;
ep = p;
}
p++;
}
}
else if (delim_mode == __DELIM_EMPTY)
{
/* each character in the source string "s" becomes a token. */
if (p < end)
{
c = *p;
sp = p;
ep = p++;
}
}
else if (delim_mode == __DELIM_SPACES)
{
/* each token is delimited by space characters. all leading
* and trailing spaces are removed. */
while (p < end && hawk_is_ooch_space(*p)) p++;
while (p < end)
{
c = *p;
if (hawk_is_ooch_space(c)) break;
if (sp == HAWK_NULL) sp = p;
ep = p++;
}
while (p < end && hawk_is_ooch_space(*p)) p++;
}
else if (delim_mode == __DELIM_NOSPACES)
{
/* each token is delimited by one of charaters
* in the delimeter set "delim". */
if (rtx->gbl.ignorecase)
{
while (p < end)
{
c = hawk_to_ooch_upper(*p);
for (d = delim; d < delim_end; d++)
{
if (c == hawk_to_ooch_upper(*d)) goto exit_loop;
}
if (sp == HAWK_NULL) sp = p;
ep = p++;
}
}
else
{
while (p < end)
{
c = *p;
for (d = delim; d < delim_end; d++)
{
if (c == *d) goto exit_loop;
}
if (sp == HAWK_NULL) sp = p;
ep = p++;
}
}
}
else /* if (delim_mode == __DELIM_COMPOSITE) */
{
/* each token is delimited by one of non-space charaters
* in the delimeter set "delim". however, all space characters
* surrounding the token are removed */
while (p < end && hawk_is_ooch_space(*p)) p++;
if (rtx->gbl.ignorecase)
{
while (p < end)
{
c = hawk_to_ooch_upper(*p);
if (hawk_is_ooch_space(c))
{
p++;
continue;
}
for (d = delim; d < delim_end; d++)
{
if (c == hawk_to_ooch_upper(*d))
goto exit_loop;
}
if (sp == HAWK_NULL) sp = p;
ep = p++;
}
}
else
{
while (p < end)
{
c = *p;
if (hawk_is_ooch_space(c))
{
p++;
continue;
}
for (d = delim; d < delim_end; d++)
{
if (c == *d) goto exit_loop;
}
if (sp == HAWK_NULL) sp = p;
ep = p++;
}
}
}
exit_loop:
if (sp == HAWK_NULL)
{
tok->ptr = HAWK_NULL;
tok->len = (hawk_oow_t)0;
}
else
{
tok->ptr = (hawk_ooch_t*)sp;
tok->len = ep - sp + 1;
}
/* if HAWK_NULL is returned, this function should not be called again */
if (p >= end) return HAWK_NULL;
if (delim_mode == __DELIM_EMPTY ||
delim_mode == __DELIM_SPACES) return (hawk_ooch_t*)p;
return (hawk_ooch_t*)++p;
}
hawk_ooch_t* hawk_rtx_strxntokbyrex (
hawk_ooch_t* hawk_rtx_tokoocharsbyrex (
hawk_rtx_t* rtx,
const hawk_ooch_t* str, hawk_oow_t len,
const hawk_ooch_t* substr, hawk_oow_t sublen,
@ -343,6 +154,7 @@ exit_loop:
}
}
#if 0
hawk_ooch_t* hawk_rtx_strxnfld (
hawk_rtx_t* rtx, hawk_ooch_t* str, hawk_oow_t len,
hawk_ooch_t fs, hawk_ooch_t ec, hawk_ooch_t lq, hawk_ooch_t rq,
@ -431,6 +243,7 @@ hawk_ooch_t* hawk_rtx_strxnfld (
tok->len = xp - ts;
return HAWK_NULL;
}
#endif
static int matchtre_ucs (hawk_tre_t* tre, int opt, const hawk_ucs_t* str, hawk_ucs_t* mat, hawk_ucs_t submat[9], hawk_gem_t* errgem)
{

View File

@ -166,14 +166,14 @@ static int split_record (hawk_rtx_t* rtx, int prefer_number)
switch (how)
{
case 0:
p = hawk_rtx_strxntok (rtx, p, len, fs_ptr, fs_len, &tok);
p = hawk_rtx_tokoocharswithoochars (rtx, p, len, fs_ptr, fs_len, &tok);
break;
case 1:
break;
default:
p = hawk_rtx_strxntokbyrex(
p = hawk_rtx_tokoocharsbyrex(
rtx,
HAWK_OOECS_PTR(&rtx->inrec.line),
HAWK_OOECS_LEN(&rtx->inrec.line),
@ -241,17 +241,17 @@ static int split_record (hawk_rtx_t* rtx, int prefer_number)
{
case 0:
/* 1 character FS */
p = hawk_rtx_strxntok(rtx, p, len, fs_ptr, fs_len, &tok);
p = hawk_rtx_tokoocharswithoochars(rtx, p, len, fs_ptr, fs_len, &tok);
break;
case 1:
/* 5 character FS beginning with ? */
p = hawk_rtx_strxnfld(rtx, p, len, fs_ptr[1], fs_ptr[2], fs_ptr[3], fs_ptr[4], &tok);
p = hawk_rtx_fldoochars(rtx, p, len, fs_ptr[1], fs_ptr[2], fs_ptr[3], fs_ptr[4], &tok);
break;
default:
/* all other cases */
p = hawk_rtx_strxntokbyrex(
p = hawk_rtx_tokoocharsbyrex(
rtx,
HAWK_OOECS_PTR(&rtx->inrec.line),
HAWK_OOECS_LEN(&rtx->inrec.line),

View File

@ -233,12 +233,15 @@ function main()
ensure (sprintf("%*.*s", 20, 0, "hello"), " ", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf("%*.*s", 20, 2, "hello"), " he", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%0s", "hello"), @b"hello", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%.0s", "hello"), @b"", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%0.0s", "hello"), @b"", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%1.0s", "hello"), @b" ", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%*.*s", 20, 0, "hello"), @b" ", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%*.*s", 20, 2, "hello"), @b" he", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%0s", "hello"), @b"hello", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%.0s", "hello"), @b"", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%0.0s", "hello"), @b"", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%1.0s", "hello"), @b" ", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%*.*s", 20, 0, "hello"), @b" ", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%*.*s", 20, 2, "hello"), @b" he", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf("%+d %d", 3, 4), "+3 4", @SCRIPTNAME, @SCRIPTLINE);
ensure (sprintf(@b"%+d %d", 3, 4), @b"+3 4", @SCRIPTNAME, @SCRIPTLINE);
}
{
@ -251,6 +254,45 @@ function main()
ensure (b, 1, @SCRIPTNAME, @SCRIPTLINE);
}
{
ensure (substr(1000+"5000", 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr(1000+"10000", 2) === "1000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr(1000+"5000", 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr("5000" + 1000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr("10000" + 1000, 2) === "1000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr("5000" + 1000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr(@b"5000" + 1000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr(@b"10000" + 1000, 2) === "1000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr(@b"5000" + 1000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr(@b"5000", 2) === @b"000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr(@b"10000", 2) === @b"0000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr(@b"5000", 2) === @b"000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr(1000+5000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr(1000+10000, 2) === "1000", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (substr(1000+5000, 2) === "000", 1, @SCRIPTNAME, @SCRIPTLINE);
}
{
@local a;
ensure (split("Here===Is=Some=====Data", a, "=+"), 4, @SCRIPTNAME, @SCRIPTLINE);
ensure (a[1] === "Here", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (a[2] === "Is", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (a[3] === "Some", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (a[4] === "Data", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (split(@b"Here===Is=Some=====Data", a, @b"=+"), 4, @SCRIPTNAME, @SCRIPTLINE);
ensure (a[1] === @b"Here", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (a[2] === @b"Is", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (a[3] === @b"Some", 1, @SCRIPTNAME, @SCRIPTLINE);
ensure (a[4] === @b"Data", 1, @SCRIPTNAME, @SCRIPTLINE);
}
print "SUCCESS";
}