This commit is contained in:
91
lib/fnc.c
91
lib/fnc.c
@ -861,21 +861,19 @@ static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
|
||||
hawk_oocs_t tok;
|
||||
hawk_int_t nflds;
|
||||
int is_byte_str;
|
||||
int x, do_fld = 0;
|
||||
int switch_fs_to_bchr = 0;
|
||||
|
||||
str.ptr = HAWK_NULL;
|
||||
str.len = 0;
|
||||
int x;
|
||||
int do_fld;
|
||||
int switch_fs_to_bchr;
|
||||
|
||||
nargs = hawk_rtx_getnargs(rtx);
|
||||
HAWK_ASSERT (nargs >= 2 && nargs <= 3);
|
||||
|
||||
a0 = hawk_rtx_getarg(rtx, 0);
|
||||
a2 = (nargs >= 3)? hawk_rtx_getarg(rtx, 2): HAWK_NULL;
|
||||
|
||||
do_fld = 0;
|
||||
switch_fs_to_bchr = 0;
|
||||
is_byte_str = 0;
|
||||
str.ptr = HAWK_NULL;
|
||||
str.len = 0;
|
||||
is_byte_str = 0;
|
||||
|
||||
/* field separator */
|
||||
t0 = a2? a2: hawk_rtx_getgbl(rtx, HAWK_GBL_FS); /* if a2 is not available, get the value from FS */
|
||||
@ -977,21 +975,25 @@ static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
|
||||
{
|
||||
if (fs_rex)
|
||||
{
|
||||
p = is_byte_str? (hawk_ooch_t*)hawk_rtx_tokbcharsbyrex(rtx, (hawk_bch_t*)str.ptr, org_len, (hawk_bch_t*)p, str.len, fs_rex, (hawk_bcs_t*)&tok):
|
||||
hawk_rtx_tokoocharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok);
|
||||
p = is_byte_str?
|
||||
(hawk_ooch_t*)hawk_rtx_tokbcharsbyrex(rtx, (hawk_bch_t*)str.ptr, org_len, (hawk_bch_t*)p, str.len, fs_rex, (hawk_bcs_t*)&tok):
|
||||
hawk_rtx_tokoocharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok);
|
||||
if (p && hawk_rtx_geterrnum(rtx) != HAWK_ENOERR) goto oops;
|
||||
}
|
||||
else if (do_fld)
|
||||
{
|
||||
/* [NOTE] even if is_byte_str is true, the field separator is of the ooch type.
|
||||
* there may be some data truncation and related issues */
|
||||
p = is_byte_str? (hawk_ooch_t*)hawk_rtx_fldbchars(rtx, (hawk_bch_t*)p, str.len, fs.ptr[1], fs.ptr[2], fs.ptr[3], fs.ptr[4], (hawk_bcs_t*)&tok):
|
||||
hawk_rtx_fldoochars(rtx, p, str.len, fs.ptr[1], fs.ptr[2], fs.ptr[3], fs.ptr[4], &tok);
|
||||
/* ? sep esc lq rq */
|
||||
p = is_byte_str?
|
||||
(hawk_ooch_t*)hawk_rtx_fldbchars(rtx, (hawk_bch_t*)p, str.len, (hawk_bchu_t)fs.ptr[1], (hawk_bchu_t)fs.ptr[2], (hawk_bchu_t)fs.ptr[3], (hawk_bchu_t)fs.ptr[4], (hawk_bcs_t*)&tok):
|
||||
hawk_rtx_fldoochars(rtx, p, str.len, fs.ptr[1], fs.ptr[2], fs.ptr[3], fs.ptr[4], &tok);
|
||||
}
|
||||
else
|
||||
{
|
||||
p = is_byte_str? (hawk_ooch_t*)hawk_rtx_tokbcharswithbchars(rtx, (hawk_bch_t*)p, str.len, (hawk_bch_t*)fs.ptr, fs.len, (hawk_bcs_t*)&tok):
|
||||
hawk_rtx_tokoocharswithoochars(rtx, p, str.len, fs.ptr, fs.len, &tok);
|
||||
p = is_byte_str?
|
||||
(hawk_ooch_t*)hawk_rtx_tokbcharswithbchars(rtx, (hawk_bch_t*)p, str.len, (hawk_bch_t*)fs.ptr, fs.len, (hawk_bcs_t*)&tok):
|
||||
hawk_rtx_tokoocharswithoochars(rtx, p, str.len, fs.ptr, fs.len, &tok);
|
||||
}
|
||||
|
||||
if (nflds == 0 && !p && tok.len == 0)
|
||||
@ -1311,7 +1313,6 @@ static int __substitute_oocs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t
|
||||
{
|
||||
m = hawk_ooecs_ccat(new, s1->ptr[i]);
|
||||
}
|
||||
|
||||
if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops;
|
||||
}
|
||||
|
||||
@ -1347,13 +1348,14 @@ static int __substitute_bcs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t*
|
||||
{
|
||||
hawk_bcs_t mat, pmat, cur;
|
||||
hawk_bcs_t submat[9];
|
||||
hawk_oow_t sub_count, match_limit;
|
||||
hawk_oow_t sub_count, op_count, match_limit;
|
||||
hawk_bch_t* s2_end;
|
||||
|
||||
s2_end = s2->ptr + s2->len;
|
||||
cur.ptr = s2->ptr;
|
||||
cur.len = s2->len;
|
||||
sub_count = 0;
|
||||
op_count = 0;
|
||||
match_limit = *max_count;
|
||||
|
||||
pmat.ptr = HAWK_NULL;
|
||||
@ -1389,44 +1391,51 @@ static int __substitute_bcs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t*
|
||||
|
||||
if (hawk_becs_ncat(new, cur.ptr, mat.ptr - cur.ptr) == (hawk_oow_t)-1) goto oops;
|
||||
|
||||
for (i = 0; i < s1->len; i++)
|
||||
if (extended && op_pos > 0 && (op_pos - 1) != op_count)
|
||||
{
|
||||
if (s1->ptr[i] == '\\' && (i + 1) < s1->len)
|
||||
/* don't perform substitution. just copy the original string to the output */
|
||||
if (hawk_becs_ncat(new, mat.ptr, mat.len) == (hawk_oow_t)-1) goto oops;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < s1->len; i++)
|
||||
{
|
||||
if (extended) /* for gensub */
|
||||
if (s1->ptr[i] == '\\' && (i + 1) < s1->len)
|
||||
{
|
||||
hawk_bch_t ic = s1->ptr[i + 1];
|
||||
if (ic == '0')
|
||||
if (extended) /* for gensub */
|
||||
{
|
||||
m = hawk_becs_ncat(new, mat.ptr, mat.len);
|
||||
hawk_bch_t ic = s1->ptr[i + 1];
|
||||
if (ic == '0')
|
||||
{
|
||||
m = hawk_becs_ncat(new, mat.ptr, mat.len);
|
||||
}
|
||||
else if (ic >= '1' && ic <= '9')
|
||||
{
|
||||
hawk_oow_t idx = (ic - '0') - 1;
|
||||
m = hawk_becs_ncat(new, submat[idx].ptr, submat[idx].len);
|
||||
}
|
||||
else goto escape;
|
||||
}
|
||||
else if (ic >= '1' && ic <= '9')
|
||||
else
|
||||
{
|
||||
hawk_oow_t idx = (ic - '0') - 1;
|
||||
m = hawk_becs_ncat(new, submat[idx].ptr, submat[idx].len);
|
||||
escape:
|
||||
m = hawk_becs_ccat(new, s1->ptr[i + 1]);
|
||||
}
|
||||
else goto escape;
|
||||
i++; /* skip the backslash */
|
||||
}
|
||||
else if (s1->ptr[i] == '&')
|
||||
{
|
||||
m = hawk_becs_ncat(new, mat.ptr, mat.len);
|
||||
}
|
||||
else
|
||||
{
|
||||
escape:
|
||||
m = hawk_becs_ccat(new, s1->ptr[i + 1]);
|
||||
m = hawk_becs_ccat(new, s1->ptr[i]);
|
||||
}
|
||||
i++; /* skip the backslash */
|
||||
if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops;
|
||||
}
|
||||
else if (s1->ptr[i] == '&')
|
||||
{
|
||||
m = hawk_becs_ncat(new, mat.ptr, mat.len);
|
||||
}
|
||||
else
|
||||
{
|
||||
m = hawk_becs_ccat(new, s1->ptr[i]);
|
||||
}
|
||||
|
||||
if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops;
|
||||
sub_count++;
|
||||
}
|
||||
|
||||
sub_count++;
|
||||
op_count++;
|
||||
cur.len = cur.len - ((mat.ptr - cur.ptr) + mat.len);
|
||||
cur.ptr = mat.ptr + mat.len;
|
||||
|
||||
|
@ -210,6 +210,8 @@ function main()
|
||||
tap_ensure (x, "the tiger-tiger pounces on the dog", @SCRIPTNAME, @SCRIPTLINE);
|
||||
x = gensub(/(tiger|dog)/, "\\1-\\1", 2, "the tiger pounces on the dog");
|
||||
tap_ensure (x, "the tiger pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE);
|
||||
x = gensub(/(tiger|dog)/, "\\1-\\1", 2, @b"the tiger pounces on the dog");
|
||||
tap_ensure (x, @b"the tiger pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE);
|
||||
|
||||
## 0 as the third argument is the same as not passing "g"/"G" or a positive occurrence number.
|
||||
x = gensub(/(tiger|dog)/, "\\1-\\1", 0, "the tiger pounces on the dog");
|
||||
@ -220,6 +222,8 @@ function main()
|
||||
tap_ensure (x, "the tiger pounces on the dog", @SCRIPTNAME, @SCRIPTLINE);
|
||||
x = gensub(/(tiger|(dog))/, "\\1-\\2", 'g', "the tiger pounces on the dog");
|
||||
tap_ensure (x, "the tiger- pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE);
|
||||
x = gensub(/(tiger|(dog))/, "\\1-\\2", 'g', @b"the tiger pounces on the dog");
|
||||
tap_ensure (x, @b"the tiger- pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE);
|
||||
x = gensub(/(tiger|(dog))/, "[&]", 'g', "the tiger pounces on the dog");
|
||||
tap_ensure (x, "the [tiger] pounces on the [dog]", @SCRIPTNAME, @SCRIPTLINE);
|
||||
x = gensub(/(tiger|(dog))/, "[\\0]", 'g', "the tiger pounces on the dog");
|
||||
|
Reference in New Issue
Block a user