more code clean-up
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2025-08-31 17:58:12 +09:00
parent 8f1b8ffc03
commit 6aca30c893
3 changed files with 194 additions and 181 deletions

View File

@ -861,21 +861,19 @@ static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
hawk_oocs_t tok; hawk_oocs_t tok;
hawk_int_t nflds; hawk_int_t nflds;
int is_byte_str; int is_byte_str;
int x, do_fld = 0; int x;
int switch_fs_to_bchr = 0; int do_fld;
int switch_fs_to_bchr;
str.ptr = HAWK_NULL;
str.len = 0;
nargs = hawk_rtx_getnargs(rtx); nargs = hawk_rtx_getnargs(rtx);
HAWK_ASSERT (nargs >= 2 && nargs <= 3);
a0 = hawk_rtx_getarg(rtx, 0); a0 = hawk_rtx_getarg(rtx, 0);
a2 = (nargs >= 3)? hawk_rtx_getarg(rtx, 2): HAWK_NULL; a2 = (nargs >= 3)? hawk_rtx_getarg(rtx, 2): HAWK_NULL;
do_fld = 0;
switch_fs_to_bchr = 0;
is_byte_str = 0;
str.ptr = HAWK_NULL; str.ptr = HAWK_NULL;
str.len = 0; str.len = 0;
is_byte_str = 0;
/* field seperator */ /* field seperator */
t0 = a2? a2: hawk_rtx_getgbl(rtx, HAWK_GBL_FS); /* if a2 is not available, get the value from FS */ t0 = a2? a2: hawk_rtx_getgbl(rtx, HAWK_GBL_FS); /* if a2 is not available, get the value from FS */
@ -977,21 +975,25 @@ static int fnc_split (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int use_array)
{ {
if (fs_rex) if (fs_rex)
{ {
p = is_byte_str? (hawk_ooch_t*)hawk_rtx_tokbcharsbyrex(rtx, (hawk_bch_t*)str.ptr, org_len, (hawk_bch_t*)p, str.len, fs_rex, (hawk_bcs_t*)&tok): p = is_byte_str?
hawk_rtx_tokoocharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok); (hawk_ooch_t*)hawk_rtx_tokbcharsbyrex(rtx, (hawk_bch_t*)str.ptr, org_len, (hawk_bch_t*)p, str.len, fs_rex, (hawk_bcs_t*)&tok):
hawk_rtx_tokoocharsbyrex(rtx, str.ptr, org_len, p, str.len, fs_rex, &tok);
if (p && hawk_rtx_geterrnum(rtx) != HAWK_ENOERR) goto oops; if (p && hawk_rtx_geterrnum(rtx) != HAWK_ENOERR) goto oops;
} }
else if (do_fld) else if (do_fld)
{ {
/* [NOTE] even if is_byte_str is true, the field seperator is of the ooch type. /* [NOTE] even if is_byte_str is true, the field seperator is of the ooch type.
* there may be some data truncation and related issues */ * there may be some data truncation and related issues */
p = is_byte_str? (hawk_ooch_t*)hawk_rtx_fldbchars(rtx, (hawk_bch_t*)p, str.len, fs.ptr[1], fs.ptr[2], fs.ptr[3], fs.ptr[4], (hawk_bcs_t*)&tok): /* ? sep esc lq rq */
hawk_rtx_fldoochars(rtx, p, str.len, fs.ptr[1], fs.ptr[2], fs.ptr[3], fs.ptr[4], &tok); p = is_byte_str?
(hawk_ooch_t*)hawk_rtx_fldbchars(rtx, (hawk_bch_t*)p, str.len, (hawk_bchu_t)fs.ptr[1], (hawk_bchu_t)fs.ptr[2], (hawk_bchu_t)fs.ptr[3], (hawk_bchu_t)fs.ptr[4], (hawk_bcs_t*)&tok):
hawk_rtx_fldoochars(rtx, p, str.len, fs.ptr[1], fs.ptr[2], fs.ptr[3], fs.ptr[4], &tok);
} }
else else
{ {
p = is_byte_str? (hawk_ooch_t*)hawk_rtx_tokbcharswithbchars(rtx, (hawk_bch_t*)p, str.len, (hawk_bch_t*)fs.ptr, fs.len, (hawk_bcs_t*)&tok): p = is_byte_str?
hawk_rtx_tokoocharswithoochars(rtx, p, str.len, fs.ptr, fs.len, &tok); (hawk_ooch_t*)hawk_rtx_tokbcharswithbchars(rtx, (hawk_bch_t*)p, str.len, (hawk_bch_t*)fs.ptr, fs.len, (hawk_bcs_t*)&tok):
hawk_rtx_tokoocharswithoochars(rtx, p, str.len, fs.ptr, fs.len, &tok);
} }
if (nflds == 0 && !p && tok.len == 0) if (nflds == 0 && !p && tok.len == 0)
@ -1311,7 +1313,6 @@ static int __substitute_oocs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t
{ {
m = hawk_ooecs_ccat(new, s1->ptr[i]); m = hawk_ooecs_ccat(new, s1->ptr[i]);
} }
if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops; if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops;
} }
@ -1347,13 +1348,14 @@ static int __substitute_bcs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t*
{ {
hawk_bcs_t mat, pmat, cur; hawk_bcs_t mat, pmat, cur;
hawk_bcs_t submat[9]; hawk_bcs_t submat[9];
hawk_oow_t sub_count, match_limit; hawk_oow_t sub_count, op_count, match_limit;
hawk_bch_t* s2_end; hawk_bch_t* s2_end;
s2_end = s2->ptr + s2->len; s2_end = s2->ptr + s2->len;
cur.ptr = s2->ptr; cur.ptr = s2->ptr;
cur.len = s2->len; cur.len = s2->len;
sub_count = 0; sub_count = 0;
op_count = 0;
match_limit = *max_count; match_limit = *max_count;
pmat.ptr = HAWK_NULL; pmat.ptr = HAWK_NULL;
@ -1389,44 +1391,51 @@ static int __substitute_bcs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t*
if (hawk_becs_ncat(new, cur.ptr, mat.ptr - cur.ptr) == (hawk_oow_t)-1) goto oops; if (hawk_becs_ncat(new, cur.ptr, mat.ptr - cur.ptr) == (hawk_oow_t)-1) goto oops;
for (i = 0; i < s1->len; i++) if (extended && op_pos > 0 && (op_pos - 1) != op_count)
{ {
if (s1->ptr[i] == '\\' && (i + 1) < s1->len) /* don't perform substitution. just copy the original string to the output */
if (hawk_becs_ncat(new, mat.ptr, mat.len) == (hawk_oow_t)-1) goto oops;
}
else
{
for (i = 0; i < s1->len; i++)
{ {
if (extended) /* for gensub */ if (s1->ptr[i] == '\\' && (i + 1) < s1->len)
{ {
hawk_bch_t ic = s1->ptr[i + 1]; if (extended) /* for gensub */
if (ic == '0')
{ {
m = hawk_becs_ncat(new, mat.ptr, mat.len); hawk_bch_t ic = s1->ptr[i + 1];
if (ic == '0')
{
m = hawk_becs_ncat(new, mat.ptr, mat.len);
}
else if (ic >= '1' && ic <= '9')
{
hawk_oow_t idx = (ic - '0') - 1;
m = hawk_becs_ncat(new, submat[idx].ptr, submat[idx].len);
}
else goto escape;
} }
else if (ic >= '1' && ic <= '9') else
{ {
hawk_oow_t idx = (ic - '0') - 1; escape:
m = hawk_becs_ncat(new, submat[idx].ptr, submat[idx].len); m = hawk_becs_ccat(new, s1->ptr[i + 1]);
} }
else goto escape; i++; /* skip the backslash */
}
else if (s1->ptr[i] == '&')
{
m = hawk_becs_ncat(new, mat.ptr, mat.len);
} }
else else
{ {
escape: m = hawk_becs_ccat(new, s1->ptr[i]);
m = hawk_becs_ccat(new, s1->ptr[i + 1]);
} }
i++; /* skip the backslash */ if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops;
} }
else if (s1->ptr[i] == '&') sub_count++;
{
m = hawk_becs_ncat(new, mat.ptr, mat.len);
}
else
{
m = hawk_becs_ccat(new, s1->ptr[i]);
}
if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops;
} }
op_count++;
sub_count++;
cur.len = cur.len - ((mat.ptr - cur.ptr) + mat.len); cur.len = cur.len - ((mat.ptr - cur.ptr) + mat.len);
cur.ptr = mat.ptr + mat.len; cur.ptr = mat.ptr + mat.len;

280
lib/run.c

File diff suppressed because it is too large Load Diff

View File

@ -210,6 +210,8 @@ function main()
tap_ensure (x, "the tiger-tiger pounces on the dog", @SCRIPTNAME, @SCRIPTLINE); tap_ensure (x, "the tiger-tiger pounces on the dog", @SCRIPTNAME, @SCRIPTLINE);
x = gensub(/(tiger|dog)/, "\\1-\\1", 2, "the tiger pounces on the dog"); x = gensub(/(tiger|dog)/, "\\1-\\1", 2, "the tiger pounces on the dog");
tap_ensure (x, "the tiger pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE); tap_ensure (x, "the tiger pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE);
x = gensub(/(tiger|dog)/, "\\1-\\1", 2, @b"the tiger pounces on the dog");
tap_ensure (x, @b"the tiger pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE);
## 0 as the third argument is same as not passing "g"/"G" or a positive occurrence number. ## 0 as the third argument is same as not passing "g"/"G" or a positive occurrence number.
x = gensub(/(tiger|dog)/, "\\1-\\1", 0, "the tiger pounces on the dog"); x = gensub(/(tiger|dog)/, "\\1-\\1", 0, "the tiger pounces on the dog");
@ -220,6 +222,8 @@ function main()
tap_ensure (x, "the tiger pounces on the dog", @SCRIPTNAME, @SCRIPTLINE); tap_ensure (x, "the tiger pounces on the dog", @SCRIPTNAME, @SCRIPTLINE);
x = gensub(/(tiger|(dog))/, "\\1-\\2", 'g', "the tiger pounces on the dog"); x = gensub(/(tiger|(dog))/, "\\1-\\2", 'g', "the tiger pounces on the dog");
tap_ensure (x, "the tiger- pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE); tap_ensure (x, "the tiger- pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE);
x = gensub(/(tiger|(dog))/, "\\1-\\2", 'g', @b"the tiger pounces on the dog");
tap_ensure (x, @b"the tiger- pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE);
x = gensub(/(tiger|(dog))/, "[&]", 'g', "the tiger pounces on the dog"); x = gensub(/(tiger|(dog))/, "[&]", 'g', "the tiger pounces on the dog");
tap_ensure (x, "the [tiger] pounces on the [dog]", @SCRIPTNAME, @SCRIPTLINE); tap_ensure (x, "the [tiger] pounces on the [dog]", @SCRIPTNAME, @SCRIPTLINE);
x = gensub(/(tiger|(dog))/, "[\\0]", 'g', "the tiger pounces on the dog"); x = gensub(/(tiger|(dog))/, "[\\0]", 'g', "the tiger pounces on the dog");