From 0f59ab4a943addd1cad88993b9f17f69779ce8dd Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Sat, 30 Aug 2025 22:51:23 +0900 Subject: [PATCH] updated gensub to support the occurrence number (the third argument) --- lib/fnc.c | 69 ++++++++++++++++++++++++++++----------------------- lib/mod-sys.c | 8 +++--- mod/mod-sed.c | 2 -- t/h-002.hawk | 23 +++++++++++++++++ t/t-009.c | 2 +- 5 files changed, 66 insertions(+), 38 deletions(-) diff --git a/lib/fnc.c b/lib/fnc.c index 6d0b87f9..d3256b40 100644 --- a/lib/fnc.c +++ b/lib/fnc.c @@ -1226,13 +1226,14 @@ static int __substitute_oocs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t { hawk_oocs_t mat, pmat, cur; hawk_oocs_t submat[9]; - hawk_oow_t sub_count, match_limit; + hawk_oow_t sub_count, op_count, match_limit; hawk_ooch_t* s2_end; s2_end = s2->ptr + s2->len; cur.ptr = s2->ptr; cur.len = s2->len; sub_count = 0; + op_count = 0; match_limit = *max_count; pmat.ptr = HAWK_NULL; @@ -1268,49 +1269,53 @@ static int __substitute_oocs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t if (hawk_ooecs_ncat(new, cur.ptr, mat.ptr - cur.ptr) == (hawk_oow_t)-1) goto oops; - if (extended) + if (extended && op_pos > 0 && (op_pos - 1) != op_count) { -// TODO: check match occurrence in extended... + /* don't perform substitution. 
just copy the original string to the output */ + if (hawk_ooecs_ncat(new, mat.ptr, mat.len) == (hawk_oow_t)-1) goto oops; } - - for (i = 0; i < s1->len; i++) + else { - if (s1->ptr[i] == '\\' && (i + 1) < s1->len) + for (i = 0; i < s1->len; i++) { - if (extended) /* for gensub */ + if (s1->ptr[i] == '\\' && (i + 1) < s1->len) { - hawk_ooch_t ic = s1->ptr[i + 1]; - if (ic == '0') + if (extended) /* for gensub */ { - m = hawk_ooecs_ncat(new, mat.ptr, mat.len); + hawk_ooch_t ic = s1->ptr[i + 1]; + if (ic == '0') + { + m = hawk_ooecs_ncat(new, mat.ptr, mat.len); + } + else if (ic >= '1' && ic <= '9') + { + hawk_oow_t idx = (ic - '0') - 1; + m = hawk_ooecs_ncat(new, submat[idx].ptr, submat[idx].len); + } + else goto escape; } - else if (ic >= '1' && ic <= '9') + else { - hawk_oow_t idx = (ic - '0') - 1; - m = hawk_ooecs_ncat(new, submat[idx].ptr, submat[idx].len); + escape: + m = hawk_ooecs_ccat(new, s1->ptr[i + 1]); } - else goto escape; + i++; /* skip the backslash */ + } + else if (s1->ptr[i] == '&') + { + m = hawk_ooecs_ncat(new, mat.ptr, mat.len); } else { - escape: - m = hawk_ooecs_ccat(new, s1->ptr[i + 1]); + m = hawk_ooecs_ccat(new, s1->ptr[i]); } - i++; /* skip the backslash */ - } - else if (s1->ptr[i] == '&') - { - m = hawk_ooecs_ncat(new, mat.ptr, mat.len); - } - else - { - m = hawk_ooecs_ccat(new, s1->ptr[i]); + + if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops; } - if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops; + sub_count++; /* number of actual substitutions */ } - - sub_count++; + op_count++; /* actual substitutions + skipped substitutions. 
not needed if extended is false */ cur.len = cur.len - ((mat.ptr - cur.ptr) + mat.len); cur.ptr = mat.ptr + mat.len; @@ -1716,6 +1721,7 @@ int hawk_fnc_gensub (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) } max_count = 1; + op_pos = 0; switch (HAWK_RTX_GETVALTYPE(rtx, a2)) { case HAWK_VAL_BCHR: @@ -1761,16 +1767,17 @@ int hawk_fnc_gensub (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) hawk_int_t l; hawk_flt_t r; int n; + n = hawk_rtx_valtonum(rtx, a2, &l, &r); - if (n == 1) + if (n == 0) { if (l > 0) { - op_pos = l; + op_pos = (hawk_oow_t)l; max_count = 1; } } - else if (n > 1) + else if (n > 0) { if (r > 0.0) { diff --git a/lib/mod-sys.c b/lib/mod-sys.c index b9db182c..2dc30547 100644 --- a/lib/mod-sys.c +++ b/lib/mod-sys.c @@ -1248,11 +1248,11 @@ static int fnc_tcgetattr (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) /* this is md[4].key.len = 2; md[4].type = HAWK_VAL_MAP_DATA_BCS; md[4].vptr = &c_cc; - c_cc.ptr = t.c_cc; + c_cc.ptr = (hawk_bch_t*)t.c_cc; c_cc.len = HAWK_COUNTOF(t.c_cc); tmp = hawk_rtx_makemapvalwithdata(rtx, md, HAWK_COUNTOF(md)); - if (!tmp) goto fail; + if (HAWK_UNLIKELY(!tmp)) goto fail; hawk_rtx_refupval(rtx, tmp); x = hawk_rtx_setrefval(rtx, (hawk_val_ref_t*)hawk_rtx_getarg(rtx, 1), tmp); @@ -5749,7 +5749,7 @@ static hawk_int_t unpack_data (hawk_rtx_t* rtx, const hawk_bcs_t* bin, const haw case 'p': { UNPACK_CHECK_ARG_AND_DATA (1, rep_cnt); - v = hawk_rtx_makembsvalwithbchars(rtx, binp, rep_cnt); + v = hawk_rtx_makembsvalwithbchars(rtx, (const hawk_bch_t*)binp, rep_cnt); binp += rep_cnt; if (HAWK_UNLIKELY(!v)) goto oops_internal; if (hawk_rtx_setrefval(rtx, (hawk_val_ref_t*)hawk_rtx_getarg(rtx, arg_idx++), v) <= -1) goto oops_internal; @@ -5814,7 +5814,7 @@ static int fnc_pack (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi) hawk_val_t* tmp; int x; - tmp = hawk_rtx_makembsvalwithbchars(rtx, rdp->pack.ptr, rdp->pack.len); + tmp = hawk_rtx_makembsvalwithbchars(rtx, (const hawk_bch_t*)rdp->pack.ptr, rdp->pack.len); if (HAWK_UNLIKELY(!tmp)) goto fail; 
hawk_rtx_refupval(rtx, tmp); diff --git a/mod/mod-sed.c b/mod/mod-sed.c index 036e5457..8405b6d0 100644 --- a/mod/mod-sed.c +++ b/mod/mod-sed.c @@ -253,8 +253,6 @@ static void unload (hawk_mod_t* mod, hawk_t* hawk) int hawk_mod_sed (hawk_mod_t* mod, hawk_t* hawk) { - hawk_ntime_t tv; - mod->query = query; mod->unload = unload; diff --git a/t/h-002.hawk b/t/h-002.hawk index fc4d1682..2f0d50af 100644 --- a/t/h-002.hawk +++ b/t/h-002.hawk @@ -196,6 +196,29 @@ function main() tap_ensure (z, @r"x\&ax", @SCRIPTNAME, @SCRIPTLINE); } + ## gensub + { + @local x; + x = gensub(/(tiger|dog)/, "\\1-\\1", "g", "the tiger pounces on the dog"); + tap_ensure (x, "the tiger-tiger pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE); + x = gensub(/(tiger|dog)/, "\\1-\\1", 'G', "the tiger pounces on the dog"); + tap_ensure (x, "the tiger-tiger pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE); + x = gensub(/(tiger|dog)/, "\\1-\\1", 'G', @b"the tiger pounces on the dog"); + tap_ensure (x, @b"the tiger-tiger pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE); + tap_ensure (hawk::typename(x), "mbs", @SCRIPTNAME, @SCRIPTLINE); + x = gensub(/(tiger|dog)/, "\\1-\\1", 1, "the tiger pounces on the dog"); + tap_ensure (x, "the tiger-tiger pounces on the dog", @SCRIPTNAME, @SCRIPTLINE); + x = gensub(/(tiger|dog)/, "\\1-\\1", 2, "the tiger pounces on the dog"); + tap_ensure (x, "the tiger pounces on the dog-dog", @SCRIPTNAME, @SCRIPTLINE); + + ## 0 as the third argument is same as not passing "g"/"G" or a positive occurrence number. 
+ x = gensub(/(tiger|dog)/, "\\1-\\1", 0, "the tiger pounces on the dog"); + tap_ensure (x, "the tiger-tiger pounces on the dog", @SCRIPTNAME, @SCRIPTLINE); + x = gensub(/(tiger|dog)/, "\\1-\\1", "", "the tiger pounces on the dog"); + tap_ensure (x, "the tiger-tiger pounces on the dog", @SCRIPTNAME, @SCRIPTLINE); + x = gensub(/(tiger|dog)/, "\\1-\\1", 10, "the tiger pounces on the dog"); + tap_ensure (x, "the tiger pounces on the dog", @SCRIPTNAME, @SCRIPTLINE); + } { @local pi, e, tmp; diff --git a/t/t-009.c b/t/t-009.c index 69c4a4d0..e727d816 100644 --- a/t/t-009.c +++ b/t/t-009.c @@ -194,7 +194,7 @@ int main(int argc, char* argv[]) malloc_time = (double)(end_time - start_time) / CLOCKS_PER_SEC - free_time; - printf("Performed %d interleaved malloc/free operations\n", num_iterations); + printf("Performed %lu interleaved malloc/free operations\n", (unsigned long)num_iterations); printf("Total malloc time (estimated): %.6f seconds\n", malloc_time); printf("Total free time : %.6f seconds\n", free_time); printf("Average time per operation : %.9f seconds\n", (malloc_time + free_time) / num_iterations);