wip - implementing gensub
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2025-08-30 14:11:18 +09:00
parent 1036452736
commit 311e7e3580
6 changed files with 345 additions and 52 deletions

View File

@ -12,7 +12,7 @@ steps:
commands:
- find . -exec touch -r {} +
- mkdir -p bld/rocky9 && cd bld/rocky9
- ../../configure && make && make check && make rpm
- ../../configure && make && make check ##&& make rpm
- name: rocky9-release-rpms
image: docker.io/plugins/gitea-release:latest

View File

@ -168,8 +168,8 @@ am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/ac/ar-lib \
$(top_srcdir)/ac/ltmain.sh $(top_srcdir)/ac/missing \
$(top_srcdir)/ac/tap-driver.sh $(top_srcdir)/ac/test-driver \
$(top_srcdir)/pkgs/hawk.spec.in README.md ac/ar-lib ac/compile \
ac/config.guess ac/config.sub ac/depcomp ac/install-sh \
ac/ltmain.sh ac/missing
ac/config.guess ac/config.sub ac/install-sh ac/ltmain.sh \
ac/missing
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir)

View File

@ -56,6 +56,7 @@ hawk_fnc_t* hawk_findfncwithucs (hawk_t* hawk, const hawk_ucs_t* name);
#endif
/* EXPORT is required for linking on Windows as they are referenced by mod-str.c */
/* gensub built-in (defined in lib/fnc.c): reads its arguments from rtx, stores
 * the resulting string as the call's return value and returns 0; returns -1
 * on failure. */
HAWK_EXPORT int hawk_fnc_gensub (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi);
HAWK_EXPORT int hawk_fnc_gsub (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi);
HAWK_EXPORT int hawk_fnc_index (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi);
HAWK_EXPORT int hawk_fnc_length (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int mode);

361
lib/fnc.c
View File

@ -60,8 +60,8 @@ static hawk_fnc_t sysfnctab[] =
{ {HAWK_T("asorti"), 6}, 0, { {1, 3, HAWK_T("rrv")}, fnc_asorti, 0 }, HAWK_NULL},
/* string functions */
{ {HAWK_T("gensub"), 6}, 0, { {3, 4, HAWK_T("xvvv")}, hawk_fnc_gensub, 0 }, HAWK_NULL},
{ {HAWK_T("gsub"), 4}, 0, { {2, 3, HAWK_T("xvr")}, hawk_fnc_gsub, 0 }, HAWK_NULL},
/* TODO: gensub */
{ {HAWK_T("index"), 5}, 0, { {2, 3, HAWK_NULL}, hawk_fnc_index, 0 }, HAWK_NULL},
{ {HAWK_T("length"), 6}, 1, { {0, 1, HAWK_NULL}, fnc_length, 0 }, HAWK_NULL},
{ {HAWK_T("match"), 5}, 0, { {2, 3, HAWK_T("vxr")}, fnc_match, 0 }, HAWK_NULL},
@ -1222,9 +1222,10 @@ int hawk_fnc_toupper (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
return 0;
}
static int __substitute_oocs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t* rex, hawk_oocs_t* s1, hawk_oocs_t* s2, hawk_ooecs_t* new)
static int __substitute_oocs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t* rex, hawk_oocs_t* s1, hawk_oocs_t* s2, hawk_ooecs_t* new, int extended, hawk_oow_t op_pos)
{
hawk_oocs_t mat, pmat, cur;
hawk_oocs_t submat[9];
hawk_oow_t sub_count, match_limit;
hawk_ooch_t* s2_end;
@ -1246,7 +1247,7 @@ static int __substitute_oocs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t
if (sub_count < match_limit)
{
n = hawk_rtx_matchrexwithoocs(rtx, rex, s2, &cur, &mat, HAWK_NULL);
n = hawk_rtx_matchrexwithoocs(rtx, rex, s2, &cur, &mat, (extended? submat: HAWK_NULL));
if (HAWK_UNLIKELY(n <= -1)) goto oops;
}
else n = 0;
@ -1267,27 +1268,35 @@ static int __substitute_oocs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t
if (hawk_ooecs_ncat(new, cur.ptr, mat.ptr - cur.ptr) == (hawk_oow_t)-1) goto oops;
if (extended)
{
// TODO: check match occurrence in extended...
}
for (i = 0; i < s1->len; i++)
{
if ((i + 3) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '\\' && s1->ptr[i+2] == '\\' && s1->ptr[i+3] == '&')
if (s1->ptr[i] == '\\' && (i + 1) < s1->len)
{
/* \\\& to produce a literal \& */
m = hawk_ooecs_cat(new, HAWK_T("\\&"));
i += 3;
}
else if ((i + 2) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '\\' && s1->ptr[i+2] == '&')
{
/* \\& to produce a literal \ followed by the matched text */
m = hawk_ooecs_ccat(new, '\\');
if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops;
m = hawk_ooecs_ncat(new, mat.ptr, mat.len);
i += 2;
}
else if ((i + 1) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '&')
{
/* \& to produce literal '&' */
m = hawk_ooecs_ccat(new, '&');
i++;
if (extended) /* for gensub */
{
hawk_ooch_t ic = s1->ptr[i + 1];
if (ic == '0')
{
m = hawk_ooecs_ncat(new, mat.ptr, mat.len);
}
else if (ic >= '1' && ic <= '9')
{
hawk_oow_t idx = (ic - '0') - 1;
m = hawk_ooecs_ncat(new, submat[idx].ptr, submat[idx].len);
}
else goto escape;
}
else
{
escape:
m = hawk_ooecs_ccat(new, s1->ptr[i + 1]);
}
i++; /* skip the backslash */
}
else if (s1->ptr[i] == '&')
{
@ -1327,9 +1336,10 @@ oops:
return -1;
}
static int __substitute_bcs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t* rex, hawk_bcs_t* s1, hawk_bcs_t* s2, hawk_becs_t* new)
static int __substitute_bcs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t* rex, hawk_bcs_t* s1, hawk_bcs_t* s2, hawk_becs_t* new, int extended, hawk_oow_t op_pos)
{
hawk_bcs_t mat, pmat, cur;
hawk_bcs_t submat[9];
hawk_oow_t sub_count, match_limit;
hawk_bch_t* s2_end;
@ -1351,7 +1361,7 @@ static int __substitute_bcs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t*
if (sub_count < match_limit)
{
n = hawk_rtx_matchrexwithbcs(rtx, rex, s2, &cur, &mat, HAWK_NULL);
n = hawk_rtx_matchrexwithbcs(rtx, rex, s2, &cur, &mat, (extended? submat: HAWK_NULL));
if (HAWK_UNLIKELY(n <= -1)) goto oops;
}
else n = 0;
@ -1374,25 +1384,28 @@ static int __substitute_bcs (hawk_rtx_t* rtx, hawk_oow_t* max_count, hawk_tre_t*
for (i = 0; i < s1->len; i++)
{
if ((i + 3) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '\\' && s1->ptr[i+2] == '\\' && s1->ptr[i+3] == '&')
if (s1->ptr[i] == '\\' && (i + 1) < s1->len)
{
/* \\\& to produce a literal \& */
m = hawk_becs_cat(new, "\\&");
i += 3;
}
else if ((i + 2) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '\\' && s1->ptr[i+2] == '&')
{
/* \\& to produce a literal \ followed by the matched text */
m = hawk_becs_ccat(new, '\\');
if (HAWK_UNLIKELY(m == (hawk_oow_t)-1)) goto oops;
m = hawk_becs_ncat(new, mat.ptr, mat.len);
i += 2;
}
else if ((i + 1) < s1->len && s1->ptr[i] == '\\' && s1->ptr[i+1] == '&')
{
/* \& to produce literal '&' */
m = hawk_becs_ccat(new, '&');
i++;
if (extended) /* for gensub */
{
hawk_bch_t ic = s1->ptr[i + 1];
if (ic == '0')
{
m = hawk_becs_ncat(new, mat.ptr, mat.len);
}
else if (ic >= '1' && ic <= '9')
{
hawk_oow_t idx = (ic - '0') - 1;
m = hawk_becs_ncat(new, submat[idx].ptr, submat[idx].len);
}
else goto escape;
}
else
{
escape:
m = hawk_becs_ccat(new, s1->ptr[i + 1]);
}
i++; /* skip the backslash */
}
else if (s1->ptr[i] == '&')
{
@ -1529,12 +1542,12 @@ static int __substitute (hawk_rtx_t* rtx, hawk_oow_t max_count)
if (s2_free == 2)
{
hawk_becs_clear(&rtx->fnc.bout);
if (__substitute_bcs(rtx, &sub_count, rex, (hawk_bcs_t*)&s1, (hawk_bcs_t*)&s2, &rtx->fnc.bout) <= -1) goto oops;
if (__substitute_bcs(rtx, &sub_count, rex, (hawk_bcs_t*)&s1, (hawk_bcs_t*)&s2, &rtx->fnc.bout, 0, 0) <= -1) goto oops;
}
else
{
hawk_ooecs_clear(&rtx->fnc.oout);
if (__substitute_oocs(rtx, &sub_count, rex, (hawk_oocs_t*)&s1, (hawk_oocs_t*)&s2, &rtx->fnc.oout) <= -1) goto oops;
if (__substitute_oocs(rtx, &sub_count, rex, (hawk_oocs_t*)&s1, (hawk_oocs_t*)&s2, &rtx->fnc.oout, 0, 0) <= -1) goto oops;
}
if (rex_free)
@ -1654,6 +1667,268 @@ int hawk_fnc_sub (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
return __substitute(rtx, 1);
}
/*
 * Implements the gensub() built-in.
 *
 *   x = gensub(/(abc)/, "\\1\\1", "g");
 *   x = gensub(/(abc)/, "\\1\\1", "g", $0);
 *   x = gensub(/(tiger|deer)/, "\\1-\\1", "g", "the tiger pounced on the deer");
 *
 * Arguments, as read from rtx:
 *   0 - pattern: a regular expression value, or any value converted to a
 *       string and compiled into one for the duration of the call
 *   1 - substitute text
 *   2 - mode or position: a value starting with 'g' or 'G' requests that all
 *       matches be replaced; otherwise a positive number is taken as the
 *       position of the match to operate on
 *   3 - optional string to manipulate; the current input record line is used
 *       when it is omitted
 *
 * When the fourth argument is a byte character/string value, the work is done
 * on byte strings and the result is a byte string value. Otherwise the result
 * is a regular string value. The target itself is never modified.
 *
 * Returns 0 after setting the return value, or -1 on failure.
 */
int hawk_fnc_gensub (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi)
{
	hawk_oow_t nargs;
	hawk_val_t* a0, * a1, * a2, * a3 = HAWK_NULL, * v;
	hawk_val_type_t a0_vtype;
	hawk_oocs_t s0;
	hawk_ptl_t s1;
	hawk_ptl_t s2;
	int s1_free = 0; /* 0: nothing to free, 1: free as oocstr, 2: free as bcstr */
	int s2_free = 0; /* same encoding as s1_free */
	hawk_tre_t* rex = HAWK_NULL;
	hawk_tre_t* rex_free = HAWK_NULL;
	hawk_oow_t op_pos = 0; /* 0: no specific match position requested */
	hawk_oow_t max_count;
	hawk_oow_t sub_count;
	int ret = -1;

	/* every pointer examined by the cleanup code must start out as null
	 * because the function can bail out before any of them is assigned */
	s0.ptr = HAWK_NULL;
	s0.len = 0;
	s1.ptr = HAWK_NULL;
	s1.len = 0;
	s2.ptr = HAWK_NULL;
	s2.len = 0;

	nargs = hawk_rtx_getnargs(rtx);
	a0 = hawk_rtx_getarg(rtx, 0); /* pattern */
	a1 = hawk_rtx_getarg(rtx, 1); /* substitute */
	a2 = hawk_rtx_getarg(rtx, 2); /* mode or position */

	a0_vtype = HAWK_RTX_GETVALTYPE(rtx, a0);

	/* the first argument - pattern */
	if (a0_vtype == HAWK_VAL_REX)
	{
		rex = ((hawk_val_rex_t*)a0)->code[rtx->gbl.ignorecase];
	}
	else
	{
		s0.ptr = hawk_rtx_getvaloocstr(rtx, a0, &s0.len);
		if (HAWK_UNLIKELY(!s0.ptr)) goto done;
	}

	/* the third argument - 'g'/'G' for all matches, or a match position */
	max_count = 1;
	switch (HAWK_RTX_GETVALTYPE(rtx, a2))
	{
		case HAWK_VAL_BCHR:
		{
			hawk_bch_t ch;
			ch = HAWK_RTX_GETBCHRFROMVAL(rtx, a2);
			if (ch == 'g' || ch == 'G') max_count = HAWK_TYPE_MAX(hawk_oow_t);
			break;
		}

		case HAWK_VAL_MBS:
		case HAWK_VAL_BOB: /* no separate code as hawk_val_mbs_t and hawk_val_bob_t are similar for the ptr field type */
		{
			if (((hawk_val_mbs_t*)a2)->val.len >= 1)
			{
				hawk_bch_t ch;
				ch = ((hawk_val_mbs_t*)a2)->val.ptr[0];
				if (ch == 'g' || ch == 'G') max_count = HAWK_TYPE_MAX(hawk_oow_t);
			}
			break;
		}

		case HAWK_VAL_CHAR:
		{
			hawk_ooch_t ch;
			ch = HAWK_RTX_GETCHARFROMVAL(rtx, a2);
			if (ch == 'g' || ch == 'G') max_count = HAWK_TYPE_MAX(hawk_oow_t);
			break;
		}

		case HAWK_VAL_STR:
			if (((hawk_val_str_t*)a2)->val.len >= 1)
			{
				hawk_ooch_t ch;
				ch = ((hawk_val_str_t*)a2)->val.ptr[0];
				if (ch == 'g' || ch == 'G') max_count = HAWK_TYPE_MAX(hawk_oow_t);
			}
			break;

		default:
			/* other types can only carry a position, handled below */
			break;
	}

	if (max_count != HAWK_TYPE_MAX(hawk_oow_t))
	{
		hawk_int_t l;
		hawk_flt_t r;
		int n;

		/* hawk_rtx_valtonum() returns 0 when the value is stored as an integer
		 * in 'l', 1 when it is stored as a floating-point number in 'r', and
		 * -1 on failure. a failure or a non-positive number leaves op_pos at 0
		 * and max_count at 1. */
		n = hawk_rtx_valtonum(rtx, a2, &l, &r);
		if (n == 0)
		{
			if (l > 0) op_pos = (hawk_oow_t)l;
		}
		else if (n >= 1)
		{
			if (r > 0.0) op_pos = (hawk_oow_t)r;
		}
	}

	/* the optional fourth argument - string to manipulate */
	if (nargs < 4)
	{
		/* is this correct? any needs to use inrec.d0? */
		s2.ptr = HAWK_OOECS_PTR(&rtx->inrec.line);
		s2.len = HAWK_OOECS_LEN(&rtx->inrec.line);
		/* s2_free stays 0 as the record buffer is not owned by this function */

		/* the second argument - substitute */
		s1.ptr = hawk_rtx_getvaloocstr(rtx, a1, &s1.len);
		s1_free = 1;
	}
	else
	{
		a3 = hawk_rtx_getarg(rtx, 3);
		switch (HAWK_RTX_GETVALTYPE(rtx, a3))
		{
			case HAWK_VAL_BCHR:
			case HAWK_VAL_MBS:
			case HAWK_VAL_BOB:
				s2.ptr = hawk_rtx_getvalbcstr(rtx, a3, &s2.len);
				s2_free = 2;

				/* the second argument - substitute */
				s1.ptr = hawk_rtx_getvalbcstr(rtx, a1, &s1.len);
				s1_free = 2;
				break;

			default:
				s2.ptr = hawk_rtx_getvaloocstr(rtx, a3, &s2.len);
				s2_free = 1;

				/* the second argument - substitute */
				s1.ptr = hawk_rtx_getvaloocstr(rtx, a1, &s1.len);
				s1_free = 1;
				break;
		}
	}
	if (HAWK_UNLIKELY(!s1.ptr || !s2.ptr)) goto done;

	if (a0_vtype != HAWK_VAL_REX)
	{
		int x;

		/* compile only the variant selected by the ignorecase setting */
		x = rtx->gbl.ignorecase?
			hawk_rtx_buildrex(rtx, s0.ptr, s0.len, HAWK_NULL, &rex):
			hawk_rtx_buildrex(rtx, s0.ptr, s0.len, &rex, HAWK_NULL);
		if (HAWK_UNLIKELY(x <= -1)) goto done;

		rex_free = rex;
	}

	sub_count = max_count;
	if (s2_free == 2)
	{
		hawk_becs_clear(&rtx->fnc.bout);
		if (__substitute_bcs(rtx, &sub_count, rex, (hawk_bcs_t*)&s1, (hawk_bcs_t*)&s2, &rtx->fnc.bout, 1, op_pos) <= -1) goto done;
	}
	else
	{
		hawk_ooecs_clear(&rtx->fnc.oout);
		if (__substitute_oocs(rtx, &sub_count, rex, (hawk_oocs_t*)&s1, (hawk_oocs_t*)&s2, &rtx->fnc.oout, 1, op_pos) <= -1) goto done;
	}

	/* the result has been accumulated in the runtime's output buffer */
	v = (s2_free == 2)?
		hawk_rtx_makembsvalwithbcs(rtx, HAWK_BECS_BCS(&rtx->fnc.bout)):
		hawk_rtx_makestrvalwithoocs(rtx, HAWK_OOECS_OOCS(&rtx->fnc.oout));
	if (HAWK_UNLIKELY(!v)) goto done;

	hawk_rtx_setretval(rtx, v);
	ret = 0;

done:
	/* single cleanup path shared by success and failure */
	if (rex_free)
	{
		if (rtx->gbl.ignorecase)
			hawk_rtx_freerex(rtx, HAWK_NULL, rex_free);
		else
			hawk_rtx_freerex(rtx, rex_free, HAWK_NULL);
	}

	if (s2.ptr)
	{
		switch (s2_free)
		{
			case 1:
				hawk_rtx_freevaloocstr(rtx, a3, s2.ptr);
				break;
			case 2:
				hawk_rtx_freevalbcstr(rtx, a3, s2.ptr);
				break;
		}
	}

	if (s1.ptr)
	{
		switch (s1_free)
		{
			case 1:
				hawk_rtx_freevaloocstr(rtx, a1, s1.ptr);
				break;
			case 2:
				hawk_rtx_freevalbcstr(rtx, a1, s1.ptr);
				break;
		}
	}

	if (s0.ptr) hawk_rtx_freevaloocstr(rtx, a0, s0.ptr);

	return ret;
}
static int __fnc_match (hawk_rtx_t* rtx, const hawk_fnc_info_t* fi, int support_start_index)
{
hawk_oow_t nargs;

View File

@ -305,6 +305,15 @@ HAWK_EXPORT void hawk_fini_xma_mmgr (
hawk_mmgr_t* mmgr
);
/* NOTE(review): presumably sets up *mmgr as an 'ama' memory manager, as the
 * counterpart of the 'xma' variant declared above - confirm against the
 * implementation, including what the int result means. */
HAWK_EXPORT int hawk_init_ama_mmgr (
hawk_mmgr_t* mmgr
);
/* NOTE(review): presumably releases what hawk_init_ama_mmgr() set up in
 * *mmgr - confirm against the implementation. */
HAWK_EXPORT void hawk_fini_ama_mmgr (
hawk_mmgr_t* mmgr
);
#if defined(__cplusplus)
}
#endif

View File

@ -99,8 +99,13 @@ static int matchtre_ucs (hawk_tre_t* tre, int opt, const hawk_ucs_t* str, hawk_u
{
if (match[i].rm_so != -1)
{
submat[i-1].ptr = &str->ptr[match[i].rm_so];
submat[i-1].len = match[i].rm_eo - match[i].rm_so;
submat[i - 1].ptr = &str->ptr[match[i].rm_so];
submat[i - 1].len = match[i].rm_eo - match[i].rm_so;
}
else
{
submat[i - 1].ptr = HAWK_NULL;
submat[i - 1].len = 0;
}
}
}
@ -132,14 +137,17 @@ static int matchtre_bcs (hawk_tre_t* tre, int opt, const hawk_bcs_t* str, hawk_b
{
int i;
/* you must intialize submat before you pass into this
* function because it can abort filling */
for (i = 1; i < HAWK_COUNTOF(match); i++)
{
if (match[i].rm_so != -1)
{
submat[i-1].ptr = &str->ptr[match[i].rm_so];
submat[i-1].len = match[i].rm_eo - match[i].rm_so;
submat[i - 1].ptr = &str->ptr[match[i].rm_so];
submat[i - 1].len = match[i].rm_eo - match[i].rm_so;
}
else
{
submat[i - 1].ptr = HAWK_NULL;
submat[i - 1].len = 0;
}
}
}