implemented the optional fourth parameter to match() in awk to support submatch retrieval
This commit is contained in:
parent
27dd0a551b
commit
905da20d24
@ -66,7 +66,7 @@ static qse_awk_fnc_t sysfnctab[] =
|
||||
{ {QSE_T("toupper"), 7}, 0, { {1, 1, QSE_NULL}, qse_awk_fnc_toupper, 0 }, QSE_NULL},
|
||||
{ {QSE_T("gsub"), 4}, 0, { {2, 3, QSE_T("xvr")}, qse_awk_fnc_gsub, 0 }, QSE_NULL},
|
||||
{ {QSE_T("sub"), 3}, 0, { {2, 3, QSE_T("xvr")}, qse_awk_fnc_sub, 0 }, QSE_NULL},
|
||||
{ {QSE_T("match"), 5}, 0, { {2, 3, QSE_T("vxv")}, qse_awk_fnc_match, 0 }, QSE_NULL},
|
||||
{ {QSE_T("match"), 5}, 0, { {2, 4, QSE_T("vxvr")}, qse_awk_fnc_match, 0 }, QSE_NULL},
|
||||
{ {QSE_T("sprintf"), 7}, 0, { {1, A_MAX, QSE_NULL}, qse_awk_fnc_sprintf, 0 }, QSE_NULL},
|
||||
|
||||
/* math functions */
|
||||
@ -1001,7 +1001,7 @@ static int __substitute (qse_awk_rtx_t* rtx, qse_awk_int_t max_count)
|
||||
{
|
||||
n = qse_awk_matchrex (
|
||||
rtx->awk, rex, rtx->gbl.ignorecase,
|
||||
&s2, &cur, &mat, &errnum
|
||||
&s2, &cur, &mat, QSE_NULL, &errnum
|
||||
);
|
||||
}
|
||||
else n = 0;
|
||||
@ -1159,15 +1159,17 @@ int qse_awk_fnc_sub (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
|
||||
int qse_awk_fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
|
||||
{
|
||||
qse_size_t nargs;
|
||||
qse_awk_val_t* a0, * a1;
|
||||
qse_awk_val_t* a0, * a1, * a3;
|
||||
qse_char_t* str0;
|
||||
qse_size_t len0;
|
||||
qse_awk_int_t idx, start = 1;
|
||||
qse_awk_val_t* x0 = QSE_NULL, * x1 = QSE_NULL, * x2 = QSE_NULL;
|
||||
int n;
|
||||
qse_cstr_t mat;
|
||||
qse_cstr_t mat, submat[9];
|
||||
qse_str_t* tmpbuf = QSE_NULL;
|
||||
|
||||
nargs = qse_awk_rtx_getnargs (rtx);
|
||||
QSE_ASSERT (nargs >= 2 && nargs <= 3);
|
||||
QSE_ASSERT (nargs >= 2 && nargs <= 4);
|
||||
|
||||
a0 = qse_awk_rtx_getarg (rtx, 0);
|
||||
a1 = qse_awk_rtx_getarg (rtx, 1);
|
||||
@ -1181,6 +1183,8 @@ int qse_awk_fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
|
||||
* it is treated as a match start index */
|
||||
n = qse_awk_rtx_valtoint (rtx, a2, &start);
|
||||
if (n <= -1) return -1;
|
||||
|
||||
if (nargs >= 4) a3 = qse_awk_rtx_getarg (rtx, 3);
|
||||
}
|
||||
|
||||
str0 = qse_awk_rtx_getvalstr (rtx, a0, &len0);
|
||||
@ -1189,6 +1193,7 @@ int qse_awk_fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
|
||||
if (start == 0) start = 1;
|
||||
else if (start < 0) start = len0 + start + 1;
|
||||
|
||||
QSE_MEMSET (submat, 0, QSE_SIZEOF(submat));
|
||||
if (start > len0 || start <= 0) n = 0;
|
||||
else
|
||||
{
|
||||
@ -1198,7 +1203,7 @@ int qse_awk_fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
|
||||
tmp.ptr = str0 + start - 1;
|
||||
tmp.len = len0 - start + 1;
|
||||
|
||||
n = qse_awk_rtx_matchrex (rtx, a1, &tmp, &tmp, &mat);
|
||||
n = qse_awk_rtx_matchrex (rtx, a1, &tmp, &tmp, &mat, (nargs >= 4? submat: QSE_NULL));
|
||||
if (n <= -1) return -1;
|
||||
}
|
||||
|
||||
@ -1207,43 +1212,90 @@ int qse_awk_fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
|
||||
/* RSTART: 0 on no match */
|
||||
idx = (n == 0)? 0: ((qse_awk_int_t)(mat.ptr-str0) + 1);
|
||||
|
||||
a0 = qse_awk_rtx_makeintval (rtx, idx);
|
||||
if (a0 == QSE_NULL) return -1;
|
||||
|
||||
qse_awk_rtx_refupval (rtx, a0);
|
||||
x0 = qse_awk_rtx_makeintval (rtx, idx);
|
||||
if (!x0) goto oops;
|
||||
qse_awk_rtx_refupval (rtx, x0);
|
||||
|
||||
/* RLENGTH: -1 on no match */
|
||||
a1 = qse_awk_rtx_makeintval (rtx,
|
||||
x1 = qse_awk_rtx_makeintval (rtx,
|
||||
((n == 0)? (qse_awk_int_t)-1: (qse_awk_int_t)mat.len));
|
||||
if (a1 == QSE_NULL)
|
||||
if (!x1) goto oops;
|
||||
qse_awk_rtx_refupval (rtx, x1);
|
||||
|
||||
if (nargs >= 4)
|
||||
{
|
||||
qse_awk_rtx_refdownval (rtx, a0);
|
||||
return -1;
|
||||
const qse_cstr_t* subsep;
|
||||
qse_awk_int_t submatcount;
|
||||
qse_size_t i, xlen;
|
||||
qse_awk_val_t* tv;
|
||||
|
||||
tmpbuf = qse_str_open (qse_awk_rtx_getmmgr(rtx), 0, 64);
|
||||
if (!tmpbuf) goto oops;
|
||||
|
||||
x2 = qse_awk_rtx_makemapval (rtx);
|
||||
if (!x2) goto oops;
|
||||
qse_awk_rtx_refupval (rtx, x2);
|
||||
|
||||
submatcount =0;
|
||||
subsep = qse_awk_rtx_getsubsep (rtx);
|
||||
for (i = 0; i < QSE_COUNTOF(submat); i++)
|
||||
{
|
||||
if (!submat[i].ptr) break;
|
||||
|
||||
submatcount++;
|
||||
|
||||
if (qse_str_fmt (tmpbuf, QSE_T("%d"), (int)submatcount) == (qse_size_t)-1 ||
|
||||
qse_str_ncat (tmpbuf, subsep->ptr, subsep->len) == (qse_size_t)-1) goto oops;
|
||||
xlen = QSE_STR_LEN(tmpbuf);
|
||||
if (qse_str_ncat (tmpbuf, QSE_T("start"), 5) == (qse_size_t)-1) goto oops;
|
||||
|
||||
tv = qse_awk_rtx_makeintval (rtx, submat[i].ptr - str0 + 1);
|
||||
if (!tv) goto oops;
|
||||
if (!qse_awk_rtx_setmapvalfld (rtx, x2, QSE_STR_PTR(tmpbuf), QSE_STR_LEN(tmpbuf), tv))
|
||||
{
|
||||
qse_awk_rtx_refupval (rtx, tv);
|
||||
qse_awk_rtx_refdownval (rtx, tv);
|
||||
goto oops;
|
||||
}
|
||||
|
||||
qse_awk_rtx_refupval (rtx, a1);
|
||||
if (qse_str_setlen (tmpbuf, xlen) == (qse_size_t)-1 ||
|
||||
qse_str_ncat (tmpbuf, QSE_T("length"), 6) == (qse_size_t)-1) goto oops;
|
||||
|
||||
if (qse_awk_rtx_setgbl (rtx, QSE_AWK_GBL_RSTART, a0) <= -1)
|
||||
tv = qse_awk_rtx_makeintval (rtx, submat[i].len);
|
||||
if (!tv) goto oops;
|
||||
if (!qse_awk_rtx_setmapvalfld (rtx, x2, QSE_STR_PTR(tmpbuf), QSE_STR_LEN(tmpbuf), tv))
|
||||
{
|
||||
qse_awk_rtx_refdownval (rtx, a1);
|
||||
qse_awk_rtx_refdownval (rtx, a0);
|
||||
return -1;
|
||||
qse_awk_rtx_refupval (rtx, tv);
|
||||
qse_awk_rtx_refdownval (rtx, tv);
|
||||
goto oops;
|
||||
}
|
||||
}
|
||||
/* the caller of this function must be able to get the submatch count by
|
||||
* dividing the array size by 2 */
|
||||
|
||||
if (qse_awk_rtx_setrefval (rtx, (qse_awk_val_ref_t*)a3, x2) <= -1) goto oops;
|
||||
}
|
||||
|
||||
if (qse_awk_rtx_setgbl (rtx, QSE_AWK_GBL_RLENGTH, a1) <= -1)
|
||||
if (qse_awk_rtx_setgbl (rtx, QSE_AWK_GBL_RSTART, x0) <= -1 ||
|
||||
qse_awk_rtx_setgbl (rtx, QSE_AWK_GBL_RLENGTH, x1) <= -1)
|
||||
{
|
||||
qse_awk_rtx_refdownval (rtx, a1);
|
||||
qse_awk_rtx_refdownval (rtx, a0);
|
||||
return -1;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
/* TODO: set RMATCH to hold the mainmatch and submatches inside () */
|
||||
qse_awk_rtx_setretval (rtx, x0);
|
||||
|
||||
qse_awk_rtx_setretval (rtx, a0);
|
||||
|
||||
qse_awk_rtx_refdownval (rtx, a1);
|
||||
qse_awk_rtx_refdownval (rtx, a0);
|
||||
if (tmpbuf) qse_str_close (tmpbuf);
|
||||
if (x2) qse_awk_rtx_refdownval (rtx, x2);
|
||||
qse_awk_rtx_refdownval (rtx, x1);
|
||||
qse_awk_rtx_refdownval (rtx, x0);
|
||||
return 0;
|
||||
|
||||
oops:
|
||||
if (tmpbuf) qse_str_close (tmpbuf);
|
||||
if (x2) qse_awk_rtx_refdownval (rtx, x2);
|
||||
if (x1) qse_awk_rtx_refdownval (rtx, x1);
|
||||
if (x0) qse_awk_rtx_refdownval (rtx, x0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int qse_awk_fnc_sprintf (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
|
||||
|
@ -906,7 +906,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
|
||||
{
|
||||
n = qse_awk_matchrex (
|
||||
rtx->awk, rex, rtx->gbl.ignorecase,
|
||||
&s, &cursub, &match, errnum);
|
||||
&s, &cursub, &match, QSE_NULL, errnum);
|
||||
if (n == -1) return QSE_NULL;
|
||||
if (n == 0)
|
||||
{
|
||||
@ -1248,12 +1248,13 @@ static int matchtre (
|
||||
int qse_awk_matchrex (
|
||||
qse_awk_t* awk, void* code, int icase,
|
||||
const qse_cstr_t* str, const qse_cstr_t* substr,
|
||||
qse_cstr_t* match, qse_awk_errnum_t* errnum)
|
||||
qse_cstr_t* match, qse_cstr_t submat[9], qse_awk_errnum_t* errnum)
|
||||
{
|
||||
#if defined(USE_REX)
|
||||
int x;
|
||||
qse_rex_errnum_t err;
|
||||
|
||||
/* submatch is not supported */
|
||||
x = qse_matchrex (
|
||||
awk->mmgr, awk->opt.depth.s.rex_match, code,
|
||||
(icase? QSE_REX_IGNORECASE: 0), str, substr, match, &err);
|
||||
@ -1266,7 +1267,7 @@ int qse_awk_matchrex (
|
||||
x = matchtre (
|
||||
awk, code,
|
||||
((str->ptr == substr->ptr)? opt: (opt | QSE_TRE_NOTBOL)),
|
||||
substr, match, QSE_NULL, errnum
|
||||
substr, match, submat, errnum
|
||||
);
|
||||
return x;
|
||||
#endif
|
||||
@ -1295,7 +1296,7 @@ void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode)
|
||||
|
||||
int qse_awk_rtx_matchrex (
|
||||
qse_awk_rtx_t* rtx, qse_awk_val_t* val,
|
||||
const qse_cstr_t* str, const qse_cstr_t* substr, qse_cstr_t* match)
|
||||
const qse_cstr_t* str, const qse_cstr_t* substr, qse_cstr_t* match, qse_cstr_t submat[9])
|
||||
{
|
||||
void* code;
|
||||
int icase, x;
|
||||
@ -1329,6 +1330,7 @@ int qse_awk_rtx_matchrex (
|
||||
}
|
||||
|
||||
#if defined(USE_REX)
|
||||
/* submatch not supported */
|
||||
x = qse_matchrex (
|
||||
rtx->awk->mmgr, rtx->awk->opt.depth.s.rex_match,
|
||||
code, (icase? QSE_REX_IGNORECASE: 0),
|
||||
@ -1338,7 +1340,7 @@ int qse_awk_rtx_matchrex (
|
||||
x = matchtre (
|
||||
rtx->awk, code,
|
||||
((str->ptr == substr->ptr)? QSE_TRE_BACKTRACKING: (QSE_TRE_BACKTRACKING | QSE_TRE_NOTBOL)),
|
||||
substr, match, QSE_NULL, &awkerr
|
||||
substr, match, submat, &awkerr
|
||||
);
|
||||
if (x <= -1) qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL);
|
||||
#endif
|
||||
|
@ -81,7 +81,7 @@ int qse_awk_buildrex (
|
||||
int qse_awk_matchrex (
|
||||
qse_awk_t* awk, void* code, int icase,
|
||||
const qse_cstr_t* str, const qse_cstr_t* substr,
|
||||
qse_cstr_t* match, qse_awk_errnum_t* errnum
|
||||
qse_cstr_t* match, qse_cstr_t submat[9], qse_awk_errnum_t* errnum
|
||||
);
|
||||
|
||||
void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode);
|
||||
@ -89,7 +89,7 @@ void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode);
|
||||
int qse_awk_rtx_matchrex (
|
||||
qse_awk_rtx_t* rtx, qse_awk_val_t* val,
|
||||
const qse_cstr_t* str, const qse_cstr_t* substr,
|
||||
qse_cstr_t* match
|
||||
qse_cstr_t* match, qse_cstr_t submat[9]
|
||||
);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
|
@ -3367,28 +3367,30 @@ static int fold_constants_for_binop (
|
||||
if (((qse_awk_nde_int_t*)right)->val == 0)
|
||||
{
|
||||
qse_awk_seterrnum (awk, QSE_AWK_EDIVBY0, QSE_NULL);
|
||||
return QSE_NULL;
|
||||
fold = -2; /* error */
|
||||
}
|
||||
|
||||
if (INT_BINOP_INT(left,%,right))
|
||||
else if (INT_BINOP_INT(left,%,right))
|
||||
{
|
||||
folded->r = (qse_awk_flt_t)((qse_awk_nde_int_t*)left)->val /
|
||||
(qse_awk_flt_t)((qse_awk_nde_int_t*)right)->val;
|
||||
fold = QSE_AWK_NDE_FLT;
|
||||
break;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
folded->l = INT_BINOP_INT(left,/,right);
|
||||
}
|
||||
break;
|
||||
|
||||
case QSE_AWK_BINOP_IDIV:
|
||||
if (((qse_awk_nde_int_t*)right)->val == 0)
|
||||
{
|
||||
qse_awk_seterrnum (awk, QSE_AWK_EDIVBY0, QSE_NULL);
|
||||
return QSE_NULL;
|
||||
fold = -2; /* error */
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
folded->l = INT_BINOP_INT(left,/,right);
|
||||
}
|
||||
break;
|
||||
|
||||
case QSE_AWK_BINOP_MOD:
|
||||
@ -3396,7 +3398,7 @@ static int fold_constants_for_binop (
|
||||
break;
|
||||
|
||||
default:
|
||||
fold = -1;
|
||||
fold = -1; /* no folding */
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -3700,6 +3702,9 @@ static qse_awk_nde_t* parse_binary (
|
||||
|
||||
break;
|
||||
|
||||
case -2:
|
||||
goto oops;
|
||||
|
||||
default:
|
||||
{
|
||||
qse_awk_nde_t* tmp;
|
||||
|
@ -239,7 +239,7 @@ static QSE_INLINE int match_long_rs (
|
||||
ret = qse_awk_matchrex (
|
||||
run->awk, run->gbl.rs[run->gbl.ignorecase],
|
||||
run->gbl.ignorecase, QSE_STR_XSTR(buf), QSE_STR_XSTR(buf),
|
||||
&match, &errnum);
|
||||
&match, QSE_NULL, &errnum);
|
||||
if (ret <= -1)
|
||||
{
|
||||
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL);
|
||||
|
@ -293,7 +293,7 @@ QSE_INLINE qse_awk_val_t* qse_awk_rtx_getarg (qse_awk_rtx_t* run, qse_size_t idx
|
||||
|
||||
QSE_INLINE qse_awk_val_t* qse_awk_rtx_getgbl (qse_awk_rtx_t* run, int id)
|
||||
{
|
||||
QSE_ASSERT (id >= 0 && id < (int)QSE_LDA_SIZE(run->awk->parse.gbls));
|
||||
QSE_ASSERT (id >= 0 && id < (int)QSE_ARR_SIZE(run->awk->parse.gbls));
|
||||
return RTX_STACK_GBL (run, id);
|
||||
}
|
||||
|
||||
@ -687,7 +687,7 @@ QSE_INLINE void qse_awk_rtx_setretval (
|
||||
QSE_INLINE int qse_awk_rtx_setgbl (
|
||||
qse_awk_rtx_t* rtx, int id, qse_awk_val_t* val)
|
||||
{
|
||||
QSE_ASSERT (id >= 0 && id < (int)QSE_LDA_SIZE(rtx->awk->parse.gbls));
|
||||
QSE_ASSERT (id >= 0 && id < (int)QSE_ARR_SIZE(rtx->awk->parse.gbls));
|
||||
return set_global (rtx, id, QSE_NULL, val, 0);
|
||||
}
|
||||
|
||||
@ -3234,7 +3234,7 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
|
||||
vs.len = ((qse_awk_val_str_t*)rtx->inrec.d0)->val.len;
|
||||
}
|
||||
|
||||
n = qse_awk_rtx_matchrex (rtx, v, &vs, &vs, QSE_NULL);
|
||||
n = qse_awk_rtx_matchrex (rtx, v, &vs, &vs, QSE_NULL, QSE_NULL);
|
||||
if (n <= -1)
|
||||
{
|
||||
ADJERR_LOC (rtx, &nde->loc);
|
||||
@ -5041,7 +5041,7 @@ static qse_awk_val_t* eval_binop_match0 (
|
||||
out.ptr = qse_awk_rtx_getvalstr (rtx, left, &out.len);
|
||||
if (out.ptr == QSE_NULL) return QSE_NULL;
|
||||
|
||||
n = qse_awk_rtx_matchrex (rtx, right, &out, &out, QSE_NULL);
|
||||
n = qse_awk_rtx_matchrex (rtx, right, &out, &out, QSE_NULL, QSE_NULL);
|
||||
qse_awk_rtx_freevalstr (rtx, left, out.ptr);
|
||||
|
||||
if (n <= -1)
|
||||
|
Loading…
Reference in New Issue
Block a user