implemented the optional fourth parameter to match() in awk to support submatch retrieval

This commit is contained in:
hyung-hwan 2016-11-09 14:12:22 +00:00
parent 27dd0a551b
commit 905da20d24
6 changed files with 127 additions and 68 deletions

View File

@ -51,23 +51,23 @@ static int fnc_int (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi);
static qse_awk_fnc_t sysfnctab[] =
{
/* io functions */
{ {QSE_T("close"), 5}, 0, { {1, 2, QSE_NULL}, fnc_close, QSE_AWK_RIO }, QSE_NULL},
{ {QSE_T("fflush"), 6}, 0, { {0, 1, QSE_NULL}, fnc_fflush, QSE_AWK_RIO }, QSE_NULL},
{ {QSE_T("close"), 5}, 0, { {1, 2, QSE_NULL}, fnc_close, QSE_AWK_RIO }, QSE_NULL},
{ {QSE_T("fflush"), 6}, 0, { {0, 1, QSE_NULL}, fnc_fflush, QSE_AWK_RIO }, QSE_NULL},
/* integer conversion */
{ {QSE_T("int"), 3}, 0, { {1, 1, QSE_NULL}, fnc_int, 0 }, QSE_NULL},
{ {QSE_T("int"), 3}, 0, { {1, 1, QSE_NULL}, fnc_int, 0 }, QSE_NULL},
/* string functions */
{ {QSE_T("index"), 5}, 0, { {2, 3, QSE_NULL}, qse_awk_fnc_index, 0 }, QSE_NULL},
{ {QSE_T("substr"), 6}, 0, { {2, 3, QSE_NULL}, qse_awk_fnc_substr, 0 }, QSE_NULL},
{ {QSE_T("length"), 6}, 1, { {0, 1, QSE_NULL}, qse_awk_fnc_length, 0 }, QSE_NULL},
{ {QSE_T("split"), 5}, 0, { {2, 3, QSE_T("vrx")}, qse_awk_fnc_split, 0 }, QSE_NULL},
{ {QSE_T("tolower"), 7}, 0, { {1, 1, QSE_NULL}, qse_awk_fnc_tolower, 0 }, QSE_NULL},
{ {QSE_T("toupper"), 7}, 0, { {1, 1, QSE_NULL}, qse_awk_fnc_toupper, 0 }, QSE_NULL},
{ {QSE_T("gsub"), 4}, 0, { {2, 3, QSE_T("xvr")}, qse_awk_fnc_gsub, 0 }, QSE_NULL},
{ {QSE_T("sub"), 3}, 0, { {2, 3, QSE_T("xvr")}, qse_awk_fnc_sub, 0 }, QSE_NULL},
{ {QSE_T("match"), 5}, 0, { {2, 3, QSE_T("vxv")}, qse_awk_fnc_match, 0 }, QSE_NULL},
{ {QSE_T("sprintf"), 7}, 0, { {1, A_MAX, QSE_NULL}, qse_awk_fnc_sprintf, 0 }, QSE_NULL},
{ {QSE_T("index"), 5}, 0, { {2, 3, QSE_NULL}, qse_awk_fnc_index, 0 }, QSE_NULL},
{ {QSE_T("substr"), 6}, 0, { {2, 3, QSE_NULL}, qse_awk_fnc_substr, 0 }, QSE_NULL},
{ {QSE_T("length"), 6}, 1, { {0, 1, QSE_NULL}, qse_awk_fnc_length, 0 }, QSE_NULL},
{ {QSE_T("split"), 5}, 0, { {2, 3, QSE_T("vrx")}, qse_awk_fnc_split, 0 }, QSE_NULL},
{ {QSE_T("tolower"), 7}, 0, { {1, 1, QSE_NULL}, qse_awk_fnc_tolower, 0 }, QSE_NULL},
{ {QSE_T("toupper"), 7}, 0, { {1, 1, QSE_NULL}, qse_awk_fnc_toupper, 0 }, QSE_NULL},
{ {QSE_T("gsub"), 4}, 0, { {2, 3, QSE_T("xvr")}, qse_awk_fnc_gsub, 0 }, QSE_NULL},
{ {QSE_T("sub"), 3}, 0, { {2, 3, QSE_T("xvr")}, qse_awk_fnc_sub, 0 }, QSE_NULL},
{ {QSE_T("match"), 5}, 0, { {2, 4, QSE_T("vxvr")}, qse_awk_fnc_match, 0 }, QSE_NULL},
{ {QSE_T("sprintf"), 7}, 0, { {1, A_MAX, QSE_NULL}, qse_awk_fnc_sprintf, 0 }, QSE_NULL},
/* math functions */
{ {QSE_T("sin"), 3}, 0, { {A_MAX, 0, QSE_T("math") }, QSE_NULL, 0 }, QSE_NULL},
@ -1001,7 +1001,7 @@ static int __substitute (qse_awk_rtx_t* rtx, qse_awk_int_t max_count)
{
n = qse_awk_matchrex (
rtx->awk, rex, rtx->gbl.ignorecase,
&s2, &cur, &mat, &errnum
&s2, &cur, &mat, QSE_NULL, &errnum
);
}
else n = 0;
@ -1159,15 +1159,17 @@ int qse_awk_fnc_sub (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
int qse_awk_fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
{
qse_size_t nargs;
qse_awk_val_t* a0, * a1;
qse_awk_val_t* a0, * a1, * a3;
qse_char_t* str0;
qse_size_t len0;
qse_awk_int_t idx, start = 1;
qse_awk_val_t* x0 = QSE_NULL, * x1 = QSE_NULL, * x2 = QSE_NULL;
int n;
qse_cstr_t mat;
qse_cstr_t mat, submat[9];
qse_str_t* tmpbuf = QSE_NULL;
nargs = qse_awk_rtx_getnargs (rtx);
QSE_ASSERT (nargs >= 2 && nargs <= 3);
QSE_ASSERT (nargs >= 2 && nargs <= 4);
a0 = qse_awk_rtx_getarg (rtx, 0);
a1 = qse_awk_rtx_getarg (rtx, 1);
@ -1181,6 +1183,8 @@ int qse_awk_fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
* it is treated as a match start index */
n = qse_awk_rtx_valtoint (rtx, a2, &start);
if (n <= -1) return -1;
if (nargs >= 4) a3 = qse_awk_rtx_getarg (rtx, 3);
}
str0 = qse_awk_rtx_getvalstr (rtx, a0, &len0);
@ -1189,6 +1193,7 @@ int qse_awk_fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
if (start == 0) start = 1;
else if (start < 0) start = len0 + start + 1;
QSE_MEMSET (submat, 0, QSE_SIZEOF(submat));
if (start > len0 || start <= 0) n = 0;
else
{
@ -1198,7 +1203,7 @@ int qse_awk_fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
tmp.ptr = str0 + start - 1;
tmp.len = len0 - start + 1;
n = qse_awk_rtx_matchrex (rtx, a1, &tmp, &tmp, &mat);
n = qse_awk_rtx_matchrex (rtx, a1, &tmp, &tmp, &mat, (nargs >= 4? submat: QSE_NULL));
if (n <= -1) return -1;
}
@ -1207,43 +1212,90 @@ int qse_awk_fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
/* RSTART: 0 on no match */
idx = (n == 0)? 0: ((qse_awk_int_t)(mat.ptr-str0) + 1);
a0 = qse_awk_rtx_makeintval (rtx, idx);
if (a0 == QSE_NULL) return -1;
qse_awk_rtx_refupval (rtx, a0);
x0 = qse_awk_rtx_makeintval (rtx, idx);
if (!x0) goto oops;
qse_awk_rtx_refupval (rtx, x0);
/* RLENGTH: -1 on no match */
a1 = qse_awk_rtx_makeintval (rtx,
x1 = qse_awk_rtx_makeintval (rtx,
((n == 0)? (qse_awk_int_t)-1: (qse_awk_int_t)mat.len));
if (a1 == QSE_NULL)
if (!x1) goto oops;
qse_awk_rtx_refupval (rtx, x1);
if (nargs >= 4)
{
qse_awk_rtx_refdownval (rtx, a0);
return -1;
const qse_cstr_t* subsep;
qse_awk_int_t submatcount;
qse_size_t i, xlen;
qse_awk_val_t* tv;
tmpbuf = qse_str_open (qse_awk_rtx_getmmgr(rtx), 0, 64);
if (!tmpbuf) goto oops;
x2 = qse_awk_rtx_makemapval (rtx);
if (!x2) goto oops;
qse_awk_rtx_refupval (rtx, x2);
submatcount =0;
subsep = qse_awk_rtx_getsubsep (rtx);
for (i = 0; i < QSE_COUNTOF(submat); i++)
{
if (!submat[i].ptr) break;
submatcount++;
if (qse_str_fmt (tmpbuf, QSE_T("%d"), (int)submatcount) == (qse_size_t)-1 ||
qse_str_ncat (tmpbuf, subsep->ptr, subsep->len) == (qse_size_t)-1) goto oops;
xlen = QSE_STR_LEN(tmpbuf);
if (qse_str_ncat (tmpbuf, QSE_T("start"), 5) == (qse_size_t)-1) goto oops;
tv = qse_awk_rtx_makeintval (rtx, submat[i].ptr - str0 + 1);
if (!tv) goto oops;
if (!qse_awk_rtx_setmapvalfld (rtx, x2, QSE_STR_PTR(tmpbuf), QSE_STR_LEN(tmpbuf), tv))
{
qse_awk_rtx_refupval (rtx, tv);
qse_awk_rtx_refdownval (rtx, tv);
goto oops;
}
if (qse_str_setlen (tmpbuf, xlen) == (qse_size_t)-1 ||
qse_str_ncat (tmpbuf, QSE_T("length"), 6) == (qse_size_t)-1) goto oops;
tv = qse_awk_rtx_makeintval (rtx, submat[i].len);
if (!tv) goto oops;
if (!qse_awk_rtx_setmapvalfld (rtx, x2, QSE_STR_PTR(tmpbuf), QSE_STR_LEN(tmpbuf), tv))
{
qse_awk_rtx_refupval (rtx, tv);
qse_awk_rtx_refdownval (rtx, tv);
goto oops;
}
}
/* the caller of this function must be able to get the submatch count by
* dividing the array size by 2 */
if (qse_awk_rtx_setrefval (rtx, (qse_awk_val_ref_t*)a3, x2) <= -1) goto oops;
}
qse_awk_rtx_refupval (rtx, a1);
if (qse_awk_rtx_setgbl (rtx, QSE_AWK_GBL_RSTART, a0) <= -1)
if (qse_awk_rtx_setgbl (rtx, QSE_AWK_GBL_RSTART, x0) <= -1 ||
qse_awk_rtx_setgbl (rtx, QSE_AWK_GBL_RLENGTH, x1) <= -1)
{
qse_awk_rtx_refdownval (rtx, a1);
qse_awk_rtx_refdownval (rtx, a0);
return -1;
goto oops;
}
if (qse_awk_rtx_setgbl (rtx, QSE_AWK_GBL_RLENGTH, a1) <= -1)
{
qse_awk_rtx_refdownval (rtx, a1);
qse_awk_rtx_refdownval (rtx, a0);
return -1;
}
qse_awk_rtx_setretval (rtx, x0);
/* TODO: set RMATCH to hold the main match and submatches inside () */
qse_awk_rtx_setretval (rtx, a0);
qse_awk_rtx_refdownval (rtx, a1);
qse_awk_rtx_refdownval (rtx, a0);
if (tmpbuf) qse_str_close (tmpbuf);
if (x2) qse_awk_rtx_refdownval (rtx, x2);
qse_awk_rtx_refdownval (rtx, x1);
qse_awk_rtx_refdownval (rtx, x0);
return 0;
oops:
if (tmpbuf) qse_str_close (tmpbuf);
if (x2) qse_awk_rtx_refdownval (rtx, x2);
if (x1) qse_awk_rtx_refdownval (rtx, x1);
if (x0) qse_awk_rtx_refdownval (rtx, x0);
return -1;
}
int qse_awk_fnc_sprintf (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)

View File

@ -906,7 +906,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
{
n = qse_awk_matchrex (
rtx->awk, rex, rtx->gbl.ignorecase,
&s, &cursub, &match, errnum);
&s, &cursub, &match, QSE_NULL, errnum);
if (n == -1) return QSE_NULL;
if (n == 0)
{
@ -1248,12 +1248,13 @@ static int matchtre (
int qse_awk_matchrex (
qse_awk_t* awk, void* code, int icase,
const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match, qse_awk_errnum_t* errnum)
qse_cstr_t* match, qse_cstr_t submat[9], qse_awk_errnum_t* errnum)
{
#if defined(USE_REX)
int x;
qse_rex_errnum_t err;
/* submatch is not supported */
x = qse_matchrex (
awk->mmgr, awk->opt.depth.s.rex_match, code,
(icase? QSE_REX_IGNORECASE: 0), str, substr, match, &err);
@ -1266,7 +1267,7 @@ int qse_awk_matchrex (
x = matchtre (
awk, code,
((str->ptr == substr->ptr)? opt: (opt | QSE_TRE_NOTBOL)),
substr, match, QSE_NULL, errnum
substr, match, submat, errnum
);
return x;
#endif
@ -1295,7 +1296,7 @@ void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode)
int qse_awk_rtx_matchrex (
qse_awk_rtx_t* rtx, qse_awk_val_t* val,
const qse_cstr_t* str, const qse_cstr_t* substr, qse_cstr_t* match)
const qse_cstr_t* str, const qse_cstr_t* substr, qse_cstr_t* match, qse_cstr_t submat[9])
{
void* code;
int icase, x;
@ -1329,6 +1330,7 @@ int qse_awk_rtx_matchrex (
}
#if defined(USE_REX)
/* submatch not supported */
x = qse_matchrex (
rtx->awk->mmgr, rtx->awk->opt.depth.s.rex_match,
code, (icase? QSE_REX_IGNORECASE: 0),
@ -1338,7 +1340,7 @@ int qse_awk_rtx_matchrex (
x = matchtre (
rtx->awk, code,
((str->ptr == substr->ptr)? QSE_TRE_BACKTRACKING: (QSE_TRE_BACKTRACKING | QSE_TRE_NOTBOL)),
substr, match, QSE_NULL, &awkerr
substr, match, submat, &awkerr
);
if (x <= -1) qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL);
#endif

View File

@ -81,7 +81,7 @@ int qse_awk_buildrex (
int qse_awk_matchrex (
qse_awk_t* awk, void* code, int icase,
const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match, qse_awk_errnum_t* errnum
qse_cstr_t* match, qse_cstr_t submat[9], qse_awk_errnum_t* errnum
);
void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode);
@ -89,7 +89,7 @@ void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode);
int qse_awk_rtx_matchrex (
qse_awk_rtx_t* rtx, qse_awk_val_t* val,
const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match
qse_cstr_t* match, qse_cstr_t submat[9]
);
#if defined(__cplusplus)

View File

@ -3367,28 +3367,30 @@ static int fold_constants_for_binop (
if (((qse_awk_nde_int_t*)right)->val == 0)
{
qse_awk_seterrnum (awk, QSE_AWK_EDIVBY0, QSE_NULL);
return QSE_NULL;
fold = -2; /* error */
}
if (INT_BINOP_INT(left,%,right))
else if (INT_BINOP_INT(left,%,right))
{
folded->r = (qse_awk_flt_t)((qse_awk_nde_int_t*)left)->val /
(qse_awk_flt_t)((qse_awk_nde_int_t*)right)->val;
fold = QSE_AWK_NDE_FLT;
break;
}
folded->l = INT_BINOP_INT(left,/,right);
else
{
folded->l = INT_BINOP_INT(left,/,right);
}
break;
case QSE_AWK_BINOP_IDIV:
if (((qse_awk_nde_int_t*)right)->val == 0)
{
qse_awk_seterrnum (awk, QSE_AWK_EDIVBY0, QSE_NULL);
return QSE_NULL;
fold = -2; /* error */
}
else
{
folded->l = INT_BINOP_INT(left,/,right);
}
folded->l = INT_BINOP_INT(left,/,right);
break;
case QSE_AWK_BINOP_MOD:
@ -3396,7 +3398,7 @@ static int fold_constants_for_binop (
break;
default:
fold = -1;
fold = -1; /* no folding */
break;
}
}
@ -3528,7 +3530,7 @@ static int fold_constants_for_binop (
return fold;
}
static qse_awk_nde_t* new_exp_bin_node (
static qse_awk_nde_t* new_exp_bin_node (
qse_awk_t* awk, const qse_awk_loc_t* loc,
int opcode, qse_awk_nde_t* left, qse_awk_nde_t* right)
{
@ -3668,7 +3670,7 @@ static qse_awk_nde_t* parse_binary (
{
qse_awk_clrpt (awk, right); right = QSE_NULL;
qse_awk_clrpt (awk, left); left = QSE_NULL;
left = new_int_node (awk, folded.l, xloc);
if (left == QSE_NULL) goto oops;
}
@ -3700,6 +3702,9 @@ static qse_awk_nde_t* parse_binary (
break;
case -2:
goto oops;
default:
{
qse_awk_nde_t* tmp;

View File

@ -239,7 +239,7 @@ static QSE_INLINE int match_long_rs (
ret = qse_awk_matchrex (
run->awk, run->gbl.rs[run->gbl.ignorecase],
run->gbl.ignorecase, QSE_STR_XSTR(buf), QSE_STR_XSTR(buf),
&match, &errnum);
&match, QSE_NULL, &errnum);
if (ret <= -1)
{
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL);

View File

@ -293,7 +293,7 @@ QSE_INLINE qse_awk_val_t* qse_awk_rtx_getarg (qse_awk_rtx_t* run, qse_size_t idx
QSE_INLINE qse_awk_val_t* qse_awk_rtx_getgbl (qse_awk_rtx_t* run, int id)
{
QSE_ASSERT (id >= 0 && id < (int)QSE_LDA_SIZE(run->awk->parse.gbls));
QSE_ASSERT (id >= 0 && id < (int)QSE_ARR_SIZE(run->awk->parse.gbls));
return RTX_STACK_GBL (run, id);
}
@ -687,7 +687,7 @@ QSE_INLINE void qse_awk_rtx_setretval (
QSE_INLINE int qse_awk_rtx_setgbl (
qse_awk_rtx_t* rtx, int id, qse_awk_val_t* val)
{
QSE_ASSERT (id >= 0 && id < (int)QSE_LDA_SIZE(rtx->awk->parse.gbls));
QSE_ASSERT (id >= 0 && id < (int)QSE_ARR_SIZE(rtx->awk->parse.gbls));
return set_global (rtx, id, QSE_NULL, val, 0);
}
@ -3234,7 +3234,7 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
vs.len = ((qse_awk_val_str_t*)rtx->inrec.d0)->val.len;
}
n = qse_awk_rtx_matchrex (rtx, v, &vs, &vs, QSE_NULL);
n = qse_awk_rtx_matchrex (rtx, v, &vs, &vs, QSE_NULL, QSE_NULL);
if (n <= -1)
{
ADJERR_LOC (rtx, &nde->loc);
@ -5041,7 +5041,7 @@ static qse_awk_val_t* eval_binop_match0 (
out.ptr = qse_awk_rtx_getvalstr (rtx, left, &out.len);
if (out.ptr == QSE_NULL) return QSE_NULL;
n = qse_awk_rtx_matchrex (rtx, right, &out, &out, QSE_NULL);
n = qse_awk_rtx_matchrex (rtx, right, &out, &out, QSE_NULL, QSE_NULL);
qse_awk_rtx_freevalstr (rtx, left, out.ptr);
if (n <= -1)