From 3d813a1bf1ad248d6e148a064f3e6d4793713197 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Tue, 16 Apr 2013 15:30:37 +0000 Subject: [PATCH] amended the reference handling in Awk.cpp. added str::index() and str::rindex(). enhanced qse_awk_rtx_valtobool(). removed QSE_AWK_NCMPONSTR from QSE_AWK_CLASSIC. added QSE_AWK_NCMPONSTR to QSE_AWK_MODERN. --- qse/include/qse/awk/awk.h | 9 +-- qse/lib/awk/Awk.cpp | 50 +++++++++------- qse/lib/awk/mod-str.c | 110 ++++++++++++++++++++++++++++++++++-- qse/lib/awk/val.c | 60 ++++++++++++-------- qse/regress/awk/cou-019.awk | 8 ++- qse/regress/awk/cou-020.awk | 7 ++- 6 files changed, 190 insertions(+), 54 deletions(-) diff --git a/qse/include/qse/awk/awk.h b/qse/include/qse/awk/awk.h index 15591537..06c6f7fc 100644 --- a/qse/include/qse/awk/awk.h +++ b/qse/include/qse/awk/awk.h @@ -234,8 +234,9 @@ struct qse_awk_val_ref_t QSE_AWK_VAL_REF_POS /**< positional variable */ } id; - /* if id is QSE_AWK_VAL_REF_POS, adr holds an index of the - * positional variable. Otherwise, adr points to the value + /* if id is QSE_AWK_VAL_REF_POS, adr holds the index of a + * positional variable. If id is QSE_AWK_VAL_REF_GBL, adr holds + * the index of a global variable. Otherwise, adr points to the value * directly. 
*/ qse_awk_val_t** adr; }; @@ -1124,11 +1125,11 @@ enum qse_awk_trait_t QSE_AWK_CLASSIC = QSE_AWK_IMPLICIT | QSE_AWK_RIO | QSE_AWK_NEWLINE | QSE_AWK_BLANKCONCAT | QSE_AWK_PABLOCK | - QSE_AWK_STRIPSTRSPC | QSE_AWK_NCMPONSTR | QSE_AWK_STRICTNAMING, + QSE_AWK_STRIPSTRSPC | QSE_AWK_STRICTNAMING, QSE_AWK_MODERN = QSE_AWK_CLASSIC | QSE_AWK_FLEXMAP | - QSE_AWK_RWPIPE | QSE_AWK_TOLERANT | QSE_AWK_NEXTOFILE + QSE_AWK_RWPIPE | QSE_AWK_TOLERANT | QSE_AWK_NEXTOFILE | QSE_AWK_NCMPONSTR }; typedef enum qse_awk_trait_t qse_awk_trait_t; diff --git a/qse/lib/awk/Awk.cpp b/qse/lib/awk/Awk.cpp index 334c2fb2..b9dd096b 100644 --- a/qse/lib/awk/Awk.cpp +++ b/qse/lib/awk/Awk.cpp @@ -1392,30 +1392,42 @@ int Awk::dispatch_function (Run* run, const fnc_info_t* fi) { qse_awk_val_ref_t* ref = (qse_awk_val_ref_t*)v; - if (ref->id == qse_awk_val_ref_t::QSE_AWK_VAL_REF_POS) + switch (ref->id) { - qse_size_t idx = (qse_size_t)ref->adr; + case qse_awk_val_ref_t::QSE_AWK_VAL_REF_POS: + { + qse_size_t idx = (qse_size_t)ref->adr; - if (idx == 0) - { - xx = args[i].setStr (run, - QSE_STR_PTR(&run->rtx->inrec.line), - QSE_STR_LEN(&run->rtx->inrec.line)); + if (idx == 0) + { + xx = args[i].setStr (run, + QSE_STR_PTR(&run->rtx->inrec.line), + QSE_STR_LEN(&run->rtx->inrec.line)); + } + else if (idx <= run->rtx->inrec.nflds) + { + xx = args[i].setStr (run, + run->rtx->inrec.flds[idx-1].ptr, + run->rtx->inrec.flds[idx-1].len); + } + else + { + xx = args[i].setStr (run, QSE_T(""), 0); + } + break; } - else if (idx <= run->rtx->inrec.nflds) + + case qse_awk_val_ref_t::QSE_AWK_VAL_REF_GBL: { - xx = args[i].setStr (run, - run->rtx->inrec.flds[idx-1].ptr, - run->rtx->inrec.flds[idx-1].len); + qse_size_t idx = (qse_size_t)ref->adr; + qse_awk_val_t* val = (qse_awk_val_t*)RTX_STACK_GBL (run->rtx, idx); + xx = args[i].setVal (run, val); + break; } - else - { - xx = args[i].setStr (run, QSE_T(""), 0); - } - } - else - { - xx = args[i].setVal (run, *(ref->adr)); + + default: + xx = args[i].setVal (run, *(ref->adr)); 
+ break; } has_ref_arg = true; } diff --git a/qse/lib/awk/mod-str.c b/qse/lib/awk/mod-str.c index 4fdd9f5e..603f491a 100644 --- a/qse/lib/awk/mod-str.c +++ b/qse/lib/awk/mod-str.c @@ -76,6 +76,106 @@ static int fnc_rtrim (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi) return trim (rtx, QSE_STRTRMX_RIGHT); } +static int index_or_rindex (qse_awk_rtx_t* rtx, int rindex) +{ + /* this is similar to the built-in index() function but doesn't + * care about IGNORECASE. a nonzero 'rindex' takes the qse_strxnrstr() branch below and zero takes the qse_strxnstr() branch. the value handed back to the script is the 1-based position of the match within the first argument, or 0 when no match is found. */ + qse_size_t nargs; + qse_awk_val_t* a0, * a1; + qse_char_t* str0, * str1, * ptr; + qse_size_t len0, len1; + qse_long_t idx, boundary = 1; + + nargs = qse_awk_rtx_getnargs (rtx); + a0 = qse_awk_rtx_getarg (rtx, 0); + a1 = qse_awk_rtx_getarg (rtx, 1); + + /* sample calls with the optional third argument, which is read into 'boundary'. further down, 0 is replaced by 1 and a negative value has len0 + 1 added to it, so -1 becomes len0: + str::index ("abc", "d", 3); + str::rindex ("abcdefabcdx", "cd", 8); + */ + + if (nargs >= 3) + { + qse_awk_val_t* a2; + int n; + + a2 = qse_awk_rtx_getarg (rtx, 2); + n = qse_awk_rtx_valtolong (rtx, a2, &boundary); + if (n <= -1) return -1; + } + + if (a0->type == QSE_AWK_VAL_STR) + { + str0 = ((qse_awk_val_str_t*)a0)->val.ptr; + len0 = ((qse_awk_val_str_t*)a0)->val.len; + } + else + { + str0 = qse_awk_rtx_valtostrdup (rtx, a0, &len0); + if (str0 == QSE_NULL) return -1; + } + + if (a1->type == QSE_AWK_VAL_STR) + { + str1 = ((qse_awk_val_str_t*)a1)->val.ptr; + len1 = ((qse_awk_val_str_t*)a1)->val.len; + } + else + { + str1 = qse_awk_rtx_valtostrdup (rtx, a1, &len1); + if (str1 == QSE_NULL) + { + if (a0->type != QSE_AWK_VAL_STR) + qse_awk_rtx_freemem (rtx, str0); + return -1; + } + } + + if (nargs < 3) + { + boundary = rindex? len0: 1; + } + else + { + if (boundary == 0) boundary = 1; + else if (boundary < 0) boundary = len0 + boundary + 1; + } + + if (rindex) + { + /* 'boundary' acts as an end position: str0 is passed to qse_strxnrstr() with 'boundary' in place of len0. a position outside 1..len0 yields no match */ + ptr = (boundary > len0 || boundary <= 0)? + QSE_NULL: qse_strxnrstr (&str0[0], boundary, str1, len1); + } + else + { + /* 'boundary' acts as a start position: the search is given str0 from offset boundary-1 onward. a position outside 1..len0 yields no match */ + ptr = (boundary > len0 || boundary <= 0)? 
+ QSE_NULL: qse_strxnstr (&str0[boundary-1], len0-boundary+1, str1, len1); + } + + idx = (ptr == QSE_NULL)? 0: ((qse_long_t)(ptr-str0) + 1); + + if (a0->type != QSE_AWK_VAL_STR) qse_awk_rtx_freemem (rtx, str0); + if (a1->type != QSE_AWK_VAL_STR) qse_awk_rtx_freemem (rtx, str1); + + a0 = qse_awk_rtx_makeintval (rtx, idx); + if (a0 == QSE_NULL) return -1; + + qse_awk_rtx_setretval (rtx, a0); + return 0; +} + +static int fnc_index (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi) +{ + return index_or_rindex (rtx, 0); +} +static int fnc_rindex (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi) +{ + return index_or_rindex (rtx, 1); +} + typedef struct fnctab_t fnctab_t; struct fnctab_t { @@ -86,10 +186,12 @@ struct fnctab_t static fnctab_t fnctab[] = { /* keep this table sorted for binary search in query(). */ - { QSE_T("ltrim"), { { 1, 1, QSE_NULL }, fnc_ltrim, 0 } }, - { QSE_T("normspace"), { { 1, 1, QSE_NULL }, fnc_normspace, 0 } }, - { QSE_T("rtrim"), { { 1, 1, QSE_NULL }, fnc_rtrim, 0 } }, - { QSE_T("trim"), { { 1, 1, QSE_NULL }, fnc_trim, 0 } } + { QSE_T("index"), { { 2, 3, QSE_NULL }, fnc_index, 0 } }, + { QSE_T("ltrim"), { { 1, 1, QSE_NULL }, fnc_ltrim, 0 } }, + { QSE_T("normspace"), { { 1, 1, QSE_NULL }, fnc_normspace, 0 } }, + { QSE_T("rindex"), { { 2, 3, QSE_NULL }, fnc_rindex, 0 } }, + { QSE_T("rtrim"), { { 1, 1, QSE_NULL }, fnc_rtrim, 0 } }, + { QSE_T("trim"), { { 1, 1, QSE_NULL }, fnc_trim, 0 } } }; static int query (qse_awk_mod_t* mod, qse_awk_t* awk, const qse_char_t* name, qse_awk_mod_sym_t* sym) diff --git a/qse/lib/awk/val.c b/qse/lib/awk/val.c index b3848ffa..f571ff0d 100644 --- a/qse/lib/awk/val.c +++ b/qse/lib/awk/val.c @@ -913,37 +913,47 @@ void qse_awk_rtx_freevalchunk (qse_awk_rtx_t* rtx, qse_awk_val_chunk_t* chunk) static int val_ref_to_bool ( qse_awk_rtx_t* rtx, const qse_awk_val_ref_t* ref) { - if (ref->id == QSE_AWK_VAL_REF_POS) + switch (ref->id) { - qse_size_t idx; + case QSE_AWK_VAL_REF_POS: + { + qse_size_t idx; - idx = 
(qse_size_t)ref->adr; - if (idx == 0) - { - return QSE_STR_LEN(&rtx->inrec.line) > 0; + idx = (qse_size_t)ref->adr; + if (idx == 0) + { + return QSE_STR_LEN(&rtx->inrec.line) > 0; + } + else if (idx <= rtx->inrec.nflds) + { + return rtx->inrec.flds[idx-1].len > 0; + } + else + { + /* the index is greater than the number of fields. + * it's an empty string. so false */ + return 0; + } } - else if (idx <= rtx->inrec.nflds) + case QSE_AWK_VAL_REF_GBL: { - return rtx->inrec.flds[idx-1].len > 0; + qse_size_t idx; + idx = (qse_size_t)ref->adr; + return qse_awk_rtx_valtobool (rtx, RTX_STACK_GBL (rtx, idx)); } - else - { - /* the index is greater than the number of records. - * it's an empty string. so false */ - return 0; - } - } - else - { - qse_awk_val_t** xref = (qse_awk_val_t**)ref->adr; - /* A reference value is not able to point to another - * refernce value for the way values are represented - * in QSEAWK */ - QSE_ASSERT ((*xref)->type != QSE_AWK_VAL_REF); + default: + { + qse_awk_val_t** xref = (qse_awk_val_t**)ref->adr; - /* make a recursive call back to the caller */ - return qse_awk_rtx_valtobool (rtx, *xref); + /* A reference value is not able to point to another + * reference value for the way values are represented + * in QSEAWK */ + QSE_ASSERT ((*xref)->type != QSE_AWK_VAL_REF); + + /* make a recursive call back to the caller */ + return qse_awk_rtx_valtobool (rtx, *xref); + } } } @@ -1680,7 +1690,7 @@ qse_long_t qse_awk_rtx_hashval (qse_awk_rtx_t* rtx, qse_awk_val_t* v) int qse_awk_rtx_setrefval (qse_awk_rtx_t* rtx, qse_awk_val_ref_t* ref, qse_awk_val_t* val) { - if (val->type == QSE_AWK_VAL_REX) + if (val->type == QSE_AWK_VAL_REX || val->type == QSE_AWK_VAL_REF) { /* though it is possible that an intrinsic function handler * can accept a regular expression withtout evaluation when 'x' diff --git a/qse/regress/awk/cou-019.awk b/qse/regress/awk/cou-019.awk index e4f9bdf0..4d2b1a00 100644 --- a/qse/regress/awk/cou-019.awk +++ b/qse/regress/awk/cou-019.awk @@ 
-1 +1,7 @@ -FNR == 1, FNR == 5 { print FILENAME ": " $0; } +function basename (str) { + ridx = str::rindex (str, "/"); + if (ridx == 0) return str; + else return substr (str, ridx + 1); +} + +FNR == 1, FNR == 5 { print basename(FILENAME) ": " $0; } diff --git a/qse/regress/awk/cou-020.awk b/qse/regress/awk/cou-020.awk index fb1ac184..c0b56129 100644 --- a/qse/regress/awk/cou-020.awk +++ b/qse/regress/awk/cou-020.awk @@ -1 +1,6 @@ -FNR <= 5 { print FILENAME ": " $0; } +function basename (str) { + ridx = str::rindex (str, "/"); + if (ridx == 0) return str; + else return substr (str, ridx + 1); +} +FNR <= 5 { print basename(FILENAME) ": " $0; }