diff --git a/qse/include/qse/awk/awk.h b/qse/include/qse/awk/awk.h index e79cea5a..1c55b89d 100644 --- a/qse/include/qse/awk/awk.h +++ b/qse/include/qse/awk/awk.h @@ -1365,6 +1365,7 @@ enum qse_awk_gbl_id_t QSE_AWK_GBL_RLENGTH, QSE_AWK_GBL_RS, QSE_AWK_GBL_RSTART, + QSE_AWK_GBL_STRIPRECSPC, QSE_AWK_GBL_SUBSEP, /* these are not not the actual IDs and are used internally only diff --git a/qse/lib/awk/awk-prv.h b/qse/lib/awk/awk-prv.h index 870e564f..6237f2fe 100644 --- a/qse/lib/awk/awk-prv.h +++ b/qse/lib/awk/awk-prv.h @@ -316,6 +316,7 @@ struct qse_awk_rtx_t void* rs[2]; void* fs[2]; int ignorecase; + int striprecspc; qse_awk_int_t nr; qse_awk_int_t fnr; diff --git a/qse/lib/awk/misc.c b/qse/lib/awk/misc.c index 98430126..8ea0503e 100644 --- a/qse/lib/awk/misc.c +++ b/qse/lib/awk/misc.c @@ -926,7 +926,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex ( cursub.ptr++; cursub.len--; } - else if (rtx->awk->opt.trait & QSE_AWK_STRIPRECSPC) + else if (rtx->gbl.striprecspc > 0 || (rtx->gbl.striprecspc < 0 && (rtx->awk->opt.trait & QSE_AWK_STRIPRECSPC))) { /* match at the beginning of the input string */ if (match.ptr == substr) @@ -976,7 +976,7 @@ exit_loop: /* the match is all spaces */ *errnum = QSE_AWK_ENOERR; - if (rtx->awk->opt.trait & QSE_AWK_STRIPRECSPC) + if (rtx->gbl.striprecspc > 0 || (rtx->gbl.striprecspc < 0 && (rtx->awk->opt.trait & QSE_AWK_STRIPRECSPC))) { /* if the match reached the last character in the input string, * it returns QSE_NULL to terminate tokenization. */ diff --git a/qse/lib/awk/parse.c b/qse/lib/awk/parse.c index 5df255a8..9f49870a 100644 --- a/qse/lib/awk/parse.c +++ b/qse/lib/awk/parse.c @@ -364,6 +364,15 @@ static global_t gtab[] = { QSE_T("RS"), 2, 0 }, { QSE_T("RSTART"), 6, 0 }, + + /* it decides the field construction behavior when FS is a regular expression and + * the field splitter is composed of whitespaces only. 
e.g. FS="[ \t]*"; + * if set to a positive value, remove leading spaces and trailing spaces off a record + * before field splitting. + * if set to zero, leading spaces and trailing spaces result in 1 empty field respectively. + * if not set or set to a negative value, the behavior is dependent on the awk->opt.trait & QSE_AWK_STRIPRECSPC */ + { QSE_T("STRIPRECSPC"), 11, 0 }, + { QSE_T("SUBSEP"), 6, 0 } }; diff --git a/qse/lib/awk/rec.c b/qse/lib/awk/rec.c index e57a7591..0730d39c 100644 --- a/qse/lib/awk/rec.c +++ b/qse/lib/awk/rec.c @@ -255,11 +255,13 @@ static int split_record (qse_awk_rtx_t* rtx) switch (how) { case 0: + /* 1 character FS */ p = qse_awk_rtx_strxntok ( rtx, p, len, fs_ptr, fs_len, &tok); break; case 1: + /* 5 character FS beginning with ? */ p = qse_awk_rtx_strxnfld ( rtx, p, len, fs_ptr[1], fs_ptr[2], @@ -267,6 +269,7 @@ static int split_record (qse_awk_rtx_t* rtx) break; default: + /* all other cases */ p = qse_awk_rtx_strxntokbyrex ( rtx, QSE_STR_PTR(&rtx->inrec.line), diff --git a/qse/lib/awk/run.c b/qse/lib/awk/run.c index 11d23a21..9c8a8627 100644 --- a/qse/lib/awk/run.c +++ b/qse/lib/awk/run.c @@ -474,8 +474,10 @@ static int set_global ( if (fs_len > 1 && !(fs_len == 5 && fs_ptr[0] == QSE_T('?'))) { + /* it's a regular expression if FS contains multiple characters. + * however, it's not a regular expression if it's a 5-character + * string beginning with a question mark. */ void* rex, * irex; - qse_awk_errnum_t errnum; if (qse_awk_buildrex (rtx->awk, fs_ptr, fs_len, &errnum, &rex, &irex) <= -1) @@ -499,11 +501,17 @@ static int set_global ( case QSE_AWK_GBL_IGNORECASE: { - qse_awk_val_type_t vtype = QSE_AWK_RTX_GETVALTYPE (rtx, val); - rtx->gbl.ignorecase = - ((vtype == QSE_AWK_VAL_INT && QSE_AWK_RTX_GETINTFROMVAL (rtx,val) != 0) || - (vtype == QSE_AWK_VAL_FLT && ((qse_awk_val_flt_t*)val)->val != 0.0) || - (vtype == QSE_AWK_VAL_STR && ((qse_awk_val_str_t*)val)->val.len != 0))? 
1: 0; + qse_awk_int_t l; + qse_awk_flt_t r; + int vt; + + vt = qse_awk_rtx_valtonum(rtx, val, &l, &r); + if (vt <= -1) return -1; + + if (vt == 0) + rtx->gbl.ignorecase = ((l > 0)? 1: (l < 0)? -1: 0); + else + rtx->gbl.ignorecase = ((r > 0.0)? 1: (r < 0.0)? -1: 0); break; } @@ -512,12 +520,12 @@ static int set_global ( int n; qse_awk_int_t lv; - n = qse_awk_rtx_valtoint (rtx, val, &lv); + n = qse_awk_rtx_valtoint(rtx, val, &lv); if (n <= -1) return -1; if (lv < (qse_awk_int_t)rtx->inrec.nflds) { - if (shorten_record (rtx, (qse_size_t)lv) == -1) + if (shorten_record(rtx, (qse_size_t)lv) == -1) { /* adjust the error line */ /*if (var) ADJERR_LOC (rtx, &var->loc);*/ @@ -534,7 +542,7 @@ static int set_global ( int n; qse_awk_int_t lv; - n = qse_awk_rtx_valtoint (rtx, val, &lv); + n = qse_awk_rtx_valtoint(rtx, val, &lv); if (n <= -1) return -1; rtx->gbl.nr = lv; @@ -547,7 +555,7 @@ static int set_global ( qse_awk_rtx_valtostr_out_t out; out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP; - if (qse_awk_rtx_valtostr (rtx, val, &out) <= -1) return -1; + if (qse_awk_rtx_valtostr(rtx, val, &out) <= -1) return -1; for (i = 0; i < out.u.cpldup.len; i++) { @@ -572,7 +580,7 @@ static int set_global ( qse_awk_rtx_valtostr_out_t out; out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP; - if (qse_awk_rtx_valtostr (rtx, val, &out) <= -1) return -1; + if (qse_awk_rtx_valtostr(rtx, val, &out) <= -1) return -1; if (rtx->gbl.ofs.ptr != QSE_NULL) QSE_AWK_FREE (rtx->awk, rtx->gbl.ofs.ptr); @@ -587,7 +595,7 @@ static int set_global ( qse_awk_rtx_valtostr_out_t out; out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP; - if (qse_awk_rtx_valtostr (rtx, val, &out) <= -1) return -1; + if (qse_awk_rtx_valtostr(rtx, val, &out) <= -1) return -1; if (rtx->gbl.ors.ptr != QSE_NULL) QSE_AWK_FREE (rtx->awk, rtx->gbl.ors.ptr); @@ -615,7 +623,7 @@ static int set_global ( QSE_ASSERT (vtype != QSE_AWK_VAL_REX); out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP; - if (qse_awk_rtx_valtostr (rtx, val, &out) <= -1) return -1; + if (qse_awk_rtx_valtostr(rtx, 
val, &out) <= -1) return -1; rss = out.u.cpldup; } @@ -633,7 +641,7 @@ static int set_global ( qse_awk_errnum_t errnum; /* compile the regular expression */ - if (qse_awk_buildrex (rtx->awk, rss.ptr, rss.len, &errnum, &rex, &irex) <= -1) + if (qse_awk_buildrex(rtx->awk, rss.ptr, rss.len, &errnum, &rex, &irex) <= -1) { SETERR_COD (rtx, errnum); if (vtype != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, rss.ptr); @@ -654,7 +662,7 @@ static int set_global ( qse_awk_rtx_valtostr_out_t out; out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP; - if (qse_awk_rtx_valtostr (rtx, val, &out) <= -1) return -1; + if (qse_awk_rtx_valtostr(rtx, val, &out) <= -1) return -1; if (rtx->gbl.subsep.ptr) QSE_AWK_FREE (rtx->awk, rtx->gbl.subsep.ptr); @@ -663,6 +671,22 @@ static int set_global ( break; } + + case QSE_AWK_GBL_STRIPRECSPC: + { + qse_awk_int_t l; + qse_awk_flt_t r; + int vt; + + vt = qse_awk_rtx_valtonum(rtx, val, &l, &r); + if (vt <= -1) return -1; + + if (vt == 0) + rtx->gbl.striprecspc = ((l > 0)? 1: (l < 0)? -1: 0); + else + rtx->gbl.striprecspc = ((r > 0.0)? 1: (r < 0.0)? -1: 0); + break; + } } qse_awk_rtx_refdownval (rtx, old); @@ -1026,6 +1050,7 @@ static int init_rtx (qse_awk_rtx_t* rtx, qse_awk_t* awk, qse_awk_rio_t* rio) rtx->gbl.fs[0] = QSE_NULL; rtx->gbl.fs[1] = QSE_NULL; rtx->gbl.ignorecase = 0; + rtx->gbl.striprecspc = -1; return 0;