From d841c9f62f482e93e8dd8237cf7dc93319fa9e43 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Fri, 23 Aug 2013 15:19:29 +0000 Subject: [PATCH] changed awk to handle IGNORECASE with a regular expression engine that doesn't have a run-time option but has a compile-time option --- qse/include/qse/awk/awk.h | 32 +- qse/include/qse/cmn/tre.h | 9 - qse/lib/awk/awk.h | 4 +- qse/lib/awk/fnc.c | 192 ++-- qse/lib/awk/misc.c | 209 ++++- qse/lib/awk/misc.h | 16 +- qse/lib/awk/parse.c | 6 +- qse/lib/awk/rec.c | 13 +- qse/lib/awk/rio.c | 8 +- qse/lib/awk/run.c | 180 ++-- qse/lib/awk/tree.c | 6 +- qse/lib/awk/tree.h | 2 +- qse/lib/awk/val.c | 20 +- qse/lib/cmn/tre-ast.c | 21 +- qse/lib/cmn/tre-compile.c | 52 +- qse/lib/cmn/tre-match-backtrack.c | 51 +- qse/lib/cmn/tre-match-parallel.c | 28 +- qse/lib/cmn/tre-match-utils.h | 41 +- qse/lib/cmn/tre-parse.c | 1449 ++++++++++++++--------------- qse/lib/cmn/tre.c | 18 - qse/lib/cmn/tre.h | 6 +- 21 files changed, 1127 insertions(+), 1236 deletions(-) diff --git a/qse/include/qse/awk/awk.h b/qse/include/qse/awk/awk.h index 68b92394..b83b584c 100644 --- a/qse/include/qse/awk/awk.h +++ b/qse/include/qse/awk/awk.h @@ -110,21 +110,21 @@ typedef struct qse_awk_loc_t qse_awk_loc_t; * Three common fields are: * - type - type of a value from #qse_awk_val_type_t * - ref - reference count - * - nstr - numeric string marker + * - stat - static value + * - nstr - numeric string marker, 1 -> long, 2 -> real */ -#if QSE_SIZEOF_INT == 2 -# define QSE_AWK_VAL_HDR \ - unsigned int type: 3; \ - unsigned int ref: 10; \ - unsigned int stat: 1; \ - unsigned int nstr: 2 -#else -# define QSE_AWK_VAL_HDR \ - unsigned int type: 3; \ - unsigned int ref: 26; \ - unsigned int stat: 1; \ - unsigned int nstr: 2 -#endif +/* +#define QSE_AWK_VAL_HDR \ + unsigned int type: 3; \ + unsigned int ref: 26; \ + unsigned int stat: 1; \ + unsigned int nstr: 2; +*/ +#define QSE_AWK_VAL_HDR \ + qse_uintptr_t type: 3; \ + qse_uintptr_t ref: ((QSE_SIZEOF_UINTPTR_T * 8) - 6); \ + qse_uintptr_t stat: 1; \ + qse_uintptr_t nstr: 2; /** * The qse_awk_val_t type is an abstract value type. A value commonly contains: @@ -191,7 +191,7 @@ struct qse_awk_val_rex_t { QSE_AWK_VAL_HDR; qse_xstr_t str; - void* code; + void* code[2]; }; typedef struct qse_awk_val_rex_t qse_awk_val_rex_t; @@ -2388,7 +2388,7 @@ QSE_EXPORT qse_awk_val_t* qse_awk_rtx_makenstrvalwithcstr ( QSE_EXPORT qse_awk_val_t* qse_awk_rtx_makerexval ( qse_awk_rtx_t* rtx, const qse_cstr_t* str, - void* code + void* code[2] ); /** diff --git a/qse/include/qse/cmn/tre.h b/qse/include/qse/cmn/tre.h index a14f7a3a..d2446528 100644 --- a/qse/include/qse/cmn/tre.h +++ b/qse/include/qse/cmn/tre.h @@ -99,15 +99,6 @@ enum qse_tre_eflag_t QSE_TRE_NOTEOL = (1 << 2) }; -typedef struct qse_tre_strsrc_t qse_tre_strsrc_t; -struct qse_tre_strsrc_t -{ - int (*get_next_char) (qse_char_t *c, unsigned int* pos_add, void* context); - void (*rewind)(qse_size_t pos, void *context); - int (*compare)(qse_size_t pos1, qse_size_t pos2, qse_size_t len, void* context); - void* context; -}; - #ifdef __cplusplus extern "C" { #endif diff --git a/qse/lib/awk/awk.h b/qse/lib/awk/awk.h index c30b07e7..56cc0b3d 100644 --- a/qse/lib/awk/awk.h +++ b/qse/lib/awk/awk.h @@ -307,8 +307,8 @@ struct qse_awk_rtx_t struct { - void* rs; - void* fs; + void* rs[2]; + void* fs[2]; int ignorecase; qse_long_t nr; diff --git a/qse/lib/awk/fnc.c b/qse/lib/awk/fnc.c index 3db10646..b2174ce6 100644 --- a/qse/lib/awk/fnc.c +++ b/qse/lib/awk/fnc.c @@ -624,7 +624,7 @@ static int fnc_substr (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi) return 0; } -static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) +static int fnc_split (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi) { qse_size_t nargs; qse_awk_val_t* a0, * a1, * a2, * t1, * t2; @@ -642,12 +642,12 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) qse_awk_errnum_t errnum; int x; - nargs = qse_awk_rtx_getnargs (run); + nargs = qse_awk_rtx_getnargs (rtx); QSE_ASSERT (nargs >= 2 && nargs <= 3); - a0 = qse_awk_rtx_getarg (run, 0); - a1 = qse_awk_rtx_getarg (run, 1); - a2 = (nargs >= 3)? qse_awk_rtx_getarg (run, 2): QSE_NULL; + a0 = qse_awk_rtx_getarg (rtx, 0); + a1 = qse_awk_rtx_getarg (rtx, 1); + a2 = (nargs >= 3)? qse_awk_rtx_getarg (rtx, 2): QSE_NULL; QSE_ASSERT (a1->type == QSE_AWK_VAL_REF); @@ -658,7 +658,7 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) } else { - str.ptr = qse_awk_rtx_valtostrdup (run, a0, &str.len); + str.ptr = qse_awk_rtx_valtostrdup (rtx, a0, &str.len); if (str.ptr == QSE_NULL) return -1; str_free = (qse_char_t*)str.ptr; } @@ -666,7 +666,7 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) if (a2 == QSE_NULL) { /* get the value from FS */ - t1 = qse_awk_rtx_getgbl (run, QSE_AWK_GBL_FS); + t1 = qse_awk_rtx_getgbl (rtx, QSE_AWK_GBL_FS); if (t1->type == QSE_AWK_VAL_NIL) { fs.ptr = QSE_T(" "); @@ -679,17 +679,17 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) } else { - fs.ptr = qse_awk_rtx_valtostrdup (run, t1, &fs.len); + fs.ptr = qse_awk_rtx_valtostrdup (rtx, t1, &fs.len); if (fs.ptr == QSE_NULL) goto oops; fs_free = (qse_char_t*)fs.ptr; } - if (fs.len > 1) fs_rex = run->gbl.fs; + if (fs.len > 1) fs_rex = rtx->gbl.fs[rtx->gbl.ignorecase]; } else if (a2->type == QSE_AWK_VAL_REX) { /* the third parameter is a regular expression */ - fs_rex = ((qse_awk_val_rex_t*)a2)->code; + fs_rex = ((qse_awk_val_rex_t*)a2)->code[rtx->gbl.ignorecase]; /* make the loop below to take fs_rex by * setting fs_len greater than 1*/ @@ -705,30 +705,36 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) } else { - fs.ptr = qse_awk_rtx_valtostrdup (run, a2, &fs.len); + fs.ptr = qse_awk_rtx_valtostrdup (rtx, a2, &fs.len); if (fs.ptr == QSE_NULL) goto oops; fs_free = (qse_char_t*)fs.ptr; } if (fs.len > 1) { - fs_rex = qse_awk_buildrex ( - run->awk, fs.ptr, fs.len, &errnum); - if (fs_rex == QSE_NULL) + int x; + + if (rtx->gbl.ignorecase) + x = qse_awk_buildrex (rtx->awk, fs.ptr, fs.len, &errnum, QSE_NULL, &fs_rex); + else + x = qse_awk_buildrex (rtx->awk, fs.ptr, fs.len, &errnum, &fs_rex, QSE_NULL); + + if (x <= -1) { - qse_awk_rtx_seterrnum (run, errnum, QSE_NULL); + qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL); goto oops; } + fs_rex_free = fs_rex; } } - t1 = qse_awk_rtx_makemapval (run); + t1 = qse_awk_rtx_makemapval (rtx); if (t1 == QSE_NULL) goto oops; - qse_awk_rtx_refupval (run, t1); - x = qse_awk_rtx_setrefval (run, a1, t1); - qse_awk_rtx_refdownval (run, t1); + qse_awk_rtx_refupval (rtx, t1); + x = qse_awk_rtx_setrefval (rtx, (qse_awk_val_ref_t*)a1, t1); + qse_awk_rtx_refdownval (rtx, t1); if (x <= -1) goto oops; /* fill the map with actual values */ @@ -742,18 +748,18 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) if (fs.len <= 1) { - p = qse_awk_rtx_strxntok (run, + p = qse_awk_rtx_strxntok (rtx, p, str.len, fs.ptr, fs.len, &tok); } else { p = qse_awk_rtx_strxntokbyrex ( - run, str.ptr, org_len, p, str.len, + rtx, str.ptr, org_len, p, str.len, fs_rex, &tok, &errnum ); if (p == QSE_NULL && errnum != QSE_AWK_ENOERR) { - qse_awk_rtx_seterrnum (run, errnum, QSE_NULL); + qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL); goto oops; } } @@ -768,42 +774,54 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) /* create the field string - however, the split function must * create a numeric string if the string is a number */ - /*t2 = qse_awk_rtx_makestrvalwithcstr (run, &tok);*/ - t2 = qse_awk_rtx_makenstrvalwithcstr (run, &tok); + /*t2 = qse_awk_rtx_makestrvalwithcstr (rtx, &tok);*/ + t2 = qse_awk_rtx_makenstrvalwithcstr (rtx, &tok); if (t2 == QSE_NULL) goto oops; /* put it into the map */ key_len = qse_awk_longtostr ( - run->awk, ++nflds, 10, QSE_NULL, key_buf, QSE_COUNTOF(key_buf)); + rtx->awk, ++nflds, 10, QSE_NULL, key_buf, QSE_COUNTOF(key_buf)); QSE_ASSERT (key_len != (qse_size_t)-1); if (qse_awk_rtx_setmapvalfld ( - run, t1, key_buf, key_len, t2) == QSE_NULL) + rtx, t1, key_buf, key_len, t2) == QSE_NULL) { - qse_awk_rtx_refupval (run, t2); - qse_awk_rtx_refdownval (run, t2); + qse_awk_rtx_refupval (rtx, t2); + qse_awk_rtx_refdownval (rtx, t2); goto oops; } str.len = str_left - (p - str.ptr); } - if (str_free) QSE_AWK_FREE (run->awk, str_free); - if (fs_free) QSE_AWK_FREE (run->awk, fs_free); - if (fs_rex_free) qse_awk_freerex (run->awk, fs_rex_free); + if (str_free) QSE_AWK_FREE (rtx->awk, str_free); + if (fs_free) QSE_AWK_FREE (rtx->awk, fs_free); + if (fs_rex_free) + { + if (rtx->gbl.ignorecase) + qse_awk_freerex (rtx->awk, QSE_NULL, fs_rex_free); + else + qse_awk_freerex (rtx->awk, fs_rex_free, QSE_NULL); + } /*nflds--;*/ - t1 = qse_awk_rtx_makeintval (run, nflds); + t1 = qse_awk_rtx_makeintval (rtx, nflds); if (t1 == QSE_NULL) return -1; - qse_awk_rtx_setretval (run, t1); + qse_awk_rtx_setretval (rtx, t1); return 0; oops: - if (str_free) QSE_AWK_FREE (run->awk, str_free); - if (fs_free) QSE_AWK_FREE (run->awk, fs_free); - if (fs_rex_free) qse_awk_freerex (run->awk, fs_rex_free); + if (str_free) QSE_AWK_FREE (rtx->awk, str_free); + if (fs_free) QSE_AWK_FREE (rtx->awk, fs_free); + if (fs_rex_free) + { + if (rtx->gbl.ignorecase) + qse_awk_freerex (rtx->awk, QSE_NULL, fs_rex_free); + else + qse_awk_freerex (rtx->awk, fs_rex_free, QSE_NULL); + } return -1; } @@ -832,7 +850,7 @@ static int fnc_tolower (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) for (i = 0; i < str.len; i++) str.ptr[i] = QSE_AWK_TOLOWER (run->awk, str.ptr[i]); - r = qse_awk_rtx_makestrvalwithcstr (run, &str); + r = qse_awk_rtx_makestrvalwithcstr (run, (qse_cstr_t*)&str); if (r == QSE_NULL) { if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str.ptr); @@ -869,7 +887,7 @@ static int fnc_toupper (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) for (i = 0; i < str.len; i++) str.ptr[i] = QSE_AWK_TOUPPER (run->awk, str.ptr[i]); - r = qse_awk_rtx_makestrvalwithcstr (run, &str); + r = qse_awk_rtx_makestrvalwithcstr (run, (qse_cstr_t*)&str); if (r == QSE_NULL) { if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str.ptr); @@ -881,11 +899,10 @@ static int fnc_toupper (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) return 0; } - static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count) { qse_size_t nargs; - qse_awk_val_t* a0, * a1, * a2, ** a2_ref, * v; + qse_awk_val_t* a0, * a1, * a2, * v; qse_cstr_t s0, s1, s2; const qse_char_t* s2_end; @@ -898,7 +915,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count) void* rex_free = QSE_NULL; qse_str_t new; - int new_inited = 0, opt; + int new_inited = 0; qse_cstr_t mat, pmat, cur; @@ -915,7 +932,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count) if (a0->type == QSE_AWK_VAL_REX) { - rex = ((qse_awk_val_rex_t*)a0)->code; + rex = ((qse_awk_val_rex_t*)a0)->code[run->gbl.ignorecase]; } else if (a0->type == QSE_AWK_VAL_STR) { @@ -964,10 +981,14 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count) if (a0->type != QSE_AWK_VAL_REX) { qse_awk_errnum_t errnum; + int x; - rex = qse_awk_buildrex ( - run->awk, s0.ptr, s0.len, &errnum); - if (rex == QSE_NULL) + if (run->gbl.ignorecase) + x = qse_awk_buildrex (run->awk, s0.ptr, s0.len, &errnum, QSE_NULL, &rex); + else + x = qse_awk_buildrex (run->awk, s0.ptr, s0.len, &errnum, &rex, QSE_NULL); + + if (x <= -1) { qse_awk_rtx_seterrnum (run, errnum, QSE_NULL); goto oops; @@ -976,8 +997,6 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count) rex_free = rex; } - opt = (run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0; - s2_end = s2.ptr + s2.len; cur.ptr = s2.ptr; cur.len = s2.len; @@ -997,7 +1016,8 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count) if (max_count == 0 || sub_count < max_count) { n = qse_awk_matchrex ( - run->awk, rex, opt, &s2, &cur, &mat, &errnum + run->awk, rex, run->gbl.ignorecase, + &s2, &cur, &mat, &errnum ); } else n = 0; @@ -1085,7 +1105,10 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count) if (rex_free) { - qse_awk_freerex (run->awk, rex_free); + if (run->gbl.ignorecase) + qse_awk_freerex (run->awk, QSE_NULL, rex_free); + else + qse_awk_freerex (run->awk, rex_free, QSE_NULL); rex_free = QSE_NULL; } @@ -1104,7 +1127,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count) v = qse_awk_rtx_makestrvalwithcstr (run, QSE_STR_CSTR(&new)); if (v == QSE_NULL) goto oops; qse_awk_rtx_refupval (run, v); - n = qse_awk_rtx_setrefval (run, a2, v); + n = qse_awk_rtx_setrefval (run, (qse_awk_val_ref_t*)a2, v); qse_awk_rtx_refdownval (run, v); if (n <= -1) goto oops; } @@ -1123,7 +1146,13 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count) return 0; oops: - if (rex_free) qse_awk_freerex (run->awk, rex_free); + if (rex_free) + { + if (run->gbl.ignorecase) + qse_awk_freerex (run->awk, QSE_NULL, rex_free); + else + qse_awk_freerex (run->awk, rex_free, QSE_NULL); + } if (new_inited) qse_str_fini (&new); if (s2_free) QSE_AWK_FREE (run->awk, s2_free); if (s1_free) QSE_AWK_FREE (run->awk, s1_free); @@ -1145,13 +1174,11 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi) { qse_size_t nargs; qse_awk_val_t* a0, * a1; - qse_char_t* str0, * str1; - qse_size_t len0, len1; + qse_char_t* str0; + qse_size_t len0; qse_long_t idx, start = 1; - void* rex; int n; qse_cstr_t mat; - qse_awk_errnum_t errnum; nargs = qse_awk_rtx_getnargs (rtx); QSE_ASSERT (nargs >= 2 && nargs <= 3); @@ -1195,42 +1222,6 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi) if (str0 == QSE_NULL) return -1; } - if (a1->type == QSE_AWK_VAL_REX) - { - rex = ((qse_awk_val_rex_t*)a1)->code; - } - else - { - qse_awk_errnum_t errnum; - - if (a1->type == QSE_AWK_VAL_STR) - { - str1 = ((qse_awk_val_str_t*)a1)->val.ptr; - len1 = ((qse_awk_val_str_t*)a1)->val.len; - } - else - { - str1 = qse_awk_rtx_valtostrdup (rtx, a1, &len1); - if (str1 == QSE_NULL) - { - if (a0->type != QSE_AWK_VAL_STR) - QSE_AWK_FREE (rtx->awk, str0); - return -1; - } - } - - rex = qse_awk_buildrex (rtx->awk, str1, len1, &errnum); - if (rex == QSE_NULL) - { - if (a0->type != QSE_AWK_VAL_STR) - QSE_AWK_FREE (rtx->awk, str0); - qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL); - return -1; - } - - if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1); - } - if (start == 0) start = 1; else if (start < 0) start = len0 + start + 1; @@ -1239,24 +1230,15 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi) { qse_cstr_t tmp; - /*TODO: must use str0,len0?*/ + /*TODO: must use str0,len0? */ tmp.ptr = str0 + start - 1; tmp.len = len0 - start + 1; - n = qse_awk_matchrex ( - rtx->awk, rex, - (rtx->gbl.ignorecase? QSE_REX_IGNORECASE: 0), - &tmp, &tmp, &mat, &errnum - ); + + n = qse_awk_rtx_matchrex (rtx, a1, &tmp, &tmp, &mat); + if (n <= -1) return -1; } if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0); - if (a1->type != QSE_AWK_VAL_REX) qse_awk_freerex (rtx->awk, rex); - - if (n <= -1) - { - qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL); - return -1; - } idx = (n == 0)? 0: ((qse_long_t)(mat.ptr-str0) + 1); @@ -1338,7 +1320,7 @@ static int fnc_sprintf (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, cs0.ptr); if (x.ptr == QSE_NULL) goto oops; - a0 = qse_awk_rtx_makestrvalwithcstr (run, &x); + a0 = qse_awk_rtx_makestrvalwithcstr (run, (qse_cstr_t*)&x); if (a0 == QSE_NULL) goto oops; qse_str_fini (&fbu); diff --git a/qse/lib/awk/misc.c b/qse/lib/awk/misc.c index 0758f9f3..e1b5f15a 100644 --- a/qse/lib/awk/misc.c +++ b/qse/lib/awk/misc.c @@ -20,7 +20,7 @@ #include "awk.h" -#define USE_REX +/*#define USE_REX */ #if defined(USE_REX) # include @@ -892,8 +892,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex ( while (cursub.len > 0) { n = qse_awk_matchrex ( - rtx->awk, rex, - ((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0), + rtx->awk, rex, rtx->gbl.ignorecase, &s, &cursub, &match, errnum); if (n == -1) return QSE_NULL; if (n == 0) @@ -1090,56 +1089,97 @@ static QSE_INLINE int rexerr_to_errnum (int err) } } -void* qse_awk_buildrex ( - qse_awk_t* awk, const qse_char_t* ptn, - qse_size_t len, qse_awk_errnum_t* errnum) +int qse_awk_buildrex ( + qse_awk_t* awk, const qse_char_t* ptn, qse_size_t len, + qse_awk_errnum_t* errnum, void** code, void** icode) { #if defined(USE_REX) qse_rex_errnum_t err; void* p; - p = qse_buildrex ( - awk->mmgr, awk->opt.depth.s.rex_build, - ((awk->opt.trait & QSE_AWK_REXBOUND)? 0: QSE_REX_NOBOUND), - ptn, len, &err - ); - if (p == QSE_NULL) *errnum = rexerr_to_errnum(err); - return p; + if (code || icode) + { + p = qse_buildrex ( + awk->mmgr, awk->opt.depth.s.rex_build, + ((awk->opt.trait & QSE_AWK_REXBOUND)? 0: QSE_REX_NOBOUND), + ptn, len, &err + ); + if (p == QSE_NULL) + { + *errnum = rexerr_to_errnum(err); + return -1; + } + + if (code) *code = p; + if (icode) *icode = p; + } + + return 0; #else - qse_tre_t* tre; + qse_tre_t* tre = QSE_NULL; + qse_tre_t* itre = QSE_NULL; int opt = QSE_TRE_EXTENDED; - tre = qse_tre_open (awk->mmgr, 0); - if (tre == QSE_NULL) + if (code) { - *errnum = QSE_AWK_ENOMEM; - return QSE_NULL; - } + tre = qse_tre_open (awk->mmgr, 0); + if (tre == QSE_NULL) + { + *errnum = QSE_AWK_ENOMEM; + return -1; + } - /* ignorecase is a compile option for TRE */ -#if 0 /* TODO */ - if (ignorecase) opt |= QSE_TRE_IGNORECASE; -#endif - if (!(awk->opt.trait & QSE_AWK_REXBOUND)) opt |= QSE_TRE_NOBOUND; + if (!(awk->opt.trait & QSE_AWK_REXBOUND)) opt |= QSE_TRE_NOBOUND; - if (qse_tre_compx (tre, ptn, len, QSE_NULL, opt) <= -1) - { + if (qse_tre_compx (tre, ptn, len, QSE_NULL, opt) <= -1) + { #if 0 /* TODO */ - if (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM) *errnum = QSE_AWK_ENOMEM; - else - SETERR1 (awk, QSE_AWK_EREXBL, str->ptr, str->len, loc); + if (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM) *errnum = QSE_AWK_ENOMEM; + else + SETERR1 (awk, QSE_AWK_EREXBL, str->ptr, str->len, loc); #endif - *errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)? - QSE_AWK_ENOMEM: QSE_AWK_EREXBL; - qse_tre_close (tre); - return QSE_NULL; + *errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)? + QSE_AWK_ENOMEM: QSE_AWK_EREXBL; + qse_tre_close (tre); + return -1; + } } - return tre; + if (icode) + { + itre = qse_tre_open (awk->mmgr, 0); + if (itre == QSE_NULL) + { + if (tre) qse_tre_close (tre); + *errnum = QSE_AWK_ENOMEM; + return -1; + } + + /* ignorecase is a compile option for TRE */ + if (qse_tre_compx (itre, ptn, len, QSE_NULL, opt | QSE_TRE_IGNORECASE) <= -1) + { +#if 0 /* TODO */ + + if (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM) *errnum = QSE_AWK_ENOMEM; + else + SETERR1 (awk, QSE_AWK_EREXBL, str->ptr, str->len, loc); +#endif + *errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)? + QSE_AWK_ENOMEM: QSE_AWK_EREXBL; + qse_tre_close (itre); + if (tre) qse_tre_close (tre); + return -1; + } + } + + if (code) *code = tre; + if (icode) *icode = itre; + return 0; #endif } + #if !defined(USE_REX) static int matchtre ( @@ -1192,7 +1232,7 @@ static int matchtre ( #endif int qse_awk_matchrex ( - qse_awk_t* awk, void* code, int option, + qse_awk_t* awk, void* code, int icase, const qse_cstr_t* str, const qse_cstr_t* substr, qse_cstr_t* match, qse_awk_errnum_t* errnum) { @@ -1201,8 +1241,8 @@ int qse_awk_matchrex ( qse_rex_errnum_t err; x = qse_matchrex ( - awk->mmgr, awk->opt.depth.s.rex_match, - code, option, str, substr, match, &err); + awk->mmgr, awk->opt.depth.s.rex_match, code, + (icase? QSE_REX_IGNORECASE: 0), str, substr, match, &err); if (x <= -1) *errnum = rexerr_to_errnum(err); return x; #else @@ -1218,13 +1258,102 @@ int qse_awk_matchrex ( #endif } -void qse_awk_freerex (qse_awk_t* awk, void* code) +void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode) { + if (code) + { #if defined(USE_REX) - qse_freerex((awk)->mmgr,code); + qse_freerex ((awk)->mmgr, code); #else - qse_tre_close (code); + qse_tre_close (code); #endif + } + + if (icode && icode != code) + { +#if defined(USE_REX) + qse_freerex ((awk)->mmgr, icode); +#else + qse_tre_close (icode); +#endif + } +} + +int qse_awk_rtx_matchrex ( + qse_awk_rtx_t* rtx, qse_awk_val_t* val, + const qse_cstr_t* str, const qse_cstr_t* substr, qse_cstr_t* match) +{ + void* code; + int icase, x; + qse_awk_errnum_t awkerr; +#if defined(USE_REX) + qse_rex_errnum_t rexerr; +#endif + + icase = rtx->gbl.ignorecase; + + if (val->type == QSE_AWK_VAL_REX) + { + code = ((qse_awk_val_rex_t*)val)->code[icase]; + } + else if (val->type == QSE_AWK_VAL_STR) + { + /* build a regular expression */ + qse_awk_val_str_t* strv = (qse_awk_val_str_t*)val; + x = icase? qse_awk_buildrex (rtx->awk, strv->val.ptr, strv->val.len, &awkerr, QSE_NULL, &code): + qse_awk_buildrex (rtx->awk, strv->val.ptr, strv->val.len, &awkerr, &code, QSE_NULL); + if (x <= -1) + { + qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL); + return -1; + } + } + else + { + /* convert to a string and build a regular expression */ + + qse_xstr_t tmp; + tmp.ptr = qse_awk_rtx_valtostrdup (rtx, val, &tmp.len); + if (tmp.ptr == QSE_NULL) return -1; + + x = icase? qse_awk_buildrex (rtx->awk, tmp.ptr, tmp.len, &awkerr, QSE_NULL, &code): + qse_awk_buildrex (rtx->awk, tmp.ptr, tmp.len, &awkerr, &code, QSE_NULL); + qse_awk_rtx_freemem (rtx, tmp.ptr); + if (x <= -1) + { + qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL); + return -1; + } + } + +#if defined(USE_REX) + x = qse_matchrex ( + rtx->awk->mmgr, rtx->awk->opt.depth.s.rex_match, + code, (icase? QSE_REX_IGNORECASE: 0), + str, substr, match, &rexerr); + if (x <= -1) qse_awk_rtx_seterrnum (rtx, rexerr_to_errnum(rexerr), QSE_NULL); +#else + x = matchtre ( + rtx->awk, code, + ((str->ptr == substr->ptr)? QSE_TRE_BACKTRACKING: (QSE_TRE_BACKTRACKING | QSE_TRE_NOTBOL)), + substr, match, QSE_NULL, &awkerr + ); + if (x <= -1) qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL); +#endif + + if (val->type == QSE_AWK_VAL_REX) + { + /* nothing to free */ + } + else + { + if (icase) + qse_awk_freerex (rtx->awk, QSE_NULL, code); + else + qse_awk_freerex (rtx->awk, code, QSE_NULL); + } + + return x; } void* qse_awk_rtx_allocmem (qse_awk_rtx_t* rtx, qse_size_t size) diff --git a/qse/lib/awk/misc.h b/qse/lib/awk/misc.h index 49a78563..15ea3fcd 100644 --- a/qse/lib/awk/misc.h +++ b/qse/lib/awk/misc.h @@ -63,20 +63,28 @@ qse_char_t* qse_awk_rtx_strxnfld ( qse_cstr_t* tok ); -void* qse_awk_buildrex ( +int qse_awk_buildrex ( qse_awk_t* awk, const qse_char_t* ptn, qse_size_t len, - qse_awk_errnum_t* errnum + qse_awk_errnum_t* errnum, + void** code, + void** icode ); int qse_awk_matchrex ( - qse_awk_t* awk, void* code, int option, + qse_awk_t* awk, void* code, int icase, const qse_cstr_t* str, const qse_cstr_t* substr, qse_cstr_t* match, qse_awk_errnum_t* errnum ); -void qse_awk_freerex (qse_awk_t* awk, void* code); +void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode); + +int qse_awk_rtx_matchrex ( + qse_awk_rtx_t* rtx, qse_awk_val_t* val, + const qse_cstr_t* str, const qse_cstr_t* substr, + qse_cstr_t* match +); int qse_awk_sprintflt ( qse_awk_t* awk, diff --git a/qse/lib/awk/parse.c b/qse/lib/awk/parse.c index 63cf6048..4a182653 100644 --- a/qse/lib/awk/parse.c +++ b/qse/lib/awk/parse.c @@ -4351,9 +4351,7 @@ static qse_awk_nde_t* parse_primary_rex (qse_awk_t* awk, const qse_awk_loc_t* x nde->str.ptr = qse_awk_cstrdup (awk, QSE_STR_CSTR(awk->tok.name)); if (nde->str.ptr == QSE_NULL) goto oops; - nde->code = qse_awk_buildrex ( - awk, QSE_STR_PTR(awk->tok.name), QSE_STR_LEN(awk->tok.name), &errnum); - if (nde->code == QSE_NULL) + if (qse_awk_buildrex (awk, QSE_STR_PTR(awk->tok.name), QSE_STR_LEN(awk->tok.name), &errnum, &nde->code[0], &nde->code[1]) <= -1) { SETERR_LOC (awk, errnum, xloc); goto oops; @@ -4365,7 +4363,7 @@ static qse_awk_nde_t* parse_primary_rex (qse_awk_t* awk, const qse_awk_loc_t* x oops: QSE_ASSERT (nde != QSE_NULL); - if (nde->code) qse_awk_freerex (awk, nde->code); + if (nde->code[0]) qse_awk_freerex (awk, nde->code[0], nde->code[1]); if (nde->str.ptr) QSE_AWK_FREE (awk, nde->str.ptr); QSE_AWK_FREE (awk, nde); return QSE_NULL; diff --git a/qse/lib/awk/rec.c b/qse/lib/awk/rec.c index a0d91d6f..483de023 100644 --- a/qse/lib/awk/rec.c +++ b/qse/lib/awk/rec.c @@ -122,13 +122,8 @@ static int split_record (qse_awk_rtx_t* rtx) } else { - qse_awk_rtx_valtostr_out_t out; - - out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP; - if (qse_awk_rtx_valtostr (rtx, fs, &out) <= -1) return -1; - - fs_ptr = out.u.cpldup.ptr; - fs_len = out.u.cpldup.len; + fs_ptr = qse_awk_rtx_valtostrdup (rtx, fs, &fs_len); + if (fs_ptr == QSE_NULL) return -1; fs_free = fs_ptr; } @@ -178,7 +173,7 @@ static int split_record (qse_awk_rtx_t* rtx) QSE_STR_PTR(&rtx->inrec.line), QSE_STR_LEN(&rtx->inrec.line), p, len, - rtx->gbl.fs, &tok, &errnum + rtx->gbl.fs[rtx->gbl.ignorecase], &tok, &errnum ); if (p == QSE_NULL && errnum != QSE_AWK_ENOERR) { @@ -268,7 +263,7 @@ static int split_record (qse_awk_rtx_t* rtx) QSE_STR_PTR(&rtx->inrec.line), QSE_STR_LEN(&rtx->inrec.line), p, len, - rtx->gbl.fs, &tok, &errnum + rtx->gbl.fs[rtx->gbl.ignorecase], &tok, &errnum ); if (p == QSE_NULL && errnum != QSE_AWK_ENOERR) { diff --git a/qse/lib/awk/rio.c b/qse/lib/awk/rio.c index fd074e8e..c397f8da 100644 --- a/qse/lib/awk/rio.c +++ b/qse/lib/awk/rio.c @@ -223,12 +223,12 @@ static QSE_INLINE int match_long_rs ( qse_awk_errnum_t errnum; int ret; - QSE_ASSERT (run->gbl.rs != QSE_NULL); + QSE_ASSERT (run->gbl.rs[0] != QSE_NULL); + QSE_ASSERT (run->gbl.rs[1] != QSE_NULL); ret = qse_awk_matchrex ( - run->awk, run->gbl.rs, - ((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0), - QSE_STR_CSTR(buf), QSE_STR_CSTR(buf), + run->awk, run->gbl.rs[run->gbl.ignorecase], + run->gbl.ignorecase, QSE_STR_CSTR(buf), QSE_STR_CSTR(buf), &match, &errnum); if (ret <= -1) { diff --git a/qse/lib/awk/run.c b/qse/lib/awk/run.c index af3d610b..a669998d 100644 --- a/qse/lib/awk/run.c +++ b/qse/lib/awk/run.c @@ -464,12 +464,11 @@ static int set_global ( if (fs_len > 1 && !(fs_len == 5 && fs_ptr[0] == QSE_T('?'))) { - void* rex; + void* rex, * irex; + qse_awk_errnum_t errnum; - rex = qse_awk_buildrex ( - rtx->awk, fs_ptr, fs_len, &errnum); - if (rex == QSE_NULL) + if (qse_awk_buildrex (rtx->awk, fs_ptr, fs_len, &errnum, &rex, &irex) <= -1) { SETERR_COD (rtx, errnum); if (val->type != QSE_AWK_VAL_STR) @@ -477,14 +476,14 @@ static int set_global ( return -1; } - if (rtx->gbl.fs != QSE_NULL) - qse_awk_freerex (rtx->awk, rtx->gbl.fs); + if (rtx->gbl.fs[0]) + qse_awk_freerex (rtx->awk, rtx->gbl.fs[0], rtx->gbl.fs[1]); - rtx->gbl.fs = rex; + rtx->gbl.fs[0] = rex; + rtx->gbl.fs[1] = irex; } - if (val->type != QSE_AWK_VAL_STR) - QSE_AWK_FREE (rtx->awk, fs_ptr); + if (val->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, fs_ptr); break; } @@ -613,33 +612,31 @@ static int set_global ( rss = out.u.cpldup; } - if (rtx->gbl.rs) + if (rtx->gbl.rs[0]) { - qse_awk_freerex (rtx->awk, rtx->gbl.rs); - rtx->gbl.rs = QSE_NULL; + qse_awk_freerex (rtx->awk, rtx->gbl.rs[0], rtx->gbl.rs[1]); + rtx->gbl.rs[0] = QSE_NULL; + rtx->gbl.rs[1] = QSE_NULL; } if (rss.len > 1) { - void* rex; + void* rex, * irex; qse_awk_errnum_t errnum; /* compile the regular expression */ - rex = qse_awk_buildrex ( - rtx->awk, rss.ptr, rss.len, &errnum); - if (rex == QSE_NULL) + if (qse_awk_buildrex (rtx->awk, rss.ptr, rss.len, &errnum, &rex, &irex) <= -1) { SETERR_COD (rtx, errnum); - if (val->type != QSE_AWK_VAL_STR) - QSE_AWK_FREE (rtx->awk, rss.ptr); + if (val->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, rss.ptr); return -1; } - rtx->gbl.rs = rex; + rtx->gbl.rs[0] = rex; + rtx->gbl.rs[1] = irex; } - if (val->type != QSE_AWK_VAL_STR) - QSE_AWK_FREE (rtx->awk, rss.ptr); + if (val->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, rss.ptr); break; } @@ -1016,8 +1013,10 @@ static int init_rtx (qse_awk_rtx_t* rtx, qse_awk_t* awk, qse_awk_rio_t* rio) rtx->rio.chain = QSE_NULL; } - rtx->gbl.rs = QSE_NULL; - rtx->gbl.fs = QSE_NULL; + rtx->gbl.rs[0] = QSE_NULL; + rtx->gbl.rs[1] = QSE_NULL; + rtx->gbl.fs[0] = QSE_NULL; + rtx->gbl.fs[1] = QSE_NULL; rtx->gbl.ignorecase = 0; return 0; @@ -1051,15 +1050,17 @@ static void fini_rtx (qse_awk_rtx_t* rtx, int fini_globals) qse_awk_rtx_cleario (rtx); QSE_ASSERT (rtx->rio.chain == QSE_NULL); - if (rtx->gbl.rs) + if (rtx->gbl.rs[0]) { - qse_awk_freerex (rtx->awk, rtx->gbl.rs); - rtx->gbl.rs = QSE_NULL; + qse_awk_freerex (rtx->awk, rtx->gbl.rs[0], rtx->gbl.rs[1]); + rtx->gbl.rs[0] = QSE_NULL; + rtx->gbl.rs[1] = QSE_NULL; } - if (rtx->gbl.fs) + if (rtx->gbl.fs[0]) { - qse_awk_freerex (rtx->awk, rtx->gbl.fs); - rtx->gbl.fs = QSE_NULL; + qse_awk_freerex (rtx->awk, rtx->gbl.fs[0], rtx->gbl.fs[1]); + rtx->gbl.fs[0] = QSE_NULL; + rtx->gbl.fs[1] = QSE_NULL; } if (rtx->gbl.convfmt.ptr != QSE_NULL && @@ -3208,7 +3209,6 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde) { qse_awk_val_t* v; int n; - qse_awk_errnum_t errnum; #if 0 if (rtx->exit_level >= EXIT_GLOBAL) @@ -3230,9 +3230,11 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde) qse_cstr_t vs; int opt = 0; - if (((qse_awk_rtx_t*)rtx)->gbl.ignorecase) - opt = QSE_REX_IGNORECASE; - + /* special case where a regular expression is used in + * without any match operators: + * print /abc/; + * perform match against $0. + */ qse_awk_rtx_refupval (rtx, v); if (rtx->inrec.d0->type == QSE_AWK_VAL_NIL) @@ -3253,23 +3255,13 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde) vs.len = ((qse_awk_val_str_t*)rtx->inrec.d0)->val.len; } - n = qse_awk_matchrex ( - ((qse_awk_rtx_t*)rtx)->awk, - ((qse_awk_val_rex_t*)v)->code, - opt, &vs, &vs, QSE_NULL, &errnum - ); - if (n <= -1) + n = qse_awk_rtx_matchrex (rtx, v, &vs, &vs, QSE_NULL); + if (n <= -1) { + ADJERR_LOC (rtx, &nde->loc); qse_awk_rtx_refdownval (rtx, v); - - /* matchrex should never set the error number - * whose message contains a formatting - * character. otherwise, the following way of - * setting the error information may not work */ - SETERR_LOC (rtx, errnum, &nde->loc); return QSE_NULL; } - qse_awk_rtx_refdownval (rtx, v); v = qse_awk_rtx_makeintval (rtx, (n != 0)); @@ -4974,115 +4966,50 @@ static qse_awk_val_t* eval_binop_match0 ( { qse_awk_val_t* res; int n; - qse_awk_errnum_t errnum; - void* rex_code; - - if (right->type == QSE_AWK_VAL_REX) - { - rex_code = ((qse_awk_val_rex_t*)right)->code; - } - else if (right->type == QSE_AWK_VAL_STR) - { - rex_code = qse_awk_buildrex ( - rtx->awk, - ((qse_awk_val_str_t*)right)->val.ptr, - ((qse_awk_val_str_t*)right)->val.len, &errnum); - if (rex_code == QSE_NULL) - { - SETERR_LOC (rtx, errnum, rloc); - return QSE_NULL; - } - } - else - { - qse_awk_rtx_valtostr_out_t out; - - out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP; - if (qse_awk_rtx_valtostr (rtx, right, &out) <= -1) return QSE_NULL; - - rex_code = qse_awk_buildrex ( - rtx->awk, out.u.cpldup.ptr, out.u.cpldup.len, &errnum); - if (rex_code == QSE_NULL) - { - QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr); - SETERR_LOC (rtx, errnum, rloc); - return QSE_NULL; - } - - QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr); - } if (left->type == QSE_AWK_VAL_STR) { - n = qse_awk_matchrex ( - rtx->awk, rex_code, - ((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0), + n = qse_awk_rtx_matchrex ( + rtx, right, xstr_to_cstr(&((qse_awk_val_str_t*)left)->val), - xstr_to_cstr(&((qse_awk_val_str_t*)left)->val), - QSE_NULL, &errnum); - if (n == -1) + xstr_to_cstr(&((qse_awk_val_str_t*)left)->val), QSE_NULL); + if (n <= -1) { - if (right->type != QSE_AWK_VAL_REX) - qse_awk_freerex (rtx->awk, rex_code); - - SETERR_LOC (rtx, errnum, lloc); + ADJERR_LOC (rtx, lloc); return QSE_NULL; } res = qse_awk_rtx_makeintval (rtx, (n == ret)); if (res == QSE_NULL) { - if (right->type != QSE_AWK_VAL_REX) - qse_awk_freerex (rtx->awk, rex_code); - ADJERR_LOC (rtx, lloc); return QSE_NULL; } } else { - qse_awk_rtx_valtostr_out_t out; + qse_xstr_t out; - out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP; - if (qse_awk_rtx_valtostr (rtx, left, &out) <= -1) + out.ptr = qse_awk_rtx_valtostrdup (rtx, left, &out.len); + if (out.ptr == QSE_NULL) return QSE_NULL; + + n = qse_awk_rtx_matchrex (rtx, right, &out, &out, QSE_NULL); + QSE_AWK_FREE (rtx->awk, out.ptr); + + if (n <= -1) { - if (right->type != QSE_AWK_VAL_REX) - qse_awk_freerex (rtx->awk, rex_code); - return QSE_NULL; - } - - n = qse_awk_matchrex ( - rtx->awk, rex_code, - ((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0), - xstr_to_cstr(&out.u.cpldup), - xstr_to_cstr(&out.u.cpldup), - QSE_NULL, &errnum - ); - if (n == -1) - { - QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr); - if (right->type != QSE_AWK_VAL_REX) - qse_awk_freerex (rtx->awk, rex_code); - - SETERR_LOC (rtx, errnum, lloc); + ADJERR_LOC (rtx, lloc); return QSE_NULL; } res = qse_awk_rtx_makeintval (rtx, (n == ret)); if (res == QSE_NULL) { - QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr); - if (right->type != QSE_AWK_VAL_REX) - qse_awk_freerex (rtx->awk, rex_code); - ADJERR_LOC (rtx, lloc); return QSE_NULL; } - - QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr); } - if (right->type != QSE_AWK_VAL_REX) qse_awk_freerex (rtx->awk, rex_code); return res; } @@ -6307,7 +6234,8 @@ static qse_awk_val_t* eval_rex (qse_awk_rtx_t* run, qse_awk_nde_t* nde) val = qse_awk_rtx_makerexval (run, &((qse_awk_nde_rex_t*)nde)->str, - ((qse_awk_nde_rex_t*)nde)->code); + ((qse_awk_nde_rex_t*)nde)->code + ); if (val == QSE_NULL) ADJERR_LOC (run, &nde->loc); return val; diff --git a/qse/lib/awk/tree.c b/qse/lib/awk/tree.c index 28df3381..8b74ecec 100644 --- a/qse/lib/awk/tree.c +++ b/qse/lib/awk/tree.c @@ -1305,7 +1305,8 @@ void qse_awk_clrpt (qse_awk_t* awk, qse_awk_nde_t* tree) case QSE_AWK_NDE_REX: { - qse_awk_freerex (awk, ((qse_awk_nde_rex_t*)p)->code); + qse_awk_nde_rex_t* rex = (qse_awk_nde_rex_t*)p; + qse_awk_freerex (awk, rex->code[0], rex->code[1]); QSE_AWK_FREE (awk, ((qse_awk_nde_rex_t*)p)->str.ptr); QSE_AWK_FREE (awk, p); break; @@ -1318,8 +1319,7 @@ void qse_awk_clrpt (qse_awk_t* awk, qse_awk_nde_t* tree) { qse_awk_nde_var_t* px = (qse_awk_nde_var_t*)p; QSE_ASSERT (px->idx == QSE_NULL); - if (px->id.name.ptr != QSE_NULL) - QSE_AWK_FREE (awk, px->id.name.ptr); + if (px->id.name.ptr) QSE_AWK_FREE (awk, px->id.name.ptr); QSE_AWK_FREE (awk, p); break; } diff --git a/qse/lib/awk/tree.h b/qse/lib/awk/tree.h index c1efdf31..d9dbf9c9 100644 --- a/qse/lib/awk/tree.h +++ b/qse/lib/awk/tree.h @@ -155,7 +155,7 @@ struct qse_awk_nde_rex_t { QSE_AWK_NDE_HDR; qse_xstr_t str; - void* code; + void* code[2]; /* [0]: case sensitive, [1]: case insensitive */ }; /* QSE_AWK_NDE_NAMED, QSE_AWK_NDE_GBL, diff --git a/qse/lib/awk/val.c b/qse/lib/awk/val.c index a7c2bf99..602e8671 100644 --- a/qse/lib/awk/val.c +++ b/qse/lib/awk/val.c @@ -28,13 +28,14 @@ #define CHUNKSIZE QSE_AWK_VAL_CHUNK_SIZE static qse_awk_val_nil_t awk_nil = { QSE_AWK_VAL_NIL, 0, 1, 0 }; -static qse_awk_val_str_t awk_zls = { QSE_AWK_VAL_STR, 0, 1, 0, { QSE_T(""), 0 } }; +static qse_awk_val_str_t awk_zls = { QSE_AWK_VAL_STR, 0, 1, 0, { QSE_T(""), 0 } }; qse_awk_val_t* qse_awk_val_nil = (qse_awk_val_t*)&awk_nil; qse_awk_val_t* qse_awk_val_zls = (qse_awk_val_t*)&awk_zls; static qse_awk_val_int_t awk_int[] = { + /* type ref stat nstr val nde */ { QSE_AWK_VAL_INT, 0, 1, 0, -1, QSE_NULL }, { QSE_AWK_VAL_INT, 0, 1, 0, 0, QSE_NULL }, { QSE_AWK_VAL_INT, 0, 1, 0, 1, QSE_NULL }, @@ -216,7 +217,7 @@ qse_awk_val_t* qse_awk_rtx_makestrvalwithmbs ( return QSE_NULL; } - v = qse_awk_rtx_makestrvalwithcstr (rtx, &tmp); + v = qse_awk_rtx_makestrvalwithcstr (rtx, (qse_cstr_t*)&tmp); QSE_AWK_FREE (rtx->awk, tmp.ptr); return v; #endif @@ -268,7 +269,7 @@ qse_awk_val_t* qse_awk_rtx_makestrvalwithmcstr ( return QSE_NULL; } - v = qse_awk_rtx_makestrvalwithcstr (rtx, &tmp); + v = qse_awk_rtx_makestrvalwithcstr (rtx, (qse_cstr_t*)&tmp); QSE_AWK_FREE (rtx->awk, tmp.ptr); return v; #endif @@ -438,7 +439,7 @@ qse_awk_val_t* qse_awk_rtx_makenstrvalwithcstr (qse_awk_rtx_t* rtx, const qse_cs } qse_awk_val_t* qse_awk_rtx_makerexval ( - qse_awk_rtx_t* rtx, const qse_cstr_t* str, void* code) + qse_awk_rtx_t* rtx, const qse_cstr_t* str, void* code[2]) { qse_awk_val_rex_t* val; qse_size_t totsz; @@ -465,7 +466,8 @@ qse_awk_val_t* qse_awk_rtx_makerexval ( val->str.ptr = (qse_char_t*)(val + 1); qse_strncpy (val->str.ptr, str->ptr, str->len); - val->code = code; + val->code[0] = code[0]; + val->code[1] = code[1]; return (qse_awk_val_t*)val; } @@ -824,7 +826,7 @@ void qse_awk_rtx_freeval ( /* code is just a pointer to a regular expression stored * in parse tree nodes. so don't free it. - qse_awk_freerex (rtx->awk, ((qse_awk_val_rex_t*)val)->code); + qse_awk_freerex (rtx->awk, ((qse_awk_val_rex_t*)val)->code[0], ((qse_awk_val_rex_t*)val)->code[1]); */ QSE_AWK_FREE (rtx->awk, val); @@ -895,7 +897,7 @@ void qse_awk_rtx_refdownval_nofree (qse_awk_rtx_t* rtx, qse_awk_val_t* val) void qse_awk_rtx_freevalchunk (qse_awk_rtx_t* rtx, qse_awk_val_chunk_t* chunk) { while (chunk != QSE_NULL) - { + { qse_awk_val_chunk_t* next = chunk->next; QSE_AWK_FREE (rtx->awk, chunk); chunk = next; @@ -1717,7 +1719,7 @@ int qse_awk_rtx_setrefval (qse_awk_rtx_t* rtx, qse_awk_val_ref_t* ref, qse_awk_v qse_awk_rtx_refupval (rtx, val); x = qse_awk_rtx_setrec ( rtx, (qse_size_t)ref->adr, - &((qse_awk_val_str_t*)val)->val + (qse_cstr_t*)&((qse_awk_val_str_t*)val)->val ); qse_awk_rtx_refdownval (rtx, val); return x; @@ -1730,7 +1732,7 @@ int qse_awk_rtx_setrefval (qse_awk_rtx_t* rtx, qse_awk_val_ref_t* ref, qse_awk_v str.ptr = qse_awk_rtx_valtostrdup (rtx, val, &str.len); qse_awk_rtx_refupval (rtx, val); - x = qse_awk_rtx_setrec (rtx, (qse_size_t)ref->adr, &str); + x = qse_awk_rtx_setrec (rtx, (qse_size_t)ref->adr, (qse_cstr_t*)&str); qse_awk_rtx_refdownval (rtx, val); QSE_AWK_FREE (rtx->awk, str.ptr); return x; diff --git a/qse/lib/cmn/tre-ast.c b/qse/lib/cmn/tre-ast.c index 3b03a040..ad8f0c4f 100644 --- a/qse/lib/cmn/tre-ast.c +++ b/qse/lib/cmn/tre-ast.c @@ -60,11 +60,9 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size) tre_ast_node_t *node; node = tre_mem_calloc(mem, sizeof(*node)); - if (!node) - return NULL; + if (!node) return NULL; node->obj = tre_mem_calloc(mem, size); - if (!node->obj) - return NULL; + if (!node->obj) return NULL; node->type = type; node->nullable = -1; node->submatch_id = -1; @@ -72,15 +70,13 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size) return node; } -tre_ast_node_t * -tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position) +tre_ast_node_t * tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position) { tre_ast_node_t *node; tre_literal_t *lit; node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t)); - if (!node) - return NULL; + if (!node) return NULL; lit = node->obj; lit->code_min = code_min; lit->code_max = code_max; @@ -97,8 +93,7 @@ tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, tre_iteration_t *iter; node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t)); - if (!node) - return NULL; + if (!node) return NULL; iter = node->obj; iter->arg = arg; iter->min = min; @@ -115,8 +110,7 @@ tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right) tre_ast_node_t *node; node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t)); - if (node == NULL) - return NULL; + if (node == NULL) return NULL; ((tre_union_t *)node->obj)->left = left; ((tre_union_t *)node->obj)->right = right; node->num_submatches = left->num_submatches + right->num_submatches; @@ -131,8 +125,7 @@ tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *node; node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t)); - if (node == NULL) - return NULL; + if (node == NULL) return NULL; ((tre_catenation_t *)node->obj)->left = left; ((tre_catenation_t *)node->obj)->right = right; node->num_submatches = left->num_submatches + right->num_submatches; diff --git a/qse/lib/cmn/tre-compile.c b/qse/lib/cmn/tre-compile.c index 53dc3f8e..8a802889 100644 --- a/qse/lib/cmn/tre-compile.c +++ b/qse/lib/cmn/tre-compile.c @@ -88,14 +88,11 @@ tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id) DPRINT(("add_tag_left: tag %d\n", tag_id)); c = tre_mem_alloc(mem, sizeof(*c)); - if (c == NULL) - return REG_ESPACE; + if (c == NULL) return REG_ESPACE; c->left = tre_ast_new_literal(mem, TAG, tag_id, -1); - if (c->left == NULL) - return REG_ESPACE; + if (c->left == NULL) return REG_ESPACE; c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); - if (c->right == NULL) - return REG_ESPACE; + if (c->right == NULL) return REG_ESPACE; c->right->obj = node->obj; c->right->type = node->type; @@ -152,7 +149,6 @@ typedef enum ADDTAGS_SET_SUBMATCH_END } tre_addtags_symbol_t; - typedef struct { int tag; @@ -763,8 +759,7 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, first_tag = 0; } *result = tre_ast_new_literal(mem, min, max, pos); - if (*result == NULL) - status = REG_ESPACE; + if (*result == NULL) status = REG_ESPACE; if (pos > *max_pos) *max_pos = pos; @@ -1121,8 +1116,7 @@ tre_set_one(tre_mem_t mem, int position, int code_min, int code_max, tre_pos_and_tags_t *new_set; new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2); - if (new_set == NULL) - return NULL; + if (new_set == NULL) return NULL; new_set[0].position = position; new_set[0].code_min = code_min; @@ -1150,8 +1144,7 @@ tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2, for (s1 = 0; set1[s1].position >= 0; s1++); for (s2 = 0; set2[s2].position >= 0; s2++); new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1)); - if (!new_set ) - return NULL; + if (!new_set) return NULL; for (s1 = 0; set1[s1].position >= 0; s1++) { @@ -1395,15 +1388,10 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) /* Back references: nullable = false, firstpos = {i}, lastpos = {i}. */ node->nullable = 0; - node->firstpos = tre_set_one(mem, lit->position, 0, - TRE_CHAR_MAX, 0, NULL, -1); - if (!node->firstpos) - return REG_ESPACE; - node->lastpos = tre_set_one(mem, lit->position, 0, - TRE_CHAR_MAX, 0, NULL, - (int)lit->code_max); - if (!node->lastpos) - return REG_ESPACE; + node->firstpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, -1); + if (!node->firstpos) return REG_ESPACE; + node->lastpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, (int)lit->code_max); + if (!node->lastpos) return REG_ESPACE; } else if (lit->code_min < 0) { @@ -1422,18 +1410,10 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) /* Literal at position i: nullable = false, firstpos = {i}, lastpos = {i}. */ node->nullable = 0; - node->firstpos = - tre_set_one(mem, lit->position, (int)lit->code_min, - (int)lit->code_max, 0, NULL, -1); - if (!node->firstpos) - return REG_ESPACE; - node->lastpos = tre_set_one(mem, lit->position, - (int)lit->code_min, - (int)lit->code_max, - lit->u.class, lit->neg_classes, - -1); - if (!node->lastpos) - return REG_ESPACE; + node->firstpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, 0, NULL, -1); + if (!node->firstpos) return REG_ESPACE; + node->lastpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, lit->u.class, lit->neg_classes, -1); + if (!node->lastpos) return REG_ESPACE; } break; } @@ -1628,6 +1608,7 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2, int i, j, k, l, dup, prev_p2_pos; if (transitions != NULL) + { while (p1->position >= 0) { p2 = orig_p2; @@ -1814,7 +1795,9 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2, } p1++; } + } else + { /* Compute a maximum limit for the number of transitions leaving from each state. */ while (p1->position >= 0) @@ -1827,6 +1810,7 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2, } p1++; } + } return REG_OK; } diff --git a/qse/lib/cmn/tre-match-backtrack.c b/qse/lib/cmn/tre-match-backtrack.c index 37d4367d..fd7f3b14 100644 --- a/qse/lib/cmn/tre-match-backtrack.c +++ b/qse/lib/cmn/tre-match-backtrack.c @@ -168,23 +168,18 @@ typedef struct tre_backtrack_struct while (/*CONSTCOND*/0) #define BT_STACK_POP() \ - do \ - { \ - int i; \ - assert(stack->prev); \ - pos = stack->item.pos; \ - if (type == STR_USER) \ - str_source->rewind(pos + pos_add_next, str_source->context); \ - str_byte = stack->item.str_byte; \ - BT_STACK_WIDE_OUT; \ - state = stack->item.state; \ - next_c = stack->item.next_c; \ - for (i = 0; i < tnfa->num_tags; i++) \ - tags[i] = stack->item.tags[i]; \ - BT_STACK_MBSTATE_OUT; \ - stack = stack->prev; \ - } \ - while (/*CONSTCOND*/0) + do { \ + int i; \ + assert(stack->prev); \ + pos = stack->item.pos; \ + str_byte = stack->item.str_byte; \ + BT_STACK_WIDE_OUT; \ + state = stack->item.state; \ + next_c = stack->item.next_c; \ + for (i = 0; i < tnfa->num_tags; i++) tags[i] = stack->item.tags[i]; \ + BT_STACK_MBSTATE_OUT; \ + stack = stack->prev; \ + } while (/*CONSTCOND*/0) #undef MIN #define MIN(a, b) ((a) <= (b) ? (a) : (b)) @@ -208,7 +203,6 @@ tre_tnfa_run_backtrack(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *str int reg_notbol = eflags & REG_NOTBOL; int reg_noteol = eflags & REG_NOTEOL; int reg_newline = tnfa->cflags & REG_NEWLINE; - int str_user_end = 0; /* These are used to remember the necessary values of the above variables to return to the position where the current search @@ -302,8 +296,6 @@ retry: state = NULL; pos = pos_start; - if (type == STR_USER) - str_source->rewind(pos + pos_add_next, str_source->context); GET_NEXT_WCHAR(); pos_start = pos; next_c_start = next_c; @@ -446,15 +438,11 @@ retry: if (len < 0) { - if (type == STR_USER) - result = str_source->compare((unsigned)so, (unsigned)pos, - (unsigned)bt_len, - str_source->context); #ifdef TRE_WCHAR - else if (type == STR_WIDE) + if (type == STR_WIDE) result = qse_wcszcmp((const qse_wchar_t*)string + so, str_wide - 1, (size_t)bt_len); -#endif /* TRE_WCHAR */ else +#endif /* TRE_WCHAR */ result = qse_mbszcmp((const char*)string + so, str_byte - 1, (size_t)bt_len); } else if (len - pos < bt_len) @@ -508,12 +496,7 @@ retry: /* Check for end of string. */ if (len < 0) { - if (type == STR_USER) - { - if (str_user_end) - goto backtrack; - } - else if (next_c == QSE_T('\0')) + if (next_c == QSE_T('\0')) goto backtrack; } else @@ -533,8 +516,8 @@ retry: trans_i->code_min, trans_i->code_max, trans_i->code_min, trans_i->code_max, trans_i->assertions, trans_i->state_id)); - if (trans_i->code_min <= (tre_cint_t)prev_c && - trans_i->code_max >= (tre_cint_t)prev_c) + + if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c) { if (trans_i->assertions && (CHECK_ASSERTIONS(trans_i->assertions) diff --git a/qse/lib/cmn/tre-match-parallel.c b/qse/lib/cmn/tre-match-parallel.c index b54ab40f..cb855450 100644 --- a/qse/lib/cmn/tre-match-parallel.c +++ b/qse/lib/cmn/tre-match-parallel.c @@ -325,12 +325,7 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri /* Check for end of string. */ if (len < 0) { - if (type == STR_USER) - { - if (str_user_end) - break; - } - else if (next_c == QSE_T('\0')) + if (next_c == QSE_T('\0')) break; } else @@ -408,28 +403,28 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri for (trans_i = reach_i->state; trans_i->state; trans_i++) { /* Does this transition match the input symbol? */ - if (trans_i->code_min <= (tre_cint_t)prev_c && - trans_i->code_max >= (tre_cint_t)prev_c) + if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c) { - if (trans_i->assertions - && (CHECK_ASSERTIONS(trans_i->assertions) - || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) + if (trans_i->assertions && + (CHECK_ASSERTIONS(trans_i->assertions) || + CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) { DPRINT(("assertion failed\n")); continue; } /* Compute the tags after this transition. */ - for (i = 0; i < num_tags; i++) - tmp_tags[i] = reach_i->tags[i]; + for (i = 0; i < num_tags; i++) tmp_tags[i] = reach_i->tags[i]; tag_i = trans_i->tags; if (tag_i != NULL) + { while (*tag_i >= 0) { if (*tag_i < num_tags) tmp_tags[*tag_i] = pos; tag_i++; } + } if (reach_pos[trans_i->state_id].pos < pos) { @@ -442,15 +437,12 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri reach_pos[trans_i->state_id].tags = &reach_next_i->tags; if (reach_next_i->state == tnfa->final - && (match_eo == -1 - || (num_tags > 0 - && reach_next_i->tags[0] <= match_tags[0]))) + && (match_eo == -1 || (num_tags > 0 && reach_next_i->tags[0] <= match_tags[0]))) { DPRINT((" found match %p\n", trans_i->state)); match_eo = pos; new_match = 1; - for (i = 0; i < num_tags; i++) - match_tags[i] = reach_next_i->tags[i]; + for (i = 0; i < num_tags; i++) match_tags[i] = reach_next_i->tags[i]; } reach_next_i++; diff --git a/qse/lib/cmn/tre-match-utils.h b/qse/lib/cmn/tre-match-utils.h index 88a939d1..9df2f265 100644 --- a/qse/lib/cmn/tre-match-utils.h +++ b/qse/lib/cmn/tre-match-utils.h @@ -52,8 +52,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#define str_source ((const tre_str_source*)string) - #ifdef TRE_WCHAR #ifdef TRE_MULTIBYTE @@ -116,12 +114,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. } \ } \ } \ - else if (type == STR_USER) \ - { \ - pos += pos_add_next; \ - str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \ - str_source->context); \ - } \ } while(/*CONSTCOND*/0) #else /* !TRE_MULTIBYTE */ @@ -143,11 +135,6 @@ do { \ if (len >= 0 && pos >= len) next_c = QSE_T('\0'); \ else next_c = *str_wide++; \ } \ - else if (type == STR_USER) \ - { \ - pos += pos_add_next; \ - str_user_end = str_source->get_next_char(&next_c, &pos_add_next, str_source->context); \ - } \ } while(/*CONSTCOND*/0) #endif /* !TRE_MULTIBYTE */ @@ -156,24 +143,16 @@ do { \ /* No wide character or multibyte support. */ -#define GET_NEXT_WCHAR() \ - do { \ - prev_c = next_c; \ - if (type == STR_BYTE) \ - { \ - pos++; \ - if (len >= 0 && pos >= len) \ - next_c = '\0'; \ - else \ - next_c = (unsigned char)(*str_byte++); \ - } \ - else if (type == STR_USER) \ - { \ - pos += pos_add_next; \ - str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \ - str_source->context); \ - } \ - } while(/*CONSTCOND*/0) +#define GET_NEXT_WCHAR() \ + do { \ + prev_c = next_c; \ + if (type == STR_BYTE) \ + { \ + pos++; \ + if (len >= 0 && pos >= len) next_c = '\0'; \ + else next_c = (unsigned char)(*str_byte++); \ + } \ + } while(/*CONSTCOND*/0) #endif /* !TRE_WCHAR */ diff --git a/qse/lib/cmn/tre-parse.c b/qse/lib/cmn/tre-parse.c index 9ad387db..4e5f39a2 100644 --- a/qse/lib/cmn/tre-parse.c +++ b/qse/lib/cmn/tre-parse.c @@ -138,8 +138,7 @@ tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end, } static reg_errcode_t -tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i, - tre_ast_node_t ***items) +tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i, tre_ast_node_t ***items) { reg_errcode_t status; tre_ast_node_t **array = *items; @@ -306,8 +305,7 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate, /* END QSE */ if (status == REG_OK) { - status = tre_expand_ctype(ctx->mem, class, items, - &i, &max_i, ctx->cflags); + status = tre_expand_ctype(ctx->mem, class, items, &i, &max_i, ctx->cflags); class = (tre_ctype_t)0; skip = 1; } @@ -328,25 +326,25 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate, min = max = *re++; } - if (status != REG_OK) - break; + if (status != REG_OK) break; if (class && negate) + { if (*num_neg_classes >= MAX_NEG_CLASSES) status = REG_ESPACE; else neg_classes[(*num_neg_classes)++] = class; + } else if (!skip) { status = tre_new_item(ctx->mem, min, max, &i, &max_i, items); - if (status != REG_OK) - break; + if (status != REG_OK) break; ((tre_literal_t*)((*items)[i-1])->obj)->u.class = class; } /* Add opposite-case counterpoints if REG_ICASE is present. This is broken if there are more than two "same" characters. */ - if (ctx->cflags & REG_ICASE && !class && status == REG_OK && !skip) + if ((ctx->cflags & REG_ICASE) && !class && status == REG_OK && !skip) { tre_cint_t cmin, ccurr; @@ -356,27 +354,21 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate, if (tre_islower(min)) { cmin = ccurr = tre_toupper(min++); - while (tre_islower(min) && tre_toupper(min) == ccurr + 1 - && min <= max) + while (tre_islower(min) && tre_toupper(min) == ccurr + 1 && min <= max) ccurr = tre_toupper(min++); - status = tre_new_item(ctx->mem, cmin, ccurr, - &i, &max_i, items); + status = tre_new_item(ctx->mem, cmin, ccurr, &i, &max_i, items); } else if (tre_isupper(min)) { cmin = ccurr = tre_tolower(min++); - while (tre_isupper(min) && tre_tolower(min) == ccurr + 1 - && min <= max) + while (tre_isupper(min) && tre_tolower(min) == ccurr + 1 && min <= max) ccurr = tre_tolower(min++); - status = tre_new_item(ctx->mem, cmin, ccurr, - &i, &max_i, items); + status = tre_new_item(ctx->mem, cmin, ccurr, &i, &max_i, items); } else min++; - if (status != REG_OK) - break; + if (status != REG_OK) break; } - if (status != REG_OK) - break; + if (status != REG_OK) break; } } } @@ -399,8 +391,7 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result) /* Start off with an array of `max_i' elements. */ items = xmalloc(ctx->mem->mmgr, sizeof(*items) * max_i); - if (items == NULL) - return REG_ESPACE; + if (items == NULL) return REG_ESPACE; if (*ctx->re == CHAR_CARET) { @@ -409,15 +400,11 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result) ctx->re++; } - status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes, - &items, &i, &max_i); - - if (status != REG_OK) - goto parse_bracket_done; + status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes, &items, &i, &max_i); + if (status != REG_OK) goto parse_bracket_done; /* Sort the array if we need to negate it. */ - if (negate) - qse_qsort(items, (unsigned)i, sizeof(*items), tre_compare_items, QSE_NULL); + if (negate) qse_qsort(items, (unsigned)i, sizeof(*items), tre_compare_items, QSE_NULL); curr_max = curr_min = 0; /* Build a union of the items in the array, negated if necessary. */ @@ -466,22 +453,23 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result) l->position = ctx->position; if (num_neg_classes > 0) { - l->neg_classes = tre_mem_alloc(ctx->mem, - (sizeof(l->neg_classes) - * (num_neg_classes + 1))); + l->neg_classes = tre_mem_alloc(ctx->mem, (sizeof(l->neg_classes) * (num_neg_classes + 1))); if (l->neg_classes == NULL) { status = REG_ESPACE; break; } - for (k = 0; k < num_neg_classes; k++) - l->neg_classes[k] = neg_classes[k]; + for (k = 0; k < num_neg_classes; k++) l->neg_classes[k] = neg_classes[k]; l->neg_classes[k] = (tre_ctype_t)0; } else + { l->neg_classes = NULL; + } if (node == NULL) + { node = items[j]; + } else { u = tre_ast_new_union(ctx->mem, node, items[j]); @@ -492,16 +480,17 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result) } } - if (status != REG_OK) - goto parse_bracket_done; + if (status != REG_OK) goto parse_bracket_done; if (negate) { int k; DPRINT(("final: creating %d - %d\n", curr_min, (int)TRE_CHAR_MAX)); n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position); - if (n == NULL) + if (n == NULL) + { status = REG_ESPACE; + } else { tre_literal_t *l = n->obj; @@ -520,21 +509,23 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result) l->neg_classes[k] = (tre_ctype_t)0; } else + { l->neg_classes = NULL; + } if (node == NULL) + { node = n; + } else { u = tre_ast_new_union(ctx->mem, node, n); - if (u == NULL) - status = REG_ESPACE; + if (u == NULL) status = REG_ESPACE; node = u; } } } - if (status != REG_OK) - goto parse_bracket_done; + if (status != REG_OK) goto parse_bracket_done; #ifdef TRE_DEBUG tre_ast_print(node); @@ -814,8 +805,7 @@ tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result) if (min == 0 && max == 0) { *result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); - if (*result == NULL) - return REG_ESPACE; + if (*result == NULL) return REG_ESPACE; } else { @@ -909,9 +899,7 @@ typedef enum PARSE_RESTORE_CFLAGS } tre_parse_re_stack_symbol_t; - -reg_errcode_t -tre_parse(tre_parse_ctx_t *ctx) +reg_errcode_t tre_parse(tre_parse_ctx_t *ctx) { tre_ast_node_t *result = NULL; tre_parse_re_stack_symbol_t symbol; @@ -941,8 +929,8 @@ tre_parse(tre_parse_ctx_t *ctx) call stack, and efficiency (both in lines of code and speed). */ while (tre_stack_num_objects(stack) > bottom && status == REG_OK) { - if (status != REG_OK) - break; + if (status != REG_OK) break; + symbol = tre_stack_pop_int(stack); switch (symbol) { @@ -978,8 +966,8 @@ tre_parse(tre_parse_ctx_t *ctx) /* If the expression has not ended, parse another piece. */ { tre_char_t c; - if (ctx->re >= ctx->re_end) - break; + + if (ctx->re >= ctx->re_end) break; c = *ctx->re; #ifdef REG_LITERAL if (!(ctx->cflags & REG_LITERAL)) @@ -1025,281 +1013,281 @@ tre_parse(tre_parse_ctx_t *ctx) STACK_PUSHX(stack, int, PARSE_PIECE); } break; - } - - case PARSE_POST_CATENATION: - { - tre_ast_node_t *tree = tre_stack_pop_voidptr(stack); - tre_ast_node_t *tmp_node; - tmp_node = tre_ast_new_catenation(ctx->mem, tree, result); - if (!tmp_node) - return REG_ESPACE; - result = tmp_node; - break; - } - - case PARSE_UNION: - if (ctx->re >= ctx->re_end) - break; -#ifdef REG_LITERAL - if (ctx->cflags & REG_LITERAL) - break; -#endif /* REG_LITERAL */ - switch (*ctx->re) - { - case CHAR_PIPE: - DPRINT(("tre_parse: union: '%.*" STRF "'\n", - REST(ctx->re))); - STACK_PUSHX(stack, int, PARSE_UNION); - STACK_PUSHX(stack, voidptr, result); - STACK_PUSHX(stack, int, PARSE_POST_UNION); - STACK_PUSHX(stack, int, PARSE_BRANCH); - ctx->re++; - break; - - case CHAR_RPAREN: - ctx->re++; - break; - - default: - break; } - break; - - case PARSE_POST_UNION: - { - tre_ast_node_t *tmp_node; - tre_ast_node_t *tree = tre_stack_pop_voidptr(stack); - tmp_node = tre_ast_new_union(ctx->mem, tree, result); - if (!tmp_node) - return REG_ESPACE; - result = tmp_node; - break; - } - - case PARSE_POSTFIX: - /* Parse postfix operators. */ - if (ctx->re >= ctx->re_end) - break; -#ifdef REG_LITERAL - if (ctx->cflags & REG_LITERAL) - break; -#endif /* REG_LITERAL */ - switch (*ctx->re) - { - case CHAR_PLUS: - case CHAR_QUESTIONMARK: - if (!(ctx->cflags & REG_EXTENDED)) - break; - /*FALLTHROUGH*/ - case CHAR_STAR: -/* QSE - added this label */ -parse_star: -/* END QSE */ + + case PARSE_POST_CATENATION: { + tre_ast_node_t *tree = tre_stack_pop_voidptr(stack); tre_ast_node_t *tmp_node; - int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0; - int rep_min = 0; - int rep_max = -1; -#ifdef TRE_DEBUG - const tre_char_t *tmp_re; -#endif - - if (*ctx->re == CHAR_PLUS) /* QSE: case CHAR_PLUS fell through down here */ - rep_min = 1; - if (*ctx->re == CHAR_QUESTIONMARK) /* QSE: case CHAR_QUESTIONMARK fell though down here */ - rep_max = 1; -#ifdef TRE_DEBUG - tmp_re = ctx->re; -#endif - - if (ctx->re + 1 < ctx->re_end) - { - if (*(ctx->re + 1) == CHAR_QUESTIONMARK) /* QSE: +?, ??, *? */ - { - minimal = !(ctx->cflags & REG_UNGREEDY); - ctx->re++; - } -/* QSE - TRE has provisions for ** or *+ as a special repetition operator. - * however, that seems to break backward compatibility. - * '+' in 'a*+' is not treated as a normal character with the - * following block enabled. So let me comment it out */ -#if 0 - else if (*(ctx->re + 1) == CHAR_STAR - || *(ctx->re + 1) == CHAR_PLUS) - { - /* These are reserved for future extensions. */ - return REG_BADRPT; - } -#endif - } - - DPRINT(("tre_parse: %s star: '%.*" STRF "'\n", - minimal ? " minimal" : "greedy", REST(tmp_re))); - ctx->re++; - tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max, - minimal); - if (tmp_node == NULL) + tmp_node = tre_ast_new_catenation(ctx->mem, tree, result); + if (!tmp_node) return REG_ESPACE; result = tmp_node; - STACK_PUSHX(stack, int, PARSE_POSTFIX); + break; } - break; - - case CHAR_BACKSLASH: - /* "\{" is special without REG_EXTENDED */ - /* QSE - also handle \+ and \? */ - /* - if (!(ctx->cflags & REG_EXTENDED) - && ctx->re + 1 < ctx->re_end - && *(ctx->re + 1) == CHAR_LBRACE) + + case PARSE_UNION: + if (ctx->re >= ctx->re_end) break; + #ifdef REG_LITERAL + if (ctx->cflags & REG_LITERAL) break; + #endif /* REG_LITERAL */ + switch (*ctx->re) { + case CHAR_PIPE: + DPRINT(("tre_parse: union: '%.*" STRF "'\n", + REST(ctx->re))); + STACK_PUSHX(stack, int, PARSE_UNION); + STACK_PUSHX(stack, voidptr, result); + STACK_PUSHX(stack, int, PARSE_POST_UNION); + STACK_PUSHX(stack, int, PARSE_BRANCH); ctx->re++; - goto parse_brace; - } - else break; - */ - if (!(ctx->cflags & REG_EXTENDED) && ctx->re + 1 < ctx->re_end) + + case CHAR_RPAREN: + ctx->re++; + break; + + default: + break; + } + break; + + case PARSE_POST_UNION: + { + tre_ast_node_t *tmp_node; + tre_ast_node_t *tree = tre_stack_pop_voidptr(stack); + tmp_node = tre_ast_new_union(ctx->mem, tree, result); + if (!tmp_node) + return REG_ESPACE; + result = tmp_node; + break; + } + + case PARSE_POSTFIX: + /* Parse postfix operators. */ + if (ctx->re >= ctx->re_end) + break; + #ifdef REG_LITERAL + if (ctx->cflags & REG_LITERAL) + break; + #endif /* REG_LITERAL */ + switch (*ctx->re) { - if (*(ctx->re + 1) == CHAR_LBRACE) + case CHAR_PLUS: + case CHAR_QUESTIONMARK: + if (!(ctx->cflags & REG_EXTENDED)) break; + /*FALLTHROUGH*/ + case CHAR_STAR: + /* QSE - added this label */ + parse_star: + /* END QSE */ + { + tre_ast_node_t *tmp_node; + int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0; + int rep_min = 0; + int rep_max = -1; + #ifdef TRE_DEBUG + const tre_char_t *tmp_re; + #endif + + if (*ctx->re == CHAR_PLUS) /* QSE: case CHAR_PLUS fell through down here */ + rep_min = 1; + if (*ctx->re == CHAR_QUESTIONMARK) /* QSE: case CHAR_QUESTIONMARK fell though down here */ + rep_max = 1; + #ifdef TRE_DEBUG + tmp_re = ctx->re; + #endif + + if (ctx->re + 1 < ctx->re_end) + { + if (*(ctx->re + 1) == CHAR_QUESTIONMARK) /* QSE: +?, ??, *? */ + { + minimal = !(ctx->cflags & REG_UNGREEDY); + ctx->re++; + } + /* QSE - TRE has provisions for ** or *+ as a special repetition operator. + * however, that seems to break backward compatibility. + * '+' in 'a*+' is not treated as a normal character with the + * following block enabled. So let me comment it out */ + #if 0 + else if (*(ctx->re + 1) == CHAR_STAR + || *(ctx->re + 1) == CHAR_PLUS) + { + /* These are reserved for future extensions. */ + return REG_BADRPT; + } + #endif + } + + DPRINT(("tre_parse: %s star: '%.*" STRF "'\n", + minimal ? " minimal" : "greedy", REST(tmp_re))); + ctx->re++; + tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max, + minimal); + if (tmp_node == NULL) + return REG_ESPACE; + result = tmp_node; + STACK_PUSHX(stack, int, PARSE_POSTFIX); + + break; + } + + case CHAR_BACKSLASH: + /* "\{" is special without REG_EXTENDED */ + /* QSE - also handle \+ and \? */ + /* + if (!(ctx->cflags & REG_EXTENDED) + && ctx->re + 1 < ctx->re_end + && *(ctx->re + 1) == CHAR_LBRACE) { ctx->re++; goto parse_brace; } - else if (*(ctx->re + 1) == CHAR_PLUS || - *(ctx->re + 1) == CHAR_QUESTIONMARK) + else + break; + */ + if (!(ctx->cflags & REG_EXTENDED) && ctx->re + 1 < ctx->re_end) { - ctx->re++; - goto parse_star; + if (*(ctx->re + 1) == CHAR_LBRACE) + { + ctx->re++; + goto parse_brace; + } + else if (*(ctx->re + 1) == CHAR_PLUS || + *(ctx->re + 1) == CHAR_QUESTIONMARK) + { + ctx->re++; + goto parse_star; + } } - } - break; - /* END QSE */ - - - case CHAR_LBRACE: - /* "{" is literal without REG_EXTENDED */ - if (!(ctx->cflags & REG_EXTENDED)) break; - /* QSE */ - if (ctx->cflags & REG_NOBOUND) break; - /* END QSE */ - -parse_brace: - DPRINT(("tre_parse: bound: '%.*" STRF "'\n", - REST(ctx->re))); - ctx->re++; - - status = tre_parse_bound(ctx, &result); - if (status != REG_OK) - return status; - STACK_PUSHX(stack, int, PARSE_POSTFIX); - break; - } - - break; - - case PARSE_ATOM: - /* Parse an atom. An atom is a regular expression enclosed in `()', - an empty set of `()', a bracket expression, `.', `^', `$', - a `\' followed by a character, or a single character. */ - - /* End of regexp? (empty string). */ - if (ctx->re >= ctx->re_end) - goto parse_literal; - -#ifdef REG_LITERAL - if (ctx->cflags & REG_LITERAL) - goto parse_literal; -#endif /* REG_LITERAL */ - - switch (*ctx->re) - { - case CHAR_LPAREN: /* parenthesized subexpression */ - - /* Handle "(?...)" extensions. They work in a way similar - to Perls corresponding extensions. */ - /* QSE: added ctx->cflags & REG_NONSTDEXT */ - if ((ctx->cflags & REG_NONSTDEXT) && - (ctx->cflags & REG_EXTENDED) && - *(ctx->re + 1) == CHAR_QUESTIONMARK) - { - int new_cflags = ctx->cflags; - int bit = 1; - DPRINT(("tre_parse: extension: '%.*" STRF "\n", + break; + /* END QSE */ + + + case CHAR_LBRACE: + /* "{" is literal without REG_EXTENDED */ + if (!(ctx->cflags & REG_EXTENDED)) break; + /* QSE */ + if (ctx->cflags & REG_NOBOUND) break; + /* END QSE */ + + parse_brace: + DPRINT(("tre_parse: bound: '%.*" STRF "'\n", REST(ctx->re))); - ctx->re += 2; - while (/*CONSTCOND*/1) + ctx->re++; + + status = tre_parse_bound(ctx, &result); + if (status != REG_OK) + return status; + STACK_PUSHX(stack, int, PARSE_POSTFIX); + break; + } + + break; + + case PARSE_ATOM: + + /* Parse an atom. An atom is a regular expression enclosed in `()', + an empty set of `()', a bracket expression, `.', `^', `$', + a `\' followed by a character, or a single character. */ + + /* End of regexp? (empty string). */ + if (ctx->re >= ctx->re_end) goto parse_literal; + + #ifdef REG_LITERAL + if (ctx->cflags & REG_LITERAL) goto parse_literal; + #endif /* REG_LITERAL */ + + switch (*ctx->re) + { + case CHAR_LPAREN: /* parenthesized subexpression */ + + /* Handle "(?...)" extensions. They work in a way similar + to Perls corresponding extensions. */ + /* QSE: added ctx->cflags & REG_NONSTDEXT */ + if ((ctx->cflags & REG_NONSTDEXT) && + (ctx->cflags & REG_EXTENDED) && + *(ctx->re + 1) == CHAR_QUESTIONMARK) { - if (*ctx->re == QSE_T('i')) + int new_cflags = ctx->cflags; + int bit = 1; + DPRINT(("tre_parse: extension: '%.*" STRF "\n", REST(ctx->re))); + ctx->re += 2; + while (/*CONSTCOND*/1) { - DPRINT(("tre_parse: icase: '%.*" STRF "\n", - REST(ctx->re))); - if (bit) - new_cflags |= REG_ICASE; - else - new_cflags &= ~REG_ICASE; - ctx->re++; - } - else if (*ctx->re == QSE_T('n')) - { - DPRINT(("tre_parse: newline: '%.*" STRF "\n", - REST(ctx->re))); - if (bit) - new_cflags |= REG_NEWLINE; - else - new_cflags &= ~REG_NEWLINE; - ctx->re++; - } -#ifdef REG_RIGHT_ASSOC - else if (*ctx->re == QSE_T('r')) - { - DPRINT(("tre_parse: right assoc: '%.*" STRF "\n", - REST(ctx->re))); - if (bit) - new_cflags |= REG_RIGHT_ASSOC; - else - new_cflags &= ~REG_RIGHT_ASSOC; - ctx->re++; - } -#endif /* REG_RIGHT_ASSOC */ -#ifdef REG_UNGREEDY - else if (*ctx->re == QSE_T('U')) - { - DPRINT(("tre_parse: ungreedy: '%.*" STRF "\n", - REST(ctx->re))); - if (bit) - new_cflags |= REG_UNGREEDY; - else - new_cflags &= ~REG_UNGREEDY; - ctx->re++; - } -#endif /* REG_UNGREEDY */ - else if (*ctx->re == CHAR_MINUS) - { - DPRINT(("tre_parse: turn off: '%.*" STRF "\n", - REST(ctx->re))); - ctx->re++; - bit = 0; - } - else if (*ctx->re == CHAR_COLON) - { - DPRINT(("tre_parse: no group: '%.*" STRF "\n", - REST(ctx->re))); - ctx->re++; - depth++; - break; - } - else if (*ctx->re == CHAR_HASH) - { - DPRINT(("tre_parse: comment: '%.*" STRF "\n", - REST(ctx->re))); - /* A comment can contain any character except a - right parenthesis */ - while (*ctx->re != CHAR_RPAREN - && ctx->re < ctx->re_end) + if (*ctx->re == QSE_T('i')) + { + DPRINT(("tre_parse: icase: '%.*" STRF "\n", REST(ctx->re))); + if (bit) + new_cflags |= REG_ICASE; + else + new_cflags &= ~REG_ICASE; ctx->re++; - if (*ctx->re == CHAR_RPAREN && ctx->re < ctx->re_end) + } + else if (*ctx->re == QSE_T('n')) + { + DPRINT(("tre_parse: newline: '%.*" STRF "\n", REST(ctx->re))); + if (bit) + new_cflags |= REG_NEWLINE; + else + new_cflags &= ~REG_NEWLINE; + ctx->re++; + } + #ifdef REG_RIGHT_ASSOC + else if (*ctx->re == QSE_T('r')) + { + DPRINT(("tre_parse: right assoc: '%.*" STRF "\n", REST(ctx->re))); + if (bit) + new_cflags |= REG_RIGHT_ASSOC; + else + new_cflags &= ~REG_RIGHT_ASSOC; + ctx->re++; + } + #endif /* REG_RIGHT_ASSOC */ + #ifdef REG_UNGREEDY + else if (*ctx->re == QSE_T('U')) + { + DPRINT(("tre_parse: ungreedy: '%.*" STRF "\n", REST(ctx->re))); + if (bit) + new_cflags |= REG_UNGREEDY; + else + new_cflags &= ~REG_UNGREEDY; + ctx->re++; + } + #endif /* REG_UNGREEDY */ + else if (*ctx->re == CHAR_MINUS) + { + DPRINT(("tre_parse: turn off: '%.*" STRF "\n", + REST(ctx->re))); + ctx->re++; + bit = 0; + } + else if (*ctx->re == CHAR_COLON) + { + DPRINT(("tre_parse: no group: '%.*" STRF "\n", + REST(ctx->re))); + ctx->re++; + depth++; + break; + } + else if (*ctx->re == CHAR_HASH) + { + DPRINT(("tre_parse: comment: '%.*" STRF "\n", + REST(ctx->re))); + /* A comment can contain any character except a + right parenthesis */ + while (*ctx->re != CHAR_RPAREN + && ctx->re < ctx->re_end) + ctx->re++; + if (*ctx->re == CHAR_RPAREN && ctx->re < ctx->re_end) + { + ctx->re++; + break; + } + else + return REG_BADPAT; + } + else if (*ctx->re == CHAR_RPAREN) { ctx->re++; break; @@ -1307,493 +1295,448 @@ parse_brace: else return REG_BADPAT; } - else if (*ctx->re == CHAR_RPAREN) + + /* Turn on the cflags changes for the rest of the + enclosing group. */ + STACK_PUSHX(stack, int, ctx->cflags); + STACK_PUSHX(stack, int, PARSE_RESTORE_CFLAGS); + STACK_PUSHX(stack, int, PARSE_RE); + ctx->cflags = new_cflags; + break; + } + + if (ctx->cflags & REG_EXTENDED + || (ctx->re > ctx->re_start + && *(ctx->re - 1) == CHAR_BACKSLASH)) + { + depth++; + /* QSE: added ctx->cflags & REG_NONSTDEXT */ + if ((ctx->cflags & REG_NONSTDEXT) && + ctx->re + 2 < ctx->re_end && + *(ctx->re + 1) == CHAR_QUESTIONMARK && + *(ctx->re + 2) == CHAR_COLON) { - ctx->re++; - break; + /* QSE: \(?: or (?: depending on REG_EXTENDED */ + DPRINT(("tre_parse: group begin: '%.*" STRF + "', no submatch\n", REST(ctx->re))); + /* Don't mark for submatching. */ + ctx->re += 3; + STACK_PUSHX(stack, int, PARSE_RE); } else - return REG_BADPAT; - } - - /* Turn on the cflags changes for the rest of the - enclosing group. */ - STACK_PUSHX(stack, int, ctx->cflags); - STACK_PUSHX(stack, int, PARSE_RESTORE_CFLAGS); - STACK_PUSHX(stack, int, PARSE_RE); - ctx->cflags = new_cflags; - break; - } - - if (ctx->cflags & REG_EXTENDED - || (ctx->re > ctx->re_start - && *(ctx->re - 1) == CHAR_BACKSLASH)) - { - depth++; - /* QSE: added ctx->cflags & REG_NONSTDEXT */ - if ((ctx->cflags & REG_NONSTDEXT) && - ctx->re + 2 < ctx->re_end && - *(ctx->re + 1) == CHAR_QUESTIONMARK && - *(ctx->re + 2) == CHAR_COLON) - { - /* QSE: \(?: or (?: depending on REG_EXTENDED */ - DPRINT(("tre_parse: group begin: '%.*" STRF - "', no submatch\n", REST(ctx->re))); - /* Don't mark for submatching. */ - ctx->re += 3; - STACK_PUSHX(stack, int, PARSE_RE); + { + DPRINT(("tre_parse: group begin: '%.*" STRF + "', submatch %d\n", REST(ctx->re), + ctx->submatch_id)); + ctx->re++; + /* First parse a whole RE, then mark the resulting tree + for submatching. */ + STACK_PUSHX(stack, int, ctx->submatch_id); + STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH); + STACK_PUSHX(stack, int, PARSE_RE); + ctx->submatch_id++; + } } else - { - DPRINT(("tre_parse: group begin: '%.*" STRF - "', submatch %d\n", REST(ctx->re), - ctx->submatch_id)); - ctx->re++; - /* First parse a whole RE, then mark the resulting tree - for submatching. */ - STACK_PUSHX(stack, int, ctx->submatch_id); - STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH); - STACK_PUSHX(stack, int, PARSE_RE); - ctx->submatch_id++; - } - } - else - goto parse_literal; - break; - - case CHAR_RPAREN: /* end of current subexpression */ - if ((ctx->cflags & REG_EXTENDED && depth > 0) - || (ctx->re > ctx->re_start - && *(ctx->re - 1) == CHAR_BACKSLASH)) - { - DPRINT(("tre_parse: empty: '%.*" STRF "'\n", - REST(ctx->re))); - /* We were expecting an atom, but instead the current - subexpression was closed. POSIX leaves the meaning of - this to be implementation-defined. We interpret this as - an empty expression (which matches an empty string). */ - result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); - if (result == NULL) - return REG_ESPACE; - if (!(ctx->cflags & REG_EXTENDED)) - ctx->re--; - } - else - goto parse_literal; - break; - - case CHAR_LBRACKET: /* bracket expression */ - DPRINT(("tre_parse: bracket: '%.*" STRF "'\n", - REST(ctx->re))); - ctx->re++; - status = tre_parse_bracket(ctx, &result); - if (status != REG_OK) - return status; - break; - - case CHAR_BACKSLASH: - /* If this is "\(" or "\)" chew off the backslash and - try again. */ - if (!(ctx->cflags & REG_EXTENDED) - && ctx->re + 1 < ctx->re_end - && (*(ctx->re + 1) == CHAR_LPAREN - || *(ctx->re + 1) == CHAR_RPAREN)) - { - ctx->re++; - STACK_PUSHX(stack, int, PARSE_ATOM); + goto parse_literal; break; - } - - /* If a macro is used, parse the expanded macro recursively. */ - { - tre_char_t buf[64]; - tre_expand_macro(ctx->re + 1, ctx->re_end, - buf, QSE_COUNTOF(buf)); - if (buf[0] != 0) + + case CHAR_RPAREN: /* end of current subexpression */ + if ((ctx->cflags & REG_EXTENDED && depth > 0) + || (ctx->re > ctx->re_start + && *(ctx->re - 1) == CHAR_BACKSLASH)) { - tre_parse_ctx_t subctx; - QSE_MEMCPY (&subctx, ctx, sizeof(subctx)); - subctx.re = buf; - subctx.len = tre_strlen(buf); - subctx.nofirstsub = 1; - status = tre_parse(&subctx); - if (status != REG_OK) return status; + DPRINT(("tre_parse: empty: '%.*" STRF "'\n", REST(ctx->re))); + /* We were expecting an atom, but instead the current + subexpression was closed. POSIX leaves the meaning of + this to be implementation-defined. We interpret this as + an empty expression (which matches an empty string). */ + result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); + if (result == NULL) return REG_ESPACE; + if (!(ctx->cflags & REG_EXTENDED)) ctx->re--; + } + else + goto parse_literal; + break; + + case CHAR_LBRACKET: /* bracket expression */ + DPRINT(("tre_parse: bracket: '%.*" STRF "'\n", REST(ctx->re))); + ctx->re++; + status = tre_parse_bracket(ctx, &result); + if (status != REG_OK) return status; + break; + + case CHAR_BACKSLASH: + /* If this is "\(" or "\)" chew off the backslash and + try again. */ + if (!(ctx->cflags & REG_EXTENDED) + && ctx->re + 1 < ctx->re_end + && (*(ctx->re + 1) == CHAR_LPAREN + || *(ctx->re + 1) == CHAR_RPAREN)) + { + ctx->re++; + STACK_PUSHX(stack, int, PARSE_ATOM); + break; + } + + /* If a macro is used, parse the expanded macro recursively. */ + { + tre_char_t buf[64]; + tre_expand_macro(ctx->re + 1, ctx->re_end, buf, QSE_COUNTOF(buf)); + if (buf[0] != 0) + { + tre_parse_ctx_t subctx; + QSE_MEMCPY (&subctx, ctx, sizeof(subctx)); + subctx.re = buf; + subctx.len = tre_strlen(buf); + subctx.nofirstsub = 1; + status = tre_parse(&subctx); + if (status != REG_OK) return status; + ctx->re += 2; + ctx->position = subctx.position; + result = subctx.result; + break; + } + } + + if (ctx->re + 1 >= ctx->re_end) + { + /* Trailing backslash. */ + return REG_EESCAPE; + } + + #ifdef REG_LITERAL + if (*(ctx->re + 1) == QSE_T('Q')) + { + DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n", + REST(ctx->re))); + ctx->cflags |= REG_LITERAL; + temporary_cflags |= REG_LITERAL; ctx->re += 2; - ctx->position = subctx.position; - result = subctx.result; + STACK_PUSHX(stack, int, PARSE_ATOM); break; } - } - - if (ctx->re + 1 >= ctx->re_end) - /* Trailing backslash. */ - return REG_EESCAPE; - -#ifdef REG_LITERAL - if (*(ctx->re + 1) == QSE_T('Q')) - { - DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n", - REST(ctx->re))); - ctx->cflags |= REG_LITERAL; - temporary_cflags |= REG_LITERAL; - ctx->re += 2; - STACK_PUSHX(stack, int, PARSE_ATOM); - break; - } -#endif /* REG_LITERAL */ - - DPRINT(("tre_parse: bleep: '%.*" STRF "'\n", REST(ctx->re))); - ctx->re++; - switch (*ctx->re) - { - case QSE_T('b'): - result = tre_ast_new_literal(ctx->mem, ASSERTION, - ASSERT_AT_WB, -1); + #endif /* REG_LITERAL */ + + DPRINT(("tre_parse: bleep: '%.*" STRF "'\n", REST(ctx->re))); ctx->re++; - break; - case QSE_T('B'): - result = tre_ast_new_literal(ctx->mem, ASSERTION, - ASSERT_AT_WB_NEG, -1); - ctx->re++; - break; - case QSE_T('<'): - result = tre_ast_new_literal(ctx->mem, ASSERTION, - ASSERT_AT_BOW, -1); - ctx->re++; - break; - case QSE_T('>'): - result = tre_ast_new_literal(ctx->mem, ASSERTION, - ASSERT_AT_EOW, -1); - ctx->re++; - break; - case QSE_T('x'): - ctx->re++; - if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end) + switch (*ctx->re) { - /* QSE */ - #if 0 - /* 8 bit hex char. */ - char tmp[3] = {0, 0, 0}; - long val; - DPRINT(("tre_parse: 8 bit hex: '%.*" STRF "'\n", - REST(ctx->re - 2))); - - if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end) - { - tmp[0] = (char)ctx->re[0]; - ctx->re++; - } - if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end) - { - tmp[1] = (char)ctx->re[0]; - ctx->re++; - } - val = strtol(tmp, NULL, 16); - #endif - long val = 0; - int tmp; - if ((tmp = xdigit_to_num(ctx->re[0])) >= 0 && ctx->re < ctx->re_end) - { - val = val * 16 + tmp; - ctx->re++; - } - if ((tmp = xdigit_to_num(ctx->re[1])) >= 0 && ctx->re < ctx->re_end) - { - val = val * 16 + tmp; - ctx->re++; - } - - result = tre_ast_new_literal(ctx->mem, (int)val, - (int)val, ctx->position); - ctx->position++; - break; - } - else if (ctx->re < ctx->re_end) - { - /* Wide char. */ - /* QSE */ - #if 0 - char tmp[32]; - long val; - int i = 0; + case QSE_T('b'): + result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1); ctx->re++; - while (ctx->re_end - ctx->re >= 0) + break; + case QSE_T('B'): + result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1); + ctx->re++; + break; + case QSE_T('<'): + result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1); + ctx->re++; + break; + case QSE_T('>'): + result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1); + ctx->re++; + break; + case QSE_T('x'): + ctx->re++; + if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end) { - if (ctx->re[0] == CHAR_RBRACE) - break; - if (tre_isxdigit(ctx->re[0])) + /* QSE */ + #if 0 + /* 8 bit hex char. */ + char tmp[3] = {0, 0, 0}; + long val; + DPRINT(("tre_parse: 8 bit hex: '%.*" STRF "'\n", + REST(ctx->re - 2))); + + if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end) { - tmp[i] = (char)ctx->re[0]; - i++; + tmp[0] = (char)ctx->re[0]; ctx->re++; - continue; } - return REG_EBRACE; - } - ctx->re++; - tmp[i] = 0; - val = strtol(tmp, NULL, 16); - #endif - long val = 0; - int tmp; - - ctx->re++; - while (ctx->re_end - ctx->re >= 0) - { - if (ctx->re[0] == CHAR_RBRACE) - break; - tmp = xdigit_to_num(ctx->re[0]); - if (tmp >= 0) + if (tre_isxdigit(ctx->re[0]) && ctx->re < ctx->re_end) + { + tmp[1] = (char)ctx->re[0]; + ctx->re++; + } + val = strtol(tmp, NULL, 16); + #endif + long val = 0; + int tmp; + if ((tmp = xdigit_to_num(ctx->re[0])) >= 0 && ctx->re < ctx->re_end) { val = val * 16 + tmp; ctx->re++; - continue; } - return REG_EBRACE; + if ((tmp = xdigit_to_num(ctx->re[1])) >= 0 && ctx->re < ctx->re_end) + { + val = val * 16 + tmp; + ctx->re++; + } + + result = tre_ast_new_literal(ctx->mem, (int)val, (int)val, ctx->position); + ctx->position++; + break; + } + else if (ctx->re < ctx->re_end) + { + /* Wide char. */ + /* QSE */ + #if 0 + char tmp[32]; + long val; + int i = 0; + ctx->re++; + while (ctx->re_end - ctx->re >= 0) + { + if (ctx->re[0] == CHAR_RBRACE) + break; + if (tre_isxdigit(ctx->re[0])) + { + tmp[i] = (char)ctx->re[0]; + i++; + ctx->re++; + continue; + } + return REG_EBRACE; + } + ctx->re++; + tmp[i] = 0; + val = strtol(tmp, NULL, 16); + #endif + long val = 0; + int tmp; + + ctx->re++; + while (ctx->re_end - ctx->re >= 0) + { + if (ctx->re[0] == CHAR_RBRACE) + break; + tmp = xdigit_to_num(ctx->re[0]); + if (tmp >= 0) + { + val = val * 16 + tmp; + ctx->re++; + continue; + } + return REG_EBRACE; + } + + result = tre_ast_new_literal(ctx->mem, (int)val, (int)val, ctx->position); + ctx->position++; + break; + } + /*FALLTHROUGH*/ + + default: + if (tre_isdigit(*ctx->re)) + { + /* Back reference. */ + int val = *ctx->re - QSE_T('0'); + DPRINT(("tre_parse: backref: '%.*" STRF "'\n", REST(ctx->re - 1))); + result = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position); + if (result == NULL) return REG_ESPACE; + ctx->position++; + ctx->max_backref = MAX(val, ctx->max_backref); + ctx->re++; + } + else + { + /* Escaped character. */ + DPRINT(("tre_parse: escaped: '%.*" STRF "'\n", REST(ctx->re - 1))); + result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position); + ctx->position++; + ctx->re++; } - - result = tre_ast_new_literal(ctx->mem, (int)val, (int)val, - ctx->position); - ctx->position++; break; } - /*FALLTHROUGH*/ - - default: - if (tre_isdigit(*ctx->re)) + if (result == NULL) + return REG_ESPACE; + break; + + case CHAR_PERIOD: /* the any-symbol */ + DPRINT(("tre_parse: any: '%.*" STRF "'\n", + REST(ctx->re))); + if (ctx->cflags & REG_NEWLINE) { - /* Back reference. */ - int val = *ctx->re - QSE_T('0'); - DPRINT(("tre_parse: backref: '%.*" STRF "'\n", - REST(ctx->re - 1))); - result = tre_ast_new_literal(ctx->mem, BACKREF, val, - ctx->position); - if (result == NULL) - return REG_ESPACE; - ctx->position++; - ctx->max_backref = MAX(val, ctx->max_backref); - ctx->re++; + tre_ast_node_t *tmp1; + tre_ast_node_t *tmp2; + /* exclude new line */ + tmp1 = tre_ast_new_literal(ctx->mem, 0, QSE_T('\n') - 1, ctx->position); + if (!tmp1) return REG_ESPACE; + tmp2 = tre_ast_new_literal(ctx->mem, QSE_T('\n') + 1, TRE_CHAR_MAX, ctx->position + 1); + if (!tmp2) return REG_ESPACE; + result = tre_ast_new_union(ctx->mem, tmp1, tmp2); + if (!result) return REG_ESPACE; + ctx->position += 2; } else { - /* Escaped character. */ - DPRINT(("tre_parse: escaped: '%.*" STRF "'\n", - REST(ctx->re - 1))); - result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position); + /* all characters */ + result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position); + if (!result) return REG_ESPACE; ctx->position++; + } + ctx->re++; + break; + + case CHAR_CARET: /* beginning of line assertion */ + /* '^' has a special meaning everywhere in EREs, and in the + beginning of the RE and after \( is BREs. */ + if (ctx->cflags & REG_EXTENDED + || (ctx->re - 2 >= ctx->re_start + && *(ctx->re - 2) == CHAR_BACKSLASH + && *(ctx->re - 1) == CHAR_LPAREN) + || ctx->re == ctx->re_start) + { + DPRINT(("tre_parse: BOL: '%.*" STRF "'\n", + REST(ctx->re))); + result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1); + if (result == NULL) return REG_ESPACE; ctx->re++; } + else + goto parse_literal; break; - } - if (result == NULL) - return REG_ESPACE; - break; - - case CHAR_PERIOD: /* the any-symbol */ - DPRINT(("tre_parse: any: '%.*" STRF "'\n", - REST(ctx->re))); - if (ctx->cflags & REG_NEWLINE) - { - tre_ast_node_t *tmp1; - tre_ast_node_t *tmp2; - tmp1 = tre_ast_new_literal(ctx->mem, 0, QSE_T('\n') - 1, - ctx->position); - if (!tmp1) - return REG_ESPACE; - tmp2 = tre_ast_new_literal(ctx->mem, QSE_T('\n') + 1, TRE_CHAR_MAX, - ctx->position + 1); - if (!tmp2) - return REG_ESPACE; - result = tre_ast_new_union(ctx->mem, tmp1, tmp2); - if (!result) - return REG_ESPACE; - ctx->position += 2; - } - else - { - result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, - ctx->position); - if (!result) - return REG_ESPACE; + + case CHAR_DOLLAR: /* end of line assertion. */ + /* '$' is special everywhere in EREs, and in the end of the + string and before \) is BREs. */ + if (ctx->cflags & REG_EXTENDED + || (ctx->re + 2 < ctx->re_end + && *(ctx->re + 1) == CHAR_BACKSLASH + && *(ctx->re + 2) == CHAR_RPAREN) + || ctx->re + 1 == ctx->re_end) + { + DPRINT(("tre_parse: EOL: '%.*" STRF "'\n", + REST(ctx->re))); + result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1); + if (result == NULL) + return REG_ESPACE; + ctx->re++; + } + else + goto parse_literal; + break; + + default: + parse_literal: + + if (temporary_cflags && ctx->re + 1 < ctx->re_end + && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == QSE_T('E')) + { + DPRINT(("tre_parse: end tmps: '%.*" STRF "'\n", REST(ctx->re))); + ctx->cflags &= ~temporary_cflags; + temporary_cflags = 0; + ctx->re += 2; + STACK_PUSHX(stack, int, PARSE_PIECE); + break; + } + + + /* We are expecting an atom. If the subexpression (or the whole + regexp ends here, we interpret it as an empty expression + (which matches an empty string). */ + if ( + #ifdef REG_LITERAL + !(ctx->cflags & REG_LITERAL) && + #endif /* REG_LITERAL */ + (ctx->re >= ctx->re_end + || *ctx->re == CHAR_STAR + || (ctx->cflags & REG_EXTENDED + && (*ctx->re == CHAR_PIPE + /* QSE */ + /*|| *ctx->re == CHAR_LBRACE*/ + || (*ctx->re == CHAR_LBRACE && !(ctx->cflags & REG_NOBOUND)) + /* END QSE */ + || *ctx->re == CHAR_PLUS + || *ctx->re == CHAR_QUESTIONMARK)) + /* Test for "\)" in BRE mode. */ + || (!(ctx->cflags & REG_EXTENDED) + && ctx->re + 1 < ctx->re_end + && *ctx->re == CHAR_BACKSLASH + && *(ctx->re + 1) == CHAR_LBRACE))) + { + DPRINT(("tre_parse: empty: '%.*" STRF "'\n", REST(ctx->re))); + result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); + if (!result) return REG_ESPACE; + break; + } + + DPRINT(("tre_parse: literal: '%.*" STRF "'\n", + REST(ctx->re))); + /* Note that we can't use an tre_isalpha() test here, since there + may be characters which are alphabetic but neither upper or + lower case. */ + if (ctx->cflags & REG_ICASE && (tre_isupper(*ctx->re) || tre_islower(*ctx->re))) + { + tre_ast_node_t *tmp1; + tre_ast_node_t *tmp2; + + /* XXX - Can there be more than one opposite-case + counterpoints for some character in some locale? Or + more than two characters which all should be regarded + the same character if case is ignored? If yes, there + does not seem to be a portable way to detect it. I guess + that at least for multi-character collating elements there + could be several opposite-case counterpoints, but they + cannot be supported portably anyway. */ + tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re), tre_toupper(*ctx->re), ctx->position); + if (!tmp1) return REG_ESPACE; + tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re), tre_tolower(*ctx->re), ctx->position); + if (!tmp2) return REG_ESPACE; + result = tre_ast_new_union(ctx->mem, tmp1, tmp2); + if (!result) return REG_ESPACE; + } + else + { + result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position); + if (!result) return REG_ESPACE; + } ctx->position++; - } - ctx->re++; - break; - - case CHAR_CARET: /* beginning of line assertion */ - /* '^' has a special meaning everywhere in EREs, and in the - beginning of the RE and after \( is BREs. */ - if (ctx->cflags & REG_EXTENDED - || (ctx->re - 2 >= ctx->re_start - && *(ctx->re - 2) == CHAR_BACKSLASH - && *(ctx->re - 1) == CHAR_LPAREN) - || ctx->re == ctx->re_start) - { - DPRINT(("tre_parse: BOL: '%.*" STRF "'\n", - REST(ctx->re))); - result = tre_ast_new_literal(ctx->mem, ASSERTION, - ASSERT_AT_BOL, -1); - if (result == NULL) - return REG_ESPACE; ctx->re++; - } - else - goto parse_literal; - break; - - case CHAR_DOLLAR: /* end of line assertion. */ - /* '$' is special everywhere in EREs, and in the end of the - string and before \) is BREs. */ - if (ctx->cflags & REG_EXTENDED - || (ctx->re + 2 < ctx->re_end - && *(ctx->re + 1) == CHAR_BACKSLASH - && *(ctx->re + 2) == CHAR_RPAREN) - || ctx->re + 1 == ctx->re_end) - { - DPRINT(("tre_parse: EOL: '%.*" STRF "'\n", - REST(ctx->re))); - result = tre_ast_new_literal(ctx->mem, ASSERTION, - ASSERT_AT_EOL, -1); - if (result == NULL) - return REG_ESPACE; - ctx->re++; - } - else - goto parse_literal; - break; - - default: -parse_literal: - - if (temporary_cflags && ctx->re + 1 < ctx->re_end - && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == QSE_T('E')) - { - DPRINT(("tre_parse: end tmps: '%.*" STRF "'\n", - REST(ctx->re))); - ctx->cflags &= ~temporary_cflags; - temporary_cflags = 0; - ctx->re += 2; - STACK_PUSHX(stack, int, PARSE_PIECE); break; } - - - /* We are expecting an atom. If the subexpression (or the whole - regexp ends here, we interpret it as an empty expression - (which matches an empty string). */ - if ( -#ifdef REG_LITERAL - !(ctx->cflags & REG_LITERAL) && -#endif /* REG_LITERAL */ - (ctx->re >= ctx->re_end - || *ctx->re == CHAR_STAR - || (ctx->cflags & REG_EXTENDED - && (*ctx->re == CHAR_PIPE - /* QSE */ - /*|| *ctx->re == CHAR_LBRACE*/ - || (*ctx->re == CHAR_LBRACE && !(ctx->cflags & REG_NOBOUND)) - /* END QSE */ - || *ctx->re == CHAR_PLUS - || *ctx->re == CHAR_QUESTIONMARK)) - /* Test for "\)" in BRE mode. */ - || (!(ctx->cflags & REG_EXTENDED) - && ctx->re + 1 < ctx->re_end - && *ctx->re == CHAR_BACKSLASH - && *(ctx->re + 1) == CHAR_LBRACE))) - { - DPRINT(("tre_parse: empty: '%.*" STRF "'\n", - REST(ctx->re))); - result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); - if (!result) - return REG_ESPACE; - break; - } - - DPRINT(("tre_parse: literal: '%.*" STRF "'\n", - REST(ctx->re))); - /* Note that we can't use an tre_isalpha() test here, since there - may be characters which are alphabetic but neither upper or - lower case. */ - if (ctx->cflags & REG_ICASE - && (tre_isupper(*ctx->re) || tre_islower(*ctx->re))) - { - tre_ast_node_t *tmp1; - tre_ast_node_t *tmp2; - - /* XXX - Can there be more than one opposite-case - counterpoints for some character in some locale? Or - more than two characters which all should be regarded - the same character if case is ignored? If yes, there - does not seem to be a portable way to detect it. I guess - that at least for multi-character collating elements there - could be several opposite-case counterpoints, but they - cannot be supported portably anyway. */ - tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re), - tre_toupper(*ctx->re), - ctx->position); - if (!tmp1) - return REG_ESPACE; - tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re), - tre_tolower(*ctx->re), - ctx->position); - if (!tmp2) - return REG_ESPACE; - result = tre_ast_new_union(ctx->mem, tmp1, tmp2); - if (!result) - return REG_ESPACE; - } - else - { - result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, - ctx->position); - if (!result) - return REG_ESPACE; - } - ctx->position++; - ctx->re++; break; - } - break; - - case PARSE_MARK_FOR_SUBMATCH: - { - int submatch_id = tre_stack_pop_int(stack); - - if (result->submatch_id >= 0) + + case PARSE_MARK_FOR_SUBMATCH: { - tre_ast_node_t *n, *tmp_node; - n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); - if (n == NULL) - return REG_ESPACE; - tmp_node = tre_ast_new_catenation(ctx->mem, n, result); - if (tmp_node == NULL) - return REG_ESPACE; - tmp_node->num_submatches = result->num_submatches; - result = tmp_node; + int submatch_id = tre_stack_pop_int(stack); + + if (result->submatch_id >= 0) + { + tre_ast_node_t *n, *tmp_node; + n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); + if (n == NULL) return REG_ESPACE; + tmp_node = tre_ast_new_catenation(ctx->mem, n, result); + if (tmp_node == NULL) return REG_ESPACE; + tmp_node->num_submatches = result->num_submatches; + result = tmp_node; + } + result->submatch_id = submatch_id; + result->num_submatches++; + break; + } + + case PARSE_RESTORE_CFLAGS: + ctx->cflags = tre_stack_pop_int(stack); + break; + + default: + assert(0); + break; } - result->submatch_id = submatch_id; - result->num_submatches++; - break; } - - case PARSE_RESTORE_CFLAGS: - ctx->cflags = tre_stack_pop_int(stack); - break; - - default: - assert(0); - break; - } -} - -/* Check for missing closing parentheses. */ -if (depth > 0) - return REG_EPAREN; - -if (status == REG_OK) - ctx->result = result; - -return status; + + /* Check for missing closing parentheses. */ + if (depth > 0) + return REG_EPAREN; + + if (status == REG_OK) + ctx->result = result; + + return status; } /* EOF */ diff --git a/qse/lib/cmn/tre.c b/qse/lib/cmn/tre.c index 1e2afdff..4e93242b 100644 --- a/qse/lib/cmn/tre.c +++ b/qse/lib/cmn/tre.c @@ -205,15 +205,6 @@ static int tre_match( if (tnfa->have_backrefs || (eflags & REG_BACKTRACKING_MATCHER)) { /* The regex has back references, use the backtracking matcher. */ - if (type == STR_USER) - { - const tre_str_source *source = string; - if (source->rewind == QSE_NULL || source->compare == QSE_NULL) - /* The backtracking matcher requires rewind and compare - capabilities from the input stream. */ - return REG_BADPAT; - } - status = tre_tnfa_run_backtrack ( preg->mmgr, tnfa, string, (int)len, type, tags, eflags, &eo); @@ -266,15 +257,6 @@ int qse_tre_exec ( return qse_tre_execx (tre, str, (qse_size_t)-1, pmatch, nmatch, eflags); } -#if 0 -int qse_tre_execsrc ( - const regex_t *preg, const tre_str_source *str, - qse_size_t nmatch, regmatch_t pmatch[], int eflags) -{ - return tre_match (preg, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags); -} -#endif - qse_tre_errnum_t qse_tre_geterrnum (qse_tre_t* tre) { return tre->errnum; diff --git a/qse/lib/cmn/tre.h b/qse/lib/cmn/tre.h index 8d5bb03b..a6ee67a3 100644 --- a/qse/lib/cmn/tre.h +++ b/qse/lib/cmn/tre.h @@ -177,7 +177,6 @@ typedef qse_cint_t tre_cint_t; #define regex_t qse_tre_t #define regmatch_t qse_tre_match_t #define reg_errcode_t qse_tre_errnum_t -#define tre_str_source qse_tre_strsrc_t #define REG_OK QSE_TRE_ENOERR @@ -278,7 +277,7 @@ typedef qse_pma_t* tre_mem_t; typedef qse_ctype_t tre_ctype_t; #define tre_isctype(c,t) QSE_ISCTYPE(c,t) -typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t; +typedef enum { STR_WIDE, STR_BYTE, STR_MBS } tre_str_type_t; /* Returns number of bytes to add to (char *)ptr to make it properly aligned for the type. */ @@ -305,6 +304,9 @@ typedef struct tnfa_transition tre_tnfa_transition_t; struct tnfa_transition { /* Range of accepted characters. */ + /* QSE indicate that code_min .. code_max is not yet negated for ^ in a bracket */ + int negate_range; + /* END QSE */ tre_cint_t code_min; tre_cint_t code_max; /* Pointer to the destination state. */