changed awk to handle IGNORECASE with a regular expression engine that doesn't have a run-time option but has a compile-time option

This commit is contained in:
hyung-hwan 2013-08-23 15:19:29 +00:00
parent 47677ca566
commit d841c9f62f
21 changed files with 1127 additions and 1236 deletions

View File

@ -110,21 +110,21 @@ typedef struct qse_awk_loc_t qse_awk_loc_t;
* Three common fields are: * Three common fields are:
* - type - type of a value from #qse_awk_val_type_t * - type - type of a value from #qse_awk_val_type_t
* - ref - reference count * - ref - reference count
* - nstr - numeric string marker * - stat - static value
* - nstr - numeric string marker, 1 -> long, 2 -> real
*/ */
#if QSE_SIZEOF_INT == 2 /*
# define QSE_AWK_VAL_HDR \
unsigned int type: 3; \
unsigned int ref: 10; \
unsigned int stat: 1; \
unsigned int nstr: 2
#else
#define QSE_AWK_VAL_HDR \ #define QSE_AWK_VAL_HDR \
unsigned int type: 3; \ unsigned int type: 3; \
unsigned int ref: 26; \ unsigned int ref: 26; \
unsigned int stat: 1; \ unsigned int stat: 1; \
unsigned int nstr: 2 unsigned int nstr: 2;
#endif */
#define QSE_AWK_VAL_HDR \
qse_uintptr_t type: 3; \
qse_uintptr_t ref: ((QSE_SIZEOF_UINTPTR_T * 8) - 6); \
qse_uintptr_t stat: 1; \
qse_uintptr_t nstr: 2;
/** /**
* The qse_awk_val_t type is an abstract value type. A value commonly contains: * The qse_awk_val_t type is an abstract value type. A value commonly contains:
@ -191,7 +191,7 @@ struct qse_awk_val_rex_t
{ {
QSE_AWK_VAL_HDR; QSE_AWK_VAL_HDR;
qse_xstr_t str; qse_xstr_t str;
void* code; void* code[2];
}; };
typedef struct qse_awk_val_rex_t qse_awk_val_rex_t; typedef struct qse_awk_val_rex_t qse_awk_val_rex_t;
@ -2388,7 +2388,7 @@ QSE_EXPORT qse_awk_val_t* qse_awk_rtx_makenstrvalwithcstr (
QSE_EXPORT qse_awk_val_t* qse_awk_rtx_makerexval ( QSE_EXPORT qse_awk_val_t* qse_awk_rtx_makerexval (
qse_awk_rtx_t* rtx, qse_awk_rtx_t* rtx,
const qse_cstr_t* str, const qse_cstr_t* str,
void* code void* code[2]
); );
/** /**

View File

@ -99,15 +99,6 @@ enum qse_tre_eflag_t
QSE_TRE_NOTEOL = (1 << 2) QSE_TRE_NOTEOL = (1 << 2)
}; };
typedef struct qse_tre_strsrc_t qse_tre_strsrc_t;
struct qse_tre_strsrc_t
{
int (*get_next_char) (qse_char_t *c, unsigned int* pos_add, void* context);
void (*rewind)(qse_size_t pos, void *context);
int (*compare)(qse_size_t pos1, qse_size_t pos2, qse_size_t len, void* context);
void* context;
};
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif

View File

@ -307,8 +307,8 @@ struct qse_awk_rtx_t
struct struct
{ {
void* rs; void* rs[2];
void* fs; void* fs[2];
int ignorecase; int ignorecase;
qse_long_t nr; qse_long_t nr;

View File

@ -624,7 +624,7 @@ static int fnc_substr (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
return 0; return 0;
} }
static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) static int fnc_split (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
{ {
qse_size_t nargs; qse_size_t nargs;
qse_awk_val_t* a0, * a1, * a2, * t1, * t2; qse_awk_val_t* a0, * a1, * a2, * t1, * t2;
@ -642,12 +642,12 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
qse_awk_errnum_t errnum; qse_awk_errnum_t errnum;
int x; int x;
nargs = qse_awk_rtx_getnargs (run); nargs = qse_awk_rtx_getnargs (rtx);
QSE_ASSERT (nargs >= 2 && nargs <= 3); QSE_ASSERT (nargs >= 2 && nargs <= 3);
a0 = qse_awk_rtx_getarg (run, 0); a0 = qse_awk_rtx_getarg (rtx, 0);
a1 = qse_awk_rtx_getarg (run, 1); a1 = qse_awk_rtx_getarg (rtx, 1);
a2 = (nargs >= 3)? qse_awk_rtx_getarg (run, 2): QSE_NULL; a2 = (nargs >= 3)? qse_awk_rtx_getarg (rtx, 2): QSE_NULL;
QSE_ASSERT (a1->type == QSE_AWK_VAL_REF); QSE_ASSERT (a1->type == QSE_AWK_VAL_REF);
@ -658,7 +658,7 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
} }
else else
{ {
str.ptr = qse_awk_rtx_valtostrdup (run, a0, &str.len); str.ptr = qse_awk_rtx_valtostrdup (rtx, a0, &str.len);
if (str.ptr == QSE_NULL) return -1; if (str.ptr == QSE_NULL) return -1;
str_free = (qse_char_t*)str.ptr; str_free = (qse_char_t*)str.ptr;
} }
@ -666,7 +666,7 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
if (a2 == QSE_NULL) if (a2 == QSE_NULL)
{ {
/* get the value from FS */ /* get the value from FS */
t1 = qse_awk_rtx_getgbl (run, QSE_AWK_GBL_FS); t1 = qse_awk_rtx_getgbl (rtx, QSE_AWK_GBL_FS);
if (t1->type == QSE_AWK_VAL_NIL) if (t1->type == QSE_AWK_VAL_NIL)
{ {
fs.ptr = QSE_T(" "); fs.ptr = QSE_T(" ");
@ -679,17 +679,17 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
} }
else else
{ {
fs.ptr = qse_awk_rtx_valtostrdup (run, t1, &fs.len); fs.ptr = qse_awk_rtx_valtostrdup (rtx, t1, &fs.len);
if (fs.ptr == QSE_NULL) goto oops; if (fs.ptr == QSE_NULL) goto oops;
fs_free = (qse_char_t*)fs.ptr; fs_free = (qse_char_t*)fs.ptr;
} }
if (fs.len > 1) fs_rex = run->gbl.fs; if (fs.len > 1) fs_rex = rtx->gbl.fs[rtx->gbl.ignorecase];
} }
else if (a2->type == QSE_AWK_VAL_REX) else if (a2->type == QSE_AWK_VAL_REX)
{ {
/* the third parameter is a regular expression */ /* the third parameter is a regular expression */
fs_rex = ((qse_awk_val_rex_t*)a2)->code; fs_rex = ((qse_awk_val_rex_t*)a2)->code[rtx->gbl.ignorecase];
/* make the loop below to take fs_rex by /* make the loop below to take fs_rex by
* setting fs_len greater than 1*/ * setting fs_len greater than 1*/
@ -705,30 +705,36 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
} }
else else
{ {
fs.ptr = qse_awk_rtx_valtostrdup (run, a2, &fs.len); fs.ptr = qse_awk_rtx_valtostrdup (rtx, a2, &fs.len);
if (fs.ptr == QSE_NULL) goto oops; if (fs.ptr == QSE_NULL) goto oops;
fs_free = (qse_char_t*)fs.ptr; fs_free = (qse_char_t*)fs.ptr;
} }
if (fs.len > 1) if (fs.len > 1)
{ {
fs_rex = qse_awk_buildrex ( int x;
run->awk, fs.ptr, fs.len, &errnum);
if (fs_rex == QSE_NULL) if (rtx->gbl.ignorecase)
x = qse_awk_buildrex (rtx->awk, fs.ptr, fs.len, &errnum, QSE_NULL, &fs_rex);
else
x = qse_awk_buildrex (rtx->awk, fs.ptr, fs.len, &errnum, &fs_rex, QSE_NULL);
if (x <= -1)
{ {
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL); qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
goto oops; goto oops;
} }
fs_rex_free = fs_rex; fs_rex_free = fs_rex;
} }
} }
t1 = qse_awk_rtx_makemapval (run); t1 = qse_awk_rtx_makemapval (rtx);
if (t1 == QSE_NULL) goto oops; if (t1 == QSE_NULL) goto oops;
qse_awk_rtx_refupval (run, t1); qse_awk_rtx_refupval (rtx, t1);
x = qse_awk_rtx_setrefval (run, a1, t1); x = qse_awk_rtx_setrefval (rtx, (qse_awk_val_ref_t*)a1, t1);
qse_awk_rtx_refdownval (run, t1); qse_awk_rtx_refdownval (rtx, t1);
if (x <= -1) goto oops; if (x <= -1) goto oops;
/* fill the map with actual values */ /* fill the map with actual values */
@ -742,18 +748,18 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
if (fs.len <= 1) if (fs.len <= 1)
{ {
p = qse_awk_rtx_strxntok (run, p = qse_awk_rtx_strxntok (rtx,
p, str.len, fs.ptr, fs.len, &tok); p, str.len, fs.ptr, fs.len, &tok);
} }
else else
{ {
p = qse_awk_rtx_strxntokbyrex ( p = qse_awk_rtx_strxntokbyrex (
run, str.ptr, org_len, p, str.len, rtx, str.ptr, org_len, p, str.len,
fs_rex, &tok, &errnum fs_rex, &tok, &errnum
); );
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR) if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{ {
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL); qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
goto oops; goto oops;
} }
} }
@ -768,42 +774,54 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
/* create the field string - however, the split function must /* create the field string - however, the split function must
* create a numeric string if the string is a number */ * create a numeric string if the string is a number */
/*t2 = qse_awk_rtx_makestrvalwithcstr (run, &tok);*/ /*t2 = qse_awk_rtx_makestrvalwithcstr (rtx, &tok);*/
t2 = qse_awk_rtx_makenstrvalwithcstr (run, &tok); t2 = qse_awk_rtx_makenstrvalwithcstr (rtx, &tok);
if (t2 == QSE_NULL) goto oops; if (t2 == QSE_NULL) goto oops;
/* put it into the map */ /* put it into the map */
key_len = qse_awk_longtostr ( key_len = qse_awk_longtostr (
run->awk, ++nflds, 10, QSE_NULL, key_buf, QSE_COUNTOF(key_buf)); rtx->awk, ++nflds, 10, QSE_NULL, key_buf, QSE_COUNTOF(key_buf));
QSE_ASSERT (key_len != (qse_size_t)-1); QSE_ASSERT (key_len != (qse_size_t)-1);
if (qse_awk_rtx_setmapvalfld ( if (qse_awk_rtx_setmapvalfld (
run, t1, key_buf, key_len, t2) == QSE_NULL) rtx, t1, key_buf, key_len, t2) == QSE_NULL)
{ {
qse_awk_rtx_refupval (run, t2); qse_awk_rtx_refupval (rtx, t2);
qse_awk_rtx_refdownval (run, t2); qse_awk_rtx_refdownval (rtx, t2);
goto oops; goto oops;
} }
str.len = str_left - (p - str.ptr); str.len = str_left - (p - str.ptr);
} }
if (str_free) QSE_AWK_FREE (run->awk, str_free); if (str_free) QSE_AWK_FREE (rtx->awk, str_free);
if (fs_free) QSE_AWK_FREE (run->awk, fs_free); if (fs_free) QSE_AWK_FREE (rtx->awk, fs_free);
if (fs_rex_free) qse_awk_freerex (run->awk, fs_rex_free); if (fs_rex_free)
{
if (rtx->gbl.ignorecase)
qse_awk_freerex (rtx->awk, QSE_NULL, fs_rex_free);
else
qse_awk_freerex (rtx->awk, fs_rex_free, QSE_NULL);
}
/*nflds--;*/ /*nflds--;*/
t1 = qse_awk_rtx_makeintval (run, nflds); t1 = qse_awk_rtx_makeintval (rtx, nflds);
if (t1 == QSE_NULL) return -1; if (t1 == QSE_NULL) return -1;
qse_awk_rtx_setretval (run, t1); qse_awk_rtx_setretval (rtx, t1);
return 0; return 0;
oops: oops:
if (str_free) QSE_AWK_FREE (run->awk, str_free); if (str_free) QSE_AWK_FREE (rtx->awk, str_free);
if (fs_free) QSE_AWK_FREE (run->awk, fs_free); if (fs_free) QSE_AWK_FREE (rtx->awk, fs_free);
if (fs_rex_free) qse_awk_freerex (run->awk, fs_rex_free); if (fs_rex_free)
{
if (rtx->gbl.ignorecase)
qse_awk_freerex (rtx->awk, QSE_NULL, fs_rex_free);
else
qse_awk_freerex (rtx->awk, fs_rex_free, QSE_NULL);
}
return -1; return -1;
} }
@ -832,7 +850,7 @@ static int fnc_tolower (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
for (i = 0; i < str.len; i++) str.ptr[i] = QSE_AWK_TOLOWER (run->awk, str.ptr[i]); for (i = 0; i < str.len; i++) str.ptr[i] = QSE_AWK_TOLOWER (run->awk, str.ptr[i]);
r = qse_awk_rtx_makestrvalwithcstr (run, &str); r = qse_awk_rtx_makestrvalwithcstr (run, (qse_cstr_t*)&str);
if (r == QSE_NULL) if (r == QSE_NULL)
{ {
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str.ptr); if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str.ptr);
@ -869,7 +887,7 @@ static int fnc_toupper (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
for (i = 0; i < str.len; i++) str.ptr[i] = QSE_AWK_TOUPPER (run->awk, str.ptr[i]); for (i = 0; i < str.len; i++) str.ptr[i] = QSE_AWK_TOUPPER (run->awk, str.ptr[i]);
r = qse_awk_rtx_makestrvalwithcstr (run, &str); r = qse_awk_rtx_makestrvalwithcstr (run, (qse_cstr_t*)&str);
if (r == QSE_NULL) if (r == QSE_NULL)
{ {
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str.ptr); if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str.ptr);
@ -881,11 +899,10 @@ static int fnc_toupper (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
return 0; return 0;
} }
static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count) static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
{ {
qse_size_t nargs; qse_size_t nargs;
qse_awk_val_t* a0, * a1, * a2, ** a2_ref, * v; qse_awk_val_t* a0, * a1, * a2, * v;
qse_cstr_t s0, s1, s2; qse_cstr_t s0, s1, s2;
const qse_char_t* s2_end; const qse_char_t* s2_end;
@ -898,7 +915,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
void* rex_free = QSE_NULL; void* rex_free = QSE_NULL;
qse_str_t new; qse_str_t new;
int new_inited = 0, opt; int new_inited = 0;
qse_cstr_t mat, pmat, cur; qse_cstr_t mat, pmat, cur;
@ -915,7 +932,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (a0->type == QSE_AWK_VAL_REX) if (a0->type == QSE_AWK_VAL_REX)
{ {
rex = ((qse_awk_val_rex_t*)a0)->code; rex = ((qse_awk_val_rex_t*)a0)->code[run->gbl.ignorecase];
} }
else if (a0->type == QSE_AWK_VAL_STR) else if (a0->type == QSE_AWK_VAL_STR)
{ {
@ -964,10 +981,14 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (a0->type != QSE_AWK_VAL_REX) if (a0->type != QSE_AWK_VAL_REX)
{ {
qse_awk_errnum_t errnum; qse_awk_errnum_t errnum;
int x;
rex = qse_awk_buildrex ( if (run->gbl.ignorecase)
run->awk, s0.ptr, s0.len, &errnum); x = qse_awk_buildrex (run->awk, s0.ptr, s0.len, &errnum, QSE_NULL, &rex);
if (rex == QSE_NULL) else
x = qse_awk_buildrex (run->awk, s0.ptr, s0.len, &errnum, &rex, QSE_NULL);
if (x <= -1)
{ {
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL); qse_awk_rtx_seterrnum (run, errnum, QSE_NULL);
goto oops; goto oops;
@ -976,8 +997,6 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
rex_free = rex; rex_free = rex;
} }
opt = (run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0;
s2_end = s2.ptr + s2.len; s2_end = s2.ptr + s2.len;
cur.ptr = s2.ptr; cur.ptr = s2.ptr;
cur.len = s2.len; cur.len = s2.len;
@ -997,7 +1016,8 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (max_count == 0 || sub_count < max_count) if (max_count == 0 || sub_count < max_count)
{ {
n = qse_awk_matchrex ( n = qse_awk_matchrex (
run->awk, rex, opt, &s2, &cur, &mat, &errnum run->awk, rex, run->gbl.ignorecase,
&s2, &cur, &mat, &errnum
); );
} }
else n = 0; else n = 0;
@ -1085,7 +1105,10 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (rex_free) if (rex_free)
{ {
qse_awk_freerex (run->awk, rex_free); if (run->gbl.ignorecase)
qse_awk_freerex (run->awk, QSE_NULL, rex_free);
else
qse_awk_freerex (run->awk, rex_free, QSE_NULL);
rex_free = QSE_NULL; rex_free = QSE_NULL;
} }
@ -1104,7 +1127,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
v = qse_awk_rtx_makestrvalwithcstr (run, QSE_STR_CSTR(&new)); v = qse_awk_rtx_makestrvalwithcstr (run, QSE_STR_CSTR(&new));
if (v == QSE_NULL) goto oops; if (v == QSE_NULL) goto oops;
qse_awk_rtx_refupval (run, v); qse_awk_rtx_refupval (run, v);
n = qse_awk_rtx_setrefval (run, a2, v); n = qse_awk_rtx_setrefval (run, (qse_awk_val_ref_t*)a2, v);
qse_awk_rtx_refdownval (run, v); qse_awk_rtx_refdownval (run, v);
if (n <= -1) goto oops; if (n <= -1) goto oops;
} }
@ -1123,7 +1146,13 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
return 0; return 0;
oops: oops:
if (rex_free) qse_awk_freerex (run->awk, rex_free); if (rex_free)
{
if (run->gbl.ignorecase)
qse_awk_freerex (run->awk, QSE_NULL, rex_free);
else
qse_awk_freerex (run->awk, rex_free, QSE_NULL);
}
if (new_inited) qse_str_fini (&new); if (new_inited) qse_str_fini (&new);
if (s2_free) QSE_AWK_FREE (run->awk, s2_free); if (s2_free) QSE_AWK_FREE (run->awk, s2_free);
if (s1_free) QSE_AWK_FREE (run->awk, s1_free); if (s1_free) QSE_AWK_FREE (run->awk, s1_free);
@ -1145,13 +1174,11 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
{ {
qse_size_t nargs; qse_size_t nargs;
qse_awk_val_t* a0, * a1; qse_awk_val_t* a0, * a1;
qse_char_t* str0, * str1; qse_char_t* str0;
qse_size_t len0, len1; qse_size_t len0;
qse_long_t idx, start = 1; qse_long_t idx, start = 1;
void* rex;
int n; int n;
qse_cstr_t mat; qse_cstr_t mat;
qse_awk_errnum_t errnum;
nargs = qse_awk_rtx_getnargs (rtx); nargs = qse_awk_rtx_getnargs (rtx);
QSE_ASSERT (nargs >= 2 && nargs <= 3); QSE_ASSERT (nargs >= 2 && nargs <= 3);
@ -1195,42 +1222,6 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
if (str0 == QSE_NULL) return -1; if (str0 == QSE_NULL) return -1;
} }
if (a1->type == QSE_AWK_VAL_REX)
{
rex = ((qse_awk_val_rex_t*)a1)->code;
}
else
{
qse_awk_errnum_t errnum;
if (a1->type == QSE_AWK_VAL_STR)
{
str1 = ((qse_awk_val_str_t*)a1)->val.ptr;
len1 = ((qse_awk_val_str_t*)a1)->val.len;
}
else
{
str1 = qse_awk_rtx_valtostrdup (rtx, a1, &len1);
if (str1 == QSE_NULL)
{
if (a0->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (rtx->awk, str0);
return -1;
}
}
rex = qse_awk_buildrex (rtx->awk, str1, len1, &errnum);
if (rex == QSE_NULL)
{
if (a0->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (rtx->awk, str0);
qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
return -1;
}
if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1);
}
if (start == 0) start = 1; if (start == 0) start = 1;
else if (start < 0) start = len0 + start + 1; else if (start < 0) start = len0 + start + 1;
@ -1242,21 +1233,12 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
/*TODO: must use str0,len0? */ /*TODO: must use str0,len0? */
tmp.ptr = str0 + start - 1; tmp.ptr = str0 + start - 1;
tmp.len = len0 - start + 1; tmp.len = len0 - start + 1;
n = qse_awk_matchrex (
rtx->awk, rex, n = qse_awk_rtx_matchrex (rtx, a1, &tmp, &tmp, &mat);
(rtx->gbl.ignorecase? QSE_REX_IGNORECASE: 0), if (n <= -1) return -1;
&tmp, &tmp, &mat, &errnum
);
} }
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0); if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0);
if (a1->type != QSE_AWK_VAL_REX) qse_awk_freerex (rtx->awk, rex);
if (n <= -1)
{
qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
return -1;
}
idx = (n == 0)? 0: ((qse_long_t)(mat.ptr-str0) + 1); idx = (n == 0)? 0: ((qse_long_t)(mat.ptr-str0) + 1);
@ -1338,7 +1320,7 @@ static int fnc_sprintf (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, cs0.ptr); if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, cs0.ptr);
if (x.ptr == QSE_NULL) goto oops; if (x.ptr == QSE_NULL) goto oops;
a0 = qse_awk_rtx_makestrvalwithcstr (run, &x); a0 = qse_awk_rtx_makestrvalwithcstr (run, (qse_cstr_t*)&x);
if (a0 == QSE_NULL) goto oops; if (a0 == QSE_NULL) goto oops;
qse_str_fini (&fbu); qse_str_fini (&fbu);

View File

@ -20,7 +20,7 @@
#include "awk.h" #include "awk.h"
#define USE_REX /*#define USE_REX */
#if defined(USE_REX) #if defined(USE_REX)
# include <qse/cmn/rex.h> # include <qse/cmn/rex.h>
@ -892,8 +892,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
while (cursub.len > 0) while (cursub.len > 0)
{ {
n = qse_awk_matchrex ( n = qse_awk_matchrex (
rtx->awk, rex, rtx->awk, rex, rtx->gbl.ignorecase,
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
&s, &cursub, &match, errnum); &s, &cursub, &match, errnum);
if (n == -1) return QSE_NULL; if (n == -1) return QSE_NULL;
if (n == 0) if (n == 0)
@ -1090,36 +1089,46 @@ static QSE_INLINE int rexerr_to_errnum (int err)
} }
} }
void* qse_awk_buildrex ( int qse_awk_buildrex (
qse_awk_t* awk, const qse_char_t* ptn, qse_awk_t* awk, const qse_char_t* ptn, qse_size_t len,
qse_size_t len, qse_awk_errnum_t* errnum) qse_awk_errnum_t* errnum, void** code, void** icode)
{ {
#if defined(USE_REX) #if defined(USE_REX)
qse_rex_errnum_t err; qse_rex_errnum_t err;
void* p; void* p;
if (code || icode)
{
p = qse_buildrex ( p = qse_buildrex (
awk->mmgr, awk->opt.depth.s.rex_build, awk->mmgr, awk->opt.depth.s.rex_build,
((awk->opt.trait & QSE_AWK_REXBOUND)? 0: QSE_REX_NOBOUND), ((awk->opt.trait & QSE_AWK_REXBOUND)? 0: QSE_REX_NOBOUND),
ptn, len, &err ptn, len, &err
); );
if (p == QSE_NULL) *errnum = rexerr_to_errnum(err); if (p == QSE_NULL)
return p; {
*errnum = rexerr_to_errnum(err);
return -1;
}
if (code) *code = p;
if (icode) *icode = p;
}
return 0;
#else #else
qse_tre_t* tre; qse_tre_t* tre = QSE_NULL;
qse_tre_t* itre = QSE_NULL;
int opt = QSE_TRE_EXTENDED; int opt = QSE_TRE_EXTENDED;
if (code)
{
tre = qse_tre_open (awk->mmgr, 0); tre = qse_tre_open (awk->mmgr, 0);
if (tre == QSE_NULL) if (tre == QSE_NULL)
{ {
*errnum = QSE_AWK_ENOMEM; *errnum = QSE_AWK_ENOMEM;
return QSE_NULL; return -1;
} }
/* ignorecase is a compile option for TRE */
#if 0 /* TODO */
if (ignorecase) opt |= QSE_TRE_IGNORECASE;
#endif
if (!(awk->opt.trait & QSE_AWK_REXBOUND)) opt |= QSE_TRE_NOBOUND; if (!(awk->opt.trait & QSE_AWK_REXBOUND)) opt |= QSE_TRE_NOBOUND;
if (qse_tre_compx (tre, ptn, len, QSE_NULL, opt) <= -1) if (qse_tre_compx (tre, ptn, len, QSE_NULL, opt) <= -1)
@ -1133,13 +1142,44 @@ void* qse_awk_buildrex (
*errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)? *errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)?
QSE_AWK_ENOMEM: QSE_AWK_EREXBL; QSE_AWK_ENOMEM: QSE_AWK_EREXBL;
qse_tre_close (tre); qse_tre_close (tre);
return QSE_NULL; return -1;
}
} }
return tre; if (icode)
{
itre = qse_tre_open (awk->mmgr, 0);
if (itre == QSE_NULL)
{
if (tre) qse_tre_close (tre);
*errnum = QSE_AWK_ENOMEM;
return -1;
}
/* TRE has no run-time ignorecase switch, so case-insensitivity must be
 * requested when the pattern is compiled.
 * NOTE(review): 'opt' appears to receive QSE_TRE_NOBOUND only inside the
 * 'if (code)' branch above, so it is OR'ed in here as well to make the
 * case-insensitive code honor QSE_AWK_REXBOUND when only 'icode' is
 * requested. repeating the flag is harmless if 'opt' already carries it. */
if (qse_tre_compx (itre, ptn, len, QSE_NULL,
	opt | QSE_TRE_IGNORECASE |
	((awk->opt.trait & QSE_AWK_REXBOUND)? 0: QSE_TRE_NOBOUND)) <= -1)
{
#if 0 /* TODO */
	if (QSE_TRE_ERRNUM(itre) == QSE_TRE_ENOMEM) *errnum = QSE_AWK_ENOMEM;
	else
		SETERR1 (awk, QSE_AWK_EREXBL, str->ptr, str->len, loc);
#endif
	/* the compilation that failed was performed on 'itre', so the error
	 * number must be read from it. 'tre' is still QSE_NULL when the
	 * caller asked for the case-insensitive code only and must not be
	 * inspected here. */
	*errnum = (QSE_TRE_ERRNUM(itre) == QSE_TRE_ENOMEM)?
		QSE_AWK_ENOMEM: QSE_AWK_EREXBL;

	/* release both objects; the case-sensitive one exists only if
	 * 'code' was requested too */
	qse_tre_close (itre);
	if (tre) qse_tre_close (tre);
	return -1;
}
}
if (code) *code = tre;
if (icode) *icode = itre;
return 0;
#endif #endif
} }
#if !defined(USE_REX) #if !defined(USE_REX)
static int matchtre ( static int matchtre (
@ -1192,7 +1232,7 @@ static int matchtre (
#endif #endif
int qse_awk_matchrex ( int qse_awk_matchrex (
qse_awk_t* awk, void* code, int option, qse_awk_t* awk, void* code, int icase,
const qse_cstr_t* str, const qse_cstr_t* substr, const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match, qse_awk_errnum_t* errnum) qse_cstr_t* match, qse_awk_errnum_t* errnum)
{ {
@ -1201,8 +1241,8 @@ int qse_awk_matchrex (
qse_rex_errnum_t err; qse_rex_errnum_t err;
x = qse_matchrex ( x = qse_matchrex (
awk->mmgr, awk->opt.depth.s.rex_match, awk->mmgr, awk->opt.depth.s.rex_match, code,
code, option, str, substr, match, &err); (icase? QSE_REX_IGNORECASE: 0), str, substr, match, &err);
if (x <= -1) *errnum = rexerr_to_errnum(err); if (x <= -1) *errnum = rexerr_to_errnum(err);
return x; return x;
#else #else
@ -1218,7 +1258,9 @@ int qse_awk_matchrex (
#endif #endif
} }
void qse_awk_freerex (qse_awk_t* awk, void* code) void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode)
{
if (code)
{ {
#if defined(USE_REX) #if defined(USE_REX)
qse_freerex ((awk)->mmgr, code); qse_freerex ((awk)->mmgr, code);
@ -1227,6 +1269,93 @@ void qse_awk_freerex (qse_awk_t* awk, void* code)
#endif #endif
} }
if (icode && icode != code)
{
#if defined(USE_REX)
qse_freerex ((awk)->mmgr, icode);
#else
qse_tre_close (icode);
#endif
}
}
/**
 * The qse_awk_rtx_matchrex() function matches the regular expression
 * denoted by @a val against @a substr.
 *
 * If @a val is a QSE_AWK_VAL_REX value, the precompiled code selected by
 * the current rtx->gbl.ignorecase setting is used as it is. For any other
 * value, the string form of the value is compiled into a temporary regular
 * expression for the current ignorecase setting and the temporary code is
 * released before the function returns.
 *
 * @param rtx    runtime context whose ignorecase setting is consulted and
 *               on which error numbers are set
 * @param val    regular expression value, or a value to convert to one
 * @param str    whole subject string
 * @param substr part of the subject string to match against; the non-REX
 *               engine is told that the beginning of line does not apply
 *               when substr->ptr differs from str->ptr
 * @param match  passed on to the matching engine to receive the match;
 *               a caller in this file passes #QSE_NULL here
 * @return -1 if the value cannot be converted or compiled, otherwise the
 *         result of the matching engine. callers in this file treat 0 as
 *         no match and a value <= -1 as an error.
 */
int qse_awk_rtx_matchrex (
	qse_awk_rtx_t* rtx, qse_awk_val_t* val,
	const qse_cstr_t* str, const qse_cstr_t* substr, qse_cstr_t* match)
{
	const int nocase = rtx->gbl.ignorecase;
	int transient; /* non-zero if 'rex' is built here and must be freed */
	void* rex;
	int n;
	qse_awk_errnum_t awkerr;
#if defined(USE_REX)
	qse_rex_errnum_t rexerr;
#endif

	transient = (val->type != QSE_AWK_VAL_REX);

	if (!transient)
	{
		/* borrow the code compiled in advance for this case mode */
		rex = ((qse_awk_val_rex_t*)val)->code[nocase];
	}
	else
	{
		qse_char_t* ptn;
		qse_char_t* dup = QSE_NULL;
		qse_size_t len;

		if (val->type == QSE_AWK_VAL_STR)
		{
			/* a string value can serve as the pattern directly */
			ptn = ((qse_awk_val_str_t*)val)->val.ptr;
			len = ((qse_awk_val_str_t*)val)->val.len;
		}
		else
		{
			/* convert to a string first to get the pattern text */
			dup = qse_awk_rtx_valtostrdup (rtx, val, &len);
			if (dup == QSE_NULL) return -1;
			ptn = dup;
		}

		/* build only the variant needed for the current case mode */
		if (nocase)
			n = qse_awk_buildrex (rtx->awk, ptn, len, &awkerr, QSE_NULL, &rex);
		else
			n = qse_awk_buildrex (rtx->awk, ptn, len, &awkerr, &rex, QSE_NULL);

		/* the duplicated pattern text is not needed once compiled */
		if (dup) qse_awk_rtx_freemem (rtx, dup);

		if (n <= -1)
		{
			qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL);
			return -1;
		}
	}

#if defined(USE_REX)
	n = qse_matchrex (
		rtx->awk->mmgr, rtx->awk->opt.depth.s.rex_match,
		rex, (nocase? QSE_REX_IGNORECASE: 0),
		str, substr, match, &rexerr);
	if (n <= -1) qse_awk_rtx_seterrnum (rtx, rexerr_to_errnum(rexerr), QSE_NULL);
#else
	n = matchtre (
		rtx->awk, rex,
		((str->ptr == substr->ptr)? QSE_TRE_BACKTRACKING: (QSE_TRE_BACKTRACKING | QSE_TRE_NOTBOL)),
		substr, match, QSE_NULL, &awkerr
	);
	if (n <= -1) qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL);
#endif

	if (transient)
	{
		/* hand the temporary code back through the slot it was built for */
		if (nocase)
			qse_awk_freerex (rtx->awk, QSE_NULL, rex);
		else
			qse_awk_freerex (rtx->awk, rex, QSE_NULL);
	}

	return n;
}
void* qse_awk_rtx_allocmem (qse_awk_rtx_t* rtx, qse_size_t size) void* qse_awk_rtx_allocmem (qse_awk_rtx_t* rtx, qse_size_t size)
{ {
void* ptr = QSE_AWK_ALLOC (rtx->awk, size); void* ptr = QSE_AWK_ALLOC (rtx->awk, size);

View File

@ -63,20 +63,28 @@ qse_char_t* qse_awk_rtx_strxnfld (
qse_cstr_t* tok qse_cstr_t* tok
); );
void* qse_awk_buildrex ( int qse_awk_buildrex (
qse_awk_t* awk, qse_awk_t* awk,
const qse_char_t* ptn, const qse_char_t* ptn,
qse_size_t len, qse_size_t len,
qse_awk_errnum_t* errnum qse_awk_errnum_t* errnum,
void** code,
void** icode
); );
int qse_awk_matchrex ( int qse_awk_matchrex (
qse_awk_t* awk, void* code, int option, qse_awk_t* awk, void* code, int icase,
const qse_cstr_t* str, const qse_cstr_t* substr, const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match, qse_awk_errnum_t* errnum qse_cstr_t* match, qse_awk_errnum_t* errnum
); );
void qse_awk_freerex (qse_awk_t* awk, void* code); void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode);
int qse_awk_rtx_matchrex (
qse_awk_rtx_t* rtx, qse_awk_val_t* val,
const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match
);
int qse_awk_sprintflt ( int qse_awk_sprintflt (
qse_awk_t* awk, qse_awk_t* awk,

View File

@ -4351,9 +4351,7 @@ static qse_awk_nde_t* parse_primary_rex (qse_awk_t* awk, const qse_awk_loc_t* x
nde->str.ptr = qse_awk_cstrdup (awk, QSE_STR_CSTR(awk->tok.name)); nde->str.ptr = qse_awk_cstrdup (awk, QSE_STR_CSTR(awk->tok.name));
if (nde->str.ptr == QSE_NULL) goto oops; if (nde->str.ptr == QSE_NULL) goto oops;
nde->code = qse_awk_buildrex ( if (qse_awk_buildrex (awk, QSE_STR_PTR(awk->tok.name), QSE_STR_LEN(awk->tok.name), &errnum, &nde->code[0], &nde->code[1]) <= -1)
awk, QSE_STR_PTR(awk->tok.name), QSE_STR_LEN(awk->tok.name), &errnum);
if (nde->code == QSE_NULL)
{ {
SETERR_LOC (awk, errnum, xloc); SETERR_LOC (awk, errnum, xloc);
goto oops; goto oops;
@ -4365,7 +4363,7 @@ static qse_awk_nde_t* parse_primary_rex (qse_awk_t* awk, const qse_awk_loc_t* x
oops: oops:
QSE_ASSERT (nde != QSE_NULL); QSE_ASSERT (nde != QSE_NULL);
if (nde->code) qse_awk_freerex (awk, nde->code); if (nde->code[0]) qse_awk_freerex (awk, nde->code[0], nde->code[1]);
if (nde->str.ptr) QSE_AWK_FREE (awk, nde->str.ptr); if (nde->str.ptr) QSE_AWK_FREE (awk, nde->str.ptr);
QSE_AWK_FREE (awk, nde); QSE_AWK_FREE (awk, nde);
return QSE_NULL; return QSE_NULL;

View File

@ -122,13 +122,8 @@ static int split_record (qse_awk_rtx_t* rtx)
} }
else else
{ {
qse_awk_rtx_valtostr_out_t out; fs_ptr = qse_awk_rtx_valtostrdup (rtx, fs, &fs_len);
if (fs_ptr == QSE_NULL) return -1;
out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP;
if (qse_awk_rtx_valtostr (rtx, fs, &out) <= -1) return -1;
fs_ptr = out.u.cpldup.ptr;
fs_len = out.u.cpldup.len;
fs_free = fs_ptr; fs_free = fs_ptr;
} }
@ -178,7 +173,7 @@ static int split_record (qse_awk_rtx_t* rtx)
QSE_STR_PTR(&rtx->inrec.line), QSE_STR_PTR(&rtx->inrec.line),
QSE_STR_LEN(&rtx->inrec.line), QSE_STR_LEN(&rtx->inrec.line),
p, len, p, len,
rtx->gbl.fs, &tok, &errnum rtx->gbl.fs[rtx->gbl.ignorecase], &tok, &errnum
); );
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR) if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{ {
@ -268,7 +263,7 @@ static int split_record (qse_awk_rtx_t* rtx)
QSE_STR_PTR(&rtx->inrec.line), QSE_STR_PTR(&rtx->inrec.line),
QSE_STR_LEN(&rtx->inrec.line), QSE_STR_LEN(&rtx->inrec.line),
p, len, p, len,
rtx->gbl.fs, &tok, &errnum rtx->gbl.fs[rtx->gbl.ignorecase], &tok, &errnum
); );
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR) if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{ {

View File

@ -223,12 +223,12 @@ static QSE_INLINE int match_long_rs (
qse_awk_errnum_t errnum; qse_awk_errnum_t errnum;
int ret; int ret;
QSE_ASSERT (run->gbl.rs != QSE_NULL); QSE_ASSERT (run->gbl.rs[0] != QSE_NULL);
QSE_ASSERT (run->gbl.rs[1] != QSE_NULL);
ret = qse_awk_matchrex ( ret = qse_awk_matchrex (
run->awk, run->gbl.rs, run->awk, run->gbl.rs[run->gbl.ignorecase],
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0), run->gbl.ignorecase, QSE_STR_CSTR(buf), QSE_STR_CSTR(buf),
QSE_STR_CSTR(buf), QSE_STR_CSTR(buf),
&match, &errnum); &match, &errnum);
if (ret <= -1) if (ret <= -1)
{ {

View File

@ -464,12 +464,11 @@ static int set_global (
if (fs_len > 1 && !(fs_len == 5 && fs_ptr[0] == QSE_T('?'))) if (fs_len > 1 && !(fs_len == 5 && fs_ptr[0] == QSE_T('?')))
{ {
void* rex; void* rex, * irex;
qse_awk_errnum_t errnum; qse_awk_errnum_t errnum;
rex = qse_awk_buildrex ( if (qse_awk_buildrex (rtx->awk, fs_ptr, fs_len, &errnum, &rex, &irex) <= -1)
rtx->awk, fs_ptr, fs_len, &errnum);
if (rex == QSE_NULL)
{ {
SETERR_COD (rtx, errnum); SETERR_COD (rtx, errnum);
if (val->type != QSE_AWK_VAL_STR) if (val->type != QSE_AWK_VAL_STR)
@ -477,14 +476,14 @@ static int set_global (
return -1; return -1;
} }
if (rtx->gbl.fs != QSE_NULL) if (rtx->gbl.fs[0])
qse_awk_freerex (rtx->awk, rtx->gbl.fs); qse_awk_freerex (rtx->awk, rtx->gbl.fs[0], rtx->gbl.fs[1]);
rtx->gbl.fs = rex; rtx->gbl.fs[0] = rex;
rtx->gbl.fs[1] = irex;
} }
if (val->type != QSE_AWK_VAL_STR) if (val->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, fs_ptr);
QSE_AWK_FREE (rtx->awk, fs_ptr);
break; break;
} }
@ -613,33 +612,31 @@ static int set_global (
rss = out.u.cpldup; rss = out.u.cpldup;
} }
if (rtx->gbl.rs) if (rtx->gbl.rs[0])
{ {
qse_awk_freerex (rtx->awk, rtx->gbl.rs); qse_awk_freerex (rtx->awk, rtx->gbl.rs[0], rtx->gbl.rs[1]);
rtx->gbl.rs = QSE_NULL; rtx->gbl.rs[0] = QSE_NULL;
rtx->gbl.rs[1] = QSE_NULL;
} }
if (rss.len > 1) if (rss.len > 1)
{ {
void* rex; void* rex, * irex;
qse_awk_errnum_t errnum; qse_awk_errnum_t errnum;
/* compile the regular expression */ /* compile the regular expression */
rex = qse_awk_buildrex ( if (qse_awk_buildrex (rtx->awk, rss.ptr, rss.len, &errnum, &rex, &irex) <= -1)
rtx->awk, rss.ptr, rss.len, &errnum);
if (rex == QSE_NULL)
{ {
SETERR_COD (rtx, errnum); SETERR_COD (rtx, errnum);
if (val->type != QSE_AWK_VAL_STR) if (val->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, rss.ptr);
QSE_AWK_FREE (rtx->awk, rss.ptr);
return -1; return -1;
} }
rtx->gbl.rs = rex; rtx->gbl.rs[0] = rex;
rtx->gbl.rs[1] = irex;
} }
if (val->type != QSE_AWK_VAL_STR) if (val->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, rss.ptr);
QSE_AWK_FREE (rtx->awk, rss.ptr);
break; break;
} }
@ -1016,8 +1013,10 @@ static int init_rtx (qse_awk_rtx_t* rtx, qse_awk_t* awk, qse_awk_rio_t* rio)
rtx->rio.chain = QSE_NULL; rtx->rio.chain = QSE_NULL;
} }
rtx->gbl.rs = QSE_NULL; rtx->gbl.rs[0] = QSE_NULL;
rtx->gbl.fs = QSE_NULL; rtx->gbl.rs[1] = QSE_NULL;
rtx->gbl.fs[0] = QSE_NULL;
rtx->gbl.fs[1] = QSE_NULL;
rtx->gbl.ignorecase = 0; rtx->gbl.ignorecase = 0;
return 0; return 0;
@ -1051,15 +1050,17 @@ static void fini_rtx (qse_awk_rtx_t* rtx, int fini_globals)
qse_awk_rtx_cleario (rtx); qse_awk_rtx_cleario (rtx);
QSE_ASSERT (rtx->rio.chain == QSE_NULL); QSE_ASSERT (rtx->rio.chain == QSE_NULL);
if (rtx->gbl.rs) if (rtx->gbl.rs[0])
{ {
qse_awk_freerex (rtx->awk, rtx->gbl.rs); qse_awk_freerex (rtx->awk, rtx->gbl.rs[0], rtx->gbl.rs[1]);
rtx->gbl.rs = QSE_NULL; rtx->gbl.rs[0] = QSE_NULL;
rtx->gbl.rs[1] = QSE_NULL;
} }
if (rtx->gbl.fs) if (rtx->gbl.fs[0])
{ {
qse_awk_freerex (rtx->awk, rtx->gbl.fs); qse_awk_freerex (rtx->awk, rtx->gbl.fs[0], rtx->gbl.fs[1]);
rtx->gbl.fs = QSE_NULL; rtx->gbl.fs[0] = QSE_NULL;
rtx->gbl.fs[1] = QSE_NULL;
} }
if (rtx->gbl.convfmt.ptr != QSE_NULL && if (rtx->gbl.convfmt.ptr != QSE_NULL &&
@ -3208,7 +3209,6 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
{ {
qse_awk_val_t* v; qse_awk_val_t* v;
int n; int n;
qse_awk_errnum_t errnum;
#if 0 #if 0
if (rtx->exit_level >= EXIT_GLOBAL) if (rtx->exit_level >= EXIT_GLOBAL)
@ -3230,9 +3230,11 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
qse_cstr_t vs; qse_cstr_t vs;
int opt = 0; int opt = 0;
if (((qse_awk_rtx_t*)rtx)->gbl.ignorecase) /* special case where a regular expression is used in
opt = QSE_REX_IGNORECASE; * without any match operators:
* print /abc/;
* perform match against $0.
*/
qse_awk_rtx_refupval (rtx, v); qse_awk_rtx_refupval (rtx, v);
if (rtx->inrec.d0->type == QSE_AWK_VAL_NIL) if (rtx->inrec.d0->type == QSE_AWK_VAL_NIL)
@ -3253,23 +3255,13 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
vs.len = ((qse_awk_val_str_t*)rtx->inrec.d0)->val.len; vs.len = ((qse_awk_val_str_t*)rtx->inrec.d0)->val.len;
} }
n = qse_awk_matchrex ( n = qse_awk_rtx_matchrex (rtx, v, &vs, &vs, QSE_NULL);
((qse_awk_rtx_t*)rtx)->awk,
((qse_awk_val_rex_t*)v)->code,
opt, &vs, &vs, QSE_NULL, &errnum
);
if (n <= -1) if (n <= -1)
{ {
ADJERR_LOC (rtx, &nde->loc);
qse_awk_rtx_refdownval (rtx, v); qse_awk_rtx_refdownval (rtx, v);
/* matchrex should never set the error number
* whose message contains a formatting
* character. otherwise, the following way of
* setting the error information may not work */
SETERR_LOC (rtx, errnum, &nde->loc);
return QSE_NULL; return QSE_NULL;
} }
qse_awk_rtx_refdownval (rtx, v); qse_awk_rtx_refdownval (rtx, v);
v = qse_awk_rtx_makeintval (rtx, (n != 0)); v = qse_awk_rtx_makeintval (rtx, (n != 0));
@ -4974,115 +4966,50 @@ static qse_awk_val_t* eval_binop_match0 (
{ {
qse_awk_val_t* res; qse_awk_val_t* res;
int n; int n;
qse_awk_errnum_t errnum;
void* rex_code;
if (right->type == QSE_AWK_VAL_REX)
{
rex_code = ((qse_awk_val_rex_t*)right)->code;
}
else if (right->type == QSE_AWK_VAL_STR)
{
rex_code = qse_awk_buildrex (
rtx->awk,
((qse_awk_val_str_t*)right)->val.ptr,
((qse_awk_val_str_t*)right)->val.len, &errnum);
if (rex_code == QSE_NULL)
{
SETERR_LOC (rtx, errnum, rloc);
return QSE_NULL;
}
}
else
{
qse_awk_rtx_valtostr_out_t out;
out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP;
if (qse_awk_rtx_valtostr (rtx, right, &out) <= -1) return QSE_NULL;
rex_code = qse_awk_buildrex (
rtx->awk, out.u.cpldup.ptr, out.u.cpldup.len, &errnum);
if (rex_code == QSE_NULL)
{
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
SETERR_LOC (rtx, errnum, rloc);
return QSE_NULL;
}
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
}
if (left->type == QSE_AWK_VAL_STR) if (left->type == QSE_AWK_VAL_STR)
{ {
n = qse_awk_matchrex ( n = qse_awk_rtx_matchrex (
rtx->awk, rex_code, rtx, right,
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
xstr_to_cstr(&((qse_awk_val_str_t*)left)->val), xstr_to_cstr(&((qse_awk_val_str_t*)left)->val),
xstr_to_cstr(&((qse_awk_val_str_t*)left)->val), xstr_to_cstr(&((qse_awk_val_str_t*)left)->val), QSE_NULL);
QSE_NULL, &errnum); if (n <= -1)
if (n == -1)
{ {
if (right->type != QSE_AWK_VAL_REX) ADJERR_LOC (rtx, lloc);
qse_awk_freerex (rtx->awk, rex_code);
SETERR_LOC (rtx, errnum, lloc);
return QSE_NULL; return QSE_NULL;
} }
res = qse_awk_rtx_makeintval (rtx, (n == ret)); res = qse_awk_rtx_makeintval (rtx, (n == ret));
if (res == QSE_NULL) if (res == QSE_NULL)
{ {
if (right->type != QSE_AWK_VAL_REX)
qse_awk_freerex (rtx->awk, rex_code);
ADJERR_LOC (rtx, lloc); ADJERR_LOC (rtx, lloc);
return QSE_NULL; return QSE_NULL;
} }
} }
else else
{ {
qse_awk_rtx_valtostr_out_t out; qse_xstr_t out;
out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP; out.ptr = qse_awk_rtx_valtostrdup (rtx, left, &out.len);
if (qse_awk_rtx_valtostr (rtx, left, &out) <= -1) if (out.ptr == QSE_NULL) return QSE_NULL;
n = qse_awk_rtx_matchrex (rtx, right, &out, &out, QSE_NULL);
QSE_AWK_FREE (rtx->awk, out.ptr);
if (n <= -1)
{ {
if (right->type != QSE_AWK_VAL_REX) ADJERR_LOC (rtx, lloc);
qse_awk_freerex (rtx->awk, rex_code);
return QSE_NULL;
}
n = qse_awk_matchrex (
rtx->awk, rex_code,
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
xstr_to_cstr(&out.u.cpldup),
xstr_to_cstr(&out.u.cpldup),
QSE_NULL, &errnum
);
if (n == -1)
{
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
if (right->type != QSE_AWK_VAL_REX)
qse_awk_freerex (rtx->awk, rex_code);
SETERR_LOC (rtx, errnum, lloc);
return QSE_NULL; return QSE_NULL;
} }
res = qse_awk_rtx_makeintval (rtx, (n == ret)); res = qse_awk_rtx_makeintval (rtx, (n == ret));
if (res == QSE_NULL) if (res == QSE_NULL)
{ {
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
if (right->type != QSE_AWK_VAL_REX)
qse_awk_freerex (rtx->awk, rex_code);
ADJERR_LOC (rtx, lloc); ADJERR_LOC (rtx, lloc);
return QSE_NULL; return QSE_NULL;
} }
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
} }
if (right->type != QSE_AWK_VAL_REX) qse_awk_freerex (rtx->awk, rex_code);
return res; return res;
} }
@ -6307,7 +6234,8 @@ static qse_awk_val_t* eval_rex (qse_awk_rtx_t* run, qse_awk_nde_t* nde)
val = qse_awk_rtx_makerexval (run, val = qse_awk_rtx_makerexval (run,
&((qse_awk_nde_rex_t*)nde)->str, &((qse_awk_nde_rex_t*)nde)->str,
((qse_awk_nde_rex_t*)nde)->code); ((qse_awk_nde_rex_t*)nde)->code
);
if (val == QSE_NULL) ADJERR_LOC (run, &nde->loc); if (val == QSE_NULL) ADJERR_LOC (run, &nde->loc);
return val; return val;

View File

@ -1305,7 +1305,8 @@ void qse_awk_clrpt (qse_awk_t* awk, qse_awk_nde_t* tree)
case QSE_AWK_NDE_REX: case QSE_AWK_NDE_REX:
{ {
qse_awk_freerex (awk, ((qse_awk_nde_rex_t*)p)->code); qse_awk_nde_rex_t* rex = (qse_awk_nde_rex_t*)p;
qse_awk_freerex (awk, rex->code[0], rex->code[1]);
QSE_AWK_FREE (awk, ((qse_awk_nde_rex_t*)p)->str.ptr); QSE_AWK_FREE (awk, ((qse_awk_nde_rex_t*)p)->str.ptr);
QSE_AWK_FREE (awk, p); QSE_AWK_FREE (awk, p);
break; break;
@ -1318,8 +1319,7 @@ void qse_awk_clrpt (qse_awk_t* awk, qse_awk_nde_t* tree)
{ {
qse_awk_nde_var_t* px = (qse_awk_nde_var_t*)p; qse_awk_nde_var_t* px = (qse_awk_nde_var_t*)p;
QSE_ASSERT (px->idx == QSE_NULL); QSE_ASSERT (px->idx == QSE_NULL);
if (px->id.name.ptr != QSE_NULL) if (px->id.name.ptr) QSE_AWK_FREE (awk, px->id.name.ptr);
QSE_AWK_FREE (awk, px->id.name.ptr);
QSE_AWK_FREE (awk, p); QSE_AWK_FREE (awk, p);
break; break;
} }

View File

@ -155,7 +155,7 @@ struct qse_awk_nde_rex_t
{ {
QSE_AWK_NDE_HDR; QSE_AWK_NDE_HDR;
qse_xstr_t str; qse_xstr_t str;
void* code; void* code[2]; /* [0]: case sensitive, [1]: case insensitive */
}; };
/* QSE_AWK_NDE_NAMED, QSE_AWK_NDE_GBL, /* QSE_AWK_NDE_NAMED, QSE_AWK_NDE_GBL,

View File

@ -35,6 +35,7 @@ qse_awk_val_t* qse_awk_val_zls = (qse_awk_val_t*)&awk_zls;
static qse_awk_val_int_t awk_int[] = static qse_awk_val_int_t awk_int[] =
{ {
/* type ref stat nstr val nde */
{ QSE_AWK_VAL_INT, 0, 1, 0, -1, QSE_NULL }, { QSE_AWK_VAL_INT, 0, 1, 0, -1, QSE_NULL },
{ QSE_AWK_VAL_INT, 0, 1, 0, 0, QSE_NULL }, { QSE_AWK_VAL_INT, 0, 1, 0, 0, QSE_NULL },
{ QSE_AWK_VAL_INT, 0, 1, 0, 1, QSE_NULL }, { QSE_AWK_VAL_INT, 0, 1, 0, 1, QSE_NULL },
@ -216,7 +217,7 @@ qse_awk_val_t* qse_awk_rtx_makestrvalwithmbs (
return QSE_NULL; return QSE_NULL;
} }
v = qse_awk_rtx_makestrvalwithcstr (rtx, &tmp); v = qse_awk_rtx_makestrvalwithcstr (rtx, (qse_cstr_t*)&tmp);
QSE_AWK_FREE (rtx->awk, tmp.ptr); QSE_AWK_FREE (rtx->awk, tmp.ptr);
return v; return v;
#endif #endif
@ -268,7 +269,7 @@ qse_awk_val_t* qse_awk_rtx_makestrvalwithmcstr (
return QSE_NULL; return QSE_NULL;
} }
v = qse_awk_rtx_makestrvalwithcstr (rtx, &tmp); v = qse_awk_rtx_makestrvalwithcstr (rtx, (qse_cstr_t*)&tmp);
QSE_AWK_FREE (rtx->awk, tmp.ptr); QSE_AWK_FREE (rtx->awk, tmp.ptr);
return v; return v;
#endif #endif
@ -438,7 +439,7 @@ qse_awk_val_t* qse_awk_rtx_makenstrvalwithcstr (qse_awk_rtx_t* rtx, const qse_cs
} }
qse_awk_val_t* qse_awk_rtx_makerexval ( qse_awk_val_t* qse_awk_rtx_makerexval (
qse_awk_rtx_t* rtx, const qse_cstr_t* str, void* code) qse_awk_rtx_t* rtx, const qse_cstr_t* str, void* code[2])
{ {
qse_awk_val_rex_t* val; qse_awk_val_rex_t* val;
qse_size_t totsz; qse_size_t totsz;
@ -465,7 +466,8 @@ qse_awk_val_t* qse_awk_rtx_makerexval (
val->str.ptr = (qse_char_t*)(val + 1); val->str.ptr = (qse_char_t*)(val + 1);
qse_strncpy (val->str.ptr, str->ptr, str->len); qse_strncpy (val->str.ptr, str->ptr, str->len);
val->code = code; val->code[0] = code[0];
val->code[1] = code[1];
return (qse_awk_val_t*)val; return (qse_awk_val_t*)val;
} }
@ -824,7 +826,7 @@ void qse_awk_rtx_freeval (
/* code is just a pointer to a regular expression stored /* code is just a pointer to a regular expression stored
* in parse tree nodes. so don't free it. * in parse tree nodes. so don't free it.
qse_awk_freerex (rtx->awk, ((qse_awk_val_rex_t*)val)->code); qse_awk_freerex (rtx->awk, ((qse_awk_val_rex_t*)val)->code[0], ((qse_awk_val_rex_t*)val)->code[1]);
*/ */
QSE_AWK_FREE (rtx->awk, val); QSE_AWK_FREE (rtx->awk, val);
@ -1717,7 +1719,7 @@ int qse_awk_rtx_setrefval (qse_awk_rtx_t* rtx, qse_awk_val_ref_t* ref, qse_awk_v
qse_awk_rtx_refupval (rtx, val); qse_awk_rtx_refupval (rtx, val);
x = qse_awk_rtx_setrec ( x = qse_awk_rtx_setrec (
rtx, (qse_size_t)ref->adr, rtx, (qse_size_t)ref->adr,
&((qse_awk_val_str_t*)val)->val (qse_cstr_t*)&((qse_awk_val_str_t*)val)->val
); );
qse_awk_rtx_refdownval (rtx, val); qse_awk_rtx_refdownval (rtx, val);
return x; return x;
@ -1730,7 +1732,7 @@ int qse_awk_rtx_setrefval (qse_awk_rtx_t* rtx, qse_awk_val_ref_t* ref, qse_awk_v
str.ptr = qse_awk_rtx_valtostrdup (rtx, val, &str.len); str.ptr = qse_awk_rtx_valtostrdup (rtx, val, &str.len);
qse_awk_rtx_refupval (rtx, val); qse_awk_rtx_refupval (rtx, val);
x = qse_awk_rtx_setrec (rtx, (qse_size_t)ref->adr, &str); x = qse_awk_rtx_setrec (rtx, (qse_size_t)ref->adr, (qse_cstr_t*)&str);
qse_awk_rtx_refdownval (rtx, val); qse_awk_rtx_refdownval (rtx, val);
QSE_AWK_FREE (rtx->awk, str.ptr); QSE_AWK_FREE (rtx->awk, str.ptr);
return x; return x;

View File

@ -60,11 +60,9 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
tre_ast_node_t *node; tre_ast_node_t *node;
node = tre_mem_calloc(mem, sizeof(*node)); node = tre_mem_calloc(mem, sizeof(*node));
if (!node) if (!node) return NULL;
return NULL;
node->obj = tre_mem_calloc(mem, size); node->obj = tre_mem_calloc(mem, size);
if (!node->obj) if (!node->obj) return NULL;
return NULL;
node->type = type; node->type = type;
node->nullable = -1; node->nullable = -1;
node->submatch_id = -1; node->submatch_id = -1;
@ -72,15 +70,13 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
return node; return node;
} }
tre_ast_node_t * tre_ast_node_t * tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
{ {
tre_ast_node_t *node; tre_ast_node_t *node;
tre_literal_t *lit; tre_literal_t *lit;
node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t)); node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
if (!node) if (!node) return NULL;
return NULL;
lit = node->obj; lit = node->obj;
lit->code_min = code_min; lit->code_min = code_min;
lit->code_max = code_max; lit->code_max = code_max;
@ -97,8 +93,7 @@ tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
tre_iteration_t *iter; tre_iteration_t *iter;
node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t)); node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
if (!node) if (!node) return NULL;
return NULL;
iter = node->obj; iter = node->obj;
iter->arg = arg; iter->arg = arg;
iter->min = min; iter->min = min;
@ -115,8 +110,7 @@ tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
tre_ast_node_t *node; tre_ast_node_t *node;
node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t)); node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
if (node == NULL) if (node == NULL) return NULL;
return NULL;
((tre_union_t *)node->obj)->left = left; ((tre_union_t *)node->obj)->left = left;
((tre_union_t *)node->obj)->right = right; ((tre_union_t *)node->obj)->right = right;
node->num_submatches = left->num_submatches + right->num_submatches; node->num_submatches = left->num_submatches + right->num_submatches;
@ -131,8 +125,7 @@ tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
tre_ast_node_t *node; tre_ast_node_t *node;
node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t)); node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
if (node == NULL) if (node == NULL) return NULL;
return NULL;
((tre_catenation_t *)node->obj)->left = left; ((tre_catenation_t *)node->obj)->left = left;
((tre_catenation_t *)node->obj)->right = right; ((tre_catenation_t *)node->obj)->right = right;
node->num_submatches = left->num_submatches + right->num_submatches; node->num_submatches = left->num_submatches + right->num_submatches;

View File

@ -88,14 +88,11 @@ tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
DPRINT(("add_tag_left: tag %d\n", tag_id)); DPRINT(("add_tag_left: tag %d\n", tag_id));
c = tre_mem_alloc(mem, sizeof(*c)); c = tre_mem_alloc(mem, sizeof(*c));
if (c == NULL) if (c == NULL) return REG_ESPACE;
return REG_ESPACE;
c->left = tre_ast_new_literal(mem, TAG, tag_id, -1); c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
if (c->left == NULL) if (c->left == NULL) return REG_ESPACE;
return REG_ESPACE;
c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
if (c->right == NULL) if (c->right == NULL) return REG_ESPACE;
return REG_ESPACE;
c->right->obj = node->obj; c->right->obj = node->obj;
c->right->type = node->type; c->right->type = node->type;
@ -152,7 +149,6 @@ typedef enum
ADDTAGS_SET_SUBMATCH_END ADDTAGS_SET_SUBMATCH_END
} tre_addtags_symbol_t; } tre_addtags_symbol_t;
typedef struct typedef struct
{ {
int tag; int tag;
@ -763,8 +759,7 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
first_tag = 0; first_tag = 0;
} }
*result = tre_ast_new_literal(mem, min, max, pos); *result = tre_ast_new_literal(mem, min, max, pos);
if (*result == NULL) if (*result == NULL) status = REG_ESPACE;
status = REG_ESPACE;
if (pos > *max_pos) if (pos > *max_pos)
*max_pos = pos; *max_pos = pos;
@ -1121,8 +1116,7 @@ tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
tre_pos_and_tags_t *new_set; tre_pos_and_tags_t *new_set;
new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2); new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
if (new_set == NULL) if (new_set == NULL) return NULL;
return NULL;
new_set[0].position = position; new_set[0].position = position;
new_set[0].code_min = code_min; new_set[0].code_min = code_min;
@ -1150,8 +1144,7 @@ tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
for (s1 = 0; set1[s1].position >= 0; s1++); for (s1 = 0; set1[s1].position >= 0; s1++);
for (s2 = 0; set2[s2].position >= 0; s2++); for (s2 = 0; set2[s2].position >= 0; s2++);
new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1)); new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
if (!new_set ) if (!new_set) return NULL;
return NULL;
for (s1 = 0; set1[s1].position >= 0; s1++) for (s1 = 0; set1[s1].position >= 0; s1++)
{ {
@ -1395,15 +1388,10 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
/* Back references: nullable = false, firstpos = {i}, /* Back references: nullable = false, firstpos = {i},
lastpos = {i}. */ lastpos = {i}. */
node->nullable = 0; node->nullable = 0;
node->firstpos = tre_set_one(mem, lit->position, 0, node->firstpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, -1);
TRE_CHAR_MAX, 0, NULL, -1); if (!node->firstpos) return REG_ESPACE;
if (!node->firstpos) node->lastpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, (int)lit->code_max);
return REG_ESPACE; if (!node->lastpos) return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position, 0,
TRE_CHAR_MAX, 0, NULL,
(int)lit->code_max);
if (!node->lastpos)
return REG_ESPACE;
} }
else if (lit->code_min < 0) else if (lit->code_min < 0)
{ {
@ -1422,18 +1410,10 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
/* Literal at position i: nullable = false, firstpos = {i}, /* Literal at position i: nullable = false, firstpos = {i},
lastpos = {i}. */ lastpos = {i}. */
node->nullable = 0; node->nullable = 0;
node->firstpos = node->firstpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, 0, NULL, -1);
tre_set_one(mem, lit->position, (int)lit->code_min, if (!node->firstpos) return REG_ESPACE;
(int)lit->code_max, 0, NULL, -1); node->lastpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, lit->u.class, lit->neg_classes, -1);
if (!node->firstpos) if (!node->lastpos) return REG_ESPACE;
return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position,
(int)lit->code_min,
(int)lit->code_max,
lit->u.class, lit->neg_classes,
-1);
if (!node->lastpos)
return REG_ESPACE;
} }
break; break;
} }
@ -1628,6 +1608,7 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
int i, j, k, l, dup, prev_p2_pos; int i, j, k, l, dup, prev_p2_pos;
if (transitions != NULL) if (transitions != NULL)
{
while (p1->position >= 0) while (p1->position >= 0)
{ {
p2 = orig_p2; p2 = orig_p2;
@ -1814,7 +1795,9 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
} }
p1++; p1++;
} }
}
else else
{
/* Compute a maximum limit for the number of transitions leaving /* Compute a maximum limit for the number of transitions leaving
from each state. */ from each state. */
while (p1->position >= 0) while (p1->position >= 0)
@ -1827,6 +1810,7 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
} }
p1++; p1++;
} }
}
return REG_OK; return REG_OK;
} }

View File

@ -168,23 +168,18 @@ typedef struct tre_backtrack_struct
while (/*CONSTCOND*/0) while (/*CONSTCOND*/0)
#define BT_STACK_POP() \ #define BT_STACK_POP() \
do \ do { \
{ \
int i; \ int i; \
assert(stack->prev); \ assert(stack->prev); \
pos = stack->item.pos; \ pos = stack->item.pos; \
if (type == STR_USER) \
str_source->rewind(pos + pos_add_next, str_source->context); \
str_byte = stack->item.str_byte; \ str_byte = stack->item.str_byte; \
BT_STACK_WIDE_OUT; \ BT_STACK_WIDE_OUT; \
state = stack->item.state; \ state = stack->item.state; \
next_c = stack->item.next_c; \ next_c = stack->item.next_c; \
for (i = 0; i < tnfa->num_tags; i++) \ for (i = 0; i < tnfa->num_tags; i++) tags[i] = stack->item.tags[i]; \
tags[i] = stack->item.tags[i]; \
BT_STACK_MBSTATE_OUT; \ BT_STACK_MBSTATE_OUT; \
stack = stack->prev; \ stack = stack->prev; \
} \ } while (/*CONSTCOND*/0)
while (/*CONSTCOND*/0)
#undef MIN #undef MIN
#define MIN(a, b) ((a) <= (b) ? (a) : (b)) #define MIN(a, b) ((a) <= (b) ? (a) : (b))
@ -208,7 +203,6 @@ tre_tnfa_run_backtrack(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *str
int reg_notbol = eflags & REG_NOTBOL; int reg_notbol = eflags & REG_NOTBOL;
int reg_noteol = eflags & REG_NOTEOL; int reg_noteol = eflags & REG_NOTEOL;
int reg_newline = tnfa->cflags & REG_NEWLINE; int reg_newline = tnfa->cflags & REG_NEWLINE;
int str_user_end = 0;
/* These are used to remember the necessary values of the above /* These are used to remember the necessary values of the above
variables to return to the position where the current search variables to return to the position where the current search
@ -302,8 +296,6 @@ retry:
state = NULL; state = NULL;
pos = pos_start; pos = pos_start;
if (type == STR_USER)
str_source->rewind(pos + pos_add_next, str_source->context);
GET_NEXT_WCHAR(); GET_NEXT_WCHAR();
pos_start = pos; pos_start = pos;
next_c_start = next_c; next_c_start = next_c;
@ -446,15 +438,11 @@ retry:
if (len < 0) if (len < 0)
{ {
if (type == STR_USER)
result = str_source->compare((unsigned)so, (unsigned)pos,
(unsigned)bt_len,
str_source->context);
#ifdef TRE_WCHAR #ifdef TRE_WCHAR
else if (type == STR_WIDE) if (type == STR_WIDE)
result = qse_wcszcmp((const qse_wchar_t*)string + so, str_wide - 1, (size_t)bt_len); result = qse_wcszcmp((const qse_wchar_t*)string + so, str_wide - 1, (size_t)bt_len);
#endif /* TRE_WCHAR */
else else
#endif /* TRE_WCHAR */
result = qse_mbszcmp((const char*)string + so, str_byte - 1, (size_t)bt_len); result = qse_mbszcmp((const char*)string + so, str_byte - 1, (size_t)bt_len);
} }
else if (len - pos < bt_len) else if (len - pos < bt_len)
@ -508,12 +496,7 @@ retry:
/* Check for end of string. */ /* Check for end of string. */
if (len < 0) if (len < 0)
{ {
if (type == STR_USER) if (next_c == QSE_T('\0'))
{
if (str_user_end)
goto backtrack;
}
else if (next_c == QSE_T('\0'))
goto backtrack; goto backtrack;
} }
else else
@ -533,8 +516,8 @@ retry:
trans_i->code_min, trans_i->code_max, trans_i->code_min, trans_i->code_max,
trans_i->code_min, trans_i->code_max, trans_i->code_min, trans_i->code_max,
trans_i->assertions, trans_i->state_id)); trans_i->assertions, trans_i->state_id));
if (trans_i->code_min <= (tre_cint_t)prev_c &&
trans_i->code_max >= (tre_cint_t)prev_c) if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c)
{ {
if (trans_i->assertions if (trans_i->assertions
&& (CHECK_ASSERTIONS(trans_i->assertions) && (CHECK_ASSERTIONS(trans_i->assertions)

View File

@ -325,12 +325,7 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
/* Check for end of string. */ /* Check for end of string. */
if (len < 0) if (len < 0)
{ {
if (type == STR_USER) if (next_c == QSE_T('\0'))
{
if (str_user_end)
break;
}
else if (next_c == QSE_T('\0'))
break; break;
} }
else else
@ -408,28 +403,28 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
for (trans_i = reach_i->state; trans_i->state; trans_i++) for (trans_i = reach_i->state; trans_i->state; trans_i++)
{ {
/* Does this transition match the input symbol? */ /* Does this transition match the input symbol? */
if (trans_i->code_min <= (tre_cint_t)prev_c && if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c)
trans_i->code_max >= (tre_cint_t)prev_c)
{ {
if (trans_i->assertions if (trans_i->assertions &&
&& (CHECK_ASSERTIONS(trans_i->assertions) (CHECK_ASSERTIONS(trans_i->assertions) ||
|| CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
{ {
DPRINT(("assertion failed\n")); DPRINT(("assertion failed\n"));
continue; continue;
} }
/* Compute the tags after this transition. */ /* Compute the tags after this transition. */
for (i = 0; i < num_tags; i++) for (i = 0; i < num_tags; i++) tmp_tags[i] = reach_i->tags[i];
tmp_tags[i] = reach_i->tags[i];
tag_i = trans_i->tags; tag_i = trans_i->tags;
if (tag_i != NULL) if (tag_i != NULL)
{
while (*tag_i >= 0) while (*tag_i >= 0)
{ {
if (*tag_i < num_tags) if (*tag_i < num_tags)
tmp_tags[*tag_i] = pos; tmp_tags[*tag_i] = pos;
tag_i++; tag_i++;
} }
}
if (reach_pos[trans_i->state_id].pos < pos) if (reach_pos[trans_i->state_id].pos < pos)
{ {
@ -442,15 +437,12 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
reach_pos[trans_i->state_id].tags = &reach_next_i->tags; reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
if (reach_next_i->state == tnfa->final if (reach_next_i->state == tnfa->final
&& (match_eo == -1 && (match_eo == -1 || (num_tags > 0 && reach_next_i->tags[0] <= match_tags[0])))
|| (num_tags > 0
&& reach_next_i->tags[0] <= match_tags[0])))
{ {
DPRINT((" found match %p\n", trans_i->state)); DPRINT((" found match %p\n", trans_i->state));
match_eo = pos; match_eo = pos;
new_match = 1; new_match = 1;
for (i = 0; i < num_tags; i++) for (i = 0; i < num_tags; i++) match_tags[i] = reach_next_i->tags[i];
match_tags[i] = reach_next_i->tags[i];
} }
reach_next_i++; reach_next_i++;

View File

@ -52,8 +52,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#define str_source ((const tre_str_source*)string)
#ifdef TRE_WCHAR #ifdef TRE_WCHAR
#ifdef TRE_MULTIBYTE #ifdef TRE_MULTIBYTE
@ -116,12 +114,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
} \ } \
} \ } \
} \ } \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
str_source->context); \
} \
} while(/*CONSTCOND*/0) } while(/*CONSTCOND*/0)
#else /* !TRE_MULTIBYTE */ #else /* !TRE_MULTIBYTE */
@ -143,11 +135,6 @@ do { \
if (len >= 0 && pos >= len) next_c = QSE_T('\0'); \ if (len >= 0 && pos >= len) next_c = QSE_T('\0'); \
else next_c = *str_wide++; \ else next_c = *str_wide++; \
} \ } \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, str_source->context); \
} \
} while(/*CONSTCOND*/0) } while(/*CONSTCOND*/0)
#endif /* !TRE_MULTIBYTE */ #endif /* !TRE_MULTIBYTE */
@ -162,16 +149,8 @@ do { \
if (type == STR_BYTE) \ if (type == STR_BYTE) \
{ \ { \
pos++; \ pos++; \
if (len >= 0 && pos >= len) \ if (len >= 0 && pos >= len) next_c = '\0'; \
next_c = '\0'; \ else next_c = (unsigned char)(*str_byte++); \
else \
next_c = (unsigned char)(*str_byte++); \
} \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
str_source->context); \
} \ } \
} while(/*CONSTCOND*/0) } while(/*CONSTCOND*/0)

View File

@ -138,8 +138,7 @@ tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
} }
static reg_errcode_t static reg_errcode_t
tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i, tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i, tre_ast_node_t ***items)
tre_ast_node_t ***items)
{ {
reg_errcode_t status; reg_errcode_t status;
tre_ast_node_t **array = *items; tre_ast_node_t **array = *items;
@ -306,8 +305,7 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
/* END QSE */ /* END QSE */
if (status == REG_OK) if (status == REG_OK)
{ {
status = tre_expand_ctype(ctx->mem, class, items, status = tre_expand_ctype(ctx->mem, class, items, &i, &max_i, ctx->cflags);
&i, &max_i, ctx->cflags);
class = (tre_ctype_t)0; class = (tre_ctype_t)0;
skip = 1; skip = 1;
} }
@ -328,25 +326,25 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
min = max = *re++; min = max = *re++;
} }
if (status != REG_OK) if (status != REG_OK) break;
break;
if (class && negate) if (class && negate)
{
if (*num_neg_classes >= MAX_NEG_CLASSES) if (*num_neg_classes >= MAX_NEG_CLASSES)
status = REG_ESPACE; status = REG_ESPACE;
else else
neg_classes[(*num_neg_classes)++] = class; neg_classes[(*num_neg_classes)++] = class;
}
else if (!skip) else if (!skip)
{ {
status = tre_new_item(ctx->mem, min, max, &i, &max_i, items); status = tre_new_item(ctx->mem, min, max, &i, &max_i, items);
if (status != REG_OK) if (status != REG_OK) break;
break;
((tre_literal_t*)((*items)[i-1])->obj)->u.class = class; ((tre_literal_t*)((*items)[i-1])->obj)->u.class = class;
} }
/* Add opposite-case counterpoints if REG_ICASE is present. /* Add opposite-case counterpoints if REG_ICASE is present.
This is broken if there are more than two "same" characters. */ This is broken if there are more than two "same" characters. */
if (ctx->cflags & REG_ICASE && !class && status == REG_OK && !skip) if ((ctx->cflags & REG_ICASE) && !class && status == REG_OK && !skip)
{ {
tre_cint_t cmin, ccurr; tre_cint_t cmin, ccurr;
@ -356,27 +354,21 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
if (tre_islower(min)) if (tre_islower(min))
{ {
cmin = ccurr = tre_toupper(min++); cmin = ccurr = tre_toupper(min++);
while (tre_islower(min) && tre_toupper(min) == ccurr + 1 while (tre_islower(min) && tre_toupper(min) == ccurr + 1 && min <= max)
&& min <= max)
ccurr = tre_toupper(min++); ccurr = tre_toupper(min++);
status = tre_new_item(ctx->mem, cmin, ccurr, status = tre_new_item(ctx->mem, cmin, ccurr, &i, &max_i, items);
&i, &max_i, items);
} }
else if (tre_isupper(min)) else if (tre_isupper(min))
{ {
cmin = ccurr = tre_tolower(min++); cmin = ccurr = tre_tolower(min++);
while (tre_isupper(min) && tre_tolower(min) == ccurr + 1 while (tre_isupper(min) && tre_tolower(min) == ccurr + 1 && min <= max)
&& min <= max)
ccurr = tre_tolower(min++); ccurr = tre_tolower(min++);
status = tre_new_item(ctx->mem, cmin, ccurr, status = tre_new_item(ctx->mem, cmin, ccurr, &i, &max_i, items);
&i, &max_i, items);
} }
else min++; else min++;
if (status != REG_OK) if (status != REG_OK) break;
break;
} }
if (status != REG_OK) if (status != REG_OK) break;
break;
} }
} }
} }
@ -399,8 +391,7 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
/* Start off with an array of `max_i' elements. */ /* Start off with an array of `max_i' elements. */
items = xmalloc(ctx->mem->mmgr, sizeof(*items) * max_i); items = xmalloc(ctx->mem->mmgr, sizeof(*items) * max_i);
if (items == NULL) if (items == NULL) return REG_ESPACE;
return REG_ESPACE;
if (*ctx->re == CHAR_CARET) if (*ctx->re == CHAR_CARET)
{ {
@ -409,15 +400,11 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
ctx->re++; ctx->re++;
} }
status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes, status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes, &items, &i, &max_i);
&items, &i, &max_i); if (status != REG_OK) goto parse_bracket_done;
if (status != REG_OK)
goto parse_bracket_done;
/* Sort the array if we need to negate it. */ /* Sort the array if we need to negate it. */
if (negate) if (negate) qse_qsort(items, (unsigned)i, sizeof(*items), tre_compare_items, QSE_NULL);
qse_qsort(items, (unsigned)i, sizeof(*items), tre_compare_items, QSE_NULL);
curr_max = curr_min = 0; curr_max = curr_min = 0;
/* Build a union of the items in the array, negated if necessary. */ /* Build a union of the items in the array, negated if necessary. */
@ -466,22 +453,23 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
l->position = ctx->position; l->position = ctx->position;
if (num_neg_classes > 0) if (num_neg_classes > 0)
{ {
l->neg_classes = tre_mem_alloc(ctx->mem, l->neg_classes = tre_mem_alloc(ctx->mem, (sizeof(l->neg_classes) * (num_neg_classes + 1)));
(sizeof(l->neg_classes)
* (num_neg_classes + 1)));
if (l->neg_classes == NULL) if (l->neg_classes == NULL)
{ {
status = REG_ESPACE; status = REG_ESPACE;
break; break;
} }
for (k = 0; k < num_neg_classes; k++) for (k = 0; k < num_neg_classes; k++) l->neg_classes[k] = neg_classes[k];
l->neg_classes[k] = neg_classes[k];
l->neg_classes[k] = (tre_ctype_t)0; l->neg_classes[k] = (tre_ctype_t)0;
} }
else else
{
l->neg_classes = NULL; l->neg_classes = NULL;
}
if (node == NULL) if (node == NULL)
{
node = items[j]; node = items[j];
}
else else
{ {
u = tre_ast_new_union(ctx->mem, node, items[j]); u = tre_ast_new_union(ctx->mem, node, items[j]);
@ -492,8 +480,7 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
} }
} }
if (status != REG_OK) if (status != REG_OK) goto parse_bracket_done;
goto parse_bracket_done;
if (negate) if (negate)
{ {
@ -501,7 +488,9 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
DPRINT(("final: creating %d - %d\n", curr_min, (int)TRE_CHAR_MAX)); DPRINT(("final: creating %d - %d\n", curr_min, (int)TRE_CHAR_MAX));
n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position); n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position);
if (n == NULL) if (n == NULL)
{
status = REG_ESPACE; status = REG_ESPACE;
}
else else
{ {
tre_literal_t *l = n->obj; tre_literal_t *l = n->obj;
@ -520,21 +509,23 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
l->neg_classes[k] = (tre_ctype_t)0; l->neg_classes[k] = (tre_ctype_t)0;
} }
else else
{
l->neg_classes = NULL; l->neg_classes = NULL;
}
if (node == NULL) if (node == NULL)
{
node = n; node = n;
}
else else
{ {
u = tre_ast_new_union(ctx->mem, node, n); u = tre_ast_new_union(ctx->mem, node, n);
if (u == NULL) if (u == NULL) status = REG_ESPACE;
status = REG_ESPACE;
node = u; node = u;
} }
} }
} }
if (status != REG_OK) if (status != REG_OK) goto parse_bracket_done;
goto parse_bracket_done;
#ifdef TRE_DEBUG #ifdef TRE_DEBUG
tre_ast_print(node); tre_ast_print(node);
@ -814,8 +805,7 @@ tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
if (min == 0 && max == 0) if (min == 0 && max == 0)
{ {
*result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); *result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
if (*result == NULL) if (*result == NULL) return REG_ESPACE;
return REG_ESPACE;
} }
else else
{ {
@ -909,9 +899,7 @@ typedef enum
PARSE_RESTORE_CFLAGS PARSE_RESTORE_CFLAGS
} tre_parse_re_stack_symbol_t; } tre_parse_re_stack_symbol_t;
reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
reg_errcode_t
tre_parse(tre_parse_ctx_t *ctx)
{ {
tre_ast_node_t *result = NULL; tre_ast_node_t *result = NULL;
tre_parse_re_stack_symbol_t symbol; tre_parse_re_stack_symbol_t symbol;
@ -941,8 +929,8 @@ tre_parse(tre_parse_ctx_t *ctx)
call stack, and efficiency (both in lines of code and speed). */ call stack, and efficiency (both in lines of code and speed). */
while (tre_stack_num_objects(stack) > bottom && status == REG_OK) while (tre_stack_num_objects(stack) > bottom && status == REG_OK)
{ {
if (status != REG_OK) if (status != REG_OK) break;
break;
symbol = tre_stack_pop_int(stack); symbol = tre_stack_pop_int(stack);
switch (symbol) switch (symbol)
{ {
@ -978,8 +966,8 @@ tre_parse(tre_parse_ctx_t *ctx)
/* If the expression has not ended, parse another piece. */ /* If the expression has not ended, parse another piece. */
{ {
tre_char_t c; tre_char_t c;
if (ctx->re >= ctx->re_end)
break; if (ctx->re >= ctx->re_end) break;
c = *ctx->re; c = *ctx->re;
#ifdef REG_LITERAL #ifdef REG_LITERAL
if (!(ctx->cflags & REG_LITERAL)) if (!(ctx->cflags & REG_LITERAL))
@ -1039,11 +1027,9 @@ tre_parse(tre_parse_ctx_t *ctx)
} }
case PARSE_UNION: case PARSE_UNION:
if (ctx->re >= ctx->re_end) if (ctx->re >= ctx->re_end) break;
break;
#ifdef REG_LITERAL #ifdef REG_LITERAL
if (ctx->cflags & REG_LITERAL) if (ctx->cflags & REG_LITERAL) break;
break;
#endif /* REG_LITERAL */ #endif /* REG_LITERAL */
switch (*ctx->re) switch (*ctx->re)
{ {
@ -1089,8 +1075,7 @@ tre_parse(tre_parse_ctx_t *ctx)
{ {
case CHAR_PLUS: case CHAR_PLUS:
case CHAR_QUESTIONMARK: case CHAR_QUESTIONMARK:
if (!(ctx->cflags & REG_EXTENDED)) if (!(ctx->cflags & REG_EXTENDED)) break;
break;
/*FALLTHROUGH*/ /*FALLTHROUGH*/
case CHAR_STAR: case CHAR_STAR:
/* QSE - added this label */ /* QSE - added this label */
@ -1143,8 +1128,9 @@ parse_star:
return REG_ESPACE; return REG_ESPACE;
result = tmp_node; result = tmp_node;
STACK_PUSHX(stack, int, PARSE_POSTFIX); STACK_PUSHX(stack, int, PARSE_POSTFIX);
}
break; break;
}
case CHAR_BACKSLASH: case CHAR_BACKSLASH:
/* "\{" is special without REG_EXTENDED */ /* "\{" is special without REG_EXTENDED */
@ -1200,17 +1186,16 @@ parse_brace:
break; break;
case PARSE_ATOM: case PARSE_ATOM:
/* Parse an atom. An atom is a regular expression enclosed in `()', /* Parse an atom. An atom is a regular expression enclosed in `()',
an empty set of `()', a bracket expression, `.', `^', `$', an empty set of `()', a bracket expression, `.', `^', `$',
a `\' followed by a character, or a single character. */ a `\' followed by a character, or a single character. */
/* End of regexp? (empty string). */ /* End of regexp? (empty string). */
if (ctx->re >= ctx->re_end) if (ctx->re >= ctx->re_end) goto parse_literal;
goto parse_literal;
#ifdef REG_LITERAL #ifdef REG_LITERAL
if (ctx->cflags & REG_LITERAL) if (ctx->cflags & REG_LITERAL) goto parse_literal;
goto parse_literal;
#endif /* REG_LITERAL */ #endif /* REG_LITERAL */
switch (*ctx->re) switch (*ctx->re)
@ -1226,15 +1211,13 @@ parse_brace:
{ {
int new_cflags = ctx->cflags; int new_cflags = ctx->cflags;
int bit = 1; int bit = 1;
DPRINT(("tre_parse: extension: '%.*" STRF "\n", DPRINT(("tre_parse: extension: '%.*" STRF "\n", REST(ctx->re)));
REST(ctx->re)));
ctx->re += 2; ctx->re += 2;
while (/*CONSTCOND*/1) while (/*CONSTCOND*/1)
{ {
if (*ctx->re == QSE_T('i')) if (*ctx->re == QSE_T('i'))
{ {
DPRINT(("tre_parse: icase: '%.*" STRF "\n", DPRINT(("tre_parse: icase: '%.*" STRF "\n", REST(ctx->re)));
REST(ctx->re)));
if (bit) if (bit)
new_cflags |= REG_ICASE; new_cflags |= REG_ICASE;
else else
@ -1243,8 +1226,7 @@ parse_brace:
} }
else if (*ctx->re == QSE_T('n')) else if (*ctx->re == QSE_T('n'))
{ {
DPRINT(("tre_parse: newline: '%.*" STRF "\n", DPRINT(("tre_parse: newline: '%.*" STRF "\n", REST(ctx->re)));
REST(ctx->re)));
if (bit) if (bit)
new_cflags |= REG_NEWLINE; new_cflags |= REG_NEWLINE;
else else
@ -1254,8 +1236,7 @@ parse_brace:
#ifdef REG_RIGHT_ASSOC #ifdef REG_RIGHT_ASSOC
else if (*ctx->re == QSE_T('r')) else if (*ctx->re == QSE_T('r'))
{ {
DPRINT(("tre_parse: right assoc: '%.*" STRF "\n", DPRINT(("tre_parse: right assoc: '%.*" STRF "\n", REST(ctx->re)));
REST(ctx->re)));
if (bit) if (bit)
new_cflags |= REG_RIGHT_ASSOC; new_cflags |= REG_RIGHT_ASSOC;
else else
@ -1266,8 +1247,7 @@ parse_brace:
#ifdef REG_UNGREEDY #ifdef REG_UNGREEDY
else if (*ctx->re == QSE_T('U')) else if (*ctx->re == QSE_T('U'))
{ {
DPRINT(("tre_parse: ungreedy: '%.*" STRF "\n", DPRINT(("tre_parse: ungreedy: '%.*" STRF "\n", REST(ctx->re)));
REST(ctx->re)));
if (bit) if (bit)
new_cflags |= REG_UNGREEDY; new_cflags |= REG_UNGREEDY;
else else
@ -1366,29 +1346,24 @@ parse_brace:
|| (ctx->re > ctx->re_start || (ctx->re > ctx->re_start
&& *(ctx->re - 1) == CHAR_BACKSLASH)) && *(ctx->re - 1) == CHAR_BACKSLASH))
{ {
DPRINT(("tre_parse: empty: '%.*" STRF "'\n", DPRINT(("tre_parse: empty: '%.*" STRF "'\n", REST(ctx->re)));
REST(ctx->re)));
/* We were expecting an atom, but instead the current /* We were expecting an atom, but instead the current
subexpression was closed. POSIX leaves the meaning of subexpression was closed. POSIX leaves the meaning of
this to be implementation-defined. We interpret this as this to be implementation-defined. We interpret this as
an empty expression (which matches an empty string). */ an empty expression (which matches an empty string). */
result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
if (result == NULL) if (result == NULL) return REG_ESPACE;
return REG_ESPACE; if (!(ctx->cflags & REG_EXTENDED)) ctx->re--;
if (!(ctx->cflags & REG_EXTENDED))
ctx->re--;
} }
else else
goto parse_literal; goto parse_literal;
break; break;
case CHAR_LBRACKET: /* bracket expression */ case CHAR_LBRACKET: /* bracket expression */
DPRINT(("tre_parse: bracket: '%.*" STRF "'\n", DPRINT(("tre_parse: bracket: '%.*" STRF "'\n", REST(ctx->re)));
REST(ctx->re)));
ctx->re++; ctx->re++;
status = tre_parse_bracket(ctx, &result); status = tre_parse_bracket(ctx, &result);
if (status != REG_OK) if (status != REG_OK) return status;
return status;
break; break;
case CHAR_BACKSLASH: case CHAR_BACKSLASH:
@ -1407,8 +1382,7 @@ parse_brace:
/* If a macro is used, parse the expanded macro recursively. */ /* If a macro is used, parse the expanded macro recursively. */
{ {
tre_char_t buf[64]; tre_char_t buf[64];
tre_expand_macro(ctx->re + 1, ctx->re_end, tre_expand_macro(ctx->re + 1, ctx->re_end, buf, QSE_COUNTOF(buf));
buf, QSE_COUNTOF(buf));
if (buf[0] != 0) if (buf[0] != 0)
{ {
tre_parse_ctx_t subctx; tre_parse_ctx_t subctx;
@ -1426,8 +1400,10 @@ parse_brace:
} }
if (ctx->re + 1 >= ctx->re_end) if (ctx->re + 1 >= ctx->re_end)
{
/* Trailing backslash. */ /* Trailing backslash. */
return REG_EESCAPE; return REG_EESCAPE;
}
#ifdef REG_LITERAL #ifdef REG_LITERAL
if (*(ctx->re + 1) == QSE_T('Q')) if (*(ctx->re + 1) == QSE_T('Q'))
@ -1447,23 +1423,19 @@ parse_brace:
switch (*ctx->re) switch (*ctx->re)
{ {
case QSE_T('b'): case QSE_T('b'):
result = tre_ast_new_literal(ctx->mem, ASSERTION, result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
ASSERT_AT_WB, -1);
ctx->re++; ctx->re++;
break; break;
case QSE_T('B'): case QSE_T('B'):
result = tre_ast_new_literal(ctx->mem, ASSERTION, result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
ASSERT_AT_WB_NEG, -1);
ctx->re++; ctx->re++;
break; break;
case QSE_T('<'): case QSE_T('<'):
result = tre_ast_new_literal(ctx->mem, ASSERTION, result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
ASSERT_AT_BOW, -1);
ctx->re++; ctx->re++;
break; break;
case QSE_T('>'): case QSE_T('>'):
result = tre_ast_new_literal(ctx->mem, ASSERTION, result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
ASSERT_AT_EOW, -1);
ctx->re++; ctx->re++;
break; break;
case QSE_T('x'): case QSE_T('x'):
@ -1503,8 +1475,7 @@ parse_brace:
ctx->re++; ctx->re++;
} }
result = tre_ast_new_literal(ctx->mem, (int)val, result = tre_ast_new_literal(ctx->mem, (int)val, (int)val, ctx->position);
(int)val, ctx->position);
ctx->position++; ctx->position++;
break; break;
} }
@ -1552,8 +1523,7 @@ parse_brace:
return REG_EBRACE; return REG_EBRACE;
} }
result = tre_ast_new_literal(ctx->mem, (int)val, (int)val, result = tre_ast_new_literal(ctx->mem, (int)val, (int)val, ctx->position);
ctx->position);
ctx->position++; ctx->position++;
break; break;
} }
@ -1564,12 +1534,9 @@ parse_brace:
{ {
/* Back reference. */ /* Back reference. */
int val = *ctx->re - QSE_T('0'); int val = *ctx->re - QSE_T('0');
DPRINT(("tre_parse: backref: '%.*" STRF "'\n", DPRINT(("tre_parse: backref: '%.*" STRF "'\n", REST(ctx->re - 1)));
REST(ctx->re - 1))); result = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position);
result = tre_ast_new_literal(ctx->mem, BACKREF, val, if (result == NULL) return REG_ESPACE;
ctx->position);
if (result == NULL)
return REG_ESPACE;
ctx->position++; ctx->position++;
ctx->max_backref = MAX(val, ctx->max_backref); ctx->max_backref = MAX(val, ctx->max_backref);
ctx->re++; ctx->re++;
@ -1577,8 +1544,7 @@ parse_brace:
else else
{ {
/* Escaped character. */ /* Escaped character. */
DPRINT(("tre_parse: escaped: '%.*" STRF "'\n", DPRINT(("tre_parse: escaped: '%.*" STRF "'\n", REST(ctx->re - 1)));
REST(ctx->re - 1)));
result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position); result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position);
ctx->position++; ctx->position++;
ctx->re++; ctx->re++;
@ -1596,25 +1562,20 @@ parse_brace:
{ {
tre_ast_node_t *tmp1; tre_ast_node_t *tmp1;
tre_ast_node_t *tmp2; tre_ast_node_t *tmp2;
tmp1 = tre_ast_new_literal(ctx->mem, 0, QSE_T('\n') - 1, /* exclude new line */
ctx->position); tmp1 = tre_ast_new_literal(ctx->mem, 0, QSE_T('\n') - 1, ctx->position);
if (!tmp1) if (!tmp1) return REG_ESPACE;
return REG_ESPACE; tmp2 = tre_ast_new_literal(ctx->mem, QSE_T('\n') + 1, TRE_CHAR_MAX, ctx->position + 1);
tmp2 = tre_ast_new_literal(ctx->mem, QSE_T('\n') + 1, TRE_CHAR_MAX, if (!tmp2) return REG_ESPACE;
ctx->position + 1);
if (!tmp2)
return REG_ESPACE;
result = tre_ast_new_union(ctx->mem, tmp1, tmp2); result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
if (!result) if (!result) return REG_ESPACE;
return REG_ESPACE;
ctx->position += 2; ctx->position += 2;
} }
else else
{ {
result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, /* all characters */
ctx->position); result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position);
if (!result) if (!result) return REG_ESPACE;
return REG_ESPACE;
ctx->position++; ctx->position++;
} }
ctx->re++; ctx->re++;
@ -1631,10 +1592,8 @@ parse_brace:
{ {
DPRINT(("tre_parse: BOL: '%.*" STRF "'\n", DPRINT(("tre_parse: BOL: '%.*" STRF "'\n",
REST(ctx->re))); REST(ctx->re)));
result = tre_ast_new_literal(ctx->mem, ASSERTION, result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
ASSERT_AT_BOL, -1); if (result == NULL) return REG_ESPACE;
if (result == NULL)
return REG_ESPACE;
ctx->re++; ctx->re++;
} }
else else
@ -1652,8 +1611,7 @@ parse_brace:
{ {
DPRINT(("tre_parse: EOL: '%.*" STRF "'\n", DPRINT(("tre_parse: EOL: '%.*" STRF "'\n",
REST(ctx->re))); REST(ctx->re)));
result = tre_ast_new_literal(ctx->mem, ASSERTION, result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
ASSERT_AT_EOL, -1);
if (result == NULL) if (result == NULL)
return REG_ESPACE; return REG_ESPACE;
ctx->re++; ctx->re++;
@ -1668,8 +1626,7 @@ parse_literal:
if (temporary_cflags && ctx->re + 1 < ctx->re_end if (temporary_cflags && ctx->re + 1 < ctx->re_end
&& *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == QSE_T('E')) && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == QSE_T('E'))
{ {
DPRINT(("tre_parse: end tmps: '%.*" STRF "'\n", DPRINT(("tre_parse: end tmps: '%.*" STRF "'\n", REST(ctx->re)));
REST(ctx->re)));
ctx->cflags &= ~temporary_cflags; ctx->cflags &= ~temporary_cflags;
temporary_cflags = 0; temporary_cflags = 0;
ctx->re += 2; ctx->re += 2;
@ -1701,11 +1658,9 @@ parse_literal:
&& *ctx->re == CHAR_BACKSLASH && *ctx->re == CHAR_BACKSLASH
&& *(ctx->re + 1) == CHAR_LBRACE))) && *(ctx->re + 1) == CHAR_LBRACE)))
{ {
DPRINT(("tre_parse: empty: '%.*" STRF "'\n", DPRINT(("tre_parse: empty: '%.*" STRF "'\n", REST(ctx->re)));
REST(ctx->re)));
result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
if (!result) if (!result) return REG_ESPACE;
return REG_ESPACE;
break; break;
} }
@ -1714,8 +1669,7 @@ parse_literal:
/* Note that we can't use an tre_isalpha() test here, since there /* Note that we can't use an tre_isalpha() test here, since there
may be characters which are alphabetic but neither upper or may be characters which are alphabetic but neither upper or
lower case. */ lower case. */
if (ctx->cflags & REG_ICASE if (ctx->cflags & REG_ICASE && (tre_isupper(*ctx->re) || tre_islower(*ctx->re)))
&& (tre_isupper(*ctx->re) || tre_islower(*ctx->re)))
{ {
tre_ast_node_t *tmp1; tre_ast_node_t *tmp1;
tre_ast_node_t *tmp2; tre_ast_node_t *tmp2;
@ -1728,26 +1682,17 @@ parse_literal:
that at least for multi-character collating elements there that at least for multi-character collating elements there
could be several opposite-case counterpoints, but they could be several opposite-case counterpoints, but they
cannot be supported portably anyway. */ cannot be supported portably anyway. */
tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re), tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re), tre_toupper(*ctx->re), ctx->position);
tre_toupper(*ctx->re), if (!tmp1) return REG_ESPACE;
ctx->position); tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re), tre_tolower(*ctx->re), ctx->position);
if (!tmp1) if (!tmp2) return REG_ESPACE;
return REG_ESPACE;
tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re),
tre_tolower(*ctx->re),
ctx->position);
if (!tmp2)
return REG_ESPACE;
result = tre_ast_new_union(ctx->mem, tmp1, tmp2); result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
if (!result) if (!result) return REG_ESPACE;
return REG_ESPACE;
} }
else else
{ {
result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position);
ctx->position); if (!result) return REG_ESPACE;
if (!result)
return REG_ESPACE;
} }
ctx->position++; ctx->position++;
ctx->re++; ctx->re++;
@ -1763,11 +1708,9 @@ parse_literal:
{ {
tre_ast_node_t *n, *tmp_node; tre_ast_node_t *n, *tmp_node;
n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
if (n == NULL) if (n == NULL) return REG_ESPACE;
return REG_ESPACE;
tmp_node = tre_ast_new_catenation(ctx->mem, n, result); tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
if (tmp_node == NULL) if (tmp_node == NULL) return REG_ESPACE;
return REG_ESPACE;
tmp_node->num_submatches = result->num_submatches; tmp_node->num_submatches = result->num_submatches;
result = tmp_node; result = tmp_node;
} }

View File

@ -205,15 +205,6 @@ static int tre_match(
if (tnfa->have_backrefs || (eflags & REG_BACKTRACKING_MATCHER)) if (tnfa->have_backrefs || (eflags & REG_BACKTRACKING_MATCHER))
{ {
/* The regex has back references, use the backtracking matcher. */ /* The regex has back references, use the backtracking matcher. */
if (type == STR_USER)
{
const tre_str_source *source = string;
if (source->rewind == QSE_NULL || source->compare == QSE_NULL)
/* The backtracking matcher requires rewind and compare
capabilities from the input stream. */
return REG_BADPAT;
}
status = tre_tnfa_run_backtrack ( status = tre_tnfa_run_backtrack (
preg->mmgr, tnfa, string, (int)len, type, preg->mmgr, tnfa, string, (int)len, type,
tags, eflags, &eo); tags, eflags, &eo);
@ -266,15 +257,6 @@ int qse_tre_exec (
return qse_tre_execx (tre, str, (qse_size_t)-1, pmatch, nmatch, eflags); return qse_tre_execx (tre, str, (qse_size_t)-1, pmatch, nmatch, eflags);
} }
#if 0
int qse_tre_execsrc (
const regex_t *preg, const tre_str_source *str,
qse_size_t nmatch, regmatch_t pmatch[], int eflags)
{
return tre_match (preg, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
}
#endif
qse_tre_errnum_t qse_tre_geterrnum (qse_tre_t* tre) qse_tre_errnum_t qse_tre_geterrnum (qse_tre_t* tre)
{ {
return tre->errnum; return tre->errnum;

View File

@ -177,7 +177,6 @@ typedef qse_cint_t tre_cint_t;
#define regex_t qse_tre_t #define regex_t qse_tre_t
#define regmatch_t qse_tre_match_t #define regmatch_t qse_tre_match_t
#define reg_errcode_t qse_tre_errnum_t #define reg_errcode_t qse_tre_errnum_t
#define tre_str_source qse_tre_strsrc_t
#define REG_OK QSE_TRE_ENOERR #define REG_OK QSE_TRE_ENOERR
@ -278,7 +277,7 @@ typedef qse_pma_t* tre_mem_t;
typedef qse_ctype_t tre_ctype_t; typedef qse_ctype_t tre_ctype_t;
#define tre_isctype(c,t) QSE_ISCTYPE(c,t) #define tre_isctype(c,t) QSE_ISCTYPE(c,t)
typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t; typedef enum { STR_WIDE, STR_BYTE, STR_MBS } tre_str_type_t;
/* Returns number of bytes to add to (char *)ptr to make it /* Returns number of bytes to add to (char *)ptr to make it
properly aligned for the type. */ properly aligned for the type. */
@ -305,6 +304,9 @@ typedef struct tnfa_transition tre_tnfa_transition_t;
struct tnfa_transition struct tnfa_transition
{ {
/* Range of accepted characters. */ /* Range of accepted characters. */
/* QSE indicate that code_min .. code_max is not yet negated for ^ in a bracket */
int negate_range;
/* END QSE */
tre_cint_t code_min; tre_cint_t code_min;
tre_cint_t code_max; tre_cint_t code_max;
/* Pointer to the destination state. */ /* Pointer to the destination state. */