Changed awk to handle IGNORECASE with a regular expression engine that supports case-insensitive matching only as a compile-time option, not as a run-time option

This commit is contained in:
hyung-hwan 2013-08-23 15:19:29 +00:00
parent 47677ca566
commit d841c9f62f
21 changed files with 1127 additions and 1236 deletions

View File

@ -110,21 +110,21 @@ typedef struct qse_awk_loc_t qse_awk_loc_t;
* Three common fields are: * Three common fields are:
* - type - type of a value from #qse_awk_val_type_t * - type - type of a value from #qse_awk_val_type_t
* - ref - reference count * - ref - reference count
* - nstr - numeric string marker * - stat - static value
* - nstr - numeric string marker, 1 -> long, 2 -> real
*/ */
/*
 * QSE_AWK_VAL_HDR declares the header fields shared by every value
 * structure as bit-fields of qse_uintptr_t. The type, stat and nstr fields
 * take 3 + 1 + 2 = 6 bits and ref takes the rest, so the widths add up to
 * QSE_SIZEOF_UINTPTR_T * 8 bits.
 *
 * The last field deliberately has no terminating semicolon. A structure
 * embeds the header as "QSE_AWK_VAL_HDR;" and supplies it there; a
 * semicolon in the macro as well would leave an empty member declaration
 * behind, which ISO C does not permit.
 */
#define QSE_AWK_VAL_HDR \
	qse_uintptr_t type: 3; \
	qse_uintptr_t ref: ((QSE_SIZEOF_UINTPTR_T * 8) - 6); \
	qse_uintptr_t stat: 1; \
	qse_uintptr_t nstr: 2
/** /**
* The qse_awk_val_t type is an abstract value type. A value commonly contains: * The qse_awk_val_t type is an abstract value type. A value commonly contains:
@ -191,7 +191,7 @@ struct qse_awk_val_rex_t
{ {
QSE_AWK_VAL_HDR; QSE_AWK_VAL_HDR;
qse_xstr_t str; qse_xstr_t str;
void* code; void* code[2];
}; };
typedef struct qse_awk_val_rex_t qse_awk_val_rex_t; typedef struct qse_awk_val_rex_t qse_awk_val_rex_t;
@ -2388,7 +2388,7 @@ QSE_EXPORT qse_awk_val_t* qse_awk_rtx_makenstrvalwithcstr (
QSE_EXPORT qse_awk_val_t* qse_awk_rtx_makerexval ( QSE_EXPORT qse_awk_val_t* qse_awk_rtx_makerexval (
qse_awk_rtx_t* rtx, qse_awk_rtx_t* rtx,
const qse_cstr_t* str, const qse_cstr_t* str,
void* code void* code[2]
); );
/** /**

View File

@ -99,15 +99,6 @@ enum qse_tre_eflag_t
QSE_TRE_NOTEOL = (1 << 2) QSE_TRE_NOTEOL = (1 << 2)
}; };
typedef struct qse_tre_strsrc_t qse_tre_strsrc_t;
struct qse_tre_strsrc_t
{
int (*get_next_char) (qse_char_t *c, unsigned int* pos_add, void* context);
void (*rewind)(qse_size_t pos, void *context);
int (*compare)(qse_size_t pos1, qse_size_t pos2, qse_size_t len, void* context);
void* context;
};
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif

View File

@ -307,8 +307,8 @@ struct qse_awk_rtx_t
struct struct
{ {
void* rs; void* rs[2];
void* fs; void* fs[2];
int ignorecase; int ignorecase;
qse_long_t nr; qse_long_t nr;

View File

@ -624,7 +624,7 @@ static int fnc_substr (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
return 0; return 0;
} }
static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi) static int fnc_split (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
{ {
qse_size_t nargs; qse_size_t nargs;
qse_awk_val_t* a0, * a1, * a2, * t1, * t2; qse_awk_val_t* a0, * a1, * a2, * t1, * t2;
@ -642,12 +642,12 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
qse_awk_errnum_t errnum; qse_awk_errnum_t errnum;
int x; int x;
nargs = qse_awk_rtx_getnargs (run); nargs = qse_awk_rtx_getnargs (rtx);
QSE_ASSERT (nargs >= 2 && nargs <= 3); QSE_ASSERT (nargs >= 2 && nargs <= 3);
a0 = qse_awk_rtx_getarg (run, 0); a0 = qse_awk_rtx_getarg (rtx, 0);
a1 = qse_awk_rtx_getarg (run, 1); a1 = qse_awk_rtx_getarg (rtx, 1);
a2 = (nargs >= 3)? qse_awk_rtx_getarg (run, 2): QSE_NULL; a2 = (nargs >= 3)? qse_awk_rtx_getarg (rtx, 2): QSE_NULL;
QSE_ASSERT (a1->type == QSE_AWK_VAL_REF); QSE_ASSERT (a1->type == QSE_AWK_VAL_REF);
@ -658,7 +658,7 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
} }
else else
{ {
str.ptr = qse_awk_rtx_valtostrdup (run, a0, &str.len); str.ptr = qse_awk_rtx_valtostrdup (rtx, a0, &str.len);
if (str.ptr == QSE_NULL) return -1; if (str.ptr == QSE_NULL) return -1;
str_free = (qse_char_t*)str.ptr; str_free = (qse_char_t*)str.ptr;
} }
@ -666,7 +666,7 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
if (a2 == QSE_NULL) if (a2 == QSE_NULL)
{ {
/* get the value from FS */ /* get the value from FS */
t1 = qse_awk_rtx_getgbl (run, QSE_AWK_GBL_FS); t1 = qse_awk_rtx_getgbl (rtx, QSE_AWK_GBL_FS);
if (t1->type == QSE_AWK_VAL_NIL) if (t1->type == QSE_AWK_VAL_NIL)
{ {
fs.ptr = QSE_T(" "); fs.ptr = QSE_T(" ");
@ -679,17 +679,17 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
} }
else else
{ {
fs.ptr = qse_awk_rtx_valtostrdup (run, t1, &fs.len); fs.ptr = qse_awk_rtx_valtostrdup (rtx, t1, &fs.len);
if (fs.ptr == QSE_NULL) goto oops; if (fs.ptr == QSE_NULL) goto oops;
fs_free = (qse_char_t*)fs.ptr; fs_free = (qse_char_t*)fs.ptr;
} }
if (fs.len > 1) fs_rex = run->gbl.fs; if (fs.len > 1) fs_rex = rtx->gbl.fs[rtx->gbl.ignorecase];
} }
else if (a2->type == QSE_AWK_VAL_REX) else if (a2->type == QSE_AWK_VAL_REX)
{ {
/* the third parameter is a regular expression */ /* the third parameter is a regular expression */
fs_rex = ((qse_awk_val_rex_t*)a2)->code; fs_rex = ((qse_awk_val_rex_t*)a2)->code[rtx->gbl.ignorecase];
/* make the loop below to take fs_rex by /* make the loop below to take fs_rex by
* setting fs_len greater than 1*/ * setting fs_len greater than 1*/
@ -705,30 +705,36 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
} }
else else
{ {
fs.ptr = qse_awk_rtx_valtostrdup (run, a2, &fs.len); fs.ptr = qse_awk_rtx_valtostrdup (rtx, a2, &fs.len);
if (fs.ptr == QSE_NULL) goto oops; if (fs.ptr == QSE_NULL) goto oops;
fs_free = (qse_char_t*)fs.ptr; fs_free = (qse_char_t*)fs.ptr;
} }
if (fs.len > 1) if (fs.len > 1)
{ {
fs_rex = qse_awk_buildrex ( int x;
run->awk, fs.ptr, fs.len, &errnum);
if (fs_rex == QSE_NULL) if (rtx->gbl.ignorecase)
x = qse_awk_buildrex (rtx->awk, fs.ptr, fs.len, &errnum, QSE_NULL, &fs_rex);
else
x = qse_awk_buildrex (rtx->awk, fs.ptr, fs.len, &errnum, &fs_rex, QSE_NULL);
if (x <= -1)
{ {
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL); qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
goto oops; goto oops;
} }
fs_rex_free = fs_rex; fs_rex_free = fs_rex;
} }
} }
t1 = qse_awk_rtx_makemapval (run); t1 = qse_awk_rtx_makemapval (rtx);
if (t1 == QSE_NULL) goto oops; if (t1 == QSE_NULL) goto oops;
qse_awk_rtx_refupval (run, t1); qse_awk_rtx_refupval (rtx, t1);
x = qse_awk_rtx_setrefval (run, a1, t1); x = qse_awk_rtx_setrefval (rtx, (qse_awk_val_ref_t*)a1, t1);
qse_awk_rtx_refdownval (run, t1); qse_awk_rtx_refdownval (rtx, t1);
if (x <= -1) goto oops; if (x <= -1) goto oops;
/* fill the map with actual values */ /* fill the map with actual values */
@ -742,18 +748,18 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
if (fs.len <= 1) if (fs.len <= 1)
{ {
p = qse_awk_rtx_strxntok (run, p = qse_awk_rtx_strxntok (rtx,
p, str.len, fs.ptr, fs.len, &tok); p, str.len, fs.ptr, fs.len, &tok);
} }
else else
{ {
p = qse_awk_rtx_strxntokbyrex ( p = qse_awk_rtx_strxntokbyrex (
run, str.ptr, org_len, p, str.len, rtx, str.ptr, org_len, p, str.len,
fs_rex, &tok, &errnum fs_rex, &tok, &errnum
); );
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR) if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{ {
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL); qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
goto oops; goto oops;
} }
} }
@ -768,42 +774,54 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
/* create the field string - however, the split function must /* create the field string - however, the split function must
* create a numeric string if the string is a number */ * create a numeric string if the string is a number */
/*t2 = qse_awk_rtx_makestrvalwithcstr (run, &tok);*/ /*t2 = qse_awk_rtx_makestrvalwithcstr (rtx, &tok);*/
t2 = qse_awk_rtx_makenstrvalwithcstr (run, &tok); t2 = qse_awk_rtx_makenstrvalwithcstr (rtx, &tok);
if (t2 == QSE_NULL) goto oops; if (t2 == QSE_NULL) goto oops;
/* put it into the map */ /* put it into the map */
key_len = qse_awk_longtostr ( key_len = qse_awk_longtostr (
run->awk, ++nflds, 10, QSE_NULL, key_buf, QSE_COUNTOF(key_buf)); rtx->awk, ++nflds, 10, QSE_NULL, key_buf, QSE_COUNTOF(key_buf));
QSE_ASSERT (key_len != (qse_size_t)-1); QSE_ASSERT (key_len != (qse_size_t)-1);
if (qse_awk_rtx_setmapvalfld ( if (qse_awk_rtx_setmapvalfld (
run, t1, key_buf, key_len, t2) == QSE_NULL) rtx, t1, key_buf, key_len, t2) == QSE_NULL)
{ {
qse_awk_rtx_refupval (run, t2); qse_awk_rtx_refupval (rtx, t2);
qse_awk_rtx_refdownval (run, t2); qse_awk_rtx_refdownval (rtx, t2);
goto oops; goto oops;
} }
str.len = str_left - (p - str.ptr); str.len = str_left - (p - str.ptr);
} }
if (str_free) QSE_AWK_FREE (run->awk, str_free); if (str_free) QSE_AWK_FREE (rtx->awk, str_free);
if (fs_free) QSE_AWK_FREE (run->awk, fs_free); if (fs_free) QSE_AWK_FREE (rtx->awk, fs_free);
if (fs_rex_free) qse_awk_freerex (run->awk, fs_rex_free); if (fs_rex_free)
{
if (rtx->gbl.ignorecase)
qse_awk_freerex (rtx->awk, QSE_NULL, fs_rex_free);
else
qse_awk_freerex (rtx->awk, fs_rex_free, QSE_NULL);
}
/*nflds--;*/ /*nflds--;*/
t1 = qse_awk_rtx_makeintval (run, nflds); t1 = qse_awk_rtx_makeintval (rtx, nflds);
if (t1 == QSE_NULL) return -1; if (t1 == QSE_NULL) return -1;
qse_awk_rtx_setretval (run, t1); qse_awk_rtx_setretval (rtx, t1);
return 0; return 0;
oops: oops:
if (str_free) QSE_AWK_FREE (run->awk, str_free); if (str_free) QSE_AWK_FREE (rtx->awk, str_free);
if (fs_free) QSE_AWK_FREE (run->awk, fs_free); if (fs_free) QSE_AWK_FREE (rtx->awk, fs_free);
if (fs_rex_free) qse_awk_freerex (run->awk, fs_rex_free); if (fs_rex_free)
{
if (rtx->gbl.ignorecase)
qse_awk_freerex (rtx->awk, QSE_NULL, fs_rex_free);
else
qse_awk_freerex (rtx->awk, fs_rex_free, QSE_NULL);
}
return -1; return -1;
} }
@ -832,7 +850,7 @@ static int fnc_tolower (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
for (i = 0; i < str.len; i++) str.ptr[i] = QSE_AWK_TOLOWER (run->awk, str.ptr[i]); for (i = 0; i < str.len; i++) str.ptr[i] = QSE_AWK_TOLOWER (run->awk, str.ptr[i]);
r = qse_awk_rtx_makestrvalwithcstr (run, &str); r = qse_awk_rtx_makestrvalwithcstr (run, (qse_cstr_t*)&str);
if (r == QSE_NULL) if (r == QSE_NULL)
{ {
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str.ptr); if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str.ptr);
@ -869,7 +887,7 @@ static int fnc_toupper (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
for (i = 0; i < str.len; i++) str.ptr[i] = QSE_AWK_TOUPPER (run->awk, str.ptr[i]); for (i = 0; i < str.len; i++) str.ptr[i] = QSE_AWK_TOUPPER (run->awk, str.ptr[i]);
r = qse_awk_rtx_makestrvalwithcstr (run, &str); r = qse_awk_rtx_makestrvalwithcstr (run, (qse_cstr_t*)&str);
if (r == QSE_NULL) if (r == QSE_NULL)
{ {
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str.ptr); if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str.ptr);
@ -881,11 +899,10 @@ static int fnc_toupper (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
return 0; return 0;
} }
static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count) static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
{ {
qse_size_t nargs; qse_size_t nargs;
qse_awk_val_t* a0, * a1, * a2, ** a2_ref, * v; qse_awk_val_t* a0, * a1, * a2, * v;
qse_cstr_t s0, s1, s2; qse_cstr_t s0, s1, s2;
const qse_char_t* s2_end; const qse_char_t* s2_end;
@ -898,7 +915,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
void* rex_free = QSE_NULL; void* rex_free = QSE_NULL;
qse_str_t new; qse_str_t new;
int new_inited = 0, opt; int new_inited = 0;
qse_cstr_t mat, pmat, cur; qse_cstr_t mat, pmat, cur;
@ -915,7 +932,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (a0->type == QSE_AWK_VAL_REX) if (a0->type == QSE_AWK_VAL_REX)
{ {
rex = ((qse_awk_val_rex_t*)a0)->code; rex = ((qse_awk_val_rex_t*)a0)->code[run->gbl.ignorecase];
} }
else if (a0->type == QSE_AWK_VAL_STR) else if (a0->type == QSE_AWK_VAL_STR)
{ {
@ -964,10 +981,14 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (a0->type != QSE_AWK_VAL_REX) if (a0->type != QSE_AWK_VAL_REX)
{ {
qse_awk_errnum_t errnum; qse_awk_errnum_t errnum;
int x;
rex = qse_awk_buildrex ( if (run->gbl.ignorecase)
run->awk, s0.ptr, s0.len, &errnum); x = qse_awk_buildrex (run->awk, s0.ptr, s0.len, &errnum, QSE_NULL, &rex);
if (rex == QSE_NULL) else
x = qse_awk_buildrex (run->awk, s0.ptr, s0.len, &errnum, &rex, QSE_NULL);
if (x <= -1)
{ {
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL); qse_awk_rtx_seterrnum (run, errnum, QSE_NULL);
goto oops; goto oops;
@ -976,8 +997,6 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
rex_free = rex; rex_free = rex;
} }
opt = (run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0;
s2_end = s2.ptr + s2.len; s2_end = s2.ptr + s2.len;
cur.ptr = s2.ptr; cur.ptr = s2.ptr;
cur.len = s2.len; cur.len = s2.len;
@ -997,7 +1016,8 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (max_count == 0 || sub_count < max_count) if (max_count == 0 || sub_count < max_count)
{ {
n = qse_awk_matchrex ( n = qse_awk_matchrex (
run->awk, rex, opt, &s2, &cur, &mat, &errnum run->awk, rex, run->gbl.ignorecase,
&s2, &cur, &mat, &errnum
); );
} }
else n = 0; else n = 0;
@ -1085,7 +1105,10 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (rex_free) if (rex_free)
{ {
qse_awk_freerex (run->awk, rex_free); if (run->gbl.ignorecase)
qse_awk_freerex (run->awk, QSE_NULL, rex_free);
else
qse_awk_freerex (run->awk, rex_free, QSE_NULL);
rex_free = QSE_NULL; rex_free = QSE_NULL;
} }
@ -1104,7 +1127,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
v = qse_awk_rtx_makestrvalwithcstr (run, QSE_STR_CSTR(&new)); v = qse_awk_rtx_makestrvalwithcstr (run, QSE_STR_CSTR(&new));
if (v == QSE_NULL) goto oops; if (v == QSE_NULL) goto oops;
qse_awk_rtx_refupval (run, v); qse_awk_rtx_refupval (run, v);
n = qse_awk_rtx_setrefval (run, a2, v); n = qse_awk_rtx_setrefval (run, (qse_awk_val_ref_t*)a2, v);
qse_awk_rtx_refdownval (run, v); qse_awk_rtx_refdownval (run, v);
if (n <= -1) goto oops; if (n <= -1) goto oops;
} }
@ -1123,7 +1146,13 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
return 0; return 0;
oops: oops:
if (rex_free) qse_awk_freerex (run->awk, rex_free); if (rex_free)
{
if (run->gbl.ignorecase)
qse_awk_freerex (run->awk, QSE_NULL, rex_free);
else
qse_awk_freerex (run->awk, rex_free, QSE_NULL);
}
if (new_inited) qse_str_fini (&new); if (new_inited) qse_str_fini (&new);
if (s2_free) QSE_AWK_FREE (run->awk, s2_free); if (s2_free) QSE_AWK_FREE (run->awk, s2_free);
if (s1_free) QSE_AWK_FREE (run->awk, s1_free); if (s1_free) QSE_AWK_FREE (run->awk, s1_free);
@ -1145,13 +1174,11 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
{ {
qse_size_t nargs; qse_size_t nargs;
qse_awk_val_t* a0, * a1; qse_awk_val_t* a0, * a1;
qse_char_t* str0, * str1; qse_char_t* str0;
qse_size_t len0, len1; qse_size_t len0;
qse_long_t idx, start = 1; qse_long_t idx, start = 1;
void* rex;
int n; int n;
qse_cstr_t mat; qse_cstr_t mat;
qse_awk_errnum_t errnum;
nargs = qse_awk_rtx_getnargs (rtx); nargs = qse_awk_rtx_getnargs (rtx);
QSE_ASSERT (nargs >= 2 && nargs <= 3); QSE_ASSERT (nargs >= 2 && nargs <= 3);
@ -1195,42 +1222,6 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
if (str0 == QSE_NULL) return -1; if (str0 == QSE_NULL) return -1;
} }
if (a1->type == QSE_AWK_VAL_REX)
{
rex = ((qse_awk_val_rex_t*)a1)->code;
}
else
{
qse_awk_errnum_t errnum;
if (a1->type == QSE_AWK_VAL_STR)
{
str1 = ((qse_awk_val_str_t*)a1)->val.ptr;
len1 = ((qse_awk_val_str_t*)a1)->val.len;
}
else
{
str1 = qse_awk_rtx_valtostrdup (rtx, a1, &len1);
if (str1 == QSE_NULL)
{
if (a0->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (rtx->awk, str0);
return -1;
}
}
rex = qse_awk_buildrex (rtx->awk, str1, len1, &errnum);
if (rex == QSE_NULL)
{
if (a0->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (rtx->awk, str0);
qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
return -1;
}
if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1);
}
if (start == 0) start = 1; if (start == 0) start = 1;
else if (start < 0) start = len0 + start + 1; else if (start < 0) start = len0 + start + 1;
@ -1239,24 +1230,15 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
{ {
qse_cstr_t tmp; qse_cstr_t tmp;
/*TODO: must use str0,len0?*/ /*TODO: must use str0,len0? */
tmp.ptr = str0 + start - 1; tmp.ptr = str0 + start - 1;
tmp.len = len0 - start + 1; tmp.len = len0 - start + 1;
n = qse_awk_matchrex (
rtx->awk, rex, n = qse_awk_rtx_matchrex (rtx, a1, &tmp, &tmp, &mat);
(rtx->gbl.ignorecase? QSE_REX_IGNORECASE: 0), if (n <= -1) return -1;
&tmp, &tmp, &mat, &errnum
);
} }
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0); if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0);
if (a1->type != QSE_AWK_VAL_REX) qse_awk_freerex (rtx->awk, rex);
if (n <= -1)
{
qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
return -1;
}
idx = (n == 0)? 0: ((qse_long_t)(mat.ptr-str0) + 1); idx = (n == 0)? 0: ((qse_long_t)(mat.ptr-str0) + 1);
@ -1338,7 +1320,7 @@ static int fnc_sprintf (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, cs0.ptr); if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, cs0.ptr);
if (x.ptr == QSE_NULL) goto oops; if (x.ptr == QSE_NULL) goto oops;
a0 = qse_awk_rtx_makestrvalwithcstr (run, &x); a0 = qse_awk_rtx_makestrvalwithcstr (run, (qse_cstr_t*)&x);
if (a0 == QSE_NULL) goto oops; if (a0 == QSE_NULL) goto oops;
qse_str_fini (&fbu); qse_str_fini (&fbu);

View File

@ -20,7 +20,7 @@
#include "awk.h" #include "awk.h"
#define USE_REX /*#define USE_REX */
#if defined(USE_REX) #if defined(USE_REX)
# include <qse/cmn/rex.h> # include <qse/cmn/rex.h>
@ -892,8 +892,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
while (cursub.len > 0) while (cursub.len > 0)
{ {
n = qse_awk_matchrex ( n = qse_awk_matchrex (
rtx->awk, rex, rtx->awk, rex, rtx->gbl.ignorecase,
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
&s, &cursub, &match, errnum); &s, &cursub, &match, errnum);
if (n == -1) return QSE_NULL; if (n == -1) return QSE_NULL;
if (n == 0) if (n == 0)
@ -1090,56 +1089,97 @@ static QSE_INLINE int rexerr_to_errnum (int err)
} }
} }
/**
 * The qse_awk_buildrex() function compiles the pattern of @a len characters
 * pointed to by @a ptn. The case-sensitive form is stored into @a *code and
 * the case-insensitive form into @a *icode. Pass #QSE_NULL for a form that
 * is not needed; nothing is compiled if both are #QSE_NULL.
 *
 * When USE_REX is defined, a single compiled object is stored into both
 * outputs because case sensitivity is chosen when matching. Otherwise, TRE
 * is used and a separate object is compiled for each requested form since
 * ignoring case is a compile option for TRE. The results are released with
 * qse_awk_freerex().
 *
 * @return 0 on success, -1 on failure. On failure, @a *errnum is set and
 *         neither @a *code nor @a *icode is written.
 */
int qse_awk_buildrex (
	qse_awk_t* awk, const qse_char_t* ptn, qse_size_t len,
	qse_awk_errnum_t* errnum, void** code, void** icode)
{
#if defined(USE_REX)
	qse_rex_errnum_t err;
	void* p;

	if (code || icode)
	{
		p = qse_buildrex (
			awk->mmgr, awk->opt.depth.s.rex_build,
			((awk->opt.trait & QSE_AWK_REXBOUND)? 0: QSE_REX_NOBOUND),
			ptn, len, &err
		);
		if (p == QSE_NULL)
		{
			*errnum = rexerr_to_errnum(err);
			return -1;
		}

		/* one compiled object serves both forms. qse_awk_freerex()
		 * frees it once only when given the same pointer twice */
		if (code) *code = p;
		if (icode) *icode = p;
	}

	return 0;
#else
	qse_tre_t* tre = QSE_NULL;
	qse_tre_t* itre = QSE_NULL;
	int opt = QSE_TRE_EXTENDED;

	/* the bound option must apply to both forms. so it is set
	 * before either of them is compiled */
	if (!(awk->opt.trait & QSE_AWK_REXBOUND)) opt |= QSE_TRE_NOBOUND;

	if (code)
	{
		tre = qse_tre_open (awk->mmgr, 0);
		if (tre == QSE_NULL)
		{
			*errnum = QSE_AWK_ENOMEM;
			return -1;
		}

		if (qse_tre_compx (tre, ptn, len, QSE_NULL, opt) <= -1)
		{
			/* TODO: report QSE_AWK_EREXBL along with the pattern text */
			*errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)?
				QSE_AWK_ENOMEM: QSE_AWK_EREXBL;
			qse_tre_close (tre);
			return -1;
		}
	}

	if (icode)
	{
		itre = qse_tre_open (awk->mmgr, 0);
		if (itre == QSE_NULL)
		{
			if (tre) qse_tre_close (tre);
			*errnum = QSE_AWK_ENOMEM;
			return -1;
		}

		/* ignorecase is a compile option for TRE */
		if (qse_tre_compx (itre, ptn, len, QSE_NULL, opt | QSE_TRE_IGNORECASE) <= -1)
		{
			/* the error number must be read from itre, the object
			 * that failed. tre is QSE_NULL if only icode is requested */
			*errnum = (QSE_TRE_ERRNUM(itre) == QSE_TRE_ENOMEM)?
				QSE_AWK_ENOMEM: QSE_AWK_EREXBL;
			qse_tre_close (itre);
			if (tre) qse_tre_close (tre);
			return -1;
		}
	}

	if (code) *code = tre;
	if (icode) *icode = itre;
	return 0;
#endif
}
#if !defined(USE_REX) #if !defined(USE_REX)
static int matchtre ( static int matchtre (
@ -1192,7 +1232,7 @@ static int matchtre (
#endif #endif
int qse_awk_matchrex ( int qse_awk_matchrex (
qse_awk_t* awk, void* code, int option, qse_awk_t* awk, void* code, int icase,
const qse_cstr_t* str, const qse_cstr_t* substr, const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match, qse_awk_errnum_t* errnum) qse_cstr_t* match, qse_awk_errnum_t* errnum)
{ {
@ -1201,8 +1241,8 @@ int qse_awk_matchrex (
qse_rex_errnum_t err; qse_rex_errnum_t err;
x = qse_matchrex ( x = qse_matchrex (
awk->mmgr, awk->opt.depth.s.rex_match, awk->mmgr, awk->opt.depth.s.rex_match, code,
code, option, str, substr, match, &err); (icase? QSE_REX_IGNORECASE: 0), str, substr, match, &err);
if (x <= -1) *errnum = rexerr_to_errnum(err); if (x <= -1) *errnum = rexerr_to_errnum(err);
return x; return x;
#else #else
@ -1218,13 +1258,102 @@ int qse_awk_matchrex (
#endif #endif
} }
/**
 * The qse_awk_freerex() function releases the compiled regular expressions
 * @a code and @a icode. Either may be #QSE_NULL. If both hold the same
 * pointer, the object is released once only.
 */
void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode)
{
	void* victim[2];
	int i;

	victim[0] = code;
	/* drop the second candidate if it is the same object as the first */
	victim[1] = (icode == code)? QSE_NULL: icode;

	for (i = 0; i < 2; i++)
	{
		if (victim[i] == QSE_NULL) continue;

#if defined(USE_REX)
		qse_freerex ((awk)->mmgr, victim[i]);
#else
		qse_tre_close (victim[i]);
#endif
	}
}
/**
 * The qse_awk_rtx_matchrex() function matches @a substr against the regular
 * expression given by @a val, honoring the current rtx->gbl.ignorecase.
 *
 * If @a val is a #QSE_AWK_VAL_REX value, its precompiled code for the
 * current case mode is used. Otherwise, @a val is taken as a string, or
 * converted to one, and compiled for this call only and released before
 * the function returns.
 *
 * @return the result of the underlying matcher. If it is -1 or less,
 *         the error number has been set on @a rtx.
 */
int qse_awk_rtx_matchrex (
	qse_awk_rtx_t* rtx, qse_awk_val_t* val,
	const qse_cstr_t* str, const qse_cstr_t* substr, qse_cstr_t* match)
{
	int icase = rtx->gbl.ignorecase;
	int precompiled = (val->type == QSE_AWK_VAL_REX);
	void* code;
	int x;
	qse_awk_errnum_t awkerr;
#if defined(USE_REX)
	qse_rex_errnum_t rexerr;
#endif

	if (precompiled)
	{
		code = ((qse_awk_val_rex_t*)val)->code[icase];
	}
	else
	{
		const qse_char_t* ptn_ptr;
		qse_size_t ptn_len;
		qse_char_t* dup = QSE_NULL;
		/* request the one form that matches the current case mode */
		void** plain = icase? QSE_NULL: &code;
		void** folded = icase? &code: QSE_NULL;

		if (val->type == QSE_AWK_VAL_STR)
		{
			/* a string value supplies the pattern text as it is */
			ptn_ptr = ((qse_awk_val_str_t*)val)->val.ptr;
			ptn_len = ((qse_awk_val_str_t*)val)->val.len;
		}
		else
		{
			/* any other value is converted to a string first */
			dup = qse_awk_rtx_valtostrdup (rtx, val, &ptn_len);
			if (dup == QSE_NULL) return -1;
			ptn_ptr = dup;
		}

		x = qse_awk_buildrex (rtx->awk, ptn_ptr, ptn_len, &awkerr, plain, folded);
		if (dup) qse_awk_rtx_freemem (rtx, dup);
		if (x <= -1)
		{
			qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL);
			return -1;
		}
	}

#if defined(USE_REX)
	x = qse_matchrex (
		rtx->awk->mmgr, rtx->awk->opt.depth.s.rex_match,
		code, (icase? QSE_REX_IGNORECASE: 0),
		str, substr, match, &rexerr);
	if (x <= -1) qse_awk_rtx_seterrnum (rtx, rexerr_to_errnum(rexerr), QSE_NULL);
#else
	{
		/* QSE_TRE_NOTBOL is added if substr does not begin where str begins */
		int eflags = QSE_TRE_BACKTRACKING;
		if (str->ptr != substr->ptr) eflags |= QSE_TRE_NOTBOL;

		x = matchtre (rtx->awk, code, eflags, substr, match, QSE_NULL, &awkerr);
		if (x <= -1) qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL);
	}
#endif

	/* code compiled for this call is released here. precompiled code
	 * remains owned by the regular expression value */
	if (!precompiled)
	{
		qse_awk_freerex (rtx->awk, (icase? QSE_NULL: code), (icase? code: QSE_NULL));
	}

	return x;
}
void* qse_awk_rtx_allocmem (qse_awk_rtx_t* rtx, qse_size_t size) void* qse_awk_rtx_allocmem (qse_awk_rtx_t* rtx, qse_size_t size)

View File

@ -63,20 +63,28 @@ qse_char_t* qse_awk_rtx_strxnfld (
qse_cstr_t* tok qse_cstr_t* tok
); );
void* qse_awk_buildrex ( int qse_awk_buildrex (
qse_awk_t* awk, qse_awk_t* awk,
const qse_char_t* ptn, const qse_char_t* ptn,
qse_size_t len, qse_size_t len,
qse_awk_errnum_t* errnum qse_awk_errnum_t* errnum,
void** code,
void** icode
); );
int qse_awk_matchrex ( int qse_awk_matchrex (
qse_awk_t* awk, void* code, int option, qse_awk_t* awk, void* code, int icase,
const qse_cstr_t* str, const qse_cstr_t* substr, const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match, qse_awk_errnum_t* errnum qse_cstr_t* match, qse_awk_errnum_t* errnum
); );
void qse_awk_freerex (qse_awk_t* awk, void* code); void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode);
int qse_awk_rtx_matchrex (
qse_awk_rtx_t* rtx, qse_awk_val_t* val,
const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match
);
int qse_awk_sprintflt ( int qse_awk_sprintflt (
qse_awk_t* awk, qse_awk_t* awk,

View File

@ -4351,9 +4351,7 @@ static qse_awk_nde_t* parse_primary_rex (qse_awk_t* awk, const qse_awk_loc_t* x
nde->str.ptr = qse_awk_cstrdup (awk, QSE_STR_CSTR(awk->tok.name)); nde->str.ptr = qse_awk_cstrdup (awk, QSE_STR_CSTR(awk->tok.name));
if (nde->str.ptr == QSE_NULL) goto oops; if (nde->str.ptr == QSE_NULL) goto oops;
nde->code = qse_awk_buildrex ( if (qse_awk_buildrex (awk, QSE_STR_PTR(awk->tok.name), QSE_STR_LEN(awk->tok.name), &errnum, &nde->code[0], &nde->code[1]) <= -1)
awk, QSE_STR_PTR(awk->tok.name), QSE_STR_LEN(awk->tok.name), &errnum);
if (nde->code == QSE_NULL)
{ {
SETERR_LOC (awk, errnum, xloc); SETERR_LOC (awk, errnum, xloc);
goto oops; goto oops;
@ -4365,7 +4363,7 @@ static qse_awk_nde_t* parse_primary_rex (qse_awk_t* awk, const qse_awk_loc_t* x
oops: oops:
QSE_ASSERT (nde != QSE_NULL); QSE_ASSERT (nde != QSE_NULL);
if (nde->code) qse_awk_freerex (awk, nde->code); if (nde->code[0]) qse_awk_freerex (awk, nde->code[0], nde->code[1]);
if (nde->str.ptr) QSE_AWK_FREE (awk, nde->str.ptr); if (nde->str.ptr) QSE_AWK_FREE (awk, nde->str.ptr);
QSE_AWK_FREE (awk, nde); QSE_AWK_FREE (awk, nde);
return QSE_NULL; return QSE_NULL;

View File

@ -122,13 +122,8 @@ static int split_record (qse_awk_rtx_t* rtx)
} }
else else
{ {
qse_awk_rtx_valtostr_out_t out; fs_ptr = qse_awk_rtx_valtostrdup (rtx, fs, &fs_len);
if (fs_ptr == QSE_NULL) return -1;
out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP;
if (qse_awk_rtx_valtostr (rtx, fs, &out) <= -1) return -1;
fs_ptr = out.u.cpldup.ptr;
fs_len = out.u.cpldup.len;
fs_free = fs_ptr; fs_free = fs_ptr;
} }
@ -178,7 +173,7 @@ static int split_record (qse_awk_rtx_t* rtx)
QSE_STR_PTR(&rtx->inrec.line), QSE_STR_PTR(&rtx->inrec.line),
QSE_STR_LEN(&rtx->inrec.line), QSE_STR_LEN(&rtx->inrec.line),
p, len, p, len,
rtx->gbl.fs, &tok, &errnum rtx->gbl.fs[rtx->gbl.ignorecase], &tok, &errnum
); );
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR) if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{ {
@ -268,7 +263,7 @@ static int split_record (qse_awk_rtx_t* rtx)
QSE_STR_PTR(&rtx->inrec.line), QSE_STR_PTR(&rtx->inrec.line),
QSE_STR_LEN(&rtx->inrec.line), QSE_STR_LEN(&rtx->inrec.line),
p, len, p, len,
rtx->gbl.fs, &tok, &errnum rtx->gbl.fs[rtx->gbl.ignorecase], &tok, &errnum
); );
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR) if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{ {

View File

@ -223,12 +223,12 @@ static QSE_INLINE int match_long_rs (
qse_awk_errnum_t errnum; qse_awk_errnum_t errnum;
int ret; int ret;
QSE_ASSERT (run->gbl.rs != QSE_NULL); QSE_ASSERT (run->gbl.rs[0] != QSE_NULL);
QSE_ASSERT (run->gbl.rs[1] != QSE_NULL);
ret = qse_awk_matchrex ( ret = qse_awk_matchrex (
run->awk, run->gbl.rs, run->awk, run->gbl.rs[run->gbl.ignorecase],
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0), run->gbl.ignorecase, QSE_STR_CSTR(buf), QSE_STR_CSTR(buf),
QSE_STR_CSTR(buf), QSE_STR_CSTR(buf),
&match, &errnum); &match, &errnum);
if (ret <= -1) if (ret <= -1)
{ {

View File

@ -464,12 +464,11 @@ static int set_global (
if (fs_len > 1 && !(fs_len == 5 && fs_ptr[0] == QSE_T('?'))) if (fs_len > 1 && !(fs_len == 5 && fs_ptr[0] == QSE_T('?')))
{ {
void* rex; void* rex, * irex;
qse_awk_errnum_t errnum; qse_awk_errnum_t errnum;
rex = qse_awk_buildrex ( if (qse_awk_buildrex (rtx->awk, fs_ptr, fs_len, &errnum, &rex, &irex) <= -1)
rtx->awk, fs_ptr, fs_len, &errnum);
if (rex == QSE_NULL)
{ {
SETERR_COD (rtx, errnum); SETERR_COD (rtx, errnum);
if (val->type != QSE_AWK_VAL_STR) if (val->type != QSE_AWK_VAL_STR)
@ -477,14 +476,14 @@ static int set_global (
return -1; return -1;
} }
if (rtx->gbl.fs != QSE_NULL) if (rtx->gbl.fs[0])
qse_awk_freerex (rtx->awk, rtx->gbl.fs); qse_awk_freerex (rtx->awk, rtx->gbl.fs[0], rtx->gbl.fs[1]);
rtx->gbl.fs = rex; rtx->gbl.fs[0] = rex;
rtx->gbl.fs[1] = irex;
} }
if (val->type != QSE_AWK_VAL_STR) if (val->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, fs_ptr);
QSE_AWK_FREE (rtx->awk, fs_ptr);
break; break;
} }
@ -613,33 +612,31 @@ static int set_global (
rss = out.u.cpldup; rss = out.u.cpldup;
} }
if (rtx->gbl.rs) if (rtx->gbl.rs[0])
{ {
qse_awk_freerex (rtx->awk, rtx->gbl.rs); qse_awk_freerex (rtx->awk, rtx->gbl.rs[0], rtx->gbl.rs[1]);
rtx->gbl.rs = QSE_NULL; rtx->gbl.rs[0] = QSE_NULL;
rtx->gbl.rs[1] = QSE_NULL;
} }
if (rss.len > 1) if (rss.len > 1)
{ {
void* rex; void* rex, * irex;
qse_awk_errnum_t errnum; qse_awk_errnum_t errnum;
/* compile the regular expression */ /* compile the regular expression */
rex = qse_awk_buildrex ( if (qse_awk_buildrex (rtx->awk, rss.ptr, rss.len, &errnum, &rex, &irex) <= -1)
rtx->awk, rss.ptr, rss.len, &errnum);
if (rex == QSE_NULL)
{ {
SETERR_COD (rtx, errnum); SETERR_COD (rtx, errnum);
if (val->type != QSE_AWK_VAL_STR) if (val->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, rss.ptr);
QSE_AWK_FREE (rtx->awk, rss.ptr);
return -1; return -1;
} }
rtx->gbl.rs = rex; rtx->gbl.rs[0] = rex;
rtx->gbl.rs[1] = irex;
} }
if (val->type != QSE_AWK_VAL_STR) if (val->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, rss.ptr);
QSE_AWK_FREE (rtx->awk, rss.ptr);
break; break;
} }
@ -1016,8 +1013,10 @@ static int init_rtx (qse_awk_rtx_t* rtx, qse_awk_t* awk, qse_awk_rio_t* rio)
rtx->rio.chain = QSE_NULL; rtx->rio.chain = QSE_NULL;
} }
rtx->gbl.rs = QSE_NULL; rtx->gbl.rs[0] = QSE_NULL;
rtx->gbl.fs = QSE_NULL; rtx->gbl.rs[1] = QSE_NULL;
rtx->gbl.fs[0] = QSE_NULL;
rtx->gbl.fs[1] = QSE_NULL;
rtx->gbl.ignorecase = 0; rtx->gbl.ignorecase = 0;
return 0; return 0;
@ -1051,15 +1050,17 @@ static void fini_rtx (qse_awk_rtx_t* rtx, int fini_globals)
qse_awk_rtx_cleario (rtx); qse_awk_rtx_cleario (rtx);
QSE_ASSERT (rtx->rio.chain == QSE_NULL); QSE_ASSERT (rtx->rio.chain == QSE_NULL);
if (rtx->gbl.rs) if (rtx->gbl.rs[0])
{ {
qse_awk_freerex (rtx->awk, rtx->gbl.rs); qse_awk_freerex (rtx->awk, rtx->gbl.rs[0], rtx->gbl.rs[1]);
rtx->gbl.rs = QSE_NULL; rtx->gbl.rs[0] = QSE_NULL;
rtx->gbl.rs[1] = QSE_NULL;
} }
if (rtx->gbl.fs) if (rtx->gbl.fs[0])
{ {
qse_awk_freerex (rtx->awk, rtx->gbl.fs); qse_awk_freerex (rtx->awk, rtx->gbl.fs[0], rtx->gbl.fs[1]);
rtx->gbl.fs = QSE_NULL; rtx->gbl.fs[0] = QSE_NULL;
rtx->gbl.fs[1] = QSE_NULL;
} }
if (rtx->gbl.convfmt.ptr != QSE_NULL && if (rtx->gbl.convfmt.ptr != QSE_NULL &&
@ -3208,7 +3209,6 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
{ {
qse_awk_val_t* v; qse_awk_val_t* v;
int n; int n;
qse_awk_errnum_t errnum;
#if 0 #if 0
if (rtx->exit_level >= EXIT_GLOBAL) if (rtx->exit_level >= EXIT_GLOBAL)
@ -3230,9 +3230,11 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
qse_cstr_t vs; qse_cstr_t vs;
int opt = 0; int opt = 0;
if (((qse_awk_rtx_t*)rtx)->gbl.ignorecase) /* special case where a regular expression is used in
opt = QSE_REX_IGNORECASE; * without any match operators:
* print /abc/;
* perform match against $0.
*/
qse_awk_rtx_refupval (rtx, v); qse_awk_rtx_refupval (rtx, v);
if (rtx->inrec.d0->type == QSE_AWK_VAL_NIL) if (rtx->inrec.d0->type == QSE_AWK_VAL_NIL)
@ -3253,23 +3255,13 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
vs.len = ((qse_awk_val_str_t*)rtx->inrec.d0)->val.len; vs.len = ((qse_awk_val_str_t*)rtx->inrec.d0)->val.len;
} }
n = qse_awk_matchrex ( n = qse_awk_rtx_matchrex (rtx, v, &vs, &vs, QSE_NULL);
((qse_awk_rtx_t*)rtx)->awk, if (n <= -1)
((qse_awk_val_rex_t*)v)->code,
opt, &vs, &vs, QSE_NULL, &errnum
);
if (n <= -1)
{ {
ADJERR_LOC (rtx, &nde->loc);
qse_awk_rtx_refdownval (rtx, v); qse_awk_rtx_refdownval (rtx, v);
/* matchrex should never set the error number
* whose message contains a formatting
* character. otherwise, the following way of
* setting the error information may not work */
SETERR_LOC (rtx, errnum, &nde->loc);
return QSE_NULL; return QSE_NULL;
} }
qse_awk_rtx_refdownval (rtx, v); qse_awk_rtx_refdownval (rtx, v);
v = qse_awk_rtx_makeintval (rtx, (n != 0)); v = qse_awk_rtx_makeintval (rtx, (n != 0));
@ -4974,115 +4966,50 @@ static qse_awk_val_t* eval_binop_match0 (
{ {
qse_awk_val_t* res; qse_awk_val_t* res;
int n; int n;
qse_awk_errnum_t errnum;
void* rex_code;
if (right->type == QSE_AWK_VAL_REX)
{
rex_code = ((qse_awk_val_rex_t*)right)->code;
}
else if (right->type == QSE_AWK_VAL_STR)
{
rex_code = qse_awk_buildrex (
rtx->awk,
((qse_awk_val_str_t*)right)->val.ptr,
((qse_awk_val_str_t*)right)->val.len, &errnum);
if (rex_code == QSE_NULL)
{
SETERR_LOC (rtx, errnum, rloc);
return QSE_NULL;
}
}
else
{
qse_awk_rtx_valtostr_out_t out;
out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP;
if (qse_awk_rtx_valtostr (rtx, right, &out) <= -1) return QSE_NULL;
rex_code = qse_awk_buildrex (
rtx->awk, out.u.cpldup.ptr, out.u.cpldup.len, &errnum);
if (rex_code == QSE_NULL)
{
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
SETERR_LOC (rtx, errnum, rloc);
return QSE_NULL;
}
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
}
if (left->type == QSE_AWK_VAL_STR) if (left->type == QSE_AWK_VAL_STR)
{ {
n = qse_awk_matchrex ( n = qse_awk_rtx_matchrex (
rtx->awk, rex_code, rtx, right,
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
xstr_to_cstr(&((qse_awk_val_str_t*)left)->val), xstr_to_cstr(&((qse_awk_val_str_t*)left)->val),
xstr_to_cstr(&((qse_awk_val_str_t*)left)->val), xstr_to_cstr(&((qse_awk_val_str_t*)left)->val), QSE_NULL);
QSE_NULL, &errnum); if (n <= -1)
if (n == -1)
{ {
if (right->type != QSE_AWK_VAL_REX) ADJERR_LOC (rtx, lloc);
qse_awk_freerex (rtx->awk, rex_code);
SETERR_LOC (rtx, errnum, lloc);
return QSE_NULL; return QSE_NULL;
} }
res = qse_awk_rtx_makeintval (rtx, (n == ret)); res = qse_awk_rtx_makeintval (rtx, (n == ret));
if (res == QSE_NULL) if (res == QSE_NULL)
{ {
if (right->type != QSE_AWK_VAL_REX)
qse_awk_freerex (rtx->awk, rex_code);
ADJERR_LOC (rtx, lloc); ADJERR_LOC (rtx, lloc);
return QSE_NULL; return QSE_NULL;
} }
} }
else else
{ {
qse_awk_rtx_valtostr_out_t out; qse_xstr_t out;
out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP; out.ptr = qse_awk_rtx_valtostrdup (rtx, left, &out.len);
if (qse_awk_rtx_valtostr (rtx, left, &out) <= -1) if (out.ptr == QSE_NULL) return QSE_NULL;
n = qse_awk_rtx_matchrex (rtx, right, &out, &out, QSE_NULL);
QSE_AWK_FREE (rtx->awk, out.ptr);
if (n <= -1)
{ {
if (right->type != QSE_AWK_VAL_REX) ADJERR_LOC (rtx, lloc);
qse_awk_freerex (rtx->awk, rex_code);
return QSE_NULL;
}
n = qse_awk_matchrex (
rtx->awk, rex_code,
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
xstr_to_cstr(&out.u.cpldup),
xstr_to_cstr(&out.u.cpldup),
QSE_NULL, &errnum
);
if (n == -1)
{
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
if (right->type != QSE_AWK_VAL_REX)
qse_awk_freerex (rtx->awk, rex_code);
SETERR_LOC (rtx, errnum, lloc);
return QSE_NULL; return QSE_NULL;
} }
res = qse_awk_rtx_makeintval (rtx, (n == ret)); res = qse_awk_rtx_makeintval (rtx, (n == ret));
if (res == QSE_NULL) if (res == QSE_NULL)
{ {
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
if (right->type != QSE_AWK_VAL_REX)
qse_awk_freerex (rtx->awk, rex_code);
ADJERR_LOC (rtx, lloc); ADJERR_LOC (rtx, lloc);
return QSE_NULL; return QSE_NULL;
} }
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
} }
if (right->type != QSE_AWK_VAL_REX) qse_awk_freerex (rtx->awk, rex_code);
return res; return res;
} }
@ -6307,7 +6234,8 @@ static qse_awk_val_t* eval_rex (qse_awk_rtx_t* run, qse_awk_nde_t* nde)
val = qse_awk_rtx_makerexval (run, val = qse_awk_rtx_makerexval (run,
&((qse_awk_nde_rex_t*)nde)->str, &((qse_awk_nde_rex_t*)nde)->str,
((qse_awk_nde_rex_t*)nde)->code); ((qse_awk_nde_rex_t*)nde)->code
);
if (val == QSE_NULL) ADJERR_LOC (run, &nde->loc); if (val == QSE_NULL) ADJERR_LOC (run, &nde->loc);
return val; return val;

View File

@ -1305,7 +1305,8 @@ void qse_awk_clrpt (qse_awk_t* awk, qse_awk_nde_t* tree)
case QSE_AWK_NDE_REX: case QSE_AWK_NDE_REX:
{ {
qse_awk_freerex (awk, ((qse_awk_nde_rex_t*)p)->code); qse_awk_nde_rex_t* rex = (qse_awk_nde_rex_t*)p;
qse_awk_freerex (awk, rex->code[0], rex->code[1]);
QSE_AWK_FREE (awk, ((qse_awk_nde_rex_t*)p)->str.ptr); QSE_AWK_FREE (awk, ((qse_awk_nde_rex_t*)p)->str.ptr);
QSE_AWK_FREE (awk, p); QSE_AWK_FREE (awk, p);
break; break;
@ -1318,8 +1319,7 @@ void qse_awk_clrpt (qse_awk_t* awk, qse_awk_nde_t* tree)
{ {
qse_awk_nde_var_t* px = (qse_awk_nde_var_t*)p; qse_awk_nde_var_t* px = (qse_awk_nde_var_t*)p;
QSE_ASSERT (px->idx == QSE_NULL); QSE_ASSERT (px->idx == QSE_NULL);
if (px->id.name.ptr != QSE_NULL) if (px->id.name.ptr) QSE_AWK_FREE (awk, px->id.name.ptr);
QSE_AWK_FREE (awk, px->id.name.ptr);
QSE_AWK_FREE (awk, p); QSE_AWK_FREE (awk, p);
break; break;
} }

View File

@ -155,7 +155,7 @@ struct qse_awk_nde_rex_t
{ {
QSE_AWK_NDE_HDR; QSE_AWK_NDE_HDR;
qse_xstr_t str; qse_xstr_t str;
void* code; void* code[2]; /* [0]: case sensitive, [1]: case insensitive */
}; };
/* QSE_AWK_NDE_NAMED, QSE_AWK_NDE_GBL, /* QSE_AWK_NDE_NAMED, QSE_AWK_NDE_GBL,

View File

@ -28,13 +28,14 @@
#define CHUNKSIZE QSE_AWK_VAL_CHUNK_SIZE #define CHUNKSIZE QSE_AWK_VAL_CHUNK_SIZE
static qse_awk_val_nil_t awk_nil = { QSE_AWK_VAL_NIL, 0, 1, 0 }; static qse_awk_val_nil_t awk_nil = { QSE_AWK_VAL_NIL, 0, 1, 0 };
static qse_awk_val_str_t awk_zls = { QSE_AWK_VAL_STR, 0, 1, 0, { QSE_T(""), 0 } }; static qse_awk_val_str_t awk_zls = { QSE_AWK_VAL_STR, 0, 1, 0, { QSE_T(""), 0 } };
qse_awk_val_t* qse_awk_val_nil = (qse_awk_val_t*)&awk_nil; qse_awk_val_t* qse_awk_val_nil = (qse_awk_val_t*)&awk_nil;
qse_awk_val_t* qse_awk_val_zls = (qse_awk_val_t*)&awk_zls; qse_awk_val_t* qse_awk_val_zls = (qse_awk_val_t*)&awk_zls;
static qse_awk_val_int_t awk_int[] = static qse_awk_val_int_t awk_int[] =
{ {
/* type ref stat nstr val nde */
{ QSE_AWK_VAL_INT, 0, 1, 0, -1, QSE_NULL }, { QSE_AWK_VAL_INT, 0, 1, 0, -1, QSE_NULL },
{ QSE_AWK_VAL_INT, 0, 1, 0, 0, QSE_NULL }, { QSE_AWK_VAL_INT, 0, 1, 0, 0, QSE_NULL },
{ QSE_AWK_VAL_INT, 0, 1, 0, 1, QSE_NULL }, { QSE_AWK_VAL_INT, 0, 1, 0, 1, QSE_NULL },
@ -216,7 +217,7 @@ qse_awk_val_t* qse_awk_rtx_makestrvalwithmbs (
return QSE_NULL; return QSE_NULL;
} }
v = qse_awk_rtx_makestrvalwithcstr (rtx, &tmp); v = qse_awk_rtx_makestrvalwithcstr (rtx, (qse_cstr_t*)&tmp);
QSE_AWK_FREE (rtx->awk, tmp.ptr); QSE_AWK_FREE (rtx->awk, tmp.ptr);
return v; return v;
#endif #endif
@ -268,7 +269,7 @@ qse_awk_val_t* qse_awk_rtx_makestrvalwithmcstr (
return QSE_NULL; return QSE_NULL;
} }
v = qse_awk_rtx_makestrvalwithcstr (rtx, &tmp); v = qse_awk_rtx_makestrvalwithcstr (rtx, (qse_cstr_t*)&tmp);
QSE_AWK_FREE (rtx->awk, tmp.ptr); QSE_AWK_FREE (rtx->awk, tmp.ptr);
return v; return v;
#endif #endif
@ -438,7 +439,7 @@ qse_awk_val_t* qse_awk_rtx_makenstrvalwithcstr (qse_awk_rtx_t* rtx, const qse_cs
} }
qse_awk_val_t* qse_awk_rtx_makerexval ( qse_awk_val_t* qse_awk_rtx_makerexval (
qse_awk_rtx_t* rtx, const qse_cstr_t* str, void* code) qse_awk_rtx_t* rtx, const qse_cstr_t* str, void* code[2])
{ {
qse_awk_val_rex_t* val; qse_awk_val_rex_t* val;
qse_size_t totsz; qse_size_t totsz;
@ -465,7 +466,8 @@ qse_awk_val_t* qse_awk_rtx_makerexval (
val->str.ptr = (qse_char_t*)(val + 1); val->str.ptr = (qse_char_t*)(val + 1);
qse_strncpy (val->str.ptr, str->ptr, str->len); qse_strncpy (val->str.ptr, str->ptr, str->len);
val->code = code; val->code[0] = code[0];
val->code[1] = code[1];
return (qse_awk_val_t*)val; return (qse_awk_val_t*)val;
} }
@ -824,7 +826,7 @@ void qse_awk_rtx_freeval (
/* code is just a pointer to a regular expression stored /* code is just a pointer to a regular expression stored
* in parse tree nodes. so don't free it. * in parse tree nodes. so don't free it.
qse_awk_freerex (rtx->awk, ((qse_awk_val_rex_t*)val)->code); qse_awk_freerex (rtx->awk, ((qse_awk_val_rex_t*)val)->code[0], ((qse_awk_val_rex_t*)val)->code[1]);
*/ */
QSE_AWK_FREE (rtx->awk, val); QSE_AWK_FREE (rtx->awk, val);
@ -895,7 +897,7 @@ void qse_awk_rtx_refdownval_nofree (qse_awk_rtx_t* rtx, qse_awk_val_t* val)
void qse_awk_rtx_freevalchunk (qse_awk_rtx_t* rtx, qse_awk_val_chunk_t* chunk) void qse_awk_rtx_freevalchunk (qse_awk_rtx_t* rtx, qse_awk_val_chunk_t* chunk)
{ {
while (chunk != QSE_NULL) while (chunk != QSE_NULL)
{ {
qse_awk_val_chunk_t* next = chunk->next; qse_awk_val_chunk_t* next = chunk->next;
QSE_AWK_FREE (rtx->awk, chunk); QSE_AWK_FREE (rtx->awk, chunk);
chunk = next; chunk = next;
@ -1717,7 +1719,7 @@ int qse_awk_rtx_setrefval (qse_awk_rtx_t* rtx, qse_awk_val_ref_t* ref, qse_awk_v
qse_awk_rtx_refupval (rtx, val); qse_awk_rtx_refupval (rtx, val);
x = qse_awk_rtx_setrec ( x = qse_awk_rtx_setrec (
rtx, (qse_size_t)ref->adr, rtx, (qse_size_t)ref->adr,
&((qse_awk_val_str_t*)val)->val (qse_cstr_t*)&((qse_awk_val_str_t*)val)->val
); );
qse_awk_rtx_refdownval (rtx, val); qse_awk_rtx_refdownval (rtx, val);
return x; return x;
@ -1730,7 +1732,7 @@ int qse_awk_rtx_setrefval (qse_awk_rtx_t* rtx, qse_awk_val_ref_t* ref, qse_awk_v
str.ptr = qse_awk_rtx_valtostrdup (rtx, val, &str.len); str.ptr = qse_awk_rtx_valtostrdup (rtx, val, &str.len);
qse_awk_rtx_refupval (rtx, val); qse_awk_rtx_refupval (rtx, val);
x = qse_awk_rtx_setrec (rtx, (qse_size_t)ref->adr, &str); x = qse_awk_rtx_setrec (rtx, (qse_size_t)ref->adr, (qse_cstr_t*)&str);
qse_awk_rtx_refdownval (rtx, val); qse_awk_rtx_refdownval (rtx, val);
QSE_AWK_FREE (rtx->awk, str.ptr); QSE_AWK_FREE (rtx->awk, str.ptr);
return x; return x;

View File

@ -60,11 +60,9 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
tre_ast_node_t *node; tre_ast_node_t *node;
node = tre_mem_calloc(mem, sizeof(*node)); node = tre_mem_calloc(mem, sizeof(*node));
if (!node) if (!node) return NULL;
return NULL;
node->obj = tre_mem_calloc(mem, size); node->obj = tre_mem_calloc(mem, size);
if (!node->obj) if (!node->obj) return NULL;
return NULL;
node->type = type; node->type = type;
node->nullable = -1; node->nullable = -1;
node->submatch_id = -1; node->submatch_id = -1;
@ -72,15 +70,13 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
return node; return node;
} }
tre_ast_node_t * tre_ast_node_t * tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
{ {
tre_ast_node_t *node; tre_ast_node_t *node;
tre_literal_t *lit; tre_literal_t *lit;
node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t)); node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
if (!node) if (!node) return NULL;
return NULL;
lit = node->obj; lit = node->obj;
lit->code_min = code_min; lit->code_min = code_min;
lit->code_max = code_max; lit->code_max = code_max;
@ -97,8 +93,7 @@ tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
tre_iteration_t *iter; tre_iteration_t *iter;
node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t)); node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
if (!node) if (!node) return NULL;
return NULL;
iter = node->obj; iter = node->obj;
iter->arg = arg; iter->arg = arg;
iter->min = min; iter->min = min;
@ -115,8 +110,7 @@ tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
tre_ast_node_t *node; tre_ast_node_t *node;
node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t)); node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
if (node == NULL) if (node == NULL) return NULL;
return NULL;
((tre_union_t *)node->obj)->left = left; ((tre_union_t *)node->obj)->left = left;
((tre_union_t *)node->obj)->right = right; ((tre_union_t *)node->obj)->right = right;
node->num_submatches = left->num_submatches + right->num_submatches; node->num_submatches = left->num_submatches + right->num_submatches;
@ -131,8 +125,7 @@ tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
tre_ast_node_t *node; tre_ast_node_t *node;
node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t)); node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
if (node == NULL) if (node == NULL) return NULL;
return NULL;
((tre_catenation_t *)node->obj)->left = left; ((tre_catenation_t *)node->obj)->left = left;
((tre_catenation_t *)node->obj)->right = right; ((tre_catenation_t *)node->obj)->right = right;
node->num_submatches = left->num_submatches + right->num_submatches; node->num_submatches = left->num_submatches + right->num_submatches;

View File

@ -88,14 +88,11 @@ tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
DPRINT(("add_tag_left: tag %d\n", tag_id)); DPRINT(("add_tag_left: tag %d\n", tag_id));
c = tre_mem_alloc(mem, sizeof(*c)); c = tre_mem_alloc(mem, sizeof(*c));
if (c == NULL) if (c == NULL) return REG_ESPACE;
return REG_ESPACE;
c->left = tre_ast_new_literal(mem, TAG, tag_id, -1); c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
if (c->left == NULL) if (c->left == NULL) return REG_ESPACE;
return REG_ESPACE;
c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t)); c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
if (c->right == NULL) if (c->right == NULL) return REG_ESPACE;
return REG_ESPACE;
c->right->obj = node->obj; c->right->obj = node->obj;
c->right->type = node->type; c->right->type = node->type;
@ -152,7 +149,6 @@ typedef enum
ADDTAGS_SET_SUBMATCH_END ADDTAGS_SET_SUBMATCH_END
} tre_addtags_symbol_t; } tre_addtags_symbol_t;
typedef struct typedef struct
{ {
int tag; int tag;
@ -763,8 +759,7 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
first_tag = 0; first_tag = 0;
} }
*result = tre_ast_new_literal(mem, min, max, pos); *result = tre_ast_new_literal(mem, min, max, pos);
if (*result == NULL) if (*result == NULL) status = REG_ESPACE;
status = REG_ESPACE;
if (pos > *max_pos) if (pos > *max_pos)
*max_pos = pos; *max_pos = pos;
@ -1121,8 +1116,7 @@ tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
tre_pos_and_tags_t *new_set; tre_pos_and_tags_t *new_set;
new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2); new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
if (new_set == NULL) if (new_set == NULL) return NULL;
return NULL;
new_set[0].position = position; new_set[0].position = position;
new_set[0].code_min = code_min; new_set[0].code_min = code_min;
@ -1150,8 +1144,7 @@ tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
for (s1 = 0; set1[s1].position >= 0; s1++); for (s1 = 0; set1[s1].position >= 0; s1++);
for (s2 = 0; set2[s2].position >= 0; s2++); for (s2 = 0; set2[s2].position >= 0; s2++);
new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1)); new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
if (!new_set ) if (!new_set) return NULL;
return NULL;
for (s1 = 0; set1[s1].position >= 0; s1++) for (s1 = 0; set1[s1].position >= 0; s1++)
{ {
@ -1395,15 +1388,10 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
/* Back references: nullable = false, firstpos = {i}, /* Back references: nullable = false, firstpos = {i},
lastpos = {i}. */ lastpos = {i}. */
node->nullable = 0; node->nullable = 0;
node->firstpos = tre_set_one(mem, lit->position, 0, node->firstpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, -1);
TRE_CHAR_MAX, 0, NULL, -1); if (!node->firstpos) return REG_ESPACE;
if (!node->firstpos) node->lastpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, (int)lit->code_max);
return REG_ESPACE; if (!node->lastpos) return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position, 0,
TRE_CHAR_MAX, 0, NULL,
(int)lit->code_max);
if (!node->lastpos)
return REG_ESPACE;
} }
else if (lit->code_min < 0) else if (lit->code_min < 0)
{ {
@ -1422,18 +1410,10 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
/* Literal at position i: nullable = false, firstpos = {i}, /* Literal at position i: nullable = false, firstpos = {i},
lastpos = {i}. */ lastpos = {i}. */
node->nullable = 0; node->nullable = 0;
node->firstpos = node->firstpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, 0, NULL, -1);
tre_set_one(mem, lit->position, (int)lit->code_min, if (!node->firstpos) return REG_ESPACE;
(int)lit->code_max, 0, NULL, -1); node->lastpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, lit->u.class, lit->neg_classes, -1);
if (!node->firstpos) if (!node->lastpos) return REG_ESPACE;
return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position,
(int)lit->code_min,
(int)lit->code_max,
lit->u.class, lit->neg_classes,
-1);
if (!node->lastpos)
return REG_ESPACE;
} }
break; break;
} }
@ -1628,6 +1608,7 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
int i, j, k, l, dup, prev_p2_pos; int i, j, k, l, dup, prev_p2_pos;
if (transitions != NULL) if (transitions != NULL)
{
while (p1->position >= 0) while (p1->position >= 0)
{ {
p2 = orig_p2; p2 = orig_p2;
@ -1814,7 +1795,9 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
} }
p1++; p1++;
} }
}
else else
{
/* Compute a maximum limit for the number of transitions leaving /* Compute a maximum limit for the number of transitions leaving
from each state. */ from each state. */
while (p1->position >= 0) while (p1->position >= 0)
@ -1827,6 +1810,7 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
} }
p1++; p1++;
} }
}
return REG_OK; return REG_OK;
} }

View File

@ -168,23 +168,18 @@ typedef struct tre_backtrack_struct
while (/*CONSTCOND*/0) while (/*CONSTCOND*/0)
#define BT_STACK_POP() \ #define BT_STACK_POP() \
do \ do { \
{ \ int i; \
int i; \ assert(stack->prev); \
assert(stack->prev); \ pos = stack->item.pos; \
pos = stack->item.pos; \ str_byte = stack->item.str_byte; \
if (type == STR_USER) \ BT_STACK_WIDE_OUT; \
str_source->rewind(pos + pos_add_next, str_source->context); \ state = stack->item.state; \
str_byte = stack->item.str_byte; \ next_c = stack->item.next_c; \
BT_STACK_WIDE_OUT; \ for (i = 0; i < tnfa->num_tags; i++) tags[i] = stack->item.tags[i]; \
state = stack->item.state; \ BT_STACK_MBSTATE_OUT; \
next_c = stack->item.next_c; \ stack = stack->prev; \
for (i = 0; i < tnfa->num_tags; i++) \ } while (/*CONSTCOND*/0)
tags[i] = stack->item.tags[i]; \
BT_STACK_MBSTATE_OUT; \
stack = stack->prev; \
} \
while (/*CONSTCOND*/0)
#undef MIN #undef MIN
#define MIN(a, b) ((a) <= (b) ? (a) : (b)) #define MIN(a, b) ((a) <= (b) ? (a) : (b))
@ -208,7 +203,6 @@ tre_tnfa_run_backtrack(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *str
int reg_notbol = eflags & REG_NOTBOL; int reg_notbol = eflags & REG_NOTBOL;
int reg_noteol = eflags & REG_NOTEOL; int reg_noteol = eflags & REG_NOTEOL;
int reg_newline = tnfa->cflags & REG_NEWLINE; int reg_newline = tnfa->cflags & REG_NEWLINE;
int str_user_end = 0;
/* These are used to remember the necessary values of the above /* These are used to remember the necessary values of the above
variables to return to the position where the current search variables to return to the position where the current search
@ -302,8 +296,6 @@ retry:
state = NULL; state = NULL;
pos = pos_start; pos = pos_start;
if (type == STR_USER)
str_source->rewind(pos + pos_add_next, str_source->context);
GET_NEXT_WCHAR(); GET_NEXT_WCHAR();
pos_start = pos; pos_start = pos;
next_c_start = next_c; next_c_start = next_c;
@ -446,15 +438,11 @@ retry:
if (len < 0) if (len < 0)
{ {
if (type == STR_USER)
result = str_source->compare((unsigned)so, (unsigned)pos,
(unsigned)bt_len,
str_source->context);
#ifdef TRE_WCHAR #ifdef TRE_WCHAR
else if (type == STR_WIDE) if (type == STR_WIDE)
result = qse_wcszcmp((const qse_wchar_t*)string + so, str_wide - 1, (size_t)bt_len); result = qse_wcszcmp((const qse_wchar_t*)string + so, str_wide - 1, (size_t)bt_len);
#endif /* TRE_WCHAR */
else else
#endif /* TRE_WCHAR */
result = qse_mbszcmp((const char*)string + so, str_byte - 1, (size_t)bt_len); result = qse_mbszcmp((const char*)string + so, str_byte - 1, (size_t)bt_len);
} }
else if (len - pos < bt_len) else if (len - pos < bt_len)
@ -508,12 +496,7 @@ retry:
/* Check for end of string. */ /* Check for end of string. */
if (len < 0) if (len < 0)
{ {
if (type == STR_USER) if (next_c == QSE_T('\0'))
{
if (str_user_end)
goto backtrack;
}
else if (next_c == QSE_T('\0'))
goto backtrack; goto backtrack;
} }
else else
@ -533,8 +516,8 @@ retry:
trans_i->code_min, trans_i->code_max, trans_i->code_min, trans_i->code_max,
trans_i->code_min, trans_i->code_max, trans_i->code_min, trans_i->code_max,
trans_i->assertions, trans_i->state_id)); trans_i->assertions, trans_i->state_id));
if (trans_i->code_min <= (tre_cint_t)prev_c &&
trans_i->code_max >= (tre_cint_t)prev_c) if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c)
{ {
if (trans_i->assertions if (trans_i->assertions
&& (CHECK_ASSERTIONS(trans_i->assertions) && (CHECK_ASSERTIONS(trans_i->assertions)

View File

@ -325,12 +325,7 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
/* Check for end of string. */ /* Check for end of string. */
if (len < 0) if (len < 0)
{ {
if (type == STR_USER) if (next_c == QSE_T('\0'))
{
if (str_user_end)
break;
}
else if (next_c == QSE_T('\0'))
break; break;
} }
else else
@ -408,28 +403,28 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
for (trans_i = reach_i->state; trans_i->state; trans_i++) for (trans_i = reach_i->state; trans_i->state; trans_i++)
{ {
/* Does this transition match the input symbol? */ /* Does this transition match the input symbol? */
if (trans_i->code_min <= (tre_cint_t)prev_c && if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c)
trans_i->code_max >= (tre_cint_t)prev_c)
{ {
if (trans_i->assertions if (trans_i->assertions &&
&& (CHECK_ASSERTIONS(trans_i->assertions) (CHECK_ASSERTIONS(trans_i->assertions) ||
|| CHECK_CHAR_CLASSES(trans_i, tnfa, eflags))) CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
{ {
DPRINT(("assertion failed\n")); DPRINT(("assertion failed\n"));
continue; continue;
} }
/* Compute the tags after this transition. */ /* Compute the tags after this transition. */
for (i = 0; i < num_tags; i++) for (i = 0; i < num_tags; i++) tmp_tags[i] = reach_i->tags[i];
tmp_tags[i] = reach_i->tags[i];
tag_i = trans_i->tags; tag_i = trans_i->tags;
if (tag_i != NULL) if (tag_i != NULL)
{
while (*tag_i >= 0) while (*tag_i >= 0)
{ {
if (*tag_i < num_tags) if (*tag_i < num_tags)
tmp_tags[*tag_i] = pos; tmp_tags[*tag_i] = pos;
tag_i++; tag_i++;
} }
}
if (reach_pos[trans_i->state_id].pos < pos) if (reach_pos[trans_i->state_id].pos < pos)
{ {
@ -442,15 +437,12 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
reach_pos[trans_i->state_id].tags = &reach_next_i->tags; reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
if (reach_next_i->state == tnfa->final if (reach_next_i->state == tnfa->final
&& (match_eo == -1 && (match_eo == -1 || (num_tags > 0 && reach_next_i->tags[0] <= match_tags[0])))
|| (num_tags > 0
&& reach_next_i->tags[0] <= match_tags[0])))
{ {
DPRINT((" found match %p\n", trans_i->state)); DPRINT((" found match %p\n", trans_i->state));
match_eo = pos; match_eo = pos;
new_match = 1; new_match = 1;
for (i = 0; i < num_tags; i++) for (i = 0; i < num_tags; i++) match_tags[i] = reach_next_i->tags[i];
match_tags[i] = reach_next_i->tags[i];
} }
reach_next_i++; reach_next_i++;

View File

@ -52,8 +52,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#define str_source ((const tre_str_source*)string)
#ifdef TRE_WCHAR #ifdef TRE_WCHAR
#ifdef TRE_MULTIBYTE #ifdef TRE_MULTIBYTE
@ -116,12 +114,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
} \ } \
} \ } \
} \ } \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
str_source->context); \
} \
} while(/*CONSTCOND*/0) } while(/*CONSTCOND*/0)
#else /* !TRE_MULTIBYTE */ #else /* !TRE_MULTIBYTE */
@ -143,11 +135,6 @@ do { \
if (len >= 0 && pos >= len) next_c = QSE_T('\0'); \ if (len >= 0 && pos >= len) next_c = QSE_T('\0'); \
else next_c = *str_wide++; \ else next_c = *str_wide++; \
} \ } \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, str_source->context); \
} \
} while(/*CONSTCOND*/0) } while(/*CONSTCOND*/0)
#endif /* !TRE_MULTIBYTE */ #endif /* !TRE_MULTIBYTE */
@ -156,24 +143,16 @@ do { \
/* No wide character or multibyte support. */ /* No wide character or multibyte support. */
#define GET_NEXT_WCHAR() \ #define GET_NEXT_WCHAR() \
do { \ do { \
prev_c = next_c; \ prev_c = next_c; \
if (type == STR_BYTE) \ if (type == STR_BYTE) \
{ \ { \
pos++; \ pos++; \
if (len >= 0 && pos >= len) \ if (len >= 0 && pos >= len) next_c = '\0'; \
next_c = '\0'; \ else next_c = (unsigned char)(*str_byte++); \
else \ } \
next_c = (unsigned char)(*str_byte++); \ } while(/*CONSTCOND*/0)
} \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
str_source->context); \
} \
} while(/*CONSTCOND*/0)
#endif /* !TRE_WCHAR */ #endif /* !TRE_WCHAR */

File diff suppressed because it is too large Load Diff

View File

@ -205,15 +205,6 @@ static int tre_match(
if (tnfa->have_backrefs || (eflags & REG_BACKTRACKING_MATCHER)) if (tnfa->have_backrefs || (eflags & REG_BACKTRACKING_MATCHER))
{ {
/* The regex has back references, use the backtracking matcher. */ /* The regex has back references, use the backtracking matcher. */
if (type == STR_USER)
{
const tre_str_source *source = string;
if (source->rewind == QSE_NULL || source->compare == QSE_NULL)
/* The backtracking matcher requires rewind and compare
capabilities from the input stream. */
return REG_BADPAT;
}
status = tre_tnfa_run_backtrack ( status = tre_tnfa_run_backtrack (
preg->mmgr, tnfa, string, (int)len, type, preg->mmgr, tnfa, string, (int)len, type,
tags, eflags, &eo); tags, eflags, &eo);
@ -266,15 +257,6 @@ int qse_tre_exec (
return qse_tre_execx (tre, str, (qse_size_t)-1, pmatch, nmatch, eflags); return qse_tre_execx (tre, str, (qse_size_t)-1, pmatch, nmatch, eflags);
} }
#if 0
int qse_tre_execsrc (
const regex_t *preg, const tre_str_source *str,
qse_size_t nmatch, regmatch_t pmatch[], int eflags)
{
return tre_match (preg, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
}
#endif
qse_tre_errnum_t qse_tre_geterrnum (qse_tre_t* tre) qse_tre_errnum_t qse_tre_geterrnum (qse_tre_t* tre)
{ {
return tre->errnum; return tre->errnum;

View File

@ -177,7 +177,6 @@ typedef qse_cint_t tre_cint_t;
#define regex_t qse_tre_t #define regex_t qse_tre_t
#define regmatch_t qse_tre_match_t #define regmatch_t qse_tre_match_t
#define reg_errcode_t qse_tre_errnum_t #define reg_errcode_t qse_tre_errnum_t
#define tre_str_source qse_tre_strsrc_t
#define REG_OK QSE_TRE_ENOERR #define REG_OK QSE_TRE_ENOERR
@ -278,7 +277,7 @@ typedef qse_pma_t* tre_mem_t;
typedef qse_ctype_t tre_ctype_t; typedef qse_ctype_t tre_ctype_t;
#define tre_isctype(c,t) QSE_ISCTYPE(c,t) #define tre_isctype(c,t) QSE_ISCTYPE(c,t)
typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t; typedef enum { STR_WIDE, STR_BYTE, STR_MBS } tre_str_type_t;
/* Returns number of bytes to add to (char *)ptr to make it /* Returns number of bytes to add to (char *)ptr to make it
properly aligned for the type. */ properly aligned for the type. */
@ -305,6 +304,9 @@ typedef struct tnfa_transition tre_tnfa_transition_t;
struct tnfa_transition struct tnfa_transition
{ {
/* Range of accepted characters. */ /* Range of accepted characters. */
/* QSE indicate that code_min .. code_max is not yet negated for ^ in a bracket */
int negate_range;
/* END QSE */
tre_cint_t code_min; tre_cint_t code_min;
tre_cint_t code_max; tre_cint_t code_max;
/* Pointer to the destination state. */ /* Pointer to the destination state. */