Changed awk to handle IGNORECASE with a regular expression engine that supports case-insensitive matching only as a compile-time option, not as a run-time option

This commit is contained in:
hyung-hwan 2013-08-23 15:19:29 +00:00
parent 47677ca566
commit d841c9f62f
21 changed files with 1127 additions and 1236 deletions

View File

@ -110,21 +110,21 @@ typedef struct qse_awk_loc_t qse_awk_loc_t;
* Three common fields are:
* - type - type of a value from #qse_awk_val_type_t
* - ref - reference count
* - nstr - numeric string marker
* - stat - static value
* - nstr - numeric string marker, 1 -> long, 2 -> real
*/
#if QSE_SIZEOF_INT == 2
# define QSE_AWK_VAL_HDR \
unsigned int type: 3; \
unsigned int ref: 10; \
unsigned int stat: 1; \
unsigned int nstr: 2
#else
/*
#define QSE_AWK_VAL_HDR \
unsigned int type: 3; \
unsigned int ref: 26; \
unsigned int stat: 1; \
unsigned int nstr: 2
#endif
unsigned int nstr: 2;
*/
#define QSE_AWK_VAL_HDR \
qse_uintptr_t type: 3; \
qse_uintptr_t ref: ((QSE_SIZEOF_UINTPTR_T * 8) - 6); \
qse_uintptr_t stat: 1; \
qse_uintptr_t nstr: 2;
/**
* The qse_awk_val_t type is an abstract value type. A value commonly contains:
@ -191,7 +191,7 @@ struct qse_awk_val_rex_t
{
QSE_AWK_VAL_HDR;
qse_xstr_t str;
void* code;
void* code[2];
};
typedef struct qse_awk_val_rex_t qse_awk_val_rex_t;
@ -2388,7 +2388,7 @@ QSE_EXPORT qse_awk_val_t* qse_awk_rtx_makenstrvalwithcstr (
QSE_EXPORT qse_awk_val_t* qse_awk_rtx_makerexval (
qse_awk_rtx_t* rtx,
const qse_cstr_t* str,
void* code
void* code[2]
);
/**

View File

@ -99,15 +99,6 @@ enum qse_tre_eflag_t
QSE_TRE_NOTEOL = (1 << 2)
};
typedef struct qse_tre_strsrc_t qse_tre_strsrc_t;
struct qse_tre_strsrc_t
{
int (*get_next_char) (qse_char_t *c, unsigned int* pos_add, void* context);
void (*rewind)(qse_size_t pos, void *context);
int (*compare)(qse_size_t pos1, qse_size_t pos2, qse_size_t len, void* context);
void* context;
};
#ifdef __cplusplus
extern "C" {
#endif

View File

@ -307,8 +307,8 @@ struct qse_awk_rtx_t
struct
{
void* rs;
void* fs;
void* rs[2];
void* fs[2];
int ignorecase;
qse_long_t nr;

View File

@ -624,7 +624,7 @@ static int fnc_substr (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
return 0;
}
static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
static int fnc_split (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
{
qse_size_t nargs;
qse_awk_val_t* a0, * a1, * a2, * t1, * t2;
@ -642,12 +642,12 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
qse_awk_errnum_t errnum;
int x;
nargs = qse_awk_rtx_getnargs (run);
nargs = qse_awk_rtx_getnargs (rtx);
QSE_ASSERT (nargs >= 2 && nargs <= 3);
a0 = qse_awk_rtx_getarg (run, 0);
a1 = qse_awk_rtx_getarg (run, 1);
a2 = (nargs >= 3)? qse_awk_rtx_getarg (run, 2): QSE_NULL;
a0 = qse_awk_rtx_getarg (rtx, 0);
a1 = qse_awk_rtx_getarg (rtx, 1);
a2 = (nargs >= 3)? qse_awk_rtx_getarg (rtx, 2): QSE_NULL;
QSE_ASSERT (a1->type == QSE_AWK_VAL_REF);
@ -658,7 +658,7 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
}
else
{
str.ptr = qse_awk_rtx_valtostrdup (run, a0, &str.len);
str.ptr = qse_awk_rtx_valtostrdup (rtx, a0, &str.len);
if (str.ptr == QSE_NULL) return -1;
str_free = (qse_char_t*)str.ptr;
}
@ -666,7 +666,7 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
if (a2 == QSE_NULL)
{
/* get the value from FS */
t1 = qse_awk_rtx_getgbl (run, QSE_AWK_GBL_FS);
t1 = qse_awk_rtx_getgbl (rtx, QSE_AWK_GBL_FS);
if (t1->type == QSE_AWK_VAL_NIL)
{
fs.ptr = QSE_T(" ");
@ -679,17 +679,17 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
}
else
{
fs.ptr = qse_awk_rtx_valtostrdup (run, t1, &fs.len);
fs.ptr = qse_awk_rtx_valtostrdup (rtx, t1, &fs.len);
if (fs.ptr == QSE_NULL) goto oops;
fs_free = (qse_char_t*)fs.ptr;
}
if (fs.len > 1) fs_rex = run->gbl.fs;
if (fs.len > 1) fs_rex = rtx->gbl.fs[rtx->gbl.ignorecase];
}
else if (a2->type == QSE_AWK_VAL_REX)
{
/* the third parameter is a regular expression */
fs_rex = ((qse_awk_val_rex_t*)a2)->code;
fs_rex = ((qse_awk_val_rex_t*)a2)->code[rtx->gbl.ignorecase];
/* make the loop below to take fs_rex by
* setting fs_len greater than 1*/
@ -705,30 +705,36 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
}
else
{
fs.ptr = qse_awk_rtx_valtostrdup (run, a2, &fs.len);
fs.ptr = qse_awk_rtx_valtostrdup (rtx, a2, &fs.len);
if (fs.ptr == QSE_NULL) goto oops;
fs_free = (qse_char_t*)fs.ptr;
}
if (fs.len > 1)
{
fs_rex = qse_awk_buildrex (
run->awk, fs.ptr, fs.len, &errnum);
if (fs_rex == QSE_NULL)
int x;
if (rtx->gbl.ignorecase)
x = qse_awk_buildrex (rtx->awk, fs.ptr, fs.len, &errnum, QSE_NULL, &fs_rex);
else
x = qse_awk_buildrex (rtx->awk, fs.ptr, fs.len, &errnum, &fs_rex, QSE_NULL);
if (x <= -1)
{
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL);
qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
goto oops;
}
fs_rex_free = fs_rex;
}
}
t1 = qse_awk_rtx_makemapval (run);
t1 = qse_awk_rtx_makemapval (rtx);
if (t1 == QSE_NULL) goto oops;
qse_awk_rtx_refupval (run, t1);
x = qse_awk_rtx_setrefval (run, a1, t1);
qse_awk_rtx_refdownval (run, t1);
qse_awk_rtx_refupval (rtx, t1);
x = qse_awk_rtx_setrefval (rtx, (qse_awk_val_ref_t*)a1, t1);
qse_awk_rtx_refdownval (rtx, t1);
if (x <= -1) goto oops;
/* fill the map with actual values */
@ -742,18 +748,18 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
if (fs.len <= 1)
{
p = qse_awk_rtx_strxntok (run,
p = qse_awk_rtx_strxntok (rtx,
p, str.len, fs.ptr, fs.len, &tok);
}
else
{
p = qse_awk_rtx_strxntokbyrex (
run, str.ptr, org_len, p, str.len,
rtx, str.ptr, org_len, p, str.len,
fs_rex, &tok, &errnum
);
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL);
qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
goto oops;
}
}
@ -768,42 +774,54 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
/* create the field string - however, the split function must
* create a numeric string if the string is a number */
/*t2 = qse_awk_rtx_makestrvalwithcstr (run, &tok);*/
t2 = qse_awk_rtx_makenstrvalwithcstr (run, &tok);
/*t2 = qse_awk_rtx_makestrvalwithcstr (rtx, &tok);*/
t2 = qse_awk_rtx_makenstrvalwithcstr (rtx, &tok);
if (t2 == QSE_NULL) goto oops;
/* put it into the map */
key_len = qse_awk_longtostr (
run->awk, ++nflds, 10, QSE_NULL, key_buf, QSE_COUNTOF(key_buf));
rtx->awk, ++nflds, 10, QSE_NULL, key_buf, QSE_COUNTOF(key_buf));
QSE_ASSERT (key_len != (qse_size_t)-1);
if (qse_awk_rtx_setmapvalfld (
run, t1, key_buf, key_len, t2) == QSE_NULL)
rtx, t1, key_buf, key_len, t2) == QSE_NULL)
{
qse_awk_rtx_refupval (run, t2);
qse_awk_rtx_refdownval (run, t2);
qse_awk_rtx_refupval (rtx, t2);
qse_awk_rtx_refdownval (rtx, t2);
goto oops;
}
str.len = str_left - (p - str.ptr);
}
if (str_free) QSE_AWK_FREE (run->awk, str_free);
if (fs_free) QSE_AWK_FREE (run->awk, fs_free);
if (fs_rex_free) qse_awk_freerex (run->awk, fs_rex_free);
if (str_free) QSE_AWK_FREE (rtx->awk, str_free);
if (fs_free) QSE_AWK_FREE (rtx->awk, fs_free);
if (fs_rex_free)
{
if (rtx->gbl.ignorecase)
qse_awk_freerex (rtx->awk, QSE_NULL, fs_rex_free);
else
qse_awk_freerex (rtx->awk, fs_rex_free, QSE_NULL);
}
/*nflds--;*/
t1 = qse_awk_rtx_makeintval (run, nflds);
t1 = qse_awk_rtx_makeintval (rtx, nflds);
if (t1 == QSE_NULL) return -1;
qse_awk_rtx_setretval (run, t1);
qse_awk_rtx_setretval (rtx, t1);
return 0;
oops:
if (str_free) QSE_AWK_FREE (run->awk, str_free);
if (fs_free) QSE_AWK_FREE (run->awk, fs_free);
if (fs_rex_free) qse_awk_freerex (run->awk, fs_rex_free);
if (str_free) QSE_AWK_FREE (rtx->awk, str_free);
if (fs_free) QSE_AWK_FREE (rtx->awk, fs_free);
if (fs_rex_free)
{
if (rtx->gbl.ignorecase)
qse_awk_freerex (rtx->awk, QSE_NULL, fs_rex_free);
else
qse_awk_freerex (rtx->awk, fs_rex_free, QSE_NULL);
}
return -1;
}
@ -832,7 +850,7 @@ static int fnc_tolower (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
for (i = 0; i < str.len; i++) str.ptr[i] = QSE_AWK_TOLOWER (run->awk, str.ptr[i]);
r = qse_awk_rtx_makestrvalwithcstr (run, &str);
r = qse_awk_rtx_makestrvalwithcstr (run, (qse_cstr_t*)&str);
if (r == QSE_NULL)
{
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str.ptr);
@ -869,7 +887,7 @@ static int fnc_toupper (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
for (i = 0; i < str.len; i++) str.ptr[i] = QSE_AWK_TOUPPER (run->awk, str.ptr[i]);
r = qse_awk_rtx_makestrvalwithcstr (run, &str);
r = qse_awk_rtx_makestrvalwithcstr (run, (qse_cstr_t*)&str);
if (r == QSE_NULL)
{
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str.ptr);
@ -881,11 +899,10 @@ static int fnc_toupper (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
return 0;
}
static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
{
qse_size_t nargs;
qse_awk_val_t* a0, * a1, * a2, ** a2_ref, * v;
qse_awk_val_t* a0, * a1, * a2, * v;
qse_cstr_t s0, s1, s2;
const qse_char_t* s2_end;
@ -898,7 +915,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
void* rex_free = QSE_NULL;
qse_str_t new;
int new_inited = 0, opt;
int new_inited = 0;
qse_cstr_t mat, pmat, cur;
@ -915,7 +932,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (a0->type == QSE_AWK_VAL_REX)
{
rex = ((qse_awk_val_rex_t*)a0)->code;
rex = ((qse_awk_val_rex_t*)a0)->code[run->gbl.ignorecase];
}
else if (a0->type == QSE_AWK_VAL_STR)
{
@ -964,10 +981,14 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (a0->type != QSE_AWK_VAL_REX)
{
qse_awk_errnum_t errnum;
int x;
rex = qse_awk_buildrex (
run->awk, s0.ptr, s0.len, &errnum);
if (rex == QSE_NULL)
if (run->gbl.ignorecase)
x = qse_awk_buildrex (run->awk, s0.ptr, s0.len, &errnum, QSE_NULL, &rex);
else
x = qse_awk_buildrex (run->awk, s0.ptr, s0.len, &errnum, &rex, QSE_NULL);
if (x <= -1)
{
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL);
goto oops;
@ -976,8 +997,6 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
rex_free = rex;
}
opt = (run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0;
s2_end = s2.ptr + s2.len;
cur.ptr = s2.ptr;
cur.len = s2.len;
@ -997,7 +1016,8 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (max_count == 0 || sub_count < max_count)
{
n = qse_awk_matchrex (
run->awk, rex, opt, &s2, &cur, &mat, &errnum
run->awk, rex, run->gbl.ignorecase,
&s2, &cur, &mat, &errnum
);
}
else n = 0;
@ -1085,7 +1105,10 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (rex_free)
{
qse_awk_freerex (run->awk, rex_free);
if (run->gbl.ignorecase)
qse_awk_freerex (run->awk, QSE_NULL, rex_free);
else
qse_awk_freerex (run->awk, rex_free, QSE_NULL);
rex_free = QSE_NULL;
}
@ -1104,7 +1127,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
v = qse_awk_rtx_makestrvalwithcstr (run, QSE_STR_CSTR(&new));
if (v == QSE_NULL) goto oops;
qse_awk_rtx_refupval (run, v);
n = qse_awk_rtx_setrefval (run, a2, v);
n = qse_awk_rtx_setrefval (run, (qse_awk_val_ref_t*)a2, v);
qse_awk_rtx_refdownval (run, v);
if (n <= -1) goto oops;
}
@ -1123,7 +1146,13 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
return 0;
oops:
if (rex_free) qse_awk_freerex (run->awk, rex_free);
if (rex_free)
{
if (run->gbl.ignorecase)
qse_awk_freerex (run->awk, QSE_NULL, rex_free);
else
qse_awk_freerex (run->awk, rex_free, QSE_NULL);
}
if (new_inited) qse_str_fini (&new);
if (s2_free) QSE_AWK_FREE (run->awk, s2_free);
if (s1_free) QSE_AWK_FREE (run->awk, s1_free);
@ -1145,13 +1174,11 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
{
qse_size_t nargs;
qse_awk_val_t* a0, * a1;
qse_char_t* str0, * str1;
qse_size_t len0, len1;
qse_char_t* str0;
qse_size_t len0;
qse_long_t idx, start = 1;
void* rex;
int n;
qse_cstr_t mat;
qse_awk_errnum_t errnum;
nargs = qse_awk_rtx_getnargs (rtx);
QSE_ASSERT (nargs >= 2 && nargs <= 3);
@ -1195,42 +1222,6 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
if (str0 == QSE_NULL) return -1;
}
if (a1->type == QSE_AWK_VAL_REX)
{
rex = ((qse_awk_val_rex_t*)a1)->code;
}
else
{
qse_awk_errnum_t errnum;
if (a1->type == QSE_AWK_VAL_STR)
{
str1 = ((qse_awk_val_str_t*)a1)->val.ptr;
len1 = ((qse_awk_val_str_t*)a1)->val.len;
}
else
{
str1 = qse_awk_rtx_valtostrdup (rtx, a1, &len1);
if (str1 == QSE_NULL)
{
if (a0->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (rtx->awk, str0);
return -1;
}
}
rex = qse_awk_buildrex (rtx->awk, str1, len1, &errnum);
if (rex == QSE_NULL)
{
if (a0->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (rtx->awk, str0);
qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
return -1;
}
if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1);
}
if (start == 0) start = 1;
else if (start < 0) start = len0 + start + 1;
@ -1242,21 +1233,12 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_awk_fnc_info_t* fi)
/*TODO: must use str0,len0? */
tmp.ptr = str0 + start - 1;
tmp.len = len0 - start + 1;
n = qse_awk_matchrex (
rtx->awk, rex,
(rtx->gbl.ignorecase? QSE_REX_IGNORECASE: 0),
&tmp, &tmp, &mat, &errnum
);
n = qse_awk_rtx_matchrex (rtx, a1, &tmp, &tmp, &mat);
if (n <= -1) return -1;
}
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0);
if (a1->type != QSE_AWK_VAL_REX) qse_awk_freerex (rtx->awk, rex);
if (n <= -1)
{
qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
return -1;
}
idx = (n == 0)? 0: ((qse_long_t)(mat.ptr-str0) + 1);
@ -1338,7 +1320,7 @@ static int fnc_sprintf (qse_awk_rtx_t* run, const qse_awk_fnc_info_t* fi)
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, cs0.ptr);
if (x.ptr == QSE_NULL) goto oops;
a0 = qse_awk_rtx_makestrvalwithcstr (run, &x);
a0 = qse_awk_rtx_makestrvalwithcstr (run, (qse_cstr_t*)&x);
if (a0 == QSE_NULL) goto oops;
qse_str_fini (&fbu);

View File

@ -20,7 +20,7 @@
#include "awk.h"
#define USE_REX
/*#define USE_REX */
#if defined(USE_REX)
# include <qse/cmn/rex.h>
@ -892,8 +892,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
while (cursub.len > 0)
{
n = qse_awk_matchrex (
rtx->awk, rex,
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
rtx->awk, rex, rtx->gbl.ignorecase,
&s, &cursub, &match, errnum);
if (n == -1) return QSE_NULL;
if (n == 0)
@ -1090,36 +1089,46 @@ static QSE_INLINE int rexerr_to_errnum (int err)
}
}
void* qse_awk_buildrex (
qse_awk_t* awk, const qse_char_t* ptn,
qse_size_t len, qse_awk_errnum_t* errnum)
int qse_awk_buildrex (
qse_awk_t* awk, const qse_char_t* ptn, qse_size_t len,
qse_awk_errnum_t* errnum, void** code, void** icode)
{
#if defined(USE_REX)
qse_rex_errnum_t err;
void* p;
if (code || icode)
{
p = qse_buildrex (
awk->mmgr, awk->opt.depth.s.rex_build,
((awk->opt.trait & QSE_AWK_REXBOUND)? 0: QSE_REX_NOBOUND),
ptn, len, &err
);
if (p == QSE_NULL) *errnum = rexerr_to_errnum(err);
return p;
if (p == QSE_NULL)
{
*errnum = rexerr_to_errnum(err);
return -1;
}
if (code) *code = p;
if (icode) *icode = p;
}
return 0;
#else
qse_tre_t* tre;
qse_tre_t* tre = QSE_NULL;
qse_tre_t* itre = QSE_NULL;
int opt = QSE_TRE_EXTENDED;
if (code)
{
tre = qse_tre_open (awk->mmgr, 0);
if (tre == QSE_NULL)
{
*errnum = QSE_AWK_ENOMEM;
return QSE_NULL;
return -1;
}
/* ignorecase is a compile option for TRE */
#if 0 /* TODO */
if (ignorecase) opt |= QSE_TRE_IGNORECASE;
#endif
if (!(awk->opt.trait & QSE_AWK_REXBOUND)) opt |= QSE_TRE_NOBOUND;
if (qse_tre_compx (tre, ptn, len, QSE_NULL, opt) <= -1)
@ -1133,13 +1142,44 @@ void* qse_awk_buildrex (
*errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)?
QSE_AWK_ENOMEM: QSE_AWK_EREXBL;
qse_tre_close (tre);
return QSE_NULL;
return -1;
}
}
return tre;
if (icode)
{
itre = qse_tre_open (awk->mmgr, 0);
if (itre == QSE_NULL)
{
if (tre) qse_tre_close (tre);
*errnum = QSE_AWK_ENOMEM;
return -1;
}
/* ignorecase is a compile option for TRE */
if (qse_tre_compx (itre, ptn, len, QSE_NULL, opt | QSE_TRE_IGNORECASE) <= -1)
{
#if 0 /* TODO */
if (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM) *errnum = QSE_AWK_ENOMEM;
else
SETERR1 (awk, QSE_AWK_EREXBL, str->ptr, str->len, loc);
#endif
*errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)?
QSE_AWK_ENOMEM: QSE_AWK_EREXBL;
qse_tre_close (itre);
if (tre) qse_tre_close (tre);
return -1;
}
}
if (code) *code = tre;
if (icode) *icode = itre;
return 0;
#endif
}
#if !defined(USE_REX)
static int matchtre (
@ -1192,7 +1232,7 @@ static int matchtre (
#endif
int qse_awk_matchrex (
qse_awk_t* awk, void* code, int option,
qse_awk_t* awk, void* code, int icase,
const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match, qse_awk_errnum_t* errnum)
{
@ -1201,8 +1241,8 @@ int qse_awk_matchrex (
qse_rex_errnum_t err;
x = qse_matchrex (
awk->mmgr, awk->opt.depth.s.rex_match,
code, option, str, substr, match, &err);
awk->mmgr, awk->opt.depth.s.rex_match, code,
(icase? QSE_REX_IGNORECASE: 0), str, substr, match, &err);
if (x <= -1) *errnum = rexerr_to_errnum(err);
return x;
#else
@ -1218,7 +1258,9 @@ int qse_awk_matchrex (
#endif
}
void qse_awk_freerex (qse_awk_t* awk, void* code)
void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode)
{
if (code)
{
#if defined(USE_REX)
qse_freerex ((awk)->mmgr, code);
@ -1227,6 +1269,93 @@ void qse_awk_freerex (qse_awk_t* awk, void* code)
#endif
}
if (icode && icode != code)
{
#if defined(USE_REX)
qse_freerex ((awk)->mmgr, icode);
#else
qse_tre_close (icode);
#endif
}
}
/*
 * Match the regular expression represented by 'val' against 'substr',
 * a portion of 'str'.
 *
 * - If 'val' is a QSE_AWK_VAL_REX value, one of its two precompiled codes
 *   is used, selected by rtx->gbl.ignorecase. That code is borrowed and is
 *   never freed here.
 * - Otherwise the pattern text is taken from 'val' (directly for a
 *   QSE_AWK_VAL_STR value, via qse_awk_rtx_valtostrdup() for anything else),
 *   compiled for the current ignorecase setting only, used once and freed
 *   before returning.
 *
 * Returns -1 if the pattern text cannot be obtained or compiled; otherwise
 * returns whatever the underlying matcher returns. Whenever compilation or
 * matching fails (result <= -1), the error number is stored in 'rtx'.
 * NOTE(review): a failure of qse_awk_rtx_valtostrdup() is passed through
 * without setting an error number here - presumably that function sets it.
 */
int qse_awk_rtx_matchrex (
	qse_awk_rtx_t* rtx, qse_awk_val_t* val,
	const qse_cstr_t* str, const qse_cstr_t* substr, qse_cstr_t* match)
{
	void* code;
	int icase, borrowed, n;
	qse_awk_errnum_t awkerr;
#if defined(USE_REX)
	qse_rex_errnum_t rexerr;
#endif

	icase = rtx->gbl.ignorecase;
	borrowed = (val->type == QSE_AWK_VAL_REX);

	if (borrowed)
	{
		/* precompiled: pick the variant for the current case mode */
		code = ((qse_awk_val_rex_t*)val)->code[icase];
	}
	else
	{
		qse_xstr_t ptn;
		int duplicated;
		void** sensitive = QSE_NULL;
		void** insensitive = QSE_NULL;

		/* request only the variant that is going to be used */
		if (icase) insensitive = &code;
		else sensitive = &code;

		duplicated = (val->type != QSE_AWK_VAL_STR);
		if (duplicated)
		{
			/* convert to a string first */
			ptn.ptr = qse_awk_rtx_valtostrdup (rtx, val, &ptn.len);
			if (ptn.ptr == QSE_NULL) return -1;
		}
		else
		{
			/* use the string value in place */
			ptn.ptr = ((qse_awk_val_str_t*)val)->val.ptr;
			ptn.len = ((qse_awk_val_str_t*)val)->val.len;
		}

		n = qse_awk_buildrex (
			rtx->awk, ptn.ptr, ptn.len, &awkerr, sensitive, insensitive);

		/* the temporary pattern text is not needed once compiled */
		if (duplicated) qse_awk_rtx_freemem (rtx, ptn.ptr);

		if (n <= -1)
		{
			qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL);
			return -1;
		}
	}

#if defined(USE_REX)
	/* this engine takes the case mode as a run-time option */
	n = qse_matchrex (
		rtx->awk->mmgr, rtx->awk->opt.depth.s.rex_match,
		code, (icase? QSE_REX_IGNORECASE: 0),
		str, substr, match, &rexerr);
	if (n <= -1) qse_awk_rtx_seterrnum (rtx, rexerr_to_errnum(rexerr), QSE_NULL);
#else
	/* QSE_TRE_NOTBOL is added when 'substr' does not begin where 'str' does */
	n = matchtre (
		rtx->awk, code,
		((str->ptr == substr->ptr)? QSE_TRE_BACKTRACKING: (QSE_TRE_BACKTRACKING | QSE_TRE_NOTBOL)),
		substr, match, QSE_NULL, &awkerr
	);
	if (n <= -1) qse_awk_rtx_seterrnum (rtx, awkerr, QSE_NULL);
#endif

	if (!borrowed)
	{
		/* release the code compiled above, from the slot it was built into */
		if (icase)
			qse_awk_freerex (rtx->awk, QSE_NULL, code);
		else
			qse_awk_freerex (rtx->awk, code, QSE_NULL);
	}

	return n;
}
void* qse_awk_rtx_allocmem (qse_awk_rtx_t* rtx, qse_size_t size)
{
void* ptr = QSE_AWK_ALLOC (rtx->awk, size);

View File

@ -63,20 +63,28 @@ qse_char_t* qse_awk_rtx_strxnfld (
qse_cstr_t* tok
);
void* qse_awk_buildrex (
int qse_awk_buildrex (
qse_awk_t* awk,
const qse_char_t* ptn,
qse_size_t len,
qse_awk_errnum_t* errnum
qse_awk_errnum_t* errnum,
void** code,
void** icode
);
int qse_awk_matchrex (
qse_awk_t* awk, void* code, int option,
qse_awk_t* awk, void* code, int icase,
const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match, qse_awk_errnum_t* errnum
);
void qse_awk_freerex (qse_awk_t* awk, void* code);
void qse_awk_freerex (qse_awk_t* awk, void* code, void* icode);
int qse_awk_rtx_matchrex (
qse_awk_rtx_t* rtx, qse_awk_val_t* val,
const qse_cstr_t* str, const qse_cstr_t* substr,
qse_cstr_t* match
);
int qse_awk_sprintflt (
qse_awk_t* awk,

View File

@ -4351,9 +4351,7 @@ static qse_awk_nde_t* parse_primary_rex (qse_awk_t* awk, const qse_awk_loc_t* x
nde->str.ptr = qse_awk_cstrdup (awk, QSE_STR_CSTR(awk->tok.name));
if (nde->str.ptr == QSE_NULL) goto oops;
nde->code = qse_awk_buildrex (
awk, QSE_STR_PTR(awk->tok.name), QSE_STR_LEN(awk->tok.name), &errnum);
if (nde->code == QSE_NULL)
if (qse_awk_buildrex (awk, QSE_STR_PTR(awk->tok.name), QSE_STR_LEN(awk->tok.name), &errnum, &nde->code[0], &nde->code[1]) <= -1)
{
SETERR_LOC (awk, errnum, xloc);
goto oops;
@ -4365,7 +4363,7 @@ static qse_awk_nde_t* parse_primary_rex (qse_awk_t* awk, const qse_awk_loc_t* x
oops:
QSE_ASSERT (nde != QSE_NULL);
if (nde->code) qse_awk_freerex (awk, nde->code);
if (nde->code[0]) qse_awk_freerex (awk, nde->code[0], nde->code[1]);
if (nde->str.ptr) QSE_AWK_FREE (awk, nde->str.ptr);
QSE_AWK_FREE (awk, nde);
return QSE_NULL;

View File

@ -122,13 +122,8 @@ static int split_record (qse_awk_rtx_t* rtx)
}
else
{
qse_awk_rtx_valtostr_out_t out;
out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP;
if (qse_awk_rtx_valtostr (rtx, fs, &out) <= -1) return -1;
fs_ptr = out.u.cpldup.ptr;
fs_len = out.u.cpldup.len;
fs_ptr = qse_awk_rtx_valtostrdup (rtx, fs, &fs_len);
if (fs_ptr == QSE_NULL) return -1;
fs_free = fs_ptr;
}
@ -178,7 +173,7 @@ static int split_record (qse_awk_rtx_t* rtx)
QSE_STR_PTR(&rtx->inrec.line),
QSE_STR_LEN(&rtx->inrec.line),
p, len,
rtx->gbl.fs, &tok, &errnum
rtx->gbl.fs[rtx->gbl.ignorecase], &tok, &errnum
);
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{
@ -268,7 +263,7 @@ static int split_record (qse_awk_rtx_t* rtx)
QSE_STR_PTR(&rtx->inrec.line),
QSE_STR_LEN(&rtx->inrec.line),
p, len,
rtx->gbl.fs, &tok, &errnum
rtx->gbl.fs[rtx->gbl.ignorecase], &tok, &errnum
);
if (p == QSE_NULL && errnum != QSE_AWK_ENOERR)
{

View File

@ -223,12 +223,12 @@ static QSE_INLINE int match_long_rs (
qse_awk_errnum_t errnum;
int ret;
QSE_ASSERT (run->gbl.rs != QSE_NULL);
QSE_ASSERT (run->gbl.rs[0] != QSE_NULL);
QSE_ASSERT (run->gbl.rs[1] != QSE_NULL);
ret = qse_awk_matchrex (
run->awk, run->gbl.rs,
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
QSE_STR_CSTR(buf), QSE_STR_CSTR(buf),
run->awk, run->gbl.rs[run->gbl.ignorecase],
run->gbl.ignorecase, QSE_STR_CSTR(buf), QSE_STR_CSTR(buf),
&match, &errnum);
if (ret <= -1)
{

View File

@ -464,12 +464,11 @@ static int set_global (
if (fs_len > 1 && !(fs_len == 5 && fs_ptr[0] == QSE_T('?')))
{
void* rex;
void* rex, * irex;
qse_awk_errnum_t errnum;
rex = qse_awk_buildrex (
rtx->awk, fs_ptr, fs_len, &errnum);
if (rex == QSE_NULL)
if (qse_awk_buildrex (rtx->awk, fs_ptr, fs_len, &errnum, &rex, &irex) <= -1)
{
SETERR_COD (rtx, errnum);
if (val->type != QSE_AWK_VAL_STR)
@ -477,14 +476,14 @@ static int set_global (
return -1;
}
if (rtx->gbl.fs != QSE_NULL)
qse_awk_freerex (rtx->awk, rtx->gbl.fs);
if (rtx->gbl.fs[0])
qse_awk_freerex (rtx->awk, rtx->gbl.fs[0], rtx->gbl.fs[1]);
rtx->gbl.fs = rex;
rtx->gbl.fs[0] = rex;
rtx->gbl.fs[1] = irex;
}
if (val->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (rtx->awk, fs_ptr);
if (val->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, fs_ptr);
break;
}
@ -613,33 +612,31 @@ static int set_global (
rss = out.u.cpldup;
}
if (rtx->gbl.rs)
if (rtx->gbl.rs[0])
{
qse_awk_freerex (rtx->awk, rtx->gbl.rs);
rtx->gbl.rs = QSE_NULL;
qse_awk_freerex (rtx->awk, rtx->gbl.rs[0], rtx->gbl.rs[1]);
rtx->gbl.rs[0] = QSE_NULL;
rtx->gbl.rs[1] = QSE_NULL;
}
if (rss.len > 1)
{
void* rex;
void* rex, * irex;
qse_awk_errnum_t errnum;
/* compile the regular expression */
rex = qse_awk_buildrex (
rtx->awk, rss.ptr, rss.len, &errnum);
if (rex == QSE_NULL)
if (qse_awk_buildrex (rtx->awk, rss.ptr, rss.len, &errnum, &rex, &irex) <= -1)
{
SETERR_COD (rtx, errnum);
if (val->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (rtx->awk, rss.ptr);
if (val->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, rss.ptr);
return -1;
}
rtx->gbl.rs = rex;
rtx->gbl.rs[0] = rex;
rtx->gbl.rs[1] = irex;
}
if (val->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (rtx->awk, rss.ptr);
if (val->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, rss.ptr);
break;
}
@ -1016,8 +1013,10 @@ static int init_rtx (qse_awk_rtx_t* rtx, qse_awk_t* awk, qse_awk_rio_t* rio)
rtx->rio.chain = QSE_NULL;
}
rtx->gbl.rs = QSE_NULL;
rtx->gbl.fs = QSE_NULL;
rtx->gbl.rs[0] = QSE_NULL;
rtx->gbl.rs[1] = QSE_NULL;
rtx->gbl.fs[0] = QSE_NULL;
rtx->gbl.fs[1] = QSE_NULL;
rtx->gbl.ignorecase = 0;
return 0;
@ -1051,15 +1050,17 @@ static void fini_rtx (qse_awk_rtx_t* rtx, int fini_globals)
qse_awk_rtx_cleario (rtx);
QSE_ASSERT (rtx->rio.chain == QSE_NULL);
if (rtx->gbl.rs)
if (rtx->gbl.rs[0])
{
qse_awk_freerex (rtx->awk, rtx->gbl.rs);
rtx->gbl.rs = QSE_NULL;
qse_awk_freerex (rtx->awk, rtx->gbl.rs[0], rtx->gbl.rs[1]);
rtx->gbl.rs[0] = QSE_NULL;
rtx->gbl.rs[1] = QSE_NULL;
}
if (rtx->gbl.fs)
if (rtx->gbl.fs[0])
{
qse_awk_freerex (rtx->awk, rtx->gbl.fs);
rtx->gbl.fs = QSE_NULL;
qse_awk_freerex (rtx->awk, rtx->gbl.fs[0], rtx->gbl.fs[1]);
rtx->gbl.fs[0] = QSE_NULL;
rtx->gbl.fs[1] = QSE_NULL;
}
if (rtx->gbl.convfmt.ptr != QSE_NULL &&
@ -3208,7 +3209,6 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
{
qse_awk_val_t* v;
int n;
qse_awk_errnum_t errnum;
#if 0
if (rtx->exit_level >= EXIT_GLOBAL)
@ -3230,9 +3230,11 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
qse_cstr_t vs;
int opt = 0;
if (((qse_awk_rtx_t*)rtx)->gbl.ignorecase)
opt = QSE_REX_IGNORECASE;
/* special case where a regular expression is used
* without any match operator:
* print /abc/;
* perform the match against $0.
*/
qse_awk_rtx_refupval (rtx, v);
if (rtx->inrec.d0->type == QSE_AWK_VAL_NIL)
@ -3253,23 +3255,13 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
vs.len = ((qse_awk_val_str_t*)rtx->inrec.d0)->val.len;
}
n = qse_awk_matchrex (
((qse_awk_rtx_t*)rtx)->awk,
((qse_awk_val_rex_t*)v)->code,
opt, &vs, &vs, QSE_NULL, &errnum
);
n = qse_awk_rtx_matchrex (rtx, v, &vs, &vs, QSE_NULL);
if (n <= -1)
{
ADJERR_LOC (rtx, &nde->loc);
qse_awk_rtx_refdownval (rtx, v);
/* matchrex should never set the error number
* whose message contains a formatting
* character. otherwise, the following way of
* setting the error information may not work */
SETERR_LOC (rtx, errnum, &nde->loc);
return QSE_NULL;
}
qse_awk_rtx_refdownval (rtx, v);
v = qse_awk_rtx_makeintval (rtx, (n != 0));
@ -4974,115 +4966,50 @@ static qse_awk_val_t* eval_binop_match0 (
{
qse_awk_val_t* res;
int n;
qse_awk_errnum_t errnum;
void* rex_code;
if (right->type == QSE_AWK_VAL_REX)
{
rex_code = ((qse_awk_val_rex_t*)right)->code;
}
else if (right->type == QSE_AWK_VAL_STR)
{
rex_code = qse_awk_buildrex (
rtx->awk,
((qse_awk_val_str_t*)right)->val.ptr,
((qse_awk_val_str_t*)right)->val.len, &errnum);
if (rex_code == QSE_NULL)
{
SETERR_LOC (rtx, errnum, rloc);
return QSE_NULL;
}
}
else
{
qse_awk_rtx_valtostr_out_t out;
out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP;
if (qse_awk_rtx_valtostr (rtx, right, &out) <= -1) return QSE_NULL;
rex_code = qse_awk_buildrex (
rtx->awk, out.u.cpldup.ptr, out.u.cpldup.len, &errnum);
if (rex_code == QSE_NULL)
{
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
SETERR_LOC (rtx, errnum, rloc);
return QSE_NULL;
}
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
}
if (left->type == QSE_AWK_VAL_STR)
{
n = qse_awk_matchrex (
rtx->awk, rex_code,
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
n = qse_awk_rtx_matchrex (
rtx, right,
xstr_to_cstr(&((qse_awk_val_str_t*)left)->val),
xstr_to_cstr(&((qse_awk_val_str_t*)left)->val),
QSE_NULL, &errnum);
if (n == -1)
xstr_to_cstr(&((qse_awk_val_str_t*)left)->val), QSE_NULL);
if (n <= -1)
{
if (right->type != QSE_AWK_VAL_REX)
qse_awk_freerex (rtx->awk, rex_code);
SETERR_LOC (rtx, errnum, lloc);
ADJERR_LOC (rtx, lloc);
return QSE_NULL;
}
res = qse_awk_rtx_makeintval (rtx, (n == ret));
if (res == QSE_NULL)
{
if (right->type != QSE_AWK_VAL_REX)
qse_awk_freerex (rtx->awk, rex_code);
ADJERR_LOC (rtx, lloc);
return QSE_NULL;
}
}
else
{
qse_awk_rtx_valtostr_out_t out;
qse_xstr_t out;
out.type = QSE_AWK_RTX_VALTOSTR_CPLDUP;
if (qse_awk_rtx_valtostr (rtx, left, &out) <= -1)
out.ptr = qse_awk_rtx_valtostrdup (rtx, left, &out.len);
if (out.ptr == QSE_NULL) return QSE_NULL;
n = qse_awk_rtx_matchrex (rtx, right, &out, &out, QSE_NULL);
QSE_AWK_FREE (rtx->awk, out.ptr);
if (n <= -1)
{
if (right->type != QSE_AWK_VAL_REX)
qse_awk_freerex (rtx->awk, rex_code);
return QSE_NULL;
}
n = qse_awk_matchrex (
rtx->awk, rex_code,
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
xstr_to_cstr(&out.u.cpldup),
xstr_to_cstr(&out.u.cpldup),
QSE_NULL, &errnum
);
if (n == -1)
{
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
if (right->type != QSE_AWK_VAL_REX)
qse_awk_freerex (rtx->awk, rex_code);
SETERR_LOC (rtx, errnum, lloc);
ADJERR_LOC (rtx, lloc);
return QSE_NULL;
}
res = qse_awk_rtx_makeintval (rtx, (n == ret));
if (res == QSE_NULL)
{
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
if (right->type != QSE_AWK_VAL_REX)
qse_awk_freerex (rtx->awk, rex_code);
ADJERR_LOC (rtx, lloc);
return QSE_NULL;
}
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
}
if (right->type != QSE_AWK_VAL_REX) qse_awk_freerex (rtx->awk, rex_code);
return res;
}
@ -6307,7 +6234,8 @@ static qse_awk_val_t* eval_rex (qse_awk_rtx_t* run, qse_awk_nde_t* nde)
val = qse_awk_rtx_makerexval (run,
&((qse_awk_nde_rex_t*)nde)->str,
((qse_awk_nde_rex_t*)nde)->code);
((qse_awk_nde_rex_t*)nde)->code
);
if (val == QSE_NULL) ADJERR_LOC (run, &nde->loc);
return val;

View File

@ -1305,7 +1305,8 @@ void qse_awk_clrpt (qse_awk_t* awk, qse_awk_nde_t* tree)
case QSE_AWK_NDE_REX:
{
qse_awk_freerex (awk, ((qse_awk_nde_rex_t*)p)->code);
qse_awk_nde_rex_t* rex = (qse_awk_nde_rex_t*)p;
qse_awk_freerex (awk, rex->code[0], rex->code[1]);
QSE_AWK_FREE (awk, ((qse_awk_nde_rex_t*)p)->str.ptr);
QSE_AWK_FREE (awk, p);
break;
@ -1318,8 +1319,7 @@ void qse_awk_clrpt (qse_awk_t* awk, qse_awk_nde_t* tree)
{
qse_awk_nde_var_t* px = (qse_awk_nde_var_t*)p;
QSE_ASSERT (px->idx == QSE_NULL);
if (px->id.name.ptr != QSE_NULL)
QSE_AWK_FREE (awk, px->id.name.ptr);
if (px->id.name.ptr) QSE_AWK_FREE (awk, px->id.name.ptr);
QSE_AWK_FREE (awk, p);
break;
}

View File

@ -155,7 +155,7 @@ struct qse_awk_nde_rex_t
{
QSE_AWK_NDE_HDR;
qse_xstr_t str;
void* code;
void* code[2]; /* [0]: case sensitive, [1]: case insensitive */
};
/* QSE_AWK_NDE_NAMED, QSE_AWK_NDE_GBL,

View File

@ -35,6 +35,7 @@ qse_awk_val_t* qse_awk_val_zls = (qse_awk_val_t*)&awk_zls;
static qse_awk_val_int_t awk_int[] =
{
/* type ref stat nstr val nde */
{ QSE_AWK_VAL_INT, 0, 1, 0, -1, QSE_NULL },
{ QSE_AWK_VAL_INT, 0, 1, 0, 0, QSE_NULL },
{ QSE_AWK_VAL_INT, 0, 1, 0, 1, QSE_NULL },
@ -216,7 +217,7 @@ qse_awk_val_t* qse_awk_rtx_makestrvalwithmbs (
return QSE_NULL;
}
v = qse_awk_rtx_makestrvalwithcstr (rtx, &tmp);
v = qse_awk_rtx_makestrvalwithcstr (rtx, (qse_cstr_t*)&tmp);
QSE_AWK_FREE (rtx->awk, tmp.ptr);
return v;
#endif
@ -268,7 +269,7 @@ qse_awk_val_t* qse_awk_rtx_makestrvalwithmcstr (
return QSE_NULL;
}
v = qse_awk_rtx_makestrvalwithcstr (rtx, &tmp);
v = qse_awk_rtx_makestrvalwithcstr (rtx, (qse_cstr_t*)&tmp);
QSE_AWK_FREE (rtx->awk, tmp.ptr);
return v;
#endif
@ -438,7 +439,7 @@ qse_awk_val_t* qse_awk_rtx_makenstrvalwithcstr (qse_awk_rtx_t* rtx, const qse_cs
}
qse_awk_val_t* qse_awk_rtx_makerexval (
qse_awk_rtx_t* rtx, const qse_cstr_t* str, void* code)
qse_awk_rtx_t* rtx, const qse_cstr_t* str, void* code[2])
{
qse_awk_val_rex_t* val;
qse_size_t totsz;
@ -465,7 +466,8 @@ qse_awk_val_t* qse_awk_rtx_makerexval (
val->str.ptr = (qse_char_t*)(val + 1);
qse_strncpy (val->str.ptr, str->ptr, str->len);
val->code = code;
val->code[0] = code[0];
val->code[1] = code[1];
return (qse_awk_val_t*)val;
}
@ -824,7 +826,7 @@ void qse_awk_rtx_freeval (
/* code is just a pointer to a regular expression stored
* in parse tree nodes. so don't free it.
qse_awk_freerex (rtx->awk, ((qse_awk_val_rex_t*)val)->code);
qse_awk_freerex (rtx->awk, ((qse_awk_val_rex_t*)val)->code[0], ((qse_awk_val_rex_t*)val)->code[1]);
*/
QSE_AWK_FREE (rtx->awk, val);
@ -1717,7 +1719,7 @@ int qse_awk_rtx_setrefval (qse_awk_rtx_t* rtx, qse_awk_val_ref_t* ref, qse_awk_v
qse_awk_rtx_refupval (rtx, val);
x = qse_awk_rtx_setrec (
rtx, (qse_size_t)ref->adr,
&((qse_awk_val_str_t*)val)->val
(qse_cstr_t*)&((qse_awk_val_str_t*)val)->val
);
qse_awk_rtx_refdownval (rtx, val);
return x;
@ -1730,7 +1732,7 @@ int qse_awk_rtx_setrefval (qse_awk_rtx_t* rtx, qse_awk_val_ref_t* ref, qse_awk_v
str.ptr = qse_awk_rtx_valtostrdup (rtx, val, &str.len);
qse_awk_rtx_refupval (rtx, val);
x = qse_awk_rtx_setrec (rtx, (qse_size_t)ref->adr, &str);
x = qse_awk_rtx_setrec (rtx, (qse_size_t)ref->adr, (qse_cstr_t*)&str);
qse_awk_rtx_refdownval (rtx, val);
QSE_AWK_FREE (rtx->awk, str.ptr);
return x;

View File

@ -60,11 +60,9 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
tre_ast_node_t *node;
node = tre_mem_calloc(mem, sizeof(*node));
if (!node)
return NULL;
if (!node) return NULL;
node->obj = tre_mem_calloc(mem, size);
if (!node->obj)
return NULL;
if (!node->obj) return NULL;
node->type = type;
node->nullable = -1;
node->submatch_id = -1;
@ -72,15 +70,13 @@ tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
return node;
}
tre_ast_node_t *
tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
tre_ast_node_t * tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
{
tre_ast_node_t *node;
tre_literal_t *lit;
node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
if (!node)
return NULL;
if (!node) return NULL;
lit = node->obj;
lit->code_min = code_min;
lit->code_max = code_max;
@ -97,8 +93,7 @@ tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
tre_iteration_t *iter;
node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
if (!node)
return NULL;
if (!node) return NULL;
iter = node->obj;
iter->arg = arg;
iter->min = min;
@ -115,8 +110,7 @@ tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
tre_ast_node_t *node;
node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
if (node == NULL)
return NULL;
if (node == NULL) return NULL;
((tre_union_t *)node->obj)->left = left;
((tre_union_t *)node->obj)->right = right;
node->num_submatches = left->num_submatches + right->num_submatches;
@ -131,8 +125,7 @@ tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
tre_ast_node_t *node;
node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
if (node == NULL)
return NULL;
if (node == NULL) return NULL;
((tre_catenation_t *)node->obj)->left = left;
((tre_catenation_t *)node->obj)->right = right;
node->num_submatches = left->num_submatches + right->num_submatches;

View File

@ -88,14 +88,11 @@ tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
DPRINT(("add_tag_left: tag %d\n", tag_id));
c = tre_mem_alloc(mem, sizeof(*c));
if (c == NULL)
return REG_ESPACE;
if (c == NULL) return REG_ESPACE;
c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
if (c->left == NULL)
return REG_ESPACE;
if (c->left == NULL) return REG_ESPACE;
c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
if (c->right == NULL)
return REG_ESPACE;
if (c->right == NULL) return REG_ESPACE;
c->right->obj = node->obj;
c->right->type = node->type;
@ -152,7 +149,6 @@ typedef enum
ADDTAGS_SET_SUBMATCH_END
} tre_addtags_symbol_t;
typedef struct
{
int tag;
@ -763,8 +759,7 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
first_tag = 0;
}
*result = tre_ast_new_literal(mem, min, max, pos);
if (*result == NULL)
status = REG_ESPACE;
if (*result == NULL) status = REG_ESPACE;
if (pos > *max_pos)
*max_pos = pos;
@ -1121,8 +1116,7 @@ tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
tre_pos_and_tags_t *new_set;
new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
if (new_set == NULL)
return NULL;
if (new_set == NULL) return NULL;
new_set[0].position = position;
new_set[0].code_min = code_min;
@ -1150,8 +1144,7 @@ tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
for (s1 = 0; set1[s1].position >= 0; s1++);
for (s2 = 0; set2[s2].position >= 0; s2++);
new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
if (!new_set )
return NULL;
if (!new_set) return NULL;
for (s1 = 0; set1[s1].position >= 0; s1++)
{
@ -1395,15 +1388,10 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
/* Back references: nullable = false, firstpos = {i},
lastpos = {i}. */
node->nullable = 0;
node->firstpos = tre_set_one(mem, lit->position, 0,
TRE_CHAR_MAX, 0, NULL, -1);
if (!node->firstpos)
return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position, 0,
TRE_CHAR_MAX, 0, NULL,
(int)lit->code_max);
if (!node->lastpos)
return REG_ESPACE;
node->firstpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, -1);
if (!node->firstpos) return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position, 0, TRE_CHAR_MAX, 0, NULL, (int)lit->code_max);
if (!node->lastpos) return REG_ESPACE;
}
else if (lit->code_min < 0)
{
@ -1422,18 +1410,10 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
/* Literal at position i: nullable = false, firstpos = {i},
lastpos = {i}. */
node->nullable = 0;
node->firstpos =
tre_set_one(mem, lit->position, (int)lit->code_min,
(int)lit->code_max, 0, NULL, -1);
if (!node->firstpos)
return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position,
(int)lit->code_min,
(int)lit->code_max,
lit->u.class, lit->neg_classes,
-1);
if (!node->lastpos)
return REG_ESPACE;
node->firstpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, 0, NULL, -1);
if (!node->firstpos) return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, lit->u.class, lit->neg_classes, -1);
if (!node->lastpos) return REG_ESPACE;
}
break;
}
@ -1628,6 +1608,7 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
int i, j, k, l, dup, prev_p2_pos;
if (transitions != NULL)
{
while (p1->position >= 0)
{
p2 = orig_p2;
@ -1814,7 +1795,9 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
}
p1++;
}
}
else
{
/* Compute a maximum limit for the number of transitions leaving
from each state. */
while (p1->position >= 0)
@ -1827,6 +1810,7 @@ tre_make_trans(qse_mmgr_t* mmgr, tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
}
p1++;
}
}
return REG_OK;
}

View File

@ -168,23 +168,18 @@ typedef struct tre_backtrack_struct
while (/*CONSTCOND*/0)
#define BT_STACK_POP() \
do \
{ \
do { \
int i; \
assert(stack->prev); \
pos = stack->item.pos; \
if (type == STR_USER) \
str_source->rewind(pos + pos_add_next, str_source->context); \
str_byte = stack->item.str_byte; \
BT_STACK_WIDE_OUT; \
state = stack->item.state; \
next_c = stack->item.next_c; \
for (i = 0; i < tnfa->num_tags; i++) \
tags[i] = stack->item.tags[i]; \
for (i = 0; i < tnfa->num_tags; i++) tags[i] = stack->item.tags[i]; \
BT_STACK_MBSTATE_OUT; \
stack = stack->prev; \
} \
while (/*CONSTCOND*/0)
} while (/*CONSTCOND*/0)
#undef MIN
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
@ -208,7 +203,6 @@ tre_tnfa_run_backtrack(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *str
int reg_notbol = eflags & REG_NOTBOL;
int reg_noteol = eflags & REG_NOTEOL;
int reg_newline = tnfa->cflags & REG_NEWLINE;
int str_user_end = 0;
/* These are used to remember the necessary values of the above
variables to return to the position where the current search
@ -302,8 +296,6 @@ retry:
state = NULL;
pos = pos_start;
if (type == STR_USER)
str_source->rewind(pos + pos_add_next, str_source->context);
GET_NEXT_WCHAR();
pos_start = pos;
next_c_start = next_c;
@ -446,15 +438,11 @@ retry:
if (len < 0)
{
if (type == STR_USER)
result = str_source->compare((unsigned)so, (unsigned)pos,
(unsigned)bt_len,
str_source->context);
#ifdef TRE_WCHAR
else if (type == STR_WIDE)
if (type == STR_WIDE)
result = qse_wcszcmp((const qse_wchar_t*)string + so, str_wide - 1, (size_t)bt_len);
#endif /* TRE_WCHAR */
else
#endif /* TRE_WCHAR */
result = qse_mbszcmp((const char*)string + so, str_byte - 1, (size_t)bt_len);
}
else if (len - pos < bt_len)
@ -508,12 +496,7 @@ retry:
/* Check for end of string. */
if (len < 0)
{
if (type == STR_USER)
{
if (str_user_end)
goto backtrack;
}
else if (next_c == QSE_T('\0'))
if (next_c == QSE_T('\0'))
goto backtrack;
}
else
@ -533,8 +516,8 @@ retry:
trans_i->code_min, trans_i->code_max,
trans_i->code_min, trans_i->code_max,
trans_i->assertions, trans_i->state_id));
if (trans_i->code_min <= (tre_cint_t)prev_c &&
trans_i->code_max >= (tre_cint_t)prev_c)
if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c)
{
if (trans_i->assertions
&& (CHECK_ASSERTIONS(trans_i->assertions)

View File

@ -325,12 +325,7 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
/* Check for end of string. */
if (len < 0)
{
if (type == STR_USER)
{
if (str_user_end)
break;
}
else if (next_c == QSE_T('\0'))
if (next_c == QSE_T('\0'))
break;
}
else
@ -408,28 +403,28 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
for (trans_i = reach_i->state; trans_i->state; trans_i++)
{
/* Does this transition match the input symbol? */
if (trans_i->code_min <= (tre_cint_t)prev_c &&
trans_i->code_max >= (tre_cint_t)prev_c)
if (trans_i->code_min <= (tre_cint_t)prev_c && trans_i->code_max >= (tre_cint_t)prev_c)
{
if (trans_i->assertions
&& (CHECK_ASSERTIONS(trans_i->assertions)
|| CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
if (trans_i->assertions &&
(CHECK_ASSERTIONS(trans_i->assertions) ||
CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
{
DPRINT(("assertion failed\n"));
continue;
}
/* Compute the tags after this transition. */
for (i = 0; i < num_tags; i++)
tmp_tags[i] = reach_i->tags[i];
for (i = 0; i < num_tags; i++) tmp_tags[i] = reach_i->tags[i];
tag_i = trans_i->tags;
if (tag_i != NULL)
{
while (*tag_i >= 0)
{
if (*tag_i < num_tags)
tmp_tags[*tag_i] = pos;
tag_i++;
}
}
if (reach_pos[trans_i->state_id].pos < pos)
{
@ -442,15 +437,12 @@ tre_tnfa_run_parallel(qse_mmgr_t* mmgr, const tre_tnfa_t *tnfa, const void *stri
reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
if (reach_next_i->state == tnfa->final
&& (match_eo == -1
|| (num_tags > 0
&& reach_next_i->tags[0] <= match_tags[0])))
&& (match_eo == -1 || (num_tags > 0 && reach_next_i->tags[0] <= match_tags[0])))
{
DPRINT((" found match %p\n", trans_i->state));
match_eo = pos;
new_match = 1;
for (i = 0; i < num_tags; i++)
match_tags[i] = reach_next_i->tags[i];
for (i = 0; i < num_tags; i++) match_tags[i] = reach_next_i->tags[i];
}
reach_next_i++;

View File

@ -52,8 +52,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define str_source ((const tre_str_source*)string)
#ifdef TRE_WCHAR
#ifdef TRE_MULTIBYTE
@ -116,12 +114,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
} \
} \
} \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
str_source->context); \
} \
} while(/*CONSTCOND*/0)
#else /* !TRE_MULTIBYTE */
@ -143,11 +135,6 @@ do { \
if (len >= 0 && pos >= len) next_c = QSE_T('\0'); \
else next_c = *str_wide++; \
} \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, str_source->context); \
} \
} while(/*CONSTCOND*/0)
#endif /* !TRE_MULTIBYTE */
@ -162,16 +149,8 @@ do { \
if (type == STR_BYTE) \
{ \
pos++; \
if (len >= 0 && pos >= len) \
next_c = '\0'; \
else \
next_c = (unsigned char)(*str_byte++); \
} \
else if (type == STR_USER) \
{ \
pos += pos_add_next; \
str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
str_source->context); \
if (len >= 0 && pos >= len) next_c = '\0'; \
else next_c = (unsigned char)(*str_byte++); \
} \
} while(/*CONSTCOND*/0)

View File

@ -138,8 +138,7 @@ tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
}
static reg_errcode_t
tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i,
tre_ast_node_t ***items)
tre_new_item(tre_mem_t mem, int min, int max, int *i, int *max_i, tre_ast_node_t ***items)
{
reg_errcode_t status;
tre_ast_node_t **array = *items;
@ -306,8 +305,7 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
/* END QSE */
if (status == REG_OK)
{
status = tre_expand_ctype(ctx->mem, class, items,
&i, &max_i, ctx->cflags);
status = tre_expand_ctype(ctx->mem, class, items, &i, &max_i, ctx->cflags);
class = (tre_ctype_t)0;
skip = 1;
}
@ -328,25 +326,25 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
min = max = *re++;
}
if (status != REG_OK)
break;
if (status != REG_OK) break;
if (class && negate)
{
if (*num_neg_classes >= MAX_NEG_CLASSES)
status = REG_ESPACE;
else
neg_classes[(*num_neg_classes)++] = class;
}
else if (!skip)
{
status = tre_new_item(ctx->mem, min, max, &i, &max_i, items);
if (status != REG_OK)
break;
if (status != REG_OK) break;
((tre_literal_t*)((*items)[i-1])->obj)->u.class = class;
}
/* Add opposite-case counterparts if REG_ICASE is present.
This is broken if there are more than two "same" characters. */
if (ctx->cflags & REG_ICASE && !class && status == REG_OK && !skip)
if ((ctx->cflags & REG_ICASE) && !class && status == REG_OK && !skip)
{
tre_cint_t cmin, ccurr;
@ -356,27 +354,21 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
if (tre_islower(min))
{
cmin = ccurr = tre_toupper(min++);
while (tre_islower(min) && tre_toupper(min) == ccurr + 1
&& min <= max)
while (tre_islower(min) && tre_toupper(min) == ccurr + 1 && min <= max)
ccurr = tre_toupper(min++);
status = tre_new_item(ctx->mem, cmin, ccurr,
&i, &max_i, items);
status = tre_new_item(ctx->mem, cmin, ccurr, &i, &max_i, items);
}
else if (tre_isupper(min))
{
cmin = ccurr = tre_tolower(min++);
while (tre_isupper(min) && tre_tolower(min) == ccurr + 1
&& min <= max)
while (tre_isupper(min) && tre_tolower(min) == ccurr + 1 && min <= max)
ccurr = tre_tolower(min++);
status = tre_new_item(ctx->mem, cmin, ccurr,
&i, &max_i, items);
status = tre_new_item(ctx->mem, cmin, ccurr, &i, &max_i, items);
}
else min++;
if (status != REG_OK)
break;
if (status != REG_OK) break;
}
if (status != REG_OK)
break;
if (status != REG_OK) break;
}
}
}
@ -399,8 +391,7 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
/* Start off with an array of `max_i' elements. */
items = xmalloc(ctx->mem->mmgr, sizeof(*items) * max_i);
if (items == NULL)
return REG_ESPACE;
if (items == NULL) return REG_ESPACE;
if (*ctx->re == CHAR_CARET)
{
@ -409,15 +400,11 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
ctx->re++;
}
status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes,
&items, &i, &max_i);
if (status != REG_OK)
goto parse_bracket_done;
status = tre_parse_bracket_items(ctx, negate, neg_classes, &num_neg_classes, &items, &i, &max_i);
if (status != REG_OK) goto parse_bracket_done;
/* Sort the array if we need to negate it. */
if (negate)
qse_qsort(items, (unsigned)i, sizeof(*items), tre_compare_items, QSE_NULL);
if (negate) qse_qsort(items, (unsigned)i, sizeof(*items), tre_compare_items, QSE_NULL);
curr_max = curr_min = 0;
/* Build a union of the items in the array, negated if necessary. */
@ -466,22 +453,23 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
l->position = ctx->position;
if (num_neg_classes > 0)
{
l->neg_classes = tre_mem_alloc(ctx->mem,
(sizeof(l->neg_classes)
* (num_neg_classes + 1)));
l->neg_classes = tre_mem_alloc(ctx->mem, (sizeof(l->neg_classes) * (num_neg_classes + 1)));
if (l->neg_classes == NULL)
{
status = REG_ESPACE;
break;
}
for (k = 0; k < num_neg_classes; k++)
l->neg_classes[k] = neg_classes[k];
for (k = 0; k < num_neg_classes; k++) l->neg_classes[k] = neg_classes[k];
l->neg_classes[k] = (tre_ctype_t)0;
}
else
{
l->neg_classes = NULL;
}
if (node == NULL)
{
node = items[j];
}
else
{
u = tre_ast_new_union(ctx->mem, node, items[j]);
@ -492,8 +480,7 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
}
}
if (status != REG_OK)
goto parse_bracket_done;
if (status != REG_OK) goto parse_bracket_done;
if (negate)
{
@ -501,7 +488,9 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
DPRINT(("final: creating %d - %d\n", curr_min, (int)TRE_CHAR_MAX));
n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX, ctx->position);
if (n == NULL)
{
status = REG_ESPACE;
}
else
{
tre_literal_t *l = n->obj;
@ -520,21 +509,23 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
l->neg_classes[k] = (tre_ctype_t)0;
}
else
{
l->neg_classes = NULL;
}
if (node == NULL)
{
node = n;
}
else
{
u = tre_ast_new_union(ctx->mem, node, n);
if (u == NULL)
status = REG_ESPACE;
if (u == NULL) status = REG_ESPACE;
node = u;
}
}
}
if (status != REG_OK)
goto parse_bracket_done;
if (status != REG_OK) goto parse_bracket_done;
#ifdef TRE_DEBUG
tre_ast_print(node);
@ -814,8 +805,7 @@ tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
if (min == 0 && max == 0)
{
*result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
if (*result == NULL)
return REG_ESPACE;
if (*result == NULL) return REG_ESPACE;
}
else
{
@ -909,9 +899,7 @@ typedef enum
PARSE_RESTORE_CFLAGS
} tre_parse_re_stack_symbol_t;
reg_errcode_t
tre_parse(tre_parse_ctx_t *ctx)
reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
{
tre_ast_node_t *result = NULL;
tre_parse_re_stack_symbol_t symbol;
@ -941,8 +929,8 @@ tre_parse(tre_parse_ctx_t *ctx)
call stack, and efficiency (both in lines of code and speed). */
while (tre_stack_num_objects(stack) > bottom && status == REG_OK)
{
if (status != REG_OK)
break;
if (status != REG_OK) break;
symbol = tre_stack_pop_int(stack);
switch (symbol)
{
@ -978,8 +966,8 @@ tre_parse(tre_parse_ctx_t *ctx)
/* If the expression has not ended, parse another piece. */
{
tre_char_t c;
if (ctx->re >= ctx->re_end)
break;
if (ctx->re >= ctx->re_end) break;
c = *ctx->re;
#ifdef REG_LITERAL
if (!(ctx->cflags & REG_LITERAL))
@ -1039,11 +1027,9 @@ tre_parse(tre_parse_ctx_t *ctx)
}
case PARSE_UNION:
if (ctx->re >= ctx->re_end)
break;
if (ctx->re >= ctx->re_end) break;
#ifdef REG_LITERAL
if (ctx->cflags & REG_LITERAL)
break;
if (ctx->cflags & REG_LITERAL) break;
#endif /* REG_LITERAL */
switch (*ctx->re)
{
@ -1089,8 +1075,7 @@ tre_parse(tre_parse_ctx_t *ctx)
{
case CHAR_PLUS:
case CHAR_QUESTIONMARK:
if (!(ctx->cflags & REG_EXTENDED))
break;
if (!(ctx->cflags & REG_EXTENDED)) break;
/*FALLTHROUGH*/
case CHAR_STAR:
/* QSE - added this label */
@ -1143,8 +1128,9 @@ parse_star:
return REG_ESPACE;
result = tmp_node;
STACK_PUSHX(stack, int, PARSE_POSTFIX);
}
break;
}
case CHAR_BACKSLASH:
/* "\{" is special without REG_EXTENDED */
@ -1200,17 +1186,16 @@ parse_brace:
break;
case PARSE_ATOM:
/* Parse an atom. An atom is a regular expression enclosed in `()',
an empty set of `()', a bracket expression, `.', `^', `$',
a `\' followed by a character, or a single character. */
/* End of regexp? (empty string). */
if (ctx->re >= ctx->re_end)
goto parse_literal;
if (ctx->re >= ctx->re_end) goto parse_literal;
#ifdef REG_LITERAL
if (ctx->cflags & REG_LITERAL)
goto parse_literal;
if (ctx->cflags & REG_LITERAL) goto parse_literal;
#endif /* REG_LITERAL */
switch (*ctx->re)
@ -1226,15 +1211,13 @@ parse_brace:
{
int new_cflags = ctx->cflags;
int bit = 1;
DPRINT(("tre_parse: extension: '%.*" STRF "\n",
REST(ctx->re)));
DPRINT(("tre_parse: extension: '%.*" STRF "\n", REST(ctx->re)));
ctx->re += 2;
while (/*CONSTCOND*/1)
{
if (*ctx->re == QSE_T('i'))
{
DPRINT(("tre_parse: icase: '%.*" STRF "\n",
REST(ctx->re)));
DPRINT(("tre_parse: icase: '%.*" STRF "\n", REST(ctx->re)));
if (bit)
new_cflags |= REG_ICASE;
else
@ -1243,8 +1226,7 @@ parse_brace:
}
else if (*ctx->re == QSE_T('n'))
{
DPRINT(("tre_parse: newline: '%.*" STRF "\n",
REST(ctx->re)));
DPRINT(("tre_parse: newline: '%.*" STRF "\n", REST(ctx->re)));
if (bit)
new_cflags |= REG_NEWLINE;
else
@ -1254,8 +1236,7 @@ parse_brace:
#ifdef REG_RIGHT_ASSOC
else if (*ctx->re == QSE_T('r'))
{
DPRINT(("tre_parse: right assoc: '%.*" STRF "\n",
REST(ctx->re)));
DPRINT(("tre_parse: right assoc: '%.*" STRF "\n", REST(ctx->re)));
if (bit)
new_cflags |= REG_RIGHT_ASSOC;
else
@ -1266,8 +1247,7 @@ parse_brace:
#ifdef REG_UNGREEDY
else if (*ctx->re == QSE_T('U'))
{
DPRINT(("tre_parse: ungreedy: '%.*" STRF "\n",
REST(ctx->re)));
DPRINT(("tre_parse: ungreedy: '%.*" STRF "\n", REST(ctx->re)));
if (bit)
new_cflags |= REG_UNGREEDY;
else
@ -1366,29 +1346,24 @@ parse_brace:
|| (ctx->re > ctx->re_start
&& *(ctx->re - 1) == CHAR_BACKSLASH))
{
DPRINT(("tre_parse: empty: '%.*" STRF "'\n",
REST(ctx->re)));
DPRINT(("tre_parse: empty: '%.*" STRF "'\n", REST(ctx->re)));
/* We were expecting an atom, but instead the current
subexpression was closed. POSIX leaves the meaning of
this to be implementation-defined. We interpret this as
an empty expression (which matches an empty string). */
result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
if (result == NULL)
return REG_ESPACE;
if (!(ctx->cflags & REG_EXTENDED))
ctx->re--;
if (result == NULL) return REG_ESPACE;
if (!(ctx->cflags & REG_EXTENDED)) ctx->re--;
}
else
goto parse_literal;
break;
case CHAR_LBRACKET: /* bracket expression */
DPRINT(("tre_parse: bracket: '%.*" STRF "'\n",
REST(ctx->re)));
DPRINT(("tre_parse: bracket: '%.*" STRF "'\n", REST(ctx->re)));
ctx->re++;
status = tre_parse_bracket(ctx, &result);
if (status != REG_OK)
return status;
if (status != REG_OK) return status;
break;
case CHAR_BACKSLASH:
@ -1407,8 +1382,7 @@ parse_brace:
/* If a macro is used, parse the expanded macro recursively. */
{
tre_char_t buf[64];
tre_expand_macro(ctx->re + 1, ctx->re_end,
buf, QSE_COUNTOF(buf));
tre_expand_macro(ctx->re + 1, ctx->re_end, buf, QSE_COUNTOF(buf));
if (buf[0] != 0)
{
tre_parse_ctx_t subctx;
@ -1426,8 +1400,10 @@ parse_brace:
}
if (ctx->re + 1 >= ctx->re_end)
{
/* Trailing backslash. */
return REG_EESCAPE;
}
#ifdef REG_LITERAL
if (*(ctx->re + 1) == QSE_T('Q'))
@ -1447,23 +1423,19 @@ parse_brace:
switch (*ctx->re)
{
case QSE_T('b'):
result = tre_ast_new_literal(ctx->mem, ASSERTION,
ASSERT_AT_WB, -1);
result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
ctx->re++;
break;
case QSE_T('B'):
result = tre_ast_new_literal(ctx->mem, ASSERTION,
ASSERT_AT_WB_NEG, -1);
result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
ctx->re++;
break;
case QSE_T('<'):
result = tre_ast_new_literal(ctx->mem, ASSERTION,
ASSERT_AT_BOW, -1);
result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
ctx->re++;
break;
case QSE_T('>'):
result = tre_ast_new_literal(ctx->mem, ASSERTION,
ASSERT_AT_EOW, -1);
result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
ctx->re++;
break;
case QSE_T('x'):
@ -1503,8 +1475,7 @@ parse_brace:
ctx->re++;
}
result = tre_ast_new_literal(ctx->mem, (int)val,
(int)val, ctx->position);
result = tre_ast_new_literal(ctx->mem, (int)val, (int)val, ctx->position);
ctx->position++;
break;
}
@ -1552,8 +1523,7 @@ parse_brace:
return REG_EBRACE;
}
result = tre_ast_new_literal(ctx->mem, (int)val, (int)val,
ctx->position);
result = tre_ast_new_literal(ctx->mem, (int)val, (int)val, ctx->position);
ctx->position++;
break;
}
@ -1564,12 +1534,9 @@ parse_brace:
{
/* Back reference. */
int val = *ctx->re - QSE_T('0');
DPRINT(("tre_parse: backref: '%.*" STRF "'\n",
REST(ctx->re - 1)));
result = tre_ast_new_literal(ctx->mem, BACKREF, val,
ctx->position);
if (result == NULL)
return REG_ESPACE;
DPRINT(("tre_parse: backref: '%.*" STRF "'\n", REST(ctx->re - 1)));
result = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position);
if (result == NULL) return REG_ESPACE;
ctx->position++;
ctx->max_backref = MAX(val, ctx->max_backref);
ctx->re++;
@ -1577,8 +1544,7 @@ parse_brace:
else
{
/* Escaped character. */
DPRINT(("tre_parse: escaped: '%.*" STRF "'\n",
REST(ctx->re - 1)));
DPRINT(("tre_parse: escaped: '%.*" STRF "'\n", REST(ctx->re - 1)));
result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position);
ctx->position++;
ctx->re++;
@ -1596,25 +1562,20 @@ parse_brace:
{
tre_ast_node_t *tmp1;
tre_ast_node_t *tmp2;
tmp1 = tre_ast_new_literal(ctx->mem, 0, QSE_T('\n') - 1,
ctx->position);
if (!tmp1)
return REG_ESPACE;
tmp2 = tre_ast_new_literal(ctx->mem, QSE_T('\n') + 1, TRE_CHAR_MAX,
ctx->position + 1);
if (!tmp2)
return REG_ESPACE;
/* exclude new line */
tmp1 = tre_ast_new_literal(ctx->mem, 0, QSE_T('\n') - 1, ctx->position);
if (!tmp1) return REG_ESPACE;
tmp2 = tre_ast_new_literal(ctx->mem, QSE_T('\n') + 1, TRE_CHAR_MAX, ctx->position + 1);
if (!tmp2) return REG_ESPACE;
result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
if (!result)
return REG_ESPACE;
if (!result) return REG_ESPACE;
ctx->position += 2;
}
else
{
result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX,
ctx->position);
if (!result)
return REG_ESPACE;
/* all characters */
result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position);
if (!result) return REG_ESPACE;
ctx->position++;
}
ctx->re++;
@ -1631,10 +1592,8 @@ parse_brace:
{
DPRINT(("tre_parse: BOL: '%.*" STRF "'\n",
REST(ctx->re)));
result = tre_ast_new_literal(ctx->mem, ASSERTION,
ASSERT_AT_BOL, -1);
if (result == NULL)
return REG_ESPACE;
result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
if (result == NULL) return REG_ESPACE;
ctx->re++;
}
else
@ -1652,8 +1611,7 @@ parse_brace:
{
DPRINT(("tre_parse: EOL: '%.*" STRF "'\n",
REST(ctx->re)));
result = tre_ast_new_literal(ctx->mem, ASSERTION,
ASSERT_AT_EOL, -1);
result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
if (result == NULL)
return REG_ESPACE;
ctx->re++;
@ -1668,8 +1626,7 @@ parse_literal:
if (temporary_cflags && ctx->re + 1 < ctx->re_end
&& *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == QSE_T('E'))
{
DPRINT(("tre_parse: end tmps: '%.*" STRF "'\n",
REST(ctx->re)));
DPRINT(("tre_parse: end tmps: '%.*" STRF "'\n", REST(ctx->re)));
ctx->cflags &= ~temporary_cflags;
temporary_cflags = 0;
ctx->re += 2;
@ -1701,11 +1658,9 @@ parse_literal:
&& *ctx->re == CHAR_BACKSLASH
&& *(ctx->re + 1) == CHAR_LBRACE)))
{
DPRINT(("tre_parse: empty: '%.*" STRF "'\n",
REST(ctx->re)));
DPRINT(("tre_parse: empty: '%.*" STRF "'\n", REST(ctx->re)));
result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
if (!result)
return REG_ESPACE;
if (!result) return REG_ESPACE;
break;
}
@ -1714,8 +1669,7 @@ parse_literal:
/* Note that we can't use a tre_isalpha() test here, since there
may be characters which are alphabetic but neither upper nor
lower case. */
if (ctx->cflags & REG_ICASE
&& (tre_isupper(*ctx->re) || tre_islower(*ctx->re)))
if (ctx->cflags & REG_ICASE && (tre_isupper(*ctx->re) || tre_islower(*ctx->re)))
{
tre_ast_node_t *tmp1;
tre_ast_node_t *tmp2;
@ -1728,26 +1682,17 @@ parse_literal:
that at least for multi-character collating elements there
could be several opposite-case counterparts, but they
cannot be supported portably anyway. */
tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re),
tre_toupper(*ctx->re),
ctx->position);
if (!tmp1)
return REG_ESPACE;
tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re),
tre_tolower(*ctx->re),
ctx->position);
if (!tmp2)
return REG_ESPACE;
tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(*ctx->re), tre_toupper(*ctx->re), ctx->position);
if (!tmp1) return REG_ESPACE;
tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(*ctx->re), tre_tolower(*ctx->re), ctx->position);
if (!tmp2) return REG_ESPACE;
result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
if (!result)
return REG_ESPACE;
if (!result) return REG_ESPACE;
}
else
{
result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
ctx->position);
if (!result)
return REG_ESPACE;
result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position);
if (!result) return REG_ESPACE;
}
ctx->position++;
ctx->re++;
@ -1763,11 +1708,9 @@ parse_literal:
{
tre_ast_node_t *n, *tmp_node;
n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
if (n == NULL)
return REG_ESPACE;
if (n == NULL) return REG_ESPACE;
tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
if (tmp_node == NULL)
return REG_ESPACE;
if (tmp_node == NULL) return REG_ESPACE;
tmp_node->num_submatches = result->num_submatches;
result = tmp_node;
}

View File

@ -205,15 +205,6 @@ static int tre_match(
if (tnfa->have_backrefs || (eflags & REG_BACKTRACKING_MATCHER))
{
/* The regex has back references, use the backtracking matcher. */
if (type == STR_USER)
{
const tre_str_source *source = string;
if (source->rewind == QSE_NULL || source->compare == QSE_NULL)
/* The backtracking matcher requires rewind and compare
capabilities from the input stream. */
return REG_BADPAT;
}
status = tre_tnfa_run_backtrack (
preg->mmgr, tnfa, string, (int)len, type,
tags, eflags, &eo);
@ -266,15 +257,6 @@ int qse_tre_exec (
return qse_tre_execx (tre, str, (qse_size_t)-1, pmatch, nmatch, eflags);
}
#if 0
int qse_tre_execsrc (
const regex_t *preg, const tre_str_source *str,
qse_size_t nmatch, regmatch_t pmatch[], int eflags)
{
return tre_match (preg, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
}
#endif
qse_tre_errnum_t qse_tre_geterrnum (qse_tre_t* tre)
{
return tre->errnum;

View File

@ -177,7 +177,6 @@ typedef qse_cint_t tre_cint_t;
#define regex_t qse_tre_t
#define regmatch_t qse_tre_match_t
#define reg_errcode_t qse_tre_errnum_t
#define tre_str_source qse_tre_strsrc_t
#define REG_OK QSE_TRE_ENOERR
@ -278,7 +277,7 @@ typedef qse_pma_t* tre_mem_t;
typedef qse_ctype_t tre_ctype_t;
#define tre_isctype(c,t) QSE_ISCTYPE(c,t)
typedef enum { STR_WIDE, STR_BYTE, STR_MBS, STR_USER } tre_str_type_t;
typedef enum { STR_WIDE, STR_BYTE, STR_MBS } tre_str_type_t;
/* Returns number of bytes to add to (char *)ptr to make it
properly aligned for the type. */
@ -305,6 +304,9 @@ typedef struct tnfa_transition tre_tnfa_transition_t;
struct tnfa_transition
{
/* Range of accepted characters. */
/* QSE: indicates that code_min .. code_max is not yet negated for ^ in a bracket */
int negate_range;
/* END QSE */
tre_cint_t code_min;
tre_cint_t code_max;
/* Pointer to the destination state. */