Fixed a parser bug in awk.
- handling of idiv operators
- tokenization of /=/ as a regular expression
- calling QSE_AWK_FREEREX to free compiled regular expressions

Switched to a new regular expression handler.
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: awk.h 291 2009-09-21 13:28:18Z hyunghwan.chung $
|
||||
* $Id: awk.h 312 2009-12-10 13:03:54Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -383,7 +383,6 @@ struct qse_awk_rtx_t
|
||||
|
||||
|
||||
#define QSE_AWK_FREEREX(awk,code) qse_freerex((awk)->mmgr,code)
|
||||
#define QSE_AWK_ISEMPTYREX(awk,code) qse_isemptyrex(code)
|
||||
#define QSE_AWK_BUILDREX(awk,ptn,len,errnum) \
|
||||
qse_awk_buildrex(awk,ptn,len,errnum)
|
||||
#define QSE_AWK_MATCHREX(awk,code,option,str,len,substr,sublen,match,errnum) \
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: fnc.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
|
||||
* $Id: fnc.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -1118,7 +1118,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
|
||||
}
|
||||
}
|
||||
|
||||
opt = (run->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0;
|
||||
opt = (run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0;
|
||||
|
||||
a2_end = a2_ptr + a2_len;
|
||||
cur_ptr = a2_ptr;
|
||||
@ -1331,12 +1331,28 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
|
||||
if (nargs >= 3)
|
||||
{
|
||||
qse_awk_val_t* a2;
|
||||
qse_real_t rv;
|
||||
|
||||
a2 = qse_awk_rtx_getarg (rtx, 2);
|
||||
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
|
||||
if (n <= -1) return -1;
|
||||
if (n >= 1) start = (qse_long_t)rv;
|
||||
#if 0
|
||||
if (a2->type == QSE_AWK_VAL_MAP)
|
||||
{
|
||||
/* if the 3rd paramater is an array,
|
||||
* it is a placeholder to store parenthesized
|
||||
* subexpressions */
|
||||
|
||||
/* TODO: please implement this... */
|
||||
start = 0;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
qse_real_t rv;
|
||||
/* if the 3rd parameter is not an array,
|
||||
* it is treated as a match start index */
|
||||
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
|
||||
if (n <= -1) return -1;
|
||||
if (n >= 1) start = (qse_long_t)rv;
|
||||
}
|
||||
}
|
||||
|
||||
if (a0->type == QSE_AWK_VAL_STR)
|
||||
@ -1394,7 +1410,7 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
|
||||
{
|
||||
n = QSE_AWK_MATCHREX (
|
||||
rtx->awk, rex,
|
||||
(rtx->gbl.ignorecase? QSE_REX_MATCH_IGNORECASE: 0),
|
||||
(rtx->gbl.ignorecase? QSE_REX_IGNORECASE: 0),
|
||||
str0+start-1, len0-start+1,/*TODO: must use str0,len0?*/
|
||||
str0+start-1, len0-start+1,
|
||||
&mat, &errnum
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: misc.c 311 2009-12-09 11:35:54Z hyunghwan.chung $
|
||||
* $Id: misc.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -860,7 +860,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
|
||||
{
|
||||
n = QSE_AWK_MATCHREX (
|
||||
rtx->awk, rex,
|
||||
((rtx->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0),
|
||||
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
|
||||
str, len, ptr, left, &match, errnum);
|
||||
if (n == -1) return QSE_NULL;
|
||||
if (n == 0)
|
||||
@ -1052,7 +1052,7 @@ void* qse_awk_buildrex (
|
||||
|
||||
p = qse_buildrex (
|
||||
awk->mmgr, awk->rex.depth.max.build,
|
||||
((awk->option&QSE_AWK_REXBOUND)? 0:QSE_REX_BUILD_NOBOUND),
|
||||
((awk->option&QSE_AWK_REXBOUND)? 0:QSE_REX_NOBOUND),
|
||||
ptn, len, &err
|
||||
);
|
||||
if (p == QSE_NULL) *errnum = QSE_AWK_REXERRTOERR(err);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: parse.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
|
||||
* $Id: parse.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -4012,7 +4012,7 @@ static qse_awk_nde_t* parse_primary_nogetline (
|
||||
|
||||
return (qse_awk_nde_t*)nde;
|
||||
}
|
||||
else if (MATCH(awk,TOK_DIV))
|
||||
else if (MATCH(awk,TOK_DIV) || MATCH(awk,TOK_DIV_ASSN))
|
||||
{
|
||||
qse_awk_nde_rex_t* nde;
|
||||
qse_awk_errnum_t errnum;
|
||||
@ -4021,8 +4021,16 @@ static qse_awk_nde_t* parse_primary_nogetline (
|
||||
* of the context-sensitivity of the slash symbol.
|
||||
* if TOK_DIV is seen as a primary, it tries to compile
|
||||
* it as a regular expression */
|
||||
SET_TOKEN_TYPE (awk, &awk->tok, TOK_REX);
|
||||
qse_str_clear (awk->tok.name);
|
||||
|
||||
if (MATCH(awk,TOK_DIV_ASSN) &&
|
||||
qse_str_ccat (awk->tok.name, QSE_T('=')) == (qse_size_t)-1)
|
||||
{
|
||||
SETERR_LOC (awk, QSE_AWK_ENOMEM, xloc);
|
||||
return QSE_NULL;
|
||||
}
|
||||
|
||||
SET_TOKEN_TYPE (awk, &awk->tok, TOK_REX);
|
||||
if (get_rexstr (awk, &awk->tok) <= -1) return QSE_NULL;
|
||||
|
||||
QSE_ASSERT (MATCH(awk,TOK_REX));
|
||||
@ -4064,8 +4072,8 @@ static qse_awk_nde_t* parse_primary_nogetline (
|
||||
|
||||
if (get_token(awk) <= -1)
|
||||
{
|
||||
QSE_AWK_FREEREX (awk, nde->code);
|
||||
QSE_AWK_FREE (awk, nde->ptr);
|
||||
QSE_AWK_FREE (awk, nde->code);
|
||||
QSE_AWK_FREE (awk, nde);
|
||||
return QSE_NULL;
|
||||
}
|
||||
@ -4274,7 +4282,10 @@ static qse_awk_nde_t* parse_primary_nogetline (
|
||||
&awk->ptok.loc
|
||||
);
|
||||
}
|
||||
else SETERR_TOK (awk, QSE_AWK_EEXPRNR);
|
||||
else
|
||||
{
|
||||
SETERR_TOK (awk, QSE_AWK_EEXPRNR);
|
||||
}
|
||||
|
||||
return QSE_NULL;
|
||||
}
|
||||
@ -5480,10 +5491,10 @@ static int get_symbols (qse_awk_t* awk, qse_cint_t c, qse_awk_tok_t* tok)
|
||||
{ QSE_T("**"), 2, TOK_EXP, QSE_AWK_EXTRAOPS },
|
||||
{ QSE_T("*="), 2, TOK_MUL_ASSN, 0 },
|
||||
{ QSE_T("*"), 1, TOK_MUL, 0 },
|
||||
{ QSE_T("//="), 3, TOK_IDIV_ASSN, 0 },
|
||||
{ QSE_T("//"), 2, TOK_IDIV, QSE_AWK_EXTRAOPS },
|
||||
{ QSE_T("/="), 2, TOK_DIV_ASSN, QSE_AWK_EXTRAOPS },
|
||||
{ QSE_T("/="), 2, TOK_DIV_ASSN, 0 },
|
||||
{ QSE_T("/"), 1, TOK_DIV, 0 },
|
||||
{ QSE_T("\\="), 2, TOK_IDIV_ASSN, QSE_AWK_EXTRAOPS },
|
||||
{ QSE_T("\\"), 1, TOK_IDIV, QSE_AWK_EXTRAOPS },
|
||||
{ QSE_T("%="), 2, TOK_MOD_ASSN, 0 },
|
||||
{ QSE_T("%"), 1, TOK_MOD, 0 },
|
||||
{ QSE_T("~"), 1, TOK_TILDE, 0 },
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: rio.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
|
||||
* $Id: rio.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -280,7 +280,7 @@ int qse_awk_rtx_readio (
|
||||
|
||||
n = QSE_AWK_MATCHREX (
|
||||
run->awk, run->gbl.rs,
|
||||
((run->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0),
|
||||
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
|
||||
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
|
||||
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
|
||||
&match, &errnum);
|
||||
@ -374,7 +374,7 @@ int qse_awk_rtx_readio (
|
||||
|
||||
n = QSE_AWK_MATCHREX (
|
||||
run->awk, run->gbl.rs,
|
||||
((run->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0),
|
||||
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
|
||||
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
|
||||
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
|
||||
&match, &errnum);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: run.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
|
||||
* $Id: run.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -930,12 +930,12 @@ static void fini_rtx (qse_awk_rtx_t* rtx, int fini_globals)
|
||||
|
||||
if (rtx->gbl.rs != QSE_NULL)
|
||||
{
|
||||
QSE_AWK_FREE (rtx->awk, rtx->gbl.rs);
|
||||
QSE_AWK_FREEREX (rtx->awk, rtx->gbl.rs);
|
||||
rtx->gbl.rs = QSE_NULL;
|
||||
}
|
||||
if (rtx->gbl.fs != QSE_NULL)
|
||||
{
|
||||
QSE_AWK_FREE (rtx->awk, rtx->gbl.fs);
|
||||
QSE_AWK_FREEREX (rtx->awk, rtx->gbl.fs);
|
||||
rtx->gbl.fs = QSE_NULL;
|
||||
}
|
||||
|
||||
@ -3101,6 +3101,13 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
|
||||
|
||||
if (v->type == QSE_AWK_VAL_REX)
|
||||
{
|
||||
const qse_char_t* ptr;
|
||||
qse_size_t len;
|
||||
int opt = 0;
|
||||
|
||||
if (((qse_awk_rtx_t*)rtx)->gbl.ignorecase)
|
||||
opt = QSE_REX_IGNORECASE;
|
||||
|
||||
qse_awk_rtx_refupval (rtx, v);
|
||||
|
||||
if (rtx->inrec.d0->type == QSE_AWK_VAL_NIL)
|
||||
@ -3108,35 +3115,37 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
|
||||
/* the record has never been read.
|
||||
* probably, this function has been triggered
|
||||
* by the statements in the BEGIN block */
|
||||
n = QSE_AWK_ISEMPTYREX(rtx->awk,((qse_awk_val_rex_t*)v)->code)? 1: 0;
|
||||
ptr = QSE_T("");
|
||||
len = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
QSE_ASSERTX (
|
||||
rtx->inrec.d0->type == QSE_AWK_VAL_STR,
|
||||
"the internal value representing $0 should always be of the string type once it has been set/updated. it is nil initially.");
|
||||
"the internal value representing $0 should "
|
||||
"always be of the string type once it has "
|
||||
"been set/updated. it is nil initially.");
|
||||
|
||||
n = QSE_AWK_MATCHREX (
|
||||
((qse_awk_rtx_t*)rtx)->awk,
|
||||
((qse_awk_val_rex_t*)v)->code,
|
||||
((((qse_awk_rtx_t*)rtx)->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0),
|
||||
((qse_awk_val_str_t*)rtx->inrec.d0)->ptr,
|
||||
((qse_awk_val_str_t*)rtx->inrec.d0)->len,
|
||||
((qse_awk_val_str_t*)rtx->inrec.d0)->ptr,
|
||||
((qse_awk_val_str_t*)rtx->inrec.d0)->len,
|
||||
QSE_NULL, &errnum);
|
||||
|
||||
if (n == -1)
|
||||
{
|
||||
qse_awk_rtx_refdownval (rtx, v);
|
||||
ptr = ((qse_awk_val_str_t*)rtx->inrec.d0)->ptr;
|
||||
len = ((qse_awk_val_str_t*)rtx->inrec.d0)->len;
|
||||
}
|
||||
|
||||
/* matchrex should never set the error number
|
||||
* whose message contains a formatting
|
||||
* character. otherwise, the following way of
|
||||
* setting the error information may not work */
|
||||
SETERR_LOC (rtx, errnum, &nde->loc);
|
||||
return QSE_NULL;
|
||||
}
|
||||
n = QSE_AWK_MATCHREX (
|
||||
((qse_awk_rtx_t*)rtx)->awk,
|
||||
((qse_awk_val_rex_t*)v)->code,
|
||||
opt, ptr, len, ptr, len,
|
||||
QSE_NULL, &errnum);
|
||||
|
||||
if (n <= -1)
|
||||
{
|
||||
qse_awk_rtx_refdownval (rtx, v);
|
||||
|
||||
/* matchrex should never set the error number
|
||||
* whose message contains a formatting
|
||||
* character. otherwise, the following way of
|
||||
* setting the error information may not work */
|
||||
SETERR_LOC (rtx, errnum, &nde->loc);
|
||||
return QSE_NULL;
|
||||
}
|
||||
|
||||
qse_awk_rtx_refdownval (rtx, v);
|
||||
@ -4775,7 +4784,7 @@ static qse_awk_val_t* eval_binop_match0 (
|
||||
{
|
||||
n = QSE_AWK_MATCHREX (
|
||||
rtx->awk, rex_code,
|
||||
((rtx->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0),
|
||||
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
|
||||
((qse_awk_val_str_t*)left)->ptr,
|
||||
((qse_awk_val_str_t*)left)->len,
|
||||
((qse_awk_val_str_t*)left)->ptr,
|
||||
@ -4784,7 +4793,7 @@ static qse_awk_val_t* eval_binop_match0 (
|
||||
if (n == -1)
|
||||
{
|
||||
if (right->type != QSE_AWK_VAL_REX)
|
||||
QSE_AWK_FREE (rtx->awk, rex_code);
|
||||
QSE_AWK_FREEREX (rtx->awk, rex_code);
|
||||
|
||||
SETERR_LOC (rtx, errnum, lloc);
|
||||
return QSE_NULL;
|
||||
@ -4794,7 +4803,7 @@ static qse_awk_val_t* eval_binop_match0 (
|
||||
if (res == QSE_NULL)
|
||||
{
|
||||
if (right->type != QSE_AWK_VAL_REX)
|
||||
QSE_AWK_FREE (rtx->awk, rex_code);
|
||||
QSE_AWK_FREEREX (rtx->awk, rex_code);
|
||||
|
||||
ADJERR_LOC (rtx, lloc);
|
||||
return QSE_NULL;
|
||||
@ -4808,13 +4817,13 @@ static qse_awk_val_t* eval_binop_match0 (
|
||||
if (qse_awk_rtx_valtostr (rtx, left, &out) == QSE_NULL)
|
||||
{
|
||||
if (right->type != QSE_AWK_VAL_REX)
|
||||
QSE_AWK_FREE (rtx->awk, rex_code);
|
||||
QSE_AWK_FREEREX (rtx->awk, rex_code);
|
||||
return QSE_NULL;
|
||||
}
|
||||
|
||||
n = QSE_AWK_MATCHREX (
|
||||
rtx->awk, rex_code,
|
||||
((rtx->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0),
|
||||
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
|
||||
out.u.cpldup.ptr, out.u.cpldup.len,
|
||||
out.u.cpldup.ptr, out.u.cpldup.len,
|
||||
QSE_NULL, &errnum);
|
||||
@ -4822,7 +4831,7 @@ static qse_awk_val_t* eval_binop_match0 (
|
||||
{
|
||||
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
|
||||
if (right->type != QSE_AWK_VAL_REX)
|
||||
QSE_AWK_FREE (rtx->awk, rex_code);
|
||||
QSE_AWK_FREEREX (rtx->awk, rex_code);
|
||||
|
||||
SETERR_LOC (rtx, errnum, lloc);
|
||||
return QSE_NULL;
|
||||
@ -4833,7 +4842,7 @@ static qse_awk_val_t* eval_binop_match0 (
|
||||
{
|
||||
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
|
||||
if (right->type != QSE_AWK_VAL_REX)
|
||||
QSE_AWK_FREE (rtx->awk, rex_code);
|
||||
QSE_AWK_FREEREX (rtx->awk, rex_code);
|
||||
|
||||
ADJERR_LOC (rtx, lloc);
|
||||
return QSE_NULL;
|
||||
@ -4842,7 +4851,7 @@ static qse_awk_val_t* eval_binop_match0 (
|
||||
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
|
||||
}
|
||||
|
||||
if (right->type != QSE_AWK_VAL_REX) QSE_AWK_FREE (rtx->awk, rex_code);
|
||||
if (right->type != QSE_AWK_VAL_REX) QSE_AWK_FREEREX (rtx->awk, rex_code);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: tree.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
|
||||
* $Id: tree.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -1229,8 +1229,8 @@ void qse_awk_clrpt (qse_awk_t* awk, qse_awk_nde_t* tree)
|
||||
|
||||
case QSE_AWK_NDE_REX:
|
||||
{
|
||||
QSE_AWK_FREEREX (awk, ((qse_awk_nde_rex_t*)p)->code);
|
||||
QSE_AWK_FREE (awk, ((qse_awk_nde_rex_t*)p)->ptr);
|
||||
QSE_AWK_FREE (awk, ((qse_awk_nde_rex_t*)p)->code);
|
||||
QSE_AWK_FREE (awk, p);
|
||||
break;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: val.c 290 2009-09-19 04:28:49Z hyunghwan.chung $
|
||||
* $Id: val.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -339,13 +339,9 @@ qse_awk_val_t* qse_awk_rtx_makerexval (
|
||||
/* the regular expression value holds:
|
||||
* - header
|
||||
* - a raw string plus with added a terminating '\0'
|
||||
* - a compiled regular expression
|
||||
* the total size is just large enough for all these.
|
||||
*/
|
||||
totsz = QSE_SIZEOF(qse_awk_val_rex_t) +
|
||||
(QSE_SIZEOF(*buf) * (len + 1)) +
|
||||
QSE_REX_LEN(code);
|
||||
|
||||
totsz = QSE_SIZEOF(*val) + (QSE_SIZEOF(*buf) * (len + 1));
|
||||
val = (qse_awk_val_rex_t*) QSE_AWK_ALLOC (rtx->awk, totsz);
|
||||
if (val == QSE_NULL)
|
||||
{
|
||||
@ -361,8 +357,7 @@ qse_awk_val_t* qse_awk_rtx_makerexval (
|
||||
val->ptr = (qse_char_t*)(val + 1);
|
||||
qse_strncpy (val->ptr, buf, len);
|
||||
|
||||
val->code = val->ptr + len + 1;
|
||||
QSE_MEMCPY (val->code, code, QSE_REX_LEN(code));
|
||||
val->code = code;
|
||||
|
||||
return (qse_awk_val_t*)val;
|
||||
}
|
||||
@ -551,10 +546,15 @@ void qse_awk_rtx_freeval (
|
||||
}
|
||||
else if (val->type == QSE_AWK_VAL_REX)
|
||||
{
|
||||
/*
|
||||
/* don't free ptr as it is inlined to val
|
||||
QSE_AWK_FREE (rtx->awk, ((qse_awk_val_rex_t*)val)->ptr);
|
||||
*/
|
||||
|
||||
/* code is just a pointer to a regular expression stored
|
||||
* in parse tree nodes. so don't free it.
|
||||
QSE_AWK_FREEREX (rtx->awk, ((qse_awk_val_rex_t*)val)->code);
|
||||
*/
|
||||
*/
|
||||
|
||||
QSE_AWK_FREE (rtx->awk, val);
|
||||
}
|
||||
else if (val->type == QSE_AWK_VAL_MAP)
|
||||
|
Reference in New Issue
Block a user