Fixed parser bugs in awk:

- handling of idiv operators
- tokenization of /=/ as a regular expression
- calling QSE_AWK_FREEREX to free compiled regular expressions.
Also switched to a new regular expression handler.
This commit is contained in:
2009-12-11 07:03:54 +00:00
parent 93adbf9244
commit faea2475ac
17 changed files with 2089 additions and 3870 deletions

View File

@ -1,5 +1,5 @@
/*
* $Id: awk.h 291 2009-09-21 13:28:18Z hyunghwan.chung $
* $Id: awk.h 312 2009-12-10 13:03:54Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -383,7 +383,6 @@ struct qse_awk_rtx_t
#define QSE_AWK_FREEREX(awk,code) qse_freerex((awk)->mmgr,code)
#define QSE_AWK_ISEMPTYREX(awk,code) qse_isemptyrex(code)
#define QSE_AWK_BUILDREX(awk,ptn,len,errnum) \
qse_awk_buildrex(awk,ptn,len,errnum)
#define QSE_AWK_MATCHREX(awk,code,option,str,len,substr,sublen,match,errnum) \

View File

@ -1,5 +1,5 @@
/*
* $Id: fnc.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
* $Id: fnc.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -1118,7 +1118,7 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
}
}
opt = (run->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0;
opt = (run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0;
a2_end = a2_ptr + a2_len;
cur_ptr = a2_ptr;
@ -1331,12 +1331,28 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
if (nargs >= 3)
{
qse_awk_val_t* a2;
qse_real_t rv;
a2 = qse_awk_rtx_getarg (rtx, 2);
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
if (n <= -1) return -1;
if (n >= 1) start = (qse_long_t)rv;
#if 0
if (a2->type == QSE_AWK_VAL_MAP)
{
/* if the 3rd parameter is an array,
* it is a placeholder to store parenthesized
* subexpressions */
/* TODO: please implement this... */
start = 0;
}
else
#endif
{
qse_real_t rv;
/* if the 3rd parameter is not an array,
* it is treated as a match start index */
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
if (n <= -1) return -1;
if (n >= 1) start = (qse_long_t)rv;
}
}
if (a0->type == QSE_AWK_VAL_STR)
@ -1394,7 +1410,7 @@ static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
{
n = QSE_AWK_MATCHREX (
rtx->awk, rex,
(rtx->gbl.ignorecase? QSE_REX_MATCH_IGNORECASE: 0),
(rtx->gbl.ignorecase? QSE_REX_IGNORECASE: 0),
str0+start-1, len0-start+1,/*TODO: must use str0,len0?*/
str0+start-1, len0-start+1,
&mat, &errnum

View File

@ -1,5 +1,5 @@
/*
* $Id: misc.c 311 2009-12-09 11:35:54Z hyunghwan.chung $
* $Id: misc.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -860,7 +860,7 @@ qse_char_t* qse_awk_rtx_strxntokbyrex (
{
n = QSE_AWK_MATCHREX (
rtx->awk, rex,
((rtx->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0),
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
str, len, ptr, left, &match, errnum);
if (n == -1) return QSE_NULL;
if (n == 0)
@ -1052,7 +1052,7 @@ void* qse_awk_buildrex (
p = qse_buildrex (
awk->mmgr, awk->rex.depth.max.build,
((awk->option&QSE_AWK_REXBOUND)? 0:QSE_REX_BUILD_NOBOUND),
((awk->option&QSE_AWK_REXBOUND)? 0:QSE_REX_NOBOUND),
ptn, len, &err
);
if (p == QSE_NULL) *errnum = QSE_AWK_REXERRTOERR(err);

View File

@ -1,5 +1,5 @@
/*
* $Id: parse.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
* $Id: parse.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -4012,7 +4012,7 @@ static qse_awk_nde_t* parse_primary_nogetline (
return (qse_awk_nde_t*)nde;
}
else if (MATCH(awk,TOK_DIV))
else if (MATCH(awk,TOK_DIV) || MATCH(awk,TOK_DIV_ASSN))
{
qse_awk_nde_rex_t* nde;
qse_awk_errnum_t errnum;
@ -4021,8 +4021,16 @@ static qse_awk_nde_t* parse_primary_nogetline (
* of the context-sensitivity of the slash symbol.
* if TOK_DIV or TOK_DIV_ASSN is seen as a primary, it tries to
* compile it as a regular expression */
SET_TOKEN_TYPE (awk, &awk->tok, TOK_REX);
qse_str_clear (awk->tok.name);
if (MATCH(awk,TOK_DIV_ASSN) &&
qse_str_ccat (awk->tok.name, QSE_T('=')) == (qse_size_t)-1)
{
SETERR_LOC (awk, QSE_AWK_ENOMEM, xloc);
return QSE_NULL;
}
SET_TOKEN_TYPE (awk, &awk->tok, TOK_REX);
if (get_rexstr (awk, &awk->tok) <= -1) return QSE_NULL;
QSE_ASSERT (MATCH(awk,TOK_REX));
@ -4064,8 +4072,8 @@ static qse_awk_nde_t* parse_primary_nogetline (
if (get_token(awk) <= -1)
{
QSE_AWK_FREEREX (awk, nde->code);
QSE_AWK_FREE (awk, nde->ptr);
QSE_AWK_FREE (awk, nde->code);
QSE_AWK_FREE (awk, nde);
return QSE_NULL;
}
@ -4274,7 +4282,10 @@ static qse_awk_nde_t* parse_primary_nogetline (
&awk->ptok.loc
);
}
else SETERR_TOK (awk, QSE_AWK_EEXPRNR);
else
{
SETERR_TOK (awk, QSE_AWK_EEXPRNR);
}
return QSE_NULL;
}
@ -5480,10 +5491,10 @@ static int get_symbols (qse_awk_t* awk, qse_cint_t c, qse_awk_tok_t* tok)
{ QSE_T("**"), 2, TOK_EXP, QSE_AWK_EXTRAOPS },
{ QSE_T("*="), 2, TOK_MUL_ASSN, 0 },
{ QSE_T("*"), 1, TOK_MUL, 0 },
{ QSE_T("//="), 3, TOK_IDIV_ASSN, 0 },
{ QSE_T("//"), 2, TOK_IDIV, QSE_AWK_EXTRAOPS },
{ QSE_T("/="), 2, TOK_DIV_ASSN, QSE_AWK_EXTRAOPS },
{ QSE_T("/="), 2, TOK_DIV_ASSN, 0 },
{ QSE_T("/"), 1, TOK_DIV, 0 },
{ QSE_T("\\="), 2, TOK_IDIV_ASSN, QSE_AWK_EXTRAOPS },
{ QSE_T("\\"), 1, TOK_IDIV, QSE_AWK_EXTRAOPS },
{ QSE_T("%="), 2, TOK_MOD_ASSN, 0 },
{ QSE_T("%"), 1, TOK_MOD, 0 },
{ QSE_T("~"), 1, TOK_TILDE, 0 },

View File

@ -1,5 +1,5 @@
/*
* $Id: rio.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: rio.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -280,7 +280,7 @@ int qse_awk_rtx_readio (
n = QSE_AWK_MATCHREX (
run->awk, run->gbl.rs,
((run->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0),
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&match, &errnum);
@ -374,7 +374,7 @@ int qse_awk_rtx_readio (
n = QSE_AWK_MATCHREX (
run->awk, run->gbl.rs,
((run->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0),
((run->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&match, &errnum);

View File

@ -1,5 +1,5 @@
/*
* $Id: run.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
* $Id: run.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -930,12 +930,12 @@ static void fini_rtx (qse_awk_rtx_t* rtx, int fini_globals)
if (rtx->gbl.rs != QSE_NULL)
{
QSE_AWK_FREE (rtx->awk, rtx->gbl.rs);
QSE_AWK_FREEREX (rtx->awk, rtx->gbl.rs);
rtx->gbl.rs = QSE_NULL;
}
if (rtx->gbl.fs != QSE_NULL)
{
QSE_AWK_FREE (rtx->awk, rtx->gbl.fs);
QSE_AWK_FREEREX (rtx->awk, rtx->gbl.fs);
rtx->gbl.fs = QSE_NULL;
}
@ -3101,6 +3101,13 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
if (v->type == QSE_AWK_VAL_REX)
{
const qse_char_t* ptr;
qse_size_t len;
int opt = 0;
if (((qse_awk_rtx_t*)rtx)->gbl.ignorecase)
opt = QSE_REX_IGNORECASE;
qse_awk_rtx_refupval (rtx, v);
if (rtx->inrec.d0->type == QSE_AWK_VAL_NIL)
@ -3108,35 +3115,37 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
/* the record has never been read.
* probably, this function has been triggered
* by the statements in the BEGIN block */
n = QSE_AWK_ISEMPTYREX(rtx->awk,((qse_awk_val_rex_t*)v)->code)? 1: 0;
ptr = QSE_T("");
len = 0;
}
else
{
QSE_ASSERTX (
rtx->inrec.d0->type == QSE_AWK_VAL_STR,
"the internal value representing $0 should always be of the string type once it has been set/updated. it is nil initially.");
"the internal value representing $0 should "
"always be of the string type once it has "
"been set/updated. it is nil initially.");
n = QSE_AWK_MATCHREX (
((qse_awk_rtx_t*)rtx)->awk,
((qse_awk_val_rex_t*)v)->code,
((((qse_awk_rtx_t*)rtx)->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0),
((qse_awk_val_str_t*)rtx->inrec.d0)->ptr,
((qse_awk_val_str_t*)rtx->inrec.d0)->len,
((qse_awk_val_str_t*)rtx->inrec.d0)->ptr,
((qse_awk_val_str_t*)rtx->inrec.d0)->len,
QSE_NULL, &errnum);
if (n == -1)
{
qse_awk_rtx_refdownval (rtx, v);
ptr = ((qse_awk_val_str_t*)rtx->inrec.d0)->ptr;
len = ((qse_awk_val_str_t*)rtx->inrec.d0)->len;
}
/* matchrex should never set the error number
* whose message contains a formatting
* character. otherwise, the following way of
* setting the error information may not work */
SETERR_LOC (rtx, errnum, &nde->loc);
return QSE_NULL;
}
n = QSE_AWK_MATCHREX (
((qse_awk_rtx_t*)rtx)->awk,
((qse_awk_val_rex_t*)v)->code,
opt, ptr, len, ptr, len,
QSE_NULL, &errnum);
if (n <= -1)
{
qse_awk_rtx_refdownval (rtx, v);
/* matchrex should never set the error number
* whose message contains a formatting
* character. otherwise, the following way of
* setting the error information may not work */
SETERR_LOC (rtx, errnum, &nde->loc);
return QSE_NULL;
}
qse_awk_rtx_refdownval (rtx, v);
@ -4775,7 +4784,7 @@ static qse_awk_val_t* eval_binop_match0 (
{
n = QSE_AWK_MATCHREX (
rtx->awk, rex_code,
((rtx->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0),
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
((qse_awk_val_str_t*)left)->ptr,
((qse_awk_val_str_t*)left)->len,
((qse_awk_val_str_t*)left)->ptr,
@ -4784,7 +4793,7 @@ static qse_awk_val_t* eval_binop_match0 (
if (n == -1)
{
if (right->type != QSE_AWK_VAL_REX)
QSE_AWK_FREE (rtx->awk, rex_code);
QSE_AWK_FREEREX (rtx->awk, rex_code);
SETERR_LOC (rtx, errnum, lloc);
return QSE_NULL;
@ -4794,7 +4803,7 @@ static qse_awk_val_t* eval_binop_match0 (
if (res == QSE_NULL)
{
if (right->type != QSE_AWK_VAL_REX)
QSE_AWK_FREE (rtx->awk, rex_code);
QSE_AWK_FREEREX (rtx->awk, rex_code);
ADJERR_LOC (rtx, lloc);
return QSE_NULL;
@ -4808,13 +4817,13 @@ static qse_awk_val_t* eval_binop_match0 (
if (qse_awk_rtx_valtostr (rtx, left, &out) == QSE_NULL)
{
if (right->type != QSE_AWK_VAL_REX)
QSE_AWK_FREE (rtx->awk, rex_code);
QSE_AWK_FREEREX (rtx->awk, rex_code);
return QSE_NULL;
}
n = QSE_AWK_MATCHREX (
rtx->awk, rex_code,
((rtx->gbl.ignorecase)? QSE_REX_MATCH_IGNORECASE: 0),
((rtx->gbl.ignorecase)? QSE_REX_IGNORECASE: 0),
out.u.cpldup.ptr, out.u.cpldup.len,
out.u.cpldup.ptr, out.u.cpldup.len,
QSE_NULL, &errnum);
@ -4822,7 +4831,7 @@ static qse_awk_val_t* eval_binop_match0 (
{
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
if (right->type != QSE_AWK_VAL_REX)
QSE_AWK_FREE (rtx->awk, rex_code);
QSE_AWK_FREEREX (rtx->awk, rex_code);
SETERR_LOC (rtx, errnum, lloc);
return QSE_NULL;
@ -4833,7 +4842,7 @@ static qse_awk_val_t* eval_binop_match0 (
{
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
if (right->type != QSE_AWK_VAL_REX)
QSE_AWK_FREE (rtx->awk, rex_code);
QSE_AWK_FREEREX (rtx->awk, rex_code);
ADJERR_LOC (rtx, lloc);
return QSE_NULL;
@ -4842,7 +4851,7 @@ static qse_awk_val_t* eval_binop_match0 (
QSE_AWK_FREE (rtx->awk, out.u.cpldup.ptr);
}
if (right->type != QSE_AWK_VAL_REX) QSE_AWK_FREE (rtx->awk, rex_code);
if (right->type != QSE_AWK_VAL_REX) QSE_AWK_FREEREX (rtx->awk, rex_code);
return res;
}

View File

@ -1,5 +1,5 @@
/*
* $Id: tree.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
* $Id: tree.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -1229,8 +1229,8 @@ void qse_awk_clrpt (qse_awk_t* awk, qse_awk_nde_t* tree)
case QSE_AWK_NDE_REX:
{
QSE_AWK_FREEREX (awk, ((qse_awk_nde_rex_t*)p)->code);
QSE_AWK_FREE (awk, ((qse_awk_nde_rex_t*)p)->ptr);
QSE_AWK_FREE (awk, ((qse_awk_nde_rex_t*)p)->code);
QSE_AWK_FREE (awk, p);
break;
}

View File

@ -1,5 +1,5 @@
/*
* $Id: val.c 290 2009-09-19 04:28:49Z hyunghwan.chung $
* $Id: val.c 312 2009-12-10 13:03:54Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -339,13 +339,9 @@ qse_awk_val_t* qse_awk_rtx_makerexval (
/* the regular expression value holds:
* - header
* - a raw string with a terminating '\0' appended
* - a compiled regular expression
* the total size is just large enough for all these.
*/
totsz = QSE_SIZEOF(qse_awk_val_rex_t) +
(QSE_SIZEOF(*buf) * (len + 1)) +
QSE_REX_LEN(code);
totsz = QSE_SIZEOF(*val) + (QSE_SIZEOF(*buf) * (len + 1));
val = (qse_awk_val_rex_t*) QSE_AWK_ALLOC (rtx->awk, totsz);
if (val == QSE_NULL)
{
@ -361,8 +357,7 @@ qse_awk_val_t* qse_awk_rtx_makerexval (
val->ptr = (qse_char_t*)(val + 1);
qse_strncpy (val->ptr, buf, len);
val->code = val->ptr + len + 1;
QSE_MEMCPY (val->code, code, QSE_REX_LEN(code));
val->code = code;
return (qse_awk_val_t*)val;
}
@ -551,10 +546,15 @@ void qse_awk_rtx_freeval (
}
else if (val->type == QSE_AWK_VAL_REX)
{
/*
/* don't free ptr as it is inlined to val
QSE_AWK_FREE (rtx->awk, ((qse_awk_val_rex_t*)val)->ptr);
*/
/* code is just a pointer to a regular expression stored
* in parse tree nodes. so don't free it.
QSE_AWK_FREEREX (rtx->awk, ((qse_awk_val_rex_t*)val)->code);
*/
*/
QSE_AWK_FREE (rtx->awk, val);
}
else if (val->type == QSE_AWK_VAL_MAP)