- enhanced how to distinguish between a function call and a variable concatenated with a parenthesized expression.

- fixed a bug in split.
This commit is contained in:
hyung-hwan 2009-10-20 07:33:40 +00:00
parent 28759bb9f4
commit e384e1d044
11 changed files with 326 additions and 245 deletions

View File

@ -1,5 +1,5 @@
/*
* $Id: awk.h 291 2009-09-21 13:28:18Z hyunghwan.chung $
* $Id: awk.h 299 2009-10-19 13:33:40Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -713,7 +713,7 @@ enum qse_awk_errnum_t
QSE_AWK_EKWWHL, /**< keyword 'while' expected in place of '${0}' */
QSE_AWK_EASSIGN, /**< assignment statement expected */
QSE_AWK_EIDENT, /**< identifier expected in place of '${0}' */
QSE_AWK_EFUNNAME, /**< '${0}' not a valid function name */
QSE_AWK_EFUNNAM, /**< '${0}' not a valid function name */
QSE_AWK_EBLKBEG, /**< BEGIN not followed by left bracket on the same line */
QSE_AWK_EBLKEND, /**< END not followed by left bracket on the same line */
QSE_AWK_EKWRED, /**< keyword '${0}' redefined */

View File

@ -1,5 +1,5 @@
/*
* $Id: fnc.c 290 2009-09-19 04:28:49Z hyunghwan.chung $
* $Id: fnc.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -20,6 +20,8 @@
#include "awk.h"
#include <qse/cmn/stdio.h>
static int fnc_close (qse_awk_rtx_t*, const qse_cstr_t*);
static int fnc_fflush (qse_awk_rtx_t*, const qse_cstr_t*);
static int fnc_index (qse_awk_rtx_t*, const qse_cstr_t*);
@ -36,6 +38,19 @@ static int fnc_sprintf (qse_awk_rtx_t*, const qse_cstr_t*);
#undef MAX
#define MAX QSE_TYPE_UNSIGNED_MAX(qse_size_t)
/* Argument Specifier
*
* Each character in the specifier indicates how a parameter
* of the corresponding position should be passed to a function.
*
* - v: value. pass it after normal evaluation.
* - r: pass a variable by reference
* - x: regular expression as it is. not evaluated as /rex/ ~ $0.
*
* If the first character of the specifier is 'R', all
* parameters are passed by reference regardless of the remaining
* characters.
*/
static qse_awk_fnc_t sys_fnc[] =
{
/* io functions */
@ -46,7 +61,7 @@ static qse_awk_fnc_t sys_fnc[] =
{ {QSE_T("index"), 5}, 0, 0, {2, 3, QSE_NULL}, fnc_index},
{ {QSE_T("substr"), 6}, 0, 0, {2, 3, QSE_NULL}, fnc_substr},
{ {QSE_T("length"), 6}, 1, 0, {0, 1, QSE_NULL}, fnc_length},
{ {QSE_T("split"), 5}, 0, 0, {2, 3, QSE_T("vrv")}, fnc_split},
{ {QSE_T("split"), 5}, 0, 0, {2, 3, QSE_T("vrx")}, fnc_split},
{ {QSE_T("tolower"), 7}, 0, 0, {1, 1, QSE_NULL}, fnc_tolower},
{ {QSE_T("toupper"), 7}, 0, 0, {1, 1, QSE_NULL}, fnc_toupper},
{ {QSE_T("gsub"), 4}, 0, 0, {2, 3, QSE_T("xvr")}, fnc_gsub},
@ -714,6 +729,19 @@ static int fnc_split (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
fs_rex_free = QSE_NULL;
}
}
else if (a2->type == QSE_AWK_VAL_REX)
{
/* the third parameter is a regular expression */
fs_rex = ((qse_awk_val_rex_t*)a2)->code;
fs_rex_free = QSE_NULL;
/* make the loop below to take fs_rex by
* setting fs_len greater than 1*/
fs_ptr = QSE_NULL;
fs_free = QSE_NULL;
fs_len = 2;
}
else
{
if (a2->type == QSE_AWK_VAL_STR)

View File

@ -1,5 +1,5 @@
/*
* $Id: parse.c 291 2009-09-21 13:28:18Z hyunghwan.chung $
* $Id: parse.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -1043,7 +1043,7 @@ static qse_awk_nde_t* parse_function (qse_awk_t* awk)
if (!MATCH(awk,TOK_IDENT))
{
/* cannot find a valid identifier for a function name */
SETERR_TOK (awk, QSE_AWK_EFUNNAME);
SETERR_TOK (awk, QSE_AWK_EFUNNAM);
return QSE_NULL;
}
@ -4364,6 +4364,41 @@ static qse_awk_nde_t* parse_primary (
return left;
}
/*
 * Tells whether a name refers to a function.
 *
 * Returns 1 if qse_awk_getfnc() finds the name (an implicit function),
 * 2 if it names an awk function - the one currently being parsed, one
 * found in awk->tree.funs (defined previously), or one found in
 * awk->parse.funs (called but not resolved so far) - and 0 if none of
 * these lookups match.
 */
static int isfnname (qse_awk_t* awk, const qse_char_t* name, qse_size_t len)
{
	int in_cur_fun;

	/* implicit function */
	if (qse_awk_getfnc (awk, name, len) != QSE_NULL) return 1;

	/* is it the awk function being parsed right now? the comparison
	 * is attempted only when a current function name is recorded. */
	in_cur_fun =
		awk->tree.cur_fun.ptr != QSE_NULL &&
		qse_strxncmp (
			awk->tree.cur_fun.ptr, awk->tree.cur_fun.len,
			name, len) == 0;

	/* the lookups run in the same order as before thanks to
	 * short-circuit evaluation: current function, functions defined
	 * previously, then function calls not resolved so far. */
	if (in_cur_fun ||
	    qse_map_search (awk->tree.funs, name, len) != QSE_NULL ||
	    qse_map_search (awk->parse.funs, name, len) != QSE_NULL)
	{
		return 2;
	}

	/* not a function name */
	return 0;
}
static qse_awk_nde_t* parse_primary_ident (
qse_awk_t* awk, const qse_awk_loc_t* xloc)
{
@ -4401,7 +4436,7 @@ static qse_awk_nde_t* parse_primary_ident (
if (fnc->dfl0)
{
/* handles a function that assumes ()
* when () is missing */
* when () is missing. i.e. length */
nde = parse_fncall (
awk, namedup, namelen, fnc, xloc, 1);
if (nde == QSE_NULL)
@ -4435,14 +4470,25 @@ static qse_awk_nde_t* parse_primary_ident (
qse_awk_nde_var_t* nde;
if ((awk->option & QSE_AWK_EXPLICIT) &&
!(awk->option & QSE_AWK_IMPLICIT))
{
/* if explicit only, the concatenation operator(.)
* must be used. so it is obvious that it is a function
* call, which is illegal for a local variable.
* if implicit, "local_var (1)" may be concatenation of
* the value of local_var and 1.
*/
if (MATCH(awk,TOK_LPAREN))
{
/* a local variable is not a function */
SETERR_ARG_LOC (
awk, QSE_AWK_EFUNNAME, namedup, namelen, xloc);
awk, QSE_AWK_EFUNNAM,
namedup, namelen, xloc);
QSE_AWK_FREE (awk, namedup);
return QSE_NULL;
}
}
nde = (qse_awk_nde_var_t*) QSE_AWK_ALLOC (
awk, QSE_SIZEOF(qse_awk_nde_var_t));
@ -4470,14 +4516,19 @@ static qse_awk_nde_t* parse_primary_ident (
qse_awk_nde_var_t* nde;
if ((awk->option & QSE_AWK_EXPLICIT) &&
!(awk->option & QSE_AWK_IMPLICIT))
{
if (MATCH(awk,TOK_LPAREN))
{
/* a parameter is not a function */
SETERR_ARG_LOC (
awk, QSE_AWK_EFUNNAME, namedup, namelen, xloc);
awk, QSE_AWK_EFUNNAM,
namedup, namelen, xloc);
QSE_AWK_FREE (awk, namedup);
return QSE_NULL;
}
}
nde = (qse_awk_nde_var_t*) QSE_AWK_ALLOC (
awk, QSE_SIZEOF(qse_awk_nde_var_t));
@ -4505,14 +4556,19 @@ static qse_awk_nde_t* parse_primary_ident (
qse_awk_nde_var_t* nde;
if ((awk->option & QSE_AWK_EXPLICIT) &&
!(awk->option & QSE_AWK_IMPLICIT))
{
if (MATCH(awk,TOK_LPAREN))
{
/* a global variable is not a function */
SETERR_ARG_LOC (
awk, QSE_AWK_EFUNNAME, namedup, namelen, xloc);
awk, QSE_AWK_EFUNNAM,
namedup, namelen, xloc);
QSE_AWK_FREE (awk, namedup);
return QSE_NULL;
}
}
nde = (qse_awk_nde_var_t*) QSE_AWK_ALLOC (
awk, QSE_SIZEOF(qse_awk_nde_var_t));
@ -4534,13 +4590,52 @@ static qse_awk_nde_t* parse_primary_ident (
return (qse_awk_nde_t*)nde;
}
else if (MATCH(awk,TOK_LPAREN))
else
{
int fnname = isfnname (awk, namedup, namelen);
if (fnname)
{
/* function call */
qse_awk_nde_t* nde;
if (awk->option & QSE_AWK_IMPLICIT)
if (!MATCH(awk,TOK_LPAREN))
{
/* function name appeared without () */
if (fnname == 1)
{
SETERR_ARG_LOC (
awk, QSE_AWK_EFNCRED,
namedup, namelen, xloc);
}
else
{
SETERR_ARG_LOC (
awk, QSE_AWK_EFUNRED,
namedup, namelen, xloc);
}
QSE_AWK_FREE (awk, namedup);
return QSE_NULL;
}
/* must be a function name */
QSE_ASSERT (qse_map_search (
awk->parse.named, namedup, namelen) == QSE_NULL);
nde = parse_fncall (
awk, namedup, namelen, QSE_NULL, xloc, 0);
if (nde == QSE_NULL) QSE_AWK_FREE (awk, namedup);
return (qse_awk_nde_t*)nde;
}
else if (awk->option & QSE_AWK_IMPLICIT)
{
if (MATCH(awk,TOK_LPAREN) &&
awk->tok.loc.lin == xloc->lin &&
awk->tok.loc.col == xloc->col + namelen)
{
qse_awk_nde_t* nde;
/* a function call to a yet undefined function */
if (qse_map_search (awk->parse.named,
namedup, namelen) != QSE_NULL)
{
@ -4551,7 +4646,6 @@ static qse_awk_nde_t* parse_primary_ident (
QSE_AWK_FREE (awk, namedup);
return QSE_NULL;
}
}
nde = parse_fncall (
awk, namedup, namelen, QSE_NULL, xloc, 0);
@ -4560,9 +4654,9 @@ static qse_awk_nde_t* parse_primary_ident (
}
else
{
/* named variable */
qse_awk_nde_var_t* nde;
/* named variable */
nde = (qse_awk_nde_var_t*) QSE_AWK_ALLOC (
awk, QSE_SIZEOF(qse_awk_nde_var_t));
if (nde == QSE_NULL)
@ -4572,49 +4666,6 @@ static qse_awk_nde_t* parse_primary_ident (
return QSE_NULL;
}
if (awk->option & QSE_AWK_IMPLICIT)
{
qse_bool_t iscur = QSE_FALSE;
/* the name should not conflict with a function name */
/* check if it is a builtin function */
if (qse_awk_getfnc (awk, namedup, namelen) != QSE_NULL)
{
SETERR_ARG_LOC (
awk, QSE_AWK_EFNCRED,
namedup, namelen, xloc);
goto exit_func;
}
/* check if it is an AWK function */
if (awk->tree.cur_fun.ptr != QSE_NULL)
{
iscur = (qse_strxncmp (
awk->tree.cur_fun.ptr,
awk->tree.cur_fun.len,
namedup, namelen) == 0);
}
if (iscur || qse_map_search (awk->tree.funs, namedup, namelen) != QSE_NULL)
{
/* the function is defined previously */
SETERR_ARG_LOC (
awk, QSE_AWK_EFUNRED,
namedup, namelen, xloc);
goto exit_func;
}
if (qse_map_search (
awk->parse.funs,
namedup, namelen) != QSE_NULL)
{
/* is it one of the function calls found so far? */
SETERR_ARG_LOC (
awk, QSE_AWK_EFUNRED,
namedup, namelen, xloc);
goto exit_func;
}
nde->type = QSE_AWK_NDE_NAMED;
nde->loc = *xloc;
nde->next = QSE_NULL;
@ -4630,22 +4681,33 @@ static qse_awk_nde_t* parse_primary_ident (
namedup, namelen, QSE_NULL, 0) == QSE_NULL)
{
SETERR_LOC (awk, QSE_AWK_ENOMEM, xloc);
goto exit_func;
QSE_AWK_FREE (awk, nde);
return QSE_NULL;
}
return (qse_awk_nde_t*)nde;
}
}
else
{
if (MATCH(awk,TOK_LPAREN))
{
/* it is a function call as the name is followed
* by ( and implicit variables are disabled. */
qse_awk_nde_t* nde;
nde = parse_fncall (
awk, namedup, namelen, QSE_NULL, xloc, 0);
if (nde == QSE_NULL) QSE_AWK_FREE (awk, namedup);
return (qse_awk_nde_t*)nde;
}
/* undefined variable */
SETERR_ARG_LOC (awk, QSE_AWK_EUNDEF, namedup, namelen, xloc);
exit_func:
QSE_AWK_FREE (awk, namedup);
QSE_AWK_FREE (awk, nde);
return QSE_NULL;
}
}
}
static qse_awk_nde_t* parse_hashidx (
qse_awk_t* awk, qse_char_t* name, qse_size_t namelen,
@ -4768,40 +4830,21 @@ static qse_awk_nde_t* parse_hashidx (
if (awk->option & QSE_AWK_IMPLICIT)
{
qse_bool_t iscur = QSE_FALSE;
/* check if it is a builtin function */
if (qse_awk_getfnc (awk, name, namelen) != QSE_NULL)
int fnname = isfnname (awk, name, namelen);
if (fnname == 1)
{
SETERR_ARG_LOC (
awk, QSE_AWK_EFNCRED, name, namelen, xloc);
goto exit_func;
}
/* check if it is an AWK function */
if (awk->tree.cur_fun.ptr != QSE_NULL)
else if (fnname == 2)
{
iscur = (qse_strxncmp (
awk->tree.cur_fun.ptr, awk->tree.cur_fun.len,
name, namelen) == 0);
}
if (iscur || qse_map_search (awk->tree.funs, name, namelen) != QSE_NULL)
{
/* the function is defined previously */
SETERR_ARG_LOC (
awk, QSE_AWK_EFUNRED, name, namelen, xloc);
goto exit_func;
}
if (qse_map_search (
awk->parse.funs, name, namelen) != QSE_NULL)
{
/* is it one of the function calls found so far? */
SETERR_ARG_LOC (
awk, QSE_AWK_EFUNRED, name, namelen, xloc);
goto exit_func;
}
QSE_ASSERT (fnname == 0);
nde->type = QSE_AWK_NDE_NAMEDIDX;
nde->loc = *xloc;
@ -4817,7 +4860,6 @@ static qse_awk_nde_t* parse_hashidx (
/* undefined variable */
SETERR_ARG_LOC (awk, QSE_AWK_EUNDEF, name, namelen, xloc);
exit_func:
qse_awk_clrpt (awk, idx);
QSE_AWK_FREE (awk, nde);
@ -5549,7 +5591,6 @@ retry:
unget_char (awk, &awk->sio.last);
awk->sio.last = lc;
if (get_number (awk, tok) <= -1) return -1;
}
else
{

View File

@ -1,5 +1,5 @@
/*
* $Id: run.c 292 2009-09-23 10:19:30Z hyunghwan.chung $
* $Id: run.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -2285,7 +2285,13 @@ static int run_foreach (qse_awk_rtx_t* rtx, qse_awk_nde_foreach_t* nde)
if (rv == QSE_NULL) return -1;
qse_awk_rtx_refupval (rtx, rv);
if (rv->type != QSE_AWK_VAL_MAP)
if (rv->type == QSE_AWK_VAL_NIL)
{
/* just return without executing the loop body */
qse_awk_rtx_refdownval (rtx, rv);
return 0;
}
else if (rv->type != QSE_AWK_VAL_MAP)
{
qse_awk_rtx_refdownval (rtx, rv);
SETERR_LOC (rtx, QSE_AWK_ENOTMAPIN, &test->right->loc);
@ -3100,7 +3106,7 @@ static qse_awk_val_t* eval_expression (qse_awk_rtx_t* rtx, qse_awk_nde_t* nde)
if (rtx->inrec.d0->type == QSE_AWK_VAL_NIL)
{
/* the record has never been read.
* probably, this functions has been triggered
* probably, this function has been triggered
* by the statements in the BEGIN block */
n = QSE_AWK_ISEMPTYREX(rtx->awk,((qse_awk_val_rex_t*)v)->code)? 1: 0;
}

View File

@ -1,5 +1,5 @@
/*
* $Id: tree.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: tree.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: rex.c 299 2009-10-19 13:33:40Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -23,7 +23,7 @@
#include "mem.h"
#ifdef DEBUG_REX
#include <qse/bas/sio.h>
#include <qse/cmn/sio.h>
#define DPUTS(x) qse_sio_puts(&qse_sio_err,x)
#endif
@ -475,7 +475,6 @@ int qse_matchrex (
matcher_t matcher;
match_t mat;
qse_size_t offset = 0;
/*const qse_char_t* match_ptr_zero = QSE_NULL;*/
matcher.mmgr = mmgr;
@ -494,7 +493,6 @@ int qse_matchrex (
/* TODO: should it allow an offset here??? */
mat.match_ptr = substr + offset;
/*while (mat.match_ptr < matcher.match.str.end)*/
while (mat.match_ptr <= matcher.match.str.end)
{
if (match_pattern (&matcher, code, &mat) == QSE_NULL)
@ -505,41 +503,18 @@ int qse_matchrex (
if (mat.matched)
{
/*
if (mat.match_len == 0)
{
if (match_ptr_zero == QSE_NULL)
match_ptr_zero = mat.match_ptr;
mat.match_ptr++;
continue;
}
*/
if (match != QSE_NULL)
{
match->ptr = mat.match_ptr;
match->len = mat.match_len;
}
/*match_ptr_zero = QSE_NULL;*/
break;
}
mat.match_ptr++;
}
/*
if (match_ptr_zero != QSE_NULL)
{
if (match != QSE_NULL)
{
match->ptr = match_ptr_zero;
match->len = 0;
}
return 1;
}
*/
return (mat.matched)? 1: 0;
}
@ -1364,19 +1339,11 @@ static const qse_byte_t* match_branch_body0 (
matcher_t* matcher, const qse_byte_t* base, match_t* mat)
{
const qse_byte_t* p;
/* match_t mat2;*/
qse_size_t match_len = 0;
mat->matched = QSE_FALSE;
mat->match_len = 0;
/* TODO: is mat2 necessary here ? */
/*
mat2.match_ptr = mat->match_ptr;
mat2.branch = mat->branch;
mat2.branch_end = mat->branch_end;
*/
p = base;
while (p < mat->branch_end)
@ -1388,21 +1355,6 @@ static const qse_byte_t* match_branch_body0 (
mat->match_ptr = &mat->match_ptr[mat->match_len];
match_len += mat->match_len;
#if 0
p = match_atom (matcher, p, &mat2);
if (p == QSE_NULL) return QSE_NULL;
if (!mat2.matched)
{
mat->matched = QSE_FALSE;
break; /* stop matching */
}
mat->matched = QSE_TRUE;
mat->match_len += mat2.match_len;
mat2.match_ptr = &mat2.match_ptr[mat2.match_len];
#endif
}
if (mat->matched) mat->match_len = match_len;
@ -1494,7 +1446,6 @@ static const qse_byte_t* match_any_char (
* uses the maximum value to mean infinite.
* consider the upper bound of '+' and '*'. */
lbound = (BOUND_MAX-lb >= lbound)? (lbound + lb): BOUND_MAX;
lbound = (BOUND_MAX-lb >= lbound)? (lbound + lb): BOUND_MAX;
ubound = (BOUND_MAX-ub >= ubound)? (ubound + ub): BOUND_MAX;
p += QSE_SIZEOF(*cp);
@ -1782,8 +1733,20 @@ static const qse_byte_t* match_group (
grp_len_capa += 256;
}
grp_len[si+1] = grp_len[si] + mat2.match_len;
if (mat2.match_len == 0)
{
/* TODO: verify this.... */
/* Advance to the next in case the match length is zero
* for (1*)*, 1* can match zero-length.
* A zero-length inner match results in zero-length no
* matter how many times the outer group is requested
* to match */
break;
}
mat2.match_ptr += mat2.match_len;
mat2.match_len = 0;
mat2.matched = QSE_FALSE;

View File

@ -0,0 +1,4 @@
BEGIN {
print length 11 # this should print 011 as length is length($0);
print length (11) # this should print 2
}

View File

@ -0,0 +1,5 @@
BEGIN {
/* the following for statement is valid if y is nil */
for (x in y) print x;
}

View File

@ -0,0 +1,4 @@
BEGIN {
abc = 20;
print abc (10); # this is not a function call
}

View File

@ -2019,6 +2019,33 @@ BEGIN {
1 abc
7 abc
13 abc
--------------------------------------------------------------------------------
../../cmd/awk/.libs/qseawk --newline=on -o- -f lang-039.awk </dev/stdin 2>&1
--------------------------------------------------------------------------------
BEGIN {
print (length() 11);
print length(11);
}
011
2
--------------------------------------------------------------------------------
../../cmd/awk/.libs/qseawk --newline=on -o- -f lang-040.awk </dev/stdin 2>&1
--------------------------------------------------------------------------------
BEGIN {
for (x in y)
print x;
}
--------------------------------------------------------------------------------
../../cmd/awk/.libs/qseawk --newline=on -o- -f lang-041.awk </dev/stdin 2>&1
--------------------------------------------------------------------------------
BEGIN {
abc = 20;
print (abc 10);
}
2010
--------------------------------------------------------------------------------
../../cmd/awk/.libs/qseawk -f quicksort.awk quicksort.dat </dev/stdin 2>&1
--------------------------------------------------------------------------------

View File

@ -149,6 +149,9 @@ PROGS="
lang-036.awk/lang-036.dat//--newline=on -o-
lang-037.awk/lang-037.dat//--newline=on -o-
lang-038.awk///--newline=on -o-
lang-039.awk///--newline=on -o-
lang-040.awk///--newline=on -o-
lang-041.awk///--newline=on -o-
quicksort.awk/quicksort.dat//
quicksort2.awk/quicksort2.dat//