touched up code while enhancing index() and match()

This commit is contained in:
hyung-hwan 2009-09-16 08:03:15 +00:00
parent 773f5cec57
commit 63c12720cb
7 changed files with 185 additions and 118 deletions

View File

@ -956,7 +956,7 @@ GENERATE_TREEVIEW = NONE
# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
# and Class Hierarchy pages using a tree view instead of an ordered list.
USE_INLINE_TREES = YES
USE_INLINE_TREES = NO
# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
# used to set the initial width (in pixels) of the frame in which the tree

View File

@ -1,6 +1,20 @@
/** @page awk AWK
AWK Interpreter
@section awk_intro INTRODUCTION
QSE includes an implementation of an AWK interpreter that can be embedded
into an application written in C and/or C++. A hosting application can
- add new awk global variables and functions.
- get and set the value of a global variable.
- call an awk function.
- customize I/O handlers for file, pipe, console I/O.
- embed multiple interpreters independent of each other.
- run a single script with different I/O streams independently.
- change language features supported by setting options.
The interpreter implements the language described in the book
The AWK Programming Language (http://cm.bell-labs.com/cm/cs/awkbook/) with
some extensions.
@section awk_ext EXTENSION
Some language extensions are implemented and they can be enabled by setting the
@ -62,11 +76,21 @@ BEGIN {
print "13" || "sort";
print "12" || "sort";
print "11" || "sort";
#close the input as sort emits when the input is closed
# close the input side of the pipe, as 'sort' starts emitting its results
# only once the input is closed.
close ("sort", "r");
while (("sort" || getline x) > 0) print "xx:", x;
}
@endcode
*/
@subsection awk_ext_fnc EXTENDED FUNCTIONS
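index() and match() can accept an optional third parameter indicating the
1-based position where the search should begin. A negative position is meant
to enable backward search; note that the implementation in this change treats
any position less than 1 as 1.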
@code
BEGIN {
}
@endcode
*/

View File

@ -1,5 +1,5 @@
/*
* $Id: Awk.hpp 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: Awk.hpp 288 2009-09-15 14:03:15Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -27,9 +27,9 @@
#include <qse/Mmgr.hpp>
#include <stdarg.h>
/** @file
* AWK Interpreter
*/
/// @file
/// AWK Interpreter
///
/////////////////////////////////
QSE_BEGIN_NAMESPACE(QSE)
@ -702,7 +702,7 @@ public:
operator rtx_t* () const;
void stop () const;
bool isStopReq () const;
bool pendingStop () const;
errnum_t getErrorNumber () const;
loc_t getErrorLocation () const;
@ -720,39 +720,43 @@ public:
const loc_t* loc
);
/**
* Sets the value of a global variable identified by @a id
* to @a v.
* @return 0 on success, -1 on failure
*/
///
/// The setGlobal() function sets the value of a global
/// variable identified by @a id
/// to @a v.
/// @return 0 on success, -1 on failure
///
int setGlobal (int id, long_t v);
/**
* Sets the value of a global variable identified by @a id
* to @a v.
* @return 0 on success, -1 on failure
*/
///
/// The setGlobal() function sets the value of a global
/// variable identified by @a id
/// to @a v.
/// @return 0 on success, -1 on failure
///
int setGlobal (int id, real_t v);
/**
* Sets the value of a global variable identified by @a id
* to a string as long as @a len characters pointed to by
* @a ptr.
* @return 0 on success, -1 on failure
*/
///
/// The setGlobal() function sets the value of a global
/// variable identified by @a id
/// to a string of @a len characters pointed to by
/// @a ptr.
/// @return 0 on success, -1 on failure
///
int setGlobal (int id, const char_t* ptr, size_t len);
/**
* Sets a global variable identified by @a id to a value @a v.
* @return 0 on success, -1 on failure
*/
///
/// The setGlobal() function sets a global variable
/// identified by @a id to a value @a v.
/// @return 0 on success, -1 on failure
///
int setGlobal (int id, const Value& v);
/**
* Gets the value of a global variable identified by @a id
* and store it in @a v.
* @return 0 on success, -1 on failure
*/
///
/// The getGlobal() function gets the value of a global
/// variable identified by @a id and stores it in @a v.
/// @return 0 on success, -1 on failure
///
int getGlobal (int id, Value& v) const;
protected:
@ -918,7 +922,7 @@ public:
/// The getGlobal() function gets the value of a global variable
/// identified by @a id. The @a id is either a value returned by
/// addGlobal() or one of the #gbl_id_t enumerators. It is not allowed
/// to call this function before Awk::parse().
/// to call this function before parse().
/// @return 0 on success, -1 on failure
///
int getGlobal (
@ -926,9 +930,9 @@ public:
Value& v ///< value store
);
/**
* Defines a intrinsic function handler.
*/
///
/// The FunctionHandler type defines an intrinsic function handler.
///
typedef int (Awk::*FunctionHandler) (
Run& run,
Value& ret,

View File

@ -1,5 +1,5 @@
/*
* $Id: awk.h 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: awk.h 288 2009-09-15 14:03:15Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -101,15 +101,15 @@ typedef struct qse_awk_t qse_awk_t;
* to handle runtime I/O:
* - getline piped in from a command reads from a pipe.
* ("ls -l" | getline line)
* - print and printf piped out to a command writes to a pipe.
* - print and printf piped out to a command write to a pipe.
* (print 2 | "sort")
* - getline redirected in reads from a file.
* (getline line < "file")
* - print and printf redirected out writes to a file.
* - print and printf redirected out write to a file.
* (print num > "file")
* - The pattern-action loop and getline with no redirected input
* reads from a console. (/susie/ { ... })
* - print and printf writes to a console. (print "hello, world")
* read from a console. (/susie/ { ... })
* - print and printf write to a console. (print "hello, world")
*
* @sa qse_awk_t qse_awk_rtx_open qse_awk_rio_t
*/
@ -1479,10 +1479,10 @@ void qse_awk_stopall (
);
/**
* The qse_awk_isstopreq() function tests if qse_awk_rtx_stop() has been
* The qse_awk_rtx_pendingstop() function tests if qse_awk_rtx_stop() has been
* called.
*/
qse_bool_t qse_awk_rtx_isstopreq (
qse_bool_t qse_awk_rtx_pendingstop (
qse_awk_rtx_t* rtx /**< runtime context */
);

View File

@ -1,5 +1,5 @@
/*
* $Id: Awk.cpp 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: Awk.cpp 288 2009-09-15 14:03:15Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -903,10 +903,10 @@ void Awk::Run::stop () const
qse_awk_rtx_stop (this->rtx);
}
bool Awk::Run::isStopReq () const
bool Awk::Run::pendingStop () const
{
QSE_ASSERT (this->rtx != QSE_NULL);
return qse_awk_rtx_isstopreq (this->rtx)? true: false;
return qse_awk_rtx_pendingstop (this->rtx)? true: false;
}
Awk::errnum_t Awk::Run::getErrorNumber () const

View File

@ -1,5 +1,5 @@
/*
* $Id: fnc.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: fnc.c 288 2009-09-15 14:03:15Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -43,7 +43,7 @@ static qse_awk_fnc_t sys_fnc[] =
{ {QSE_T("fflush"), 6}, 0, QSE_AWK_RIO, {0, 1, QSE_NULL}, fnc_fflush},
/* string functions */
{ {QSE_T("index"), 5}, 0, 0, {2, 2, QSE_NULL}, fnc_index},
{ {QSE_T("index"), 5}, 0, 0, {2, 3, QSE_NULL}, fnc_index},
{ {QSE_T("substr"), 6}, 0, 0, {2, 3, QSE_NULL}, fnc_substr},
{ {QSE_T("length"), 6}, 1, 0, {0, 1, QSE_NULL}, fnc_length},
{ {QSE_T("split"), 5}, 0, 0, {2, 3, QSE_T("vrv")}, fnc_split},
@ -51,7 +51,7 @@ static qse_awk_fnc_t sys_fnc[] =
{ {QSE_T("toupper"), 7}, 0, 0, {1, 1, QSE_NULL}, fnc_toupper},
{ {QSE_T("gsub"), 4}, 0, 0, {2, 3, QSE_T("xvr")}, fnc_gsub},
{ {QSE_T("sub"), 3}, 0, 0, {2, 3, QSE_T("xvr")}, fnc_sub},
{ {QSE_T("match"), 5}, 0, 0, {2, 2, QSE_T("vx")}, fnc_match},
{ {QSE_T("match"), 5}, 0, 0, {2, 3, QSE_T("vxv")}, fnc_match},
{ {QSE_T("sprintf"), 7}, 0, 0, {1, MAX, QSE_NULL}, fnc_sprintf},
{ {QSE_NULL, 0}, 0, 0, {0, 0, QSE_NULL}, QSE_NULL}
@ -438,19 +438,32 @@ static int fnc_fflush (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
return 0;
}
static int fnc_index (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
static int fnc_index (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
{
qse_size_t nargs;
qse_awk_val_t* a0, * a1;
qse_char_t* str0, * str1, * ptr;
qse_size_t len0, len1;
qse_long_t idx;
qse_long_t idx, start = 1;
nargs = qse_awk_rtx_getnargs (run);
QSE_ASSERT (nargs == 2);
nargs = qse_awk_rtx_getnargs (rtx);
QSE_ASSERT (nargs >= 2 && nargs <= 3);
a0 = qse_awk_rtx_getarg (run, 0);
a1 = qse_awk_rtx_getarg (run, 1);
a0 = qse_awk_rtx_getarg (rtx, 0);
a1 = qse_awk_rtx_getarg (rtx, 1);
if (nargs >= 3)
{
qse_awk_val_t* a2;
qse_real_t rv;
int n;
a2 = qse_awk_rtx_getarg (rtx, 2);
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
if (n <= -1) return -1;
if (n >= 1) start = (qse_long_t)rv;
if (start < 1) start = 1;
}
if (a0->type == QSE_AWK_VAL_STR)
{
@ -459,7 +472,7 @@ static int fnc_index (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
}
else
{
str0 = qse_awk_rtx_valtocpldup (run, a0, &len0);
str0 = qse_awk_rtx_valtocpldup (rtx, a0, &len0);
if (str0 == QSE_NULL) return -1;
}
@ -470,25 +483,27 @@ static int fnc_index (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
}
else
{
str1 = qse_awk_rtx_valtocpldup (run, a1, &len1);
str1 = qse_awk_rtx_valtocpldup (rtx, a1, &len1);
if (str1 == QSE_NULL)
{
if (a0->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (run->awk, str0);
QSE_AWK_FREE (rtx->awk, str0);
return -1;
}
}
ptr = qse_strxnstr (str0, len0, str1, len1);
ptr = (start > len0)?
QSE_NULL:
qse_strxnstr (&str0[start-1], len0-start+1, str1, len1);
idx = (ptr == QSE_NULL)? 0: ((qse_long_t)(ptr-str0) + 1);
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str0);
if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str1);
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0);
if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1);
a0 = qse_awk_rtx_makeintval (run, idx);
a0 = qse_awk_rtx_makeintval (rtx, idx);
if (a0 == QSE_NULL) return -1;
qse_awk_rtx_setretval (run, a0);
qse_awk_rtx_setretval (rtx, a0);
return 0;
}
@ -529,7 +544,7 @@ static int fnc_length (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
return 0;
}
static int fnc_substr (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
static int fnc_substr (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
{
qse_size_t nargs;
qse_awk_val_t* a0, * a1, * a2, * r;
@ -539,12 +554,12 @@ static int fnc_substr (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
qse_real_t rindex, rcount;
int n;
nargs = qse_awk_rtx_getnargs (run);
nargs = qse_awk_rtx_getnargs (rtx);
QSE_ASSERT (nargs >= 2 && nargs <= 3);
a0 = qse_awk_rtx_getarg (run, 0);
a1 = qse_awk_rtx_getarg (run, 1);
a2 = (nargs >= 3)? qse_awk_rtx_getarg (run, 2): QSE_NULL;
a0 = qse_awk_rtx_getarg (rtx, 0);
a1 = qse_awk_rtx_getarg (rtx, 1);
a2 = (nargs >= 3)? qse_awk_rtx_getarg (rtx, 2): QSE_NULL;
if (a0->type == QSE_AWK_VAL_STR)
{
@ -553,14 +568,14 @@ static int fnc_substr (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
}
else
{
str = qse_awk_rtx_valtocpldup (run, a0, &len);
str = qse_awk_rtx_valtocpldup (rtx, a0, &len);
if (str == QSE_NULL) return -1;
}
n = qse_awk_rtx_valtonum (run, a1, &lindex, &rindex);
if (n == -1)
n = qse_awk_rtx_valtonum (rtx, a1, &lindex, &rindex);
if (n <= -1)
{
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str);
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str);
return -1;
}
if (n == 1) lindex = (qse_long_t)rindex;
@ -568,11 +583,11 @@ static int fnc_substr (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
if (a2 == QSE_NULL) lcount = (qse_long_t)len;
else
{
n = qse_awk_rtx_valtonum (run, a2, &lcount, &rcount);
n = qse_awk_rtx_valtonum (rtx, a2, &lcount, &rcount);
if (n == -1)
{
if (a0->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (run->awk, str);
QSE_AWK_FREE (rtx->awk, str);
return -1;
}
if (n == 1) lcount = (qse_long_t)rcount;
@ -588,15 +603,15 @@ static int fnc_substr (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
lcount = (qse_long_t)len - lindex;
}
r = qse_awk_rtx_makestrval (run, &str[lindex], (qse_size_t)lcount);
r = qse_awk_rtx_makestrval (rtx, &str[lindex], (qse_size_t)lcount);
if (r == QSE_NULL)
{
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str);
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str);
return -1;
}
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str);
qse_awk_rtx_setretval (run, r);
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str);
qse_awk_rtx_setretval (rtx, r);
return 0;
}
@ -1058,12 +1073,15 @@ static int __substitute (qse_awk_rtx_t* run, qse_long_t max_count)
if (a0->type != QSE_AWK_VAL_REX)
{
qse_awk_errnum_t errnum;
rex = QSE_AWK_BUILDREX (
run->awk, a0_ptr, a0_len, &run->errinf.num);
run->awk, a0_ptr, a0_len, &errnum);
if (rex == QSE_NULL)
{
qse_str_fini (&new);
FREE_A_PTRS (run->awk);
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL);
return -1;
}
}
@ -1260,23 +1278,36 @@ static int fnc_sub (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
return __substitute (run, 1);
}
static int fnc_match (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
static int fnc_match (qse_awk_rtx_t* rtx, const qse_cstr_t* fnm)
{
qse_size_t nargs;
qse_awk_val_t* a0, * a1;
qse_char_t* str0, * str1;
qse_size_t len0, len1;
qse_long_t idx;
qse_long_t idx, start = 1;
void* rex;
int n;
qse_cstr_t mat;
qse_awk_errnum_t errnum;
nargs = qse_awk_rtx_getnargs (run);
QSE_ASSERT (nargs == 2);
nargs = qse_awk_rtx_getnargs (rtx);
QSE_ASSERT (nargs >= 2 && nargs <= 3);
a0 = qse_awk_rtx_getarg (rtx, 0);
a1 = qse_awk_rtx_getarg (rtx, 1);
if (nargs >= 3)
{
qse_awk_val_t* a2;
qse_real_t rv;
a2 = qse_awk_rtx_getarg (rtx, 2);
n = qse_awk_rtx_valtonum (rtx, a2, &start, &rv);
if (n <= -1) return -1;
if (n >= 1) start = (qse_long_t)rv;
if (start < 1) start = 1;
}
a0 = qse_awk_rtx_getarg (run, 0);
a1 = qse_awk_rtx_getarg (run, 1);
if (a0->type == QSE_AWK_VAL_STR)
{
@ -1285,7 +1316,7 @@ static int fnc_match (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
}
else
{
str0 = qse_awk_rtx_valtocpldup (run, a0, &len0);
str0 = qse_awk_rtx_valtocpldup (rtx, a0, &len0);
if (str0 == QSE_NULL) return -1;
}
@ -1295,6 +1326,8 @@ static int fnc_match (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
}
else
{
qse_awk_errnum_t errnum;
if (a1->type == QSE_AWK_VAL_STR)
{
str1 = ((qse_awk_val_str_t*)a1)->ptr;
@ -1302,77 +1335,83 @@ static int fnc_match (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
}
else
{
str1 = qse_awk_rtx_valtocpldup (run, a1, &len1);
str1 = qse_awk_rtx_valtocpldup (rtx, a1, &len1);
if (str1 == QSE_NULL)
{
if (a0->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (run->awk, str0);
QSE_AWK_FREE (rtx->awk, str0);
return -1;
}
}
rex = QSE_AWK_BUILDREX (run->awk, str1, len1, &run->errinf.num);
rex = QSE_AWK_BUILDREX (rtx->awk, str1, len1, &errnum);
if (rex == QSE_NULL)
{
if (a0->type != QSE_AWK_VAL_STR)
QSE_AWK_FREE (run->awk, str0);
QSE_AWK_FREE (rtx->awk, str0);
qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
return -1;
}
if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str1);
if (a1->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str1);
}
if (start > len0) n = 0;
else
{
n = QSE_AWK_MATCHREX (
run->awk, rex,
(run->gbl.ignorecase? QSE_REX_MATCH_IGNORECASE: 0),
str0, len0, str0, len0,
rtx->awk, rex,
(rtx->gbl.ignorecase? QSE_REX_MATCH_IGNORECASE: 0),
str0+start-1, len0-start+1,/*TODO: must use str0,len0?*/
str0+start-1, len0-start+1,
&mat, &errnum
);
}
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (run->awk, str0);
if (a1->type != QSE_AWK_VAL_REX) QSE_AWK_FREEREX (run->awk, rex);
if (a0->type != QSE_AWK_VAL_STR) QSE_AWK_FREE (rtx->awk, str0);
if (a1->type != QSE_AWK_VAL_REX) QSE_AWK_FREEREX (rtx->awk, rex);
if (n <= -1)
{
qse_awk_rtx_seterrnum (run, errnum, QSE_NULL);
qse_awk_rtx_seterrnum (rtx, errnum, QSE_NULL);
return -1;
}
idx = (n == 0)? 0: ((qse_long_t)(mat.ptr-str0) + 1);
a0 = qse_awk_rtx_makeintval (run, idx);
a0 = qse_awk_rtx_makeintval (rtx, idx);
if (a0 == QSE_NULL) return -1;
qse_awk_rtx_refupval (run, a0);
qse_awk_rtx_refupval (rtx, a0);
a1 = qse_awk_rtx_makeintval (run,
a1 = qse_awk_rtx_makeintval (rtx,
((n == 0)? (qse_long_t)-1: (qse_long_t)mat.len));
if (a1 == QSE_NULL)
{
qse_awk_rtx_refdownval (run, a0);
qse_awk_rtx_refdownval (rtx, a0);
return -1;
}
qse_awk_rtx_refupval (run, a1);
qse_awk_rtx_refupval (rtx, a1);
if (qse_awk_rtx_setgbl (run, QSE_AWK_GBL_RSTART, a0) == -1)
if (qse_awk_rtx_setgbl (rtx, QSE_AWK_GBL_RSTART, a0) == -1)
{
qse_awk_rtx_refdownval (run, a1);
qse_awk_rtx_refdownval (run, a0);
qse_awk_rtx_refdownval (rtx, a1);
qse_awk_rtx_refdownval (rtx, a0);
return -1;
}
if (qse_awk_rtx_setgbl (run, QSE_AWK_GBL_RLENGTH, a1) == -1)
if (qse_awk_rtx_setgbl (rtx, QSE_AWK_GBL_RLENGTH, a1) == -1)
{
qse_awk_rtx_refdownval (run, a1);
qse_awk_rtx_refdownval (run, a0);
qse_awk_rtx_refdownval (rtx, a1);
qse_awk_rtx_refdownval (rtx, a0);
return -1;
}
qse_awk_rtx_setretval (run, a0);
qse_awk_rtx_setretval (rtx, a0);
qse_awk_rtx_refdownval (run, a1);
qse_awk_rtx_refdownval (run, a0);
qse_awk_rtx_refdownval (rtx, a1);
qse_awk_rtx_refdownval (rtx, a0);
return 0;
}

View File

@ -1,5 +1,5 @@
/*
* $Id: run.c 287 2009-09-15 10:01:02Z hyunghwan.chung $
* $Id: run.c 288 2009-09-15 14:03:15Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -758,7 +758,7 @@ void qse_awk_rtx_stop (qse_awk_rtx_t* rtx)
rtx->exit_level = EXIT_ABORT;
}
qse_bool_t qse_awk_rtx_isstopreq (qse_awk_rtx_t* rtx)
qse_bool_t qse_awk_rtx_pendingstop (qse_awk_rtx_t* rtx)
{
return (rtx->exit_level == EXIT_ABORT || rtx->awk->stopall);
}