fixed a bug in closing a qse_awk_rtx_t object

- refdown_globals() should have been called after qse_awk_rtx_clrrec(),
  as the latter still accesses NF.
fixed typos in awk error messages
fixed a memory allocation bug in matching a group (match_group) 
uncommented binary number parsing code in the awk parser.
This commit is contained in:
hyung-hwan 2009-06-23 07:01:28 +00:00
parent 385e1acc26
commit 97a7febc78
13 changed files with 212 additions and 172 deletions

View File

@ -1,5 +1,5 @@
/*
* $Id: awk.c 206 2009-06-21 13:33:05Z hyunghwan.chung $
* $Id: awk.c 207 2009-06-22 13:01:28Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -331,20 +331,20 @@ struct opttab_t
{
{ QSE_T("implicit"), QSE_AWK_IMPLICIT, QSE_T("allow undeclared variables") },
{ QSE_T("explicit"), QSE_AWK_EXPLICIT, QSE_T("allow declared variables(local,global)") },
{ QSE_T("bxor"), QSE_AWK_BXOR, QSE_T("enable bit-wise xor operator(^)") },
{ QSE_T("bxor"), QSE_AWK_BXOR, QSE_T("enable bit-wise XOR operator(^)") },
{ QSE_T("shift"), QSE_AWK_SHIFT, QSE_T("enable shift operators(<<,>>)") },
{ QSE_T("idiv"), QSE_AWK_IDIV, QSE_T("enable idiv operator(//)") },
{ QSE_T("rio"), QSE_AWK_RIO, QSE_T("") },
{ QSE_T("rio"), QSE_AWK_RIO, QSE_T("enable builtin I/O including getline & print") },
{ QSE_T("rwpipe"), QSE_AWK_RWPIPE, QSE_T("allow a dual-directional pipe") },
{ QSE_T("newline"), QSE_AWK_NEWLINE, QSE_T("") },
{ QSE_T("stripspaces"), QSE_AWK_STRIPSPACES, QSE_T("") },
{ QSE_T("nextofile"), QSE_AWK_NEXTOFILE, QSE_T("") },
{ QSE_T("crfl"), QSE_AWK_CRLF, QSE_T("") },
{ QSE_T("reset"), QSE_AWK_RESET, QSE_T("") },
{ QSE_T("maptovar"), QSE_AWK_MAPTOVAR, QSE_T("") },
{ QSE_T("pablock"), QSE_AWK_PABLOCK, QSE_T("") },
{ QSE_T("rexbound"), QSE_AWK_REXBOUND, QSE_T("") },
{ QSE_T("ncmponstr"), QSE_AWK_NCMPONSTR, QSE_T("") },
{ QSE_T("newline"), QSE_AWK_NEWLINE, QSE_T("enable a newline to terminate a statement") },
{ QSE_T("stripspaces"), QSE_AWK_STRIPSPACES, QSE_T("strip leading and trailing space of a record") },
{ QSE_T("nextofile"), QSE_AWK_NEXTOFILE, QSE_T("enable 'nextofile'") },
{ QSE_T("reset"), QSE_AWK_RESET, QSE_T("enable 'reset'") },
{ QSE_T("crlf"), QSE_AWK_CRLF, QSE_T("use CRLF for a newline") },
{ QSE_T("maptovar"), QSE_AWK_MAPTOVAR, QSE_T("allow a map to be assigned or returned") },
{ QSE_T("pablock"), QSE_AWK_PABLOCK, QSE_T("enable pattern-action loop") },
{ QSE_T("rexbound"), QSE_AWK_REXBOUND, QSE_T("enable {n,m} in a regular expression") },
{ QSE_T("ncmponstr"), QSE_AWK_NCMPONSTR, QSE_T("perform numeric comparsion on numeric strings") },
{ QSE_NULL, 0 }
};
@ -357,7 +357,7 @@ static void print_usage (QSE_FILE* out, const qse_char_t* argv0)
qse_fprintf (out, QSE_T("Where options are:\n"));
qse_fprintf (out, QSE_T(" -h/--help print this message\n"));
qse_fprintf (out, QSE_T(" -d show extra information\n"));
qse_fprintf (out, QSE_T(" -c/--call name calls a function instead of entering\n"));
qse_fprintf (out, QSE_T(" -c/--call name call a function instead of entering\n"));
qse_fprintf (out, QSE_T(" the pattern-action loop\n"));
qse_fprintf (out, QSE_T(" -f/--file sourcefile set the source script file\n"));
qse_fprintf (out, QSE_T(" -o/--deparsed-file deparsedfile set the deparsing output file\n"));
@ -384,8 +384,8 @@ static int comparg (int argc, qse_char_t* argv[], struct arg_t* arg)
{ QSE_T(":newline"), QSE_T('\0') },
{ QSE_T(":stripspaces"), QSE_T('\0') },
{ QSE_T(":nextofile"), QSE_T('\0') },
{ QSE_T(":crlf"), QSE_T('\0') },
{ QSE_T(":reset"), QSE_T('\0') },
{ QSE_T(":crlf"), QSE_T('\0') },
{ QSE_T(":maptovar"), QSE_T('\0') },
{ QSE_T(":pablock"), QSE_T('\0') },
{ QSE_T(":rexbound"), QSE_T('\0') },
@ -411,7 +411,6 @@ static int comparg (int argc, qse_char_t* argv[], struct arg_t* arg)
qse_size_t isfc = 16; /* the capacity of isf */
qse_size_t isfl = 0; /* number of input source files */
qse_size_t argl = 0;
qse_size_t icfc = 0; /* the capacity of icf */
qse_size_t icfl = 0; /* the number of input console files */
@ -678,8 +677,8 @@ qse_map_walk_t add_global (qse_map_t* map, qse_map_pair_t* pair, void* arg)
static int awk_main (int argc, qse_char_t* argv[])
{
qse_awk_t* awk;
qse_awk_rtx_t* rtx;
qse_awk_t* awk = QSE_NULL;
qse_awk_rtx_t* rtx = QSE_NULL;
qse_awk_rcb_t rcb;
int i;

View File

@ -1,5 +1,5 @@
/*
* $Id: Awk.hpp 206 2009-06-21 13:33:05Z hyunghwan.chung $
* $Id: Awk.hpp 207 2009-06-22 13:01:28Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -557,10 +557,10 @@ public:
/** Support the nextofile statement */
OPT_NEXTOFILE = QSE_AWK_NEXTOFILE,
/** Use CR+LF instead of LF for line breaking. */
OPT_CRLF = QSE_AWK_CRLF,
/** Enables the keyword 'reset' */
OPT_RESET = QSE_AWK_RESET,
/** Use CR+LF instead of LF for line breaking. */
OPT_CRLF = QSE_AWK_CRLF,
/** Allows the assignment of a map value to a variable */
OPT_MAPTOVAR = QSE_AWK_MAPTOVAR,
/** Allows BEGIN, END, pattern-action blocks */

View File

@ -1,5 +1,5 @@
/*
* $Id: awk.h 206 2009-06-21 13:33:05Z hyunghwan.chung $
* $Id: awk.h 207 2009-06-22 13:01:28Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -388,11 +388,11 @@ enum qse_awk_option_t
/** enables @b nextofile */
QSE_AWK_NEXTOFILE = (1 << 12),
/** CR + LF by default */
QSE_AWK_CRLF = (1 << 13),
/** enables @b reset */
QSE_AWK_RESET = (1 << 14),
QSE_AWK_RESET = (1 << 13),
/** CR + LF by default */
QSE_AWK_CRLF = (1 << 14),
/** allows the assignment of a map value to a variable */
QSE_AWK_MAPTOVAR = (1 << 15),

View File

@ -1,5 +1,5 @@
/*
* $Id: std.h 206 2009-06-21 13:33:05Z hyunghwan.chung $
* $Id: std.h 207 2009-06-22 13:01:28Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -33,6 +33,7 @@
* @todo
* - StdAwk ARGV and console name handling
* - add RQ and LQ for more powerful record splitting
* - improve performance in qse_awk_rtx_readio() if RS is longer than 2 chars.
*/
/**

View File

@ -1,5 +1,5 @@
/*
* $Id: err.c 205 2009-06-20 12:47:34Z hyunghwan.chung $
* $Id: err.c 207 2009-06-22 13:01:28Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -64,8 +64,8 @@ const qse_char_t* qse_awk_dflerrstr (qse_awk_t* awk, qse_awk_errnum_t errnum)
QSE_T("cannot unget character"),
QSE_T("unexpected end of source"),
QSE_T("a comment not cloawk properly"),
QSE_T("a string or a regular expression not cloawk"),
QSE_T("a comment not closed properly"),
QSE_T("a string or a regular expression not closed"),
QSE_T("unexpected end of a regular expression"),
QSE_T("a left brace expected in place of '${0}'"),
QSE_T("a left parenthesis expected in place of '${0}'"),

View File

@ -1,5 +1,5 @@
/*
* $Id: parse.c 205 2009-06-20 12:47:34Z hyunghwan.chung $
* $Id: parse.c 207 2009-06-22 13:01:28Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -2558,6 +2558,7 @@ static qse_awk_nde_t* parse_concat (qse_awk_t* awk, qse_size_t line)
MATCH(awk,TOKEN_MINUS) ||
MATCH(awk,TOKEN_PLUSPLUS) ||
MATCH(awk,TOKEN_MINUSMINUS) ||
MATCH(awk,TOKEN_LNOT) ||
awk->token.type >= TOKEN_GETLINE)
{
/* TODO: is the check above sufficient? */
@ -5041,7 +5042,6 @@ static int get_number (qse_awk_t* awk)
return 0;
}
#if 0
else if (c == QSE_T('b') || c == QSE_T('B'))
{
/* binary number */
@ -5054,7 +5054,6 @@ static int get_number (qse_awk_t* awk)
return 0;
}
#endif
else if (c != '.')
{
/* octal number */

View File

@ -1,5 +1,5 @@
/*
* $Id: rio.c 202 2009-06-16 06:05:40Z hyunghwan.chung $
* $Id: rio.c 207 2009-06-22 13:01:28Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -279,13 +279,13 @@ int qse_awk_rtx_readio (
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&match, &run->errinf.num);
if (n == -1)
if (n <= -1)
{
ret = -1;
break;
}
if (n == 1)
if (n >= 1)
{
/* the match should be found at the end of
* the current buffer */
@ -359,6 +359,10 @@ int qse_awk_rtx_readio (
{
qse_cstr_t match;
/* TODO: minimize the number of regular expression matches here...
* currently matchrex is called for each character added to buf.
* this is a very bad way of doing the job.
*/
QSE_ASSERT (run->gbl.rs != QSE_NULL);
n = QSE_AWK_MATCHREX (
@ -367,14 +371,14 @@ int qse_awk_rtx_readio (
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&match, &run->errinf.num);
if (n == -1)
if (n <= -1)
{
ret = -1;
p->in.pos--; /* unread the character in c */
break;
}
if (n == 1)
if (n >= 1)
{
/* the match should be found at the end of
* the current buffer */

View File

@ -1,5 +1,5 @@
/*
* $Id: run.c 206 2009-06-21 13:33:05Z hyunghwan.chung $
* $Id: run.c 207 2009-06-22 13:01:28Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -77,10 +77,10 @@ static qse_size_t push_arg_from_vals (
qse_awk_rtx_t* rtx, qse_awk_nde_call_t* call, void* data);
static int init_rtx (qse_awk_rtx_t* rtx, qse_awk_t* awk, qse_awk_rio_t* rio);
static void fini_rtx (qse_awk_rtx_t* rtx);
static void fini_rtx (qse_awk_rtx_t* rtx, int fini_globals);
static int init_globals (qse_awk_rtx_t* rtx, const qse_cstr_t* arg);
static void fini_globals (qse_awk_rtx_t* rtx);
static void refdown_globals (qse_awk_rtx_t* run, int pop);
static int run_pattern_blocks (qse_awk_rtx_t* run);
static int run_pattern_block_chain (
@ -415,7 +415,7 @@ static int set_global (
qse_real_t rv;
n = qse_awk_rtx_valtonum (run, val, &lv, &rv);
if (n == -1) return -1;
if (n <= -1) return -1;
if (n == 1) lv = (qse_long_t)rv;
if (lv < (qse_long_t)run->inrec.nflds)
@ -682,7 +682,7 @@ qse_awk_rtx_t* qse_awk_rtx_open (
if (init_globals (rtx, arg) == -1)
{
fini_rtx (rtx);
fini_rtx (rtx, 0);
QSE_AWK_FREE (awk, rtx);
return QSE_NULL;
}
@ -692,8 +692,7 @@ qse_awk_rtx_t* qse_awk_rtx_open (
void qse_awk_rtx_close (qse_awk_rtx_t* rtx)
{
fini_globals (rtx);
fini_rtx (rtx);
fini_rtx (rtx, 1);
QSE_AWK_FREE (rtx->awk, rtx);
}
@ -854,7 +853,7 @@ static int init_rtx (qse_awk_rtx_t* rtx, qse_awk_t* awk, qse_awk_rio_t* rio)
return 0;
}
static void fini_rtx (qse_awk_rtx_t* rtx)
static void fini_rtx (qse_awk_rtx_t* rtx, int fini_globals)
{
if (rtx->pattern_range_state != QSE_NULL)
QSE_AWK_FREE (rtx->awk, rtx->pattern_range_state);
@ -922,8 +921,8 @@ static void fini_rtx (qse_awk_rtx_t* rtx)
qse_str_fini (&rtx->format.fmt);
qse_str_fini (&rtx->format.out);
/* destroy input record. qse_awk_rtx_clrrec should be called
* before the rtx stack has been destroyed because it may try
/* destroy input record. qse_awk_rtx_clrrec() should be called
* before the stack has been destroyed because it may try
* to change the value to QSE_AWK_GBL_NF. */
qse_awk_rtx_clrrec (rtx, QSE_FALSE);
if (rtx->inrec.flds != QSE_NULL)
@ -934,6 +933,8 @@ static void fini_rtx (qse_awk_rtx_t* rtx)
}
qse_str_fini (&rtx->inrec.line);
if (fini_globals) refdown_globals (rtx, 1);
/* destroy the stack if necessary */
if (rtx->stack != QSE_NULL)
{
@ -1248,11 +1249,6 @@ oops:
return -1;
}
static void fini_globals (qse_awk_rtx_t* rtx)
{
refdown_globals (rtx, 1);
}
struct capture_retval_data_t
{
qse_awk_rtx_t* rtx;
@ -4028,7 +4024,6 @@ static int __cmp_int_str (
if (rtx->awk->option & QSE_AWK_NCMPONSTR)
{
const qse_char_t* end;
qse_long_t ll;
qse_real_t rr;
@ -4180,9 +4175,6 @@ static int __cmp_str_real (
static int __cmp_str_str (
qse_awk_rtx_t* rtx, qse_awk_val_t* left, qse_awk_val_t* right)
{
int n1, n2;
qse_long_t l1, l2;
qse_real_t r1, r2;
qse_awk_val_str_t* ls, * rs;
ls = (qse_awk_val_str_t*)left;

View File

@ -71,7 +71,7 @@ void qse_assert_failed (
const qse_char_t* expr, const qse_char_t* desc,
const qse_char_t* file, qse_size_t line)
{
qse_sio_puts (QSE_SIO_ERR, QSE_T("=[ASSERTION FAILURE]============================================================"));
qse_sio_puts (QSE_SIO_ERR, QSE_T("=[ASSERTION FAILURE]============================================================\n"));
qse_sio_puts (QSE_SIO_ERR, QSE_T("FILE "));
qse_sio_puts (QSE_SIO_ERR, file);
@ -89,7 +89,7 @@ void qse_assert_failed (
qse_sio_puts (QSE_SIO_ERR, desc);
qse_sio_puts (QSE_SIO_ERR, QSE_T("\n"));
}
qse_sio_puts (QSE_SIO_ERR, QSE_T("================================================================================"));
qse_sio_puts (QSE_SIO_ERR, QSE_T("================================================================================\n"));
qse_sio_flush (QSE_SIO_ERR);
#ifdef _WIN32

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.c 204 2009-06-18 12:08:06Z hyunghwan.chung $
* $Id: rex.c 207 2009-06-22 13:01:28Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -39,7 +39,7 @@ enum
LEVEL_RANGE
};
enum
enum
{
CMD_BOL,
CMD_EOL,
@ -65,6 +65,7 @@ enum
};
#define DEF_CODE_CAPA 512
#define BOUND_MIN 0
#define BOUND_MAX (QSE_TYPE_MAX(qse_size_t))
@ -72,7 +73,7 @@ typedef struct builder_t builder_t;
typedef struct matcher_t matcher_t;
typedef struct match_t match_t;
typedef struct code_t code_t;
typedef struct atom_t atom_t;
typedef struct rhdr_t rhdr_t;
typedef struct bhdr_t bhdr_t;
typedef struct cshdr_t cshdr_t;
@ -153,12 +154,11 @@ struct match_t
#include <qse/pack1.h>
QSE_BEGIN_PACKED_STRUCT (code_t)
/*qse_byte_t cmd;*/
short cmd;
short negate; /* only for CMD_CHARSET */
qse_size_t lbound;
qse_size_t ubound;
QSE_BEGIN_PACKED_STRUCT (atom_t)
short cmd; /* CMD_XXX */
short negate; /* only for CMD_CHARSET */
qse_size_t lbound; /* lower bound */
qse_size_t ubound; /* upper bound */
QSE_END_PACKED_STRUCT ()
/* compiled regular expression header */
@ -196,10 +196,11 @@ static int build_pattern (builder_t* rex);
static int build_pattern0 (builder_t* rex);
static int build_branch (builder_t* rex);
static int build_atom (builder_t* rex);
static int build_charset (builder_t* rex, code_t* cmd);
static int build_occurrences (builder_t* rex, code_t* cmd);
static int build_cclass (builder_t* rex, qse_char_t* cc);
static int build_range (builder_t* rex, code_t* cmd);
static int build_atom_charset (builder_t* rex, atom_t* cmd);
static int build_atom_occ (builder_t* rex, atom_t* cmd);
static int build_atom_cclass (builder_t* rex, qse_char_t* cc);
static int build_atom_occ_range (builder_t* rex, atom_t* cmd);
static int next_char (builder_t* rex, int level);
static int add_code (builder_t* rex, void* data, qse_size_t len);
@ -402,7 +403,7 @@ void* qse_buildrex (
builder.code.capa = DEF_CODE_CAPA;
builder.code.size = 0;
builder.code.buf = (qse_byte_t*)
QSE_MALLOC (builder.mmgr, builder.code.capa);
QSE_MMGR_ALLOC (builder.mmgr, builder.code.capa);
if (builder.code.buf == QSE_NULL)
{
*errnum = QSE_REX_ENOMEM;
@ -424,14 +425,14 @@ void* qse_buildrex (
if (next_char (&builder, LEVEL_TOP) == -1)
{
if (errnum != QSE_NULL) *errnum = builder.errnum;
QSE_FREE (builder.mmgr, builder.code.buf);
QSE_MMGR_FREE (builder.mmgr, builder.code.buf);
return QSE_NULL;
}
if (build_pattern (&builder) == -1)
{
if (errnum != QSE_NULL) *errnum = builder.errnum;
QSE_FREE (builder.mmgr, builder.code.buf);
QSE_MMGR_FREE (builder.mmgr, builder.code.buf);
return QSE_NULL;
}
@ -455,7 +456,7 @@ void* qse_buildrex (
}
}
QSE_FREE (builder.mmgr, builder.code.buf);
QSE_MMGR_FREE (builder.mmgr, builder.code.buf);
return QSE_NULL;
}
@ -543,7 +544,7 @@ int qse_matchrex (
void qse_freerex (qse_mmgr_t* mmgr, void* code)
{
QSE_ASSERT (code != QSE_NULL);
QSE_FREE (mmgr, code);
QSE_MMGR_FREE (mmgr, code);
}
qse_bool_t qse_isemptyrex (void* code)
@ -636,7 +637,7 @@ static int build_branch (builder_t* builder)
qse_size_t zero = 0;
qse_size_t old_size;
qse_size_t pos_na;
code_t* cmd;
atom_t* cmd;
bhdr_t* bhdr;
old_size = builder->code.size;
@ -647,7 +648,7 @@ static int build_branch (builder_t* builder)
while (1)
{
cmd = (code_t*)&builder->code.buf[builder->code.size];
cmd = (atom_t*)&builder->code.buf[builder->code.size];
n = build_atom (builder);
if (n == -1)
@ -658,7 +659,7 @@ static int build_branch (builder_t* builder)
if (n == 0) break; /* no atom */
n = build_occurrences (builder, cmd);
n = build_atom_occ (builder, cmd);
if (n == -1)
{
builder->code.size = old_size;
@ -666,7 +667,7 @@ static int build_branch (builder_t* builder)
}
/* n == 0 no bound character. just continue */
/* n == 1 bound has been applied by build_occurrences */
/* n == 1 bound has been applied by build_atom_occ */
bhdr = (bhdr_t*)&builder->code.buf[pos_na];
bhdr->na++;
@ -681,7 +682,7 @@ static int build_branch (builder_t* builder)
static int build_atom (builder_t* builder)
{
int n;
code_t tmp;
atom_t tmp;
if (builder->ptn.curc.type == CT_EOF) return 0;
@ -733,9 +734,9 @@ static int build_atom (builder_t* builder)
}
else if (builder->ptn.curc.value == QSE_T('['))
{
code_t* cmd;
atom_t* cmd;
cmd = (code_t*)&builder->code.buf[builder->code.size];
cmd = (atom_t*)&builder->code.buf[builder->code.size];
tmp.cmd = CMD_CHARSET;
tmp.negate = 0;
@ -745,7 +746,7 @@ static int build_atom (builder_t* builder)
NEXT_CHAR (builder, LEVEL_CHARSET);
n = build_charset (builder, cmd);
n = build_atom_charset (builder, cmd);
if (n == -1) return -1;
QSE_ASSERT (n != 0);
@ -782,7 +783,7 @@ static int build_atom (builder_t* builder)
}
}
static int build_charset (builder_t* builder, code_t* cmd)
static int build_atom_charset (builder_t* builder, atom_t* cmd)
{
qse_size_t zero = 0;
qse_size_t old_size;
@ -814,7 +815,7 @@ static int build_charset (builder_t* builder, code_t* cmd)
builder->ptn.curc.type == CT_NORMAL &&
builder->ptn.curc.value == QSE_T(':'))
{
if (build_cclass (builder, &c1) == -1) return -1;
if (build_atom_cclass (builder, &c1) == -1) return -1;
cc = cc | 1;
}
@ -834,7 +835,7 @@ static int build_charset (builder_t* builder, code_t* cmd)
builder->ptn.curc.type == CT_NORMAL &&
builder->ptn.curc.value == QSE_T(':'))
{
if (build_cclass (builder, &c2) == -1)
if (build_atom_cclass (builder, &c2) == -1)
{
return -1;
}
@ -872,7 +873,7 @@ static int build_charset (builder_t* builder, code_t* cmd)
{
/* invalid range */
#ifdef DEBUG_REX
DPUTS (QSE_T("build_charset: invalid character set range\n"));
DPUTS (QSE_T("build_atom_charset: invalid character set range\n"));
#endif
builder->errnum = QSE_REX_ECRANGE;
return -1;
@ -888,7 +889,7 @@ static int build_charset (builder_t* builder, code_t* cmd)
return 1;
}
static int build_cclass (builder_t* builder, qse_char_t* cc)
static int build_atom_cclass (builder_t* builder, qse_char_t* cc)
{
const struct __char_class_t* ccp = __char_class;
qse_size_t len = builder->ptn.end - builder->ptn.curp;
@ -903,7 +904,7 @@ static int build_cclass (builder_t* builder, qse_char_t* cc)
{
/* wrong class name */
#ifdef DEBUG_REX
DPUTS (QSE_T("build_cclass: wrong class name\n"));
DPUTS (QSE_T("build_atom_cclass: wrong class name\n"));
#endif
builder->errnum = QSE_REX_ECCLASS;
return -1;
@ -916,7 +917,7 @@ static int build_cclass (builder_t* builder, qse_char_t* cc)
builder->ptn.curc.value != QSE_T(':'))
{
#ifdef DEBUG_REX
DPUTS (QSE_T("build_cclass: a colon(:) expected\n"));
DPUTS (QSE_T("build_atom_cclass: a colon(:) expected\n"));
#endif
builder->errnum = QSE_REX_ECOLON;
return -1;
@ -929,7 +930,7 @@ static int build_cclass (builder_t* builder, qse_char_t* cc)
builder->ptn.curc.value != QSE_T(']'))
{
#ifdef DEBUG_REX
DPUTS (QSE_T("build_cclass: ] expected\n"));
DPUTS (QSE_T("build_atom_cclass: ] expected\n"));
#endif
builder->errnum = QSE_REX_ERBRACKET;
return -1;
@ -941,7 +942,7 @@ static int build_cclass (builder_t* builder, qse_char_t* cc)
return 1;
}
static int build_occurrences (builder_t* builder, code_t* cmd)
static int build_atom_occ (builder_t* builder, atom_t* cmd)
{
if (builder->ptn.curc.type != CT_SPECIAL) return 0;
@ -975,7 +976,7 @@ static int build_occurrences (builder_t* builder, code_t* cmd)
{
NEXT_CHAR (builder, LEVEL_RANGE);
if (build_range(builder, cmd) == -1) return -1;
if (build_atom_occ_range(builder, cmd) == -1) return -1;
if (builder->ptn.curc.type != CT_SPECIAL ||
builder->ptn.curc.value != QSE_T('}'))
@ -992,7 +993,7 @@ static int build_occurrences (builder_t* builder, code_t* cmd)
return 0;
}
static int build_range (builder_t* builder, code_t* cmd)
static int build_atom_occ_range (builder_t* builder, atom_t* cmd)
{
qse_size_t bound;
@ -1234,7 +1235,7 @@ static int add_code (builder_t* builder, void* data, qse_size_t len)
if (builder->mmgr->realloc != QSE_NULL)
{
tmp = (qse_byte_t*) QSE_REALLOC (
tmp = (qse_byte_t*) QSE_MMGR_REALLOC (
builder->mmgr, builder->code.buf, capa);
if (tmp == QSE_NULL)
{
@ -1244,7 +1245,7 @@ static int add_code (builder_t* builder, void* data, qse_size_t len)
}
else
{
tmp = (qse_byte_t*) QSE_MALLOC (builder->mmgr, capa);
tmp = (qse_byte_t*) QSE_MMGR_ALLOC (builder->mmgr, capa);
if (tmp == QSE_NULL)
{
builder->errnum = QSE_REX_ENOMEM;
@ -1254,7 +1255,7 @@ static int add_code (builder_t* builder, void* data, qse_size_t len)
if (builder->code.buf != QSE_NULL)
{
QSE_MEMCPY (tmp, builder->code.buf, builder->code.capa);
QSE_FREE (builder->mmgr, builder->code.buf);
QSE_MMGR_FREE (builder->mmgr, builder->code.buf);
}
}
@ -1420,19 +1421,19 @@ static const qse_byte_t* match_atom (
};
QSE_ASSERT (
((code_t*)base)->cmd >= 0 &&
((code_t*)base)->cmd < QSE_COUNTOF(matchers));
((atom_t*)base)->cmd >= 0 &&
((atom_t*)base)->cmd < QSE_COUNTOF(matchers));
return matchers[((code_t*)base)->cmd] (matcher, base, mat);
return matchers[((atom_t*)base)->cmd] (matcher, base, mat);
}
static const qse_byte_t* match_bol (
matcher_t* matcher, const qse_byte_t* base, match_t* mat)
{
const qse_byte_t* p = base;
const code_t* cp;
const atom_t* cp;
cp = (const code_t*)p; p += QSE_SIZEOF(*cp);
cp = (const atom_t*)p; p += QSE_SIZEOF(*cp);
QSE_ASSERT (cp->cmd == CMD_BOL);
/*mat->matched = (mat->match_ptr == matcher->match.str.ptr ||
@ -1448,9 +1449,9 @@ static const qse_byte_t* match_eol (
matcher_t* matcher, const qse_byte_t* base, match_t* mat)
{
const qse_byte_t* p = base;
const code_t* cp;
const atom_t* cp;
cp = (const code_t*)p; p += QSE_SIZEOF(*cp);
cp = (const atom_t*)p; p += QSE_SIZEOF(*cp);
QSE_ASSERT (cp->cmd == CMD_EOL);
/*mat->matched = (mat->match_ptr == matcher->match.str.end ||
@ -1466,10 +1467,10 @@ static const qse_byte_t* match_any_char (
matcher_t* matcher, const qse_byte_t* base, match_t* mat)
{
const qse_byte_t* p = base;
const code_t* cp;
const atom_t* cp;
qse_size_t si = 0, lbound, ubound;
cp = (const code_t*)p; p += QSE_SIZEOF(*cp);
cp = (const atom_t*)p; p += QSE_SIZEOF(*cp);
QSE_ASSERT (cp->cmd == CMD_ANY_CHAR);
lbound = cp->lbound;
@ -1480,10 +1481,10 @@ static const qse_byte_t* match_any_char (
/* merge the same consecutive codes */
while (p < mat->branch_end &&
cp->cmd == ((const code_t*)p)->cmd)
cp->cmd == ((const atom_t*)p)->cmd)
{
lbound += ((const code_t*)p)->lbound;
ubound += ((const code_t*)p)->ubound;
lbound += ((const atom_t*)p)->lbound;
ubound += ((const atom_t*)p)->ubound;
p += QSE_SIZEOF(*cp);
}
@ -1518,11 +1519,11 @@ static const qse_byte_t* match_ord_char (
matcher_t* matcher, const qse_byte_t* base, match_t* mat)
{
const qse_byte_t* p = base;
const code_t* cp;
const atom_t* cp;
qse_size_t si = 0, lbound, ubound;
qse_char_t cc;
cp = (const code_t*)p; p += QSE_SIZEOF(*cp);
cp = (const atom_t*)p; p += QSE_SIZEOF(*cp);
QSE_ASSERT (cp->cmd == CMD_ORD_CHAR);
lbound = cp->lbound;
@ -1536,12 +1537,12 @@ static const qse_byte_t* match_ord_char (
if (matcher->option & QSE_REX_MATCH_IGNORECASE)
{
while (p < mat->branch_end &&
cp->cmd == ((const code_t*)p)->cmd)
cp->cmd == ((const atom_t*)p)->cmd)
{
if (QSE_TOUPPER (*(qse_char_t*)(p+QSE_SIZEOF(*cp))) != cc) break;
lbound += ((const code_t*)p)->lbound;
ubound += ((const code_t*)p)->ubound;
lbound += ((const atom_t*)p)->lbound;
ubound += ((const atom_t*)p)->ubound;
p += QSE_SIZEOF(*cp) + QSE_SIZEOF(cc);
}
@ -1549,12 +1550,12 @@ static const qse_byte_t* match_ord_char (
else
{
while (p < mat->branch_end &&
cp->cmd == ((const code_t*)p)->cmd)
cp->cmd == ((const atom_t*)p)->cmd)
{
if (*(qse_char_t*)(p+QSE_SIZEOF(*cp)) != cc) break;
lbound += ((const code_t*)p)->lbound;
ubound += ((const code_t*)p)->ubound;
lbound += ((const atom_t*)p)->lbound;
ubound += ((const atom_t*)p)->ubound;
p += QSE_SIZEOF(*cp) + QSE_SIZEOF(cc);
}
@ -1621,10 +1622,10 @@ static const qse_byte_t* match_charset (
qse_bool_t n;
qse_char_t c;
code_t* cp;
atom_t* cp;
cshdr_t* cshdr;
cp = (code_t*)p; p += QSE_SIZEOF(*cp);
cp = (atom_t*)p; p += QSE_SIZEOF(*cp);
QSE_ASSERT (cp->cmd == CMD_CHARSET);
cshdr = (cshdr_t*)p; p += QSE_SIZEOF(*cshdr);
@ -1672,11 +1673,13 @@ static const qse_byte_t* match_group (
matcher_t* matcher, const qse_byte_t* base, match_t* mat)
{
const qse_byte_t* p = base;
const code_t* cp;
const atom_t* cp;
match_t mat2;
qse_size_t si = 0, grp_len_static[16], * grp_len;
qse_size_t si = 0, grp_len_static[16], * grp_len, grp_len_capa;
cp = (const atom_t*)p;
p += QSE_SIZEOF(*cp); /* points to a subpattern in a group */
cp = (const code_t*)p; p += QSE_SIZEOF(*cp);
QSE_ASSERT (cp->cmd == CMD_GROUP);
mat->matched = QSE_FALSE;
@ -1708,12 +1711,16 @@ static const qse_byte_t* match_group (
if (cp->ubound < QSE_COUNTOF(grp_len_static))
{
grp_len_capa = QSE_COUNTOF(grp_len_static);
grp_len = grp_len_static;
}
else
{
grp_len = (qse_size_t*) QSE_MALLOC (
matcher->mmgr, QSE_SIZEOF(qse_size_t) * cp->ubound);
grp_len_capa = cp->ubound;
if (grp_len_capa > 256) grp_len_capa = 256;
grp_len = (qse_size_t*) QSE_MMGR_ALLOC (
matcher->mmgr, QSE_SIZEOF(qse_size_t) * grp_len_capa);
if (grp_len == QSE_NULL)
{
matcher->errnum = QSE_REX_ENOMEM;
@ -1736,11 +1743,31 @@ static const qse_byte_t* match_group (
if (match_pattern (matcher, p, &mat2) == QSE_NULL)
{
if (grp_len != grp_len_static)
QSE_FREE (matcher->mmgr, grp_len);
QSE_MMGR_FREE (matcher->mmgr, grp_len);
return QSE_NULL;
}
if (!mat2.matched) break;
if ((si + 1) >= grp_len_capa)
{
qse_size_t* tmp;
QSE_ASSERT (grp_len != grp_len_static);
tmp = (qse_size_t*) QSE_MMGR_REALLOC (
matcher->mmgr, grp_len,
QSE_SIZEOF(qse_size_t) * (grp_len_capa + 256)
);
if (tmp == QSE_NULL)
{
QSE_MMGR_FREE (matcher->mmgr, grp_len);
return QSE_NULL;
}
grp_len = tmp;
grp_len_capa += 256;
}
grp_len[si+1] = grp_len[si] + mat2.match_len;
mat2.match_ptr += mat2.match_len;
@ -1763,6 +1790,14 @@ static const qse_byte_t* match_group (
}
else
{
/* consider the pattern '(abc|def){1,3}(abc)'.
* for the input abcabcabc,
* '(abc|def){1,3}' should match up to the second 'abc'.
* '(abc)' should match the last 'abc'.
*
* backtracking is needed to handle this case.
*/
QSE_ASSERT (cp->ubound > cp->lbound);
do
@ -1778,11 +1813,12 @@ static const qse_byte_t* match_group (
QSE_T("match_group: GROUP si=%d [%s]\n"),
(unsigned)si, mat->match_ptr);
#endif
tmp = match_branch_body (matcher, p, &mat2);
if (tmp == QSE_NULL)
{
if (grp_len != grp_len_static)
QSE_FREE (matcher->mmgr, grp_len);
QSE_MMGR_FREE (matcher->mmgr, grp_len);
return QSE_NULL;
}
@ -1802,7 +1838,7 @@ static const qse_byte_t* match_group (
}
if (grp_len != grp_len_static) QSE_FREE (matcher->mmgr, grp_len);
if (grp_len != grp_len_static) QSE_MMGR_FREE (matcher->mmgr, grp_len);
return p;
}
@ -2002,7 +2038,7 @@ static const qse_byte_t* __print_branch (qse_awk_t* awk, const qse_byte_t* p)
static const qse_byte_t* __print_atom (qse_awk_t* awk, const qse_byte_t* p)
{
const code_t* cp = (const code_t*)p;
const atom_t* cp = (const atom_t*)p;
if (cp->cmd == CMD_BOL)
{

View File

@ -1,5 +1,5 @@
/*
* $Id: err.c 191 2009-06-07 13:09:14Z hyunghwan.chung $
* $Id: err.c 207 2009-06-22 13:01:28Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -18,6 +18,45 @@
#include "sed.h"
const qse_char_t* qse_sed_dflerrstr (qse_sed_t* sed, qse_sed_errnum_t errnum)
{
static const qse_char_t* errstr[] =
{
QSE_T("no error"),
QSE_T("insufficient memory"),
QSE_T("command '${0}' not recognized"),
QSE_T("command code missing"),
QSE_T("command '${0}' incomplete"),
QSE_T("regular expression '${0}' incomplete"),
QSE_T("failed to compile regular expression '${0}'"),
QSE_T("failed to match regular expression"),
QSE_T("address 1 prohibited for '${0}'"),
QSE_T("address 2 prohibited for '${0}'"),
QSE_T("address 2 missing or invalid"),
QSE_T("newline expected"),
QSE_T("backslash expected"),
QSE_T("backslash used as delimiter"),
QSE_T("garbage after backslash"),
QSE_T("semicolon expected"),
QSE_T("empty label name"),
QSE_T("duplicate label name '${0}'"),
QSE_T("label '${0}' not found"),
QSE_T("empty file name"),
QSE_T("illegal file name"),
QSE_T("strings in translation set not the same length"),
QSE_T("group brackets not balanced"),
QSE_T("group nesting too deep"),
QSE_T("multiple occurrence specifiers"),
QSE_T("occurrence specifier zero"),
QSE_T("occurrence specifier too large"),
QSE_T("io error with file '${0}'"),
QSE_T("error returned by user io handler")
};
return (errnum >= 0 && errnum < QSE_COUNTOF(errstr))?
errstr[errnum]: QSE_T("unknown error");
}
qse_sed_errstr_t qse_sed_geterrstr (qse_sed_t* sed)
{
return sed->errstr;

View File

@ -1,5 +1,5 @@
/*
* $Id: sed.c 203 2009-06-17 12:43:50Z hyunghwan.chung $
* $Id: sed.c 207 2009-06-22 13:01:28Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -39,45 +39,6 @@ do { \
qse_sed_seterror (sed, num, line, &__qse__err__arg__); \
} while (0)
static const qse_char_t* dflerrstr (qse_sed_t* sed, qse_sed_errnum_t errnum)
{
static const qse_char_t* errstr[] =
{
QSE_T("no error"),
QSE_T("insufficient memory"),
QSE_T("command '${0}' not recognized"),
QSE_T("command code missing"),
QSE_T("command '${0}' incomplete"),
QSE_T("regular expression '${0}' incomplete"),
QSE_T("failed to compile regular expression '${0}'"),
QSE_T("failed to match regular expression"),
QSE_T("address 1 prohibited for '${0}'"),
QSE_T("address 2 prohibited for '${0}'"),
QSE_T("address 2 missing or invalid"),
QSE_T("newline expected"),
QSE_T("backslash expected"),
QSE_T("backslash used as delimiter"),
QSE_T("garbage after backslash"),
QSE_T("semicolon expected"),
QSE_T("empty label name"),
QSE_T("duplicate label name '${0}'"),
QSE_T("label '${0}' not found"),
QSE_T("empty file name"),
QSE_T("illegal file name"),
QSE_T("strings in translation set not the same length"),
QSE_T("group brackets not balanced"),
QSE_T("group nesting too deep"),
QSE_T("multiple occurrence specifiers"),
QSE_T("occurrence specifier zero"),
QSE_T("occurrence specifier too large"),
QSE_T("io error with file '${0}'"),
QSE_T("error returned by user io handler")
};
return (errnum >= 0 && errnum < QSE_COUNTOF(errstr))?
errstr[errnum]: QSE_T("unknown error");
}
qse_sed_t* qse_sed_open (qse_mmgr_t* mmgr, qse_size_t xtn)
{
qse_sed_t* sed;
@ -114,7 +75,7 @@ static qse_sed_t* qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr)
{
QSE_MEMSET (sed, 0, QSE_SIZEOF(*sed));
sed->mmgr = mmgr;
sed->errstr = dflerrstr;
sed->errstr = qse_sed_dflerrstr;
if (qse_str_init (&sed->tmp.rex, mmgr, 0) == QSE_NULL)
{

View File

@ -1,5 +1,5 @@
/*
* $Id: sed.h 191 2009-06-07 13:09:14Z hyunghwan.chung $
* $Id: sed.h 207 2009-06-22 13:01:28Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
@ -260,4 +260,13 @@ struct qse_sed_t
} e;
};
#ifdef __cplusplus
extern "C" {
#endif
const qse_char_t* qse_sed_dflerrstr (qse_sed_t* sed, qse_sed_errnum_t errnum);
#ifdef __cplusplus
}
#endif
#endif