interim commit while implementing a regular expression processor

This commit is contained in:
hyung-hwan 2009-11-23 07:58:53 +00:00
parent 1d5be1f982
commit 782fa151de
9 changed files with 304 additions and 165 deletions

View File

@ -1,5 +1,5 @@
/*
* $Id: awk.c 291 2009-09-21 13:28:18Z hyunghwan.chung $
* $Id: awk.c 306 2009-11-22 13:58:53Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -297,11 +297,7 @@ static int fnc_sleep (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
#endif
r = qse_awk_rtx_makeintval (run, n);
if (r == QSE_NULL)
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return -1;
}
if (r == QSE_NULL) return -1;
qse_awk_rtx_setretval (run, r);
return 0;

20
qse/configure vendored
View File

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.64 for qse 0.5.2.
# Generated by GNU Autoconf 2.64 for qse 0.5.3.
#
# Report bugs to <bacon@abiyo.net>.
#
@ -698,8 +698,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='qse'
PACKAGE_TARNAME='qse'
PACKAGE_VERSION='0.5.2'
PACKAGE_STRING='qse 0.5.2'
PACKAGE_VERSION='0.5.3'
PACKAGE_STRING='qse 0.5.3'
PACKAGE_BUGREPORT='bacon@abiyo.net'
PACKAGE_URL=''
@ -1457,7 +1457,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures qse 0.5.2 to adapt to many kinds of systems.
\`configure' configures qse 0.5.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1527,7 +1527,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of qse 0.5.2:";;
short | recursive ) echo "Configuration of qse 0.5.3:";;
esac
cat <<\_ACEOF
@ -1641,7 +1641,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
qse configure 0.5.2
qse configure 0.5.3
generated by GNU Autoconf 2.64
Copyright (C) 2009 Free Software Foundation, Inc.
@ -2403,7 +2403,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by qse $as_me 0.5.2, which was
It was created by qse $as_me 0.5.3, which was
generated by GNU Autoconf 2.64. Invocation command line was
$ $0 $@
@ -3222,7 +3222,7 @@ fi
# Define the identity of the package.
PACKAGE='qse'
VERSION='0.5.2'
VERSION='0.5.3'
cat >>confdefs.h <<_ACEOF
@ -17992,7 +17992,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by qse $as_me 0.5.2, which was
This file was extended by qse $as_me 0.5.3, which was
generated by GNU Autoconf 2.64. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -18056,7 +18056,7 @@ Report bugs to <bacon@abiyo.net>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\
qse config.status 0.5.2
qse config.status 0.5.3
configured by $0, generated by GNU Autoconf 2.64,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"

View File

@ -1,5 +1,5 @@
dnl AC_PREREQ(2.59)
AC_INIT([qse],[0.5.2],[bacon@abiyo.net])
AC_INIT([qse],[0.5.3],[bacon@abiyo.net])
AC_CONFIG_HEADER([include/qse/config.h])
AC_CONFIG_AUX_DIR([ac/au])
AC_CONFIG_MACRO_DIR([ac/m4])

View File

@ -1,5 +1,5 @@
/*
* $Id: awk.h 299 2009-10-19 13:33:40Z hyunghwan.chung $
* $Id: awk.h 306 2009-11-22 13:58:53Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -795,7 +795,7 @@ enum qse_awk_errnum_t
QSE_AWK_EREXCOLON, /**< a colon is expected */
QSE_AWK_EREXCRANGE, /**< invalid character range */
QSE_AWK_EREXCCLASS, /**< invalid character class */
QSE_AWK_EREXBRANGE, /**< invalid boundary range */
QSE_AWK_EREXBOUND, /**< invalid boundary range */
QSE_AWK_EREXEND, /**< unexpected end of the pattern */
QSE_AWK_EREXGARBAGE, /**< garbage after the pattern */

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.h 304 2009-11-20 05:12:27Z hyunghwan.chung $
* $Id: rex.h 306 2009-11-22 13:58:53Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -72,6 +72,10 @@ enum qse_rex_option_t
/**< do not support the {n,m} style occurrence specifier */
QSE_REX_NOBOUND = (1 << 0),
QSE_REX_ESQ_HEX = (1 << 1), /* \xhh and \uhhhh */
QSE_REX_ESQ_OCTAL = (1 << 2), /* \000 */
QSE_REX_ESQ_CNTRL = (1 << 3), /* \cX where X is A to Z */
/**< perform case-insensitive match */
QSE_REX_IGNORECASE = (1 << 8)
};
@ -89,7 +93,7 @@ enum qse_rex_errnum_t
QSE_REX_ECOLON, /* a colon is expected */
QSE_REX_ECRANGE, /* invalid character range */
QSE_REX_ECCLASS, /* invalid character class */
QSE_REX_EBRANGE, /* invalid boundary range */
QSE_REX_EBOUND, /* invalid boundary range */
QSE_REX_EEND, /* unexpected end of the pattern */
QSE_REX_EGARBAGE /* garbage after the pattern */
};

View File

@ -1,5 +1,5 @@
/*
* $Id: misc.c 292 2009-09-23 10:19:30Z hyunghwan.chung $
* $Id: misc.c 306 2009-11-22 13:58:53Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -1039,7 +1039,7 @@ qse_char_t* qse_awk_rtx_strxnfld (
(err == QSE_REX_ECOLON)? QSE_AWK_EREXCOLON: \
(err == QSE_REX_ECRANGE)? QSE_AWK_EREXCRANGE: \
(err == QSE_REX_ECCLASS)? QSE_AWK_EREXCCLASS: \
(err == QSE_REX_EBRANGE)? QSE_AWK_EREXBRANGE: \
(err == QSE_REX_EBOUND)? QSE_AWK_EREXBOUND: \
(err == QSE_REX_EEND)? QSE_AWK_EREXEND: \
(err == QSE_REX_EGARBAGE)? QSE_AWK_EREXGARBAGE: \
QSE_AWK_EINTERN)

View File

@ -1,5 +1,5 @@
/*
* $Id: std.c 291 2009-09-21 13:28:18Z hyunghwan.chung $
* $Id: std.c 306 2009-11-22 13:58:53Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -1224,11 +1224,7 @@ static int fnc_math_1 (
r = qse_awk_rtx_makerealval (run, rf(rv));
}
if (r == QSE_NULL)
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return -1;
}
if (r == QSE_NULL) return -1;
qse_awk_rtx_setretval (run, r);
return 0;
@ -1277,11 +1273,7 @@ static int fnc_math_2 (
r = qse_awk_rtx_makerealval (run, rf(rv0,rv1));
}
if (r == QSE_NULL)
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return -1;
}
if (r == QSE_NULL) return -1;
qse_awk_rtx_setretval (run, r);
return 0;
@ -1434,11 +1426,7 @@ static int fnc_int (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
if (n == 1) lv = (qse_long_t)rv;
r = qse_awk_rtx_makeintval (run, lv);
if (r == QSE_NULL)
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return -1;
}
if (r == QSE_NULL) return -1;
qse_awk_rtx_setretval (run, r);
return 0;
@ -1456,11 +1444,7 @@ static int fnc_rand (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
*/
r = qse_awk_rtx_makerealval (
run, (qse_real_t)(rand() % RAND_MAX) / RAND_MAX);
if (r == QSE_NULL)
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return -1;
}
if (r == QSE_NULL) return -1;
qse_awk_rtx_setretval (run, r);
return 0;
@ -1504,11 +1488,7 @@ static int fnc_srand (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
srand (rxtn->seed);
r = qse_awk_rtx_makeintval (run, prev);
if (r == QSE_NULL)
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return -1;
}
if (r == QSE_NULL) return -1;
qse_awk_rtx_setretval (run, r);
return 0;

View File

@ -1,5 +1,5 @@
/*
* $Id: rex.c 304 2009-11-20 05:12:27Z hyunghwan.chung $
* $Id: rex.c 306 2009-11-22 13:58:53Z hyunghwan.chung $
*
Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE.
@ -962,7 +962,7 @@ what if it is not in the raight format? convert it to ordinary characters?? */
if (cmd->lbound > cmd->ubound)
{
/* invalid occurrences range */
builder->errnum = QSE_REX_EBRANGE;
builder->errnum = QSE_REX_EBOUND;
return -1;
}

View File

@ -144,7 +144,7 @@ static void freenode (qse_rex_node_t* node, qse_mmgr_t* mmgr)
{
if (node->id == QSE_REX_NODE_CHARSET)
{
// TODO:
if (node->u.cs != QSE_NULL)
QSE_MMGR_FREE (mmgr, node->u.cs);
}
@ -288,8 +288,13 @@ static qse_rex_node_t* newbranchnode (
#define IS_ESC(com) ((com)->c.escaped)
#define IS_EOF(com) ((com)->c.value == QSE_CHAR_EOF)
static int getc (comp_t* com)
#define getc_noesc(c) getc(c,1)
#define getc_esc(c) getc(c,0)
static int getc (comp_t* com, int noesc)
{
qse_char_t c;
if (com->ptr >= com->end)
{
com->c.value = QSE_CHAR_EOF;
@ -298,11 +303,10 @@ static int getc (comp_t* com)
}
com->c.value = *com->ptr++;
com->c.escaped = QSE_FALSE;
com->c.escaped = 0;
if (noesc || com->c.value != QSE_T('\\')) return 0;
if (com->c.value == QSE_T('\\'))
{
qse_char_t c;
CHECK_END (com);
c = *com->ptr++;
@ -401,7 +405,6 @@ static int getc (comp_t* com)
com->c.value = c;
com->c.escaped = QSE_TRUE;
}
#if 0
com->c = (com->ptr < com->end)? *com->ptr++: QSE_CHAR_EOF;
@ -412,6 +415,162 @@ else qse_printf (QSE_T("getc => %c\n"), com->c);
return 0;
}
#if 0
/*
 * Disabled draft (compiled out by the surrounding #if 0) that recognises
 * the name part of a [:name:] character class.
 *
 * builder - compile state; the unread pattern is builder->ptn.curp up to
 *           builder->ptn.end and the current character is
 *           builder->ptn.curc.
 * cc      - receives the index of the matched entry in __char_class.
 *
 * Returns 1 on success. On failure it stores QSE_REX_ECCLASS,
 * QSE_REX_ECOLON or QSE_REX_ERBRACKET in builder->errnum and returns -1.
 */
static int charclass (comp_t* builder, qse_char_t* cc)
{
const struct __char_class_t* ccp = __char_class;
qse_size_t len = builder->ptn.end - builder->ptn.curp;
/* walk the table until an entry matches or the QSE_NULL-named
 * terminator is reached; __begin_with presumably tests whether the
 * unread pattern starts with ccp->name - confirm */
while (ccp->name != QSE_NULL)
{
if (__begin_with (builder->ptn.curp, len, ccp->name)) break;
ccp++;
}
if (ccp->name == QSE_NULL)
{
/* wrong class name */
#ifdef DEBUG_REX
DPUTS (QSE_T("build_atom_cclass: wrong class name\n"));
#endif
builder->errnum = QSE_REX_ECCLASS;
return -1;
}
/* step over the matched name, then load the character after it */
builder->ptn.curp += ccp->name_len;
NEXT_CHAR (builder, LEVEL_CHARSET);
/* the name must be followed by an ordinary ':' */
if (builder->ptn.curc.type != CT_NORMAL ||
builder->ptn.curc.value != QSE_T(':'))
{
#ifdef DEBUG_REX
DPUTS (QSE_T("build_atom_cclass: a colon(:) expected\n"));
#endif
builder->errnum = QSE_REX_ECOLON;
return -1;
}
NEXT_CHAR (builder, LEVEL_CHARSET);
/* the ']' closing the class must be classified as CT_SPECIAL, the same
 * classification tested here for the character that ends a charset */
if (builder->ptn.curc.type != CT_SPECIAL ||
builder->ptn.curc.value != QSE_T(']'))
{
#ifdef DEBUG_REX
DPUTS (QSE_T("build_atom_cclass: ] expected\n"));
#endif
builder->errnum = QSE_REX_ERBRACKET;
return -1;
}
NEXT_CHAR (builder, LEVEL_CHARSET);
/* report the class as its position within the __char_class table */
*cc = (qse_char_t)(ccp - __char_class);
return 1;
}
#endif
/*
 * Scans a bracket expression up to and including its closing ']'.
 *
 * The current character on entry is treated as the first character inside
 * the brackets; the opening '[' itself is not examined here. This is work
 * in progress: nothing is stored in 'node' yet, a leading '^' is skipped
 * without being recorded, and the members found are only printed with
 * qse_printf.
 *
 * c    - compile state; c->c.value is the current character.
 * node - charset node meant to receive the members (currently unused).
 *
 * Returns 0 after the character following ']' has been fetched, or -1 as
 * soon as a getc call reports failure.
 *
 * NOTE(review): the loop ends only on ']' or on a getc failure. If getc
 * signals the end of the pattern by storing QSE_CHAR_EOF and returning
 * success, an unterminated set would never leave the loop - confirm and
 * add an explicit end-of-pattern check if so.
 */
static int charset (comp_t* c, qse_rex_node_t* node)
{
	if (c->c.value == QSE_T('^'))
	{
		/* TODO: negate... (original note: cmd->negate = 1) */
		if (getc_noesc(c) <= -1) return -1;
	}

	/* if ] is the first character or the second character following ^,
	 * it is treated literally: the do-while consumes one item before
	 * the terminating condition is evaluated for the first time */
	do
	{
		qse_char_t c1, c2;

		/* c1 is the item under inspection, c2 the look-ahead */
		c1 = c->c.value;
		if (getc_noesc(c) <= -1) return -1;
		c2 = c->c.value;

		if (c1 == QSE_T('[') && c2 == QSE_T(':'))
		{
			/* begins with [: */
			if (getc_noesc(c) <= -1) return -1;
			/* TODO: if (charclass (c) <= -1) return -1; */
		}
		else if (c2 == QSE_T('-'))
		{
			/* step over '-' to reach the upper end of the range */
			if (getc_noesc(c) <= -1) return -1;
			/* TODO: add the range c1 .. c->c.value */
			qse_printf (QSE_T("[%c-%c]\n"), c1, c->c.value);
			if (getc_noesc(c) <= -1) return -1;
		}
		else
		{
			/* TODO: add c1 */
			qse_printf (QSE_T("[%c]\n"), c1);
		}
	}
	while (c->c.value != QSE_T(']'));

	/* move past ']' with escape processing switched back on */
	if (getc_esc(c) <= -1) return -1;
	return 0;
}
/*
 * Parses the inside of an occurrence bound - "min}", "min,}" or
 * "min,max}" - and stores the result in n->occ.
 *
 * The current character on entry is expected to be the one right after
 * '{' (presumably consumed by the caller - confirm).
 *
 * c - compile state; c->c.value is the current character.
 * n - node whose occ.min / occ.max are set.
 *
 * A missing lower number is read as 0. A comma with no upper number gives
 * OCC_MAX. No comma at all makes max equal to min.
 *
 * Returns 0 after the character following '}' has been fetched. Returns
 * -1 if a getc call fails, if min exceeds max (QSE_REX_EBOUND), or if the
 * bound is not closed by '}' (QSE_REX_ERBRACE).
 */
static int occbound (comp_t* c, qse_rex_node_t* n)
{
	qse_size_t bound;

	/* lower bound: accumulate decimal digits */
	bound = 0;
	while (c->c.value >= QSE_T('0') && c->c.value <= QSE_T('9'))
	{
		bound = bound * 10 + c->c.value - QSE_T('0');
		if (getc_noesc(c) <= -1) return -1;
	}
	n->occ.min = bound;

	if (c->c.value == QSE_T(','))
	{
		if (getc_noesc(c) <= -1) return -1;

		if (c->c.value >= QSE_T('0') && c->c.value <= QSE_T('9'))
		{
			/* explicit upper bound */
			bound = 0;
			do
			{
				bound = bound * 10 + c->c.value - QSE_T('0');
				if (getc_noesc(c) <= -1) return -1;
			}
			while (c->c.value >= QSE_T('0') &&
			       c->c.value <= QSE_T('9'));
			n->occ.max = bound;
		}
		else n->occ.max = OCC_MAX; /* "{n,}" has no upper limit */
	}
	else n->occ.max = n->occ.min; /* "{n}" means exactly n */

	/* compare against max; the earlier min-vs-min test could never be
	 * true, so reversed bounds such as {5,2} were silently accepted */
	if (n->occ.min > n->occ.max)
	{
		/* invalid occurrences range */
		c->rex->errnum = QSE_REX_EBOUND;
		return -1;
	}

	if (c->c.value != QSE_T('}'))
	{
		c->rex->errnum = QSE_REX_ERBRACE;
		return -1;
	}

	/* move past '}' with escape processing switched back on */
	if (getc_esc(c) <= -1) return -1;
	return 0;
}
static qse_rex_node_t* comp0 (comp_t* c, qse_rex_node_t* ge);
static qse_rex_node_t* comp2 (comp_t* c)
@ -434,7 +593,7 @@ static qse_rex_node_t* comp2 (comp_t* c)
ge = newgroupendnode (c, n);
if (ge == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) return QSE_NULL;
if (getc_esc(c) <= -1) return QSE_NULL;
c->gdepth++;
x = comp0 (c, ge);
@ -442,12 +601,12 @@ static qse_rex_node_t* comp2 (comp_t* c)
if (!IS_SPE(c,QSE_T(')')))
{
c->rex->errnum = QSE_REX_EUNBALPAREN;
c->rex->errnum = QSE_REX_ERPAREN;
return QSE_NULL;
}
c->gdepth--;
if (getc(c) <= -1) return QSE_NULL;
if (getc_esc(c) <= -1) return QSE_NULL;
n->u.g.head = x;
break;
@ -457,26 +616,28 @@ static qse_rex_node_t* comp2 (comp_t* c)
case QSE_T('.'):
n = newnode (c, QSE_REX_NODE_ANYCHAR);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) return QSE_NULL;
if (getc_esc(c) <= -1) return QSE_NULL;
break;
case QSE_T('^'):
n = newnode (c, QSE_REX_NODE_BOL);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) return QSE_NULL;
if (getc_esc(c) <= -1) return QSE_NULL;
break;
case QSE_T('$'):
n = newnode (c, QSE_REX_NODE_EOL);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) return QSE_NULL;
if (getc_esc(c) <= -1) return QSE_NULL;
break;
/*
case QSE_T('['):
n = newnode (c, QSE_REX_NODE_CHARSET);
if (n == QSE_NULL) return QSE_NULL;
if (getc_noesc(c) <= -1) return QSE_NULL;
if (charset(c, n) <= -1) return QSE_NULL;
break;
*/
default:
goto normal_char;
@ -488,7 +649,7 @@ static qse_rex_node_t* comp2 (comp_t* c)
/* normal character */
n = newcharnode (c, c->c.value);
if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) return QSE_NULL;
if (getc_esc(c) <= -1) return QSE_NULL;
}
n->occ.min = 1;
@ -503,30 +664,28 @@ static qse_rex_node_t* comp2 (comp_t* c)
case QSE_T('?'):
n->occ.min = 0;
n->occ.max = 1;
if (getc(c) <= -1) return QSE_NULL;
if (getc_esc(c) <= -1) return QSE_NULL;
break;
case QSE_T('*'):
n->occ.min = 0;
n->occ.max = OCC_MAX;
if (getc(c) <= -1) return QSE_NULL;
if (getc_esc(c) <= -1) return QSE_NULL;
break;
case QSE_T('+'):
n->occ.min = 1;
n->occ.max = OCC_MAX;
if (getc(c) <= -1) return QSE_NULL;
if (getc_esc(c) <= -1) return QSE_NULL;
break;
/*
case QSE_T('{'):
// TODO:
if (!(com->rex->option & QSE_REX_NOBOUND))
if (!(c->rex->option & QSE_REX_NOBOUND))
{
if (getc_noesc(c) <= -1) return QSE_NULL;
if (occbound(c,n) <= -1) return QSE_NULL;
}
break;
*/
}
}
@ -568,7 +727,7 @@ static qse_rex_node_t* comp0 (comp_t* c, qse_rex_node_t* ge)
while (IS_SPE(c,QSE_T('|')))
{
if (getc(c) <= -1)
if (getc_esc(c) <= -1)
{
//freere (left);
return QSE_NULL;
@ -614,7 +773,7 @@ qse_rex_node_t* qse_rex_comp (
c.start = QSE_NULL;
/* read the first character */
if (getc(&c) <= -1) return QSE_NULL;
if (getc_esc(&c) <= -1) return QSE_NULL;
c.start = newstartnode (&c);
if (c.start == QSE_NULL) return QSE_NULL;