interim commit while implementing a regular expression processor
This commit is contained in:
parent
1d5be1f982
commit
782fa151de
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: awk.c 291 2009-09-21 13:28:18Z hyunghwan.chung $
|
||||
* $Id: awk.c 306 2009-11-22 13:58:53Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -297,11 +297,7 @@ static int fnc_sleep (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
|
||||
#endif
|
||||
|
||||
r = qse_awk_rtx_makeintval (run, n);
|
||||
if (r == QSE_NULL)
|
||||
{
|
||||
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
|
||||
return -1;
|
||||
}
|
||||
if (r == QSE_NULL) return -1;
|
||||
|
||||
qse_awk_rtx_setretval (run, r);
|
||||
return 0;
|
||||
|
20
qse/configure
vendored
20
qse/configure
vendored
@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.64 for qse 0.5.2.
|
||||
# Generated by GNU Autoconf 2.64 for qse 0.5.3.
|
||||
#
|
||||
# Report bugs to <bacon@abiyo.net>.
|
||||
#
|
||||
@ -698,8 +698,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='qse'
|
||||
PACKAGE_TARNAME='qse'
|
||||
PACKAGE_VERSION='0.5.2'
|
||||
PACKAGE_STRING='qse 0.5.2'
|
||||
PACKAGE_VERSION='0.5.3'
|
||||
PACKAGE_STRING='qse 0.5.3'
|
||||
PACKAGE_BUGREPORT='bacon@abiyo.net'
|
||||
PACKAGE_URL=''
|
||||
|
||||
@ -1457,7 +1457,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures qse 0.5.2 to adapt to many kinds of systems.
|
||||
\`configure' configures qse 0.5.3 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@ -1527,7 +1527,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of qse 0.5.2:";;
|
||||
short | recursive ) echo "Configuration of qse 0.5.3:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@ -1641,7 +1641,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
qse configure 0.5.2
|
||||
qse configure 0.5.3
|
||||
generated by GNU Autoconf 2.64
|
||||
|
||||
Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
@ -2403,7 +2403,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by qse $as_me 0.5.2, which was
|
||||
It was created by qse $as_me 0.5.3, which was
|
||||
generated by GNU Autoconf 2.64. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@ -3222,7 +3222,7 @@ fi
|
||||
|
||||
# Define the identity of the package.
|
||||
PACKAGE='qse'
|
||||
VERSION='0.5.2'
|
||||
VERSION='0.5.3'
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
@ -17992,7 +17992,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by qse $as_me 0.5.2, which was
|
||||
This file was extended by qse $as_me 0.5.3, which was
|
||||
generated by GNU Autoconf 2.64. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@ -18056,7 +18056,7 @@ Report bugs to <bacon@abiyo.net>."
|
||||
_ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_version="\\
|
||||
qse config.status 0.5.2
|
||||
qse config.status 0.5.3
|
||||
configured by $0, generated by GNU Autoconf 2.64,
|
||||
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
dnl AC_PREREQ(2.59)
|
||||
AC_INIT([qse],[0.5.2],[bacon@abiyo.net])
|
||||
AC_INIT([qse],[0.5.3],[bacon@abiyo.net])
|
||||
AC_CONFIG_HEADER([include/qse/config.h])
|
||||
AC_CONFIG_AUX_DIR([ac/au])
|
||||
AC_CONFIG_MACRO_DIR([ac/m4])
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: awk.h 299 2009-10-19 13:33:40Z hyunghwan.chung $
|
||||
* $Id: awk.h 306 2009-11-22 13:58:53Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -795,7 +795,7 @@ enum qse_awk_errnum_t
|
||||
QSE_AWK_EREXCOLON, /**< a colon is expected */
|
||||
QSE_AWK_EREXCRANGE, /**< invalid character range */
|
||||
QSE_AWK_EREXCCLASS, /**< invalid character class */
|
||||
QSE_AWK_EREXBRANGE, /**< invalid boundary range */
|
||||
QSE_AWK_EREXBOUND, /**< invalid boundary range */
|
||||
QSE_AWK_EREXEND, /**< unexpected end of the pattern */
|
||||
QSE_AWK_EREXGARBAGE, /**< garbage after the pattern */
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: rex.h 304 2009-11-20 05:12:27Z hyunghwan.chung $
|
||||
* $Id: rex.h 306 2009-11-22 13:58:53Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -72,6 +72,10 @@ enum qse_rex_option_t
|
||||
/**< do not support the {n,m} style occurrence specifier */
|
||||
QSE_REX_NOBOUND = (1 << 0),
|
||||
|
||||
QSE_REX_ESQ_HEX = (1 << 1), /* \xhh and \uhhhh */
|
||||
QSE_REX_ESQ_OCTAL = (1 << 2), /* \000 */
|
||||
QSE_REX_ESQ_CNTRL = (1 << 3), /* \cX where X is A to Z */
|
||||
|
||||
/**< perform case-insensitive match */
|
||||
QSE_REX_IGNORECASE = (1 << 8)
|
||||
};
|
||||
@ -89,7 +93,7 @@ enum qse_rex_errnum_t
|
||||
QSE_REX_ECOLON, /* a colon is expected */
|
||||
QSE_REX_ECRANGE, /* invalid character range */
|
||||
QSE_REX_ECCLASS, /* invalid character class */
|
||||
QSE_REX_EBRANGE, /* invalid boundary range */
|
||||
QSE_REX_EBOUND, /* invalid boundary range */
|
||||
QSE_REX_EEND, /* unexpected end of the pattern */
|
||||
QSE_REX_EGARBAGE /* garbage after the pattern */
|
||||
};
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: misc.c 292 2009-09-23 10:19:30Z hyunghwan.chung $
|
||||
* $Id: misc.c 306 2009-11-22 13:58:53Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -1039,7 +1039,7 @@ qse_char_t* qse_awk_rtx_strxnfld (
|
||||
(err == QSE_REX_ECOLON)? QSE_AWK_EREXCOLON: \
|
||||
(err == QSE_REX_ECRANGE)? QSE_AWK_EREXCRANGE: \
|
||||
(err == QSE_REX_ECCLASS)? QSE_AWK_EREXCCLASS: \
|
||||
(err == QSE_REX_EBRANGE)? QSE_AWK_EREXBRANGE: \
|
||||
(err == QSE_REX_EBOUND)? QSE_AWK_EREXBOUND: \
|
||||
(err == QSE_REX_EEND)? QSE_AWK_EREXEND: \
|
||||
(err == QSE_REX_EGARBAGE)? QSE_AWK_EREXGARBAGE: \
|
||||
QSE_AWK_EINTERN)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: std.c 291 2009-09-21 13:28:18Z hyunghwan.chung $
|
||||
* $Id: std.c 306 2009-11-22 13:58:53Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -1224,11 +1224,7 @@ static int fnc_math_1 (
|
||||
r = qse_awk_rtx_makerealval (run, rf(rv));
|
||||
}
|
||||
|
||||
if (r == QSE_NULL)
|
||||
{
|
||||
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
|
||||
return -1;
|
||||
}
|
||||
if (r == QSE_NULL) return -1;
|
||||
|
||||
qse_awk_rtx_setretval (run, r);
|
||||
return 0;
|
||||
@ -1277,11 +1273,7 @@ static int fnc_math_2 (
|
||||
r = qse_awk_rtx_makerealval (run, rf(rv0,rv1));
|
||||
}
|
||||
|
||||
if (r == QSE_NULL)
|
||||
{
|
||||
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
|
||||
return -1;
|
||||
}
|
||||
if (r == QSE_NULL) return -1;
|
||||
|
||||
qse_awk_rtx_setretval (run, r);
|
||||
return 0;
|
||||
@ -1434,11 +1426,7 @@ static int fnc_int (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
|
||||
if (n == 1) lv = (qse_long_t)rv;
|
||||
|
||||
r = qse_awk_rtx_makeintval (run, lv);
|
||||
if (r == QSE_NULL)
|
||||
{
|
||||
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
|
||||
return -1;
|
||||
}
|
||||
if (r == QSE_NULL) return -1;
|
||||
|
||||
qse_awk_rtx_setretval (run, r);
|
||||
return 0;
|
||||
@ -1456,11 +1444,7 @@ static int fnc_rand (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
|
||||
*/
|
||||
r = qse_awk_rtx_makerealval (
|
||||
run, (qse_real_t)(rand() % RAND_MAX) / RAND_MAX);
|
||||
if (r == QSE_NULL)
|
||||
{
|
||||
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
|
||||
return -1;
|
||||
}
|
||||
if (r == QSE_NULL) return -1;
|
||||
|
||||
qse_awk_rtx_setretval (run, r);
|
||||
return 0;
|
||||
@ -1504,11 +1488,7 @@ static int fnc_srand (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
|
||||
srand (rxtn->seed);
|
||||
|
||||
r = qse_awk_rtx_makeintval (run, prev);
|
||||
if (r == QSE_NULL)
|
||||
{
|
||||
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
|
||||
return -1;
|
||||
}
|
||||
if (r == QSE_NULL) return -1;
|
||||
|
||||
qse_awk_rtx_setretval (run, r);
|
||||
return 0;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: rex.c 304 2009-11-20 05:12:27Z hyunghwan.chung $
|
||||
* $Id: rex.c 306 2009-11-22 13:58:53Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -962,7 +962,7 @@ what if it is not in the raight format? convert it to ordinary characters?? */
|
||||
if (cmd->lbound > cmd->ubound)
|
||||
{
|
||||
/* invalid occurrences range */
|
||||
builder->errnum = QSE_REX_EBRANGE;
|
||||
builder->errnum = QSE_REX_EBOUND;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -144,8 +144,8 @@ static void freenode (qse_rex_node_t* node, qse_mmgr_t* mmgr)
|
||||
{
|
||||
if (node->id == QSE_REX_NODE_CHARSET)
|
||||
{
|
||||
// TODO:
|
||||
QSE_MMGR_FREE (mmgr, node->u.cs);
|
||||
if (node->u.cs != QSE_NULL)
|
||||
QSE_MMGR_FREE (mmgr, node->u.cs);
|
||||
}
|
||||
|
||||
QSE_MMGR_FREE (mmgr, node);
|
||||
@ -288,8 +288,13 @@ static qse_rex_node_t* newbranchnode (
|
||||
#define IS_ESC(com) ((com)->c.escaped)
|
||||
#define IS_EOF(com) ((com)->c.value == QSE_CHAR_EOF)
|
||||
|
||||
static int getc (comp_t* com)
|
||||
#define getc_noesc(c) getc(c,1)
|
||||
#define getc_esc(c) getc(c,0)
|
||||
|
||||
static int getc (comp_t* com, int noesc)
|
||||
{
|
||||
qse_char_t c;
|
||||
|
||||
if (com->ptr >= com->end)
|
||||
{
|
||||
com->c.value = QSE_CHAR_EOF;
|
||||
@ -298,110 +303,108 @@ static int getc (comp_t* com)
|
||||
}
|
||||
|
||||
com->c.value = *com->ptr++;
|
||||
com->c.escaped = QSE_FALSE;
|
||||
com->c.escaped = 0;
|
||||
|
||||
if (com->c.value == QSE_T('\\'))
|
||||
{
|
||||
qse_char_t c;
|
||||
if (noesc || com->c.value != QSE_T('\\')) return 0;
|
||||
|
||||
|
||||
CHECK_END (com);
|
||||
c = *com->ptr++;
|
||||
|
||||
if (c == QSE_T('n')) c = QSE_T('\n');
|
||||
else if (c == QSE_T('r')) c = QSE_T('\r');
|
||||
else if (c == QSE_T('t')) c = QSE_T('\t');
|
||||
else if (c == QSE_T('f')) c = QSE_T('\f');
|
||||
else if (c == QSE_T('b')) c = QSE_T('\b');
|
||||
else if (c == QSE_T('v')) c = QSE_T('\v');
|
||||
else if (c == QSE_T('a')) c = QSE_T('\a');
|
||||
else if (c >= QSE_T('0') && c <= QSE_T('7'))
|
||||
{
|
||||
qse_char_t cx;
|
||||
|
||||
c = c - QSE_T('0');
|
||||
|
||||
CHECK_END (com);
|
||||
c = *com->ptr++;
|
||||
|
||||
if (c == QSE_T('n')) c = QSE_T('\n');
|
||||
else if (c == QSE_T('r')) c = QSE_T('\r');
|
||||
else if (c == QSE_T('t')) c = QSE_T('\t');
|
||||
else if (c == QSE_T('f')) c = QSE_T('\f');
|
||||
else if (c == QSE_T('b')) c = QSE_T('\b');
|
||||
else if (c == QSE_T('v')) c = QSE_T('\v');
|
||||
else if (c == QSE_T('a')) c = QSE_T('\a');
|
||||
else if (c >= QSE_T('0') && c <= QSE_T('7'))
|
||||
cx = *com->ptr++;
|
||||
if (cx >= QSE_T('0') && cx <= QSE_T('7'))
|
||||
{
|
||||
qse_char_t cx;
|
||||
|
||||
c = c - QSE_T('0');
|
||||
c = c * 8 + cx - QSE_T('0');
|
||||
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
if (cx >= QSE_T('0') && cx <= QSE_T('7'))
|
||||
{
|
||||
c = c * 8 + cx - QSE_T('0');
|
||||
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
if (cx >= QSE_T('0') && cx <= QSE_T('7'))
|
||||
{
|
||||
c = c * 8 + cx - QSE_T('0');
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (c == QSE_T('x'))
|
||||
{
|
||||
qse_char_t cx;
|
||||
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
if (IS_HEX(cx))
|
||||
{
|
||||
c = HEX_TO_NUM(cx);
|
||||
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
if (IS_HEX(cx))
|
||||
{
|
||||
c = c * 16 + HEX_TO_NUM(cx);
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef QSE_CHAR_IS_WCHAR
|
||||
else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2)
|
||||
{
|
||||
qse_char_t cx;
|
||||
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
if (IS_HEX(cx))
|
||||
{
|
||||
qse_size_t i;
|
||||
|
||||
c = HEX_TO_NUM(cx);
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
|
||||
if (!IS_HEX(cx)) break;
|
||||
c = c * 16 + HEX_TO_NUM(cx);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (c == QSE_T('U') && QSE_SIZEOF(qse_char_t) >= 4)
|
||||
{
|
||||
qse_char_t cx;
|
||||
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
if (IS_HEX(cx))
|
||||
{
|
||||
qse_size_t i;
|
||||
|
||||
c = HEX_TO_NUM(cx);
|
||||
|
||||
for (i = 0; i < 7; i++)
|
||||
{
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
|
||||
if (!IS_HEX(cx)) break;
|
||||
c = c * 16 + HEX_TO_NUM(cx);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
com->c.value = c;
|
||||
com->c.escaped = QSE_TRUE;
|
||||
}
|
||||
else if (c == QSE_T('x'))
|
||||
{
|
||||
qse_char_t cx;
|
||||
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
if (IS_HEX(cx))
|
||||
{
|
||||
c = HEX_TO_NUM(cx);
|
||||
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
if (IS_HEX(cx))
|
||||
{
|
||||
c = c * 16 + HEX_TO_NUM(cx);
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef QSE_CHAR_IS_WCHAR
|
||||
else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2)
|
||||
{
|
||||
qse_char_t cx;
|
||||
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
if (IS_HEX(cx))
|
||||
{
|
||||
qse_size_t i;
|
||||
|
||||
c = HEX_TO_NUM(cx);
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
|
||||
if (!IS_HEX(cx)) break;
|
||||
c = c * 16 + HEX_TO_NUM(cx);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (c == QSE_T('U') && QSE_SIZEOF(qse_char_t) >= 4)
|
||||
{
|
||||
qse_char_t cx;
|
||||
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
if (IS_HEX(cx))
|
||||
{
|
||||
qse_size_t i;
|
||||
|
||||
c = HEX_TO_NUM(cx);
|
||||
|
||||
for (i = 0; i < 7; i++)
|
||||
{
|
||||
CHECK_END (com);
|
||||
cx = *com->ptr++;
|
||||
|
||||
if (!IS_HEX(cx)) break;
|
||||
c = c * 16 + HEX_TO_NUM(cx);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
com->c.value = c;
|
||||
com->c.escaped = QSE_TRUE;
|
||||
|
||||
#if 0
|
||||
com->c = (com->ptr < com->end)? *com->ptr++: QSE_CHAR_EOF;
|
||||
@ -412,6 +415,162 @@ else qse_printf (QSE_T("getc => %c\n"), com->c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static int charclass (comp_t* builder, qse_char_t* cc)
|
||||
{
|
||||
const struct __char_class_t* ccp = __char_class;
|
||||
qse_size_t len = builder->ptn.end - builder->ptn.curp;
|
||||
|
||||
while (ccp->name != QSE_NULL)
|
||||
{
|
||||
if (__begin_with (builder->ptn.curp, len, ccp->name)) break;
|
||||
ccp++;
|
||||
}
|
||||
|
||||
if (ccp->name == QSE_NULL)
|
||||
{
|
||||
/* wrong class name */
|
||||
#ifdef DEBUG_REX
|
||||
DPUTS (QSE_T("build_atom_cclass: wrong class name\n"));
|
||||
#endif
|
||||
builder->errnum = QSE_REX_ECCLASS;
|
||||
return -1;
|
||||
}
|
||||
|
||||
builder->ptn.curp += ccp->name_len;
|
||||
|
||||
NEXT_CHAR (builder, LEVEL_CHARSET);
|
||||
if (builder->ptn.curc.type != CT_NORMAL ||
|
||||
builder->ptn.curc.value != QSE_T(':'))
|
||||
{
|
||||
#ifdef DEBUG_REX
|
||||
DPUTS (QSE_T("build_atom_cclass: a colon(:) expected\n"));
|
||||
#endif
|
||||
builder->errnum = QSE_REX_ECOLON;
|
||||
return -1;
|
||||
}
|
||||
|
||||
NEXT_CHAR (builder, LEVEL_CHARSET);
|
||||
|
||||
/* ] happens to be the charset ender ] */
|
||||
if (builder->ptn.curc.type != CT_SPECIAL ||
|
||||
builder->ptn.curc.value != QSE_T(']'))
|
||||
{
|
||||
#ifdef DEBUG_REX
|
||||
DPUTS (QSE_T("build_atom_cclass: ] expected\n"));
|
||||
#endif
|
||||
builder->errnum = QSE_REX_ERBRACKET;
|
||||
return -1;
|
||||
}
|
||||
|
||||
NEXT_CHAR (builder, LEVEL_CHARSET);
|
||||
|
||||
*cc = (qse_char_t)(ccp - __char_class);
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Scans the body of a bracket expression, starting at the current
 * character c->c.value, up to and including the closing ']'.
 *
 * This is work in progress: the items found are only printed with
 * qse_printf() and nothing is stored in 'node' yet. Negation ('^') and
 * character classes ("[:name:]") are recognized but not acted upon.
 *
 * Returns 0 after the character following ']' has been read, or -1 on
 * failure. When the pattern ends before ']' is seen, c->rex->errnum is
 * set to QSE_REX_EEND.
 */
static int charset (comp_t* c, qse_rex_node_t* node)
{
	if (c->c.value == QSE_T('^'))
	{
		//cmd->negate = 1;
		//TODO: negate...
		if (getc_noesc(c) <= -1) return -1;
	}

	/* if ] is the first character or the second character following ^,
	 * it is treated literally. the do-while loop below consumes one item
	 * before the terminating condition is tested for that reason. */

	do
	{
		qse_char_t c1, c2;

		if (IS_EOF(c))
		{
			/* the pattern ended before the closing ] was found.
			 * without this check the loop would never terminate
			 * because the current character stays at the end
			 * marker and never becomes ']'. */
			c->rex->errnum = QSE_REX_EEND;
			return -1;
		}

		c1 = c->c.value;
		if (getc_noesc(c) <= -1) return -1;
		c2 = c->c.value;

		if (c1 == QSE_T('[') && c2 == QSE_T(':'))
		{
			/* begins with [: */
			if (getc_noesc(c) <= -1) return -1;
			//if (charclass (c) <= -1) return -1;
		}
		else if (c2 == QSE_T('-'))
		{
			/* c1 is the lower end of a range. read the upper end. */
			if (getc_noesc(c) <= -1) return -1;
			//add c->c.value;
			qse_printf (QSE_T("[%c-%c]\n"), c1, c->c.value);
			if (getc_noesc(c) <= -1) return -1;
		}
		else
		{
			/* c1 is a single item. c2 remains the current
			 * character and is examined in the next iteration. */
			//add c1;
			qse_printf (QSE_T("[%c]\n"), c1);
		}
	}
	while (c->c.value != QSE_T(']'));

	/* move past the closing ] with escape processing turned on again */
	if (getc_esc(c) <= -1) return -1;
	return 0;
}
|
||||
|
||||
/*
 * Parses the body of an occurrence specifier of the form {n}, {n,} or
 * {n,m}, starting at the current character c->c.value, and stores the
 * result in n->occ.min and n->occ.max.
 *
 *   {n}    min = n, max = n
 *   {n,}   min = n, max = OCC_MAX
 *   {n,m}  min = n, max = m
 *
 * A missing lower bound is taken as 0.
 *
 * Returns 0 after the character following '}' has been read, or -1 on
 * failure. c->rex->errnum is set to QSE_REX_EBOUND if the lower bound is
 * greater than the upper bound, and to QSE_REX_ERBRACE if the specifier
 * is not closed with '}'.
 */
static int occbound (comp_t* c, qse_rex_node_t* n)
{
	qse_size_t bound;

	/* lower bound */
	bound = 0;
	while (c->c.value >= QSE_T('0') && c->c.value <= QSE_T('9'))
	{
		bound = bound * 10 + c->c.value - QSE_T('0');
		if (getc_noesc(c) <= -1) return -1;
	}

	n->occ.min = bound;

	if (c->c.value == QSE_T(','))
	{
		if (getc_noesc(c) <= -1) return -1;

		if (c->c.value >= QSE_T('0') && c->c.value <= QSE_T('9'))
		{
			/* explicit upper bound */
			bound = 0;

			do
			{
				bound = bound * 10 + c->c.value - QSE_T('0');
				if (getc_noesc(c) <= -1) return -1;
			}
			while (c->c.value >= QSE_T('0') &&
			       c->c.value <= QSE_T('9'));

			n->occ.max = bound;
		}
		else n->occ.max = OCC_MAX; /* {n,} - no upper limit */
	}
	else n->occ.max = n->occ.min; /* {n} - exactly n times */

	/* the lower bound must be compared against the upper bound.
	 * comparing occ.min with itself can never be true and lets an
	 * inverted range such as {5,2} pass unnoticed. */
	if (n->occ.min > n->occ.max)
	{
		/* invalid occurrences range */
		c->rex->errnum = QSE_REX_EBOUND;
		return -1;
	}

	if (c->c.value != QSE_T('}'))
	{
		c->rex->errnum = QSE_REX_ERBRACE;
		return -1;
	}

	/* move past the closing } with escape processing turned on again */
	if (getc_esc(c) <= -1) return -1;
	return 0;
}
|
||||
|
||||
static qse_rex_node_t* comp0 (comp_t* c, qse_rex_node_t* ge);
|
||||
|
||||
static qse_rex_node_t* comp2 (comp_t* c)
|
||||
@ -434,7 +593,7 @@ static qse_rex_node_t* comp2 (comp_t* c)
|
||||
ge = newgroupendnode (c, n);
|
||||
if (ge == QSE_NULL) return QSE_NULL;
|
||||
|
||||
if (getc(c) <= -1) return QSE_NULL;
|
||||
if (getc_esc(c) <= -1) return QSE_NULL;
|
||||
|
||||
c->gdepth++;
|
||||
x = comp0 (c, ge);
|
||||
@ -442,12 +601,12 @@ static qse_rex_node_t* comp2 (comp_t* c)
|
||||
|
||||
if (!IS_SPE(c,QSE_T(')')))
|
||||
{
|
||||
c->rex->errnum = QSE_REX_EUNBALPAREN;
|
||||
c->rex->errnum = QSE_REX_ERPAREN;
|
||||
return QSE_NULL;
|
||||
}
|
||||
|
||||
c->gdepth--;
|
||||
if (getc(c) <= -1) return QSE_NULL;
|
||||
if (getc_esc(c) <= -1) return QSE_NULL;
|
||||
|
||||
n->u.g.head = x;
|
||||
break;
|
||||
@ -457,26 +616,28 @@ static qse_rex_node_t* comp2 (comp_t* c)
|
||||
case QSE_T('.'):
|
||||
n = newnode (c, QSE_REX_NODE_ANYCHAR);
|
||||
if (n == QSE_NULL) return QSE_NULL;
|
||||
if (getc(c) <= -1) return QSE_NULL;
|
||||
if (getc_esc(c) <= -1) return QSE_NULL;
|
||||
break;
|
||||
|
||||
case QSE_T('^'):
|
||||
n = newnode (c, QSE_REX_NODE_BOL);
|
||||
if (n == QSE_NULL) return QSE_NULL;
|
||||
if (getc(c) <= -1) return QSE_NULL;
|
||||
if (getc_esc(c) <= -1) return QSE_NULL;
|
||||
break;
|
||||
|
||||
case QSE_T('$'):
|
||||
n = newnode (c, QSE_REX_NODE_EOL);
|
||||
if (n == QSE_NULL) return QSE_NULL;
|
||||
if (getc(c) <= -1) return QSE_NULL;
|
||||
if (getc_esc(c) <= -1) return QSE_NULL;
|
||||
break;
|
||||
|
||||
|
||||
/*
|
||||
case QSE_T('['):
|
||||
n = newnode (c, QSE_REX_NODE_CHARSET);
|
||||
if (n == QSE_NULL) return QSE_NULL;
|
||||
|
||||
if (getc_noesc(c) <= -1) return QSE_NULL;
|
||||
if (charset(c, n) <= -1) return QSE_NULL;
|
||||
break;
|
||||
*/
|
||||
|
||||
default:
|
||||
goto normal_char;
|
||||
@ -488,7 +649,7 @@ static qse_rex_node_t* comp2 (comp_t* c)
|
||||
/* normal character */
|
||||
n = newcharnode (c, c->c.value);
|
||||
if (n == QSE_NULL) return QSE_NULL;
|
||||
if (getc(c) <= -1) return QSE_NULL;
|
||||
if (getc_esc(c) <= -1) return QSE_NULL;
|
||||
}
|
||||
|
||||
n->occ.min = 1;
|
||||
@ -503,30 +664,28 @@ static qse_rex_node_t* comp2 (comp_t* c)
|
||||
case QSE_T('?'):
|
||||
n->occ.min = 0;
|
||||
n->occ.max = 1;
|
||||
if (getc(c) <= -1) return QSE_NULL;
|
||||
if (getc_esc(c) <= -1) return QSE_NULL;
|
||||
break;
|
||||
|
||||
case QSE_T('*'):
|
||||
n->occ.min = 0;
|
||||
n->occ.max = OCC_MAX;
|
||||
if (getc(c) <= -1) return QSE_NULL;
|
||||
if (getc_esc(c) <= -1) return QSE_NULL;
|
||||
break;
|
||||
|
||||
case QSE_T('+'):
|
||||
n->occ.min = 1;
|
||||
n->occ.max = OCC_MAX;
|
||||
if (getc(c) <= -1) return QSE_NULL;
|
||||
if (getc_esc(c) <= -1) return QSE_NULL;
|
||||
break;
|
||||
|
||||
/*
|
||||
case QSE_T('{'):
|
||||
// TODO:
|
||||
if (!(com->rex->option & QSE_REX_NOBOUND))
|
||||
if (!(c->rex->option & QSE_REX_NOBOUND))
|
||||
{
|
||||
if (getc_noesc(c) <= -1) return QSE_NULL;
|
||||
if (occbound(c,n) <= -1) return QSE_NULL;
|
||||
}
|
||||
break;
|
||||
*/
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -568,7 +727,7 @@ static qse_rex_node_t* comp0 (comp_t* c, qse_rex_node_t* ge)
|
||||
|
||||
while (IS_SPE(c,QSE_T('|')))
|
||||
{
|
||||
if (getc(c) <= -1)
|
||||
if (getc_esc(c) <= -1)
|
||||
{
|
||||
//freere (left);
|
||||
return QSE_NULL;
|
||||
@ -614,7 +773,7 @@ qse_rex_node_t* qse_rex_comp (
|
||||
c.start = QSE_NULL;
|
||||
|
||||
/* read the first character */
|
||||
if (getc(&c) <= -1) return QSE_NULL;
|
||||
if (getc_esc(&c) <= -1) return QSE_NULL;
|
||||
|
||||
c.start = newstartnode (&c);
|
||||
if (c.start == QSE_NULL) return QSE_NULL;
|
||||
|
Loading…
x
Reference in New Issue
Block a user