interim commit while implementing a regular expression processor

This commit is contained in:
hyung-hwan 2009-11-23 07:58:53 +00:00
parent 1d5be1f982
commit 782fa151de
9 changed files with 304 additions and 165 deletions

View File

@ -1,5 +1,5 @@
/* /*
* $Id: awk.c 291 2009-09-21 13:28:18Z hyunghwan.chung $ * $Id: awk.c 306 2009-11-22 13:58:53Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -297,11 +297,7 @@ static int fnc_sleep (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
#endif #endif
r = qse_awk_rtx_makeintval (run, n); r = qse_awk_rtx_makeintval (run, n);
if (r == QSE_NULL) if (r == QSE_NULL) return -1;
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return -1;
}
qse_awk_rtx_setretval (run, r); qse_awk_rtx_setretval (run, r);
return 0; return 0;

20
qse/configure vendored
View File

@ -1,6 +1,6 @@
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.64 for qse 0.5.2. # Generated by GNU Autoconf 2.64 for qse 0.5.3.
# #
# Report bugs to <bacon@abiyo.net>. # Report bugs to <bacon@abiyo.net>.
# #
@ -698,8 +698,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='qse' PACKAGE_NAME='qse'
PACKAGE_TARNAME='qse' PACKAGE_TARNAME='qse'
PACKAGE_VERSION='0.5.2' PACKAGE_VERSION='0.5.3'
PACKAGE_STRING='qse 0.5.2' PACKAGE_STRING='qse 0.5.3'
PACKAGE_BUGREPORT='bacon@abiyo.net' PACKAGE_BUGREPORT='bacon@abiyo.net'
PACKAGE_URL='' PACKAGE_URL=''
@ -1457,7 +1457,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures qse 0.5.2 to adapt to many kinds of systems. \`configure' configures qse 0.5.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1527,7 +1527,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of qse 0.5.2:";; short | recursive ) echo "Configuration of qse 0.5.3:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
@ -1641,7 +1641,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
qse configure 0.5.2 qse configure 0.5.3
generated by GNU Autoconf 2.64 generated by GNU Autoconf 2.64
Copyright (C) 2009 Free Software Foundation, Inc. Copyright (C) 2009 Free Software Foundation, Inc.
@ -2403,7 +2403,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by qse $as_me 0.5.2, which was It was created by qse $as_me 0.5.3, which was
generated by GNU Autoconf 2.64. Invocation command line was generated by GNU Autoconf 2.64. Invocation command line was
$ $0 $@ $ $0 $@
@ -3222,7 +3222,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='qse' PACKAGE='qse'
VERSION='0.5.2' VERSION='0.5.3'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
@ -17992,7 +17992,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by qse $as_me 0.5.2, which was This file was extended by qse $as_me 0.5.3, which was
generated by GNU Autoconf 2.64. Invocation command line was generated by GNU Autoconf 2.64. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
@ -18056,7 +18056,7 @@ Report bugs to <bacon@abiyo.net>."
_ACEOF _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_version="\\ ac_cs_version="\\
qse config.status 0.5.2 qse config.status 0.5.3
configured by $0, generated by GNU Autoconf 2.64, configured by $0, generated by GNU Autoconf 2.64,
with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"

View File

@ -1,5 +1,5 @@
dnl AC_PREREQ(2.59) dnl AC_PREREQ(2.59)
AC_INIT([qse],[0.5.2],[bacon@abiyo.net]) AC_INIT([qse],[0.5.3],[bacon@abiyo.net])
AC_CONFIG_HEADER([include/qse/config.h]) AC_CONFIG_HEADER([include/qse/config.h])
AC_CONFIG_AUX_DIR([ac/au]) AC_CONFIG_AUX_DIR([ac/au])
AC_CONFIG_MACRO_DIR([ac/m4]) AC_CONFIG_MACRO_DIR([ac/m4])

View File

@ -1,5 +1,5 @@
/* /*
* $Id: awk.h 299 2009-10-19 13:33:40Z hyunghwan.chung $ * $Id: awk.h 306 2009-11-22 13:58:53Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -795,7 +795,7 @@ enum qse_awk_errnum_t
QSE_AWK_EREXCOLON, /**< a colon is expected */ QSE_AWK_EREXCOLON, /**< a colon is expected */
QSE_AWK_EREXCRANGE, /**< invalid character range */ QSE_AWK_EREXCRANGE, /**< invalid character range */
QSE_AWK_EREXCCLASS, /**< invalid character class */ QSE_AWK_EREXCCLASS, /**< invalid character class */
QSE_AWK_EREXBRANGE, /**< invalid boundary range */ QSE_AWK_EREXBOUND, /**< invalid boundary range */
QSE_AWK_EREXEND, /**< unexpected end of the pattern */ QSE_AWK_EREXEND, /**< unexpected end of the pattern */
QSE_AWK_EREXGARBAGE, /**< garbage after the pattern */ QSE_AWK_EREXGARBAGE, /**< garbage after the pattern */

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rex.h 304 2009-11-20 05:12:27Z hyunghwan.chung $ * $Id: rex.h 306 2009-11-22 13:58:53Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -72,6 +72,10 @@ enum qse_rex_option_t
/**< do not support the {n,m} style occurrence specifier */ /**< do not support the {n,m} style occurrence specifier */
QSE_REX_NOBOUND = (1 << 0), QSE_REX_NOBOUND = (1 << 0),
QSE_REX_ESQ_HEX = (1 << 1), /* \xhh and \uhhhh */
QSE_REX_ESQ_OCTAL = (1 << 2), /* \000 */
QSE_REX_ESQ_CNTRL = (1 << 3), /* \cX where X is A to Z */
/**< perform case-insensitive match */ /**< perform case-insensitive match */
QSE_REX_IGNORECASE = (1 << 8) QSE_REX_IGNORECASE = (1 << 8)
}; };
@ -89,7 +93,7 @@ enum qse_rex_errnum_t
QSE_REX_ECOLON, /* a colon is expected */ QSE_REX_ECOLON, /* a colon is expected */
QSE_REX_ECRANGE, /* invalid character range */ QSE_REX_ECRANGE, /* invalid character range */
QSE_REX_ECCLASS, /* invalid character class */ QSE_REX_ECCLASS, /* invalid character class */
QSE_REX_EBRANGE, /* invalid boundary range */ QSE_REX_EBOUND, /* invalid boundary range */
QSE_REX_EEND, /* unexpected end of the pattern */ QSE_REX_EEND, /* unexpected end of the pattern */
QSE_REX_EGARBAGE /* garbage after the pattern */ QSE_REX_EGARBAGE /* garbage after the pattern */
}; };

View File

@ -1,5 +1,5 @@
/* /*
* $Id: misc.c 292 2009-09-23 10:19:30Z hyunghwan.chung $ * $Id: misc.c 306 2009-11-22 13:58:53Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -1039,7 +1039,7 @@ qse_char_t* qse_awk_rtx_strxnfld (
(err == QSE_REX_ECOLON)? QSE_AWK_EREXCOLON: \ (err == QSE_REX_ECOLON)? QSE_AWK_EREXCOLON: \
(err == QSE_REX_ECRANGE)? QSE_AWK_EREXCRANGE: \ (err == QSE_REX_ECRANGE)? QSE_AWK_EREXCRANGE: \
(err == QSE_REX_ECCLASS)? QSE_AWK_EREXCCLASS: \ (err == QSE_REX_ECCLASS)? QSE_AWK_EREXCCLASS: \
(err == QSE_REX_EBRANGE)? QSE_AWK_EREXBRANGE: \ (err == QSE_REX_EBOUND)? QSE_AWK_EREXBOUND: \
(err == QSE_REX_EEND)? QSE_AWK_EREXEND: \ (err == QSE_REX_EEND)? QSE_AWK_EREXEND: \
(err == QSE_REX_EGARBAGE)? QSE_AWK_EREXGARBAGE: \ (err == QSE_REX_EGARBAGE)? QSE_AWK_EREXGARBAGE: \
QSE_AWK_EINTERN) QSE_AWK_EINTERN)

View File

@ -1,5 +1,5 @@
/* /*
* $Id: std.c 291 2009-09-21 13:28:18Z hyunghwan.chung $ * $Id: std.c 306 2009-11-22 13:58:53Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -1224,11 +1224,7 @@ static int fnc_math_1 (
r = qse_awk_rtx_makerealval (run, rf(rv)); r = qse_awk_rtx_makerealval (run, rf(rv));
} }
if (r == QSE_NULL) if (r == QSE_NULL) return -1;
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return -1;
}
qse_awk_rtx_setretval (run, r); qse_awk_rtx_setretval (run, r);
return 0; return 0;
@ -1277,11 +1273,7 @@ static int fnc_math_2 (
r = qse_awk_rtx_makerealval (run, rf(rv0,rv1)); r = qse_awk_rtx_makerealval (run, rf(rv0,rv1));
} }
if (r == QSE_NULL) if (r == QSE_NULL) return -1;
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return -1;
}
qse_awk_rtx_setretval (run, r); qse_awk_rtx_setretval (run, r);
return 0; return 0;
@ -1434,11 +1426,7 @@ static int fnc_int (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
if (n == 1) lv = (qse_long_t)rv; if (n == 1) lv = (qse_long_t)rv;
r = qse_awk_rtx_makeintval (run, lv); r = qse_awk_rtx_makeintval (run, lv);
if (r == QSE_NULL) if (r == QSE_NULL) return -1;
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return -1;
}
qse_awk_rtx_setretval (run, r); qse_awk_rtx_setretval (run, r);
return 0; return 0;
@ -1456,11 +1444,7 @@ static int fnc_rand (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
*/ */
r = qse_awk_rtx_makerealval ( r = qse_awk_rtx_makerealval (
run, (qse_real_t)(rand() % RAND_MAX) / RAND_MAX); run, (qse_real_t)(rand() % RAND_MAX) / RAND_MAX);
if (r == QSE_NULL) if (r == QSE_NULL) return -1;
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return -1;
}
qse_awk_rtx_setretval (run, r); qse_awk_rtx_setretval (run, r);
return 0; return 0;
@ -1504,11 +1488,7 @@ static int fnc_srand (qse_awk_rtx_t* run, const qse_cstr_t* fnm)
srand (rxtn->seed); srand (rxtn->seed);
r = qse_awk_rtx_makeintval (run, prev); r = qse_awk_rtx_makeintval (run, prev);
if (r == QSE_NULL) if (r == QSE_NULL) return -1;
{
qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL);
return -1;
}
qse_awk_rtx_setretval (run, r); qse_awk_rtx_setretval (run, r);
return 0; return 0;

View File

@ -1,5 +1,5 @@
/* /*
* $Id: rex.c 304 2009-11-20 05:12:27Z hyunghwan.chung $ * $Id: rex.c 306 2009-11-22 13:58:53Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -962,7 +962,7 @@ what if it is not in the raight format? convert it to ordinary characters?? */
if (cmd->lbound > cmd->ubound) if (cmd->lbound > cmd->ubound)
{ {
/* invalid occurrences range */ /* invalid occurrences range */
builder->errnum = QSE_REX_EBRANGE; builder->errnum = QSE_REX_EBOUND;
return -1; return -1;
} }

View File

@ -144,8 +144,8 @@ static void freenode (qse_rex_node_t* node, qse_mmgr_t* mmgr)
{ {
if (node->id == QSE_REX_NODE_CHARSET) if (node->id == QSE_REX_NODE_CHARSET)
{ {
// TODO: if (node->u.cs != QSE_NULL)
QSE_MMGR_FREE (mmgr, node->u.cs); QSE_MMGR_FREE (mmgr, node->u.cs);
} }
QSE_MMGR_FREE (mmgr, node); QSE_MMGR_FREE (mmgr, node);
@ -288,8 +288,13 @@ static qse_rex_node_t* newbranchnode (
#define IS_ESC(com) ((com)->c.escaped) #define IS_ESC(com) ((com)->c.escaped)
#define IS_EOF(com) ((com)->c.value == QSE_CHAR_EOF) #define IS_EOF(com) ((com)->c.value == QSE_CHAR_EOF)
static int getc (comp_t* com) #define getc_noesc(c) getc(c,1)
#define getc_esc(c) getc(c,0)
static int getc (comp_t* com, int noesc)
{ {
qse_char_t c;
if (com->ptr >= com->end) if (com->ptr >= com->end)
{ {
com->c.value = QSE_CHAR_EOF; com->c.value = QSE_CHAR_EOF;
@ -298,110 +303,108 @@ static int getc (comp_t* com)
} }
com->c.value = *com->ptr++; com->c.value = *com->ptr++;
com->c.escaped = QSE_FALSE; com->c.escaped = 0;
if (com->c.value == QSE_T('\\')) if (noesc || com->c.value != QSE_T('\\')) return 0;
{
qse_char_t c;
CHECK_END (com);
c = *com->ptr++;
if (c == QSE_T('n')) c = QSE_T('\n');
else if (c == QSE_T('r')) c = QSE_T('\r');
else if (c == QSE_T('t')) c = QSE_T('\t');
else if (c == QSE_T('f')) c = QSE_T('\f');
else if (c == QSE_T('b')) c = QSE_T('\b');
else if (c == QSE_T('v')) c = QSE_T('\v');
else if (c == QSE_T('a')) c = QSE_T('\a');
else if (c >= QSE_T('0') && c <= QSE_T('7'))
{
qse_char_t cx;
c = c - QSE_T('0');
CHECK_END (com); CHECK_END (com);
c = *com->ptr++; cx = *com->ptr++;
if (cx >= QSE_T('0') && cx <= QSE_T('7'))
if (c == QSE_T('n')) c = QSE_T('\n');
else if (c == QSE_T('r')) c = QSE_T('\r');
else if (c == QSE_T('t')) c = QSE_T('\t');
else if (c == QSE_T('f')) c = QSE_T('\f');
else if (c == QSE_T('b')) c = QSE_T('\b');
else if (c == QSE_T('v')) c = QSE_T('\v');
else if (c == QSE_T('a')) c = QSE_T('\a');
else if (c >= QSE_T('0') && c <= QSE_T('7'))
{ {
qse_char_t cx; c = c * 8 + cx - QSE_T('0');
c = c - QSE_T('0');
CHECK_END (com); CHECK_END (com);
cx = *com->ptr++; cx = *com->ptr++;
if (cx >= QSE_T('0') && cx <= QSE_T('7')) if (cx >= QSE_T('0') && cx <= QSE_T('7'))
{ {
c = c * 8 + cx - QSE_T('0'); c = c * 8 + cx - QSE_T('0');
CHECK_END (com);
cx = *com->ptr++;
if (cx >= QSE_T('0') && cx <= QSE_T('7'))
{
c = c * 8 + cx - QSE_T('0');
}
} }
} }
else if (c == QSE_T('x'))
{
qse_char_t cx;
CHECK_END (com);
cx = *com->ptr++;
if (IS_HEX(cx))
{
c = HEX_TO_NUM(cx);
CHECK_END (com);
cx = *com->ptr++;
if (IS_HEX(cx))
{
c = c * 16 + HEX_TO_NUM(cx);
}
}
}
#ifdef QSE_CHAR_IS_WCHAR
else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2)
{
qse_char_t cx;
CHECK_END (com);
cx = *com->ptr++;
if (IS_HEX(cx))
{
qse_size_t i;
c = HEX_TO_NUM(cx);
for (i = 0; i < 3; i++)
{
CHECK_END (com);
cx = *com->ptr++;
if (!IS_HEX(cx)) break;
c = c * 16 + HEX_TO_NUM(cx);
}
}
}
else if (c == QSE_T('U') && QSE_SIZEOF(qse_char_t) >= 4)
{
qse_char_t cx;
CHECK_END (com);
cx = *com->ptr++;
if (IS_HEX(cx))
{
qse_size_t i;
c = HEX_TO_NUM(cx);
for (i = 0; i < 7; i++)
{
CHECK_END (com);
cx = *com->ptr++;
if (!IS_HEX(cx)) break;
c = c * 16 + HEX_TO_NUM(cx);
}
}
}
#endif
com->c.value = c;
com->c.escaped = QSE_TRUE;
} }
else if (c == QSE_T('x'))
{
qse_char_t cx;
CHECK_END (com);
cx = *com->ptr++;
if (IS_HEX(cx))
{
c = HEX_TO_NUM(cx);
CHECK_END (com);
cx = *com->ptr++;
if (IS_HEX(cx))
{
c = c * 16 + HEX_TO_NUM(cx);
}
}
}
#ifdef QSE_CHAR_IS_WCHAR
else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2)
{
qse_char_t cx;
CHECK_END (com);
cx = *com->ptr++;
if (IS_HEX(cx))
{
qse_size_t i;
c = HEX_TO_NUM(cx);
for (i = 0; i < 3; i++)
{
CHECK_END (com);
cx = *com->ptr++;
if (!IS_HEX(cx)) break;
c = c * 16 + HEX_TO_NUM(cx);
}
}
}
else if (c == QSE_T('U') && QSE_SIZEOF(qse_char_t) >= 4)
{
qse_char_t cx;
CHECK_END (com);
cx = *com->ptr++;
if (IS_HEX(cx))
{
qse_size_t i;
c = HEX_TO_NUM(cx);
for (i = 0; i < 7; i++)
{
CHECK_END (com);
cx = *com->ptr++;
if (!IS_HEX(cx)) break;
c = c * 16 + HEX_TO_NUM(cx);
}
}
}
#endif
com->c.value = c;
com->c.escaped = QSE_TRUE;
#if 0 #if 0
com->c = (com->ptr < com->end)? *com->ptr++: QSE_CHAR_EOF; com->c = (com->ptr < com->end)? *com->ptr++: QSE_CHAR_EOF;
@ -412,6 +415,162 @@ else qse_printf (QSE_T("getc => %c\n"), com->c);
return 0; return 0;
} }
#if 0
/*
 * charclass - recognise a named character class such as the "alpha" in
 * "[:alpha:]" (currently compiled out by the surrounding #if 0).
 *
 * The text from builder->ptn.curp up to builder->ptn.end is tested against
 * each entry of the __char_class table with __begin_with(); the first entry
 * for which it reports a hit is used. After the name, a ':' and then a ']'
 * are required.
 *
 * builder - compiler state; errnum is set on failure.
 * cc      - receives the index of the matching entry within __char_class.
 *
 * Returns 1 on success. Returns -1 with errnum set to QSE_REX_ECCLASS
 * (unknown name), QSE_REX_ECOLON (':' missing) or QSE_REX_ERBRACKET
 * (']' missing).
 *
 * NOTE(review): this body uses builder->ptn.* and NEXT_CHAR, whereas the
 * enabled functions nearby read input through c->c.value and getc_*();
 * it presumably needs porting before the #if 0 can be removed - confirm.
 */
static int charclass (comp_t* builder, qse_char_t* cc)
{
const struct __char_class_t* ccp = __char_class;
/* number of characters left in the pattern from the current position */
qse_size_t len = builder->ptn.end - builder->ptn.curp;
/* walk the table; its last entry is marked by a null name */
while (ccp->name != QSE_NULL)
{
/* presumably a prefix comparison - verify against __begin_with */
if (__begin_with (builder->ptn.curp, len, ccp->name)) break;
ccp++;
}
if (ccp->name == QSE_NULL)
{
/* wrong class name */
#ifdef DEBUG_REX
DPUTS (QSE_T("build_atom_cclass: wrong class name\n"));
#endif
builder->errnum = QSE_REX_ECCLASS;
return -1;
}
/* skip over the class name that was just recognised */
builder->ptn.curp += ccp->name_len;
NEXT_CHAR (builder, LEVEL_CHARSET);
/* the name must be followed by an ordinary ':' */
if (builder->ptn.curc.type != CT_NORMAL ||
builder->ptn.curc.value != QSE_T(':'))
{
#ifdef DEBUG_REX
DPUTS (QSE_T("build_atom_cclass: a colon(:) expected\n"));
#endif
builder->errnum = QSE_REX_ECOLON;
return -1;
}
NEXT_CHAR (builder, LEVEL_CHARSET);
/* ] happens to be the charset ender ] */
if (builder->ptn.curc.type != CT_SPECIAL ||
builder->ptn.curc.value != QSE_T(']'))
{
#ifdef DEBUG_REX
DPUTS (QSE_T("build_atom_cclass: ] expected\n"));
#endif
builder->errnum = QSE_REX_ERBRACKET;
return -1;
}
NEXT_CHAR (builder, LEVEL_CHARSET);
/* report the class as its position in the __char_class table */
*cc = (qse_char_t)(ccp - __char_class);
return 1;
}
#endif
/*
 * charset - scan the body of a bracket expression up to its closing ']'.
 *
 * Interim implementation: it only walks the input. Nothing is stored in
 * 'node' yet; each single character or range that is recognised is merely
 * printed with qse_printf().
 *
 * c    - compiler state; c->c.value is the current character on entry.
 * node - the charset node being built (not used yet).
 *
 * Returns 0 once the character after the closing ']' has been read, or -1
 * as soon as a getc_noesc()/getc_esc() call reports failure.
 *
 * NOTE(review): the loop ends only when the current character is ']'.
 * If the pattern ends before a ']' is seen, the loop looks like it could
 * spin forever unless getc reports failure at end of input - confirm.
 */
static int charset (comp_t* c, qse_rex_node_t* node)
{
/* the three locals below are not referenced anywhere in this body yet */
qse_size_t zero = 0;
qse_size_t old_size;
qse_size_t pos_csc;
if (c->c.value == QSE_T('^'))
{
/* a leading '^' is consumed, but the negation is not recorded yet */
//cmd->negate = 1;
//TODO: negate...
if (getc_noesc(c) <= -1) return -1;
}
/* if ] is the first character or the second character following ^,
* it is treated literally */
/* do-while: the first character is consumed before ']' is tested for */
do
{
qse_char_t c1, c2;
/* c1 is the item being classified, c2 is one character of lookahead */
c1 = c->c.value;
if (getc_noesc(c) <= -1) return -1;
c2 = c->c.value;
if (c1 == QSE_T('[') && c2 == QSE_T(':'))
{
/* begins with [: */
if (getc_noesc(c) <= -1) return -1;
/* class-name parsing is not wired in yet */
//if (charclass (c) <= -1) return -1;
}
else if (c2 == QSE_T('-'))
{
/* c1 '-' X: read the range end X, report it, then step past it */
if (getc_noesc(c) <= -1) return -1;
//add c->c.value;
qse_printf (QSE_T("[%c-%c]\n"), c1, c->c.value);
if (getc_noesc(c) <= -1) return -1;
}
else
{
/* single character; c2 stays current and becomes c1 next round */
//add c1;
qse_printf (QSE_T("[%c]\n"), c1);
}
}
while (c->c.value != QSE_T(']'));
/* move past ']' - backslash escapes are processed again from here on */
if (getc_esc(c) <= -1) return -1;
return 0;
}
/*
 * occbound - parse the inside of an occurrence specifier: {n}, {n,} or {n,m}.
 *
 * On entry c->c.value is the character that follows '{'. The bounds are
 * stored in n->occ.min and n->occ.max:
 *   {n}    min = n, max = n
 *   {n,}   min = n, max = OCC_MAX
 *   {n,m}  min = n, max = m
 * A missing lower bound is taken as 0.
 *
 * c - compiler state; c->rex->errnum is set on the failures detected here.
 * n - node whose occurrence range is being set.
 *
 * Returns 0 once the character after the closing '}' has been read.
 * Returns -1 if reading a character fails, if min exceeds max
 * (QSE_REX_EBOUND), or if the closing '}' is missing (QSE_REX_ERBRACE).
 */
static int occbound (comp_t* c, qse_rex_node_t* n)
{
	qse_size_t bound;

	/* lower bound: zero or more decimal digits */
	bound = 0;
	while (c->c.value >= QSE_T('0') && c->c.value <= QSE_T('9'))
	{
		bound = bound * 10 + c->c.value - QSE_T('0');
		if (getc_noesc(c) <= -1) return -1;
	}
	n->occ.min = bound;

	if (c->c.value == QSE_T(','))
	{
		if (getc_noesc(c) <= -1) return -1;

		if (c->c.value >= QSE_T('0') && c->c.value <= QSE_T('9'))
		{
			/* explicit upper bound */
			bound = 0;
			do
			{
				bound = bound * 10 + c->c.value - QSE_T('0');
				if (getc_noesc(c) <= -1) return -1;
			}
			while (c->c.value >= QSE_T('0') &&
			       c->c.value <= QSE_T('9'));

			n->occ.max = bound;
		}
		else
		{
			/* "{n,}": no upper limit */
			n->occ.max = OCC_MAX;
		}
	}
	else
	{
		/* "{n}": exactly n occurrences */
		n->occ.max = n->occ.min;
	}

	/* the lower bound must not exceed the upper bound; the previous
	 * check compared occ.min with itself and so never triggered */
	if (n->occ.min > n->occ.max)
	{
		/* invalid occurrences range */
		c->rex->errnum = QSE_REX_EBOUND;
		return -1;
	}

	if (c->c.value != QSE_T('}'))
	{
		c->rex->errnum = QSE_REX_ERBRACE;
		return -1;
	}

	/* step past '}' with backslash-escape processing enabled */
	if (getc_esc(c) <= -1) return -1;
	return 0;
}
static qse_rex_node_t* comp0 (comp_t* c, qse_rex_node_t* ge); static qse_rex_node_t* comp0 (comp_t* c, qse_rex_node_t* ge);
static qse_rex_node_t* comp2 (comp_t* c) static qse_rex_node_t* comp2 (comp_t* c)
@ -434,7 +593,7 @@ static qse_rex_node_t* comp2 (comp_t* c)
ge = newgroupendnode (c, n); ge = newgroupendnode (c, n);
if (ge == QSE_NULL) return QSE_NULL; if (ge == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) return QSE_NULL; if (getc_esc(c) <= -1) return QSE_NULL;
c->gdepth++; c->gdepth++;
x = comp0 (c, ge); x = comp0 (c, ge);
@ -442,12 +601,12 @@ static qse_rex_node_t* comp2 (comp_t* c)
if (!IS_SPE(c,QSE_T(')'))) if (!IS_SPE(c,QSE_T(')')))
{ {
c->rex->errnum = QSE_REX_EUNBALPAREN; c->rex->errnum = QSE_REX_ERPAREN;
return QSE_NULL; return QSE_NULL;
} }
c->gdepth--; c->gdepth--;
if (getc(c) <= -1) return QSE_NULL; if (getc_esc(c) <= -1) return QSE_NULL;
n->u.g.head = x; n->u.g.head = x;
break; break;
@ -457,26 +616,28 @@ static qse_rex_node_t* comp2 (comp_t* c)
case QSE_T('.'): case QSE_T('.'):
n = newnode (c, QSE_REX_NODE_ANYCHAR); n = newnode (c, QSE_REX_NODE_ANYCHAR);
if (n == QSE_NULL) return QSE_NULL; if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) return QSE_NULL; if (getc_esc(c) <= -1) return QSE_NULL;
break; break;
case QSE_T('^'): case QSE_T('^'):
n = newnode (c, QSE_REX_NODE_BOL); n = newnode (c, QSE_REX_NODE_BOL);
if (n == QSE_NULL) return QSE_NULL; if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) return QSE_NULL; if (getc_esc(c) <= -1) return QSE_NULL;
break; break;
case QSE_T('$'): case QSE_T('$'):
n = newnode (c, QSE_REX_NODE_EOL); n = newnode (c, QSE_REX_NODE_EOL);
if (n == QSE_NULL) return QSE_NULL; if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) return QSE_NULL; if (getc_esc(c) <= -1) return QSE_NULL;
break; break;
/*
case QSE_T('['): case QSE_T('['):
n = newnode (c, QSE_REX_NODE_CHARSET);
if (n == QSE_NULL) return QSE_NULL;
if (getc_noesc(c) <= -1) return QSE_NULL;
if (charset(c, n) <= -1) return QSE_NULL;
break; break;
*/
default: default:
goto normal_char; goto normal_char;
@ -488,7 +649,7 @@ static qse_rex_node_t* comp2 (comp_t* c)
/* normal character */ /* normal character */
n = newcharnode (c, c->c.value); n = newcharnode (c, c->c.value);
if (n == QSE_NULL) return QSE_NULL; if (n == QSE_NULL) return QSE_NULL;
if (getc(c) <= -1) return QSE_NULL; if (getc_esc(c) <= -1) return QSE_NULL;
} }
n->occ.min = 1; n->occ.min = 1;
@ -503,30 +664,28 @@ static qse_rex_node_t* comp2 (comp_t* c)
case QSE_T('?'): case QSE_T('?'):
n->occ.min = 0; n->occ.min = 0;
n->occ.max = 1; n->occ.max = 1;
if (getc(c) <= -1) return QSE_NULL; if (getc_esc(c) <= -1) return QSE_NULL;
break; break;
case QSE_T('*'): case QSE_T('*'):
n->occ.min = 0; n->occ.min = 0;
n->occ.max = OCC_MAX; n->occ.max = OCC_MAX;
if (getc(c) <= -1) return QSE_NULL; if (getc_esc(c) <= -1) return QSE_NULL;
break; break;
case QSE_T('+'): case QSE_T('+'):
n->occ.min = 1; n->occ.min = 1;
n->occ.max = OCC_MAX; n->occ.max = OCC_MAX;
if (getc(c) <= -1) return QSE_NULL; if (getc_esc(c) <= -1) return QSE_NULL;
break; break;
/*
case QSE_T('{'): case QSE_T('{'):
// TODO: if (!(c->rex->option & QSE_REX_NOBOUND))
if (!(com->rex->option & QSE_REX_NOBOUND))
{ {
if (getc_noesc(c) <= -1) return QSE_NULL;
if (occbound(c,n) <= -1) return QSE_NULL;
} }
break; break;
*/
} }
} }
@ -568,7 +727,7 @@ static qse_rex_node_t* comp0 (comp_t* c, qse_rex_node_t* ge)
while (IS_SPE(c,QSE_T('|'))) while (IS_SPE(c,QSE_T('|')))
{ {
if (getc(c) <= -1) if (getc_esc(c) <= -1)
{ {
//freere (left); //freere (left);
return QSE_NULL; return QSE_NULL;
@ -614,7 +773,7 @@ qse_rex_node_t* qse_rex_comp (
c.start = QSE_NULL; c.start = QSE_NULL;
/* read the first character */ /* read the first character */
if (getc(&c) <= -1) return QSE_NULL; if (getc_esc(&c) <= -1) return QSE_NULL;
c.start = newstartnode (&c); c.start = newstartnode (&c);
if (c.start == QSE_NULL) return QSE_NULL; if (c.start == QSE_NULL) return QSE_NULL;