From 782fa151de1ee57e1c3c0d08e1d1ec25e80376b7 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Mon, 23 Nov 2009 07:58:53 +0000 Subject: [PATCH] interim commit while implementing a regular expression processor --- qse/cmd/awk/awk.c | 8 +- qse/configure | 20 +- qse/configure.ac | 2 +- qse/include/qse/awk/awk.h | 4 +- qse/include/qse/cmn/rex.h | 8 +- qse/lib/awk/misc.c | 4 +- qse/lib/awk/std.c | 32 +--- qse/lib/cmn/rex.c | 4 +- qse/lib/cmn/rex1.c | 387 +++++++++++++++++++++++++++----------- 9 files changed, 304 insertions(+), 165 deletions(-) diff --git a/qse/cmd/awk/awk.c b/qse/cmd/awk/awk.c index fcbfc9d4..73efded2 100644 --- a/qse/cmd/awk/awk.c +++ b/qse/cmd/awk/awk.c @@ -1,5 +1,5 @@ /* - * $Id: awk.c 291 2009-09-21 13:28:18Z hyunghwan.chung $ + * $Id: awk.c 306 2009-11-22 13:58:53Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. @@ -297,11 +297,7 @@ static int fnc_sleep (qse_awk_rtx_t* run, const qse_cstr_t* fnm) #endif r = qse_awk_rtx_makeintval (run, n); - if (r == QSE_NULL) - { - qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL); - return -1; - } + if (r == QSE_NULL) return -1; qse_awk_rtx_setretval (run, r); return 0; diff --git a/qse/configure b/qse/configure index 9aafb2c2..5554ec09 100755 --- a/qse/configure +++ b/qse/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.64 for qse 0.5.2. +# Generated by GNU Autoconf 2.64 for qse 0.5.3. # # Report bugs to . # @@ -698,8 +698,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='qse' PACKAGE_TARNAME='qse' -PACKAGE_VERSION='0.5.2' -PACKAGE_STRING='qse 0.5.2' +PACKAGE_VERSION='0.5.3' +PACKAGE_STRING='qse 0.5.3' PACKAGE_BUGREPORT='bacon@abiyo.net' PACKAGE_URL='' @@ -1457,7 +1457,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. 
cat <<_ACEOF -\`configure' configures qse 0.5.2 to adapt to many kinds of systems. +\`configure' configures qse 0.5.3 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1527,7 +1527,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of qse 0.5.2:";; + short | recursive ) echo "Configuration of qse 0.5.3:";; esac cat <<\_ACEOF @@ -1641,7 +1641,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -qse configure 0.5.2 +qse configure 0.5.3 generated by GNU Autoconf 2.64 Copyright (C) 2009 Free Software Foundation, Inc. @@ -2403,7 +2403,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by qse $as_me 0.5.2, which was +It was created by qse $as_me 0.5.3, which was generated by GNU Autoconf 2.64. Invocation command line was $ $0 $@ @@ -3222,7 +3222,7 @@ fi # Define the identity of the package. PACKAGE='qse' - VERSION='0.5.2' + VERSION='0.5.3' cat >>confdefs.h <<_ACEOF @@ -17992,7 +17992,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by qse $as_me 0.5.2, which was +This file was extended by qse $as_me 0.5.3, which was generated by GNU Autoconf 2.64. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -18056,7 +18056,7 @@ Report bugs to ." 
_ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_version="\\ -qse config.status 0.5.2 +qse config.status 0.5.3 configured by $0, generated by GNU Autoconf 2.64, with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" diff --git a/qse/configure.ac b/qse/configure.ac index 513f0156..931ff700 100644 --- a/qse/configure.ac +++ b/qse/configure.ac @@ -1,5 +1,5 @@ dnl AC_PREREQ(2.59) -AC_INIT([qse],[0.5.2],[bacon@abiyo.net]) +AC_INIT([qse],[0.5.3],[bacon@abiyo.net]) AC_CONFIG_HEADER([include/qse/config.h]) AC_CONFIG_AUX_DIR([ac/au]) AC_CONFIG_MACRO_DIR([ac/m4]) diff --git a/qse/include/qse/awk/awk.h b/qse/include/qse/awk/awk.h index 3bfb41e6..ebd65238 100644 --- a/qse/include/qse/awk/awk.h +++ b/qse/include/qse/awk/awk.h @@ -1,5 +1,5 @@ /* - * $Id: awk.h 299 2009-10-19 13:33:40Z hyunghwan.chung $ + * $Id: awk.h 306 2009-11-22 13:58:53Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. @@ -795,7 +795,7 @@ enum qse_awk_errnum_t QSE_AWK_EREXCOLON, /**< a colon is expected */ QSE_AWK_EREXCRANGE, /**< invalid character range */ QSE_AWK_EREXCCLASS, /**< invalid character class */ - QSE_AWK_EREXBRANGE, /**< invalid boundary range */ + QSE_AWK_EREXBOUND, /**< invalid boundary range */ QSE_AWK_EREXEND, /**< unexpected end of the pattern */ QSE_AWK_EREXGARBAGE, /**< garbage after the pattern */ diff --git a/qse/include/qse/cmn/rex.h b/qse/include/qse/cmn/rex.h index 7d8c0f2f..174663cf 100644 --- a/qse/include/qse/cmn/rex.h +++ b/qse/include/qse/cmn/rex.h @@ -1,5 +1,5 @@ /* - * $Id: rex.h 304 2009-11-20 05:12:27Z hyunghwan.chung $ + * $Id: rex.h 306 2009-11-22 13:58:53Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. 
@@ -72,6 +72,10 @@ enum qse_rex_option_t /**< do not support the {n,m} style occurrence specifier */ QSE_REX_NOBOUND = (1 << 0), + QSE_REX_ESQ_HEX = (1 << 1), /* \xhh and \uhhhh */ + QSE_REX_ESQ_OCTAL = (1 << 2), /* \000 */ + QSE_REX_ESQ_CNTRL = (1 << 3), /* \cX where X is A to Z */ + /**< perform case-insensitive match */ QSE_REX_IGNORECASE = (1 << 8) }; @@ -89,7 +93,7 @@ enum qse_rex_errnum_t QSE_REX_ECOLON, /* a colon is expected */ QSE_REX_ECRANGE, /* invalid character range */ QSE_REX_ECCLASS, /* invalid character class */ - QSE_REX_EBRANGE, /* invalid boundary range */ + QSE_REX_EBOUND, /* invalid boundary range */ QSE_REX_EEND, /* unexpected end of the pattern */ QSE_REX_EGARBAGE /* garbage after the pattern */ }; diff --git a/qse/lib/awk/misc.c b/qse/lib/awk/misc.c index 0c8a6146..b2e7db2a 100644 --- a/qse/lib/awk/misc.c +++ b/qse/lib/awk/misc.c @@ -1,5 +1,5 @@ /* - * $Id: misc.c 292 2009-09-23 10:19:30Z hyunghwan.chung $ + * $Id: misc.c 306 2009-11-22 13:58:53Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. @@ -1039,7 +1039,7 @@ qse_char_t* qse_awk_rtx_strxnfld ( (err == QSE_REX_ECOLON)? QSE_AWK_EREXCOLON: \ (err == QSE_REX_ECRANGE)? QSE_AWK_EREXCRANGE: \ (err == QSE_REX_ECCLASS)? QSE_AWK_EREXCCLASS: \ - (err == QSE_REX_EBRANGE)? QSE_AWK_EREXBRANGE: \ + (err == QSE_REX_EBOUND)? QSE_AWK_EREXBOUND: \ (err == QSE_REX_EEND)? QSE_AWK_EREXEND: \ (err == QSE_REX_EGARBAGE)? QSE_AWK_EREXGARBAGE: \ QSE_AWK_EINTERN) diff --git a/qse/lib/awk/std.c b/qse/lib/awk/std.c index 8cf3e4ef..494ed77b 100644 --- a/qse/lib/awk/std.c +++ b/qse/lib/awk/std.c @@ -1,5 +1,5 @@ /* - * $Id: std.c 291 2009-09-21 13:28:18Z hyunghwan.chung $ + * $Id: std.c 306 2009-11-22 13:58:53Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. 
@@ -1224,11 +1224,7 @@ static int fnc_math_1 ( r = qse_awk_rtx_makerealval (run, rf(rv)); } - if (r == QSE_NULL) - { - qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL); - return -1; - } + if (r == QSE_NULL) return -1; qse_awk_rtx_setretval (run, r); return 0; @@ -1277,11 +1273,7 @@ static int fnc_math_2 ( r = qse_awk_rtx_makerealval (run, rf(rv0,rv1)); } - if (r == QSE_NULL) - { - qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL); - return -1; - } + if (r == QSE_NULL) return -1; qse_awk_rtx_setretval (run, r); return 0; @@ -1434,11 +1426,7 @@ static int fnc_int (qse_awk_rtx_t* run, const qse_cstr_t* fnm) if (n == 1) lv = (qse_long_t)rv; r = qse_awk_rtx_makeintval (run, lv); - if (r == QSE_NULL) - { - qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL); - return -1; - } + if (r == QSE_NULL) return -1; qse_awk_rtx_setretval (run, r); return 0; @@ -1456,11 +1444,7 @@ static int fnc_rand (qse_awk_rtx_t* run, const qse_cstr_t* fnm) */ r = qse_awk_rtx_makerealval ( run, (qse_real_t)(rand() % RAND_MAX) / RAND_MAX); - if (r == QSE_NULL) - { - qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL); - return -1; - } + if (r == QSE_NULL) return -1; qse_awk_rtx_setretval (run, r); return 0; @@ -1504,11 +1488,7 @@ static int fnc_srand (qse_awk_rtx_t* run, const qse_cstr_t* fnm) srand (rxtn->seed); r = qse_awk_rtx_makeintval (run, prev); - if (r == QSE_NULL) - { - qse_awk_rtx_seterrnum (run, QSE_AWK_ENOMEM, QSE_NULL); - return -1; - } + if (r == QSE_NULL) return -1; qse_awk_rtx_setretval (run, r); return 0; diff --git a/qse/lib/cmn/rex.c b/qse/lib/cmn/rex.c index e2616ad8..6baf61a0 100644 --- a/qse/lib/cmn/rex.c +++ b/qse/lib/cmn/rex.c @@ -1,5 +1,5 @@ /* - * $Id: rex.c 304 2009-11-20 05:12:27Z hyunghwan.chung $ + * $Id: rex.c 306 2009-11-22 13:58:53Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. @@ -962,7 +962,7 @@ what if it is not in the raight format? convert it to ordinary characters?? 
*/ if (cmd->lbound > cmd->ubound) { /* invalid occurrences range */ - builder->errnum = QSE_REX_EBRANGE; + builder->errnum = QSE_REX_EBOUND; return -1; } diff --git a/qse/lib/cmn/rex1.c b/qse/lib/cmn/rex1.c index 55b1d713..4d43c29b 100644 --- a/qse/lib/cmn/rex1.c +++ b/qse/lib/cmn/rex1.c @@ -144,8 +144,8 @@ static void freenode (qse_rex_node_t* node, qse_mmgr_t* mmgr) { if (node->id == QSE_REX_NODE_CHARSET) { - // TODO: - QSE_MMGR_FREE (mmgr, node->u.cs); + if (node->u.cs != QSE_NULL) + QSE_MMGR_FREE (mmgr, node->u.cs); } QSE_MMGR_FREE (mmgr, node); @@ -288,8 +288,13 @@ static qse_rex_node_t* newbranchnode ( #define IS_ESC(com) ((com)->c.escaped) #define IS_EOF(com) ((com)->c.value == QSE_CHAR_EOF) -static int getc (comp_t* com) +#define getc_noesc(c) getc(c,1) +#define getc_esc(c) getc(c,0) + +static int getc (comp_t* com, int noesc) { + qse_char_t c; + if (com->ptr >= com->end) { com->c.value = QSE_CHAR_EOF; @@ -298,110 +303,108 @@ static int getc (comp_t* com) } com->c.value = *com->ptr++; - com->c.escaped = QSE_FALSE; + com->c.escaped = 0; - if (com->c.value == QSE_T('\\')) - { - qse_char_t c; + if (noesc || com->c.value != QSE_T('\\')) return 0; + + + CHECK_END (com); + c = *com->ptr++; + + if (c == QSE_T('n')) c = QSE_T('\n'); + else if (c == QSE_T('r')) c = QSE_T('\r'); + else if (c == QSE_T('t')) c = QSE_T('\t'); + else if (c == QSE_T('f')) c = QSE_T('\f'); + else if (c == QSE_T('b')) c = QSE_T('\b'); + else if (c == QSE_T('v')) c = QSE_T('\v'); + else if (c == QSE_T('a')) c = QSE_T('\a'); + else if (c >= QSE_T('0') && c <= QSE_T('7')) + { + qse_char_t cx; + + c = c - QSE_T('0'); CHECK_END (com); - c = *com->ptr++; - - if (c == QSE_T('n')) c = QSE_T('\n'); - else if (c == QSE_T('r')) c = QSE_T('\r'); - else if (c == QSE_T('t')) c = QSE_T('\t'); - else if (c == QSE_T('f')) c = QSE_T('\f'); - else if (c == QSE_T('b')) c = QSE_T('\b'); - else if (c == QSE_T('v')) c = QSE_T('\v'); - else if (c == QSE_T('a')) c = QSE_T('\a'); - else if (c >= QSE_T('0') && c <= 
QSE_T('7')) + cx = *com->ptr++; + if (cx >= QSE_T('0') && cx <= QSE_T('7')) { - qse_char_t cx; - - c = c - QSE_T('0'); + c = c * 8 + cx - QSE_T('0'); CHECK_END (com); cx = *com->ptr++; if (cx >= QSE_T('0') && cx <= QSE_T('7')) { c = c * 8 + cx - QSE_T('0'); - - CHECK_END (com); - cx = *com->ptr++; - if (cx >= QSE_T('0') && cx <= QSE_T('7')) - { - c = c * 8 + cx - QSE_T('0'); - } } } - else if (c == QSE_T('x')) - { - qse_char_t cx; - - CHECK_END (com); - cx = *com->ptr++; - if (IS_HEX(cx)) - { - c = HEX_TO_NUM(cx); - - CHECK_END (com); - cx = *com->ptr++; - if (IS_HEX(cx)) - { - c = c * 16 + HEX_TO_NUM(cx); - } - } - } - #ifdef QSE_CHAR_IS_WCHAR - else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2) - { - qse_char_t cx; - - CHECK_END (com); - cx = *com->ptr++; - if (IS_HEX(cx)) - { - qse_size_t i; - - c = HEX_TO_NUM(cx); - - for (i = 0; i < 3; i++) - { - CHECK_END (com); - cx = *com->ptr++; - - if (!IS_HEX(cx)) break; - c = c * 16 + HEX_TO_NUM(cx); - } - } - } - else if (c == QSE_T('U') && QSE_SIZEOF(qse_char_t) >= 4) - { - qse_char_t cx; - - CHECK_END (com); - cx = *com->ptr++; - if (IS_HEX(cx)) - { - qse_size_t i; - - c = HEX_TO_NUM(cx); - - for (i = 0; i < 7; i++) - { - CHECK_END (com); - cx = *com->ptr++; - - if (!IS_HEX(cx)) break; - c = c * 16 + HEX_TO_NUM(cx); - } - } - } - #endif - - com->c.value = c; - com->c.escaped = QSE_TRUE; } + else if (c == QSE_T('x')) + { + qse_char_t cx; + + CHECK_END (com); + cx = *com->ptr++; + if (IS_HEX(cx)) + { + c = HEX_TO_NUM(cx); + + CHECK_END (com); + cx = *com->ptr++; + if (IS_HEX(cx)) + { + c = c * 16 + HEX_TO_NUM(cx); + } + } + } +#ifdef QSE_CHAR_IS_WCHAR + else if (c == QSE_T('u') && QSE_SIZEOF(qse_char_t) >= 2) + { + qse_char_t cx; + + CHECK_END (com); + cx = *com->ptr++; + if (IS_HEX(cx)) + { + qse_size_t i; + + c = HEX_TO_NUM(cx); + + for (i = 0; i < 3; i++) + { + CHECK_END (com); + cx = *com->ptr++; + + if (!IS_HEX(cx)) break; + c = c * 16 + HEX_TO_NUM(cx); + } + } + } + else if (c == QSE_T('U') && 
QSE_SIZEOF(qse_char_t) >= 4) + { + qse_char_t cx; + + CHECK_END (com); + cx = *com->ptr++; + if (IS_HEX(cx)) + { + qse_size_t i; + + c = HEX_TO_NUM(cx); + + for (i = 0; i < 7; i++) + { + CHECK_END (com); + cx = *com->ptr++; + + if (!IS_HEX(cx)) break; + c = c * 16 + HEX_TO_NUM(cx); + } + } + } +#endif + + com->c.value = c; + com->c.escaped = QSE_TRUE; #if 0 com->c = (com->ptr < com->end)? *com->ptr++: QSE_CHAR_EOF; @@ -412,6 +415,162 @@ else qse_printf (QSE_T("getc => %c\n"), com->c); return 0; } +#if 0 +static int charclass (comp_t* builder, qse_char_t* cc) +{ + const struct __char_class_t* ccp = __char_class; + qse_size_t len = builder->ptn.end - builder->ptn.curp; + + while (ccp->name != QSE_NULL) + { + if (__begin_with (builder->ptn.curp, len, ccp->name)) break; + ccp++; + } + + if (ccp->name == QSE_NULL) + { + /* wrong class name */ + #ifdef DEBUG_REX + DPUTS (QSE_T("build_atom_cclass: wrong class name\n")); + #endif + builder->errnum = QSE_REX_ECCLASS; + return -1; + } + + builder->ptn.curp += ccp->name_len; + + NEXT_CHAR (builder, LEVEL_CHARSET); + if (builder->ptn.curc.type != CT_NORMAL || + builder->ptn.curc.value != QSE_T(':')) + { + #ifdef DEBUG_REX + DPUTS (QSE_T("build_atom_cclass: a colon(:) expected\n")); + #endif + builder->errnum = QSE_REX_ECOLON; + return -1; + } + + NEXT_CHAR (builder, LEVEL_CHARSET); + + /* ] happens to be the charset ender ] */ + if (builder->ptn.curc.type != CT_SPECIAL || + builder->ptn.curc.value != QSE_T(']')) + { + #ifdef DEBUG_REX + DPUTS (QSE_T("build_atom_cclass: ] expected\n")); + #endif + builder->errnum = QSE_REX_ERBRACKET; + return -1; + } + + NEXT_CHAR (builder, LEVEL_CHARSET); + + *cc = (qse_char_t)(ccp - __char_class); + return 1; +} +#endif + +static int charset (comp_t* c, qse_rex_node_t* node) +{ + qse_size_t zero = 0; + qse_size_t old_size; + qse_size_t pos_csc; + + if (c->c.value == QSE_T('^')) + { + //cmd->negate = 1; + //TODO: negate... 
+ if (getc_noesc(c) <= -1) return -1; + } + + /* if ] is the first character or the second character following ^, + * it is treated literally */ + + do + { + qse_char_t c1, c2; + + c1 = c->c.value; + if (getc_noesc(c) <= -1) return -1; + c2 = c->c.value; + + if (c1 == QSE_T('[') && c2 == QSE_T(':')) + { + /* begins with [: */ + if (getc_noesc(c) <= -1) return -1; + //if (charclass (c) <= -1) return -1; + } + else if (c2 == QSE_T('-')) + { + if (getc_noesc(c) <= -1) return -1; + //add c->c.value; +qse_printf (QSE_T("[%c-%c]\n"), c1, c->c.value); + if (getc_noesc(c) <= -1) return -1; + } + else + { + //add c1; +qse_printf (QSE_T("[%c]\n"), c1); + } + } + while (c->c.value != QSE_T(']')); + + if (getc_esc(c) <= -1) return -1; + return 0; +} + +static int occbound (comp_t* c, qse_rex_node_t* n) +{ + qse_size_t bound; + + bound = 0; + while (c->c.value >= QSE_T('0') && c->c.value <= QSE_T('9')) + { + bound = bound * 10 + c->c.value - QSE_T('0'); + if (getc_noesc(c) <= -1) return -1; + } + + n->occ.min = bound; + + if (c->c.value == QSE_T(',')) + { + if (getc_noesc(c) <= -1) return -1; + + if (c->c.value >= QSE_T('0') && c->c.value <= QSE_T('9')) + { + bound = 0; + + do + { + bound = bound * 10 + c->c.value - QSE_T('0'); + if (getc_noesc(c) <= -1) return -1; + } + while (c->c.value >= QSE_T('0') && + c->c.value <= QSE_T('9')); + + n->occ.max = bound; + } + else n->occ.max = OCC_MAX; + } + else n->occ.max = n->occ.min; + + if (n->occ.min > n->occ.max) + { + /* invalid occurrences range: the lower bound exceeds the upper bound */ + c->rex->errnum = QSE_REX_EBOUND; + return -1; + } + + if (c->c.value != QSE_T('}')) + { + c->rex->errnum = QSE_REX_ERBRACE; + return -1; + } + + if (getc_esc(c) <= -1) return -1; + return 0; +} + static qse_rex_node_t* comp0 (comp_t* c, qse_rex_node_t* ge); static qse_rex_node_t* comp2 (comp_t* c) @@ -434,7 +593,7 @@ static qse_rex_node_t* comp2 (comp_t* c) ge = newgroupendnode (c, n); if (ge == QSE_NULL) return QSE_NULL; - if (getc(c) <= -1) return QSE_NULL; + if (getc_esc(c) <= -1) 
return QSE_NULL; c->gdepth++; x = comp0 (c, ge); @@ -442,12 +601,12 @@ static qse_rex_node_t* comp2 (comp_t* c) if (!IS_SPE(c,QSE_T(')'))) { - c->rex->errnum = QSE_REX_EUNBALPAREN; + c->rex->errnum = QSE_REX_ERPAREN; return QSE_NULL; } c->gdepth--; - if (getc(c) <= -1) return QSE_NULL; + if (getc_esc(c) <= -1) return QSE_NULL; n->u.g.head = x; break; @@ -457,26 +616,28 @@ static qse_rex_node_t* comp2 (comp_t* c) case QSE_T('.'): n = newnode (c, QSE_REX_NODE_ANYCHAR); if (n == QSE_NULL) return QSE_NULL; - if (getc(c) <= -1) return QSE_NULL; + if (getc_esc(c) <= -1) return QSE_NULL; break; case QSE_T('^'): n = newnode (c, QSE_REX_NODE_BOL); if (n == QSE_NULL) return QSE_NULL; - if (getc(c) <= -1) return QSE_NULL; + if (getc_esc(c) <= -1) return QSE_NULL; break; case QSE_T('$'): n = newnode (c, QSE_REX_NODE_EOL); if (n == QSE_NULL) return QSE_NULL; - if (getc(c) <= -1) return QSE_NULL; + if (getc_esc(c) <= -1) return QSE_NULL; break; - - /* case QSE_T('['): + n = newnode (c, QSE_REX_NODE_CHARSET); + if (n == QSE_NULL) return QSE_NULL; + + if (getc_noesc(c) <= -1) return QSE_NULL; + if (charset(c, n) <= -1) return QSE_NULL; break; - */ default: goto normal_char; @@ -488,7 +649,7 @@ static qse_rex_node_t* comp2 (comp_t* c) /* normal character */ n = newcharnode (c, c->c.value); if (n == QSE_NULL) return QSE_NULL; - if (getc(c) <= -1) return QSE_NULL; + if (getc_esc(c) <= -1) return QSE_NULL; } n->occ.min = 1; @@ -503,30 +664,28 @@ static qse_rex_node_t* comp2 (comp_t* c) case QSE_T('?'): n->occ.min = 0; n->occ.max = 1; - if (getc(c) <= -1) return QSE_NULL; + if (getc_esc(c) <= -1) return QSE_NULL; break; case QSE_T('*'): n->occ.min = 0; n->occ.max = OCC_MAX; - if (getc(c) <= -1) return QSE_NULL; + if (getc_esc(c) <= -1) return QSE_NULL; break; case QSE_T('+'): n->occ.min = 1; n->occ.max = OCC_MAX; - if (getc(c) <= -1) return QSE_NULL; + if (getc_esc(c) <= -1) return QSE_NULL; break; - /* case QSE_T('{'): - // TODO: - if (!(com->rex->option & QSE_REX_NOBOUND)) + if 
(!(c->rex->option & QSE_REX_NOBOUND)) { + if (getc_noesc(c) <= -1) return QSE_NULL; + if (occbound(c,n) <= -1) return QSE_NULL; } break; - */ - } } @@ -568,7 +727,7 @@ static qse_rex_node_t* comp0 (comp_t* c, qse_rex_node_t* ge) while (IS_SPE(c,QSE_T('|'))) { - if (getc(c) <= -1) + if (getc_esc(c) <= -1) { //freere (left); return QSE_NULL; @@ -614,7 +773,7 @@ qse_rex_node_t* qse_rex_comp ( c.start = QSE_NULL; /* read the first character */ - if (getc(&c) <= -1) return QSE_NULL; + if (getc_esc(&c) <= -1) return QSE_NULL; c.start = newstartnode (&c); if (c.start == QSE_NULL) return QSE_NULL;