diff --git a/qse/include/qse/awk/awk.h b/qse/include/qse/awk/awk.h index eab6718b..6b53c473 100644 --- a/qse/include/qse/awk/awk.h +++ b/qse/include/qse/awk/awk.h @@ -1,5 +1,5 @@ /* - * $Id: awk.h 307 2009-11-25 13:32:20Z hyunghwan.chung $ + * $Id: awk.h 311 2009-12-09 11:35:54Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. @@ -791,14 +791,12 @@ enum qse_awk_errnum_t QSE_AWK_EREXRPAREN, /**< a right parenthesis is expected */ QSE_AWK_EREXRBRACK, /**< a right bracket is expected */ QSE_AWK_EREXRBRACE, /**< a right brace is expected */ - QSE_AWK_EREXUNBALPAREN,/**< unbalanced parenthesis */ - QSE_AWK_EREXINVALBRACE,/**< invalid brace */ QSE_AWK_EREXCOLON, /**< a colon is expected */ QSE_AWK_EREXCRANGE, /**< invalid character range */ QSE_AWK_EREXCCLASS, /**< invalid character class */ QSE_AWK_EREXBOUND, /**< invalid occurrence bound */ + QSE_AWK_EREXSPCAWP, /**< special character at wrong position */ QSE_AWK_EREXPREEND, /**< premature end of regular expression */ - QSE_AWK_EREXGARBAGE, /**< garbage after pattern */ /* the number of error numbers, internal use only */ QSE_AWK_NUMERRNUM diff --git a/qse/include/qse/cmn/lda.h b/qse/include/qse/cmn/lda.h index 63dd50e1..e41c260e 100644 --- a/qse/include/qse/cmn/lda.h +++ b/qse/include/qse/cmn/lda.h @@ -1,5 +1,5 @@ /* - * $Id: lda.h 287 2009-09-15 10:01:02Z hyunghwan.chung $ + * $Id: lda.h 311 2009-12-09 11:35:54Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. 
@@ -87,7 +87,7 @@ typedef enum qse_lda_walk_t qse_lda_walk_t; * SYNOPSIS */ typedef void* (*qse_lda_copier_t) ( - qse_lda_t* lda /* a lda */, + qse_lda_t* lda /* lda */, void* dptr /* the pointer to data to copy */, qse_size_t dlen /* the length of data to copy */ ); @@ -99,7 +99,7 @@ typedef void* (*qse_lda_copier_t) ( * SYNOPSIS */ typedef void (*qse_lda_freeer_t) ( - qse_lda_t* lda /* a lda */, + qse_lda_t* lda /* lda */, void* dptr /* the pointer to data to free */, qse_size_t dlen /* the length of data to free */ ); @@ -141,7 +141,7 @@ typedef int (*qse_lda_comper_t) ( * SYNOPSIS */ typedef void (*qse_lda_keeper_t) ( - qse_lda_t* lda /* a lda */, + qse_lda_t* lda /* lda */, void* vptr /* the pointer to a value */, qse_size_t vlen /* the length of a value */ ); @@ -259,72 +259,63 @@ void qse_lda_fini ( /******/ int qse_lda_getscale ( - qse_lda_t* lda /* a lda */ + qse_lda_t* lda /* lda */ ); -/****f* Common/qse_lda_setscale - * NAME - * qse_lda_setscale - set the scale factor - * - * DESCRIPTION - * The qse_lda_setscale() function sets the scale factor of the length - * of a key and a value. A scale factor determines the actual length of - * a key and a value in bytes. A lda is created with a scale factor of 1. - * The scale factor should be larger than 0 and less than 256. - * - * NOTES - * It is a bad idea to change the scale factor when a lda is not empty. - * - * SYNOPSIS +/** + * The qse_lda_setscale() function sets the scale factor of the length + * of a key and a value. A scale factor determines the actual length of + * a key and a value in bytes. A lda is created with a scale factor of 1. + * The scale factor should be larger than 0 and less than 256. + * It is a bad idea to change the scale factor when @a lda is not empty. 
*/ void qse_lda_setscale ( - qse_lda_t* lda /* a lda */, + qse_lda_t* lda /* lda */, int scale /* a scale factor */ ); -/******/ qse_lda_copier_t qse_lda_getcopier ( - qse_lda_t* lda /* a lda */ + qse_lda_t* lda /* lda */ ); -/****f* Common/qse_lda_setcopier - * NAME - * qse_lda_setcopier - specify how to clone an element - * - * DESCRIPTION - * A special copier QSE_LDA_COPIER_INLINE is provided. This copier enables - * you to copy the data inline to the internal node. No freeer is invoked - * when the node is freeed. - * - * You may set the copier to QSE_NULL to perform no special operation - * when the data pointer is rememebered. - * - * SYNOPSIS +/** + * The qse_lda_setcopier() function specifies how to clone an element. The special + * copier #QSE_LDA_COPIER_INLINE copies the data inline to the internal node. + * No freeer is invoked when the node is freed. You may set the copier to + * #QSE_LDA_COPIER_SIMPLE to perform no special operation when the data + * pointer is stored. */ void qse_lda_setcopier ( - qse_lda_t* lda /* a lda */, - qse_lda_copier_t copier /* an element copier */ + qse_lda_t* lda /**< lda */, + qse_lda_copier_t copier /**< element copier */ ); -/******/ qse_lda_freeer_t qse_lda_getfreeer ( - qse_lda_t* lda /* a lda */ + qse_lda_t* lda /**< lda */ ); -/****f* Common/qse_lda_setfreeer - * NAME - * qse_lda_setfreeer - specify how to destroy an element - * - * DESCRIPTION - * The freeer is called when a node containing the element is destroyed. - * - * SYNOPSIS +/** + * The qse_lda_setfreeer() function specifies how to destroy an element. + * The @a freeer is called when a node containing the element is destroyed. 
*/ void qse_lda_setfreeer ( - qse_lda_t* lda /* a lda */, - qse_lda_freeer_t freeer /* an element freeer */ + qse_lda_t* lda /**< lda */, + qse_lda_freeer_t freeer /**< element freeer */ +); + +qse_lda_comper_t qse_lda_getcomper ( + qse_lda_t* lda /**< lda */ +); + +/** + * The qse_lda_setcomper() function specifies how to compare two elements + * for equality test. The comparator @a comper must return 0 if two elements + * compared are equal, or a non-zero number otherwise. + */ +void qse_lda_setcomper ( + qse_lda_t* lda /**< lda */, + qse_lda_comper_t comper /**< comparator */ ); -/******/ qse_lda_keeper_t qse_lda_getkeeper ( qse_lda_t* lda diff --git a/qse/include/qse/cmn/rex.h b/qse/include/qse/cmn/rex.h index 1e3fb4d7..5d6fc788 100644 --- a/qse/include/qse/cmn/rex.h +++ b/qse/include/qse/cmn/rex.h @@ -1,5 +1,5 @@ /* - * $Id: rex.h 310 2009-12-08 13:15:00Z hyunghwan.chung $ + * $Id: rex.h 311 2009-12-09 11:35:54Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. 
@@ -66,12 +66,14 @@ enum qse_rex_option_t { - QSE_REX_BUILD_NOBOUND = (1 << 0), + QSE_REX_BUILD_NOBOUND = (1 << 1), QSE_REX_MATCH_IGNORECASE = (1 << 8), + /**< do not allow a special character at normal character position */ + QSE_REX_STRICT = (1 << 0), /**< do not support the {n,m} style occurrence specifier */ - QSE_REX_NOBOUND = (1 << 0), + QSE_REX_NOBOUND = (1 << 1), #if 0 QSE_REX_ESQ_HEX = (1 << 1), /* \xhh and \uhhhh */ @@ -92,14 +94,12 @@ enum qse_rex_errnum_t QSE_REX_ERPAREN, /**< right parenthesis expected */ QSE_REX_ERBRACK, /**< right bracket expected */ QSE_REX_ERBRACE, /**< right brace expected */ - QSE_REX_EUNBALPAREN, /**< unbalanced parenthesis */ - QSE_REX_EINVALBRACE, /**< invalid brace */ QSE_REX_ECOLON, /**< colon expected */ QSE_REX_ECRANGE, /**< invalid character range */ QSE_REX_ECCLASS, /**< invalid character class */ QSE_REX_EBOUND, /**< invalid occurrence bound */ - QSE_REX_EPREEND, /**< premature expression end */ - QSE_REX_EGARBAGE /**< garbage after expression */ + QSE_REX_ESPCAWP, /**< special character at wrong position */ + QSE_REX_EPREEND /**< premature expression end */ }; typedef enum qse_rex_errnum_t qse_rex_errnum_t; diff --git a/qse/lib/awk/err.c b/qse/lib/awk/err.c index c75fe0fd..951f0514 100644 --- a/qse/lib/awk/err.c +++ b/qse/lib/awk/err.c @@ -1,5 +1,5 @@ /* - * $Id: err.c 307 2009-11-25 13:32:20Z hyunghwan.chung $ + * $Id: err.c 311 2009-12-09 11:35:54Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. 
@@ -143,14 +143,12 @@ const qse_char_t* qse_awk_dflerrstr (qse_awk_t* awk, qse_awk_errnum_t errnum) QSE_T("right parenthesis expected in regular expression"), QSE_T("right bracket expected in regular expression"), QSE_T("right brace expected in regular expression"), - QSE_T("unbalanced parenthesis in regular expression"), - QSE_T("invalid brace in regular expression"), QSE_T("colon expected in regular expression"), QSE_T("invalid character range in regular expression"), QSE_T("invalid character class in regular expression"), QSE_T("invalid occurrence bound in regular expression"), - QSE_T("premature end of regular expression"), - QSE_T("garbage after regular expression") + QSE_T("special character at wrong position"), + QSE_T("premature end of regular expression") }; return (errnum >= 0 && errnum < QSE_COUNTOF(errstr))? diff --git a/qse/lib/awk/misc.c b/qse/lib/awk/misc.c index 35e941ac..0a69f9c5 100644 --- a/qse/lib/awk/misc.c +++ b/qse/lib/awk/misc.c @@ -1,5 +1,5 @@ /* - * $Id: misc.c 307 2009-11-25 13:32:20Z hyunghwan.chung $ + * $Id: misc.c 311 2009-12-09 11:35:54Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. @@ -1035,14 +1035,12 @@ qse_char_t* qse_awk_rtx_strxnfld ( (err == QSE_REX_ERPAREN)? QSE_AWK_EREXRPAREN: \ (err == QSE_REX_ERBRACK)? QSE_AWK_EREXRBRACK: \ (err == QSE_REX_ERBRACE)? QSE_AWK_EREXRBRACE: \ - (err == QSE_REX_EUNBALPAREN)? QSE_AWK_EREXUNBALPAREN: \ - (err == QSE_REX_EINVALBRACE)? QSE_AWK_EREXINVALBRACE: \ (err == QSE_REX_ECOLON)? QSE_AWK_EREXCOLON: \ (err == QSE_REX_ECRANGE)? QSE_AWK_EREXCRANGE: \ (err == QSE_REX_ECCLASS)? QSE_AWK_EREXCCLASS: \ (err == QSE_REX_EBOUND)? QSE_AWK_EREXBOUND: \ + (err == QSE_REX_ESPCAWP)? QSE_AWK_EREXSPCAWP: \ (err == QSE_REX_EPREEND)? QSE_AWK_EREXPREEND: \ - (err == QSE_REX_EGARBAGE)? 
QSE_AWK_EREXGARBAGE: \ QSE_AWK_EINTERN) void* qse_awk_buildrex ( diff --git a/qse/lib/cmn/rex.c b/qse/lib/cmn/rex.c index fcb9a362..e5d78d68 100644 --- a/qse/lib/cmn/rex.c +++ b/qse/lib/cmn/rex.c @@ -1,5 +1,5 @@ /* - * $Id: rex.c 307 2009-11-25 13:32:20Z hyunghwan.chung $ + * $Id: rex.c 311 2009-12-09 11:35:54Z hyunghwan.chung $ * Copyright 2006-2009 Chung, Hyung-Hwan. This file is part of QSE. @@ -384,22 +384,7 @@ void* qse_buildrex ( if (builder.ptn.curc.type != CT_EOF) { if (errnum != QSE_NULL) - { - if (builder.ptn.curc.type == CT_SPECIAL && - builder.ptn.curc.value == QSE_T(')')) - { - *errnum = QSE_REX_EUNBALPAREN; - } - else if (builder.ptn.curc.type == CT_SPECIAL && - builder.ptn.curc.value == QSE_T('{')) - { - *errnum = QSE_REX_EINVALBRACE; - } - else - { - *errnum = QSE_REX_EGARBAGE; - } - } + *errnum = QSE_REX_ESPCAWP; QSE_MMGR_FREE (builder.mmgr, builder.code.buf); return QSE_NULL; diff --git a/qse/lib/cmn/rex1.c b/qse/lib/cmn/rex1.c index 458dca71..d91b5542 100644 --- a/qse/lib/cmn/rex1.c +++ b/qse/lib/cmn/rex1.c @@ -210,14 +210,12 @@ const qse_char_t* qse_rex_geterrmsg (qse_rex_t* rex) QSE_T("right parenthesis expected"), QSE_T("right bracket expected"), QSE_T("right brace expected"), - QSE_T("unbalanced parenthesis"), - QSE_T("invalid brace"), QSE_T("colon expected"), QSE_T("invalid character range"), QSE_T("invalid character class"), QSE_T("invalid occurrence bound"), - QSE_T("premature expression end"), - QSE_T("garbage after expression"), + QSE_T("special character at wrong position"), + QSE_T("premature expression end") }; return (rex->errnum >= 0 && rex->errnum < QSE_COUNTOF(errstr))? 
@@ -793,6 +791,20 @@ static qse_rex_node_t* comp2 (comp_t* com) break; default: + if (com->rex->option & QSE_REX_STRICT) + { + qse_char_t spc[] = QSE_T(")?*+{"); + + if (com->rex->option & QSE_REX_NOBOUND) + spc[4] = QSE_T('\0'); + + if (qse_strchr (spc, com->c.value) != QSE_NULL) + { + com->rex->errnum = QSE_REX_ESPCAWP; + return QSE_NULL; + } + } + goto normal_char; } } @@ -1202,13 +1214,21 @@ else qse_printf (QSE_T("adding %d NA\n"), node->id); */ -if (qse_lda_search ( - &e->cand.set[e->cand.pending], - 0, - &cand, 1) != QSE_LDA_NIL) -{ -return 0; -} + if (qse_lda_search ( + &e->cand.set[e->cand.pending], + 0, &cand, 1) != QSE_LDA_NIL) + { + /* exclude any existing entries in the array. + * see comp_cand() for the equality test used. + * note this linear search may be a performance bottleneck + * if the array grows large. not so sure if it should be + * switched to a different data structure such as a hash table. + * the problem is that most practical regular expressions + * won't have many candidates for a particular match point. + * so i'm a bit skeptical about switching data structures. + */ + return 0; + } if (qse_lda_insert ( &e->cand.set[e->cand.pending], @@ -1810,8 +1830,9 @@ static int comp_cand (qse_lda_t* lda, { cand_t* c1 = (cand_t*)dptr1; cand_t* c2 = (cand_t*)dptr2; - if (c1->node == c2->node) return 0; - return 1; + return (c1->node == c2->node && + c1->mptr == c2->mptr && + c1->occ == c2->occ)? 0: 1; } static int init_exec_dds (exec_t* e, qse_mmgr_t* mmgr) diff --git a/qse/samples/cmn/rex01.c b/qse/samples/cmn/rex01.c index 9b30f0c4..cd236042 100644 --- a/qse/samples/cmn/rex01.c +++ b/qse/samples/cmn/rex01.c @@ -26,6 +26,8 @@ static int rex_main (int argc, qse_char_t* argv[]) return -1; } +qse_rex_setoption (rex, QSE_REX_STRICT); + start = qse_rex_comp (rex, argv[1], qse_strlen(argv[1])); if (start == QSE_NULL) {