From b9e365f148c882a02f773bd64845ca918f3606cf Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Sat, 24 Sep 2011 08:52:22 +0000 Subject: [PATCH] added qse_sed_setexechook() added qse_strfnmat() and related functions --- qse/cmd/sed/sed.c | 23 ++ qse/include/qse/cmn/str.h | 142 +++++++- qse/include/qse/sed/sed.h | 28 +- qse/lib/cmn/Makefile.am | 1 + qse/lib/cmn/Makefile.in | 15 +- qse/lib/cmn/str-fnmat.c | 692 ++++++++++++++++++++++++++++++++++++++ qse/lib/cmn/str-pbrk.c | 110 +++++- qse/lib/sed/sed.c | 21 +- qse/lib/sed/sed.h | 7 +- qse/samples/cmn/str.c | 55 +++ 10 files changed, 1078 insertions(+), 16 deletions(-) create mode 100644 qse/lib/cmn/str-fnmat.c diff --git a/qse/cmd/sed/sed.c b/qse/cmd/sed/sed.c index 2e3c8e93..1ac2be24 100644 --- a/qse/cmd/sed/sed.c +++ b/qse/cmd/sed/sed.c @@ -429,6 +429,25 @@ static void unset_intr_run (void) #endif } +static void trace (qse_sed_t* sed, qse_sed_exec_op_t op, const qse_sed_cmd_t* cmd) +{ /* exec hook: print a label for the phase reported in op; sed and cmd are not used */ + switch (op) + { + case QSE_SED_EXEC_READ: + qse_printf (QSE_T("reading...\n")); + break; + case QSE_SED_EXEC_WRITE: + qse_printf (QSE_T("writing...\n")); + break; + case QSE_SED_EXEC_MATCH: + qse_printf (QSE_T("matching...\n")); + break; + case QSE_SED_EXEC_EXEC: + qse_printf (QSE_T("executing...\n")); + break; + } +} + int sed_main (int argc, qse_char_t* argv[]) { qse_mmgr_t* mmgr = QSE_NULL; @@ -499,6 +518,10 @@ int sed_main (int argc, qse_char_t* argv[]) goto oops; } +#if 0 +if (g_trace) qse_sed_setexechook (sed, trace); +#endif + if (g_separate && g_infile_pos > 0) { qse_sed_iostd_t out; diff --git a/qse/include/qse/cmn/str.h b/qse/include/qse/cmn/str.h index 38484fd1..65dbf02a 100644 --- a/qse/include/qse/cmn/str.h +++ b/qse/include/qse/cmn/str.h @@ -1,5 +1,5 @@ /* - * $Id: str.h 556 2011-08-31 15:43:46Z hyunghwan.chung $ + * $Id: str.h 576 2011-09-23 14:52:22Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. 
@@ -317,6 +317,34 @@ enum qse_wcstrmx_op_t # define QSE_STRTRMX_RIGHT QSE_WCSTRMX_RIGHT #endif +enum qse_mbsfnmat_flag_t +{ + QSE_MBSFNMAT_PATHNAME = (1 << 0), + QSE_MBSFNMAT_NOESCAPE = (1 << 1), + QSE_MBSFNMAT_PERIOD = (1 << 2), + QSE_MBSFNMAT_IGNORECASE = (1 << 3) +}; + +enum qse_wcsfnmat_flag_t +{ + QSE_WCSFNMAT_PATHNAME = (1 << 0), + QSE_WCSFNMAT_NOESCAPE = (1 << 1), + QSE_WCSFNMAT_PERIOD = (1 << 2), + QSE_WCSFNMAT_IGNORECASE = (1 << 3) +}; + +#ifdef QSE_CHAR_IS_MCHAR +# define QSE_STRFNMAT_PATHNAME QSE_MBSFNMAT_PATHNAME +# define QSE_STRFNMAT_NOESCAPE QSE_MBSFNMAT_NOESCAPE +# define QSE_STRFNMAT_PERIOD QSE_MBSFNMAT_PERIOD +# define QSE_STRFNMAT_IGNORECASE QSE_MBSFNMAT_IGNORECASE +#else +# define QSE_STRFNMAT_PATHNAME QSE_WCSFNMAT_PATHNAME +# define QSE_STRFNMAT_NOESCAPE QSE_WCSFNMAT_NOESCAPE +# define QSE_STRFNMAT_PERIOD QSE_WCSFNMAT_PERIOD +# define QSE_STRFNMAT_IGNORECASE QSE_WCSFNMAT_IGNORECASE +#endif + #ifdef __cplusplus extern "C" { #endif @@ -1489,6 +1517,23 @@ qse_mchar_t* qse_mbspbrk ( const qse_mchar_t* str2 ); +qse_mchar_t* qse_mbsxpbrk ( + const qse_mchar_t* str1, + qse_size_t len, + const qse_mchar_t* str2 +); + +qse_mchar_t* qse_mbsrpbrk ( + const qse_mchar_t* str1, + const qse_mchar_t* str2 +); + +qse_mchar_t* qse_mbsxrpbrk ( + const qse_mchar_t* str1, + qse_size_t len, + const qse_mchar_t* str2 +); + /* * The qse_wcspbrk() function searches @a str1 for the first occurrence of * a character in @a str2. 
@@ -1500,10 +1545,33 @@ qse_wchar_t* qse_wcspbrk ( const qse_wchar_t* str2 ); +qse_wchar_t* qse_wcsxpbrk ( + const qse_wchar_t* str1, + qse_size_t len, + const qse_wchar_t* str2 +); + +qse_wchar_t* qse_wcsrpbrk ( + const qse_wchar_t* str1, + const qse_wchar_t* str2 +); + +qse_wchar_t* qse_wcsxrpbrk ( + const qse_wchar_t* str1, + qse_size_t len, + const qse_wchar_t* str2 +); + #ifdef QSE_CHAR_IS_MCHAR -# define qse_strpbrk(str1,str2) qse_mbspbrk(str1,str2) +# define qse_strpbrk(str1,str2) qse_mbspbrk(str1,str2) +# define qse_strxpbrk(str1,len,str2) qse_mbsxpbrk(str1,len,str2) +# define qse_strrpbrk(str1,str2) qse_mbsrpbrk(str1,str2) +# define qse_strxrpbrk(str1,len,str2) qse_mbsxrpbrk(str1,len,str2) #else -# define qse_strpbrk(str1,str2) qse_wcspbrk(str1,str2) +# define qse_strpbrk(str1,str2) qse_wcspbrk(str1,str2) +# define qse_strxpbrk(str1,len,str2) qse_wcsxpbrk(str1,len,str2) +# define qse_strrpbrk(str1,str2) qse_wcsrpbrk(str1,str2) +# define qse_strxrpbrk(str1,len,str2) qse_wcsxrpbrk(str1,len,str2) #endif /* @@ -2101,6 +2169,74 @@ qse_size_t qse_wcsxpac ( # define qse_strxpac(str,len) qse_wcsxpac(str,len) #endif +int qse_mbsfnmat ( + const qse_mchar_t* str, + const qse_mchar_t* ptn, + int flags +); + +int qse_mbsxfnmat ( + const qse_mchar_t* str, + qse_size_t slen, + const qse_mchar_t* ptn, + int flags +); + +int qse_mbsnfnmat ( + const qse_mchar_t* str, + const qse_mchar_t* ptn, + qse_size_t plen, + int flags +); + +int qse_mbsxnfnmat ( + const qse_mchar_t* str, + qse_size_t slen, + const qse_mchar_t* ptn, + qse_size_t plen, + int flags +); + +int qse_wcsfnmat ( + const qse_wchar_t* str, + const qse_wchar_t* ptn, + int flags +); + +int qse_wcsxfnmat ( + const qse_wchar_t* str, + qse_size_t slen, + const qse_wchar_t* ptn, + int flags +); + +int qse_wcsnfnmat ( + const qse_wchar_t* str, + const qse_wchar_t* ptn, + qse_size_t plen, + int flags +); + +int qse_wcsxnfnmat ( + const qse_wchar_t* str, + qse_size_t slen, + const qse_wchar_t* ptn, + qse_size_t plen, + 
int flags +); + +#ifdef QSE_CHAR_IS_MCHAR +# define qse_strfnmat(str,ptn,flags) qse_mbsfnmat(str,ptn,flags) +# define qse_strxfnmat(str,slen,ptn,flags) qse_mbsxfnmat(str,slen,ptn,flags) +# define qse_strnfnmat(str,ptn,plen,flags) qse_mbsnfnmat(str,ptn,plen,flags) +# define qse_strxnfnmat(str,slen,ptn,plen,flags) qse_mbsxnfnmat(str,slen,ptn,plen,flags) +#else +# define qse_strfnmat(str,ptn,flags) qse_wcsfnmat(str,ptn,flags) +# define qse_strxfnmat(str,slen,ptn,flags) qse_wcsxfnmat(str,slen,ptn,flags) +# define qse_strnfnmat(str,ptn,plen,flags) qse_wcsnfnmat(str,ptn,plen,flags) +# define qse_strxnfnmat(str,slen,ptn,plen,flags) qse_wcsxnfnmat(str,slen,ptn,plen,flags) +#endif + /** * The qse_mbstowcslen() function scans a null-terminated multibyte string * to calculate the number of wide characters it can be converted to. diff --git a/qse/include/qse/sed/sed.h b/qse/include/qse/sed/sed.h index 73c29bc3..02118c10 100644 --- a/qse/include/qse/sed/sed.h +++ b/qse/include/qse/sed/sed.h @@ -1,5 +1,5 @@ /* - * $Id: sed.h 570 2011-09-20 04:40:45Z hyunghwan.chung $ + * $Id: sed.h 576 2011-09-23 14:52:22Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. @@ -65,6 +65,8 @@ */ typedef struct qse_sed_t qse_sed_t; +typedef struct qse_sed_cmd_t qse_sed_cmd_t; + /** * The qse_sed_loc_t defines a structure to store location information. 
*/ @@ -193,6 +195,21 @@ typedef int (*qse_sed_lformatter_t) ( int (*cwriter) (qse_sed_t*, qse_char_t) ); +enum qse_sed_exec_op_t +{ + QSE_SED_EXEC_READ, + QSE_SED_EXEC_WRITE, + QSE_SED_EXEC_MATCH, + QSE_SED_EXEC_EXEC +}; +typedef enum qse_sed_exec_op_t qse_sed_exec_op_t; + +typedef void (*qse_sed_exec_hook_t) ( + qse_sed_t* sed, + qse_sed_exec_op_t op, + const qse_sed_cmd_t* cmd +); + #ifdef __cplusplus extern "C" { #endif @@ -415,6 +432,15 @@ void qse_sed_setlinnum ( qse_size_t num /**< a line number */ ); +qse_sed_exec_hook_t qse_sed_getexechook ( + qse_sed_t* sed +); + +void qse_sed_setexechook ( + qse_sed_t* sed, + qse_sed_exec_hook_t hook +); + #ifdef __cplusplus } #endif diff --git a/qse/lib/cmn/Makefile.am b/qse/lib/cmn/Makefile.am index 424f116c..33eb8900 100644 --- a/qse/lib/cmn/Makefile.am +++ b/qse/lib/cmn/Makefile.am @@ -47,6 +47,7 @@ libqsecmn_la_SOURCES = \ str-end.c \ str-excl.c \ str-fcpy.c \ + str-fnmat.c \ str-incl.c \ str-len.c \ str-pac.c \ diff --git a/qse/lib/cmn/Makefile.in b/qse/lib/cmn/Makefile.in index 2896e30d..a394ba91 100644 --- a/qse/lib/cmn/Makefile.in +++ b/qse/lib/cmn/Makefile.in @@ -79,12 +79,13 @@ am_libqsecmn_la_OBJECTS = alg-search.lo alg-sort.lo assert.lo chr.lo \ rex.lo sio.lo sll.lo str-beg.lo str-cat.lo str-chr.lo \ str-cnv.lo str-cmp.lo str-cpy.lo str-del.lo str-dup.lo \ str-dynm.lo str-dynw.lo str-end.lo str-excl.lo str-fcpy.lo \ - str-incl.lo str-len.lo str-pac.lo str-pbrk.lo str-put.lo \ - str-rev.lo str-rot.lo str-set.lo str-spl.lo str-spn.lo \ - str-str.lo str-subst.lo str-tok.lo str-trm.lo str-word.lo \ - time.lo tio.lo tio-get.lo tio-put.lo tre.lo tre-ast.lo \ - tre-compile.lo tre-match-backtrack.lo tre-match-parallel.lo \ - tre-parse.lo tre-stack.lo stdio.lo xma.lo + str-fnmat.lo str-incl.lo str-len.lo str-pac.lo str-pbrk.lo \ + str-put.lo str-rev.lo str-rot.lo str-set.lo str-spl.lo \ + str-spn.lo str-str.lo str-subst.lo str-tok.lo str-trm.lo \ + str-word.lo time.lo tio.lo tio-get.lo tio-put.lo tre.lo \ + tre-ast.lo 
tre-compile.lo tre-match-backtrack.lo \ + tre-match-parallel.lo tre-parse.lo tre-stack.lo stdio.lo \ + xma.lo libqsecmn_la_OBJECTS = $(am_libqsecmn_la_OBJECTS) libqsecmn_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -312,6 +313,7 @@ libqsecmn_la_SOURCES = \ str-end.c \ str-excl.c \ str-fcpy.c \ + str-fnmat.c \ str-incl.c \ str-len.c \ str-pac.c \ @@ -469,6 +471,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-end.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-excl.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-fcpy.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-fnmat.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-incl.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-len.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-pac.Plo@am__quote@ diff --git a/qse/lib/cmn/str-fnmat.c b/qse/lib/cmn/str-fnmat.c new file mode 100644 index 00000000..cdfe086b --- /dev/null +++ b/qse/lib/cmn/str-fnmat.c @@ -0,0 +1,692 @@ +/* + * $Id$ + * + Copyright 2006-2011 Chung, Hyung-Hwan. + This file is part of QSE. + + QSE is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation, either version 3 of + the License, or (at your option) any later version. + + QSE is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with QSE. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <qse/cmn/str.h> +#include <qse/cmn/chr.h> + +/* ------------------------------------------------------------------------- */ +/* | MBS VERSION | */ +/* ------------------------------------------------------------------------- */ + +/* separator to use in a pattern. + * this also matches a backslash in non-unix OS where a backslash + * is used as a path separator. MBS_SEPCHS defines OS path separators. */ +#define MBS_SEP QSE_MT('/') +#define MBS_ESC QSE_MT('\\') +#define MBS_DOT QSE_MT('.') + +#if defined(_WIN32) || defined(__OS2__) || defined(__DOS__) +# define MBS_ISSEPCH(c) ((c) == QSE_MT('/') || (c) == QSE_MT('\\')) +# define MBS_SEPCHS QSE_MT("/\\") +#else +# define MBS_ISSEPCH(c) ((c) == QSE_MT('/')) +# define MBS_SEPCHS QSE_MT("/") +#endif +/* match str[0..slen) against the wildcard pattern ptn[0..plen) under the QSE_MBSFNMAT_XXX flags. returns 1 on a match and 0 otherwise. no_first_period is 0 for a top-level call and 1 for the recursive call made while expanding '*', where the start of the sub-string is not the start of the whole string. */ +static int __mbsxnfnmat ( + const qse_mchar_t* str, qse_size_t slen, + const qse_mchar_t* ptn, qse_size_t plen, int flags, int no_first_period) +{ + const qse_mchar_t* sp = str; + const qse_mchar_t* pp = ptn; + const qse_mchar_t* se = str + slen; + const qse_mchar_t* pe = ptn + plen; + qse_mchar_t sc, pc, pc2; + + while (1) + { + if (pp < pe && *pp == MBS_ESC && + !(flags & QSE_MBSFNMAT_NOESCAPE)) + { + /* pattern is escaped and escaping is allowed. */ + + if ((++pp) >= pe) + { + /* + * the last character of the pattern is an MBS_ESC. + * matching is performed as if the end of the pattern is + * reached just without an MBS_ESC. + */ + if (sp < se) return 0; + return 1; + } + + if (sp >= se) return 0; /* premature string termination */ + + sc = *sp; pc = *pp; /* pc is just a normal character */ + if ((flags & QSE_MBSFNMAT_IGNORECASE) != 0) + { + /* make characters to lower-case */ + sc = QSE_TOMLOWER(sc); + pc = QSE_TOMLOWER(pc); + } + + if (sc != pc) return 0; + sp++; pp++; + continue; + } + if (pp >= pe) + { + /* + * the end of the pattern has been reached. + * the string must terminate too. 
+ */ + return sp >= se; + } + + if (sp >= se) + { + /* the string terminates prematurely */ + while (pp < pe && *pp == QSE_MT('*')) pp++; + return pp >= pe; + } + + sc = *sp; pc = *pp; + + if (sc == MBS_DOT && (flags & QSE_MBSFNMAT_PERIOD)) + { + /* + * a leading period in the string must match + * a period in the pattern explicitly + */ + if ((!no_first_period && sp == str) || + (MBS_ISSEPCH(sp[-1]) && (flags & QSE_MBSFNMAT_PATHNAME))) + { + if (pc != MBS_DOT) return 0; + sp++; pp++; + continue; + } + } + else if (MBS_ISSEPCH(sc) && (flags & QSE_MBSFNMAT_PATHNAME)) + { + while (pc == QSE_MT('*')) + { + if ((++pp) >= pe) return 0; + pc = *pp; + } + + /* a path separator must be matched explicitly */ + if (pc != MBS_SEP) return 0; + sp++; pp++; + continue; + } + + /* the handling of special pattern characters begins here */ + if (pc == QSE_MT('?')) + { + /* match any single character */ + sp++; pp++; + } + else if (pc == QSE_MT('*')) + { + /* match zero or more characters */ + + /* compact asterisks */ + do { pp++; } while (pp < pe && *pp == QSE_MT('*')); + + if (pp >= pe) + { + /* + * if the last character in the pattern is an asterisk, + * the string should not have any directory separators + * when QSE_MBSFNMAT_PATHNAME is set. 
+ */ + if (flags & QSE_MBSFNMAT_PATHNAME) + { + if (qse_mbsxpbrk(sp, se-sp, MBS_SEPCHS) != QSE_NULL) return 0; + } + return 1; + } + else + { + do + { + if (__mbsxnfnmat(sp, se - sp, pp, pe - pp, flags, 1)) + { + return 1; + } + + if (MBS_ISSEPCH(*sp) && + (flags & QSE_MBSFNMAT_PATHNAME)) break; + + sp++; + } + while (sp < se); + + return 0; + } + } + else if (pc == QSE_MT('[')) + { + /* match a bracket expression. 'matched' only ever goes from 0 to 1 inside it so that a later list item can not cancel an earlier hit */ + int negate = 0; + int matched = 0; + + if ((++pp) >= pe) return 0; + if (*pp == QSE_MT('!')) { negate = 1; pp++; } + + while (pp < pe && *pp != QSE_MT(']')) + { + if (*pp == QSE_MT('[')) + { + qse_size_t pl = pe - pp; + + if (pl >= 10) + { + if (qse_mbszcmp(pp, QSE_MT("[:xdigit:]"), 10) == 0) + { + if (QSE_ISMXDIGIT(sc)) matched = 1; + pp += 10; continue; + } + } + + if (pl >= 9) + { + if (qse_mbszcmp(pp, QSE_MT("[:upper:]"), 9) == 0) + { + if (QSE_ISMUPPER(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_mbszcmp(pp, QSE_MT("[:lower:]"), 9) == 0) + { + if (QSE_ISMLOWER(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_mbszcmp(pp, QSE_MT("[:alpha:]"), 9) == 0) + { + if (QSE_ISMALPHA(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_mbszcmp(pp, QSE_MT("[:digit:]"), 9) == 0) + { + if (QSE_ISMDIGIT(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_mbszcmp(pp, QSE_MT("[:alnum:]"), 9) == 0) + { + if (QSE_ISMALNUM(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_mbszcmp(pp, QSE_MT("[:space:]"), 9) == 0) + { + if (QSE_ISMSPACE(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_mbszcmp(pp, QSE_MT("[:print:]"), 9) == 0) + { + if (QSE_ISMPRINT(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_mbszcmp(pp, QSE_MT("[:graph:]"), 9) == 0) + { + if (QSE_ISMGRAPH(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_mbszcmp(pp, QSE_MT("[:cntrl:]"), 9) == 0) + { + if (QSE_ISMCNTRL(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_mbszcmp(pp, QSE_MT("[:punct:]"), 9) == 0) + { + if (QSE_ISMPUNCT(sc)) matched = 1; + pp += 9; continue; + } + } + + /* + * characters in an invalid class name are +
* just treated as normal characters + */ + } + + if (*pp == MBS_ESC && + !(flags & QSE_MBSFNMAT_NOESCAPE)) pp++; + else if (*pp == QSE_MT(']')) break; + + if (pp >= pe) break; + + pc = *pp; + if ((flags & QSE_MBSFNMAT_IGNORECASE) != 0) + { + sc = QSE_TOMLOWER(sc); + pc = QSE_TOMLOWER(pc); + } + + if (pp + 1 < pe && pp[1] == QSE_MT('-')) + { + pp += 2; /* move to the character next to a dash */ + + if (pp >= pe) + { + if (sc >= pc) matched = 1; + break; + } + + if (*pp == MBS_ESC && + !(flags & QSE_MBSFNMAT_NOESCAPE)) + { + if ((++pp) >= pe) + { + if (sc >= pc) matched = 1; + break; + } + } + else if (*pp == QSE_MT(']')) + { + if (sc >= pc) matched = 1; + break; + } + + pc2 = *pp; + if ((flags & QSE_MBSFNMAT_IGNORECASE) != 0) + pc2 = QSE_TOMLOWER(pc2); + + if (sc >= pc && sc <= pc2) matched = 1; + pp++; + } + else + { + if (sc == pc) matched = 1; + pp++; + } + } + + if (negate) matched = !matched; + if (!matched) return 0; + sp++; if (pp < pe) pp++; + } + else + { + /* a normal character */ + if ((flags & QSE_MBSFNMAT_IGNORECASE) != 0) + { + sc = QSE_TOMLOWER(sc); + pc = QSE_TOMLOWER(pc); + } + + if (sc != pc) return 0; + sp++; pp++; + } + } + + /* will never reach here. but make some immature compilers happy... 
*/ + return 0; +} +/* public entry points. a length that the caller does not pass in is obtained with qse_mbslen(). */ +int qse_mbsfnmat (const qse_mchar_t* str, const qse_mchar_t* ptn, int flags) +{ + return __mbsxnfnmat ( + str, qse_mbslen(str), ptn, qse_mbslen(ptn), flags, 0); +} + +int qse_mbsxfnmat ( + const qse_mchar_t* str, qse_size_t slen, const qse_mchar_t* ptn, int flags) +{ + return __mbsxnfnmat (str, slen, ptn, qse_mbslen(ptn), flags, 0); +} + +int qse_mbsnfnmat ( + const qse_mchar_t* str, const qse_mchar_t* ptn, qse_size_t plen, int flags) +{ + return __mbsxnfnmat (str, qse_mbslen(str), ptn, plen, flags, 0); +} + +int qse_mbsxnfnmat ( + const qse_mchar_t* str, qse_size_t slen, + const qse_mchar_t* ptn, qse_size_t plen, int flags) +{ + return __mbsxnfnmat (str, slen, ptn, plen, flags, 0); +} + +/* ------------------------------------------------------------------------- */ +/* | WCS VERSION | */ +/* ------------------------------------------------------------------------- */ + +/* separator to use in a pattern. + * this also matches a backslash in non-unix OS where a backslash + * is used as a path separator. WCS_SEPCHS defines OS path separators. */ +#define WCS_SEP QSE_WT('/') +#define WCS_ESC QSE_WT('\\') +#define WCS_DOT QSE_WT('.') + +#if defined(_WIN32) || defined(__OS2__) || defined(__DOS__) +# define WCS_ISSEPCH(c) ((c) == QSE_WT('/') || (c) == QSE_WT('\\')) +# define WCS_SEPCHS QSE_WT("/\\") +#else +# define WCS_ISSEPCH(c) ((c) == QSE_WT('/')) +# define WCS_SEPCHS QSE_WT("/") +#endif +/* match str[0..slen) against the wildcard pattern ptn[0..plen) under the QSE_WCSFNMAT_XXX flags. returns 1 on a match and 0 otherwise. no_first_period is 0 for a top-level call and 1 for the recursive call made while expanding '*', where the start of the sub-string is not the start of the whole string. */ +static int __wcsxnfnmat ( + const qse_wchar_t* str, qse_size_t slen, + const qse_wchar_t* ptn, qse_size_t plen, int flags, int no_first_period) +{ + const qse_wchar_t* sp = str; + const qse_wchar_t* pp = ptn; + const qse_wchar_t* se = str + slen; + const qse_wchar_t* pe = ptn + plen; + qse_wchar_t sc, pc, pc2; + + while (1) + { + if (pp < pe && *pp == WCS_ESC && + !(flags & QSE_WCSFNMAT_NOESCAPE)) + { + /* pattern is escaped and escaping is allowed. */ + + if ((++pp) >= pe) + { + /* + * the last character of the pattern is a WCS_ESC. 
+ * matching is performed as if the end of the pattern is + * reached just without a WCS_ESC. + */ + if (sp < se) return 0; + return 1; + } + + if (sp >= se) return 0; /* premature string termination */ + + sc = *sp; pc = *pp; /* pc is just a normal character */ + if ((flags & QSE_WCSFNMAT_IGNORECASE) != 0) + { + /* make characters to lower-case */ + sc = QSE_TOWLOWER(sc); + pc = QSE_TOWLOWER(pc); + } + + if (sc != pc) return 0; + sp++; pp++; + continue; + } + if (pp >= pe) + { + /* + * the end of the pattern has been reached. + * the string must terminate too. + */ + return sp >= se; + } + + if (sp >= se) + { + /* the string terminates prematurely */ + while (pp < pe && *pp == QSE_WT('*')) pp++; + return pp >= pe; + } + + sc = *sp; pc = *pp; + + if (sc == WCS_DOT && (flags & QSE_WCSFNMAT_PERIOD)) + { + /* + * a leading period in the string must match + * a period in the pattern explicitly + */ + if ((!no_first_period && sp == str) || + (WCS_ISSEPCH(sp[-1]) && (flags & QSE_WCSFNMAT_PATHNAME))) + { + if (pc != WCS_DOT) return 0; + sp++; pp++; + continue; + } + } + else if (WCS_ISSEPCH(sc) && (flags & QSE_WCSFNMAT_PATHNAME)) + { + while (pc == QSE_WT('*')) + { + if ((++pp) >= pe) return 0; + pc = *pp; + } + + /* a path separator must be matched explicitly */ + if (pc != WCS_SEP) return 0; + sp++; pp++; + continue; + } + + /* the handling of special pattern characters begins here */ + if (pc == QSE_WT('?')) + { + /* match any single character */ + sp++; pp++; + } + else if (pc == QSE_WT('*')) + { + /* match zero or more characters */ + + /* compact asterisks */ + do { pp++; } while (pp < pe && *pp == QSE_WT('*')); + + if (pp >= pe) + { + /* + * if the last character in the pattern is an asterisk, + * the string should not have any directory separators + * when QSE_WCSFNMAT_PATHNAME is set. 
+ */ + if (flags & QSE_WCSFNMAT_PATHNAME) + { + if (qse_wcsxpbrk(sp, se-sp, WCS_SEPCHS) != QSE_NULL) return 0; + } + return 1; + } + else + { + do + { + if (__wcsxnfnmat(sp, se - sp, pp, pe - pp, flags, 1)) + { + return 1; + } + + if (WCS_ISSEPCH(*sp) && + (flags & QSE_WCSFNMAT_PATHNAME)) break; + + sp++; + } + while (sp < se); + + return 0; + } + } + else if (pc == QSE_WT('[')) + { + /* match a bracket expression. 'matched' only ever goes from 0 to 1 inside it so that a later list item can not cancel an earlier hit */ + int negate = 0; + int matched = 0; + + if ((++pp) >= pe) return 0; + if (*pp == QSE_WT('!')) { negate = 1; pp++; } + + while (pp < pe && *pp != QSE_WT(']')) + { + if (*pp == QSE_WT('[')) + { + qse_size_t pl = pe - pp; + + if (pl >= 10) + { + if (qse_wcszcmp(pp, QSE_WT("[:xdigit:]"), 10) == 0) + { + if (QSE_ISWXDIGIT(sc)) matched = 1; + pp += 10; continue; + } + } + + if (pl >= 9) + { + if (qse_wcszcmp(pp, QSE_WT("[:upper:]"), 9) == 0) + { + if (QSE_ISWUPPER(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_wcszcmp(pp, QSE_WT("[:lower:]"), 9) == 0) + { + if (QSE_ISWLOWER(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_wcszcmp(pp, QSE_WT("[:alpha:]"), 9) == 0) + { + if (QSE_ISWALPHA(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_wcszcmp(pp, QSE_WT("[:digit:]"), 9) == 0) + { + if (QSE_ISWDIGIT(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_wcszcmp(pp, QSE_WT("[:alnum:]"), 9) == 0) + { + if (QSE_ISWALNUM(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_wcszcmp(pp, QSE_WT("[:space:]"), 9) == 0) + { + if (QSE_ISWSPACE(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_wcszcmp(pp, QSE_WT("[:print:]"), 9) == 0) + { + if (QSE_ISWPRINT(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_wcszcmp(pp, QSE_WT("[:graph:]"), 9) == 0) + { + if (QSE_ISWGRAPH(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_wcszcmp(pp, QSE_WT("[:cntrl:]"), 9) == 0) + { + if (QSE_ISWCNTRL(sc)) matched = 1; + pp += 9; continue; + } + else if (qse_wcszcmp(pp, QSE_WT("[:punct:]"), 9) == 0) + { + if (QSE_ISWPUNCT(sc)) matched = 1; + pp += 9; continue; + } + } + + /* + * characters in an invalid class name are +
* just treated as normal characters + */ + } + + if (*pp == WCS_ESC && + !(flags & QSE_WCSFNMAT_NOESCAPE)) pp++; + else if (*pp == QSE_WT(']')) break; + + if (pp >= pe) break; + + pc = *pp; + if ((flags & QSE_WCSFNMAT_IGNORECASE) != 0) + { + sc = QSE_TOWLOWER(sc); + pc = QSE_TOWLOWER(pc); + } + + if (pp + 1 < pe && pp[1] == QSE_WT('-')) + { + pp += 2; /* move to the character next to a dash */ + + if (pp >= pe) + { + if (sc >= pc) matched = 1; + break; + } + + if (*pp == WCS_ESC && + !(flags & QSE_WCSFNMAT_NOESCAPE)) + { + if ((++pp) >= pe) + { + if (sc >= pc) matched = 1; + break; + } + } + else if (*pp == QSE_WT(']')) + { + if (sc >= pc) matched = 1; + break; + } + + pc2 = *pp; + if ((flags & QSE_WCSFNMAT_IGNORECASE) != 0) + pc2 = QSE_TOWLOWER(pc2); + + if (sc >= pc && sc <= pc2) matched = 1; + pp++; + } + else + { + if (sc == pc) matched = 1; + pp++; + } + } + + if (negate) matched = !matched; + if (!matched) return 0; + sp++; if (pp < pe) pp++; + } + else + { + /* a normal character */ + if ((flags & QSE_WCSFNMAT_IGNORECASE) != 0) + { + sc = QSE_TOWLOWER(sc); + pc = QSE_TOWLOWER(pc); + } + + if (sc != pc) return 0; + sp++; pp++; + } + } + + /* will never reach here. but make some immature compilers happy... 
*/ + return 0; +} +/* public entry points. a length that the caller does not pass in is obtained with qse_wcslen(). */ +int qse_wcsfnmat (const qse_wchar_t* str, const qse_wchar_t* ptn, int flags) +{ + return __wcsxnfnmat ( + str, qse_wcslen(str), ptn, qse_wcslen(ptn), flags, 0); +} + +int qse_wcsxfnmat ( + const qse_wchar_t* str, qse_size_t slen, const qse_wchar_t* ptn, int flags) +{ + return __wcsxnfnmat (str, slen, ptn, qse_wcslen(ptn), flags, 0); +} + +int qse_wcsnfnmat ( + const qse_wchar_t* str, const qse_wchar_t* ptn, qse_size_t plen, int flags) +{ + return __wcsxnfnmat (str, qse_wcslen(str), ptn, plen, flags, 0); +} + +int qse_wcsxnfnmat ( + const qse_wchar_t* str, qse_size_t slen, + const qse_wchar_t* ptn, qse_size_t plen, int flags) +{ + return __wcsxnfnmat (str, slen, ptn, plen, flags, 0); +} diff --git a/qse/lib/cmn/str-pbrk.c b/qse/lib/cmn/str-pbrk.c index 6f050467..a84130ed 100644 --- a/qse/lib/cmn/str-pbrk.c +++ b/qse/lib/cmn/str-pbrk.c @@ -1,5 +1,5 @@ /* - * $Id: str-pbrk.c 556 2011-08-31 15:43:46Z hyunghwan.chung $ + * $Id: str-pbrk.c 576 2011-09-23 14:52:22Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. 
@@ -35,6 +35,61 @@ qse_mchar_t* qse_mbspbrk (const qse_mchar_t* str1, const qse_mchar_t* str2) return QSE_NULL; } +qse_mchar_t* qse_mbsxpbrk ( + const qse_mchar_t* str1, qse_size_t len, const qse_mchar_t* str2) +{ + const qse_mchar_t* p1, * p2; + const qse_mchar_t* e1 = str1 + len; + + for (p1 = str1; p1 < e1; p1++) + { + for (p2 = str2; *p2 != QSE_MT('\0'); p2++) + { + if (*p2 == *p1) return (qse_mchar_t*)p1; + } + } + + return QSE_NULL; +} + + +qse_mchar_t* qse_mbsrpbrk (const qse_mchar_t* str1, const qse_mchar_t* str2) +{ + const qse_mchar_t* p1, * p2; + + for (p1 = str1; *p1 != QSE_MT('\0'); p1++); + + while (p1 > str1) + { + p1--; + for (p2 = str2; *p2 != QSE_MT('\0'); p2++) + { + if (*p2 == *p1) return (qse_mchar_t*)p1; + } + } + + return QSE_NULL; +} + +qse_mchar_t* qse_mbsxrpbrk (const qse_mchar_t* str1, qse_size_t len, const qse_mchar_t* str2) +{ + const qse_mchar_t* p1, * p2; + + p1 = str1 + len; + + while (p1 > str1) + { + p1--; + for (p2 = str2; *p2 != QSE_MT('\0'); p2++) + { + if (*p2 == *p1) return (qse_mchar_t*)p1; + } + } + + return QSE_NULL; +} + + qse_wchar_t* qse_wcspbrk (const qse_wchar_t* str1, const qse_wchar_t* str2) { const qse_wchar_t* p1, * p2; @@ -50,3 +105,56 @@ qse_wchar_t* qse_wcspbrk (const qse_wchar_t* str1, const qse_wchar_t* str2) return QSE_NULL; } +qse_wchar_t* qse_wcsxpbrk ( + const qse_wchar_t* str1, qse_size_t len, const qse_wchar_t* str2) +{ + const qse_wchar_t* p1, * p2; + const qse_wchar_t* e1 = str1 + len; + + for (p1 = str1; p1 < e1; p1++) + { + for (p2 = str2; *p2 != QSE_WT('\0'); p2++) + { + if (*p2 == *p1) return (qse_wchar_t*)p1; + } + } + + return QSE_NULL; +} + + +qse_wchar_t* qse_wcsrpbrk (const qse_wchar_t* str1, const qse_wchar_t* str2) +{ + const qse_wchar_t* p1, * p2; + + for (p1 = str1; *p1 != QSE_WT('\0'); p1++); + + while (p1 > str1) + { + p1--; + for (p2 = str2; *p2 != QSE_WT('\0'); p2++) + { + if (*p2 == *p1) return (qse_wchar_t*)p1; + } + } + + return QSE_NULL; +} + +qse_wchar_t* qse_wcsxrpbrk (const 
qse_wchar_t* str1, qse_size_t len, const qse_wchar_t* str2) +{ + const qse_wchar_t* p1, * p2; + + p1 = str1 + len; + + while (p1 > str1) + { + p1--; + for (p2 = str2; *p2 != QSE_WT('\0'); p2++) + { + if (*p2 == *p1) return (qse_wchar_t*)p1; + } + } + + return QSE_NULL; +} diff --git a/qse/lib/sed/sed.c b/qse/lib/sed/sed.c index 78b82acb..74058ea3 100644 --- a/qse/lib/sed/sed.c +++ b/qse/lib/sed/sed.c @@ -1,5 +1,5 @@ /* - * $Id: sed.c 575 2011-09-22 07:07:18Z hyunghwan.chung $ + * $Id: sed.c 576 2011-09-23 14:52:22Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. @@ -3241,6 +3241,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf) while (!sed->e.stopreq) { + if (sed->e.hook) sed->e.hook (sed, QSE_SED_EXEC_READ, QSE_NULL); + n = read_line (sed, 0); if (n <= -1) { ret = -1; goto done; } if (n == 0) goto done; @@ -3249,7 +3251,7 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf) { /* the first command block contains at least 1 command * to execute. an empty script like ' ' has no commands, - * so this block is skipped. 
*/ + * so we execute no commands */ qse_sed_cmd_t* c, * j; @@ -3258,6 +3260,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf) while (c != &sed->cmd.over) { + if (sed->e.hook) sed->e.hook (sed, QSE_SED_EXEC_MATCH, c); + n = match_address (sed, c); if (n <= -1) { ret = -1; goto done; } @@ -3268,6 +3272,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf) continue; } + if (sed->e.hook) sed->e.hook (sed, QSE_SED_EXEC_EXEC, c); + j = exec_cmd (sed, c); if (j == QSE_NULL) { ret = -1; goto done; } if (j == &sed->cmd.quit_quiet) goto done; @@ -3284,6 +3290,7 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf) } } + if (sed->e.hook) sed->e.hook (sed, QSE_SED_EXEC_WRITE, QSE_NULL); if (emit_output (sed, 0) <= -1) { ret = -1; goto done; } } @@ -3327,3 +3334,13 @@ void qse_sed_setlinnum (qse_sed_t* sed, qse_size_t num) { sed->e.in.num = num; } + +qse_sed_exec_hook_t qse_sed_getexechook (qse_sed_t* sed) +{ + return sed->e.hook; +} + +void qse_sed_setexechook (qse_sed_t* sed, qse_sed_exec_hook_t hook) +{ + sed->e.hook = hook; +} diff --git a/qse/lib/sed/sed.h b/qse/lib/sed/sed.h index a4c0f9f7..557a3bb2 100644 --- a/qse/lib/sed/sed.h +++ b/qse/lib/sed/sed.h @@ -1,5 +1,5 @@ /* - * $Id: sed.h 572 2011-09-21 05:10:09Z hyunghwan.chung $ + * $Id: sed.h 576 2011-09-23 14:52:22Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. @@ -27,7 +27,6 @@ #define QSE_MAP_AS_RBT #include - /* * Define USE_REX to use rex.h on behalf of tre.h * rex.h currently does not support backreference. 
@@ -45,7 +44,6 @@ typedef enum qse_sed_depth_t qse_sed_depth_t; #define QSE_SED_CMD_QUIT QSE_T('q') typedef struct qse_sed_adr_t qse_sed_adr_t; -typedef struct qse_sed_cmd_t qse_sed_cmd_t; typedef struct qse_sed_cmd_blk_t qse_sed_cmd_blk_t; struct qse_sed_adr_t @@ -274,6 +272,9 @@ struct qse_sed_t /** stop requested */ int stopreq; + + /** hook function */ + qse_sed_exec_hook_t hook; } e; }; diff --git a/qse/samples/cmn/str.c b/qse/samples/cmn/str.c index 29fbc630..45e312a6 100644 --- a/qse/samples/cmn/str.c +++ b/qse/samples/cmn/str.c @@ -586,6 +586,59 @@ static int test14 (void) return 0; } +static int test15 (void) +{ + const qse_char_t* x = QSE_T("this is good"); + + qse_printf (QSE_T("[%s]\n"), qse_strpbrk (x, QSE_T("si"))); + qse_printf (QSE_T("[%s]\n"), qse_strrpbrk (x, QSE_T("si"))); + qse_printf (QSE_T("[%s]\n"), qse_strpbrk (x, QSE_T("d"))); + qse_printf (QSE_T("[%s]\n"), qse_strrpbrk (x, QSE_T("d"))); + qse_printf (QSE_T("[%s]\n"), qse_strpbrk (x, QSE_T("t"))); + qse_printf (QSE_T("[%s]\n"), qse_strrpbrk (x, QSE_T("t"))); + return 0; +} + +static int test16 (void) +{ + const qse_char_t* ptn[] = + { + QSE_T("*.c"), + QSE_T("h??lo.???"), + QSE_T("[a-z]*.cpp") + }; + + const qse_char_t* name[] = + { + QSE_T("hello.c"), + QSE_T("hello.cpp"), + QSE_T("heLLo.Cpp"), + QSE_T("/tmp/hello.c"), + QSE_T("/tmp/Hello.c") + }; + + int i, j; + + + qse_printf (QSE_T("flags => 0\n")); + for (i = 0; i < QSE_COUNTOF(ptn); i++) + for (j = 0; j < QSE_COUNTOF(name); j++) + qse_printf (QSE_T("[%s] [%s] %d\n"), ptn[i], name[j], qse_strfnmat (name[j], ptn[i], 0)); + + qse_printf (QSE_T("flags => QSE_STRFNMAT_PATHNAME\n")); + for (i = 0; i < QSE_COUNTOF(ptn); i++) + for (j = 0; j < QSE_COUNTOF(name); j++) + qse_printf (QSE_T("[%s] [%s] %d\n"), ptn[i], name[j], qse_strfnmat (name[j], ptn[i], QSE_STRFNMAT_PATHNAME)); + + qse_printf (QSE_T("flags => QSE_STRFNMAT_PATHNAME | QSE_STRFNMAT_IGNORECASE\n")); + for (i = 0; i < QSE_COUNTOF(ptn); i++) + for (j = 0; j < QSE_COUNTOF(name); j++) 
+ qse_printf (QSE_T("[%s] [%s] %d\n"), ptn[i], name[j], qse_strfnmat (name[j], ptn[i], QSE_STRFNMAT_PATHNAME | QSE_STRFNMAT_IGNORECASE)); + + + return 0; +} + int main () { @@ -609,6 +662,8 @@ int main () R (test12); R (test13); R (test14); + R (test15); + R (test16); return 0; }