From 0d4ea6aab9e7f94613d7fddf882b63138d729aca Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Thu, 15 Sep 2011 09:48:08 +0000 Subject: [PATCH] fixed a bug of not printing appended text properly when n or N is executed. fixed a bug of not capturing submatches properly --- qse/lib/sed/sed.c | 141 ++++++++++++++++++++++---------- qse/regress/sed/Makefile.am | 4 +- qse/regress/sed/Makefile.in | 4 +- qse/regress/sed/regress.out | 22 +++++ qse/regress/sed/regress.out.xma | 22 +++++ qse/regress/sed/regress.sh.in | 2 + qse/regress/sed/s005.dat | 5 ++ qse/regress/sed/s005.sed | 3 + qse/regress/sed/s006.dat | 5 ++ qse/regress/sed/s006.sed | 3 + 10 files changed, 166 insertions(+), 45 deletions(-) create mode 100644 qse/regress/sed/s005.dat create mode 100644 qse/regress/sed/s005.sed create mode 100644 qse/regress/sed/s006.dat create mode 100644 qse/regress/sed/s006.sed diff --git a/qse/lib/sed/sed.c b/qse/lib/sed/sed.c index b0053e9d..60b44db7 100644 --- a/qse/lib/sed/sed.c +++ b/qse/lib/sed/sed.c @@ -1,5 +1,5 @@ /* - * $Id: sed.c 566 2011-09-11 12:44:56Z hyunghwan.chung $ + * $Id: sed.c 567 2011-09-14 15:48:08Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. @@ -22,9 +22,15 @@ #include "../cmn/mem.h" #include -#ifdef USE_REX +/* Define USE_REGEX to use regcomp(), regexec(), regfree() instead of TRE */ +/* #define USE_REGEX */ + +#if defined(USE_REX) # include #else +# if defined(QSE_CHAR_IS_MCHAR) && defined(USE_REGEX) +# include +# endif # include #endif @@ -32,6 +38,7 @@ QSE_IMPLEMENT_COMMON_FUNCTIONS (sed) static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd); static void free_all_command_blocks (qse_sed_t* sed); +static int emit_output (qse_sed_t* sed, int skipline); #define EMPTY_REX ((void*)1) @@ -106,7 +113,6 @@ int qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr) return 0; - oops_7: qse_str_fini (&sed->e.txt.held); oops_6: @@ -145,7 +151,7 @@ int qse_sed_getoption (qse_sed_t* sed) return sed->option; } -#ifdef USE_REX +#if defined(USE_REX) qse_size_t qse_sed_getmaxdepth (qse_sed_t* sed, qse_sed_depth_t id) { return (id & QSE_SED_DEPTH_REX_BUILD)? sed->depth.rex.build: @@ -163,7 +169,7 @@ static void* build_rex ( qse_sed_t* sed, const qse_cstr_t* str, int ignorecase, const qse_sed_loc_t* loc) { -#ifdef USE_REX +#if defined(USE_REX) void* rex; int opt = 0; @@ -179,6 +185,43 @@ static void* build_rex ( } return rex; + +#elif defined(QSE_CHAR_IS_MCHAR) && defined(USE_REGEX) + + regex_t* rex; + qse_char_t* strz; + int xopt = 0; + + if (ignorecase) xopt |= REG_ICASE; + if (sed->option & QSE_SED_EXTENDEDREX) xopt |= REG_EXTENDED; + + rex = QSE_MMGR_ALLOC (sed->mmgr, QSE_SIZEOF(*rex)); + if (rex == QSE_NULL) + { + SETERR0 (sed, QSE_SED_ENOMEM, loc); + return QSE_NULL; + } + + strz = qse_strxdup (str->ptr, str->len, sed->mmgr); + if (strz == QSE_NULL) + { + QSE_MMGR_FREE (sed->mmgr, rex); + SETERR0 (sed, QSE_SED_ENOMEM, loc); + return QSE_NULL; + } + + xopt = regcomp (rex, strz, xopt); + QSE_MMGR_FREE (sed->mmgr, strz); + + if (xopt != 0) + { + QSE_MMGR_FREE (sed->mmgr, rex); + SETERR1 (sed, QSE_SED_EREXBL, str->ptr, str->len, loc); + return QSE_NULL; + } + + return rex; + #else qse_tre_t* tre; int opt = 0; @@ -197,11 +240,10 @@ static void* build_rex ( if (qse_tre_compx (tre, str->ptr, str->len, QSE_NULL, opt) <= -1) { - qse_sed_errnum_t errnum; - errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)? - QSE_TRE_ENOMEM: QSE_SED_EREXBL; - SETERR1 (sed, errnum, str->ptr, str->len, loc); - + if (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM) + SETERR0 (sed, QSE_SED_ENOMEM, loc); + else + SETERR1 (sed, QSE_SED_EREXBL, str->ptr, str->len, loc); qse_tre_close (tre); return QSE_NULL; } @@ -212,23 +254,44 @@ static void* build_rex ( static QSE_INLINE void free_rex (qse_sed_t* sed, void* rex) { -#ifdef USE_REX +#if defined(USE_REX) qse_freerex (sed->mmgr, rex); +#elif defined(QSE_CHAR_IS_MCHAR) && defined(USE_REGEX) + regfree (rex); + QSE_MMGR_FREE (sed->mmgr, rex); #else qse_tre_close (rex); #endif } -#ifndef USE_REX +#if !defined(USE_REX) static int matchtre ( qse_sed_t* sed, qse_tre_t* tre, int opt, const qse_cstr_t* str, qse_cstr_t* mat, qse_cstr_t submat[9], const qse_sed_loc_t* loc) { +#if defined(QSE_CHAR_IS_MCHAR) && defined(USE_REGEX) + regmatch_t match[10]; + qse_char_t* strz; + int xopt = 0; + + if (opt & QSE_TRE_NOTBOL) xopt |= REG_NOTBOL; + + strz = qse_strxdup (str->ptr, str->len, sed->mmgr); + if (strz == QSE_NULL) + { + SETERR0 (sed, QSE_SED_ENOMEM, loc); + return -1; + } + xopt = regexec ((regex_t*)tre, strz, QSE_COUNTOF(match), match, xopt); + QSE_MMGR_FREE (sed->mmgr, strz); + if (xopt == REG_NOMATCH) return 0; +#else + int n; qse_tre_match_t match[10] = { { 0, 0 }, }; - n = qse_tre_execx (tre, str->ptr, str->len, match, 10, opt); + n = qse_tre_execx (tre, str->ptr, str->len, match, QSE_COUNTOF(match), opt); if (n <= -1) { qse_sed_errnum_t errnum; @@ -236,11 +299,12 @@ static int matchtre ( if (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMATCH) return 0; errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)? - QSE_TRE_ENOMEM: QSE_SED_EREXMA; + QSE_SED_ENOMEM: QSE_SED_EREXMA; SETERR0 (sed, errnum, loc); return -1; } - +#endif + QSE_ASSERT (match[0].rm_so != -1); if (mat) { @@ -256,9 +320,11 @@ static int matchtre ( * function because it can abort filling */ for (i = 1; i < QSE_COUNTOF(match); i++) { - if (match[i].rm_so == -1) break; - submat[i-1].ptr = &str->ptr[match[i].rm_so]; - submat[i-1].len = match[i].rm_eo - match[i].rm_so; + if (match[i].rm_so != -1) + { + submat[i-1].ptr = &str->ptr[match[i].rm_so]; + submat[i-1].len = match[i].rm_eo - match[i].rm_so; + } } } return 1; @@ -2035,7 +2101,7 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) { qse_cstr_t mat, pmat; int opt = 0, repl = 0, n; -#ifdef USE_REX +#if defined(USE_REX) qse_rex_errnum_t errnum; #endif const qse_char_t* finalizer = QSE_NULL; @@ -2047,7 +2113,7 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) QSE_ASSERT (cmd->type == QSE_SED_CMD_SUBSTITUTE); qse_str_clear (&sed->e.txt.subst); -#ifdef USE_REX +#if defined(USE_REX) if (cmd->u.subst.i) opt = QSE_REX_IGNORECASE; #endif @@ -2106,7 +2172,7 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) sed->e.last_rex = rex; } -#ifdef USE_REX +#if defined(USE_REX) n = qse_matchrex ( sed->mmgr, sed->depth.rex.match, @@ -2305,7 +2371,7 @@ static int match_a (qse_sed_t* sed, qse_sed_cmd_t* cmd, qse_sed_adr_t* a) case QSE_SED_ADR_REX: { -#ifdef USE_REX +#if defined(USE_REX) int n; qse_rex_errnum_t errnum; qse_cstr_t match; @@ -2339,7 +2405,7 @@ static int match_a (qse_sed_t* sed, qse_sed_cmd_t* cmd, qse_sed_adr_t* a) rex = a->u.rex; sed->e.last_rex = rex; } -#ifdef USE_REX +#if defined(USE_REX) n = qse_matchrex ( sed->mmgr, sed->depth.rex.match, @@ -2676,16 +2742,7 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd) break; case QSE_SED_CMD_NEXT: - if (!(sed->option & QSE_SED_QUIET)) - { - /* output the current pattern space */ - n = write_str ( - sed, - QSE_STR_PTR(&sed->e.in.line), - QSE_STR_LEN(&sed->e.in.line) - ); - if (n <= -1) return QSE_NULL; - } + if (emit_output (sed, 0) <= -1) return QSE_NULL; /* read the next line and fill the pattern space */ n = read_line (sed, 0); @@ -2699,6 +2756,8 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd) case QSE_SED_CMD_NEXT_APPEND: /* append the next line to the pattern space */ + if (emit_output (sed, 1) <= -1) return QSE_NULL; + n = read_line (sed, 1); if (n <= -1) return QSE_NULL; if (n == 0) @@ -2922,14 +2981,11 @@ static int init_all_commands_for_exec (qse_sed_t* sed) return 0; } -static int emit_output (qse_sed_t* sed) +static int emit_output (qse_sed_t* sed, int skipline) { int n; -#if 0 - qse_size_t i; -#endif - if (!(sed->option & QSE_SED_QUIET)) + if (!skipline && !(sed->option & QSE_SED_QUIET)) { /* write the pattern space */ n = write_str (sed, @@ -2946,6 +3002,8 @@ static int emit_output (qse_sed_t* sed) ); if (n <= -1) return -1; + qse_str_clear (&sed->e.txt.appended); + /* flush the output stream in case it's not flushed * in write functions */ n = flush (sed); @@ -3062,8 +3120,6 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf) if (n <= -1) { ret = -1; goto done; } if (n == 0) goto done; - qse_str_clear (&sed->e.txt.appended); - if (sed->cmd.fb.len > 0) { qse_sed_cmd_t* c, * j; @@ -3088,7 +3144,7 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf) if (j == &sed->cmd.quit_quiet) goto done; if (j == &sed->cmd.quit) { - if (emit_output (sed) <= -1) ret = -1; + if (emit_output (sed, 0) <= -1) ret = -1; goto done; } if (j == &sed->cmd.again) goto again; @@ -3098,8 +3154,7 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf) } } - - if (emit_output (sed) <= -1) { ret = -1; goto done; } + if (emit_output (sed, 0) <= -1) { ret = -1; goto done; } } done: diff --git a/qse/regress/sed/Makefile.am b/qse/regress/sed/Makefile.am index 78a00b75..319d9f3c 100644 --- a/qse/regress/sed/Makefile.am +++ b/qse/regress/sed/Makefile.am @@ -7,4 +7,6 @@ EXTRA_DIST = \ s001.sed s001.dat \ s002.sed s002.dat \ s003.sed s003.dat \ - s004.sed s004.dat + s004.sed s004.dat \ + s005.sed s005.dat \ + s006.sed s006.dat diff --git a/qse/regress/sed/Makefile.in b/qse/regress/sed/Makefile.in index 1a83d677..479accd4 100644 --- a/qse/regress/sed/Makefile.in +++ b/qse/regress/sed/Makefile.in @@ -203,7 +203,9 @@ EXTRA_DIST = \ s001.sed s001.dat \ s002.sed s002.dat \ s003.sed s003.dat \ - s004.sed s004.dat + s004.sed s004.dat \ + s005.sed s005.dat \ + s006.sed s006.dat all: all-am diff --git a/qse/regress/sed/regress.out b/qse/regress/sed/regress.out index 35246d91..f01efe79 100644 --- a/qse/regress/sed/regress.out +++ b/qse/regress/sed/regress.out @@ -59,3 +59,25 @@ linux { HOST: com.com ADDRESS: 45.34.34.33 } +-------------------------------------------------------------------------------- +[CMD] qsesed -f s005.sed s005.dat &1 +-------------------------------------------------------------------------------- +1 +------------------- +2 +3 +------------------- +4 +5 +------------------- +-------------------------------------------------------------------------------- +[CMD] qsesed -f s006.sed s006.dat &1 +-------------------------------------------------------------------------------- +------------------- +1 +2 +------------------- +3 +4 +------------------- +5 diff --git a/qse/regress/sed/regress.out.xma b/qse/regress/sed/regress.out.xma index fbc656db..ddc01c48 100644 --- a/qse/regress/sed/regress.out.xma +++ b/qse/regress/sed/regress.out.xma @@ -59,3 +59,25 @@ linux { HOST: com.com ADDRESS: 45.34.34.33 } +-------------------------------------------------------------------------------- +[CMD] qsesed -m 500000 -f s005.sed s005.dat &1 +-------------------------------------------------------------------------------- +1 +------------------- +2 +3 +------------------- +4 +5 +------------------- +-------------------------------------------------------------------------------- +[CMD] qsesed -m 500000 -f s006.sed s006.dat &1 +-------------------------------------------------------------------------------- +------------------- +1 +2 +------------------- +3 +4 +------------------- +5 diff --git a/qse/regress/sed/regress.sh.in b/qse/regress/sed/regress.sh.in index 0661c881..b357eec2 100755 --- a/qse/regress/sed/regress.sh.in +++ b/qse/regress/sed/regress.sh.in @@ -60,6 +60,8 @@ PROGS=" s002.sed/s002.dat// s003.sed/s003.dat// s004.sed/s004.dat// + s005.sed/s005.dat// + s006.sed/s006.dat// " [ -x "${QSESED}" ] || diff --git a/qse/regress/sed/s005.dat b/qse/regress/sed/s005.dat new file mode 100644 index 00000000..8a1218a1 --- /dev/null +++ b/qse/regress/sed/s005.dat @@ -0,0 +1,5 @@ +1 +2 +3 +4 +5 diff --git a/qse/regress/sed/s005.sed b/qse/regress/sed/s005.sed new file mode 100644 index 00000000..d680f2a0 --- /dev/null +++ b/qse/regress/sed/s005.sed @@ -0,0 +1,3 @@ +a \ +------------------- +n diff --git a/qse/regress/sed/s006.dat b/qse/regress/sed/s006.dat new file mode 100644 index 00000000..8a1218a1 --- /dev/null +++ b/qse/regress/sed/s006.dat @@ -0,0 +1,5 @@ +1 +2 +3 +4 +5 diff --git a/qse/regress/sed/s006.sed b/qse/regress/sed/s006.sed new file mode 100644 index 00000000..f47c8f65 --- /dev/null +++ b/qse/regress/sed/s006.sed @@ -0,0 +1,3 @@ +a \ +------------------- +N