diff --git a/qse/cmd/sed/sed.c b/qse/cmd/sed/sed.c index 975f6695..20ac8983 100644 --- a/qse/cmd/sed/sed.c +++ b/qse/cmd/sed/sed.c @@ -139,6 +139,7 @@ static void print_usage (QSE_FILE* out, int argc, qse_char_t* argv[]) qse_fprintf (out, QSE_T(" -n disable auto-print\n")); qse_fprintf (out, QSE_T(" -f file specify a script file\n")); qse_fprintf (out, QSE_T(" -r use the extended regular expression\n")); + qse_fprintf (out, QSE_T(" -R enable non-standard extensions to the regular expression\n")); qse_fprintf (out, QSE_T(" -a perform strict address check\n")); qse_fprintf (out, QSE_T(" -w allow address format of start~step\n")); qse_fprintf (out, QSE_T(" -x allow text on the same line as c, a, i\n")); @@ -150,7 +151,7 @@ static int handle_args (int argc, qse_char_t* argv[]) { static qse_opt_t opt = { - QSE_T("hnf:rawxym:"), + QSE_T("hnf:rRawxym:"), QSE_NULL }; qse_cint_t c; @@ -195,6 +196,10 @@ static int handle_args (int argc, qse_char_t* argv[]) g_option |= QSE_SED_EXTENDEDREX; break; + case QSE_T('R'): + g_option |= QSE_SED_NONSTDEXTREX; + break; + case QSE_T('a'): g_option |= QSE_SED_STRICT; break; diff --git a/qse/include/qse/cmn/tre.h b/qse/include/qse/cmn/tre.h index b03101ea..914bef45 100644 --- a/qse/include/qse/cmn/tre.h +++ b/qse/include/qse/cmn/tre.h @@ -72,7 +72,14 @@ enum qse_tre_cflag_t QSE_TRE_NOSUBREG = (1 << 3), QSE_TRE_LITERAL = (1 << 4), QSE_TRE_RIGHTASSOC = (1 << 5), - QSE_TRE_UNGREEDY = (1 << 6) + QSE_TRE_UNGREEDY = (1 << 6), + + /* Enable non-standard extensions: + * - Enable (?:text) for no submatch backreference. + * - Enable perl-like (?...) extensions like (?i) + * if QSE_TRE_EXTENDED is also set. + */ + QSE_TRE_NONSTDEXT = (1 << 7) }; enum qse_tre_eflag_t diff --git a/qse/include/qse/sed/sed.h b/qse/include/qse/sed/sed.h index 5af6a2d5..75d67e47 100644 --- a/qse/include/qse/sed/sed.h +++ b/qse/include/qse/sed/sed.h @@ -1,5 +1,5 @@ /* - * $Id: sed.h 562 2011-09-07 15:36:08Z hyunghwan.chung $ + * $Id: sed.h 563 2011-09-08 07:49:53Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. @@ -133,14 +133,15 @@ typedef const qse_char_t* (*qse_sed_errstr_t) ( */ enum qse_sed_option_t { - QSE_SED_STRIPLS = (1 << 0), /**< strip leading spaces from text */ - QSE_SED_KEEPTBS = (1 << 1), /**< keep an trailing backslash */ - QSE_SED_ENSURENL = (1 << 2), /**< ensure NL at the text end */ - QSE_SED_QUIET = (1 << 3), /**< do not print pattern space */ - QSE_SED_STRICT = (1 << 4), /**< do strict address check */ - QSE_SED_STARTSTEP = (1 << 5), /**< allow start~step */ - QSE_SED_EXTENDEDREX = (1 << 6), /**< allow {n,m} in regular expression */ - QSE_SED_SAMELINE = (1 << 7), /**< allow text on the same line as c, a, i */ + QSE_SED_STRIPLS = (1 << 0), /**< strip leading spaces from text */ + QSE_SED_KEEPTBS = (1 << 1), /**< keep an trailing backslash */ + QSE_SED_ENSURENL = (1 << 2), /**< ensure NL at the text end */ + QSE_SED_QUIET = (1 << 3), /**< do not print pattern space */ + QSE_SED_STRICT = (1 << 4), /**< do strict address check */ + QSE_SED_STARTSTEP = (1 << 5), /**< allow start~step */ + QSE_SED_EXTENDEDREX = (1 << 6), /**< use extended regex */ + QSE_SED_NONSTDEXTREX = (1 << 7), /**< enable non-standard extensions to regex */ + QSE_SED_SAMELINE = (1 << 8), /**< allow text on the same line as c, a, i */ }; typedef enum qse_sed_option_t qse_sed_option_t; diff --git a/qse/lib/cmn/tre-parse.c b/qse/lib/cmn/tre-parse.c index 1893f73a..dcfd0148 100644 --- a/qse/lib/cmn/tre-parse.c +++ b/qse/lib/cmn/tre-parse.c @@ -1084,6 +1084,9 @@ tre_parse(tre_parse_ctx_t *ctx) break; /*FALLTHROUGH*/ case CHAR_STAR: +/* QSE - added this label */ +parse_star: +/* END QSE */ { tre_ast_node_t *tmp_node; int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0; @@ -1093,9 +1096,9 @@ tre_parse(tre_parse_ctx_t *ctx) const tre_char_t *tmp_re; #endif - if (*ctx->re == CHAR_PLUS) + if (*ctx->re == CHAR_PLUS) /* QSE: case CHAR_PLUS fell through down here */ rep_min = 1; - if (*ctx->re == CHAR_QUESTIONMARK) + if (*ctx->re == CHAR_QUESTIONMARK) /* QSE: case CHAR_QUESTIONMARK fell though down here */ rep_max = 1; #ifdef TRE_DEBUG tmp_re = ctx->re; @@ -1103,7 +1106,7 @@ tre_parse(tre_parse_ctx_t *ctx) if (ctx->re + 1 < ctx->re_end) { - if (*(ctx->re + 1) == CHAR_QUESTIONMARK) + if (*(ctx->re + 1) == CHAR_QUESTIONMARK) /* QSE: +?, ??, *? */ { minimal = !(ctx->cflags & REG_UNGREEDY); ctx->re++; @@ -1136,6 +1139,8 @@ tre_parse(tre_parse_ctx_t *ctx) case CHAR_BACKSLASH: /* "\{" is special without REG_EXTENDED */ + /* QSE - also handle \+ and \? */ + /* if (!(ctx->cflags & REG_EXTENDED) && ctx->re + 1 < ctx->re_end && *(ctx->re + 1) == CHAR_LBRACE) @@ -1145,6 +1150,24 @@ tre_parse(tre_parse_ctx_t *ctx) } else break; + */ + if (!(ctx->cflags & REG_EXTENDED) && ctx->re + 1 < ctx->re_end) + { + if (*(ctx->re + 1) == CHAR_LBRACE) + { + ctx->re++; + goto parse_brace; + } + else if (*(ctx->re + 1) == CHAR_PLUS || + *(ctx->re + 1) == CHAR_QUESTIONMARK) + { + ctx->re++; + goto parse_star; + } + } + else break; + /* END QSE */ + case CHAR_LBRACE: /* "{" is literal without REG_EXTENDED */ @@ -1184,8 +1207,10 @@ parse_brace: /* Handle "(?...)" extensions. They work in a way similar to Perls corresponding extensions. */ - if (ctx->cflags & REG_EXTENDED - && *(ctx->re + 1) == CHAR_QUESTIONMARK) + /* QSE: added ctx->cflags & REG_NONSTDEXT */ + if ((ctx->cflags & REG_NONSTDEXT) && + (ctx->cflags & REG_EXTENDED) && + *(ctx->re + 1) == CHAR_QUESTIONMARK) { int new_cflags = ctx->cflags; int bit = 1; @@ -1293,10 +1318,13 @@ parse_brace: && *(ctx->re - 1) == CHAR_BACKSLASH)) { depth++; - if (ctx->re + 2 < ctx->re_end - && *(ctx->re + 1) == CHAR_QUESTIONMARK - && *(ctx->re + 2) == CHAR_COLON) + /* QSE: added ctx->cflags & REG_NONSTDEXT */ + if ((ctx->cflags & REG_NONSTDEXT) && + ctx->re + 2 < ctx->re_end && + *(ctx->re + 1) == CHAR_QUESTIONMARK && + *(ctx->re + 2) == CHAR_COLON) { + /* QSE: \(?: or (?: depending on REG_EXTENDED */ DPRINT(("tre_parse: group begin: '%.*" STRF "', no submatch\n", REST(ctx->re))); /* Don't mark for submatching. */ diff --git a/qse/lib/cmn/tre.h b/qse/lib/cmn/tre.h index 52b93823..332ff6b2 100644 --- a/qse/lib/cmn/tre.h +++ b/qse/lib/cmn/tre.h @@ -133,6 +133,7 @@ typedef qse_cint_t tre_cint_t; #define REG_LITERAL QSE_TRE_LITERAL #define REG_RIGHT_ASSOC QSE_TRE_RIGHTASSOC #define REG_UNGREEDY QSE_TRE_UNGREEDY +#define REG_NONSTDEXT QSE_TRE_NONSTDEXT /* POSIX tre_regexec() flags. */ #define REG_NOTBOL QSE_TRE_NOTBOL diff --git a/qse/lib/sed/sed.c b/qse/lib/sed/sed.c index 46e9aae8..a93e84e1 100644 --- a/qse/lib/sed/sed.c +++ b/qse/lib/sed/sed.c @@ -1,5 +1,5 @@ /* - * $Id: sed.c 562 2011-09-07 15:36:08Z hyunghwan.chung $ + * $Id: sed.c 563 2011-09-08 07:49:53Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. @@ -198,6 +198,7 @@ static void* build_rex ( /* ignorecase is a compile option for TRE */ if (ignorecase) opt |= QSE_TRE_IGNORECASE; if (sed->option & QSE_SED_EXTENDEDREX) opt |= QSE_TRE_EXTENDED; + if (sed->option & QSE_SED_NONSTDEXTREX) opt |= QSE_TRE_NONSTDEXT; if (qse_tre_compx (tre, str->ptr, str->len, QSE_NULL, opt) <= -1) { @@ -230,7 +231,7 @@ static int matchtre ( qse_cstr_t submat[9], const qse_sed_loc_t* loc) { int n; - qse_tre_match_t match[10]; + qse_tre_match_t match[10] = { { 0, 0 }, }; n = qse_tre_execx (tre, str->ptr, str->len, match, 10, opt); if (n <= -1) @@ -258,7 +259,7 @@ static int matchtre ( /* you must intialize submat before you pass into this * function because it can abort filling */ - for (i = 1; i <= 10; i++) + for (i = 1; i < QSE_COUNTOF(match); i++) { if (match[i].rm_so == -1) break; submat[i-1].ptr = &str->ptr[match[i].rm_so]; @@ -569,6 +570,8 @@ static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend) { if (pickup_rex (sed, rxend, 1, QSE_NULL, &sed->tmp.rex) <= -1) return QSE_NULL; /* TODO: support ignore case option for address */ + + if (QSE_STR_LEN(&sed->tmp.rex) <= 0) return EMPTY_REX; return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), 0, &sed->src.loc); } @@ -592,11 +595,6 @@ static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a) } while ((c = CURSC(sed)) >= QSE_T('0') && c <= QSE_T('9')); -#if 0 - /* line number 0 is illegal */ - if (lno == 0) return QSE_NULL; -#endif - a->type = QSE_SED_ADR_LINE; a->u.lno = lno; } @@ -985,8 +983,8 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) if (pickup_rex (sed, delim, 1, cmd, t[0]) <= -1) goto oops; if (pickup_rex (sed, delim, 0, cmd, t[1]) <= -1) goto oops; - #if 0 +/* calling pickup_rex twice above instead of commenting out this part */ for (i = 0; i < 2; i++) { c = NXTSC (sed); @@ -2517,17 +2515,13 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd) break; case QSE_SED_CMD_QUIT: - if (!(sed->option && QSE_SED_QUIET)) - { - n = write_str (sed, - QSE_STR_PTR(&sed->e.in.line), - QSE_STR_LEN(&sed->e.in.line)); - if (n <= -1) return QSE_NULL; - } - case QSE_SED_CMD_QUIT_QUIET: jumpto = &sed->cmd.quit; break; + case QSE_SED_CMD_QUIT_QUIET: + jumpto = &sed->cmd.quit_quiet; + break; + case QSE_SED_CMD_APPEND: if (qse_lda_insert ( &sed->e.txt.appended, @@ -3052,6 +3046,7 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf) while (1) { qse_size_t i; + int quit = 0; n = read_line (sed, 0); if (n <= -1) { ret = -1; goto done; } @@ -3081,7 +3076,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf) j = exec_cmd (sed, c); if (j == QSE_NULL) { ret = -1; goto done; } - if (j == &sed->cmd.quit) goto done; + if (j == &sed->cmd.quit_quiet) goto done; + if (j == &sed->cmd.quit) { quit = 1; break; } if (j == &sed->cmd.again) goto again; /* go to the next command */ @@ -3118,6 +3114,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf) * in write functions */ n = flush (sed); if (n <= -1) { ret = -1; goto done; } + + if (quit) break; } done: diff --git a/qse/lib/sed/sed.h b/qse/lib/sed/sed.h index 354abe30..814edfd9 100644 --- a/qse/lib/sed/sed.h +++ b/qse/lib/sed/sed.h @@ -1,5 +1,5 @@ /* - * $Id: sed.h 560 2011-09-06 14:18:36Z hyunghwan.chung $ + * $Id: sed.h 563 2011-09-08 07:49:53Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. @@ -215,6 +215,7 @@ struct qse_sed_t qse_sed_cmd_blk_t* lb; /**< points to the last block */ qse_sed_cmd_t quit; + qse_sed_cmd_t quit_quiet; qse_sed_cmd_t again; qse_sed_cmd_t over; } cmd;