added QSE_TRE_NONSTDEXT

fixed a bug of not printing pattern space properly after 'q'
This commit is contained in:
hyung-hwan 2011-09-09 01:49:53 +00:00
parent 92b4c29551
commit 3db2c566a2
7 changed files with 79 additions and 38 deletions

View File

@ -139,6 +139,7 @@ static void print_usage (QSE_FILE* out, int argc, qse_char_t* argv[])
qse_fprintf (out, QSE_T(" -n disable auto-print\n")); qse_fprintf (out, QSE_T(" -n disable auto-print\n"));
qse_fprintf (out, QSE_T(" -f file specify a script file\n")); qse_fprintf (out, QSE_T(" -f file specify a script file\n"));
qse_fprintf (out, QSE_T(" -r use the extended regular expression\n")); qse_fprintf (out, QSE_T(" -r use the extended regular expression\n"));
qse_fprintf (out, QSE_T(" -R enable non-standard extensions to the regular expression\n"));
qse_fprintf (out, QSE_T(" -a perform strict address check\n")); qse_fprintf (out, QSE_T(" -a perform strict address check\n"));
qse_fprintf (out, QSE_T(" -w allow address format of start~step\n")); qse_fprintf (out, QSE_T(" -w allow address format of start~step\n"));
qse_fprintf (out, QSE_T(" -x allow text on the same line as c, a, i\n")); qse_fprintf (out, QSE_T(" -x allow text on the same line as c, a, i\n"));
@ -150,7 +151,7 @@ static int handle_args (int argc, qse_char_t* argv[])
{ {
static qse_opt_t opt = static qse_opt_t opt =
{ {
QSE_T("hnf:rawxym:"), QSE_T("hnf:rRawxym:"),
QSE_NULL QSE_NULL
}; };
qse_cint_t c; qse_cint_t c;
@ -195,6 +196,10 @@ static int handle_args (int argc, qse_char_t* argv[])
g_option |= QSE_SED_EXTENDEDREX; g_option |= QSE_SED_EXTENDEDREX;
break; break;
case QSE_T('R'):
g_option |= QSE_SED_NONSTDEXTREX;
break;
case QSE_T('a'): case QSE_T('a'):
g_option |= QSE_SED_STRICT; g_option |= QSE_SED_STRICT;
break; break;

View File

@ -72,7 +72,14 @@ enum qse_tre_cflag_t
QSE_TRE_NOSUBREG = (1 << 3), QSE_TRE_NOSUBREG = (1 << 3),
QSE_TRE_LITERAL = (1 << 4), QSE_TRE_LITERAL = (1 << 4),
QSE_TRE_RIGHTASSOC = (1 << 5), QSE_TRE_RIGHTASSOC = (1 << 5),
QSE_TRE_UNGREEDY = (1 << 6) QSE_TRE_UNGREEDY = (1 << 6),
/* Enable non-standard extensions:
* - Enable (?:text) for no submatch backreference.
* - Enable perl-like (?...) extensions like (?i)
* if QSE_TRE_EXTENDED is also set.
*/
QSE_TRE_NONSTDEXT = (1 << 7)
}; };
enum qse_tre_eflag_t enum qse_tre_eflag_t

View File

@ -1,5 +1,5 @@
/* /*
* $Id: sed.h 562 2011-09-07 15:36:08Z hyunghwan.chung $ * $Id: sed.h 563 2011-09-08 07:49:53Z hyunghwan.chung $
* *
Copyright 2006-2011 Chung, Hyung-Hwan. Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -133,14 +133,15 @@ typedef const qse_char_t* (*qse_sed_errstr_t) (
*/ */
enum qse_sed_option_t enum qse_sed_option_t
{ {
QSE_SED_STRIPLS = (1 << 0), /**< strip leading spaces from text */ QSE_SED_STRIPLS = (1 << 0), /**< strip leading spaces from text */
QSE_SED_KEEPTBS = (1 << 1), /**< keep a trailing backslash */ QSE_SED_KEEPTBS = (1 << 1), /**< keep a trailing backslash */
QSE_SED_ENSURENL = (1 << 2), /**< ensure NL at the text end */ QSE_SED_ENSURENL = (1 << 2), /**< ensure NL at the text end */
QSE_SED_QUIET = (1 << 3), /**< do not print pattern space */ QSE_SED_QUIET = (1 << 3), /**< do not print pattern space */
QSE_SED_STRICT = (1 << 4), /**< do strict address check */ QSE_SED_STRICT = (1 << 4), /**< do strict address check */
QSE_SED_STARTSTEP = (1 << 5), /**< allow start~step */ QSE_SED_STARTSTEP = (1 << 5), /**< allow start~step */
QSE_SED_EXTENDEDREX = (1 << 6), /**< allow {n,m} in regular expression */ QSE_SED_EXTENDEDREX = (1 << 6), /**< use extended regex */
QSE_SED_SAMELINE = (1 << 7), /**< allow text on the same line as c, a, i */ QSE_SED_NONSTDEXTREX = (1 << 7), /**< enable non-standard extensions to regex */
QSE_SED_SAMELINE = (1 << 8), /**< allow text on the same line as c, a, i */
}; };
typedef enum qse_sed_option_t qse_sed_option_t; typedef enum qse_sed_option_t qse_sed_option_t;

View File

@ -1084,6 +1084,9 @@ tre_parse(tre_parse_ctx_t *ctx)
break; break;
/*FALLTHROUGH*/ /*FALLTHROUGH*/
case CHAR_STAR: case CHAR_STAR:
/* QSE - added this label */
parse_star:
/* END QSE */
{ {
tre_ast_node_t *tmp_node; tre_ast_node_t *tmp_node;
int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0; int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
@ -1093,9 +1096,9 @@ tre_parse(tre_parse_ctx_t *ctx)
const tre_char_t *tmp_re; const tre_char_t *tmp_re;
#endif #endif
if (*ctx->re == CHAR_PLUS) if (*ctx->re == CHAR_PLUS) /* QSE: case CHAR_PLUS fell through down here */
rep_min = 1; rep_min = 1;
if (*ctx->re == CHAR_QUESTIONMARK) if (*ctx->re == CHAR_QUESTIONMARK) /* QSE: case CHAR_QUESTIONMARK fell through down here */
rep_max = 1; rep_max = 1;
#ifdef TRE_DEBUG #ifdef TRE_DEBUG
tmp_re = ctx->re; tmp_re = ctx->re;
@ -1103,7 +1106,7 @@ tre_parse(tre_parse_ctx_t *ctx)
if (ctx->re + 1 < ctx->re_end) if (ctx->re + 1 < ctx->re_end)
{ {
if (*(ctx->re + 1) == CHAR_QUESTIONMARK) if (*(ctx->re + 1) == CHAR_QUESTIONMARK) /* QSE: +?, ??, *? */
{ {
minimal = !(ctx->cflags & REG_UNGREEDY); minimal = !(ctx->cflags & REG_UNGREEDY);
ctx->re++; ctx->re++;
@ -1136,6 +1139,8 @@ tre_parse(tre_parse_ctx_t *ctx)
case CHAR_BACKSLASH: case CHAR_BACKSLASH:
/* "\{" is special without REG_EXTENDED */ /* "\{" is special without REG_EXTENDED */
/* QSE - also handle \+ and \? */
/*
if (!(ctx->cflags & REG_EXTENDED) if (!(ctx->cflags & REG_EXTENDED)
&& ctx->re + 1 < ctx->re_end && ctx->re + 1 < ctx->re_end
&& *(ctx->re + 1) == CHAR_LBRACE) && *(ctx->re + 1) == CHAR_LBRACE)
@ -1145,6 +1150,24 @@ tre_parse(tre_parse_ctx_t *ctx)
} }
else else
break; break;
*/
if (!(ctx->cflags & REG_EXTENDED) && ctx->re + 1 < ctx->re_end)
{
if (*(ctx->re + 1) == CHAR_LBRACE)
{
ctx->re++;
goto parse_brace;
}
else if (*(ctx->re + 1) == CHAR_PLUS ||
*(ctx->re + 1) == CHAR_QUESTIONMARK)
{
ctx->re++;
goto parse_star;
}
}
else break;
/* END QSE */
case CHAR_LBRACE: case CHAR_LBRACE:
/* "{" is literal without REG_EXTENDED */ /* "{" is literal without REG_EXTENDED */
@ -1184,8 +1207,10 @@ parse_brace:
/* Handle "(?...)" extensions. They work in a way similar /* Handle "(?...)" extensions. They work in a way similar
to Perl's corresponding extensions. */ to Perl's corresponding extensions. */
if (ctx->cflags & REG_EXTENDED /* QSE: added ctx->cflags & REG_NONSTDEXT */
&& *(ctx->re + 1) == CHAR_QUESTIONMARK) if ((ctx->cflags & REG_NONSTDEXT) &&
(ctx->cflags & REG_EXTENDED) &&
*(ctx->re + 1) == CHAR_QUESTIONMARK)
{ {
int new_cflags = ctx->cflags; int new_cflags = ctx->cflags;
int bit = 1; int bit = 1;
@ -1293,10 +1318,13 @@ parse_brace:
&& *(ctx->re - 1) == CHAR_BACKSLASH)) && *(ctx->re - 1) == CHAR_BACKSLASH))
{ {
depth++; depth++;
if (ctx->re + 2 < ctx->re_end /* QSE: added ctx->cflags & REG_NONSTDEXT */
&& *(ctx->re + 1) == CHAR_QUESTIONMARK if ((ctx->cflags & REG_NONSTDEXT) &&
&& *(ctx->re + 2) == CHAR_COLON) ctx->re + 2 < ctx->re_end &&
*(ctx->re + 1) == CHAR_QUESTIONMARK &&
*(ctx->re + 2) == CHAR_COLON)
{ {
/* QSE: \(?: or (?: depending on REG_EXTENDED */
DPRINT(("tre_parse: group begin: '%.*" STRF DPRINT(("tre_parse: group begin: '%.*" STRF
"', no submatch\n", REST(ctx->re))); "', no submatch\n", REST(ctx->re)));
/* Don't mark for submatching. */ /* Don't mark for submatching. */

View File

@ -133,6 +133,7 @@ typedef qse_cint_t tre_cint_t;
#define REG_LITERAL QSE_TRE_LITERAL #define REG_LITERAL QSE_TRE_LITERAL
#define REG_RIGHT_ASSOC QSE_TRE_RIGHTASSOC #define REG_RIGHT_ASSOC QSE_TRE_RIGHTASSOC
#define REG_UNGREEDY QSE_TRE_UNGREEDY #define REG_UNGREEDY QSE_TRE_UNGREEDY
#define REG_NONSTDEXT QSE_TRE_NONSTDEXT
/* POSIX tre_regexec() flags. */ /* POSIX tre_regexec() flags. */
#define REG_NOTBOL QSE_TRE_NOTBOL #define REG_NOTBOL QSE_TRE_NOTBOL

View File

@ -1,5 +1,5 @@
/* /*
* $Id: sed.c 562 2011-09-07 15:36:08Z hyunghwan.chung $ * $Id: sed.c 563 2011-09-08 07:49:53Z hyunghwan.chung $
* *
Copyright 2006-2011 Chung, Hyung-Hwan. Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -198,6 +198,7 @@ static void* build_rex (
/* ignorecase is a compile option for TRE */ /* ignorecase is a compile option for TRE */
if (ignorecase) opt |= QSE_TRE_IGNORECASE; if (ignorecase) opt |= QSE_TRE_IGNORECASE;
if (sed->option & QSE_SED_EXTENDEDREX) opt |= QSE_TRE_EXTENDED; if (sed->option & QSE_SED_EXTENDEDREX) opt |= QSE_TRE_EXTENDED;
if (sed->option & QSE_SED_NONSTDEXTREX) opt |= QSE_TRE_NONSTDEXT;
if (qse_tre_compx (tre, str->ptr, str->len, QSE_NULL, opt) <= -1) if (qse_tre_compx (tre, str->ptr, str->len, QSE_NULL, opt) <= -1)
{ {
@ -230,7 +231,7 @@ static int matchtre (
qse_cstr_t submat[9], const qse_sed_loc_t* loc) qse_cstr_t submat[9], const qse_sed_loc_t* loc)
{ {
int n; int n;
qse_tre_match_t match[10]; qse_tre_match_t match[10] = { { 0, 0 }, };
n = qse_tre_execx (tre, str->ptr, str->len, match, 10, opt); n = qse_tre_execx (tre, str->ptr, str->len, match, 10, opt);
if (n <= -1) if (n <= -1)
@ -258,7 +259,7 @@ static int matchtre (
/* you must initialize submat before you pass into this /* you must initialize submat before you pass into this
* function because it can abort filling */ * function because it can abort filling */
for (i = 1; i <= 10; i++) for (i = 1; i < QSE_COUNTOF(match); i++)
{ {
if (match[i].rm_so == -1) break; if (match[i].rm_so == -1) break;
submat[i-1].ptr = &str->ptr[match[i].rm_so]; submat[i-1].ptr = &str->ptr[match[i].rm_so];
@ -569,6 +570,8 @@ static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend)
{ {
if (pickup_rex (sed, rxend, 1, QSE_NULL, &sed->tmp.rex) <= -1) return QSE_NULL; if (pickup_rex (sed, rxend, 1, QSE_NULL, &sed->tmp.rex) <= -1) return QSE_NULL;
/* TODO: support ignore case option for address */ /* TODO: support ignore case option for address */
if (QSE_STR_LEN(&sed->tmp.rex) <= 0) return EMPTY_REX;
return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), 0, &sed->src.loc); return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), 0, &sed->src.loc);
} }
@ -592,11 +595,6 @@ static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a)
} }
while ((c = CURSC(sed)) >= QSE_T('0') && c <= QSE_T('9')); while ((c = CURSC(sed)) >= QSE_T('0') && c <= QSE_T('9'));
#if 0
/* line number 0 is illegal */
if (lno == 0) return QSE_NULL;
#endif
a->type = QSE_SED_ADR_LINE; a->type = QSE_SED_ADR_LINE;
a->u.lno = lno; a->u.lno = lno;
} }
@ -985,8 +983,8 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
if (pickup_rex (sed, delim, 1, cmd, t[0]) <= -1) goto oops; if (pickup_rex (sed, delim, 1, cmd, t[0]) <= -1) goto oops;
if (pickup_rex (sed, delim, 0, cmd, t[1]) <= -1) goto oops; if (pickup_rex (sed, delim, 0, cmd, t[1]) <= -1) goto oops;
#if 0 #if 0
/* calling pickup_rex twice above instead of commenting out this part */
for (i = 0; i < 2; i++) for (i = 0; i < 2; i++)
{ {
c = NXTSC (sed); c = NXTSC (sed);
@ -2517,17 +2515,13 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
break; break;
case QSE_SED_CMD_QUIT: case QSE_SED_CMD_QUIT:
if (!(sed->option && QSE_SED_QUIET))
{
n = write_str (sed,
QSE_STR_PTR(&sed->e.in.line),
QSE_STR_LEN(&sed->e.in.line));
if (n <= -1) return QSE_NULL;
}
case QSE_SED_CMD_QUIT_QUIET:
jumpto = &sed->cmd.quit; jumpto = &sed->cmd.quit;
break; break;
case QSE_SED_CMD_QUIT_QUIET:
jumpto = &sed->cmd.quit_quiet;
break;
case QSE_SED_CMD_APPEND: case QSE_SED_CMD_APPEND:
if (qse_lda_insert ( if (qse_lda_insert (
&sed->e.txt.appended, &sed->e.txt.appended,
@ -3052,6 +3046,7 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf)
while (1) while (1)
{ {
qse_size_t i; qse_size_t i;
int quit = 0;
n = read_line (sed, 0); n = read_line (sed, 0);
if (n <= -1) { ret = -1; goto done; } if (n <= -1) { ret = -1; goto done; }
@ -3081,7 +3076,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf)
j = exec_cmd (sed, c); j = exec_cmd (sed, c);
if (j == QSE_NULL) { ret = -1; goto done; } if (j == QSE_NULL) { ret = -1; goto done; }
if (j == &sed->cmd.quit) goto done; if (j == &sed->cmd.quit_quiet) goto done;
if (j == &sed->cmd.quit) { quit = 1; break; }
if (j == &sed->cmd.again) goto again; if (j == &sed->cmd.again) goto again;
/* go to the next command */ /* go to the next command */
@ -3118,6 +3114,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf)
* in write functions */ * in write functions */
n = flush (sed); n = flush (sed);
if (n <= -1) { ret = -1; goto done; } if (n <= -1) { ret = -1; goto done; }
if (quit) break;
} }
done: done:

View File

@ -1,5 +1,5 @@
/* /*
* $Id: sed.h 560 2011-09-06 14:18:36Z hyunghwan.chung $ * $Id: sed.h 563 2011-09-08 07:49:53Z hyunghwan.chung $
* *
Copyright 2006-2011 Chung, Hyung-Hwan. Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -215,6 +215,7 @@ struct qse_sed_t
qse_sed_cmd_blk_t* lb; /**< points to the last block */ qse_sed_cmd_blk_t* lb; /**< points to the last block */
qse_sed_cmd_t quit; qse_sed_cmd_t quit;
qse_sed_cmd_t quit_quiet;
qse_sed_cmd_t again; qse_sed_cmd_t again;
qse_sed_cmd_t over; qse_sed_cmd_t over;
} cmd; } cmd;