added QSE_TRE_NONSTDEXT

fixed a bug where the pattern space was not printed properly after the 'q' command
This commit is contained in:
hyung-hwan 2011-09-09 01:49:53 +00:00
parent 92b4c29551
commit 3db2c566a2
7 changed files with 79 additions and 38 deletions

View File

@ -139,6 +139,7 @@ static void print_usage (QSE_FILE* out, int argc, qse_char_t* argv[])
qse_fprintf (out, QSE_T(" -n disable auto-print\n"));
qse_fprintf (out, QSE_T(" -f file specify a script file\n"));
qse_fprintf (out, QSE_T(" -r use the extended regular expression\n"));
qse_fprintf (out, QSE_T(" -R enable non-standard extensions to the regular expression\n"));
qse_fprintf (out, QSE_T(" -a perform strict address check\n"));
qse_fprintf (out, QSE_T(" -w allow address format of start~step\n"));
qse_fprintf (out, QSE_T(" -x allow text on the same line as c, a, i\n"));
@ -150,7 +151,7 @@ static int handle_args (int argc, qse_char_t* argv[])
{
static qse_opt_t opt =
{
QSE_T("hnf:rawxym:"),
QSE_T("hnf:rRawxym:"),
QSE_NULL
};
qse_cint_t c;
@ -195,6 +196,10 @@ static int handle_args (int argc, qse_char_t* argv[])
g_option |= QSE_SED_EXTENDEDREX;
break;
case QSE_T('R'):
g_option |= QSE_SED_NONSTDEXTREX;
break;
case QSE_T('a'):
g_option |= QSE_SED_STRICT;
break;

View File

@ -72,7 +72,14 @@ enum qse_tre_cflag_t
QSE_TRE_NOSUBREG = (1 << 3),
QSE_TRE_LITERAL = (1 << 4),
QSE_TRE_RIGHTASSOC = (1 << 5),
QSE_TRE_UNGREEDY = (1 << 6)
QSE_TRE_UNGREEDY = (1 << 6),
/* Enable non-standard extensions:
* - Enable (?:text) for no submatch backreference.
* - Enable perl-like (?...) extensions like (?i)
* if QSE_TRE_EXTENDED is also set.
*/
QSE_TRE_NONSTDEXT = (1 << 7)
};
enum qse_tre_eflag_t

View File

@ -1,5 +1,5 @@
/*
* $Id: sed.h 562 2011-09-07 15:36:08Z hyunghwan.chung $
* $Id: sed.h 563 2011-09-08 07:49:53Z hyunghwan.chung $
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -133,14 +133,15 @@ typedef const qse_char_t* (*qse_sed_errstr_t) (
*/
enum qse_sed_option_t
{
QSE_SED_STRIPLS = (1 << 0), /**< strip leading spaces from text */
QSE_SED_KEEPTBS = (1 << 1), /**< keep a trailing backslash */
QSE_SED_ENSURENL = (1 << 2), /**< ensure NL at the text end */
QSE_SED_QUIET = (1 << 3), /**< do not print pattern space */
QSE_SED_STRICT = (1 << 4), /**< do strict address check */
QSE_SED_STARTSTEP = (1 << 5), /**< allow start~step */
QSE_SED_EXTENDEDREX = (1 << 6), /**< allow {n,m} in regular expression */
QSE_SED_SAMELINE = (1 << 7), /**< allow text on the same line as c, a, i */
QSE_SED_STRIPLS = (1 << 0), /**< strip leading spaces from text */
QSE_SED_KEEPTBS = (1 << 1), /**< keep a trailing backslash */
QSE_SED_ENSURENL = (1 << 2), /**< ensure NL at the text end */
QSE_SED_QUIET = (1 << 3), /**< do not print pattern space */
QSE_SED_STRICT = (1 << 4), /**< do strict address check */
QSE_SED_STARTSTEP = (1 << 5), /**< allow start~step */
QSE_SED_EXTENDEDREX = (1 << 6), /**< use extended regex */
QSE_SED_NONSTDEXTREX = (1 << 7), /**< enable non-standard extensions to regex */
QSE_SED_SAMELINE = (1 << 8), /**< allow text on the same line as c, a, i */
};
typedef enum qse_sed_option_t qse_sed_option_t;

View File

@ -1084,6 +1084,9 @@ tre_parse(tre_parse_ctx_t *ctx)
break;
/*FALLTHROUGH*/
case CHAR_STAR:
/* QSE - added this label */
parse_star:
/* END QSE */
{
tre_ast_node_t *tmp_node;
int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
@ -1093,9 +1096,9 @@ tre_parse(tre_parse_ctx_t *ctx)
const tre_char_t *tmp_re;
#endif
if (*ctx->re == CHAR_PLUS)
if (*ctx->re == CHAR_PLUS) /* QSE: case CHAR_PLUS fell through down here */
rep_min = 1;
if (*ctx->re == CHAR_QUESTIONMARK)
if (*ctx->re == CHAR_QUESTIONMARK) /* QSE: case CHAR_QUESTIONMARK fell through down here */
rep_max = 1;
#ifdef TRE_DEBUG
tmp_re = ctx->re;
@ -1103,7 +1106,7 @@ tre_parse(tre_parse_ctx_t *ctx)
if (ctx->re + 1 < ctx->re_end)
{
if (*(ctx->re + 1) == CHAR_QUESTIONMARK)
if (*(ctx->re + 1) == CHAR_QUESTIONMARK) /* QSE: +?, ??, *? */
{
minimal = !(ctx->cflags & REG_UNGREEDY);
ctx->re++;
@ -1136,6 +1139,8 @@ tre_parse(tre_parse_ctx_t *ctx)
case CHAR_BACKSLASH:
/* "\{" is special without REG_EXTENDED */
/* QSE - also handle \+ and \? */
/*
if (!(ctx->cflags & REG_EXTENDED)
&& ctx->re + 1 < ctx->re_end
&& *(ctx->re + 1) == CHAR_LBRACE)
@ -1145,6 +1150,24 @@ tre_parse(tre_parse_ctx_t *ctx)
}
else
break;
*/
if (!(ctx->cflags & REG_EXTENDED) && ctx->re + 1 < ctx->re_end)
{
if (*(ctx->re + 1) == CHAR_LBRACE)
{
ctx->re++;
goto parse_brace;
}
else if (*(ctx->re + 1) == CHAR_PLUS ||
*(ctx->re + 1) == CHAR_QUESTIONMARK)
{
ctx->re++;
goto parse_star;
}
}
else break;
/* END QSE */
case CHAR_LBRACE:
/* "{" is literal without REG_EXTENDED */
@ -1184,8 +1207,10 @@ parse_brace:
/* Handle "(?...)" extensions. They work in a way similar
to Perl's corresponding extensions. */
if (ctx->cflags & REG_EXTENDED
&& *(ctx->re + 1) == CHAR_QUESTIONMARK)
/* QSE: added ctx->cflags & REG_NONSTDEXT */
if ((ctx->cflags & REG_NONSTDEXT) &&
(ctx->cflags & REG_EXTENDED) &&
*(ctx->re + 1) == CHAR_QUESTIONMARK)
{
int new_cflags = ctx->cflags;
int bit = 1;
@ -1293,10 +1318,13 @@ parse_brace:
&& *(ctx->re - 1) == CHAR_BACKSLASH))
{
depth++;
if (ctx->re + 2 < ctx->re_end
&& *(ctx->re + 1) == CHAR_QUESTIONMARK
&& *(ctx->re + 2) == CHAR_COLON)
/* QSE: added ctx->cflags & REG_NONSTDEXT */
if ((ctx->cflags & REG_NONSTDEXT) &&
ctx->re + 2 < ctx->re_end &&
*(ctx->re + 1) == CHAR_QUESTIONMARK &&
*(ctx->re + 2) == CHAR_COLON)
{
/* QSE: \(?: or (?: depending on REG_EXTENDED */
DPRINT(("tre_parse: group begin: '%.*" STRF
"', no submatch\n", REST(ctx->re)));
/* Don't mark for submatching. */

View File

@ -133,6 +133,7 @@ typedef qse_cint_t tre_cint_t;
#define REG_LITERAL QSE_TRE_LITERAL
#define REG_RIGHT_ASSOC QSE_TRE_RIGHTASSOC
#define REG_UNGREEDY QSE_TRE_UNGREEDY
#define REG_NONSTDEXT QSE_TRE_NONSTDEXT
/* POSIX tre_regexec() flags. */
#define REG_NOTBOL QSE_TRE_NOTBOL

View File

@ -1,5 +1,5 @@
/*
* $Id: sed.c 562 2011-09-07 15:36:08Z hyunghwan.chung $
* $Id: sed.c 563 2011-09-08 07:49:53Z hyunghwan.chung $
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -198,6 +198,7 @@ static void* build_rex (
/* ignorecase is a compile option for TRE */
if (ignorecase) opt |= QSE_TRE_IGNORECASE;
if (sed->option & QSE_SED_EXTENDEDREX) opt |= QSE_TRE_EXTENDED;
if (sed->option & QSE_SED_NONSTDEXTREX) opt |= QSE_TRE_NONSTDEXT;
if (qse_tre_compx (tre, str->ptr, str->len, QSE_NULL, opt) <= -1)
{
@ -230,7 +231,7 @@ static int matchtre (
qse_cstr_t submat[9], const qse_sed_loc_t* loc)
{
int n;
qse_tre_match_t match[10];
qse_tre_match_t match[10] = { { 0, 0 }, };
n = qse_tre_execx (tre, str->ptr, str->len, match, 10, opt);
if (n <= -1)
@ -258,7 +259,7 @@ static int matchtre (
/* you must initialize submat before you pass it into this
* function because it can abort filling */
for (i = 1; i <= 10; i++)
for (i = 1; i < QSE_COUNTOF(match); i++)
{
if (match[i].rm_so == -1) break;
submat[i-1].ptr = &str->ptr[match[i].rm_so];
@ -569,6 +570,8 @@ static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend)
{
if (pickup_rex (sed, rxend, 1, QSE_NULL, &sed->tmp.rex) <= -1) return QSE_NULL;
/* TODO: support ignore case option for address */
if (QSE_STR_LEN(&sed->tmp.rex) <= 0) return EMPTY_REX;
return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), 0, &sed->src.loc);
}
@ -592,11 +595,6 @@ static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a)
}
while ((c = CURSC(sed)) >= QSE_T('0') && c <= QSE_T('9'));
#if 0
/* line number 0 is illegal */
if (lno == 0) return QSE_NULL;
#endif
a->type = QSE_SED_ADR_LINE;
a->u.lno = lno;
}
@ -985,8 +983,8 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
if (pickup_rex (sed, delim, 1, cmd, t[0]) <= -1) goto oops;
if (pickup_rex (sed, delim, 0, cmd, t[1]) <= -1) goto oops;
#if 0
/* calling pickup_rex twice above instead of commenting out this part */
for (i = 0; i < 2; i++)
{
c = NXTSC (sed);
@ -2517,17 +2515,13 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
break;
case QSE_SED_CMD_QUIT:
if (!(sed->option && QSE_SED_QUIET))
{
n = write_str (sed,
QSE_STR_PTR(&sed->e.in.line),
QSE_STR_LEN(&sed->e.in.line));
if (n <= -1) return QSE_NULL;
}
case QSE_SED_CMD_QUIT_QUIET:
jumpto = &sed->cmd.quit;
break;
case QSE_SED_CMD_QUIT_QUIET:
jumpto = &sed->cmd.quit_quiet;
break;
case QSE_SED_CMD_APPEND:
if (qse_lda_insert (
&sed->e.txt.appended,
@ -3052,6 +3046,7 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf)
while (1)
{
qse_size_t i;
int quit = 0;
n = read_line (sed, 0);
if (n <= -1) { ret = -1; goto done; }
@ -3081,7 +3076,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf)
j = exec_cmd (sed, c);
if (j == QSE_NULL) { ret = -1; goto done; }
if (j == &sed->cmd.quit) goto done;
if (j == &sed->cmd.quit_quiet) goto done;
if (j == &sed->cmd.quit) { quit = 1; break; }
if (j == &sed->cmd.again) goto again;
/* go to the next command */
@ -3118,6 +3114,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf)
* in write functions */
n = flush (sed);
if (n <= -1) { ret = -1; goto done; }
if (quit) break;
}
done:

View File

@ -1,5 +1,5 @@
/*
* $Id: sed.h 560 2011-09-06 14:18:36Z hyunghwan.chung $
* $Id: sed.h 563 2011-09-08 07:49:53Z hyunghwan.chung $
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -215,6 +215,7 @@ struct qse_sed_t
qse_sed_cmd_blk_t* lb; /**< points to the last block */
qse_sed_cmd_t quit;
qse_sed_cmd_t quit_quiet;
qse_sed_cmd_t again;
qse_sed_cmd_t over;
} cmd;