added QSE_TRE_NONSTDEXT
fixed a bug where the pattern space was not printed properly after the 'q' command
This commit is contained in:
parent
92b4c29551
commit
3db2c566a2
@ -139,6 +139,7 @@ static void print_usage (QSE_FILE* out, int argc, qse_char_t* argv[])
|
|||||||
qse_fprintf (out, QSE_T(" -n disable auto-print\n"));
|
qse_fprintf (out, QSE_T(" -n disable auto-print\n"));
|
||||||
qse_fprintf (out, QSE_T(" -f file specify a script file\n"));
|
qse_fprintf (out, QSE_T(" -f file specify a script file\n"));
|
||||||
qse_fprintf (out, QSE_T(" -r use the extended regular expression\n"));
|
qse_fprintf (out, QSE_T(" -r use the extended regular expression\n"));
|
||||||
|
qse_fprintf (out, QSE_T(" -R enable non-standard extensions to the regular expression\n"));
|
||||||
qse_fprintf (out, QSE_T(" -a perform strict address check\n"));
|
qse_fprintf (out, QSE_T(" -a perform strict address check\n"));
|
||||||
qse_fprintf (out, QSE_T(" -w allow address format of start~step\n"));
|
qse_fprintf (out, QSE_T(" -w allow address format of start~step\n"));
|
||||||
qse_fprintf (out, QSE_T(" -x allow text on the same line as c, a, i\n"));
|
qse_fprintf (out, QSE_T(" -x allow text on the same line as c, a, i\n"));
|
||||||
@ -150,7 +151,7 @@ static int handle_args (int argc, qse_char_t* argv[])
|
|||||||
{
|
{
|
||||||
static qse_opt_t opt =
|
static qse_opt_t opt =
|
||||||
{
|
{
|
||||||
QSE_T("hnf:rawxym:"),
|
QSE_T("hnf:rRawxym:"),
|
||||||
QSE_NULL
|
QSE_NULL
|
||||||
};
|
};
|
||||||
qse_cint_t c;
|
qse_cint_t c;
|
||||||
@ -195,6 +196,10 @@ static int handle_args (int argc, qse_char_t* argv[])
|
|||||||
g_option |= QSE_SED_EXTENDEDREX;
|
g_option |= QSE_SED_EXTENDEDREX;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case QSE_T('R'):
|
||||||
|
g_option |= QSE_SED_NONSTDEXTREX;
|
||||||
|
break;
|
||||||
|
|
||||||
case QSE_T('a'):
|
case QSE_T('a'):
|
||||||
g_option |= QSE_SED_STRICT;
|
g_option |= QSE_SED_STRICT;
|
||||||
break;
|
break;
|
||||||
|
@ -72,7 +72,14 @@ enum qse_tre_cflag_t
|
|||||||
QSE_TRE_NOSUBREG = (1 << 3),
|
QSE_TRE_NOSUBREG = (1 << 3),
|
||||||
QSE_TRE_LITERAL = (1 << 4),
|
QSE_TRE_LITERAL = (1 << 4),
|
||||||
QSE_TRE_RIGHTASSOC = (1 << 5),
|
QSE_TRE_RIGHTASSOC = (1 << 5),
|
||||||
QSE_TRE_UNGREEDY = (1 << 6)
|
QSE_TRE_UNGREEDY = (1 << 6),
|
||||||
|
|
||||||
|
/* Enable non-standard extensions:
|
||||||
|
* - Enable (?:text) for no submatch backreference.
|
||||||
|
* - Enable perl-like (?...) extensions like (?i)
|
||||||
|
* if QSE_TRE_EXTENDED is also set.
|
||||||
|
*/
|
||||||
|
QSE_TRE_NONSTDEXT = (1 << 7)
|
||||||
};
|
};
|
||||||
|
|
||||||
enum qse_tre_eflag_t
|
enum qse_tre_eflag_t
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: sed.h 562 2011-09-07 15:36:08Z hyunghwan.chung $
|
* $Id: sed.h 563 2011-09-08 07:49:53Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2011 Chung, Hyung-Hwan.
|
Copyright 2006-2011 Chung, Hyung-Hwan.
|
||||||
This file is part of QSE.
|
This file is part of QSE.
|
||||||
@ -133,14 +133,15 @@ typedef const qse_char_t* (*qse_sed_errstr_t) (
|
|||||||
*/
|
*/
|
||||||
enum qse_sed_option_t
|
enum qse_sed_option_t
|
||||||
{
|
{
|
||||||
QSE_SED_STRIPLS = (1 << 0), /**< strip leading spaces from text */
|
QSE_SED_STRIPLS = (1 << 0), /**< strip leading spaces from text */
|
||||||
QSE_SED_KEEPTBS = (1 << 1), /**< keep a trailing backslash */
|
QSE_SED_KEEPTBS = (1 << 1), /**< keep a trailing backslash */
|
||||||
QSE_SED_ENSURENL = (1 << 2), /**< ensure NL at the text end */
|
QSE_SED_ENSURENL = (1 << 2), /**< ensure NL at the text end */
|
||||||
QSE_SED_QUIET = (1 << 3), /**< do not print pattern space */
|
QSE_SED_QUIET = (1 << 3), /**< do not print pattern space */
|
||||||
QSE_SED_STRICT = (1 << 4), /**< do strict address check */
|
QSE_SED_STRICT = (1 << 4), /**< do strict address check */
|
||||||
QSE_SED_STARTSTEP = (1 << 5), /**< allow start~step */
|
QSE_SED_STARTSTEP = (1 << 5), /**< allow start~step */
|
||||||
QSE_SED_EXTENDEDREX = (1 << 6), /**< allow {n,m} in regular expression */
|
QSE_SED_EXTENDEDREX = (1 << 6), /**< use extended regex */
|
||||||
QSE_SED_SAMELINE = (1 << 7), /**< allow text on the same line as c, a, i */
|
QSE_SED_NONSTDEXTREX = (1 << 7), /**< enable non-standard extensions to regex */
|
||||||
|
QSE_SED_SAMELINE = (1 << 8), /**< allow text on the same line as c, a, i */
|
||||||
};
|
};
|
||||||
typedef enum qse_sed_option_t qse_sed_option_t;
|
typedef enum qse_sed_option_t qse_sed_option_t;
|
||||||
|
|
||||||
|
@ -1084,6 +1084,9 @@ tre_parse(tre_parse_ctx_t *ctx)
|
|||||||
break;
|
break;
|
||||||
/*FALLTHROUGH*/
|
/*FALLTHROUGH*/
|
||||||
case CHAR_STAR:
|
case CHAR_STAR:
|
||||||
|
/* QSE - added this label */
|
||||||
|
parse_star:
|
||||||
|
/* END QSE */
|
||||||
{
|
{
|
||||||
tre_ast_node_t *tmp_node;
|
tre_ast_node_t *tmp_node;
|
||||||
int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
|
int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
|
||||||
@ -1093,9 +1096,9 @@ tre_parse(tre_parse_ctx_t *ctx)
|
|||||||
const tre_char_t *tmp_re;
|
const tre_char_t *tmp_re;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (*ctx->re == CHAR_PLUS)
|
if (*ctx->re == CHAR_PLUS) /* QSE: case CHAR_PLUS fell through down here */
|
||||||
rep_min = 1;
|
rep_min = 1;
|
||||||
if (*ctx->re == CHAR_QUESTIONMARK)
|
if (*ctx->re == CHAR_QUESTIONMARK) /* QSE: case CHAR_QUESTIONMARK fell through down here */
|
||||||
rep_max = 1;
|
rep_max = 1;
|
||||||
#ifdef TRE_DEBUG
|
#ifdef TRE_DEBUG
|
||||||
tmp_re = ctx->re;
|
tmp_re = ctx->re;
|
||||||
@ -1103,7 +1106,7 @@ tre_parse(tre_parse_ctx_t *ctx)
|
|||||||
|
|
||||||
if (ctx->re + 1 < ctx->re_end)
|
if (ctx->re + 1 < ctx->re_end)
|
||||||
{
|
{
|
||||||
if (*(ctx->re + 1) == CHAR_QUESTIONMARK)
|
if (*(ctx->re + 1) == CHAR_QUESTIONMARK) /* QSE: +?, ??, *? */
|
||||||
{
|
{
|
||||||
minimal = !(ctx->cflags & REG_UNGREEDY);
|
minimal = !(ctx->cflags & REG_UNGREEDY);
|
||||||
ctx->re++;
|
ctx->re++;
|
||||||
@ -1136,6 +1139,8 @@ tre_parse(tre_parse_ctx_t *ctx)
|
|||||||
|
|
||||||
case CHAR_BACKSLASH:
|
case CHAR_BACKSLASH:
|
||||||
/* "\{" is special without REG_EXTENDED */
|
/* "\{" is special without REG_EXTENDED */
|
||||||
|
/* QSE - also handle \+ and \? */
|
||||||
|
/*
|
||||||
if (!(ctx->cflags & REG_EXTENDED)
|
if (!(ctx->cflags & REG_EXTENDED)
|
||||||
&& ctx->re + 1 < ctx->re_end
|
&& ctx->re + 1 < ctx->re_end
|
||||||
&& *(ctx->re + 1) == CHAR_LBRACE)
|
&& *(ctx->re + 1) == CHAR_LBRACE)
|
||||||
@ -1145,6 +1150,24 @@ tre_parse(tre_parse_ctx_t *ctx)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
|
*/
|
||||||
|
if (!(ctx->cflags & REG_EXTENDED) && ctx->re + 1 < ctx->re_end)
|
||||||
|
{
|
||||||
|
if (*(ctx->re + 1) == CHAR_LBRACE)
|
||||||
|
{
|
||||||
|
ctx->re++;
|
||||||
|
goto parse_brace;
|
||||||
|
}
|
||||||
|
else if (*(ctx->re + 1) == CHAR_PLUS ||
|
||||||
|
*(ctx->re + 1) == CHAR_QUESTIONMARK)
|
||||||
|
{
|
||||||
|
ctx->re++;
|
||||||
|
goto parse_star;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else break;
|
||||||
|
/* END QSE */
|
||||||
|
|
||||||
|
|
||||||
case CHAR_LBRACE:
|
case CHAR_LBRACE:
|
||||||
/* "{" is literal without REG_EXTENDED */
|
/* "{" is literal without REG_EXTENDED */
|
||||||
@ -1184,8 +1207,10 @@ parse_brace:
|
|||||||
|
|
||||||
/* Handle "(?...)" extensions. They work in a way similar
|
/* Handle "(?...)" extensions. They work in a way similar
|
||||||
to Perls corresponding extensions. */
|
to Perls corresponding extensions. */
|
||||||
if (ctx->cflags & REG_EXTENDED
|
/* QSE: added ctx->cflags & REG_NONSTDEXT */
|
||||||
&& *(ctx->re + 1) == CHAR_QUESTIONMARK)
|
if ((ctx->cflags & REG_NONSTDEXT) &&
|
||||||
|
(ctx->cflags & REG_EXTENDED) &&
|
||||||
|
*(ctx->re + 1) == CHAR_QUESTIONMARK)
|
||||||
{
|
{
|
||||||
int new_cflags = ctx->cflags;
|
int new_cflags = ctx->cflags;
|
||||||
int bit = 1;
|
int bit = 1;
|
||||||
@ -1293,10 +1318,13 @@ parse_brace:
|
|||||||
&& *(ctx->re - 1) == CHAR_BACKSLASH))
|
&& *(ctx->re - 1) == CHAR_BACKSLASH))
|
||||||
{
|
{
|
||||||
depth++;
|
depth++;
|
||||||
if (ctx->re + 2 < ctx->re_end
|
/* QSE: added ctx->cflags & REG_NONSTDEXT */
|
||||||
&& *(ctx->re + 1) == CHAR_QUESTIONMARK
|
if ((ctx->cflags & REG_NONSTDEXT) &&
|
||||||
&& *(ctx->re + 2) == CHAR_COLON)
|
ctx->re + 2 < ctx->re_end &&
|
||||||
|
*(ctx->re + 1) == CHAR_QUESTIONMARK &&
|
||||||
|
*(ctx->re + 2) == CHAR_COLON)
|
||||||
{
|
{
|
||||||
|
/* QSE: \(?: or (?: depending on REG_EXTENDED */
|
||||||
DPRINT(("tre_parse: group begin: '%.*" STRF
|
DPRINT(("tre_parse: group begin: '%.*" STRF
|
||||||
"', no submatch\n", REST(ctx->re)));
|
"', no submatch\n", REST(ctx->re)));
|
||||||
/* Don't mark for submatching. */
|
/* Don't mark for submatching. */
|
||||||
|
@ -133,6 +133,7 @@ typedef qse_cint_t tre_cint_t;
|
|||||||
#define REG_LITERAL QSE_TRE_LITERAL
|
#define REG_LITERAL QSE_TRE_LITERAL
|
||||||
#define REG_RIGHT_ASSOC QSE_TRE_RIGHTASSOC
|
#define REG_RIGHT_ASSOC QSE_TRE_RIGHTASSOC
|
||||||
#define REG_UNGREEDY QSE_TRE_UNGREEDY
|
#define REG_UNGREEDY QSE_TRE_UNGREEDY
|
||||||
|
#define REG_NONSTDEXT QSE_TRE_NONSTDEXT
|
||||||
|
|
||||||
/* POSIX tre_regexec() flags. */
|
/* POSIX tre_regexec() flags. */
|
||||||
#define REG_NOTBOL QSE_TRE_NOTBOL
|
#define REG_NOTBOL QSE_TRE_NOTBOL
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: sed.c 562 2011-09-07 15:36:08Z hyunghwan.chung $
|
* $Id: sed.c 563 2011-09-08 07:49:53Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2011 Chung, Hyung-Hwan.
|
Copyright 2006-2011 Chung, Hyung-Hwan.
|
||||||
This file is part of QSE.
|
This file is part of QSE.
|
||||||
@ -198,6 +198,7 @@ static void* build_rex (
|
|||||||
/* ignorecase is a compile option for TRE */
|
/* ignorecase is a compile option for TRE */
|
||||||
if (ignorecase) opt |= QSE_TRE_IGNORECASE;
|
if (ignorecase) opt |= QSE_TRE_IGNORECASE;
|
||||||
if (sed->option & QSE_SED_EXTENDEDREX) opt |= QSE_TRE_EXTENDED;
|
if (sed->option & QSE_SED_EXTENDEDREX) opt |= QSE_TRE_EXTENDED;
|
||||||
|
if (sed->option & QSE_SED_NONSTDEXTREX) opt |= QSE_TRE_NONSTDEXT;
|
||||||
|
|
||||||
if (qse_tre_compx (tre, str->ptr, str->len, QSE_NULL, opt) <= -1)
|
if (qse_tre_compx (tre, str->ptr, str->len, QSE_NULL, opt) <= -1)
|
||||||
{
|
{
|
||||||
@ -230,7 +231,7 @@ static int matchtre (
|
|||||||
qse_cstr_t submat[9], const qse_sed_loc_t* loc)
|
qse_cstr_t submat[9], const qse_sed_loc_t* loc)
|
||||||
{
|
{
|
||||||
int n;
|
int n;
|
||||||
qse_tre_match_t match[10];
|
qse_tre_match_t match[10] = { { 0, 0 }, };
|
||||||
|
|
||||||
n = qse_tre_execx (tre, str->ptr, str->len, match, 10, opt);
|
n = qse_tre_execx (tre, str->ptr, str->len, match, 10, opt);
|
||||||
if (n <= -1)
|
if (n <= -1)
|
||||||
@ -258,7 +259,7 @@ static int matchtre (
|
|||||||
|
|
||||||
/* you must initialize submat before you pass into this
|
/* you must initialize submat before you pass into this
|
||||||
* function because it can abort filling */
|
* function because it can abort filling */
|
||||||
for (i = 1; i <= 10; i++)
|
for (i = 1; i < QSE_COUNTOF(match); i++)
|
||||||
{
|
{
|
||||||
if (match[i].rm_so == -1) break;
|
if (match[i].rm_so == -1) break;
|
||||||
submat[i-1].ptr = &str->ptr[match[i].rm_so];
|
submat[i-1].ptr = &str->ptr[match[i].rm_so];
|
||||||
@ -569,6 +570,8 @@ static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend)
|
|||||||
{
|
{
|
||||||
if (pickup_rex (sed, rxend, 1, QSE_NULL, &sed->tmp.rex) <= -1) return QSE_NULL;
|
if (pickup_rex (sed, rxend, 1, QSE_NULL, &sed->tmp.rex) <= -1) return QSE_NULL;
|
||||||
/* TODO: support ignore case option for address */
|
/* TODO: support ignore case option for address */
|
||||||
|
|
||||||
|
if (QSE_STR_LEN(&sed->tmp.rex) <= 0) return EMPTY_REX;
|
||||||
return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), 0, &sed->src.loc);
|
return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), 0, &sed->src.loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -592,11 +595,6 @@ static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a)
|
|||||||
}
|
}
|
||||||
while ((c = CURSC(sed)) >= QSE_T('0') && c <= QSE_T('9'));
|
while ((c = CURSC(sed)) >= QSE_T('0') && c <= QSE_T('9'));
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* line number 0 is illegal */
|
|
||||||
if (lno == 0) return QSE_NULL;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
a->type = QSE_SED_ADR_LINE;
|
a->type = QSE_SED_ADR_LINE;
|
||||||
a->u.lno = lno;
|
a->u.lno = lno;
|
||||||
}
|
}
|
||||||
@ -985,8 +983,8 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
|
|
||||||
if (pickup_rex (sed, delim, 1, cmd, t[0]) <= -1) goto oops;
|
if (pickup_rex (sed, delim, 1, cmd, t[0]) <= -1) goto oops;
|
||||||
if (pickup_rex (sed, delim, 0, cmd, t[1]) <= -1) goto oops;
|
if (pickup_rex (sed, delim, 0, cmd, t[1]) <= -1) goto oops;
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
|
/* calling pickup_rex twice above instead of commenting out this part */
|
||||||
for (i = 0; i < 2; i++)
|
for (i = 0; i < 2; i++)
|
||||||
{
|
{
|
||||||
c = NXTSC (sed);
|
c = NXTSC (sed);
|
||||||
@ -2517,17 +2515,13 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case QSE_SED_CMD_QUIT:
|
case QSE_SED_CMD_QUIT:
|
||||||
if (!(sed->option && QSE_SED_QUIET))
|
|
||||||
{
|
|
||||||
n = write_str (sed,
|
|
||||||
QSE_STR_PTR(&sed->e.in.line),
|
|
||||||
QSE_STR_LEN(&sed->e.in.line));
|
|
||||||
if (n <= -1) return QSE_NULL;
|
|
||||||
}
|
|
||||||
case QSE_SED_CMD_QUIT_QUIET:
|
|
||||||
jumpto = &sed->cmd.quit;
|
jumpto = &sed->cmd.quit;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case QSE_SED_CMD_QUIT_QUIET:
|
||||||
|
jumpto = &sed->cmd.quit_quiet;
|
||||||
|
break;
|
||||||
|
|
||||||
case QSE_SED_CMD_APPEND:
|
case QSE_SED_CMD_APPEND:
|
||||||
if (qse_lda_insert (
|
if (qse_lda_insert (
|
||||||
&sed->e.txt.appended,
|
&sed->e.txt.appended,
|
||||||
@ -3052,6 +3046,7 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf)
|
|||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
qse_size_t i;
|
qse_size_t i;
|
||||||
|
int quit = 0;
|
||||||
|
|
||||||
n = read_line (sed, 0);
|
n = read_line (sed, 0);
|
||||||
if (n <= -1) { ret = -1; goto done; }
|
if (n <= -1) { ret = -1; goto done; }
|
||||||
@ -3081,7 +3076,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf)
|
|||||||
|
|
||||||
j = exec_cmd (sed, c);
|
j = exec_cmd (sed, c);
|
||||||
if (j == QSE_NULL) { ret = -1; goto done; }
|
if (j == QSE_NULL) { ret = -1; goto done; }
|
||||||
if (j == &sed->cmd.quit) goto done;
|
if (j == &sed->cmd.quit_quiet) goto done;
|
||||||
|
if (j == &sed->cmd.quit) { quit = 1; break; }
|
||||||
if (j == &sed->cmd.again) goto again;
|
if (j == &sed->cmd.again) goto again;
|
||||||
|
|
||||||
/* go to the next command */
|
/* go to the next command */
|
||||||
@ -3118,6 +3114,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf)
|
|||||||
* in write functions */
|
* in write functions */
|
||||||
n = flush (sed);
|
n = flush (sed);
|
||||||
if (n <= -1) { ret = -1; goto done; }
|
if (n <= -1) { ret = -1; goto done; }
|
||||||
|
|
||||||
|
if (quit) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
done:
|
done:
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: sed.h 560 2011-09-06 14:18:36Z hyunghwan.chung $
|
* $Id: sed.h 563 2011-09-08 07:49:53Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2011 Chung, Hyung-Hwan.
|
Copyright 2006-2011 Chung, Hyung-Hwan.
|
||||||
This file is part of QSE.
|
This file is part of QSE.
|
||||||
@ -215,6 +215,7 @@ struct qse_sed_t
|
|||||||
qse_sed_cmd_blk_t* lb; /**< points to the last block */
|
qse_sed_cmd_blk_t* lb; /**< points to the last block */
|
||||||
|
|
||||||
qse_sed_cmd_t quit;
|
qse_sed_cmd_t quit;
|
||||||
|
qse_sed_cmd_t quit_quiet;
|
||||||
qse_sed_cmd_t again;
|
qse_sed_cmd_t again;
|
||||||
qse_sed_cmd_t over;
|
qse_sed_cmd_t over;
|
||||||
} cmd;
|
} cmd;
|
||||||
|
Loading…
Reference in New Issue
Block a user