From 516031f17f521aa49161beb6d23dd7d59fea4f08 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Wed, 21 Sep 2011 23:10:09 +0000 Subject: [PATCH] added escape sequences \xnn and \Xnnnnnnnn. added 'z' command. added 'I' address modifier --- qse/lib/sed/sed.c | 118 ++++++++++++++++++++++++++++++++++++++++------ qse/lib/sed/sed.h | 3 +- 2 files changed, 105 insertions(+), 16 deletions(-) diff --git a/qse/lib/sed/sed.c b/qse/lib/sed/sed.c index 0c02913d..1e40cb14 100644 --- a/qse/lib/sed/sed.c +++ b/qse/lib/sed/sed.c @@ -1,5 +1,5 @@ /* - * $Id: sed.c 570 2011-09-20 04:40:45Z hyunghwan.chung $ + * $Id: sed.c 572 2011-09-21 05:10:09Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. @@ -346,6 +346,7 @@ static int matchtre ( #define CURSC(sed) ((sed)->src.cc) #define NXTSC(sed) getnextsc(sed) +#define PEEPNXTSC(sed) (((sed)->src.cur < (sed)->src.end)? *(sed)->src.cur: QSE_CHAR_EOF) /* next source char, not consumed; QSE_CHAR_EOF at the end */ static qse_cint_t getnextsc (qse_sed_t* sed) { @@ -472,8 +473,18 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd) } } -static qse_cint_t trans_escaped (qse_cint_t c) + +static QSE_INLINE int xdigit_to_num (qse_cint_t c) /* value of the hex digit c, or -1 if c is not a hex digit */ { + return (c >= QSE_T('0') && c <= QSE_T('9'))? (c - QSE_T('0')): + (c >= QSE_T('A') && c <= QSE_T('F'))? (c - QSE_T('A') + 10): + (c >= QSE_T('a') && c <= QSE_T('f'))? (c - QSE_T('a') + 10): -1; +} + +static qse_cint_t trans_escaped (qse_sed_t* sed, qse_cint_t c, int* xamp) +{ + if (xamp) *xamp = 0; + switch (c) { case QSE_T('a'): @@ -500,6 +511,51 @@ Omitted for clash with regular expression \b. case QSE_T('v'): c = QSE_T('\v'); break; + + case QSE_T('x'): + { + /* \xnn - one or two hexadecimal digits */ + int cc; + + cc = xdigit_to_num(PEEPNXTSC(sed)); + if (cc <= -1) break; + NXTSC(sed); + c = cc; + + cc = xdigit_to_num(PEEPNXTSC(sed)); + if (cc <= -1) break; + NXTSC(sed); + c = (c << 4) | cc; + + /* let's indicate that '&' is built from \x26. 
*/ + if (xamp && c == QSE_T('&')) *xamp = 1; + break; + } + +#ifdef QSE_CHAR_IS_WCHAR + case QSE_T('X'): + { + /* \Xnnnnnnnn for wchar_t - at most two hex digits per byte of qse_char_t */ + int cc, i; + + cc = xdigit_to_num(PEEPNXTSC(sed)); + if (cc <= -1) break; + NXTSC(sed); + c = cc; + + for (i = 1; i < QSE_SIZEOF(qse_char_t) * 2; i++) /* start at 1: the first digit is already consumed above */ + { + cc = xdigit_to_num(PEEPNXTSC(sed)); + if (cc <= -1) break; + NXTSC(sed); + c = (c << 4) | cc; + } + + /* let's indicate that '&' is built from a \X escape. */ + if (xamp && c == QSE_T('&')) *xamp = 1; + break; + } +#endif } return c; @@ -507,8 +563,13 @@ Omitted for clash with regular expression \b. static int pickup_rex ( qse_sed_t* sed, qse_char_t rxend, - int really, const qse_sed_cmd_t* cmd, qse_str_t* buf) + int replacement, const qse_sed_cmd_t* cmd, qse_str_t* buf) { + /* + * 'replacement' indicates that this function is called for the + * 'replacement' part of 's/pattern/replacement/'. + */ + qse_cint_t c; qse_size_t chars_from_opening_bracket = 0; int bracket_state = 0; @@ -573,7 +634,7 @@ static int pickup_rex ( if (bracket_state > 0 && nc == QSE_T(']')) { /* - * if 'really' is not set, bracket_state is alyway 0. + * if 'replacement' is set, bracket_state is always 0. * so this block is never reached. * * a backslashed closing bracket is seen. @@ -589,13 +650,20 @@ static int pickup_rex ( else { qse_cint_t ec; + int xamp; - ec = trans_escaped (nc); - if (ec == nc) + ec = trans_escaped (sed, nc, &xamp); + if (ec == nc || (xamp && replacement)) { /* if the character after a backslash is not special * at the this layer, add the backslash into the - * regular expression buffer as it is. */ + * regular expression buffer as it is. + * + * if \x26 is found in the replacement, i also need to + * transform it to \& so that it is not treated as a + * special &. 
+ */ + + if (qse_str_ccat (buf, QSE_T('\\')) == (qse_size_t)-1) { SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); @@ -605,7 +673,7 @@ static int pickup_rex ( c = ec; } } - else if (really) + else if (!replacement) { /* this block sets a flag to indicate that we are in [] * of a regular expression. */ @@ -662,12 +730,22 @@ static int pickup_rex ( static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend) { - if (pickup_rex (sed, rxend, 1, QSE_NULL, &sed->tmp.rex) <= -1) + int ignorecase = 0; + + if (pickup_rex (sed, rxend, 0, QSE_NULL, &sed->tmp.rex) <= -1) return QSE_NULL; -/* TODO: support ignore case option for address */ if (QSE_STR_LEN(&sed->tmp.rex) <= 0) return EMPTY_REX; - return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), 0, &sed->src.loc); + + /* handle the 'I' (ignore case) modifier only after the empty regex check, + * so a modifier is naturally disallowed for an empty regex. */ + if (PEEPNXTSC(sed) == QSE_T('I')) + { + ignorecase = 1; + NXTSC(sed); + } + + return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), ignorecase, &sed->src.loc); } static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a) @@ -1084,8 +1162,8 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd) goto oops; } - if (pickup_rex (sed, delim, 1, cmd, t[0]) <= -1) goto oops; - if (pickup_rex (sed, delim, 0, cmd, t[1]) <= -1) goto oops; + if (pickup_rex (sed, delim, 0, cmd, t[0]) <= -1) goto oops; + if (pickup_rex (sed, delim, 1, cmd, t[1]) <= -1) goto oops; /* skip spaces before options */ do { c = NXTSC(sed); } while (IS_SPACE(c)); @@ -1212,7 +1290,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd) { c = NXTSC (sed); CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops); - c = trans_escaped (c); + c = trans_escaped (sed, c, QSE_NULL); } b[0] = c; @@ -1234,7 +1312,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd) { c = NXTSC (sed); CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops); - c = trans_escaped (c); + c = trans_escaped (sed, c, QSE_NULL); } if (pos >= 
QSE_STR_LEN(t)) @@ -1459,6 +1537,8 @@ static int get_command (qse_sed_t* sed, qse_sed_cmd_t* cmd) case QSE_T('n'): case QSE_T('N'): + + case QSE_T('z'): cmd->type = c; NXTSC (sed); if (terminate_command (sed) <= -1) return -1; @@ -1491,6 +1571,7 @@ static int get_command (qse_sed_t* sed, qse_sed_cmd_t* cmd) NXTSC (sed); if (get_transet (sed, cmd) <= -1) return -1; break; + } return 0; @@ -2882,6 +2963,13 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd) } break; } + + case QSE_SED_CMD_CLEAR_PATTERN: + { + /* the 'z' command: clear the pattern space kept in sed->e.in.line */ + qse_str_clear (&sed->e.in.line); + break; + } } if (jumpto == QSE_NULL) jumpto = cmd->state.next; diff --git a/qse/lib/sed/sed.h b/qse/lib/sed/sed.h index f63ed652..a4c0f9f7 100644 --- a/qse/lib/sed/sed.h +++ b/qse/lib/sed/sed.h @@ -1,5 +1,5 @@ /* - * $Id: sed.h 570 2011-09-20 04:40:45Z hyunghwan.chung $ + * $Id: sed.h 572 2011-09-21 05:10:09Z hyunghwan.chung $ * Copyright 2006-2011 Chung, Hyung-Hwan. This file is part of QSE. @@ -91,6 +91,7 @@ struct qse_sed_adr_t #define QSE_SED_CMD_BRANCH_COND QSE_T('t') #define QSE_SED_CMD_SUBSTITUTE QSE_T('s') #define QSE_SED_CMD_TRANSLATE QSE_T('y') +#define QSE_SED_CMD_CLEAR_PATTERN QSE_T('z') struct qse_sed_cmd_t {