added escape sequences \xnn and \Xnnnnnnnn.

added 'z' command.
added 'I' address modifier
This commit is contained in:
hyung-hwan 2011-09-21 23:10:09 +00:00
parent 716237d6c0
commit 516031f17f
2 changed files with 105 additions and 16 deletions

View File

@ -1,5 +1,5 @@
/* /*
* $Id: sed.c 570 2011-09-20 04:40:45Z hyunghwan.chung $ * $Id: sed.c 572 2011-09-21 05:10:09Z hyunghwan.chung $
* *
Copyright 2006-2011 Chung, Hyung-Hwan. Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -346,6 +346,7 @@ static int matchtre (
#define CURSC(sed) ((sed)->src.cc) #define CURSC(sed) ((sed)->src.cc)
#define NXTSC(sed) getnextsc(sed) #define NXTSC(sed) getnextsc(sed)
#define PEEPNXTSC(sed) ((sed->src.cur < sed->src.end)? *sed->src.cur: QSE_CHAR_EOF)
static qse_cint_t getnextsc (qse_sed_t* sed) static qse_cint_t getnextsc (qse_sed_t* sed)
{ {
@ -472,8 +473,18 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
} }
} }
static qse_cint_t trans_escaped (qse_cint_t c)
static QSE_INLINE int xdigit_to_num (qse_cint_t c)
{ {
return (c >= QSE_T('0') && c <= QSE_T('9'))? (c - QSE_T('0')):
(c >= QSE_T('A') && c <= QSE_T('F'))? (c - QSE_T('A') + 10):
(c >= QSE_T('a') && c <= QSE_T('f'))? (c - QSE_T('a') + 10): -1;
}
static qse_cint_t trans_escaped (qse_sed_t* sed, qse_cint_t c, int* xamp)
{
if (xamp) *xamp = 0;
switch (c) switch (c)
{ {
case QSE_T('a'): case QSE_T('a'):
@ -500,6 +511,51 @@ Omitted for clash with regular expression \b.
case QSE_T('v'): case QSE_T('v'):
c = QSE_T('\v'); c = QSE_T('\v');
break; break;
case QSE_T('x'):
{
/* \xnn */
int cc;
cc = xdigit_to_num(PEEPNXTSC(sed));
if (cc <= -1) break;
NXTSC(sed);
c = cc;
cc = xdigit_to_num(PEEPNXTSC(sed));
if (cc <= -1) break;
NXTSC(sed);
c = (c << 4) | cc;
/* let's indicate that '&' is built from \x26. */
if (xamp && c == QSE_T('&')) *xamp = 1;
break;
}
#ifdef QSE_CHAR_IS_WCHAR
case QSE_T('X'):
{
/* \Xnnnnnnnn for wchar_t */
int cc, i;
cc = xdigit_to_num(PEEPNXTSC(sed));
if (cc <= -1) break;
NXTSC(sed);
c = cc;
for (i = 0; i < QSE_SIZEOF(qse_char_t) * 2; i++)
{
cc = xdigit_to_num(PEEPNXTSC(sed));
if (cc <= -1) break;
NXTSC(sed);
c = (c << 4) | cc;
}
/* let's indicate that '&' is built from a \X escape such as \X00000026. */
if (xamp && c == QSE_T('&')) *xamp = 1;
break;
}
#endif
} }
return c; return c;
@ -507,8 +563,13 @@ Omitted for clash with regular expression \b.
static int pickup_rex ( static int pickup_rex (
qse_sed_t* sed, qse_char_t rxend, qse_sed_t* sed, qse_char_t rxend,
int really, const qse_sed_cmd_t* cmd, qse_str_t* buf) int replacement, const qse_sed_cmd_t* cmd, qse_str_t* buf)
{ {
/*
* 'replacement' indicates that this function is called for the
* 'replacement' part of 's/pattern/replacement/'.
*/
qse_cint_t c; qse_cint_t c;
qse_size_t chars_from_opening_bracket = 0; qse_size_t chars_from_opening_bracket = 0;
int bracket_state = 0; int bracket_state = 0;
@ -573,7 +634,7 @@ static int pickup_rex (
if (bracket_state > 0 && nc == QSE_T(']')) if (bracket_state > 0 && nc == QSE_T(']'))
{ {
/* /*
* if 'really' is not set, bracket_state is always 0. * if 'replacement' is set, bracket_state is always 0.
* so this block is never reached. * so this block is never reached.
* *
* a backslashed closing bracket is seen. * a backslashed closing bracket is seen.
@ -589,13 +650,20 @@ static int pickup_rex (
else else
{ {
qse_cint_t ec; qse_cint_t ec;
int xamp;
ec = trans_escaped (nc); ec = trans_escaped (sed, nc, &xamp);
if (ec == nc) if (ec == nc || (xamp && replacement))
{ {
/* if the character after a backslash is not special /* if the character after a backslash is not special
* at this layer, add the backslash into the * at this layer, add the backslash into the
* regular expression buffer as it is. */ * regular expression buffer as it is.
*
* if \x26 is found in the replacement, it also needs to be
* transformed to \& so that it is not treated as the
* special &.
*/
if (qse_str_ccat (buf, QSE_T('\\')) == (qse_size_t)-1) if (qse_str_ccat (buf, QSE_T('\\')) == (qse_size_t)-1)
{ {
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
@ -605,7 +673,7 @@ static int pickup_rex (
c = ec; c = ec;
} }
} }
else if (really) else if (!replacement)
{ {
/* this block sets a flag to indicate that we are in [] /* this block sets a flag to indicate that we are in []
* of a regular expression. */ * of a regular expression. */
@ -662,12 +730,22 @@ static int pickup_rex (
static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend) static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend)
{ {
if (pickup_rex (sed, rxend, 1, QSE_NULL, &sed->tmp.rex) <= -1) int ignorecase = 0;
if (pickup_rex (sed, rxend, 0, QSE_NULL, &sed->tmp.rex) <= -1)
return QSE_NULL; return QSE_NULL;
/* TODO: support ignore case option for address */
if (QSE_STR_LEN(&sed->tmp.rex) <= 0) return EMPTY_REX; if (QSE_STR_LEN(&sed->tmp.rex) <= 0) return EMPTY_REX;
return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), 0, &sed->src.loc);
/* handle a modifier only after the empty-regex case has been handled,
* so a modifier is naturally disallowed for an empty regex. */
if (PEEPNXTSC(sed) == QSE_T('I'))
{
ignorecase = 1;
NXTSC(sed);
}
return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), ignorecase, &sed->src.loc);
} }
static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a) static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a)
@ -1084,8 +1162,8 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
goto oops; goto oops;
} }
if (pickup_rex (sed, delim, 1, cmd, t[0]) <= -1) goto oops; if (pickup_rex (sed, delim, 0, cmd, t[0]) <= -1) goto oops;
if (pickup_rex (sed, delim, 0, cmd, t[1]) <= -1) goto oops; if (pickup_rex (sed, delim, 1, cmd, t[1]) <= -1) goto oops;
/* skip spaces before options */ /* skip spaces before options */
do { c = NXTSC(sed); } while (IS_SPACE(c)); do { c = NXTSC(sed); } while (IS_SPACE(c));
@ -1212,7 +1290,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{ {
c = NXTSC (sed); c = NXTSC (sed);
CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops); CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops);
c = trans_escaped (c); c = trans_escaped (sed, c, QSE_NULL);
} }
b[0] = c; b[0] = c;
@ -1234,7 +1312,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{ {
c = NXTSC (sed); c = NXTSC (sed);
CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops); CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops);
c = trans_escaped (c); c = trans_escaped (sed, c, QSE_NULL);
} }
if (pos >= QSE_STR_LEN(t)) if (pos >= QSE_STR_LEN(t))
@ -1459,6 +1537,8 @@ static int get_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
case QSE_T('n'): case QSE_T('n'):
case QSE_T('N'): case QSE_T('N'):
case QSE_T('z'):
cmd->type = c; cmd->type = c;
NXTSC (sed); NXTSC (sed);
if (terminate_command (sed) <= -1) return -1; if (terminate_command (sed) <= -1) return -1;
@ -1491,6 +1571,7 @@ static int get_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
NXTSC (sed); NXTSC (sed);
if (get_transet (sed, cmd) <= -1) return -1; if (get_transet (sed, cmd) <= -1) return -1;
break; break;
} }
return 0; return 0;
@ -2882,6 +2963,13 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
} }
break; break;
} }
case QSE_SED_CMD_CLEAR_PATTERN:
{
/* clear pattern space */
qse_str_clear (&sed->e.in.line);
break;
}
} }
if (jumpto == QSE_NULL) jumpto = cmd->state.next; if (jumpto == QSE_NULL) jumpto = cmd->state.next;

View File

@ -1,5 +1,5 @@
/* /*
* $Id: sed.h 570 2011-09-20 04:40:45Z hyunghwan.chung $ * $Id: sed.h 572 2011-09-21 05:10:09Z hyunghwan.chung $
* *
Copyright 2006-2011 Chung, Hyung-Hwan. Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -91,6 +91,7 @@ struct qse_sed_adr_t
#define QSE_SED_CMD_BRANCH_COND QSE_T('t') #define QSE_SED_CMD_BRANCH_COND QSE_T('t')
#define QSE_SED_CMD_SUBSTITUTE QSE_T('s') #define QSE_SED_CMD_SUBSTITUTE QSE_T('s')
#define QSE_SED_CMD_TRANSLATE QSE_T('y') #define QSE_SED_CMD_TRANSLATE QSE_T('y')
#define QSE_SED_CMD_CLEAR_PATTERN QSE_T('z')
struct qse_sed_cmd_t struct qse_sed_cmd_t
{ {