added escape sequences \xnn and \Xnnnnnnnn.
added 'z' command. added 'I' address modifier
This commit is contained in:
parent
716237d6c0
commit
516031f17f
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: sed.c 570 2011-09-20 04:40:45Z hyunghwan.chung $
|
||||
* $Id: sed.c 572 2011-09-21 05:10:09Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2011 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -346,6 +346,7 @@ static int matchtre (
|
||||
|
||||
#define CURSC(sed) ((sed)->src.cc)
|
||||
#define NXTSC(sed) getnextsc(sed)
|
||||
#define PEEPNXTSC(sed) ((sed->src.cur < sed->src.end)? *sed->src.cur: QSE_CHAR_EOF)
|
||||
|
||||
static qse_cint_t getnextsc (qse_sed_t* sed)
|
||||
{
|
||||
@ -472,8 +473,18 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
}
|
||||
}
|
||||
|
||||
static qse_cint_t trans_escaped (qse_cint_t c)
|
||||
|
||||
static QSE_INLINE int xdigit_to_num (qse_cint_t c)
|
||||
{
|
||||
return (c >= QSE_T('0') && c <= QSE_T('9'))? (c - QSE_T('0')):
|
||||
(c >= QSE_T('A') && c <= QSE_T('F'))? (c - QSE_T('A') + 10):
|
||||
(c >= QSE_T('a') && c <= QSE_T('f'))? (c - QSE_T('a') + 10): -1;
|
||||
}
|
||||
|
||||
static qse_cint_t trans_escaped (qse_sed_t* sed, qse_cint_t c, int* xamp)
|
||||
{
|
||||
if (xamp) *xamp = 0;
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case QSE_T('a'):
|
||||
@ -500,6 +511,51 @@ Omitted for clash with regular expression \b.
|
||||
case QSE_T('v'):
|
||||
c = QSE_T('\v');
|
||||
break;
|
||||
|
||||
case QSE_T('x'):
|
||||
{
|
||||
/* \xnn */
|
||||
int cc;
|
||||
|
||||
cc = xdigit_to_num(PEEPNXTSC(sed));
|
||||
if (cc <= -1) break;
|
||||
NXTSC(sed);
|
||||
c = cc;
|
||||
|
||||
cc = xdigit_to_num(PEEPNXTSC(sed));
|
||||
if (cc <= -1) break;
|
||||
NXTSC(sed);
|
||||
c = (c << 4) | cc;
|
||||
|
||||
/* let's indicate that '&' is built from \x26. */
|
||||
if (xamp && c == QSE_T('&')) *xamp = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef QSE_CHAR_IS_WCHAR
|
||||
case QSE_T('X'):
|
||||
{
|
||||
/* \Xnnnnnnnn for wchar_t */
|
||||
int cc, i;
|
||||
|
||||
cc = xdigit_to_num(PEEPNXTSC(sed));
|
||||
if (cc <= -1) break;
|
||||
NXTSC(sed);
|
||||
c = cc;
|
||||
|
||||
for (i = 0; i < QSE_SIZEOF(qse_char_t) * 2; i++)
|
||||
{
|
||||
cc = xdigit_to_num(PEEPNXTSC(sed));
|
||||
if (cc <= -1) break;
|
||||
NXTSC(sed);
|
||||
c = (c << 4) | cc;
|
||||
}
|
||||
|
||||
/* let's indicate that '&' is built from \x26. */
|
||||
if (xamp && c == QSE_T('&')) *xamp = 1;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return c;
|
||||
@ -507,8 +563,13 @@ Omitted for clash with regular expression \b.
|
||||
|
||||
static int pickup_rex (
|
||||
qse_sed_t* sed, qse_char_t rxend,
|
||||
int really, const qse_sed_cmd_t* cmd, qse_str_t* buf)
|
||||
int replacement, const qse_sed_cmd_t* cmd, qse_str_t* buf)
|
||||
{
|
||||
/*
|
||||
* 'replacement' indicates that this functions is called for
|
||||
* 'replacement' in 's/pattern/replacement'.
|
||||
*/
|
||||
|
||||
qse_cint_t c;
|
||||
qse_size_t chars_from_opening_bracket = 0;
|
||||
int bracket_state = 0;
|
||||
@ -573,7 +634,7 @@ static int pickup_rex (
|
||||
if (bracket_state > 0 && nc == QSE_T(']'))
|
||||
{
|
||||
/*
|
||||
* if 'really' is not set, bracket_state is alyway 0.
|
||||
* if 'replacement' is not set, bracket_state is alyway 0.
|
||||
* so this block is never reached.
|
||||
*
|
||||
* a backslashed closing bracket is seen.
|
||||
@ -589,13 +650,20 @@ static int pickup_rex (
|
||||
else
|
||||
{
|
||||
qse_cint_t ec;
|
||||
int xamp;
|
||||
|
||||
ec = trans_escaped (nc);
|
||||
if (ec == nc)
|
||||
ec = trans_escaped (sed, nc, &xamp);
|
||||
if (ec == nc || (xamp && replacement))
|
||||
{
|
||||
/* if the character after a backslash is not special
|
||||
* at the this layer, add the backslash into the
|
||||
* regular expression buffer as it is. */
|
||||
* regular expression buffer as it is.
|
||||
*
|
||||
* if \x26 is found in the replacement, i also need to
|
||||
* transform it to \& so that it is not treated as a
|
||||
* special &.
|
||||
*/
|
||||
|
||||
if (qse_str_ccat (buf, QSE_T('\\')) == (qse_size_t)-1)
|
||||
{
|
||||
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
|
||||
@ -605,7 +673,7 @@ static int pickup_rex (
|
||||
c = ec;
|
||||
}
|
||||
}
|
||||
else if (really)
|
||||
else if (!replacement)
|
||||
{
|
||||
/* this block sets a flag to indicate that we are in []
|
||||
* of a regular expression. */
|
||||
@ -662,12 +730,22 @@ static int pickup_rex (
|
||||
|
||||
static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend)
|
||||
{
|
||||
if (pickup_rex (sed, rxend, 1, QSE_NULL, &sed->tmp.rex) <= -1)
|
||||
int ignorecase = 0;
|
||||
|
||||
if (pickup_rex (sed, rxend, 0, QSE_NULL, &sed->tmp.rex) <= -1)
|
||||
return QSE_NULL;
|
||||
|
||||
/* TODO: support ignore case option for address */
|
||||
if (QSE_STR_LEN(&sed->tmp.rex) <= 0) return EMPTY_REX;
|
||||
return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), 0, &sed->src.loc);
|
||||
|
||||
/* handle a modifer after having handled an empty regex.
|
||||
* so a modifier is naturally disallowed for an empty regex. */
|
||||
if (PEEPNXTSC(sed) == QSE_T('I'))
|
||||
{
|
||||
ignorecase = 1;
|
||||
NXTSC(sed);
|
||||
}
|
||||
|
||||
return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), ignorecase, &sed->src.loc);
|
||||
}
|
||||
|
||||
static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a)
|
||||
@ -1084,8 +1162,8 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
goto oops;
|
||||
}
|
||||
|
||||
if (pickup_rex (sed, delim, 1, cmd, t[0]) <= -1) goto oops;
|
||||
if (pickup_rex (sed, delim, 0, cmd, t[1]) <= -1) goto oops;
|
||||
if (pickup_rex (sed, delim, 0, cmd, t[0]) <= -1) goto oops;
|
||||
if (pickup_rex (sed, delim, 1, cmd, t[1]) <= -1) goto oops;
|
||||
|
||||
/* skip spaces before options */
|
||||
do { c = NXTSC(sed); } while (IS_SPACE(c));
|
||||
@ -1212,7 +1290,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
{
|
||||
c = NXTSC (sed);
|
||||
CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops);
|
||||
c = trans_escaped (c);
|
||||
c = trans_escaped (sed, c, QSE_NULL);
|
||||
}
|
||||
|
||||
b[0] = c;
|
||||
@ -1234,7 +1312,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
{
|
||||
c = NXTSC (sed);
|
||||
CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops);
|
||||
c = trans_escaped (c);
|
||||
c = trans_escaped (sed, c, QSE_NULL);
|
||||
}
|
||||
|
||||
if (pos >= QSE_STR_LEN(t))
|
||||
@ -1459,6 +1537,8 @@ static int get_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
|
||||
case QSE_T('n'):
|
||||
case QSE_T('N'):
|
||||
|
||||
case QSE_T('z'):
|
||||
cmd->type = c;
|
||||
NXTSC (sed);
|
||||
if (terminate_command (sed) <= -1) return -1;
|
||||
@ -1491,6 +1571,7 @@ static int get_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
NXTSC (sed);
|
||||
if (get_transet (sed, cmd) <= -1) return -1;
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -2882,6 +2963,13 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case QSE_SED_CMD_CLEAR_PATTERN:
|
||||
{
|
||||
/* clear pattern space */
|
||||
qse_str_clear (&sed->e.in.line);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (jumpto == QSE_NULL) jumpto = cmd->state.next;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: sed.h 570 2011-09-20 04:40:45Z hyunghwan.chung $
|
||||
* $Id: sed.h 572 2011-09-21 05:10:09Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2011 Chung, Hyung-Hwan.
|
||||
This file is part of QSE.
|
||||
@ -91,6 +91,7 @@ struct qse_sed_adr_t
|
||||
#define QSE_SED_CMD_BRANCH_COND QSE_T('t')
|
||||
#define QSE_SED_CMD_SUBSTITUTE QSE_T('s')
|
||||
#define QSE_SED_CMD_TRANSLATE QSE_T('y')
|
||||
#define QSE_SED_CMD_CLEAR_PATTERN QSE_T('z')
|
||||
|
||||
struct qse_sed_cmd_t
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user