added escape sequences \xnn and \Xnnnnnnnn.
added 'z' command. added 'I' address modifier
This commit is contained in:
parent
716237d6c0
commit
516031f17f
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: sed.c 570 2011-09-20 04:40:45Z hyunghwan.chung $
|
* $Id: sed.c 572 2011-09-21 05:10:09Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2011 Chung, Hyung-Hwan.
|
Copyright 2006-2011 Chung, Hyung-Hwan.
|
||||||
This file is part of QSE.
|
This file is part of QSE.
|
||||||
@ -346,6 +346,7 @@ static int matchtre (
|
|||||||
|
|
||||||
#define CURSC(sed) ((sed)->src.cc)
|
#define CURSC(sed) ((sed)->src.cc)
|
||||||
#define NXTSC(sed) getnextsc(sed)
|
#define NXTSC(sed) getnextsc(sed)
|
||||||
|
#define PEEPNXTSC(sed) ((sed->src.cur < sed->src.end)? *sed->src.cur: QSE_CHAR_EOF)
|
||||||
|
|
||||||
static qse_cint_t getnextsc (qse_sed_t* sed)
|
static qse_cint_t getnextsc (qse_sed_t* sed)
|
||||||
{
|
{
|
||||||
@ -472,8 +473,18 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static qse_cint_t trans_escaped (qse_cint_t c)
|
|
||||||
|
static QSE_INLINE int xdigit_to_num (qse_cint_t c)
|
||||||
{
|
{
|
||||||
|
return (c >= QSE_T('0') && c <= QSE_T('9'))? (c - QSE_T('0')):
|
||||||
|
(c >= QSE_T('A') && c <= QSE_T('F'))? (c - QSE_T('A') + 10):
|
||||||
|
(c >= QSE_T('a') && c <= QSE_T('f'))? (c - QSE_T('a') + 10): -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static qse_cint_t trans_escaped (qse_sed_t* sed, qse_cint_t c, int* xamp)
|
||||||
|
{
|
||||||
|
if (xamp) *xamp = 0;
|
||||||
|
|
||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
case QSE_T('a'):
|
case QSE_T('a'):
|
||||||
@ -500,6 +511,51 @@ Omitted for clash with regular expression \b.
|
|||||||
case QSE_T('v'):
|
case QSE_T('v'):
|
||||||
c = QSE_T('\v');
|
c = QSE_T('\v');
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case QSE_T('x'):
|
||||||
|
{
|
||||||
|
/* \xnn */
|
||||||
|
int cc;
|
||||||
|
|
||||||
|
cc = xdigit_to_num(PEEPNXTSC(sed));
|
||||||
|
if (cc <= -1) break;
|
||||||
|
NXTSC(sed);
|
||||||
|
c = cc;
|
||||||
|
|
||||||
|
cc = xdigit_to_num(PEEPNXTSC(sed));
|
||||||
|
if (cc <= -1) break;
|
||||||
|
NXTSC(sed);
|
||||||
|
c = (c << 4) | cc;
|
||||||
|
|
||||||
|
/* let's indicate that '&' is built from \x26. */
|
||||||
|
if (xamp && c == QSE_T('&')) *xamp = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef QSE_CHAR_IS_WCHAR
|
||||||
|
case QSE_T('X'):
|
||||||
|
{
|
||||||
|
/* \Xnnnnnnnn for wchar_t */
|
||||||
|
int cc, i;
|
||||||
|
|
||||||
|
cc = xdigit_to_num(PEEPNXTSC(sed));
|
||||||
|
if (cc <= -1) break;
|
||||||
|
NXTSC(sed);
|
||||||
|
c = cc;
|
||||||
|
|
||||||
|
for (i = 0; i < QSE_SIZEOF(qse_char_t) * 2; i++)
|
||||||
|
{
|
||||||
|
cc = xdigit_to_num(PEEPNXTSC(sed));
|
||||||
|
if (cc <= -1) break;
|
||||||
|
NXTSC(sed);
|
||||||
|
c = (c << 4) | cc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* let's indicate that '&' is built from a \X hex escape. */
|
||||||
|
if (xamp && c == QSE_T('&')) *xamp = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
return c;
|
return c;
|
||||||
@ -507,8 +563,13 @@ Omitted for clash with regular expression \b.
|
|||||||
|
|
||||||
static int pickup_rex (
|
static int pickup_rex (
|
||||||
qse_sed_t* sed, qse_char_t rxend,
|
qse_sed_t* sed, qse_char_t rxend,
|
||||||
int really, const qse_sed_cmd_t* cmd, qse_str_t* buf)
|
int replacement, const qse_sed_cmd_t* cmd, qse_str_t* buf)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* 'replacement' indicates that this function is called for
|
||||||
|
* 'replacement' in 's/pattern/replacement'.
|
||||||
|
*/
|
||||||
|
|
||||||
qse_cint_t c;
|
qse_cint_t c;
|
||||||
qse_size_t chars_from_opening_bracket = 0;
|
qse_size_t chars_from_opening_bracket = 0;
|
||||||
int bracket_state = 0;
|
int bracket_state = 0;
|
||||||
@ -573,7 +634,7 @@ static int pickup_rex (
|
|||||||
if (bracket_state > 0 && nc == QSE_T(']'))
|
if (bracket_state > 0 && nc == QSE_T(']'))
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* if 'really' is not set, bracket_state is alyway 0.
|
* if 'replacement' is set, bracket_state is always 0.
|
||||||
* so this block is never reached.
|
* so this block is never reached.
|
||||||
*
|
*
|
||||||
* a backslashed closing bracket is seen.
|
* a backslashed closing bracket is seen.
|
||||||
@ -589,13 +650,20 @@ static int pickup_rex (
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
qse_cint_t ec;
|
qse_cint_t ec;
|
||||||
|
int xamp;
|
||||||
|
|
||||||
ec = trans_escaped (nc);
|
ec = trans_escaped (sed, nc, &xamp);
|
||||||
if (ec == nc)
|
if (ec == nc || (xamp && replacement))
|
||||||
{
|
{
|
||||||
/* if the character after a backslash is not special
|
/* if the character after a backslash is not special
|
||||||
* at the this layer, add the backslash into the
|
* at this layer, add the backslash into the
|
||||||
* regular expression buffer as it is. */
|
* regular expression buffer as it is.
|
||||||
|
*
|
||||||
|
* if \x26 is found in the replacement, I also need to
|
||||||
|
* transform it to \& so that it is not treated as a
|
||||||
|
* special &.
|
||||||
|
*/
|
||||||
|
|
||||||
if (qse_str_ccat (buf, QSE_T('\\')) == (qse_size_t)-1)
|
if (qse_str_ccat (buf, QSE_T('\\')) == (qse_size_t)-1)
|
||||||
{
|
{
|
||||||
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
|
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
|
||||||
@ -605,7 +673,7 @@ static int pickup_rex (
|
|||||||
c = ec;
|
c = ec;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (really)
|
else if (!replacement)
|
||||||
{
|
{
|
||||||
/* this block sets a flag to indicate that we are in []
|
/* this block sets a flag to indicate that we are in []
|
||||||
* of a regular expression. */
|
* of a regular expression. */
|
||||||
@ -662,12 +730,22 @@ static int pickup_rex (
|
|||||||
|
|
||||||
static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend)
|
static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend)
|
||||||
{
|
{
|
||||||
if (pickup_rex (sed, rxend, 1, QSE_NULL, &sed->tmp.rex) <= -1)
|
int ignorecase = 0;
|
||||||
|
|
||||||
|
if (pickup_rex (sed, rxend, 0, QSE_NULL, &sed->tmp.rex) <= -1)
|
||||||
return QSE_NULL;
|
return QSE_NULL;
|
||||||
|
|
||||||
/* TODO: support ignore case option for address */
|
|
||||||
if (QSE_STR_LEN(&sed->tmp.rex) <= 0) return EMPTY_REX;
|
if (QSE_STR_LEN(&sed->tmp.rex) <= 0) return EMPTY_REX;
|
||||||
return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), 0, &sed->src.loc);
|
|
||||||
|
/* handle a modifier after having handled an empty regex.
|
||||||
|
* so a modifier is naturally disallowed for an empty regex. */
|
||||||
|
if (PEEPNXTSC(sed) == QSE_T('I'))
|
||||||
|
{
|
||||||
|
ignorecase = 1;
|
||||||
|
NXTSC(sed);
|
||||||
|
}
|
||||||
|
|
||||||
|
return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), ignorecase, &sed->src.loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a)
|
static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a)
|
||||||
@ -1084,8 +1162,8 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
goto oops;
|
goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pickup_rex (sed, delim, 1, cmd, t[0]) <= -1) goto oops;
|
if (pickup_rex (sed, delim, 0, cmd, t[0]) <= -1) goto oops;
|
||||||
if (pickup_rex (sed, delim, 0, cmd, t[1]) <= -1) goto oops;
|
if (pickup_rex (sed, delim, 1, cmd, t[1]) <= -1) goto oops;
|
||||||
|
|
||||||
/* skip spaces before options */
|
/* skip spaces before options */
|
||||||
do { c = NXTSC(sed); } while (IS_SPACE(c));
|
do { c = NXTSC(sed); } while (IS_SPACE(c));
|
||||||
@ -1212,7 +1290,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
{
|
{
|
||||||
c = NXTSC (sed);
|
c = NXTSC (sed);
|
||||||
CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops);
|
CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops);
|
||||||
c = trans_escaped (c);
|
c = trans_escaped (sed, c, QSE_NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
b[0] = c;
|
b[0] = c;
|
||||||
@ -1234,7 +1312,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
{
|
{
|
||||||
c = NXTSC (sed);
|
c = NXTSC (sed);
|
||||||
CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops);
|
CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops);
|
||||||
c = trans_escaped (c);
|
c = trans_escaped (sed, c, QSE_NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pos >= QSE_STR_LEN(t))
|
if (pos >= QSE_STR_LEN(t))
|
||||||
@ -1459,6 +1537,8 @@ static int get_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
|
|
||||||
case QSE_T('n'):
|
case QSE_T('n'):
|
||||||
case QSE_T('N'):
|
case QSE_T('N'):
|
||||||
|
|
||||||
|
case QSE_T('z'):
|
||||||
cmd->type = c;
|
cmd->type = c;
|
||||||
NXTSC (sed);
|
NXTSC (sed);
|
||||||
if (terminate_command (sed) <= -1) return -1;
|
if (terminate_command (sed) <= -1) return -1;
|
||||||
@ -1491,6 +1571,7 @@ static int get_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
NXTSC (sed);
|
NXTSC (sed);
|
||||||
if (get_transet (sed, cmd) <= -1) return -1;
|
if (get_transet (sed, cmd) <= -1) return -1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -2882,6 +2963,13 @@ static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case QSE_SED_CMD_CLEAR_PATTERN:
|
||||||
|
{
|
||||||
|
/* clear pattern space */
|
||||||
|
qse_str_clear (&sed->e.in.line);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (jumpto == QSE_NULL) jumpto = cmd->state.next;
|
if (jumpto == QSE_NULL) jumpto = cmd->state.next;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: sed.h 570 2011-09-20 04:40:45Z hyunghwan.chung $
|
* $Id: sed.h 572 2011-09-21 05:10:09Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2011 Chung, Hyung-Hwan.
|
Copyright 2006-2011 Chung, Hyung-Hwan.
|
||||||
This file is part of QSE.
|
This file is part of QSE.
|
||||||
@ -91,6 +91,7 @@ struct qse_sed_adr_t
|
|||||||
#define QSE_SED_CMD_BRANCH_COND QSE_T('t')
|
#define QSE_SED_CMD_BRANCH_COND QSE_T('t')
|
||||||
#define QSE_SED_CMD_SUBSTITUTE QSE_T('s')
|
#define QSE_SED_CMD_SUBSTITUTE QSE_T('s')
|
||||||
#define QSE_SED_CMD_TRANSLATE QSE_T('y')
|
#define QSE_SED_CMD_TRANSLATE QSE_T('y')
|
||||||
|
#define QSE_SED_CMD_CLEAR_PATTERN QSE_T('z')
|
||||||
|
|
||||||
struct qse_sed_cmd_t
|
struct qse_sed_cmd_t
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user