added QSE_SED_EXTENDEDADR

deleted QSE_SED_ZEROA1 and QSE_SED_STARTSTEP
added actual code for more extended address formats ('addr1,+N' and 'addr1,~N')
fixed the 'divide-by-zero' bug caused by the command 'start~0'
added more test scripts for sed
This commit is contained in:
2011-09-29 00:31:17 +00:00
parent 84cb2b81d8
commit 7a246a02ef
13 changed files with 250 additions and 33 deletions

View File

@ -747,7 +747,7 @@ static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend)
return build_rex (sed, QSE_STR_CSTR(&sed->tmp.rex), ignorecase, &sed->src.loc);
}
static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a)
static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a, int extended)
{
qse_cint_t c;
@ -794,6 +794,28 @@ static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a)
a->type = QSE_SED_ADR_REX;
NXTSC (sed);
}
else if (extended && (c == QSE_T('+') || c == QSE_T('~')))
{
qse_size_t lno = 0;
a->type = (c == QSE_T('+'))? QSE_SED_ADR_RELLINE: QSE_SED_ADR_RELLINEM;
NXTSC (sed);
if (!((c = CURSC(sed)) >= QSE_T('0') && c <= QSE_T('9')))
{
SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc);
return QSE_NULL;
}
do
{
lno = lno * 10 + c - QSE_T('0');
NXTSC (sed);
}
while ((c = CURSC(sed)) >= QSE_T('0') && c <= QSE_T('9'));
a->u.lno = lno;
}
else
{
a->type = QSE_SED_ADR_NONE;
@ -1633,7 +1655,7 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen)
QSE_MEMSET (cmd, 0, QSE_SIZEOF(*cmd));
/* process the first address */
if (get_address (sed, &cmd->a1) == QSE_NULL)
if (get_address (sed, &cmd->a1, 0) == QSE_NULL)
{
cmd = QSE_NULL;
SETERR0 (sed, QSE_SED_EA1MOI, &sed->src.loc);
@ -1646,14 +1668,14 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen)
while (IS_SPACE(c)) c = NXTSC (sed);
if (c == QSE_T(',') ||
((sed->option & QSE_SED_STARTSTEP) && c == QSE_T('~')))
((sed->option & QSE_SED_EXTENDEDADR) && c == QSE_T('~')))
{
qse_char_t delim = c;
/* maybe an address range */
do { c = NXTSC (sed); } while (IS_SPACE(c));
if (get_address (sed, &cmd->a2) == QSE_NULL)
if (get_address (sed, &cmd->a2, (sed->option & QSE_SED_EXTENDEDADR)) == QSE_NULL)
{
QSE_ASSERT (cmd->a2.type == QSE_SED_ADR_NONE);
SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc);
@ -1667,8 +1689,17 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen)
SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc);
goto oops;
}
if (cmd->a2.type == QSE_SED_ADR_RELLINE ||
cmd->a2.type == QSE_SED_ADR_RELLINEM)
{
if (cmd->a2.u.lno <= 0)
{
/* transform 'addr1,+0' and 'addr1,~0' to 'addr1' */
cmd->a2.type = QSE_SED_ADR_NONE;
}
}
}
else if ((sed->option & QSE_SED_STARTSTEP) &&
else if ((sed->option & QSE_SED_EXTENDEDADR) &&
(delim == QSE_T('~')))
{
if (cmd->a1.type != QSE_SED_ADR_LINE ||
@ -1678,7 +1709,15 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen)
goto oops;
}
cmd->a2.type = QSE_SED_ADR_STEP;
if (cmd->a2.u.lno > 0)
{
cmd->a2.type = QSE_SED_ADR_STEP;
}
else
{
/* transform 'X~0' to 'X' */
cmd->a2.type = QSE_SED_ADR_NONE;
}
}
c = CURSC (sed);
@ -1686,15 +1725,24 @@ int qse_sed_comp (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen)
else cmd->a2.type = QSE_SED_ADR_NONE;
}
if (cmd->a2.type != QSE_SED_ADR_STEP &&
cmd->a1.type == QSE_SED_ADR_LINE &&
cmd->a1.u.lno <= 0)
if (cmd->a1.type == QSE_SED_ADR_LINE && cmd->a1.u.lno <= 0)
{
if (!(sed->option & QSE_SED_ZEROA1) ||
cmd->a2.type != QSE_SED_ADR_REX)
if (cmd->a2.type == QSE_SED_ADR_STEP ||
((sed->option & QSE_SED_EXTENDEDADR) && cmd->a2.type == QSE_SED_ADR_REX))
{
/* 0 as the first address is allowed in these two contexts:
* 0~step
* 0,/regex/
* However, '0~0' is not allowed. At this point '0~0' has
* already been transformed to '0', so it is rejected by the
* else branch below at no extra cost.
*/
/* nothing to do - negating the condition would have hurt
* readability, so this branch is intentionally left empty.
*/
}
else
{
/* 0 is not allowed as a normal line number.
* 0,/regex/ is allowed */
SETERR0 (sed, QSE_SED_EA1MOI, &sed->src.loc);
goto oops;
}
@ -2636,6 +2684,29 @@ static int match_a (qse_sed_t* sed, qse_sed_cmd_t* cmd, qse_sed_adr_t* a)
}
}
case QSE_SED_ADR_RELLINE:
/* this address type should be seen only when matching
* the second address */
QSE_ASSERT (cmd->state.a1_matched && cmd->state.a1_match_line >= 1);
return (sed->e.in.num >= cmd->state.a1_match_line + a->u.lno)? 1: 0;
case QSE_SED_ADR_RELLINEM:
{
/* this address type should be seen only when matching
* the second address */
qse_size_t tmp;
QSE_ASSERT (cmd->state.a1_matched && cmd->state.a1_match_line >= 1);
QSE_ASSERT (a->u.lno > 0);
/* TODO: would it be better to store this value somewhere in the
* state so that it is not recalculated every time? */
tmp = (cmd->state.a1_match_line + a->u.lno) -
(cmd->state.a1_match_line % a->u.lno);
return (sed->e.in.num >= tmp)? 1: 0;
}
default:
QSE_ASSERT (a->type == QSE_SED_ADR_NONE);
return 1; /* match */
@ -2662,6 +2733,7 @@ static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
/* stepping address */
cmd->state.c_ready = 1;
if (sed->e.in.num < cmd->a1.u.lno) return 0;
QSE_ASSERT (cmd->a2.u.lno > 0);
if ((sed->e.in.num - cmd->a1.u.lno) % cmd->a2.u.lno == 0) return 1;
return 0;
}
@ -2677,7 +2749,18 @@ static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
if (cmd->a2.type == QSE_SED_ADR_LINE &&
sed->e.in.num > cmd->a2.u.lno)
{
/* exit the range */
/* This check is needed because matching of the second
* address could be skipped even though it would have matched.
*
* Consider commands like '1,3p;2N'.
* '3' in '1,3p' is skipped because 'N' in '2N' triggers
* reading of the third line.
*
* Unfortunately, a non-line-number second address cannot be
* handled this way. If 'abcxyz' is given as the third
* line for command '1,/abc/p;2N', 'abcxyz' is not matched
* against '/abc/', so the range is not exited.
*/
cmd->state.a1_matched = 0;
return 0;
}
@ -2714,6 +2797,7 @@ static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
/* mark that the first is matched so as to
* move on to the range test */
cmd->state.a1_matched = 1;
cmd->state.a1_match_line = sed->e.in.num;
}
return 1;
@ -3070,7 +3154,7 @@ static int init_command_block_for_exec (qse_sed_t* sed, qse_sed_cmd_blk_t* b)
/* clear states */
c->state.a1_matched = 0;
if (sed->option & QSE_SED_ZEROA1)
if (sed->option & QSE_SED_EXTENDEDADR)
{
if (c->a2.type == QSE_SED_ADR_REX &&
c->a1.type == QSE_SED_ADR_LINE &&
@ -3078,6 +3162,7 @@ static int init_command_block_for_exec (qse_sed_t* sed, qse_sed_cmd_blk_t* b)
{
/* special handling for 0,/regex/ */
c->state.a1_matched = 1;
c->state.a1_match_line = 0;
}
}

View File

@ -50,11 +50,13 @@ struct qse_sed_adr_t
{
enum
{
QSE_SED_ADR_NONE, /* no address */
QSE_SED_ADR_DOL, /* $ - last line */
QSE_SED_ADR_LINE, /* specified line */
QSE_SED_ADR_REX, /* lines matching regular expression */
QSE_SED_ADR_STEP /* line steps - only in the second address */
QSE_SED_ADR_NONE, /* no address */
QSE_SED_ADR_DOL, /* $ - last line */
QSE_SED_ADR_LINE, /* specified line */
QSE_SED_ADR_REX, /* lines matching regular expression */
QSE_SED_ADR_STEP, /* line steps - only in the second address */
QSE_SED_ADR_RELLINE, /* relative line - only in second address */
QSE_SED_ADR_RELLINEM /* relative line in the multiples - only in second address */
} type;
union
@ -144,6 +146,8 @@ struct qse_sed_cmd_t
struct
{
int a1_matched;
qse_size_t a1_match_line;
int c_ready;
/* points to the next command for fast traversal and