enhanced address matching routines for sed

This commit is contained in:
hyung-hwan 2009-05-09 08:06:13 +00:00
parent c221321cda
commit 5aeac8aa19
4 changed files with 138 additions and 58 deletions

View File

@ -137,6 +137,9 @@ struct qse_sed_t
{ {
qse_sed_iof_t f; qse_sed_iof_t f;
qse_char_t xbuf[1];
int xbuf_len;
qse_char_t buf[2048]; qse_char_t buf[2048];
qse_size_t len; qse_size_t len;
qse_size_t pos; qse_size_t pos;

View File

@ -1237,8 +1237,10 @@ static int compile_source (
{ {
/* negate */ /* negate */
cmd->negated = 1; cmd->negated = 1;
do { ADVSCP (sed); } while (IS_SPACE(c));
} }
n = command (sed); n = command (sed);
if (n <= -1) if (n <= -1)
{ {
@ -1279,6 +1281,8 @@ static int read_char (qse_sed_t* sed, qse_char_t* c)
{ {
qse_ssize_t n; qse_ssize_t n;
if (sed->eio.in.xbuf_len == 0)
{
if (sed->eio.in.pos >= sed->eio.in.len) if (sed->eio.in.pos >= sed->eio.in.len)
{ {
n = sed->eio.in.f ( n = sed->eio.in.f (
@ -1301,6 +1305,18 @@ static int read_char (qse_sed_t* sed, qse_char_t* c)
*c = sed->eio.in.buf[sed->eio.in.pos++]; *c = sed->eio.in.buf[sed->eio.in.pos++];
return 1; return 1;
} }
else if (sed->eio.in.xbuf_len > 0)
{
QSE_ASSERT (sed->eio.in.xbuf_len == 1);
*c = sed->eio.in.xbuf[--sed->eio.in.xbuf_len];
return 1;
}
else /*if (sed->eio.in.xbuf_len < 0)*/
{
QSE_ASSERT (sed->eio.in.xbuf_len == -1);
return 0;
}
}
static int read_line (qse_sed_t* sed) static int read_line (qse_sed_t* sed)
{ {
@ -1335,6 +1351,7 @@ static int read_line (qse_sed_t* sed)
return -1; return -1;
} }
// TODO: different line convention
if (c == QSE_T('\n')) break; if (c == QSE_T('\n')) break;
} }
@ -1437,28 +1454,31 @@ static int match_a1 (qse_sed_t* sed, qse_sed_cmd_t* cmd)
switch (cmd->a1.type) switch (cmd->a1.type)
{ {
case QSE_SED_A_LINE: case QSE_SED_A_LINE:
return (sed->eio.in.num >= cmd->a1.u.line)? 1: 0; return (sed->eio.in.num == cmd->a1.u.line)? 1: 0;
case QSE_SED_A_REX: case QSE_SED_A_REX:
{ {
qse_str_t match; qse_str_t match;
int errnum, n; int errnum, n;
qse_str_t* line;
qse_size_t llen;
QSE_ASSERT (cmd->a1.u.rex != QSE_NULL); QSE_ASSERT (cmd->a1.u.rex != QSE_NULL);
/* line = &sed->eio.in.line;
// TODO: trim off trailing newline.... llen = QSE_STR_LEN(line);
if (QSE_STR_LEN(&sed->eio.in.line) > 1 &&
QSE_STR_CHAR(&sed->eio.in.line, /* TODO: support different line end scheme */
QSE_STR_LEN(&sed->eio.in.line)) if (llen > 0 &&
*/ QSE_STR_CHAR(line,llen-1) == QSE_T('\n')) llen--;
n = qse_matchrex ( n = qse_matchrex (
sed->mmgr, sed->mmgr,
0, 0,
cmd->a1.u.rex, cmd->a1.u.rex,
0, 0,
QSE_STR_PTR(&sed->eio.in.line), QSE_STR_PTR(line),
QSE_STR_LEN(&sed->eio.in.line), llen,
&match.ptr, &match.len, &errnum); &match.ptr, &match.len, &errnum);
if (n <= -1) if (n <= -1)
{ {
@ -1466,12 +1486,30 @@ static int match_a1 (qse_sed_t* sed, qse_sed_cmd_t* cmd)
return -1; return -1;
} }
qse_printf (QSE_T("matchrex=>%d [%s]\n"), n, QSE_STR_PTR(&sed->eio.in.line));
return n; return n;
} }
case QSE_SED_A_DOL: case QSE_SED_A_DOL:
{
qse_char_t c;
int n;
n = read_char (sed, &c);
if (n <= -1) return -1;
QSE_ASSERT (sed->eio.in.xbuf_len == 0);
if (n == 0)
{
/* eof has been reached */
sed->eio.in.xbuf_len--;
return 1;
}
else
{
sed->eio.in.xbuf[sed->eio.in.xbuf_len++] = c;
return 0; return 0;
} }
}
}
QSE_ASSERT (cmd->a1.type == QSE_SED_A_NONE); QSE_ASSERT (cmd->a1.type == QSE_SED_A_NONE);
return 1; /* match */ return 1; /* match */
@ -1482,20 +1520,32 @@ static int match_a2 (qse_sed_t* sed, qse_sed_cmd_t* cmd)
switch (cmd->a2.type) switch (cmd->a2.type)
{ {
case QSE_SED_A_LINE: case QSE_SED_A_LINE:
return (sed->eio.in.num <= cmd->a2.u.line)? 1: 0; return (sed->eio.in.num == cmd->a2.u.line)? 1:
(sed->eio.in.num < cmd->a2.u.line)? 2: 0;
case QSE_SED_A_REX: case QSE_SED_A_REX:
{ {
qse_str_t match; qse_str_t match;
int errnum, n; int errnum, n;
qse_str_t* line;
qse_size_t llen;
QSE_ASSERT (cmd->a2.u.rex != QSE_NULL); QSE_ASSERT (cmd->a2.u.rex != QSE_NULL);
line = &sed->eio.in.line;
llen = QSE_STR_LEN(line);
/* TODO: support different line end scheme */
if (llen > 0 &&
QSE_STR_CHAR(line,llen-1) == QSE_T('\n')) llen--;
n = qse_matchrex ( n = qse_matchrex (
sed->mmgr, sed->mmgr,
0, 0,
cmd->a2.u.rex, cmd->a2.u.rex,
0, 0,
QSE_STR_PTR(&sed->eio.in.line), QSE_STR_PTR(line),
QSE_STR_LEN(&sed->eio.in.line), llen,
&match.ptr, &match.len, &errnum); &match.ptr, &match.len, &errnum);
if (n <= -1) if (n <= -1)
{ {
@ -1503,14 +1553,33 @@ static int match_a2 (qse_sed_t* sed, qse_sed_cmd_t* cmd)
return -1; return -1;
} }
return n; return (n == 0)? 2: 1;
} }
case QSE_SED_A_DOL: case QSE_SED_A_DOL:
return 0; {
qse_char_t c;
int n;
n = read_char (sed, &c);
if (n <= -1) return -1;
QSE_ASSERT (sed->eio.in.xbuf_len == 0);
if (n == 0)
{
/* eof has been reached */
sed->eio.in.xbuf_len--;
return 1;
}
else
{
sed->eio.in.xbuf[sed->eio.in.xbuf_len++] = c;
return 2;
}
}
} }
QSE_ASSERT (cmd->a2.type == QSE_SED_A_NONE); QSE_ASSERT (cmd->a2.type == QSE_SED_A_NONE);
return 1; /* match */ return 0; /* no match unlike a1 */
} }
/* match an address against input. /* match an address against input.
@ -1519,13 +1588,36 @@ static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{ {
int a1, a2; int a1, a2;
if (cmd->a1.type == QSE_SED_A_NONE)
{
QSE_ASSERT (cmd->a2.type == QSE_SED_A_NONE);
return 1;
}
if (cmd->a1_matched == 0)
{
a1 = match_a1 (sed, cmd); a1 = match_a1 (sed, cmd);
if (a1 <= -1) return -1; if (a1 <= -1) return -1;
if (a1 == 0) return 0;
else
{
cmd->a1_matched = 1;
return 1;
}
}
else
{
a2 = match_a2 (sed, cmd); a2 = match_a2 (sed, cmd);
if (a2 <= -1) return -1; if (a2 <= -1) return -1;
//qse_printf (QSE_T("a1 = %d, a2 = %d\n"), a1, a2); if (a2 == 0) return 0;
return (a1 >= 1 && a2 >= 1)? 1: 0; else
{
if (a2 == 1) cmd->a1_matched = 0;
return 1;
}
}
} }
static int exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd) static int exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
@ -1648,6 +1740,7 @@ int qse_sed_execute (qse_sed_t* sed, qse_sed_iof_t inf, qse_sed_iof_t outf)
n = match_address (sed, c); n = match_address (sed, c);
if (n <= -1) { ret = -1; goto done; } if (n <= -1) { ret = -1; goto done; }
if (c->negated) n = !n;
if (n == 0) if (n == 0)
{ {
c++; c++;

View File

@ -99,6 +99,7 @@ struct qse_sed_cmd_t
} type; } type;
int negated; int negated;
int a1_matched;
qse_sed_a_t a1; /* optional start address */ qse_sed_a_t a1; /* optional start address */
qse_sed_a_t a2; /* optional end address */ qse_sed_a_t a2; /* optional end address */

View File

@ -26,20 +26,6 @@
#include <qse/cmn/str.h> #include <qse/cmn/str.h>
#include <qse/cmn/chr.h> #include <qse/cmn/chr.h>
/* Forwards (c, id) to the `is` member of the default character-class
 * handler obtained from QSE_CCLS_GETDFL() and returns its result.
 * The sed argument is accepted but not used by this function. */
static qse_bool_t custom_sed_isccls (
	qse_sed_t* sed, qse_cint_t c, qse_ccls_id_t id)
{
	qse_ccls_t* dfl;

	/* fetch the default handler once, then pass it its own data
	 * pointer together with the character and the class id */
	dfl = QSE_CCLS_GETDFL();
	return dfl->is (dfl->data, c, id);
}
/* Forwards (c, id) to the `to` member of the default character-class
 * handler obtained from QSE_CCLS_GETDFL() and returns its result.
 * The sed argument is accepted but not used by this function. */
static qse_cint_t custom_sed_toccls (
	qse_sed_t* sed, qse_cint_t c, qse_ccls_id_t id)
{
	qse_ccls_t* dfl;

	/* fetch the default handler once, then pass it its own data
	 * pointer together with the character and the class id */
	dfl = QSE_CCLS_GETDFL();
	return dfl->to (dfl->data, c, id);
}
static qse_ssize_t in ( static qse_ssize_t in (
qse_sed_t* sed, qse_sed_io_cmd_t cmd, qse_char_t* buf, qse_size_t len) qse_sed_t* sed, qse_sed_io_cmd_t cmd, qse_char_t* buf, qse_size_t len)
{ {
@ -90,7 +76,6 @@ static qse_ssize_t out (
int sed_main (int argc, qse_char_t* argv[]) int sed_main (int argc, qse_char_t* argv[])
{ {
qse_sed_t* sed = QSE_NULL; qse_sed_t* sed = QSE_NULL;
qse_sed_prm_t prm;
int ret = -1; int ret = -1;
if (argc != 2 && argc != 3) if (argc != 2 && argc != 3)
@ -99,9 +84,7 @@ int sed_main (int argc, qse_char_t* argv[])
return -1; return -1;
} }
prm.isccls = custom_sed_isccls; sed = qse_sed_open (QSE_NULL, 0);
prm.toccls = custom_sed_toccls;
sed = qse_sed_open (QSE_NULL, 0, &prm);
if (sed == QSE_NULL) if (sed == QSE_NULL)
{ {
qse_fprintf (QSE_STDERR, QSE_T("cannot open a stream editor\n")); qse_fprintf (QSE_STDERR, QSE_T("cannot open a stream editor\n"));