enhanced address matching routines for sed

This commit is contained in:
hyung-hwan 2009-05-09 08:06:13 +00:00
parent c221321cda
commit 5aeac8aa19
4 changed files with 138 additions and 58 deletions

View File

@ -137,6 +137,9 @@ struct qse_sed_t
{
qse_sed_iof_t f;
qse_char_t xbuf[1];
int xbuf_len;
qse_char_t buf[2048];
qse_size_t len;
qse_size_t pos;

View File

@ -1237,8 +1237,10 @@ static int compile_source (
{
/* negate */
cmd->negated = 1;
do { ADVSCP (sed); } while (IS_SPACE(c));
}
n = command (sed);
if (n <= -1)
{
@ -1279,6 +1281,8 @@ static int read_char (qse_sed_t* sed, qse_char_t* c)
{
qse_ssize_t n;
if (sed->eio.in.xbuf_len == 0)
{
if (sed->eio.in.pos >= sed->eio.in.len)
{
n = sed->eio.in.f (
@ -1301,6 +1305,18 @@ static int read_char (qse_sed_t* sed, qse_char_t* c)
*c = sed->eio.in.buf[sed->eio.in.pos++];
return 1;
}
else if (sed->eio.in.xbuf_len > 0)
{
QSE_ASSERT (sed->eio.in.xbuf_len == 1);
*c = sed->eio.in.xbuf[--sed->eio.in.xbuf_len];
return 1;
}
else /*if (sed->eio.in.xbuf_len < 0)*/
{
QSE_ASSERT (sed->eio.in.xbuf_len == -1);
return 0;
}
}
static int read_line (qse_sed_t* sed)
{
@ -1335,6 +1351,7 @@ static int read_line (qse_sed_t* sed)
return -1;
}
// TODO: different line convention
if (c == QSE_T('\n')) break;
}
@ -1437,28 +1454,31 @@ static int match_a1 (qse_sed_t* sed, qse_sed_cmd_t* cmd)
switch (cmd->a1.type)
{
case QSE_SED_A_LINE:
return (sed->eio.in.num >= cmd->a1.u.line)? 1: 0;
return (sed->eio.in.num == cmd->a1.u.line)? 1: 0;
case QSE_SED_A_REX:
{
qse_str_t match;
int errnum, n;
qse_str_t* line;
qse_size_t llen;
QSE_ASSERT (cmd->a1.u.rex != QSE_NULL);
/*
// TODO: trim off trailing newline....
if (QSE_STR_LEN(&sed->eio.in.line) > 1 &&
QSE_STR_CHAR(&sed->eio.in.line,
QSE_STR_LEN(&sed->eio.in.line))
*/
line = &sed->eio.in.line;
llen = QSE_STR_LEN(line);
/* TODO: support different line end scheme */
if (llen > 0 &&
QSE_STR_CHAR(line,llen-1) == QSE_T('\n')) llen--;
n = qse_matchrex (
sed->mmgr,
0,
cmd->a1.u.rex,
0,
QSE_STR_PTR(&sed->eio.in.line),
QSE_STR_LEN(&sed->eio.in.line),
QSE_STR_PTR(line),
llen,
&match.ptr, &match.len, &errnum);
if (n <= -1)
{
@ -1466,12 +1486,30 @@ static int match_a1 (qse_sed_t* sed, qse_sed_cmd_t* cmd)
return -1;
}
qse_printf (QSE_T("matchrex=>%d [%s]\n"), n, QSE_STR_PTR(&sed->eio.in.line));
return n;
}
case QSE_SED_A_DOL:
{
qse_char_t c;
int n;
n = read_char (sed, &c);
if (n <= -1) return -1;
QSE_ASSERT (sed->eio.in.xbuf_len == 0);
if (n == 0)
{
/* eof has been reached */
sed->eio.in.xbuf_len--;
return 1;
}
else
{
sed->eio.in.xbuf[sed->eio.in.xbuf_len++] = c;
return 0;
}
}
}
QSE_ASSERT (cmd->a1.type == QSE_SED_A_NONE);
return 1; /* match */
@ -1482,20 +1520,32 @@ static int match_a2 (qse_sed_t* sed, qse_sed_cmd_t* cmd)
switch (cmd->a2.type)
{
case QSE_SED_A_LINE:
return (sed->eio.in.num <= cmd->a2.u.line)? 1: 0;
return (sed->eio.in.num == cmd->a2.u.line)? 1:
(sed->eio.in.num < cmd->a2.u.line)? 2: 0;
case QSE_SED_A_REX:
{
qse_str_t match;
int errnum, n;
qse_str_t* line;
qse_size_t llen;
QSE_ASSERT (cmd->a2.u.rex != QSE_NULL);
line = &sed->eio.in.line;
llen = QSE_STR_LEN(line);
/* TODO: support different line end scheme */
if (llen > 0 &&
QSE_STR_CHAR(line,llen-1) == QSE_T('\n')) llen--;
n = qse_matchrex (
sed->mmgr,
0,
cmd->a2.u.rex,
0,
QSE_STR_PTR(&sed->eio.in.line),
QSE_STR_LEN(&sed->eio.in.line),
QSE_STR_PTR(line),
llen,
&match.ptr, &match.len, &errnum);
if (n <= -1)
{
@ -1503,14 +1553,33 @@ static int match_a2 (qse_sed_t* sed, qse_sed_cmd_t* cmd)
return -1;
}
return n;
return (n == 0)? 2: 1;
}
case QSE_SED_A_DOL:
return 0;
{
qse_char_t c;
int n;
n = read_char (sed, &c);
if (n <= -1) return -1;
QSE_ASSERT (sed->eio.in.xbuf_len == 0);
if (n == 0)
{
/* eof has been reached */
sed->eio.in.xbuf_len--;
return 1;
}
else
{
sed->eio.in.xbuf[sed->eio.in.xbuf_len++] = c;
return 2;
}
}
}
QSE_ASSERT (cmd->a2.type == QSE_SED_A_NONE);
return 1; /* match */
return 0; /* no match unlike a1 */
}
/* match an address against input.
@ -1519,13 +1588,36 @@ static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
int a1, a2;
if (cmd->a1.type == QSE_SED_A_NONE)
{
QSE_ASSERT (cmd->a2.type == QSE_SED_A_NONE);
return 1;
}
if (cmd->a1_matched == 0)
{
a1 = match_a1 (sed, cmd);
if (a1 <= -1) return -1;
if (a1 == 0) return 0;
else
{
cmd->a1_matched = 1;
return 1;
}
}
else
{
a2 = match_a2 (sed, cmd);
if (a2 <= -1) return -1;
//qse_printf (QSE_T("a1 = %d, a2 = %d\n"), a1, a2);
return (a1 >= 1 && a2 >= 1)? 1: 0;
if (a2 == 0) return 0;
else
{
if (a2 == 1) cmd->a1_matched = 0;
return 1;
}
}
}
static int exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
@ -1648,6 +1740,7 @@ int qse_sed_execute (qse_sed_t* sed, qse_sed_iof_t inf, qse_sed_iof_t outf)
n = match_address (sed, c);
if (n <= -1) { ret = -1; goto done; }
if (c->negated) n = !n;
if (n == 0)
{
c++;

View File

@ -99,6 +99,7 @@ struct qse_sed_cmd_t
} type;
int negated;
int a1_matched;
qse_sed_a_t a1; /* optional start address */
qse_sed_a_t a2; /* optional end address */

View File

@ -26,20 +26,6 @@
#include <qse/cmn/str.h>
#include <qse/cmn/chr.h>
/* Callback that answers a character-class query by delegating to the
 * qse_ccls_t object returned by QSE_CCLS_GETDFL(): it invokes that
 * object's `is` member with the object's own data, the character `c`
 * and the class identifier `id`, and returns the result unchanged.
 * The `sed` argument is not consulted. */
static qse_bool_t custom_sed_isccls (
	qse_sed_t* sed, qse_cint_t c, qse_ccls_id_t id)
{
	qse_ccls_t* handler;

	handler = QSE_CCLS_GETDFL();
	return handler->is (handler->data, c, id);
}
/* Callback that converts a character by delegating to the qse_ccls_t
 * object returned by QSE_CCLS_GETDFL(): it invokes that object's `to`
 * member with the object's own data, the character `c` and the class
 * identifier `id`, and returns the resulting character unchanged.
 * The `sed` argument is not consulted. */
static qse_cint_t custom_sed_toccls (
	qse_sed_t* sed, qse_cint_t c, qse_ccls_id_t id)
{
	qse_ccls_t* handler;

	handler = QSE_CCLS_GETDFL();
	return handler->to (handler->data, c, id);
}
static qse_ssize_t in (
qse_sed_t* sed, qse_sed_io_cmd_t cmd, qse_char_t* buf, qse_size_t len)
{
@ -90,7 +76,6 @@ static qse_ssize_t out (
int sed_main (int argc, qse_char_t* argv[])
{
qse_sed_t* sed = QSE_NULL;
qse_sed_prm_t prm;
int ret = -1;
if (argc != 2 && argc != 3)
@ -99,9 +84,7 @@ int sed_main (int argc, qse_char_t* argv[])
return -1;
}
prm.isccls = custom_sed_isccls;
prm.toccls = custom_sed_toccls;
sed = qse_sed_open (QSE_NULL, 0, &prm);
sed = qse_sed_open (QSE_NULL, 0);
if (sed == QSE_NULL)
{
qse_fprintf (QSE_STDERR, QSE_T("cannot open a stream editor\n"));