From 5aeac8aa19258dfdb6cbe65a72527ba2d6a47d96 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Sat, 9 May 2009 08:06:13 +0000 Subject: [PATCH] enhanced address matching routines for sed --- qse/include/qse/utl/sed.h | 3 + qse/lib/utl/sed.c | 173 +++++++++++++++++++++++++++++--------- qse/lib/utl/sed.h | 1 + qse/test/utl/sed01.c | 19 +---- 4 files changed, 138 insertions(+), 58 deletions(-) diff --git a/qse/include/qse/utl/sed.h b/qse/include/qse/utl/sed.h index 62ea8132..609f7404 100644 --- a/qse/include/qse/utl/sed.h +++ b/qse/include/qse/utl/sed.h @@ -137,6 +137,9 @@ struct qse_sed_t { qse_sed_iof_t f; + qse_char_t xbuf[1]; + int xbuf_len; + qse_char_t buf[2048]; qse_size_t len; qse_size_t pos; diff --git a/qse/lib/utl/sed.c b/qse/lib/utl/sed.c index ac0575d5..b3e54068 100644 --- a/qse/lib/utl/sed.c +++ b/qse/lib/utl/sed.c @@ -1237,7 +1237,9 @@ static int compile_source ( { /* negate */ cmd->negated = 1; + do { ADVSCP (sed); } while (IS_SPACE(c)); } + n = command (sed); if (n <= -1) @@ -1279,27 +1281,41 @@ static int read_char (qse_sed_t* sed, qse_char_t* c) { qse_ssize_t n; - if (sed->eio.in.pos >= sed->eio.in.len) + if (sed->eio.in.xbuf_len == 0) { - n = sed->eio.in.f ( - sed, QSE_SED_IO_READ, - sed->eio.in.buf, QSE_COUNTOF(sed->eio.in.buf) - ); - - if (n <= -1) + if (sed->eio.in.pos >= sed->eio.in.len) { - sed->errnum = QSE_SED_EIOUSR; - return -1; + n = sed->eio.in.f ( + sed, QSE_SED_IO_READ, + sed->eio.in.buf, QSE_COUNTOF(sed->eio.in.buf) + ); + + if (n <= -1) + { + sed->errnum = QSE_SED_EIOUSR; + return -1; + } + + if (n == 0) return 0; /* end of file */ + + sed->eio.in.len = n; + sed->eio.in.pos = 0; } - - if (n == 0) return 0; /* end of file */ - - sed->eio.in.len = n; - sed->eio.in.pos = 0; + + *c = sed->eio.in.buf[sed->eio.in.pos++]; + return 1; } - - *c = sed->eio.in.buf[sed->eio.in.pos++]; - return 1; + else if (sed->eio.in.xbuf_len > 0) + { + QSE_ASSERT (sed->eio.in.xbuf_len == 1); + *c = sed->eio.in.xbuf[--sed->eio.in.xbuf_len]; + return 1; + } + else /*if (sed->eio.in.xbuf_len < 0)*/ + { + QSE_ASSERT (sed->eio.in.xbuf_len == -1); + return 0; + } } static int read_line (qse_sed_t* sed) @@ -1335,6 +1351,7 @@ static int read_line (qse_sed_t* sed) return -1; } + // TODO: different line convenstion if (c == QSE_T('\n')) break; } @@ -1437,28 +1454,31 @@ static int match_a1 (qse_sed_t* sed, qse_sed_cmd_t* cmd) switch (cmd->a1.type) { case QSE_SED_A_LINE: - return (sed->eio.in.num >= cmd->a1.u.line)? 1: 0; + return (sed->eio.in.num == cmd->a1.u.line)? 1: 0; + case QSE_SED_A_REX: { qse_str_t match; int errnum, n; + qse_str_t* line; + qse_size_t llen; QSE_ASSERT (cmd->a1.u.rex != QSE_NULL); -/* -// TODO: trim off trailing newline.... - if (QSE_STR_LEN(&sed->eio.in.line) > 1 && - QSE_STR_CHAR(&sed->eio.in.line, - QSE_STR_LEN(&sed->eio.in.line)) -*/ + line = &sed->eio.in.line; + llen = QSE_STR_LEN(line); + + /* TODO: support different line end scheme */ + if (llen > 0 && + QSE_STR_CHAR(line,llen-1) == QSE_T('\n')) llen--; n = qse_matchrex ( sed->mmgr, 0, cmd->a1.u.rex, 0, - QSE_STR_PTR(&sed->eio.in.line), - QSE_STR_LEN(&sed->eio.in.line), + QSE_STR_PTR(line), + llen, &match.ptr, &match.len, &errnum); if (n <= -1) { @@ -1466,11 +1486,29 @@ static int match_a1 (qse_sed_t* sed, qse_sed_cmd_t* cmd) return -1; } -qse_printf (QSE_T("matchrex=>%d [%s]\n"), n, QSE_STR_PTR(&sed->eio.in.line)); return n; } case QSE_SED_A_DOL: - return 0; + { + qse_char_t c; + int n; + + n = read_char (sed, &c); + if (n <= -1) return -1; + + QSE_ASSERT (sed->eio.in.xbuf_len == 0); + if (n == 0) + { + /* eof has been reached */ + sed->eio.in.xbuf_len--; + return 1; + } + else + { + sed->eio.in.xbuf[sed->eio.in.xbuf_len++] = c; + return 0; + } + } } QSE_ASSERT (cmd->a1.type == QSE_SED_A_NONE); @@ -1482,20 +1520,32 @@ static int match_a2 (qse_sed_t* sed, qse_sed_cmd_t* cmd) switch (cmd->a2.type) { case QSE_SED_A_LINE: - return (sed->eio.in.num <= cmd->a2.u.line)? 1: 0; + return (sed->eio.in.num == cmd->a2.u.line)? 1: + (sed->eio.in.num < cmd->a2.u.line)? 2: 0; + case QSE_SED_A_REX: { qse_str_t match; int errnum, n; + qse_str_t* line; + qse_size_t llen; QSE_ASSERT (cmd->a2.u.rex != QSE_NULL); + + line = &sed->eio.in.line; + llen = QSE_STR_LEN(line); + + /* TODO: support different line end scheme */ + if (llen > 0 && + QSE_STR_CHAR(line,llen-1) == QSE_T('\n')) llen--; + n = qse_matchrex ( sed->mmgr, 0, cmd->a2.u.rex, 0, - QSE_STR_PTR(&sed->eio.in.line), - QSE_STR_LEN(&sed->eio.in.line), + QSE_STR_PTR(line), + llen, &match.ptr, &match.len, &errnum); if (n <= -1) { @@ -1503,14 +1553,33 @@ static int match_a2 (qse_sed_t* sed, qse_sed_cmd_t* cmd) return -1; } - return n; + return (n == 0)? 2: 1; } case QSE_SED_A_DOL: - return 0; + { + qse_char_t c; + int n; + + n = read_char (sed, &c); + if (n <= -1) return -1; + + QSE_ASSERT (sed->eio.in.xbuf_len == 0); + if (n == 0) + { + /* eof has been reached */ + sed->eio.in.xbuf_len--; + return 1; + } + else + { + sed->eio.in.xbuf[sed->eio.in.xbuf_len++] = c; + return 2; + } + } } QSE_ASSERT (cmd->a2.type == QSE_SED_A_NONE); - return 1; /* match */ + return 0; /* no match unlike a1 */ } /* match an address against input. @@ -1519,13 +1588,36 @@ static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd) { int a1, a2; - a1 = match_a1 (sed, cmd); - if (a1 <= -1) return -1; - a2 = match_a2 (sed, cmd); - if (a2 <= -1) return -1; + if (cmd->a1.type == QSE_SED_A_NONE) + { + QSE_ASSERT (cmd->a2.type == QSE_SED_A_NONE); + return 1; + } -//qse_printf (QSE_T("a1 = %d, a2 = %d\n"), a1, a2); - return (a1 >= 1 && a2 >= 1)? 1: 0; + if (cmd->a1_matched == 0) + { + a1 = match_a1 (sed, cmd); + if (a1 <= -1) return -1; + + if (a1 == 0) return 0; + else + { + cmd->a1_matched = 1; + return 1; + } + } + else + { + a2 = match_a2 (sed, cmd); + if (a2 <= -1) return -1; + + if (a2 == 0) return 0; + else + { + if (a2 == 1) cmd->a1_matched = 0; + return 1; + } + } } static int exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd) @@ -1648,6 +1740,7 @@ int qse_sed_execute (qse_sed_t* sed, qse_sed_iof_t inf, qse_sed_iof_t outf) n = match_address (sed, c); if (n <= -1) { ret = -1; goto done; } + if (c->negated) n = !n; if (n == 0) { c++; diff --git a/qse/lib/utl/sed.h b/qse/lib/utl/sed.h index e464b931..c19ad203 100644 --- a/qse/lib/utl/sed.h +++ b/qse/lib/utl/sed.h @@ -99,6 +99,7 @@ struct qse_sed_cmd_t } type; int negated; + int a1_matched; qse_sed_a_t a1; /* optional start address */ qse_sed_a_t a2; /* optional end address */ diff --git a/qse/test/utl/sed01.c b/qse/test/utl/sed01.c index 38fc2df2..6ba75a80 100644 --- a/qse/test/utl/sed01.c +++ b/qse/test/utl/sed01.c @@ -26,20 +26,6 @@ #include #include -static qse_bool_t custom_sed_isccls ( - qse_sed_t* sed, qse_cint_t c, qse_ccls_id_t id) -{ - qse_ccls_t* ccls = QSE_CCLS_GETDFL(); - return ccls->is (ccls->data, c, id); -} - -static qse_cint_t custom_sed_toccls ( - qse_sed_t* sed, qse_cint_t c, qse_ccls_id_t id) -{ - qse_ccls_t* ccls = QSE_CCLS_GETDFL(); - return ccls->to (ccls->data, c, id); - -} static qse_ssize_t in ( qse_sed_t* sed, qse_sed_io_cmd_t cmd, qse_char_t* buf, qse_size_t len) { @@ -90,7 +76,6 @@ static qse_ssize_t out ( int sed_main (int argc, qse_char_t* argv[]) { qse_sed_t* sed = QSE_NULL; - qse_sed_prm_t prm; int ret = -1; if (argc != 2 && argc != 3) @@ -99,9 +84,7 @@ int sed_main (int argc, qse_char_t* argv[]) return -1; } - prm.isccls = custom_sed_isccls; - prm.toccls = custom_sed_toccls; - sed = qse_sed_open (QSE_NULL, 0, &prm); + sed = qse_sed_open (QSE_NULL, 0); if (sed == QSE_NULL) { qse_fprintf (QSE_STDERR, QSE_T("cannot open a stream editor\n"));