added code to handle an empty regular expression in sed

This commit is contained in:
hyung-hwan 2011-09-07 08:18:36 +00:00
parent 2f15ca2335
commit 0f85644269
10 changed files with 148 additions and 64 deletions

View File

@ -246,6 +246,7 @@ qse_char_t* load_script_file (const qse_char_t* file)
if (qse_str_init (&script, QSE_MMGR_GETDFL(), 1024) <= -1) if (qse_str_init (&script, QSE_MMGR_GETDFL(), 1024) <= -1)
{ {
qse_fclose (fp); qse_fclose (fp);
qse_fprintf (QSE_STDERR, QSE_T("ERROR: cannot load %s\n"), file);
return QSE_NULL; return QSE_NULL;
} }
@ -258,6 +259,13 @@ qse_char_t* load_script_file (const qse_char_t* file)
return QSE_NULL; return QSE_NULL;
} }
} }
if (qse_ferror(fp))
{
qse_fprintf (QSE_STDERR, QSE_T("ERROR: cannot read %s\n"), file);
qse_str_fini (&script);
qse_fclose (fp);
return QSE_NULL;
}
qse_str_yield (&script, &xstr, 0); qse_str_yield (&script, &xstr, 0);
qse_str_fini (&script); qse_str_fini (&script);
@ -303,11 +311,7 @@ int sed_main (int argc, qse_char_t* argv[])
QSE_ASSERT (g_script == QSE_NULL); QSE_ASSERT (g_script == QSE_NULL);
g_script = load_script_file (g_script_file); g_script = load_script_file (g_script_file);
if (g_script == QSE_NULL) if (g_script == QSE_NULL) goto oops;
{
qse_fprintf (QSE_STDERR, QSE_T("ERROR: cannot load %s\n"), g_script_file);
goto oops;
}
} }
if (qse_sed_comp (sed, g_script, qse_strlen(g_script)) == -1) if (qse_sed_comp (sed, g_script, qse_strlen(g_script)) == -1)

View File

@ -1,5 +1,5 @@
/* /*
* $Id: sed.h 558 2011-09-02 15:27:44Z hyunghwan.chung $ * $Id: sed.h 560 2011-09-06 14:18:36Z hyunghwan.chung $
* *
Copyright 2006-2011 Chung, Hyung-Hwan. Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -107,6 +107,7 @@ enum qse_sed_errnum_t
QSE_SED_EOCSDU, /**< multiple occurrence specifiers */ QSE_SED_EOCSDU, /**< multiple occurrence specifiers */
QSE_SED_EOCSZE, /**< occurrence specifier zero */ QSE_SED_EOCSZE, /**< occurrence specifier zero */
QSE_SED_EOCSTL, /**< occurrence specifier too large */ QSE_SED_EOCSTL, /**< occurrence specifier too large */
QSE_SED_ENPREX, /**< no previous regular expression */
QSE_SED_EIOFIL, /**< io error with file '${0}'*/ QSE_SED_EIOFIL, /**< io error with file '${0}'*/
QSE_SED_EIOUSR /**< error returned by user io handler */ QSE_SED_EIOUSR /**< error returned by user io handler */
}; };

View File

@ -1,5 +1,5 @@
/* /*
* $Id: types.h 549 2011-08-14 09:07:31Z hyunghwan.chung $ * $Id: types.h 560 2011-09-06 14:18:36Z hyunghwan.chung $
* *
Copyright 2006-2011 Chung, Hyung-Hwan. Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -365,12 +365,14 @@ typedef qse_int_t qse_intptr_t;
* The qse_mchar_t type defines a multi-byte character type. * The qse_mchar_t type defines a multi-byte character type.
*/ */
typedef char qse_mchar_t; typedef char qse_mchar_t;
#define QSE_SIZEOF_MCHAR_T QSE_SIZEOF_CHAR
/** /**
* The qse_mcint_t defines a type that can hold a qse_mchar_t value and * The qse_mcint_t defines a type that can hold a qse_mchar_t value and
* #QSE_MCHAR_EOF. * #QSE_MCHAR_EOF.
*/ */
typedef int qse_mcint_t; typedef int qse_mcint_t;
#define QSE_SIZEOF_MCINT_T QSE_SIZEOF_INT
/** @typedef qse_wchar_t /** @typedef qse_wchar_t
* The qse_wchar_t type defines a wide character type. * The qse_wchar_t type defines a wide character type.

View File

@ -1539,8 +1539,7 @@ parse_brace:
/* Escaped character. */ /* Escaped character. */
DPRINT(("tre_parse: escaped: '%.*" STRF "'\n", DPRINT(("tre_parse: escaped: '%.*" STRF "'\n",
REST(ctx->re - 1))); REST(ctx->re - 1)));
result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position);
ctx->position);
ctx->position++; ctx->position++;
ctx->re++; ctx->re++;
} }

View File

@ -91,7 +91,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define tre_tolower(c) QSE_TOLOWER(c) #define tre_tolower(c) QSE_TOLOWER(c)
#define tre_toupper(c) QSE_TOUPPER(c) #define tre_toupper(c) QSE_TOUPPER(c)
#if defined(QSE_CHAR_IS_MCHAR) && (QSE_SIZEOF_MCHAR_T == QSE_SIZEOF_CHAR)
typedef unsigned char tre_char_t;
#else
typedef qse_char_t tre_char_t; typedef qse_char_t tre_char_t;
#endif
typedef qse_cint_t tre_cint_t; typedef qse_cint_t tre_cint_t;
#define size_t qse_size_t #define size_t qse_size_t

View File

@ -1,5 +1,5 @@
/* /*
* $Id: err.c 441 2011-04-22 14:28:43Z hyunghwan.chung $ * $Id: err.c 560 2011-09-06 14:18:36Z hyunghwan.chung $
* *
Copyright 2006-2011 Chung, Hyung-Hwan. Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -52,8 +52,9 @@ const qse_char_t* qse_sed_dflerrstr (qse_sed_t* sed, qse_sed_errnum_t errnum)
QSE_T("multiple occurrence specifiers"), QSE_T("multiple occurrence specifiers"),
QSE_T("occurrence specifier zero"), QSE_T("occurrence specifier zero"),
QSE_T("occurrence specifier too large"), QSE_T("occurrence specifier too large"),
QSE_T("io error with file '${0}'"), QSE_T("no previous regular expression"),
QSE_T("error returned by user io handler") QSE_T("I/O error with file '${0}'"),
QSE_T("error returned by user I/O handler")
}; };
return (errnum >= 0 && errnum < QSE_COUNTOF(errstr))? return (errnum >= 0 && errnum < QSE_COUNTOF(errstr))?

View File

@ -1,5 +1,5 @@
/* /*
* $Id: sed.c 559 2011-09-04 16:21:54Z hyunghwan.chung $ * $Id: sed.c 560 2011-09-06 14:18:36Z hyunghwan.chung $
* *
Copyright 2006-2011 Chung, Hyung-Hwan. Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -33,6 +33,8 @@ QSE_IMPLEMENT_COMMON_FUNCTIONS (sed)
static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd); static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd);
static void free_all_command_blocks (qse_sed_t* sed); static void free_all_command_blocks (qse_sed_t* sed);
#define EMPTY_REX ((void*)1)
#define SETERR0(sed,num,loc) \ #define SETERR0(sed,num,loc) \
do { qse_sed_seterror (sed, num, QSE_NULL, loc); } while (0) do { qse_sed_seterror (sed, num, QSE_NULL, loc); } while (0)
@ -160,7 +162,7 @@ void qse_sed_setmaxdepth (qse_sed_t* sed, int ids, qse_size_t depth)
if (ids & QSE_SED_DEPTH_REX_BUILD) sed->depth.rex.build = depth; if (ids & QSE_SED_DEPTH_REX_BUILD) sed->depth.rex.build = depth;
if (ids & QSE_SED_DEPTH_REX_MATCH) sed->depth.rex.match = depth; if (ids & QSE_SED_DEPTH_REX_MATCH) sed->depth.rex.match = depth;
} }
#endif #else
static qse_tre_t* maketre ( static qse_tre_t* maketre (
qse_sed_t* sed, const qse_cstr_t* str, const qse_sed_loc_t* loc) qse_sed_t* sed, const qse_cstr_t* str, const qse_sed_loc_t* loc)
@ -237,7 +239,7 @@ static int matchtre (
} }
return 1; return 1;
} }
#endif
/* check if c is a space character */ /* check if c is a space character */
#define IS_SPACE(c) ((c) == QSE_T(' ') || (c) == QSE_T('\t') || (c) == QSE_T('\r')) #define IS_SPACE(c) ((c) == QSE_T(' ') || (c) == QSE_T('\t') || (c) == QSE_T('\r'))
@ -282,15 +284,27 @@ static void free_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
if (cmd->a2.type == QSE_SED_ADR_REX) if (cmd->a2.type == QSE_SED_ADR_REX)
{ {
QSE_ASSERT (cmd->a2.u.rex != QSE_NULL); QSE_ASSERT (cmd->a2.u.rex != QSE_NULL);
/*qse_freerex (sed->mmgr, cmd->a2.u.rex);*/ if (cmd->a2.u.rex != EMPTY_REX)
{
#ifdef USE_REX
qse_freerex (sed->mmgr, cmd->a2.u.rex);
#else
freetre (sed, cmd->a2.u.rex); freetre (sed, cmd->a2.u.rex);
#endif
}
cmd->a2.type = QSE_SED_ADR_NONE; cmd->a2.type = QSE_SED_ADR_NONE;
} }
if (cmd->a1.type == QSE_SED_ADR_REX) if (cmd->a1.type == QSE_SED_ADR_REX)
{ {
QSE_ASSERT (cmd->a1.u.rex != QSE_NULL); QSE_ASSERT (cmd->a1.u.rex != QSE_NULL);
/*qse_freerex (sed->mmgr, cmd->a1.u.rex);*/ if (cmd->a1.u.rex != EMPTY_REX)
{
#ifdef USE_REX
qse_freerex (sed->mmgr, cmd->a1.u.rex);
#else
freetre (sed, cmd->a1.u.rex); freetre (sed, cmd->a1.u.rex);
#endif
}
cmd->a1.type = QSE_SED_ADR_NONE; cmd->a1.type = QSE_SED_ADR_NONE;
} }
} }
@ -368,9 +382,14 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.file.ptr); QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.file.ptr);
if (cmd->u.subst.rpl.ptr) if (cmd->u.subst.rpl.ptr)
QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.rpl.ptr); QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.rpl.ptr);
if (cmd->u.subst.rex) if (cmd->u.subst.rex && cmd->u.subst.rex != EMPTY_REX)
/*qse_freerex (sed->mmgr, cmd->u.subst.rex);*/ {
#ifdef USE_REX
qse_freerex (sed->mmgr, cmd->u.subst.rex);
#else
freetre (sed, cmd->u.subst.rex); freetre (sed, cmd->u.subst.rex);
#endif
}
break; break;
case QSE_SED_CMD_TRANSLATE: case QSE_SED_CMD_TRANSLATE:
@ -485,6 +504,8 @@ static void* compile_rex (qse_sed_t* sed, qse_char_t rxend)
} }
} }
if (QSE_STR_LEN(&sed->tmp.rex) == 0) return EMPTY_REX;
#ifdef USE_REX #ifdef USE_REX
code = qse_buildrex ( code = qse_buildrex (
sed->mmgr, sed->mmgr,
@ -1044,6 +1065,9 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
QSE_ASSERT (cmd->u.subst.rex == QSE_NULL); QSE_ASSERT (cmd->u.subst.rex == QSE_NULL);
if (QSE_STR_LEN(t[0]) <= 0) cmd->u.subst.rex = EMPTY_REX;
else
{
#ifdef USE_REX #ifdef USE_REX
cmd->u.subst.rex = qse_buildrex ( cmd->u.subst.rex = qse_buildrex (
sed->mmgr, sed->mmgr,
@ -1067,6 +1091,7 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
cmd->u.subst.rex = maketre (sed, QSE_STR_CSTR(t[0]), &sed->src.loc); cmd->u.subst.rex = maketre (sed, QSE_STR_CSTR(t[0]), &sed->src.loc);
if (cmd->u.subst.rex == QSE_NULL) goto oops; if (cmd->u.subst.rex == QSE_NULL) goto oops;
#endif #endif
}
qse_str_yield (t[1], &cmd->u.subst.rpl, 0); qse_str_yield (t[1], &cmd->u.subst.rpl, 0);
if (cmd->u.subst.g == 0 && cmd->u.subst.occ == 0) cmd->u.subst.occ = 1; if (cmd->u.subst.g == 0 && cmd->u.subst.occ == 0) cmd->u.subst.occ = 1;
@ -1891,7 +1916,7 @@ static int write_str_clearly (
{ {
#ifdef QSE_CHAR_IS_MCHAR #ifdef QSE_CHAR_IS_MCHAR
WRITE_CHAR (sed, QSE_T('\\')); WRITE_CHAR (sed, QSE_T('\\'));
WRITE_NUM (sed, c, 8, QSE_SIZEOF(qse_char_t)*3); WRITE_NUM (sed, (unsigned char)c, 8, QSE_SIZEOF(qse_char_t)*3);
#else #else
if (QSE_SIZEOF(qse_char_t) <= 2) if (QSE_SIZEOF(qse_char_t) <= 2)
{ {
@ -2001,6 +2026,7 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
#ifdef USE_REX #ifdef USE_REX
qse_rex_errnum_t errnum; qse_rex_errnum_t errnum;
#endif #endif
const qse_char_t* finalizer = QSE_NULL;
qse_cstr_t str, cur; qse_cstr_t str, cur;
const qse_char_t* str_end; const qse_char_t* str_end;
@ -2019,7 +2045,19 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
str.len = QSE_STR_LEN(&sed->e.in.line); str.len = QSE_STR_LEN(&sed->e.in.line);
/* TODO: support different line end convention */ /* TODO: support different line end convention */
if (str.len > 0 && str.ptr[str.len-1] == QSE_T('\n')) str.len--; if (str.len > 0 && str.ptr[str.len-1] == QSE_T('\n'))
{
str.len--;
if (str.len > 0 && str.ptr[str.len-1] == QSE_T('\r'))
{
finalizer = QSE_T("\r\n");
str.len--;
}
else
{
finalizer = QSE_T("\n");
}
}
str_end = str.ptr + str.len; str_end = str.ptr + str.len;
cur = str; cur = str;
@ -2034,13 +2072,35 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
* end of string($) needs to be tested */ * end of string($) needs to be tested */
while (cur.ptr <= str_end) while (cur.ptr <= str_end)
{ {
#ifdef USE_REX #ifndef USE_REX
qse_cstr_t submat[9];
QSE_MEMSET (submat, 0, QSE_SIZEOF(submat));
#endif
if (max_count == 0 || sub_count < max_count) if (max_count == 0 || sub_count < max_count)
{ {
void* rex;
if (cmd->u.subst.rex == EMPTY_REX)
{
rex = sed->e.last_rex;
if (rex == QSE_NULL)
{
SETERR0 (sed, QSE_SED_ENPREX, &cmd->loc);
return -1;
}
}
else
{
rex = cmd->u.subst.rex;
sed->e.last_rex = rex;
}
#ifdef USE_REX
n = qse_matchrex ( n = qse_matchrex (
sed->mmgr, sed->mmgr,
sed->depth.rex.match, sed->depth.rex.match,
cmd->u.subst.rex, opt, rex, opt,
&str, &cur, &mat, &errnum &str, &cur, &mat, &errnum
); );
if (n <= -1) if (n <= -1)
@ -2048,24 +2108,16 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
SETERR0 (sed, QSE_SED_EREXMA, &cmd->loc); SETERR0 (sed, QSE_SED_EREXMA, &cmd->loc);
return -1; return -1;
} }
}
else n = 0;
#else #else
qse_cstr_t submat[9];
QSE_MEMSET (submat, 0, QSE_SIZEOF(submat));
if (max_count == 0 || sub_count < max_count)
{
n = matchtre ( n = matchtre (
sed, sed, rex,
cmd->u.subst.rex,
((str.ptr == cur.ptr)? opt: (opt | QSE_TRE_NOTBOL)), ((str.ptr == cur.ptr)? opt: (opt | QSE_TRE_NOTBOL)),
&cur, &mat, submat, &cmd->loc &cur, &mat, submat, &cmd->loc
); );
if (n <= -1) return -1; if (n <= -1) return -1;
#endif
} }
else n = 0; else n = 0;
#endif
if (n == 0) if (n == 0)
{ {
@ -2193,10 +2245,9 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
} }
} }
if (str.len < QSE_STR_LEN(&sed->e.in.line)) if (finalizer)
{ {
/* TODO: support different line ending convention */ m = qse_str_cat (&sed->e.txt.subst, finalizer);
m = qse_str_ccat (&sed->e.txt.subst, QSE_T('\n'));
if (m == (qse_size_t)-1) if (m == (qse_size_t)-1)
{ {
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
@ -2251,6 +2302,7 @@ static int match_a (qse_sed_t* sed, qse_sed_cmd_t* cmd, qse_sed_adr_t* a)
qse_cstr_t match; qse_cstr_t match;
#endif #endif
qse_cstr_t line; qse_cstr_t line;
void* rex;
QSE_ASSERT (a->u.rex != QSE_NULL); QSE_ASSERT (a->u.rex != QSE_NULL);
@ -2258,13 +2310,31 @@ static int match_a (qse_sed_t* sed, qse_sed_cmd_t* cmd, qse_sed_adr_t* a)
line.len = QSE_STR_LEN(&sed->e.in.line); line.len = QSE_STR_LEN(&sed->e.in.line);
if (line.len > 0 && if (line.len > 0 &&
line.ptr[line.len-1] == QSE_T('\n')) line.len--; line.ptr[line.len-1] == QSE_T('\n'))
{
line.len--;
if (line.len > 0 && line.ptr[line.len-1] == QSE_T('\r')) line.len--;
}
if (a->u.rex == EMPTY_REX)
{
rex = sed->e.last_rex;
if (rex == QSE_NULL)
{
SETERR0 (sed, QSE_SED_ENPREX, &cmd->loc);
return -1;
}
}
else
{
rex = a->u.rex;
sed->e.last_rex = rex;
}
#ifdef USE_REX #ifdef USE_REX
n = qse_matchrex ( n = qse_matchrex (
sed->mmgr, sed->mmgr,
sed->depth.rex.match, sed->depth.rex.match,
a->u.rex, 0, rex, 0,
&line, &line, &line, &line,
&match, &errnum); &match, &errnum);
if (n <= -1) if (n <= -1)
@ -2275,7 +2345,7 @@ static int match_a (qse_sed_t* sed, qse_sed_cmd_t* cmd, qse_sed_adr_t* a)
return n; return n;
#else #else
return matchtre (sed, a->u.rex, 0, &line, QSE_NULL, QSE_NULL, &cmd->loc); return matchtre (sed, rex, 0, &line, QSE_NULL, QSE_NULL, &cmd->loc);
#endif #endif
} }
@ -2867,6 +2937,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf)
#endif #endif
}; };
sed->e.last_rex = QSE_NULL;
sed->e.subst_done = 0; sed->e.subst_done = 0;
qse_lda_clear (&sed->e.txt.appended); qse_lda_clear (&sed->e.txt.appended);
qse_str_clear (&sed->e.txt.read); qse_str_clear (&sed->e.txt.read);

View File

@ -1,5 +1,5 @@
/* /*
* $Id: sed.h 558 2011-09-02 15:27:44Z hyunghwan.chung $ * $Id: sed.h 560 2011-09-06 14:18:36Z hyunghwan.chung $
* *
Copyright 2006-2011 Chung, Hyung-Hwan. Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE. This file is part of QSE.
@ -39,7 +39,7 @@ enum qse_sed_depth_t
QSE_SED_DEPTH_REX_BUILD = (1 << 0), QSE_SED_DEPTH_REX_BUILD = (1 << 0),
QSE_SED_DEPTH_REX_MATCH = (1 << 1) QSE_SED_DEPTH_REX_MATCH = (1 << 1)
}; };
typedef enum qse_sed_depth_t qse_sed_depth_t typedef enum qse_sed_depth_t qse_sed_depth_t;
#endif #endif
#define QSE_SED_CMD_NOOP QSE_T('\0') #define QSE_SED_CMD_NOOP QSE_T('\0')
@ -270,6 +270,7 @@ struct qse_sed_t
/** indicates if a successful substitution has been made /** indicates if a successful substitution has been made
* since the last read on the input stream. */ * since the last read on the input stream. */
int subst_done; int subst_done;
void* last_rex;
} e; } e;
}; };

View File

@ -1,5 +1,5 @@
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
[CMD] qsesed -n -f s001.sed s001.dat </dev/stdin 2>&1 [CMD] qsesed -n -r -f s001.sed s001.dat </dev/stdin 2>&1
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
ab...c AAA ab...c AAA

View File

@ -1,5 +1,5 @@
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
[CMD] qsesed -m 500000 -n -f s001.sed s001.dat </dev/stdin 2>&1 [CMD] qsesed -m 500000 -n -r -f s001.sed s001.dat </dev/stdin 2>&1
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
ab...c AAA ab...c AAA