added code to handle an empty regular expression in sed

This commit is contained in:
hyung-hwan 2011-09-07 08:18:36 +00:00
parent 2f15ca2335
commit 0f85644269
10 changed files with 148 additions and 64 deletions

View File

@ -246,6 +246,7 @@ qse_char_t* load_script_file (const qse_char_t* file)
if (qse_str_init (&script, QSE_MMGR_GETDFL(), 1024) <= -1)
{
qse_fclose (fp);
qse_fprintf (QSE_STDERR, QSE_T("ERROR: cannot load %s\n"), file);
return QSE_NULL;
}
@ -258,6 +259,13 @@ qse_char_t* load_script_file (const qse_char_t* file)
return QSE_NULL;
}
}
if (qse_ferror(fp))
{
qse_fprintf (QSE_STDERR, QSE_T("ERROR: cannot read %s\n"), file);
qse_str_fini (&script);
qse_fclose (fp);
return QSE_NULL;
}
qse_str_yield (&script, &xstr, 0);
qse_str_fini (&script);
@ -303,11 +311,7 @@ int sed_main (int argc, qse_char_t* argv[])
QSE_ASSERT (g_script == QSE_NULL);
g_script = load_script_file (g_script_file);
if (g_script == QSE_NULL)
{
qse_fprintf (QSE_STDERR, QSE_T("ERROR: cannot load %s\n"), g_script_file);
goto oops;
}
if (g_script == QSE_NULL) goto oops;
}
if (qse_sed_comp (sed, g_script, qse_strlen(g_script)) == -1)

View File

@ -1,5 +1,5 @@
/*
* $Id: sed.h 558 2011-09-02 15:27:44Z hyunghwan.chung $
* $Id: sed.h 560 2011-09-06 14:18:36Z hyunghwan.chung $
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -107,6 +107,7 @@ enum qse_sed_errnum_t
QSE_SED_EOCSDU, /**< multiple occurrence specifiers */
QSE_SED_EOCSZE, /**< occurrence specifier zero */
QSE_SED_EOCSTL, /**< occurrence specifier too large */
QSE_SED_ENPREX, /**< no previous regular expression */
QSE_SED_EIOFIL, /**< io error with file '${0}'*/
QSE_SED_EIOUSR /**< error returned by user io handler */
};

View File

@ -1,5 +1,5 @@
/*
* $Id: types.h 549 2011-08-14 09:07:31Z hyunghwan.chung $
* $Id: types.h 560 2011-09-06 14:18:36Z hyunghwan.chung $
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -365,12 +365,14 @@ typedef qse_int_t qse_intptr_t;
* The qse_mchar_t type defines a multi-byte character type.
*/
typedef char qse_mchar_t;
#define QSE_SIZEOF_MCHAR_T QSE_SIZEOF_CHAR
/**
* The qse_mcint_t defines a type that can hold a qse_mchar_t value and
* #QSE_MCHAR_EOF.
*/
typedef int qse_mcint_t;
#define QSE_SIZEOF_MCINT_T QSE_SIZEOF_INT
/** @typedef qse_wchar_t
* The qse_wchar_t type defines a wide character type.

View File

@ -1539,8 +1539,7 @@ parse_brace:
/* Escaped character. */
DPRINT(("tre_parse: escaped: '%.*" STRF "'\n",
REST(ctx->re - 1)));
result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
ctx->position);
result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re, ctx->position);
ctx->position++;
ctx->re++;
}

View File

@ -91,7 +91,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define tre_tolower(c) QSE_TOLOWER(c)
#define tre_toupper(c) QSE_TOUPPER(c)
typedef qse_char_t tre_char_t;
#if defined(QSE_CHAR_IS_MCHAR) && (QSE_SIZEOF_MCHAR_T == QSE_SIZEOF_CHAR)
typedef unsigned char tre_char_t;
#else
typedef qse_char_t tre_char_t;
#endif
typedef qse_cint_t tre_cint_t;
#define size_t qse_size_t

View File

@ -1,5 +1,5 @@
/*
* $Id: err.c 441 2011-04-22 14:28:43Z hyunghwan.chung $
* $Id: err.c 560 2011-09-06 14:18:36Z hyunghwan.chung $
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -52,8 +52,9 @@ const qse_char_t* qse_sed_dflerrstr (qse_sed_t* sed, qse_sed_errnum_t errnum)
QSE_T("multiple occurrence specifiers"),
QSE_T("occurrence specifier zero"),
QSE_T("occurrence specifier too large"),
QSE_T("io error with file '${0}'"),
QSE_T("error returned by user io handler")
QSE_T("no previous regular expression"),
QSE_T("I/O error with file '${0}'"),
QSE_T("error returned by user I/O handler")
};
return (errnum >= 0 && errnum < QSE_COUNTOF(errstr))?

View File

@ -1,5 +1,5 @@
/*
* $Id: sed.c 559 2011-09-04 16:21:54Z hyunghwan.chung $
* $Id: sed.c 560 2011-09-06 14:18:36Z hyunghwan.chung $
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -33,6 +33,8 @@ QSE_IMPLEMENT_COMMON_FUNCTIONS (sed)
static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd);
static void free_all_command_blocks (qse_sed_t* sed);
/* Sentinel stored in place of a compiled regular expression when the
 * pattern text is empty (compile_rex() returns it and get_subst() assigns
 * it for a zero-length pattern). At match time do_subst() and match_a()
 * substitute sed->e.last_rex for it and fail with QSE_SED_ENPREX when no
 * previous expression has been recorded. It must be compared by identity
 * and is never handed to qse_freerex()/freetre(). */
#define EMPTY_REX ((void*)1)
/* Record error number 'num' on 'sed' at location 'loc' by calling
 * qse_sed_seterror() with QSE_NULL as its third argument (presumably the
 * message-argument parameter; confirm against qse_sed_seterror()).
 * Wrapped in do { } while (0) so it can be used as a single statement. */
#define SETERR0(sed,num,loc) \
do { qse_sed_seterror (sed, num, QSE_NULL, loc); } while (0)
@ -160,7 +162,7 @@ void qse_sed_setmaxdepth (qse_sed_t* sed, int ids, qse_size_t depth)
if (ids & QSE_SED_DEPTH_REX_BUILD) sed->depth.rex.build = depth;
if (ids & QSE_SED_DEPTH_REX_MATCH) sed->depth.rex.match = depth;
}
#endif
#else
static qse_tre_t* maketre (
qse_sed_t* sed, const qse_cstr_t* str, const qse_sed_loc_t* loc)
@ -237,7 +239,7 @@ static int matchtre (
}
return 1;
}
#endif
/* Check if c is a blank: a space, a horizontal tab, or a carriage return.
 * A newline (QSE_T('\n')) is not matched by this macro. The argument c
 * may be evaluated up to three times, so pass an expression without
 * side effects. */
#define IS_SPACE(c) ((c) == QSE_T(' ') || (c) == QSE_T('\t') || (c) == QSE_T('\r'))
@ -282,15 +284,27 @@ static void free_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
if (cmd->a2.type == QSE_SED_ADR_REX)
{
QSE_ASSERT (cmd->a2.u.rex != QSE_NULL);
/*qse_freerex (sed->mmgr, cmd->a2.u.rex);*/
freetre (sed, cmd->a2.u.rex);
if (cmd->a2.u.rex != EMPTY_REX)
{
#ifdef USE_REX
qse_freerex (sed->mmgr, cmd->a2.u.rex);
#else
freetre (sed, cmd->a2.u.rex);
#endif
}
cmd->a2.type = QSE_SED_ADR_NONE;
}
if (cmd->a1.type == QSE_SED_ADR_REX)
{
QSE_ASSERT (cmd->a1.u.rex != QSE_NULL);
/*qse_freerex (sed->mmgr, cmd->a1.u.rex);*/
freetre (sed, cmd->a1.u.rex);
if (cmd->a1.u.rex != EMPTY_REX)
{
#ifdef USE_REX
qse_freerex (sed->mmgr, cmd->a1.u.rex);
#else
freetre (sed, cmd->a1.u.rex);
#endif
}
cmd->a1.type = QSE_SED_ADR_NONE;
}
}
@ -368,9 +382,14 @@ static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.file.ptr);
if (cmd->u.subst.rpl.ptr)
QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.rpl.ptr);
if (cmd->u.subst.rex)
/*qse_freerex (sed->mmgr, cmd->u.subst.rex);*/
if (cmd->u.subst.rex && cmd->u.subst.rex != EMPTY_REX)
{
#ifdef USE_REX
qse_freerex (sed->mmgr, cmd->u.subst.rex);
#else
freetre (sed, cmd->u.subst.rex);
#endif
}
break;
case QSE_SED_CMD_TRANSLATE:
@ -485,6 +504,8 @@ static void* compile_rex (qse_sed_t* sed, qse_char_t rxend)
}
}
if (QSE_STR_LEN(&sed->tmp.rex) == 0) return EMPTY_REX;
#ifdef USE_REX
code = qse_buildrex (
sed->mmgr,
@ -1044,29 +1065,33 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
QSE_ASSERT (cmd->u.subst.rex == QSE_NULL);
#ifdef USE_REX
cmd->u.subst.rex = qse_buildrex (
sed->mmgr,
sed->depth.rex.build,
((sed->option&QSE_SED_EXTENDEDREX)? 0:QSE_REX_NOBOUND),
QSE_STR_PTR(t[0]),
QSE_STR_LEN(t[0]),
QSE_NULL
);
if (cmd->u.subst.rex == QSE_NULL)
if (QSE_STR_LEN(t[0]) <= 0) cmd->u.subst.rex = EMPTY_REX;
else
{
SETERR1 (
sed, QSE_SED_EREXBL,
#ifdef USE_REX
cmd->u.subst.rex = qse_buildrex (
sed->mmgr,
sed->depth.rex.build,
((sed->option&QSE_SED_EXTENDEDREX)? 0:QSE_REX_NOBOUND),
QSE_STR_PTR(t[0]),
QSE_STR_LEN(t[0]),
&sed->src.loc
QSE_NULL
);
goto oops;
}
if (cmd->u.subst.rex == QSE_NULL)
{
SETERR1 (
sed, QSE_SED_EREXBL,
QSE_STR_PTR(t[0]),
QSE_STR_LEN(t[0]),
&sed->src.loc
);
goto oops;
}
#else
cmd->u.subst.rex = maketre (sed, QSE_STR_CSTR(t[0]), &sed->src.loc);
if (cmd->u.subst.rex == QSE_NULL) goto oops;
cmd->u.subst.rex = maketre (sed, QSE_STR_CSTR(t[0]), &sed->src.loc);
if (cmd->u.subst.rex == QSE_NULL) goto oops;
#endif
}
qse_str_yield (t[1], &cmd->u.subst.rpl, 0);
if (cmd->u.subst.g == 0 && cmd->u.subst.occ == 0) cmd->u.subst.occ = 1;
@ -1891,7 +1916,7 @@ static int write_str_clearly (
{
#ifdef QSE_CHAR_IS_MCHAR
WRITE_CHAR (sed, QSE_T('\\'));
WRITE_NUM (sed, c, 8, QSE_SIZEOF(qse_char_t)*3);
WRITE_NUM (sed, (unsigned char)c, 8, QSE_SIZEOF(qse_char_t)*3);
#else
if (QSE_SIZEOF(qse_char_t) <= 2)
{
@ -2001,6 +2026,7 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
#ifdef USE_REX
qse_rex_errnum_t errnum;
#endif
const qse_char_t* finalizer = QSE_NULL;
qse_cstr_t str, cur;
const qse_char_t* str_end;
@ -2019,7 +2045,19 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
str.len = QSE_STR_LEN(&sed->e.in.line);
/* TODO: support different line end convention */
if (str.len > 0 && str.ptr[str.len-1] == QSE_T('\n')) str.len--;
if (str.len > 0 && str.ptr[str.len-1] == QSE_T('\n'))
{
str.len--;
if (str.len > 0 && str.ptr[str.len-1] == QSE_T('\r'))
{
finalizer = QSE_T("\r\n");
str.len--;
}
else
{
finalizer = QSE_T("\n");
}
}
str_end = str.ptr + str.len;
cur = str;
@ -2034,13 +2072,35 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
* end of string($) needs to be tested */
while (cur.ptr <= str_end)
{
#ifdef USE_REX
#ifndef USE_REX
qse_cstr_t submat[9];
QSE_MEMSET (submat, 0, QSE_SIZEOF(submat));
#endif
if (max_count == 0 || sub_count < max_count)
{
void* rex;
if (cmd->u.subst.rex == EMPTY_REX)
{
rex = sed->e.last_rex;
if (rex == QSE_NULL)
{
SETERR0 (sed, QSE_SED_ENPREX, &cmd->loc);
return -1;
}
}
else
{
rex = cmd->u.subst.rex;
sed->e.last_rex = rex;
}
#ifdef USE_REX
n = qse_matchrex (
sed->mmgr,
sed->depth.rex.match,
cmd->u.subst.rex, opt,
rex, opt,
&str, &cur, &mat, &errnum
);
if (n <= -1)
@ -2048,24 +2108,16 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
SETERR0 (sed, QSE_SED_EREXMA, &cmd->loc);
return -1;
}
}
else n = 0;
#else
qse_cstr_t submat[9];
QSE_MEMSET (submat, 0, QSE_SIZEOF(submat));
if (max_count == 0 || sub_count < max_count)
{
n = matchtre (
sed,
cmd->u.subst.rex,
sed, rex,
((str.ptr == cur.ptr)? opt: (opt | QSE_TRE_NOTBOL)),
&cur, &mat, submat, &cmd->loc
);
if (n <= -1) return -1;
#endif
}
else n = 0;
#endif
if (n == 0)
{
@ -2193,10 +2245,9 @@ static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
}
}
if (str.len < QSE_STR_LEN(&sed->e.in.line))
if (finalizer)
{
/* TODO: support different line ending convention */
m = qse_str_ccat (&sed->e.txt.subst, QSE_T('\n'));
m = qse_str_cat (&sed->e.txt.subst, finalizer);
if (m == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
@ -2251,6 +2302,7 @@ static int match_a (qse_sed_t* sed, qse_sed_cmd_t* cmd, qse_sed_adr_t* a)
qse_cstr_t match;
#endif
qse_cstr_t line;
void* rex;
QSE_ASSERT (a->u.rex != QSE_NULL);
@ -2258,13 +2310,31 @@ static int match_a (qse_sed_t* sed, qse_sed_cmd_t* cmd, qse_sed_adr_t* a)
line.len = QSE_STR_LEN(&sed->e.in.line);
if (line.len > 0 &&
line.ptr[line.len-1] == QSE_T('\n')) line.len--;
line.ptr[line.len-1] == QSE_T('\n'))
{
line.len--;
if (line.len > 0 && line.ptr[line.len-1] == QSE_T('\r')) line.len--;
}
if (a->u.rex == EMPTY_REX)
{
rex = sed->e.last_rex;
if (rex == QSE_NULL)
{
SETERR0 (sed, QSE_SED_ENPREX, &cmd->loc);
return -1;
}
}
else
{
rex = a->u.rex;
sed->e.last_rex = rex;
}
#ifdef USE_REX
n = qse_matchrex (
sed->mmgr,
sed->depth.rex.match,
a->u.rex, 0,
rex, 0,
&line, &line,
&match, &errnum);
if (n <= -1)
@ -2275,7 +2345,7 @@ static int match_a (qse_sed_t* sed, qse_sed_cmd_t* cmd, qse_sed_adr_t* a)
return n;
#else
return matchtre (sed, a->u.rex, 0, &line, QSE_NULL, QSE_NULL, &cmd->loc);
return matchtre (sed, rex, 0, &line, QSE_NULL, QSE_NULL, &cmd->loc);
#endif
}
@ -2867,6 +2937,8 @@ int qse_sed_exec (qse_sed_t* sed, qse_sed_io_fun_t inf, qse_sed_io_fun_t outf)
#endif
};
sed->e.last_rex = QSE_NULL;
sed->e.subst_done = 0;
qse_lda_clear (&sed->e.txt.appended);
qse_str_clear (&sed->e.txt.read);

View File

@ -1,5 +1,5 @@
/*
* $Id: sed.h 558 2011-09-02 15:27:44Z hyunghwan.chung $
* $Id: sed.h 560 2011-09-06 14:18:36Z hyunghwan.chung $
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -39,7 +39,7 @@ enum qse_sed_depth_t
QSE_SED_DEPTH_REX_BUILD = (1 << 0),
QSE_SED_DEPTH_REX_MATCH = (1 << 1)
};
typedef enum qse_sed_depth_t qse_sed_depth_t
typedef enum qse_sed_depth_t qse_sed_depth_t;
#endif
#define QSE_SED_CMD_NOOP QSE_T('\0')
@ -270,6 +270,7 @@ struct qse_sed_t
/** indicates if a successful substitution has been made
* since the last read on the input stream. */
int subst_done;
void* last_rex;
} e;
};

View File

@ -1,5 +1,5 @@
--------------------------------------------------------------------------------
[CMD] qsesed -n -f s001.sed s001.dat </dev/stdin 2>&1
[CMD] qsesed -n -r -f s001.sed s001.dat </dev/stdin 2>&1
--------------------------------------------------------------------------------
ab...c AAA

View File

@ -1,5 +1,5 @@
--------------------------------------------------------------------------------
[CMD] qsesed -m 500000 -n -f s001.sed s001.dat </dev/stdin 2>&1
[CMD] qsesed -m 500000 -n -r -f s001.sed s001.dat </dev/stdin 2>&1
--------------------------------------------------------------------------------
ab...c AAA