/* qse/lib/sed/sed.c (4208 lines, 88 KiB, C) */

/*
* $Id$
*
Copyright (c) 2006-2019 Chung, Hyung-Hwan. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "sed-prv.h"
#include "../cmn/mem-prv.h"
#include <qse/cmn/chr.h>
/* Define USE_REX to use <qse/cmn/rex.h> */
/*#define USE_REX*/
/* Define USE_REGEX to use regcomp(), regexec(), regfree() instead of TRE */
/*#define USE_REGEX*/
#if defined(USE_REX)
# include <qse/cmn/rex.h>
#else
# if defined(QSE_CHAR_IS_MCHAR) && defined(USE_REGEX)
# include <regex.h>
# else
# include <qse/cmn/tre.h>
# endif
#endif
/* Forward declarations of static helpers so that they can be called
 * before their definitions appear. */
static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd);
static void free_all_command_blocks (qse_sed_t* sed);
static void free_all_cids (qse_sed_t* sed);
static void free_appends (qse_sed_t* sed);
static int emit_output (qse_sed_t* sed, int skipline);
/* Sentinel pointer stored where a compiled regular expression would go
 * when the pattern text is empty. free_address() and free_command()
 * check for it and do not hand it to free_rex(). */
#define EMPTY_REX ((void*)1)
/* Record error 'num' at location 'loc' with no message argument. */
#define SETERR0(sed,num,loc) \
do { qse_sed_seterror (sed, num, QSE_NULL, loc); } while (0)
/* Record error 'num' at location 'loc' with one message argument that is
 * given as a pointer/length pair (argp, argl). */
#define SETERR1(sed,num,argp,argl,loc) \
do { \
qse_cstr_t __ea__; \
__ea__.ptr = argp; __ea__.len = argl; \
qse_sed_seterror (sed, num, &__ea__, loc); \
} while (0)
static void free_all_cut_selector_blocks (qse_sed_t* sed, qse_sed_cmd_t* cmd);
/*
 * Allocates a sed object followed by 'xtnsize' bytes of extension space
 * from 'mmgr' and initializes it.
 *
 * Returns the new object, or QSE_NULL on failure. On failure, the error
 * number is stored in '*errnum' if 'errnum' is not QSE_NULL.
 */
qse_sed_t* qse_sed_open (qse_mmgr_t* mmgr, qse_size_t xtnsize, qse_sed_errnum_t* errnum)
{
	qse_sed_t* sed;

	sed = (qse_sed_t*) QSE_MMGR_ALLOC (mmgr, QSE_SIZEOF(qse_sed_t) + xtnsize);
	if (sed == QSE_NULL)
	{
		/* the allocation itself failed */
		if (errnum) *errnum = QSE_SED_ENOMEM;
		return QSE_NULL;
	}

	if (qse_sed_init(sed, mmgr) <= -1)
	{
		/* fetch the error number before the object is released */
		if (errnum) *errnum = qse_sed_geterrnum(sed);
		QSE_MMGR_FREE (mmgr, sed);
		return QSE_NULL;
	}

	/* the extension area starts out zero-filled */
	QSE_MEMSET (QSE_XTN(sed), 0, xtnsize);
	return sed;
}
/*
 * Destroys a sed object created with qse_sed_open(): invokes the close
 * callback of every registered event callback set, finalizes the object
 * and returns its memory to the memory manager.
 */
void qse_sed_close (qse_sed_t* sed)
{
	qse_sed_ecb_t* cb = sed->ecb;

	while (cb != QSE_NULL)
	{
		if (cb->close) cb->close (sed);
		cb = cb->next;
	}

	qse_sed_fini (sed);
	QSE_MMGR_FREE (sed->_mmgr, sed);
}
/*
 * Initializes a sed object in place using 'mmgr' for all allocations.
 *
 * Returns 0 on success. On failure, everything initialized so far is
 * finalized again, QSE_SED_ENOMEM is recorded and -1 is returned.
 */
int qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr)
{
	/* begin with an all-zero object */
	QSE_MEMSET (sed, 0, QSE_SIZEOF(*sed));

	sed->_instsize = QSE_SIZEOF(*sed);
	sed->_mmgr = mmgr;
	sed->errstr = qse_sed_dflerrstr;

	/* temporary buffers used while compiling a script */
	if (qse_str_init(&sed->tmp.rex, mmgr, 0) <= -1) goto fail;
	if (qse_str_init(&sed->tmp.lab, mmgr, 0) <= -1) goto undo_tmp_rex;

	/* label table */
	if (qse_map_init(&sed->tmp.labs, mmgr, 128, 70,
	                 QSE_SIZEOF(qse_char_t), 1) <= -1) goto undo_tmp_lab;
	qse_map_setstyle (&sed->tmp.labs, qse_getmapstyle(QSE_MAP_STYLE_INLINE_KEY_COPIER));

	/* text buffers used during execution */
	if (qse_str_init(&sed->e.txt.hold, mmgr, 256) <= -1) goto undo_tmp_labs;
	if (qse_str_init(&sed->e.txt.scratch, mmgr, 256) <= -1) goto undo_hold;

	/* the embedded first command block is also the last one and is empty */
	sed->cmd.lb = &sed->cmd.fb;
	sed->cmd.fb.len = 0;

	/* the cut field buffer initially refers to the built-in array */
	sed->e.cutf.cflds = QSE_COUNTOF(sed->e.cutf.sflds);
	sed->e.cutf.flds = sed->e.cutf.sflds;

	return 0;

undo_hold:
	qse_str_fini (&sed->e.txt.hold);
undo_tmp_labs:
	qse_map_fini (&sed->tmp.labs);
undo_tmp_lab:
	qse_str_fini (&sed->tmp.lab);
undo_tmp_rex:
	qse_str_fini (&sed->tmp.rex);
fail:
	SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
	return -1;
}
/*
 * Releases the resources held by a sed object. The object's own memory
 * is not freed here; qse_sed_close() does that after calling this.
 */
void qse_sed_fini (qse_sed_t* sed)
{
/* compiled commands and the chain of command ids */
free_all_command_blocks (sed);
free_all_cids (sed);
/* the cut field buffer needs freeing only when it no longer points
 * at the built-in array */
if (sed->e.cutf.flds != sed->e.cutf.sflds)
qse_sed_freemem (sed, sed->e.cutf.flds);
qse_str_fini (&sed->e.txt.scratch);
qse_str_fini (&sed->e.txt.hold);
free_appends (sed);
/* temporaries set up by qse_sed_init(), finalized in reverse order */
qse_map_fini (&sed->tmp.labs);
qse_str_fini (&sed->tmp.lab);
qse_str_fini (&sed->tmp.rex);
}
/*
 * Sets the option selected by 'id' from 'value'.
 *
 * For QSE_SED_TRAIT, 'value' points to an int. For the two regular
 * expression depth options it points to a qse_size_t. For QSE_SED_TRACER
 * and QSE_SED_LFORMATTER, 'value' itself is converted to the callback.
 *
 * Returns 0 on success. An unknown 'id' records QSE_SED_EINVAL and
 * returns -1.
 */
int qse_sed_setopt (qse_sed_t* sed, qse_sed_opt_t id, const void* value)
{
	int rc = 0;

	switch (id)
	{
		case QSE_SED_TRAIT:
			sed->opt.trait = *(const int*)value;
			break;

		case QSE_SED_TRACER:
			sed->opt.tracer = (qse_sed_tracer_t)value;
			break;

		case QSE_SED_LFORMATTER:
			sed->opt.lformatter = (qse_sed_lformatter_t)value;
			break;

		case QSE_SED_DEPTH_REX_BUILD:
			sed->opt.depth.rex.build = *(const qse_size_t*)value;
			break;

		case QSE_SED_DEPTH_REX_MATCH:
			sed->opt.depth.rex.match = *(const qse_size_t*)value;
			break;

		default:
			qse_sed_seterrnum (sed, QSE_SED_EINVAL, QSE_NULL);
			rc = -1;
			break;
	}

	return rc;
}
/*
 * Stores the current value of the option selected by 'id' into the
 * object that 'value' points to. The pointee type is int for
 * QSE_SED_TRAIT, the callback type for QSE_SED_TRACER and
 * QSE_SED_LFORMATTER, and qse_size_t for the depth options.
 *
 * Returns 0 on success. An unknown 'id' records QSE_SED_EINVAL and
 * returns -1.
 */
int qse_sed_getopt (qse_sed_t* sed, qse_sed_opt_t id, void* value)
{
	int rc = 0;

	switch (id)
	{
		case QSE_SED_TRAIT:
			*(int*)value = sed->opt.trait;
			break;

		case QSE_SED_TRACER:
			*(qse_sed_tracer_t*)value = sed->opt.tracer;
			break;

		case QSE_SED_LFORMATTER:
			*(qse_sed_lformatter_t*)value = sed->opt.lformatter;
			break;

		case QSE_SED_DEPTH_REX_BUILD:
			*(qse_size_t*)value = sed->opt.depth.rex.build;
			break;

		case QSE_SED_DEPTH_REX_MATCH:
			*(qse_size_t*)value = sed->opt.depth.rex.match;
			break;

		default:
			qse_sed_seterrnum (sed, QSE_SED_EINVAL, QSE_NULL);
			rc = -1;
			break;
	}

	return rc;
}
/*
 * Compiles the pattern text in 'str' into a regular expression object.
 *
 * The engine is chosen at compile time: <qse/cmn/rex.h> when USE_REX is
 * defined, regcomp() when QSE_CHAR_IS_MCHAR and USE_REGEX are both
 * defined, and TRE otherwise. 'ignorecase' is applied by the regcomp()
 * and TRE variants; the USE_REX variant does not use it.
 *
 * Returns the compiled object, or QSE_NULL after recording an error at
 * 'loc' (QSE_SED_ENOMEM or QSE_SED_EREXBL).
 */
static void* build_rex (
qse_sed_t* sed, const qse_cstr_t* str,
int ignorecase, const qse_sed_loc_t* loc)
{
#if defined(USE_REX)
void* rex;
int opt = 0;
if ((sed->opt.trait & QSE_SED_EXTENDEDREX) == 0) opt |= QSE_REX_NOBOUND;
rex = qse_buildrex (
qse_sed_getmmgr(sed), sed->opt.depth.rex.build,
opt, str->ptr, str->len, QSE_NULL
);
if (rex == QSE_NULL)
{
SETERR1 (sed, QSE_SED_EREXBL, str->ptr, str->len, loc);
}
return rex;
#elif defined(QSE_CHAR_IS_MCHAR) && defined(USE_REGEX)
regex_t* rex;
qse_char_t* strz;
int xopt = 0;
if (ignorecase) xopt |= REG_ICASE;
if (sed->opt.trait & QSE_SED_EXTENDEDREX) xopt |= REG_EXTENDED;
rex = qse_sed_allocmem(sed, QSE_SIZEOF(*rex));
if (rex == QSE_NULL)
{
SETERR0 (sed, QSE_SED_ENOMEM, loc);
return QSE_NULL;
}
/* regcomp() is given a duplicate of the length-bounded pattern */
strz = qse_strxdup (str->ptr, str->len, qse_sed_getmmgr(sed));
if (strz == QSE_NULL)
{
qse_sed_freemem (sed, rex);
SETERR0 (sed, QSE_SED_ENOMEM, loc);
return QSE_NULL;
}
/* xopt is reused to hold the result of regcomp() */
xopt = regcomp (rex, strz, xopt);
qse_sed_freemem (sed, strz);
if (xopt != 0)
{
qse_sed_freemem (sed, rex);
SETERR1 (sed, QSE_SED_EREXBL, str->ptr, str->len, loc);
return QSE_NULL;
}
return rex;
#else
qse_tre_t* tre;
int opt = 0;
tre = qse_tre_open(qse_sed_getmmgr(sed), 0);
if (tre == QSE_NULL)
{
SETERR0 (sed, QSE_SED_ENOMEM, loc);
return QSE_NULL;
}
/* ignorecase is a compile option for TRE */
if (ignorecase) opt |= QSE_TRE_IGNORECASE;
if (sed->opt.trait & QSE_SED_EXTENDEDREX) opt |= QSE_TRE_EXTENDED;
if (sed->opt.trait & QSE_SED_NONSTDEXTREX) opt |= QSE_TRE_NONSTDEXT;
if (qse_tre_compx (tre, str->ptr, str->len, QSE_NULL, opt) <= -1)
{
/* tell memory exhaustion apart from a malformed pattern */
if (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)
SETERR0 (sed, QSE_SED_ENOMEM, loc);
else
SETERR1 (sed, QSE_SED_EREXBL, str->ptr, str->len, loc);
qse_tre_close (tre);
return QSE_NULL;
}
return tre;
#endif
}
/*
 * Destroys a regular expression object returned by build_rex(), using
 * the release routine of whichever engine was selected at compile time.
 * The callers in this file do not pass EMPTY_REX to it.
 */
static QSE_INLINE void free_rex (qse_sed_t* sed, void* rex)
{
#if defined(USE_REX)
qse_freerex (qse_sed_getmmgr(sed), rex);
#elif defined(QSE_CHAR_IS_MCHAR) && defined(USE_REGEX)
/* release the compiled pattern, then the regex_t allocated by build_rex() */
regfree (rex);
qse_sed_freemem (sed, rex);
#else
qse_tre_close (rex);
#endif
}
#if !defined(USE_REX)
/*
 * Matches 'str' against the compiled expression 'tre' (which is really a
 * regex_t when the regexec() variant is compiled in).
 *
 * On a match, '*mat' (if not QSE_NULL) receives the whole matched span
 * and 'submat' (if not QSE_NULL) receives the spans of the groups 1..9
 * that have a start offset other than -1; other entries are left as is.
 *
 * Returns 1 on a match and 0 when nothing matched. -1 is returned with
 * an error recorded at 'loc' when the string duplicate cannot be
 * allocated (regexec() variant) or qse_tre_execx() fails for a reason
 * other than "no match" (TRE variant).
 *
 * NOTE(review): in the regexec() variant only REG_NOMATCH is checked;
 * any other result continues as if a match was found - confirm whether
 * other results are possible.
 */
static int matchtre (
qse_sed_t* sed, qse_tre_t* tre, int opt,
const qse_cstr_t* str, qse_cstr_t* mat,
qse_cstr_t submat[9], const qse_sed_loc_t* loc)
{
#if defined(QSE_CHAR_IS_MCHAR) && defined(USE_REGEX)
regmatch_t match[10];
qse_char_t* strz;
int xopt = 0;
if (opt & QSE_TRE_NOTBOL) xopt |= REG_NOTBOL;
/* regexec() is given a duplicate of the length-bounded subject */
strz = qse_strxdup(str->ptr, str->len, qse_sed_getmmgr(sed));
if (strz == QSE_NULL)
{
SETERR0 (sed, QSE_SED_ENOMEM, loc);
return -1;
}
/* xopt is reused to hold the result of regexec() */
xopt = regexec ((regex_t*)tre, strz, QSE_COUNTOF(match), match, xopt);
qse_sed_freemem (sed, strz);
if (xopt == REG_NOMATCH) return 0;
#else
int n;
/*qse_tre_match_t match[10] = { { 0, 0 }, };*/
qse_tre_match_t match[10];
QSE_MEMSET (match, 0, QSE_SIZEOF(match));
n = qse_tre_execx (tre, str->ptr, str->len, match, QSE_COUNTOF(match), opt);
if (n <= -1)
{
qse_sed_errnum_t errnum;
/* "no match" is a normal outcome, not an error */
if (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMATCH) return 0;
errnum = (QSE_TRE_ERRNUM(tre) == QSE_TRE_ENOMEM)?
QSE_SED_ENOMEM: QSE_SED_EREXMA;
SETERR0 (sed, errnum, loc);
return -1;
}
#endif
QSE_ASSERT (match[0].rm_so != -1);
if (mat)
{
mat->ptr = &str->ptr[match[0].rm_so];
mat->len = match[0].rm_eo - match[0].rm_so;
}
if (submat)
{
int i;
/* you must initialize submat before you pass it into this
 * function because entries for unset groups are not filled in */
for (i = 1; i < QSE_COUNTOF(match); i++)
{
if (match[i].rm_so != -1)
{
submat[i-1].ptr = &str->ptr[match[i].rm_so];
submat[i-1].len = match[i].rm_eo - match[i].rm_so;
}
}
}
return 1;
}
#endif
/* check if c is a space character. the newline is not included */
#define IS_SPACE(c) ((c) == QSE_T(' ') || (c) == QSE_T('\t') || (c) == QSE_T('\r'))
/* check if c is a line terminator */
#define IS_LINTERM(c) ((c) == QSE_T('\n'))
/* check if c is a space character or a line terminator */
#define IS_WSPACE(c) (IS_SPACE(c) || IS_LINTERM(c))
/* check if c is a command terminator excluding a space character.
 * the argument is parenthesized at every use so that an expression
 * argument is compared as a whole. */
#define IS_CMDTERM(c) \
	((c) == QSE_CHAR_EOF || (c) == QSE_T('#') || \
	 (c) == QSE_T(';') || IS_LINTERM(c) || \
	 (c) == QSE_T('{') || (c) == QSE_T('}'))
/* check if c can compose a label */
#define IS_LABCHAR(c) (!IS_CMDTERM(c) && !IS_WSPACE(c))
/* the script character most recently returned by getnextsc() */
#define CURSC(sed) ((sed)->src.cc)
/* read the next script character into c. on failure, return errret
 * from the function that uses the macro */
#define NXTSC(sed,c,errret) \
	do { if (getnextsc(sed,&(c)) <= -1) return (errret); } while (0)
/* read the next script character into c. on failure, jump to label */
#define NXTSC_GOTO(sed,c,label) \
	do { if (getnextsc(sed,&(c)) <= -1) goto label; } while (0)
/* peek at the next script character without consuming it. on failure,
 * return errret from the function that uses the macro */
#define PEEPNXTSC(sed,c,errret) \
	do { if (peepnextsc(sed,&(c)) <= -1) return (errret); } while (0)
/*
 * Opens the script stream through the script I/O handler and resets the
 * reader state: empty buffer, no current character, line 1, column 0,
 * end-of-file flag cleared.
 *
 * Returns 0 on success and -1 on failure. If the handler fails without
 * setting an error number, QSE_SED_EIOUSR is recorded.
 */
static int open_script_stream (qse_sed_t* sed)
{
	/* cleared first so that an error set by the handler is detectable */
	sed->errnum = QSE_SED_ENOERR;

	if (sed->src.fun(sed, QSE_SED_IO_OPEN, &sed->src.arg, QSE_NULL, 0) <= -1)
	{
		if (sed->errnum == QSE_SED_ENOERR)
		{
			SETERR0 (sed, QSE_SED_EIOUSR, QSE_NULL);
		}
		return -1;
	}

	/* nothing has been read yet */
	sed->src.eof = 0;
	sed->src.cc = QSE_CHAR_EOF;
	sed->src.cur = sed->src.buf;
	sed->src.end = sed->src.buf;

	sed->src.loc.line = 1;
	sed->src.loc.colm = 0;

	return 0;
}
/*
 * Closes the script stream through the script I/O handler.
 *
 * Returns 0 on success and -1 on failure. If the handler fails without
 * setting an error number, QSE_SED_EIOUSR is recorded.
 */
static int close_script_stream (qse_sed_t* sed)
{
	/* cleared first so that an error set by the handler is detectable */
	sed->errnum = QSE_SED_ENOERR;

	if (sed->src.fun(sed, QSE_SED_IO_CLOSE, &sed->src.arg, QSE_NULL, 0) >= 0)
	{
		return 0;
	}

	if (sed->errnum == QSE_SED_ENOERR)
	{
		SETERR0 (sed, QSE_SED_EIOUSR, QSE_NULL);
	}
	return -1;
}
/*
 * Refills the script buffer through the script I/O handler.
 *
 * Returns 1 when characters were read, 0 at the end of the stream and
 * -1 on failure. If the handler fails without setting an error number,
 * QSE_SED_EIOUSR is recorded.
 */
static int read_script_stream (qse_sed_t* sed)
{
	qse_ssize_t nread;

	/* cleared first so that an error set by the handler is detectable */
	sed->errnum = QSE_SED_ENOERR;

	nread = sed->src.fun(
		sed, QSE_SED_IO_READ, &sed->src.arg,
		sed->src.buf, QSE_COUNTOF(sed->src.buf));

	if (nread > 0)
	{
		/* fresh data: restart from the beginning of the buffer */
		sed->src.cur = sed->src.buf;
		sed->src.end = sed->src.buf + nread;
		return 1;
	}

	if (nread == 0)
	{
		/* end of stream. 'cur' and 'end' are deliberately left alone */
		sed->src.eof = 1;
		return 0;
	}

	if (sed->errnum == QSE_SED_ENOERR)
	{
		SETERR0 (sed, QSE_SED_EIOUSR, QSE_NULL);
	}
	return -1;
}
/*
 * Consumes the next script character, stores it in both sed->src.cc and
 * '*c', and updates the line/column position. QSE_CHAR_EOF is produced
 * once the stream is exhausted.
 *
 * Returns 0 on success and -1 when the buffer could not be refilled.
 */
static int getnextsc (qse_sed_t* sed, qse_cint_t* c)
{
	/* the position of the character about to be read depends on the
	 * character that was read before it */
	if (sed->src.cc != QSE_T('\n'))
	{
		/* note that the column keeps growing if this function is
		 * called again after QSE_CHAR_EOF has been returned. that
		 * would point at a caller reading more than necessary. */
		sed->src.loc.colm++;
	}
	else
	{
		/* TODO: support different line end conventions */
		sed->src.loc.line++;
		sed->src.loc.colm = 1;
	}

	/* refill the buffer when it is used up and more input may exist */
	if (!sed->src.eof && sed->src.cur >= sed->src.end)
	{
		if (read_script_stream(sed) <= -1) return -1;
	}

	sed->src.cc = (sed->src.cur < sed->src.end)? (*sed->src.cur++): QSE_CHAR_EOF;
	*c = sed->src.cc;
	return 0;
}
/*
 * Stores the next script character in '*c' without consuming it.
 * Unlike getnextsc(), the line/column position, the 'cur' pointer and
 * sed->src.cc are not changed. QSE_CHAR_EOF is produced once the stream
 * is exhausted.
 *
 * Returns 0 on success and -1 when the buffer could not be refilled.
 */
static int peepnextsc (qse_sed_t* sed, qse_cint_t* c)
{
	/* refilling the buffer while peeping is fine because it does not
	 * touch sed->src.cc */
	int must_refill = (sed->src.cur >= sed->src.end && !sed->src.eof);

	if (must_refill && read_script_stream(sed) <= -1) return -1;

	*c = (sed->src.cur < sed->src.end)? (*sed->src.cur): QSE_CHAR_EOF;
	return 0;
}
/*
 * Releases the regular expressions held by the two addresses of a
 * command and resets such addresses to QSE_SED_ADR_NONE. The second
 * address is handled before the first. EMPTY_REX is a sentinel and is
 * not passed to free_rex().
 */
static void free_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	void* rx;

	if (cmd->a2.type == QSE_SED_ADR_REX)
	{
		QSE_ASSERT (cmd->a2.u.rex != QSE_NULL);
		rx = cmd->a2.u.rex;
		if (rx != EMPTY_REX) free_rex (sed, rx);
		cmd->a2.type = QSE_SED_ADR_NONE;
	}

	if (cmd->a1.type == QSE_SED_ADR_REX)
	{
		QSE_ASSERT (cmd->a1.u.rex != QSE_NULL);
		rx = cmd->a1.u.rex;
		if (rx != EMPTY_REX) free_rex (sed, rx);
		cmd->a1.type = QSE_SED_ADR_NONE;
	}
}
/*
 * Allocates an empty command block and appends it to the chain of
 * command blocks, making it the new last block.
 *
 * Returns 0 on success and -1 when the allocation fails.
 */
static int add_command_block (qse_sed_t* sed)
{
	qse_sed_cmd_blk_t* blk;

	blk = (qse_sed_cmd_blk_t*) qse_sed_callocmem(sed, QSE_SIZEOF(*blk));
	if (blk == QSE_NULL) return -1;

	blk->len = 0;
	blk->next = QSE_NULL;

	/* link it after the current last block, then advance 'lb' */
	sed->cmd.lb->next = blk;
	sed->cmd.lb = blk;
	return 0;
}
/*
 * Frees every command in every command block, frees the blocks that
 * were allocated by add_command_block(), and resets the embedded first
 * block to an empty, unlinked state.
 */
static void free_all_command_blocks (qse_sed_t* sed)
{
	qse_sed_cmd_blk_t* blk = &sed->cmd.fb;

	while (blk != QSE_NULL)
	{
		/* remember the successor because 'blk' may be freed below */
		qse_sed_cmd_blk_t* following = blk->next;

		/* free the commands from the last one to the first one */
		while (blk->len > 0)
		{
			blk->len--;
			free_command (sed, &blk->buf[blk->len]);
		}

		/* the first block is embedded in the sed object itself */
		if (blk != &sed->cmd.fb) qse_sed_freemem (sed, blk);

		blk = following;
	}

	QSE_MEMSET (&sed->cmd.fb, 0, QSE_SIZEOF(sed->cmd.fb));
	sed->cmd.lb = &sed->cmd.fb;
	sed->cmd.lb->len = 0;
	sed->cmd.lb->next = QSE_NULL;
}
/*
 * Releases what a single compiled command owns: the regular expressions
 * of its addresses and the command-specific data selected by its type.
 */
static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	free_address (sed, cmd);

	switch (cmd->type)
	{
		case QSE_SED_CMD_APPEND:
		case QSE_SED_CMD_INSERT:
		case QSE_SED_CMD_CHANGE:
		{
			/* the text argument */
			if (cmd->u.text.ptr != QSE_NULL)
			{
				qse_sed_freemem (sed, cmd->u.text.ptr);
			}
			break;
		}

		case QSE_SED_CMD_READ_FILE:
		case QSE_SED_CMD_READ_FILELN:
		case QSE_SED_CMD_WRITE_FILE:
		case QSE_SED_CMD_WRITE_FILELN:
		{
			/* the file name */
			if (cmd->u.file.ptr != QSE_NULL)
			{
				qse_sed_freemem (sed, cmd->u.file.ptr);
			}
			break;
		}

		case QSE_SED_CMD_BRANCH:
		case QSE_SED_CMD_BRANCH_COND:
		{
			/* the label name */
			if (cmd->u.branch.label.ptr != QSE_NULL)
			{
				qse_sed_freemem (sed, cmd->u.branch.label.ptr);
			}
			break;
		}

		case QSE_SED_CMD_SUBSTITUTE:
		{
			/* the output file name, the replacement text and
			 * the compiled pattern unless it is the sentinel */
			if (cmd->u.subst.file.ptr != QSE_NULL)
			{
				qse_sed_freemem (sed, cmd->u.subst.file.ptr);
			}
			if (cmd->u.subst.rpl.ptr != QSE_NULL)
			{
				qse_sed_freemem (sed, cmd->u.subst.rpl.ptr);
			}
			if (cmd->u.subst.rex && cmd->u.subst.rex != EMPTY_REX)
			{
				free_rex (sed, cmd->u.subst.rex);
			}
			break;
		}

		case QSE_SED_CMD_TRANSLATE:
		{
			/* the translation set */
			if (cmd->u.transet.ptr != QSE_NULL)
			{
				qse_sed_freemem (sed, cmd->u.transet.ptr);
			}
			break;
		}

		case QSE_SED_CMD_CUT:
		{
			free_all_cut_selector_blocks (sed, cmd);
			break;
		}

		default:
		{
			/* nothing to release for other commands */
			break;
		}
	}
}
/*
 * Frees the chain of command ids starting at sed->src.cid. When the
 * head of the chain is the 'unknown_cid' member embedded in the sed
 * object, it is skipped instead of being freed.
 */
static void free_all_cids (qse_sed_t* sed)
{
	qse_sed_cid_t* following;

	if (sed->src.cid == (qse_sed_cid_t*)&sed->src.unknown_cid)
	{
		sed->src.cid = sed->src.cid->next;
	}

	for (; sed->src.cid != QSE_NULL; sed->src.cid = following)
	{
		/* fetch the link before the node is released */
		following = sed->src.cid->next;
		qse_sed_freemem (sed, sed->src.cid);
	}
}
/*
 * Translates the character that follows a backslash in the script.
 *
 * 'c' is the character after the backslash. The result is stored in
 * '*ec'. It is the control character for \a, \f, \n, \r, \t and \v, and
 * the character built from the hexadecimal digits for \xnn (and for
 * \Xnnnn.. when QSE_CHAR_IS_WCHAR is defined). The hexadecimal forms
 * consume the digits from the script stream. For anything else,
 * including \x that is not followed by a hexadecimal digit, '*ec' is
 * 'c' itself.
 *
 * If 'xamp' is not QSE_NULL, '*xamp' is set to 1 when an ampersand was
 * built from a hexadecimal form, and to 0 otherwise.
 *
 * Returns 0 on success and -1 when reading the script stream fails.
 */
static int trans_escaped (qse_sed_t* sed, qse_cint_t c, qse_cint_t* ec, int* xamp)
{
	if (xamp) *xamp = 0;

	switch (c)
	{
		case QSE_T('a'):
			c = QSE_T('\a');
			break;

		/*
		 * \b is not translated here because it would clash with
		 * \b of a regular expression.
		 */

		case QSE_T('f'):
			c = QSE_T('\f');
			/* without this break, \f would fall into the \n case
			 * and turn into a newline */
			break;

		case QSE_T('n'):
			c = QSE_T('\n');
			break;

		case QSE_T('r'):
			c = QSE_T('\r');
			break;

		case QSE_T('t'):
			c = QSE_T('\t');
			break;

		case QSE_T('v'):
			c = QSE_T('\v');
			break;

		case QSE_T('x'):
		{
			/* \xnn - one or two hexadecimal digits */
			int cc;
			qse_cint_t peeped;

			PEEPNXTSC (sed, peeped, -1);
			cc = QSE_XDIGITTONUM (peeped);
			if (cc <= -1) break; /* no digit at all. 'x' is kept */
			NXTSC (sed, peeped, -1); /* consume the character peeped */
			c = cc;

			PEEPNXTSC (sed, peeped, -1);
			cc = QSE_XDIGITTONUM (peeped);
			if (cc <= -1) break; /* a single digit only */
			NXTSC (sed, peeped, -1); /* consume the character peeped */
			c = (c << 4) | cc;

			/* let's indicate that '&' is built from \x26. */
			if (xamp && c == QSE_T('&')) *xamp = 1;
			break;
		}

#if defined(QSE_CHAR_IS_WCHAR)
		case QSE_T('X'):
		{
			/* \Xnnnn or \Xnnnnnnnn for wchar_t */
			int cc, i;
			qse_cint_t peeped;

			PEEPNXTSC (sed, peeped, -1);
			cc = QSE_XDIGITTONUM (peeped);
			if (cc <= -1) break; /* no digit at all. 'X' is kept */
			NXTSC (sed, peeped, -1); /* consume the character peeped */
			c = cc;

			/* up to two hexadecimal digits per byte of qse_char_t */
			for (i = 1; i < QSE_SIZEOF(qse_char_t) * 2; i++)
			{
				PEEPNXTSC (sed, peeped, -1);
				cc = QSE_XDIGITTONUM (peeped);
				if (cc <= -1) break;
				NXTSC (sed, peeped, -1); /* consume the character peeped */
				c = (c << 4) | cc;
			}

			/* let's indicate that '&' is built from the hexadecimal form. */
			if (xamp && c == QSE_T('&')) *xamp = 1;
			break;
		}
#endif

		default:
			/* not special at this layer. returned unchanged */
			break;
	}

	*ec = c;
	return 0;
}
/*
 * Collects script text into 'buf' up to, but not including, the
 * terminating delimiter 'rxend'. 'buf' is cleared first.
 *
 * - End of input or an unescaped newline before the delimiter is an
 *   error: QSE_SED_ECMDIC when 'cmd' is given, QSE_SED_EREXIC otherwise.
 * - A backslash followed by a newline yields the newline alone.
 * - Any other backslash sequence is passed to trans_escaped(). The
 *   backslash itself is kept in 'buf' when the sequence was not
 *   translated, or when \x26 appears in a replacement so that the
 *   resulting '&' is not treated as the special '&'.
 * - When 'replacement' is 0, bracket expressions are tracked so that a
 *   delimiter inside [...] does not end the text.
 *
 * Returns 0 on success and -1 on failure.
 */
static int pickup_rex (
qse_sed_t* sed, qse_char_t rxend,
int replacement, const qse_sed_cmd_t* cmd, qse_str_t* buf)
{
/*
* 'replacement' indicates that this function is called for
* 'replacement' in 's/pattern/replacement'.
*/
qse_cint_t c;
qse_size_t chars_from_opening_bracket = 0;
/* 0: outside brackets, 1: inside [, 2: inside [: of a bracket expression */
int bracket_state = 0;
qse_str_clear (buf);
while (1)
{
NXTSC (sed, c, -1);
shortcut:
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
{
if (cmd)
{
SETERR1 (
sed, QSE_SED_ECMDIC,
&cmd->type, 1,
&sed->src.loc
);
}
else
{
SETERR1 (
sed, QSE_SED_EREXIC,
QSE_STR_PTR(buf), QSE_STR_LEN(buf),
&sed->src.loc
);
}
return -1;
}
/* the delimiter ends the text only outside a bracket expression */
if (c == rxend && bracket_state == 0) break;
if (c == QSE_T('\\'))
{
qse_cint_t nc;
NXTSC (sed, nc, -1);
if (nc == QSE_CHAR_EOF /*|| IS_LINTERM(nc)*/)
{
if (cmd)
{
SETERR1 (
sed, QSE_SED_ECMDIC,
&cmd->type, 1,
&sed->src.loc
);
}
else
{
SETERR1 (
sed, QSE_SED_EREXIC,
QSE_STR_PTR(buf),
QSE_STR_LEN(buf),
&sed->src.loc
);
}
return -1;
}
if (bracket_state > 0 && nc == QSE_T(']'))
{
/*
* if 'replacement' is set, bracket_state is always 0.
* so this block is never reached in that case.
*
* a backslashed closing bracket is seen.
* it is not :]. if bracket_state is 2, this \]
* makes an illegal regular expression. but,
* let's not care.. just drop the state to 0
* as if the outer [ is closed.
*/
if (chars_from_opening_bracket > 1) bracket_state = 0;
}
/* a backslash-newline pair contributes the newline only */
if (nc == QSE_T('\n')) c = nc;
else
{
qse_cint_t ec;
int xamp;
if (trans_escaped (sed, nc, &ec, &xamp) <= -1) return -1;
if (ec == nc || (xamp && replacement))
{
/* if the character after a backslash is not special
* at this layer, add the backslash into the
* regular expression buffer as it is.
*
* if \x26 is found in the replacement, i also need to
* transform it to \& so that it is not treated as a
* special &.
*/
if (qse_str_ccat (buf, QSE_T('\\')) == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
return -1;
}
}
c = ec;
}
}
else if (!replacement)
{
/* this block sets a flag to indicate that we are in []
* of a regular expression. */
if (c == QSE_T('['))
{
if (bracket_state <= 0)
{
bracket_state = 1;
chars_from_opening_bracket = 0;
}
else if (bracket_state == 1)
{
/* a [ inside a bracket expression. look at the next
* character to see if it begins [: */
qse_cint_t nc;
NXTSC (sed, nc, -1);
if (nc == QSE_T(':')) bracket_state = 2;
if (qse_str_ccat (buf, c) == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
return -1;
}
chars_from_opening_bracket++;
/* the character read ahead is processed as if it has
* just been read at the top of the loop */
c = nc;
goto shortcut;
}
}
else if (c == QSE_T(']'))
{
if (bracket_state == 1)
{
/* if it is the first character after [,
* it is a normal character. */
if (chars_from_opening_bracket > 1) bracket_state--;
}
else if (bracket_state == 2)
{
/* it doesn't really care if colon was for opening bracket
* like in [[:]] */
if (QSE_STR_LASTCHAR(buf) == QSE_T(':')) bracket_state--;
}
}
}
if (qse_str_ccat (buf, c) == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
return -1;
}
chars_from_opening_bracket++;
}
return 0;
}
/*
 * Reads a regular expression address terminated by 'rxend' into
 * sed->tmp.rex and compiles it.
 *
 * Returns the compiled expression, EMPTY_REX for an empty pattern, or
 * QSE_NULL on failure.
 */
static QSE_INLINE void* compile_rex_address (qse_sed_t* sed, qse_char_t rxend)
{
	qse_cint_t nextc;
	int icase;

	if (pickup_rex(sed, rxend, 0, QSE_NULL, &sed->tmp.rex) <= -1)
	{
		return QSE_NULL;
	}

	/* an empty regex is reported before any modifier is looked at.
	 * so a modifier is naturally disallowed for an empty regex. */
	if (QSE_STR_LEN(&sed->tmp.rex) <= 0) return EMPTY_REX;

	/* the optional 'I' modifier requests case-insensitive matching */
	PEEPNXTSC (sed, nextc, QSE_NULL);
	icase = (nextc == QSE_T('I'));
	if (icase)
	{
		NXTSC (sed, nextc, QSE_NULL); /* consume the character peeped */
	}

	return build_rex(sed, QSE_STR_XSTR(&sed->tmp.rex), icase, &sed->src.loc);
}
/*
 * Parses one address starting at the current script character and
 * stores it in '*a'.
 *
 * Recognized forms: '$', a line number, /REGEX/, \cREGEXc and, only
 * when 'extended' is non-zero, +N and ~N. If none of them is found,
 * the address type is set to QSE_SED_ADR_NONE and nothing is consumed.
 *
 * Returns 'a' on success and QSE_NULL on failure.
 */
static qse_sed_adr_t* get_address (qse_sed_t* sed, qse_sed_adr_t* a, int extended)
{
	qse_cint_t c = CURSC(sed);

	if (c == QSE_T('$'))
	{
		a->type = QSE_SED_ADR_DOL;
		NXTSC (sed, c, QSE_NULL);
		return a;
	}

	if (c >= QSE_T('0') && c <= QSE_T('9'))
	{
		/* a plain line number */
		qse_size_t lno = 0;

		do
		{
			lno = lno * 10 + c - QSE_T('0');
			NXTSC (sed, c, QSE_NULL);
		}
		while (c >= QSE_T('0') && c <= QSE_T('9'));

		a->type = QSE_SED_ADR_LINE;
		a->u.lno = lno;
		return a;
	}

	if (c == QSE_T('/') || c == QSE_T('\\'))
	{
		if (c == QSE_T('\\'))
		{
			/* \cREGEXc - the character after the backslash
			 * becomes the delimiter */
			NXTSC (sed, c, QSE_NULL);
			if (c == QSE_CHAR_EOF || IS_LINTERM(c))
			{
				SETERR1 (sed, QSE_SED_EREXIC,
					QSE_T(""), 0, &sed->src.loc);
				return QSE_NULL;
			}
		}
		/* otherwise it is /REGEX/ and '/' is the delimiter */

		a->u.rex = compile_rex_address(sed, c);
		if (a->u.rex == QSE_NULL) return QSE_NULL;

		a->type = QSE_SED_ADR_REX;
		NXTSC (sed, c, QSE_NULL);
		return a;
	}

	if (extended && (c == QSE_T('+') || c == QSE_T('~')))
	{
		/* +N or ~N. at least one digit must follow */
		qse_size_t lno = 0;

		a->type = (c == QSE_T('+'))? QSE_SED_ADR_RELLINE: QSE_SED_ADR_RELLINEM;

		NXTSC (sed, c, QSE_NULL);
		if (!(c >= QSE_T('0') && c <= QSE_T('9')))
		{
			SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc);
			return QSE_NULL;
		}

		do
		{
			lno = lno * 10 + c - QSE_T('0');
			NXTSC (sed, c, QSE_NULL);
		}
		while (c >= QSE_T('0') && c <= QSE_T('9'));

		a->u.lno = lno;
		return a;
	}

	a->type = QSE_SED_ADR_NONE;
	return a;
}
/* get the text for the 'a', 'i', and 'c' commands.
 * POSIX:
 * The argument text shall consist of one or more lines. Each embedded
 * <newline> in the text shall be preceded by a backslash. Other backslashes
 * in text shall be removed, and the following character shall be treated
 * literally.
 *
 * The text is read starting from the current script character and ends
 * at the first unescaped newline or at the end of the script. The
 * result is stored in cmd->u.text.
 *
 * - QSE_SED_STRIPLS: leading spaces of each text line are skipped.
 * - QSE_SED_KEEPTBS: a backslash that is the very last character of the
 *   script is kept instead of being dropped.
 * - QSE_SED_ENSURENL: a newline is appended if the text did not end
 *   with one.
 *
 * Returns 0 on success and -1 on failure with an error recorded. */
static int get_text (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
#define ADD(sed,str,c,errlabel) \
do { \
	if (qse_str_ccat (str, c) == (qse_size_t)-1) \
	{ \
		SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL); \
		goto errlabel; \
	} \
} while (0)

	qse_cint_t c;
	qse_str_t* t = QSE_NULL;

	t = qse_str_open(qse_sed_getmmgr(sed), 0, 128);
	if (t == QSE_NULL)
	{
		/* record the failure as the other parsers in this file do.
		 * otherwise the caller would see a stale error number. */
		SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
		goto oops;
	}

	c = CURSC (sed);

	/* each iteration of the outer loop handles one line of the text */
	do
	{
		if (sed->opt.trait & QSE_SED_STRIPLS)
		{
			/* get the first non-space character */
			while (IS_SPACE(c)) NXTSC_GOTO (sed, c, oops);
		}

		while (c != QSE_CHAR_EOF)
		{
			int nl = 0;

			if (c == QSE_T('\\'))
			{
				/* drop the backslash and take the next
				 * character literally */
				NXTSC_GOTO (sed, c, oops);
				if (c == QSE_CHAR_EOF)
				{
					if (sed->opt.trait & QSE_SED_KEEPTBS)
						ADD (sed, t, QSE_T('\\'), oops);
					break;
				}
			}
			else if (c == QSE_T('\n')) nl = 1; /* unescaped newline */

			ADD (sed, t, c, oops);

			if (c == QSE_T('\n'))
			{
				if (nl)
				{
					/* if newline is not escaped, stop */
					qse_cint_t dump;
					/* let's not pollute 'c' for ENSURENL check after done: */
					NXTSC_GOTO (sed, dump, oops);
					goto done;
				}

				/* else carry on reading the next line */
				NXTSC_GOTO (sed, c, oops);
				break;
			}

			NXTSC_GOTO (sed, c, oops);
		}
	}
	while (c != QSE_CHAR_EOF);

done:
	if ((sed->opt.trait & QSE_SED_ENSURENL) && c != QSE_T('\n'))
	{
		/* TODO: support different line end conventions */
		ADD (sed, t, QSE_T('\n'), oops);
	}

	/* hand the collected characters over to the command */
	qse_str_yield (t, &cmd->u.text, 0);
	qse_str_close (t);
	return 0;

oops:
	if (t) qse_str_close (t);
	return -1;

#undef ADD
}
/*
 * Parses the label name of a ':' command and registers it in the label
 * table with 'cmd' as its value.
 *
 * An empty label is an error (QSE_SED_ELABEM) when QSE_SED_STRICT is
 * set; otherwise nothing is registered. A label that is already
 * registered is an error (QSE_SED_ELABDU).
 *
 * Returns 0 on success and -1 on failure.
 */
static int get_label (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_cint_t c = CURSC(sed);

	/* skip white spaces */
	while (IS_SPACE(c)) NXTSC (sed, c, -1);

	if (IS_LABCHAR(c))
	{
		/* collect the label name */
		qse_str_clear (&sed->tmp.lab);
		do
		{
			if (qse_str_ccat(&sed->tmp.lab, c) == (qse_size_t)-1)
			{
				SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
				return -1;
			}
			NXTSC (sed, c, -1);
		}
		while (IS_LABCHAR(c));

		/* the same label must not be defined twice */
		if (qse_map_search(
			&sed->tmp.labs,
			QSE_STR_PTR(&sed->tmp.lab),
			QSE_STR_LEN(&sed->tmp.lab)) != QSE_NULL)
		{
			SETERR1 (
				sed, QSE_SED_ELABDU,
				QSE_STR_PTR(&sed->tmp.lab),
				QSE_STR_LEN(&sed->tmp.lab),
				&sed->src.loc
			);
			return -1;
		}

		if (qse_map_insert(
			&sed->tmp.labs,
			QSE_STR_PTR(&sed->tmp.lab), QSE_STR_LEN(&sed->tmp.lab),
			cmd, 0) == QSE_NULL)
		{
			SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
			return -1;
		}
	}
	else
	{
		/* label name is empty */
		if (sed->opt.trait & QSE_SED_STRICT)
		{
			SETERR0 (sed, QSE_SED_ELABEM, &sed->src.loc);
			return -1;
		}

		/* empty label. noop command. don't register anything */
		qse_str_clear (&sed->tmp.lab);
	}

	while (IS_SPACE(c)) NXTSC (sed, c, -1);

	/* consume the terminator except for '}', '#' and the end of input */
	if (IS_CMDTERM(c) &&
	    c != QSE_T('}') &&
	    c != QSE_T('#') &&
	    c != QSE_CHAR_EOF)
	{
		NXTSC (sed, c, -1);
	}

	return 0;
}
/*
 * Verifies that the current command is properly terminated: after
 * optional spaces, a command terminator must follow, or
 * QSE_SED_ESCEXP is recorded.
 *
 * Returns 0 on success and -1 on failure.
 */
static int terminate_command (qse_sed_t* sed)
{
	qse_cint_t c = CURSC(sed);

	while (IS_SPACE(c)) NXTSC (sed, c, -1);

	if (!IS_CMDTERM(c))
	{
		SETERR0 (sed, QSE_SED_ESCEXP, &sed->src.loc);
		return -1;
	}

	/* if the command is terminated by #, the caller is expected to
	 * skip the comment text. so the next character is not read in.
	 * the same goes for braces and the end of input. */
	if (c == QSE_T('#') ||
	    c == QSE_T('{') ||
	    c == QSE_T('}') ||
	    c == QSE_CHAR_EOF)
	{
		return 0;
	}

	NXTSC (sed, c, -1);
	return 0;
}
/*
 * Parses the optional target label of a branch command.
 *
 * - No label: both the label and the target are cleared.
 * - Label already registered: the target is taken from the label table
 *   and the label name is not kept.
 * - Label not registered yet: the label name is stored in the command
 *   and the target is left as QSE_NULL.
 *
 * Returns 0 on success and -1 on failure.
 */
static int get_branch_target (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_cint_t c;
	qse_str_t* name;
	qse_map_pair_t* found;

	/* skip white spaces */
	c = CURSC(sed);
	while (IS_SPACE(c)) NXTSC (sed, c, -1);

	if (IS_CMDTERM(c))
	{
		/* no branch target is given -
		 * a branch command without a target should cause
		 * sed to jump to the end of a script.
		 */
		cmd->u.branch.label.ptr = QSE_NULL;
		cmd->u.branch.label.len = 0;
		cmd->u.branch.target = QSE_NULL;
		return terminate_command(sed);
	}

	name = qse_str_open(qse_sed_getmmgr(sed), 0, 32);
	if (name == QSE_NULL)
	{
		SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
		return -1;
	}

	/* collect the label name */
	while (IS_LABCHAR(c))
	{
		if (qse_str_ccat(name, c) == (qse_size_t)-1)
		{
			SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
			goto fail;
		}
		NXTSC_GOTO (sed, c, fail);
	}

	if (terminate_command(sed) <= -1) goto fail;

	found = qse_map_search(&sed->tmp.labs, QSE_STR_PTR(name), QSE_STR_LEN(name));
	if (found != QSE_NULL)
	{
		/* the label is known. only the target is needed */
		cmd->u.branch.label.ptr = QSE_NULL;
		cmd->u.branch.label.len = 0;
		cmd->u.branch.target = QSE_MAP_VPTR(found);
	}
	else
	{
		/* label not resolved yet. keep its name in the command */
		qse_str_yield (name, &cmd->u.branch.label, 0);
		cmd->u.branch.target = QSE_NULL;
	}

	qse_str_close (name);
	return 0;

fail:
	qse_str_close (name);
	return -1;
}
/*
 * Parses a file name starting at the current script character and
 * stores it in '*xstr'.
 *
 * Leading spaces are skipped and unescaped trailing spaces are removed.
 * The name runs up to a command terminator. A backslash makes the next
 * character part of the name, and \n stands for a newline. An empty name
 * is an error (QSE_SED_EFILEM). A '\0' character, or a backslash
 * followed by '\0', a newline or the end of input, is an error
 * (QSE_SED_EFILIL).
 *
 * Returns 0 on success and -1 on failure.
 */
static int get_file (qse_sed_t* sed, qse_cstr_t* xstr)
{
qse_cint_t c;
qse_str_t* t = QSE_NULL;
/* number of unescaped spaces seen since the last non-space character */
qse_size_t trailing_spaces = 0;
/* skip white spaces */
c = CURSC(sed);
while (IS_SPACE(c)) NXTSC (sed, c, -1);
if (IS_CMDTERM(c))
{
SETERR0 (sed, QSE_SED_EFILEM, &sed->src.loc);
goto oops;
}
t = qse_str_open(qse_sed_getmmgr(sed), 0, 32);
if (t == QSE_NULL)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
goto oops;
}
do
{
if (c == QSE_T('\0'))
{
/* the file name should not contain '\0' */
SETERR0 (sed, QSE_SED_EFILIL, &sed->src.loc);
goto oops;
}
/* this is checked before the escape handling below. so the
 * character following a backslash is never counted */
if (IS_SPACE(c)) trailing_spaces++;
else trailing_spaces = 0;
if (c == QSE_T('\\'))
{
NXTSC_GOTO (sed, c, oops);
if (c == QSE_T('\0') || c == QSE_CHAR_EOF || IS_LINTERM(c))
{
SETERR0 (sed, QSE_SED_EFILIL, &sed->src.loc);
goto oops;
}
if (c == QSE_T('n')) c = QSE_T('\n');
}
if (qse_str_ccat (t, c) == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, &sed->src.loc);
goto oops;
}
NXTSC_GOTO (sed, c, oops);
}
while (!IS_CMDTERM(c));
if (terminate_command (sed) <= -1) goto oops;
if (trailing_spaces > 0)
{
/* cut off the unescaped spaces at the end of the name */
qse_str_setlen (t, QSE_STR_LEN(t) - trailing_spaces);
}
/* hand the collected characters over to the caller */
qse_str_yield (t, xstr, 0);
qse_str_close (t);
return 0;
oops:
if (t) qse_str_close (t);
return -1;
}
/* If c is the end of input or a line terminator, record that the command
 * is incomplete (QSE_SED_ECMDIC with the command type as the message
 * argument) and execute 'action'. The arguments are parenthesized so
 * that expression arguments are handled as a whole. */
#define CHECK_CMDIC(sed,cmd,c,action) \
do { \
	if ((c) == QSE_CHAR_EOF || IS_LINTERM(c)) \
	{ \
		SETERR1 (sed, QSE_SED_ECMDIC, \
			&(cmd)->type, 1, &(sed)->src.loc); \
		action; \
	} \
} while (0)
/* The variant for a character that follows a backslash: only the end of
 * input makes the command incomplete. */
#define CHECK_CMDIC_ESCAPED(sed,cmd,c,action) \
do { \
	if ((c) == QSE_CHAR_EOF) \
	{ \
		SETERR1 (sed, QSE_SED_ECMDIC, \
			&(cmd)->type, 1, &(sed)->src.loc); \
		action; \
	} \
} while (0)
/*
 * Parses the rest of a substitute command: the pattern, the replacement
 * and the options. The current script character is the delimiter.
 *
 * Options: 'p', 'i' or 'I', 'g', 'k', an occurrence number, and 'w'
 * followed by a file name. The occurrence number may be given only once,
 * must not be 0 and must not exceed the maximum of unsigned short. If
 * neither 'g' nor an occurrence number is given, the occurrence is 1.
 *
 * The pattern is compiled into cmd->u.subst.rex (EMPTY_REX for an empty
 * pattern) and the replacement is stored in cmd->u.subst.rpl.
 *
 * Returns 0 on success and -1 on failure.
 */
static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
qse_cint_t c, delim;
/*qse_str_t* t[2] = { QSE_NULL, QSE_NULL };*/
/* t[0]: pattern text (the shared sed->tmp.rex buffer, not owned here)
 * t[1]: replacement text (opened and closed by this function) */
qse_str_t* t[2];
t[0] = QSE_NULL;
t[1] = QSE_NULL;
c = CURSC (sed);
CHECK_CMDIC (sed, cmd, c, goto oops);
delim = c;
if (delim == QSE_T('\\'))
{
/* backslash is an illegal delimiter */
SETERR0 (sed, QSE_SED_EBSDEL, &sed->src.loc);
goto oops;
}
t[0] = &sed->tmp.rex;
qse_str_clear (t[0]);
t[1] = qse_str_open(qse_sed_getmmgr(sed), 0, 32);
if (t[1] == QSE_NULL)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
goto oops;
}
/* the pattern first, then the replacement */
if (pickup_rex(sed, delim, 0, cmd, t[0]) <= -1) goto oops;
if (pickup_rex(sed, delim, 1, cmd, t[1]) <= -1) goto oops;
/* skip spaces before options */
do { NXTSC_GOTO (sed, c, oops); } while (IS_SPACE(c));
/* get options */
do
{
if (c == QSE_T('p'))
{
cmd->u.subst.p = 1;
NXTSC_GOTO (sed, c, oops);
}
else if (c == QSE_T('i') || c == QSE_T('I'))
{
cmd->u.subst.i = 1;
NXTSC_GOTO (sed, c, oops);
}
else if (c == QSE_T('g'))
{
cmd->u.subst.g = 1;
NXTSC_GOTO (sed, c, oops);
}
else if (c == QSE_T('k'))
{
cmd->u.subst.k = 1;
NXTSC_GOTO (sed, c, oops);
}
else if (c >= QSE_T('0') && c <= QSE_T('9'))
{
unsigned long occ;
/* an occurrence number has been given already */
if (cmd->u.subst.occ != 0)
{
SETERR0 (sed, QSE_SED_EOCSDU, &sed->src.loc);
goto oops;
}
occ = 0;
do
{
occ = occ * 10 + (c - QSE_T('0'));
/* checked on every digit. so occ itself can't overflow */
if (occ > QSE_TYPE_MAX(unsigned short))
{
SETERR0 (sed, QSE_SED_EOCSTL, &sed->src.loc);
goto oops;
}
NXTSC_GOTO (sed, c, oops);
}
while (c >= QSE_T('0') && c <= QSE_T('9'));
if (occ == 0)
{
SETERR0 (sed, QSE_SED_EOCSZE, &sed->src.loc);
goto oops;
}
cmd->u.subst.occ = occ;
}
else if (c == QSE_T('w'))
{
/* the file name takes the rest of the command */
NXTSC_GOTO (sed, c, oops);
if (get_file (sed, &cmd->u.subst.file) <= -1) goto oops;
break;
}
else break;
}
while (1);
/* call terminate_command() if the 'w' option is not specified.
* if the 'w' option is given, it is called in get_file(). */
if (cmd->u.subst.file.ptr == QSE_NULL &&
terminate_command (sed) <= -1) goto oops;
QSE_ASSERT (cmd->u.subst.rex == QSE_NULL);
if (QSE_STR_LEN(t[0]) <= 0) cmd->u.subst.rex = EMPTY_REX;
else
{
cmd->u.subst.rex = build_rex (
sed, QSE_STR_XSTR(t[0]),
cmd->u.subst.i, &sed->src.loc);
if (cmd->u.subst.rex == QSE_NULL) goto oops;
}
/* hand the replacement text over to the command */
qse_str_yield (t[1], &cmd->u.subst.rpl, 0);
/* without 'g' and an occurrence number, the first match is replaced */
if (cmd->u.subst.g == 0 && cmd->u.subst.occ == 0) cmd->u.subst.occ = 1;
qse_str_close (t[1]);
return 0;
oops:
/* t[0] is the shared buffer and must not be closed here */
if (t[1]) qse_str_close (t[1]);
return -1;
}
/*
 * Parses the two character lists of a translate command. The current
 * script character is the delimiter.
 *
 * The result stored in cmd->u.transet is a string of character pairs:
 * each source character at an even index is followed by its target
 * character at the next odd index. The first pass appends one pair per
 * source character with a placeholder target; the second pass fills in
 * the targets. Both lists must have the same length (QSE_SED_ETSNSL).
 *
 * Returns 0 on success and -1 on failure.
 */
static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_cint_t c, delim;
	qse_str_t* t = QSE_NULL;
	qse_size_t pos;

	c = CURSC (sed);
	CHECK_CMDIC (sed, cmd, c, goto oops);

	delim = c;
	if (delim == QSE_T('\\'))
	{
		/* backslash is an illegal delimiter */
		SETERR0 (sed, QSE_SED_EBSDEL, &sed->src.loc);
		goto oops;
	}

	t = qse_str_open(qse_sed_getmmgr(sed), 0, 32);
	if (t == QSE_NULL)
	{
		SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
		goto oops;
	}

	/* first pass - the source characters */
	NXTSC_GOTO (sed, c, oops);
	while (c != delim)
	{
		qse_char_t b[2];

		CHECK_CMDIC (sed, cmd, c, goto oops);

		if (c == QSE_T('\\'))
		{
			NXTSC_GOTO (sed, c, oops);
			CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops);
			if (trans_escaped (sed, c, &c, QSE_NULL) <= -1) goto oops;
		}

		b[0] = c;
		/* the target slot is filled in by the second pass. give it
		 * a defined value so that no uninitialized element is
		 * copied into the string. */
		b[1] = QSE_T('\0');
		if (qse_str_ncat (t, b, 2) == (qse_size_t)-1)
		{
			SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
			goto oops;
		}

		NXTSC_GOTO (sed, c, oops);
	}

	/* second pass - the target characters go to the odd positions */
	NXTSC_GOTO (sed, c, oops);
	for (pos = 1; c != delim; pos += 2)
	{
		CHECK_CMDIC (sed, cmd, c, goto oops);

		if (c == QSE_T('\\'))
		{
			NXTSC_GOTO (sed, c, oops);
			CHECK_CMDIC_ESCAPED (sed, cmd, c, goto oops);
			if (trans_escaped (sed, c, &c, QSE_NULL) <= -1) goto oops;
		}

		if (pos >= QSE_STR_LEN(t))
		{
			/* source and target not the same length */
			SETERR0 (sed, QSE_SED_ETSNSL, &sed->src.loc);
			goto oops;
		}

		QSE_STR_CHAR(t,pos) = c;
		NXTSC_GOTO (sed, c, oops);
	}

	if (pos < QSE_STR_LEN(t))
	{
		/* source and target not the same length */
		SETERR0 (sed, QSE_SED_ETSNSL, &sed->src.loc);
		goto oops;
	}

	NXTSC_GOTO (sed, c, oops);
	if (terminate_command (sed) <= -1) goto oops;

	/* hand the pairs over to the command */
	qse_str_yield (t, &cmd->u.transet, 0);
	qse_str_close (t);
	return 0;

oops:
	if (t) qse_str_close (t);
	return -1;
}
/*
 * Allocates an empty cut selector block and appends it to the selector
 * block chain of 'cmd'. The first block added becomes the head of the
 * chain as well.
 *
 * Returns 0 on success and -1 when the allocation fails.
 */
static int add_cut_selector_block (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_sed_cut_sel_t* blk;

	blk = (qse_sed_cut_sel_t*) qse_sed_callocmem(sed, QSE_SIZEOF(*blk));
	if (blk == QSE_NULL) return -1;

	blk->len = 0;
	blk->next = QSE_NULL;

	if (cmd->u.cut.fb != QSE_NULL)
	{
		/* link it after the current last block */
		cmd->u.cut.lb->next = blk;
	}
	else
	{
		/* the chain was empty */
		cmd->u.cut.fb = blk;
	}
	cmd->u.cut.lb = blk;

	return 0;
}
/*
 * Frees every cut selector block of 'cmd' and resets the chain pointers
 * and the selector counters.
 */
static void free_all_cut_selector_blocks (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_sed_cut_sel_t* blk = cmd->u.cut.fb;

	while (blk != QSE_NULL)
	{
		/* fetch the link before the block is released */
		qse_sed_cut_sel_t* following = blk->next;
		qse_sed_freemem (sed, blk);
		blk = following;
	}

	cmd->u.cut.fb = QSE_NULL;
	cmd->u.cut.lb = QSE_NULL;

	cmd->u.cut.count = 0;
	cmd->u.cut.fcount = 0;
	cmd->u.cut.ccount = 0;
}
/* Parse the remainder of a cut command into cmd->u.cut.
 *
 * The current script character is taken as the delimiter that encloses the
 * selector list. Between the two delimiters, comma-separated items are read:
 *   dX / DX  - X is stored as cmd->u.cut.delim[0] / cmd->u.cut.delim[1]
 *   [c|f]N, [c|f]N-M, [c|f]N-, [c|f]-M
 *            - a range; a leading 'c' or 'f' changes the selector kind,
 *              which stays in effect for the following items as well.
 *              numbers are converted from 1-based to 0-based.
 * After the closing delimiter, any sequence of the option letters
 * 'f', 'w' and 'd' is accepted and recorded in cmd->u.cut.
 *
 * Returns 0 on success. On failure, all selector blocks collected so far
 * are released and -1 is returned. */
static int get_cut (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
qse_cint_t c, delim;
qse_size_t i;
/* selector kind applied to a range that carries no 'c'/'f' prefix */
int sel = QSE_SED_CUT_SEL_CHAR;
c = CURSC (sed);
CHECK_CMDIC (sed, cmd, c, goto oops);
delim = c;
if (delim == QSE_T('\\'))
{
/* backslash is an illegal delimiter */
SETERR0 (sed, QSE_SED_EBSDEL, &sed->src.loc);
goto oops;
}
/* initialize the delimiters to a space letter */
for (i = 0; i < QSE_COUNTOF(cmd->u.cut.delim); i++)
cmd->u.cut.delim[i] = QSE_T(' ');
NXTSC_GOTO (sed, c, oops);
/* one iteration per comma-separated item */
while (1)
{
qse_size_t start = 0, end = 0;
#define MASK_START (1 << 1)
#define MASK_END (1 << 2)
#define MAX QSE_TYPE_MAX(qse_size_t)
/* records which of the two range numbers were actually given */
int mask = 0;
while (IS_SPACE(c)) NXTSC_GOTO (sed, c, oops);
if (c == QSE_CHAR_EOF)
{
SETERR0 (sed, QSE_SED_ECSLNV, &sed->src.loc);
goto oops;
}
if (c == QSE_T('d') || c == QSE_T('D'))
{
/* 'd' selects delim[0] and 'D' selects delim[1] */
int delim_idx = (c == QSE_T('d'))? 0: 1;
/* the next character is an input/output delimiter. */
NXTSC_GOTO (sed, c, oops);
if (c == QSE_CHAR_EOF)
{
SETERR0 (sed, QSE_SED_ECSLNV, &sed->src.loc);
goto oops;
}
cmd->u.cut.delim[delim_idx] = c;
NXTSC_GOTO (sed, c, oops);
}
else
{
if (c == QSE_T('c') || c == QSE_T('f'))
{
/* the selector letter itself is stored as the selector id */
sel = c;
NXTSC_GOTO (sed, c, oops);
while (IS_SPACE(c)) NXTSC_GOTO (sed, c, oops);
}
if (QSE_ISDIGIT(c))
{
/* accumulate the decimal start position; no overflow check is made */
do
{
start = start * 10 + (c - QSE_T('0'));
NXTSC_GOTO (sed, c, oops);
}
while (QSE_ISDIGIT(c));
while (IS_SPACE(c)) NXTSC_GOTO (sed, c, oops);
mask |= MASK_START;
if (start >= 1) start--; /* convert it to index */
}
else start = 0;
if (c == QSE_T('-'))
{
NXTSC_GOTO (sed, c, oops);
while (IS_SPACE(c)) NXTSC_GOTO (sed, c, oops);
if (QSE_ISDIGIT(c))
{
/* accumulate the decimal end position; no overflow check is made */
do
{
end = end * 10 + (c - QSE_T('0'));
NXTSC_GOTO (sed, c, oops);
}
while (QSE_ISDIGIT(c));
mask |= MASK_END;
}
/* an open-ended range runs up to the largest representable position */
else end = MAX;
while (IS_SPACE(c)) NXTSC_GOTO (sed, c, oops);
if (end >= 1) end--; /* convert it to index */
}
/* no dash: the range covers the single start position */
else end = start;
/* a range needs at least one of the two numbers */
if (!(mask & (MASK_START | MASK_END)))
{
SETERR0 (sed, QSE_SED_ECSLNV, &sed->src.loc);
goto oops;
}
/* get a new block when there is none yet or the last one is full */
if (cmd->u.cut.lb == QSE_NULL ||
cmd->u.cut.lb->len >= QSE_COUNTOF(cmd->u.cut.lb->range))
{
if (add_cut_selector_block (sed, cmd) <= -1) goto oops;
}
cmd->u.cut.lb->range[cmd->u.cut.lb->len].id = sel;
cmd->u.cut.lb->range[cmd->u.cut.lb->len].start = start;
cmd->u.cut.lb->range[cmd->u.cut.lb->len].end = end;
cmd->u.cut.lb->len++;
/* keep the total and the per-kind selector counts up to date */
cmd->u.cut.count++;
if (sel == QSE_SED_CUT_SEL_FIELD) cmd->u.cut.fcount++;
else cmd->u.cut.ccount++;
}
while (IS_SPACE(c)) NXTSC_GOTO (sed, c, oops);
if (c == QSE_CHAR_EOF)
{
SETERR0 (sed, QSE_SED_ECSLNV, &sed->src.loc);
goto oops;
}
/* the closing delimiter ends the selector list */
if (c == delim) break;
if (c != QSE_T(','))
{
SETERR0 (sed, QSE_SED_ECSLNV, &sed->src.loc);
goto oops;
}
NXTSC_GOTO (sed, c, oops); /* skip a comma */
}
/* skip spaces before options */
do { NXTSC_GOTO (sed, c, oops); } while (IS_SPACE(c));
/* get options */
do
{
if (c == QSE_T('f'))
{
cmd->u.cut.f = 1;
}
else if (c == QSE_T('w'))
{
cmd->u.cut.w = 1;
}
else if (c == QSE_T('d'))
{
cmd->u.cut.d = 1;
}
/* any other character ends the option list */
else break;
NXTSC_GOTO (sed, c, oops);
}
while (1);
if (terminate_command (sed) <= -1) goto oops;
return 0;
oops:
/* discard the selectors collected before the failure */
free_all_cut_selector_blocks (sed, cmd);
return -1;
}
/* process a command code and following parts into cmd.
 *
 * The current script character is taken as the command code. The source
 * location and the compilation identifier are recorded in cmd, and then
 * the command-specific parser is invoked for commands that take operands.
 *
 * Returns 0 on success and -1 on failure with the error information set
 * in sed. */
static int get_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_cint_t c;

	c = CURSC (sed);
	cmd->lid = sed->src.cid? ((const qse_char_t*)(sed->src.cid + 1)): QSE_NULL;
	cmd->loc = sed->src.loc;

	switch (c)
	{
		default:
		{
			/* unrecognized command code */
			qse_char_t cc = c;
			SETERR1 (sed, QSE_SED_ECMDNR, &cc, 1, &sed->src.loc);
			return -1;
		}

		case QSE_CHAR_EOF:
		case QSE_T('\n'):
			/* an address without a command */
			SETERR0 (sed, QSE_SED_ECMDMS, &sed->src.loc);
			return -1;

		case QSE_T(':'):
			if (cmd->a1.type != QSE_SED_ADR_NONE)
			{
				/* label cannot have an address.
				 * cmd->type has not been set yet at this point, so the
				 * offending command character is taken from 'c' itself. */
				qse_char_t tmpc = c;
				SETERR1 (
					sed, QSE_SED_EA1PHB,
					&tmpc, 1, &sed->src.loc
				);
				return -1;
			}

			cmd->type = QSE_SED_CMD_NOOP;

			NXTSC (sed, c, -1);
			if (get_label (sed, cmd) <= -1) return -1;

			c = CURSC (sed);
			while (IS_SPACE(c)) NXTSC (sed, c, -1);
			break;

		case QSE_T('{'):
			/* insert a negated branch command at the beginning
			 * of a group. this way, all the commands in a group
			 * can be skipped. the branch target is set once a
			 * corresponding } is met. */
			cmd->type = QSE_SED_CMD_BRANCH;
			cmd->negated = !cmd->negated;

			if (sed->tmp.grp.level >= QSE_COUNTOF(sed->tmp.grp.cmd))
			{
				/* group nesting too deep */
				SETERR0 (sed, QSE_SED_EGRNTD, &sed->src.loc);
				return -1;
			}

			sed->tmp.grp.cmd[sed->tmp.grp.level++] = cmd;
			NXTSC (sed, c, -1);
			break;

		case QSE_T('}'):
		{
			qse_sed_cmd_t* tc;

			if (cmd->a1.type != QSE_SED_ADR_NONE)
			{
				qse_char_t tmpc = c;
				SETERR1 (
					sed, QSE_SED_EA1PHB,
					&tmpc, 1, &sed->src.loc
				);
				return -1;
			}

			cmd->type = QSE_SED_CMD_NOOP;

			if (sed->tmp.grp.level <= 0)
			{
				/* group not balanced */
				SETERR0 (sed, QSE_SED_EGRNBA, &sed->src.loc);
				return -1;
			}

			/* resolve the branch inserted for the matching '{' */
			tc = sed->tmp.grp.cmd[--sed->tmp.grp.level];
			tc->u.branch.target = cmd;

			NXTSC (sed, c, -1);
			break;
		}

		case QSE_T('q'):
		case QSE_T('Q'):
			cmd->type = c;
			if (sed->opt.trait & QSE_SED_STRICT &&
			    cmd->a2.type != QSE_SED_ADR_NONE)
			{
				SETERR1 (
					sed, QSE_SED_EA2PHB,
					&cmd->type, 1, &sed->src.loc
				);
				return -1;
			}

			NXTSC (sed, c, -1);
			if (terminate_command (sed) <= -1) return -1;
			break;

		case QSE_T('a'):
		case QSE_T('i'):
			if (sed->opt.trait & QSE_SED_STRICT &&
			    cmd->a2.type != QSE_SED_ADR_NONE)
			{
				qse_char_t tmpc = c;
				SETERR1 (
					sed, QSE_SED_EA2PHB,
					&tmpc, 1, &sed->src.loc
				);
				return -1;
			}
			/* fall through - the text is parsed the same way as for 'c' */

		case QSE_T('c'):
		{
			cmd->type = c;

			NXTSC (sed, c, -1);
			while (IS_SPACE(c)) NXTSC (sed, c, -1);

			if (c != QSE_T('\\'))
			{
				if ((sed->opt.trait & QSE_SED_SAMELINE) &&
				    c != QSE_CHAR_EOF && c != QSE_T('\n'))
				{
					/* allow text without a starting backslash
					 * on the same line as a command */
					goto sameline_ok;
				}

				SETERR0 (sed, QSE_SED_EBSEXP, &sed->src.loc);
				return -1;
			}

			NXTSC (sed, c, -1);
			while (IS_SPACE(c)) NXTSC (sed, c, -1);

			if (c != QSE_CHAR_EOF && c != QSE_T('\n'))
			{
				if (sed->opt.trait & QSE_SED_SAMELINE)
				{
					/* allow text with a starting backslash
					 * on the same line as a command */
					goto sameline_ok;
				}

				SETERR0 (sed, QSE_SED_EGBABS, &sed->src.loc);
				return -1;
			}

			NXTSC (sed, c, -1); /* skip the new line */

		sameline_ok:
			/* get_text() starts from the next line */
			if (get_text (sed, cmd) <= -1) return -1;
			break;
		}

		case QSE_T('='):
			if (sed->opt.trait & QSE_SED_STRICT &&
			    cmd->a2.type != QSE_SED_ADR_NONE)
			{
				qse_char_t tmpc = c;
				SETERR1 (
					sed, QSE_SED_EA2PHB,
					&tmpc, 1, &sed->src.loc
				);
				return -1;
			}
			/* fall through - '=' takes no operand like the commands below */

		case QSE_T('d'):
		case QSE_T('D'):
		case QSE_T('p'):
		case QSE_T('P'):
		case QSE_T('l'):
		case QSE_T('h'):
		case QSE_T('H'):
		case QSE_T('g'):
		case QSE_T('G'):
		case QSE_T('x'):
		case QSE_T('n'):
		case QSE_T('N'):
		case QSE_T('z'):
			cmd->type = c;
			NXTSC (sed, c, -1);
			if (terminate_command (sed) <= -1) return -1;
			break;

		case QSE_T('b'):
		case QSE_T('t'):
			cmd->type = c;
			NXTSC (sed, c, -1);
			if (get_branch_target (sed, cmd) <= -1) return -1;
			break;

		case QSE_T('r'):
		case QSE_T('R'):
		case QSE_T('w'):
		case QSE_T('W'):
			cmd->type = c;
			NXTSC (sed, c, -1);
			if (get_file (sed, &cmd->u.file) <= -1) return -1;
			break;

		case QSE_T('s'):
			cmd->type = c;
			NXTSC (sed, c, -1);
			if (get_subst (sed, cmd) <= -1) return -1;
			break;

		case QSE_T('y'):
			cmd->type = c;
			NXTSC (sed, c, -1);
			if (get_transet (sed, cmd) <= -1) return -1;
			break;

		case QSE_T('C'):
			cmd->type = c;
			NXTSC (sed, c, -1);
			if (get_cut (sed, cmd) <= -1) return -1;
			break;
	}

	return 0;
}
/* Compile a script read through the I/O handler 'inf' into the command
 * blocks of 'sed'.
 *
 * Anything compiled previously (commands, compilation identifiers, labels,
 * temporary data) is discarded first. The script is then read command by
 * command: optional address(es), optional '!' negations and the command.
 *
 * Returns 0 on success. Returns -1 on failure with the error information
 * set in sed; the script stream is closed before returning once it has
 * been opened. */
int qse_sed_comp (qse_sed_t* sed, qse_sed_io_impl_t inf)
{
qse_cint_t c;
/* the command under construction; QSE_NULL when none is in progress */
qse_sed_cmd_t* cmd = QSE_NULL;
qse_sed_loc_t a1_loc;
if (inf == QSE_NULL)
{
qse_sed_seterrnum (sed, QSE_SED_EINVAL, QSE_NULL);
return -1;
}
/* free all the commands previously compiled */
free_all_command_blocks (sed);
QSE_ASSERT (sed->cmd.lb == &sed->cmd.fb && sed->cmd.lb->len == 0);
/* free all the compilation identifiers */
free_all_cids (sed);
/* clear the label table */
qse_map_clear (&sed->tmp.labs);
/* clear temporary data */
sed->tmp.grp.level = 0;
qse_str_clear (&sed->tmp.rex);
/* open script */
sed->src.fun = inf;
if (open_script_stream (sed) <= -1) return -1;
NXTSC_GOTO (sed, c, oops);
while (1)
{
int n;
/* skip spaces including newlines */
while (IS_WSPACE(c)) NXTSC_GOTO (sed, c, oops);
/* check if the end has been reached */
if (c == QSE_CHAR_EOF) break;
/* check if the line is commented out */
if (c == QSE_T('#'))
{
/* consume up to the line terminator or the end of the script */
do NXTSC_GOTO (sed, c, oops);
while (!IS_LINTERM(c) && c != QSE_CHAR_EOF) ;
NXTSC_GOTO (sed, c, oops);
continue;
}
if (c == QSE_T(';'))
{
/* semicolon without an address-command pair */
NXTSC_GOTO (sed, c, oops);
continue;
}
/* initialize the current command */
cmd = &sed->cmd.lb->buf[sed->cmd.lb->len];
QSE_MEMSET (cmd, 0, QSE_SIZEOF(*cmd));
/* process the first address */
a1_loc = sed->src.loc;
if (get_address (sed, &cmd->a1, 0) == QSE_NULL)
{
/* reset cmd so that the cleanup at 'oops' does not touch it */
cmd = QSE_NULL;
SETERR0 (sed, QSE_SED_EA1MOI, &sed->src.loc);
goto oops;
}
c = CURSC (sed);
if (cmd->a1.type != QSE_SED_ADR_NONE)
{
while (IS_SPACE(c)) NXTSC_GOTO (sed, c, oops);
if (c == QSE_T(',') ||
((sed->opt.trait & QSE_SED_EXTENDEDADR) && c == QSE_T('~')))
{
/* remember which separator introduced the second address */
qse_char_t delim = c;
/* maybe an address range */
do { NXTSC_GOTO (sed, c, oops); } while (IS_SPACE(c));
if (get_address (sed, &cmd->a2, (sed->opt.trait & QSE_SED_EXTENDEDADR)) == QSE_NULL)
{
QSE_ASSERT (cmd->a2.type == QSE_SED_ADR_NONE);
SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc);
goto oops;
}
if (delim == QSE_T(','))
{
/* a comma must be followed by a second address */
if (cmd->a2.type == QSE_SED_ADR_NONE)
{
SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc);
goto oops;
}
if (cmd->a2.type == QSE_SED_ADR_RELLINE ||
cmd->a2.type == QSE_SED_ADR_RELLINEM)
{
if (cmd->a2.u.lno <= 0)
{
/* transform 'addr1,+0' and 'addr1,~0' to 'addr1' */
cmd->a2.type = QSE_SED_ADR_NONE;
}
}
}
else if ((sed->opt.trait & QSE_SED_EXTENDEDADR) &&
(delim == QSE_T('~')))
{
/* both sides of '~' must be plain line numbers */
if (cmd->a1.type != QSE_SED_ADR_LINE ||
cmd->a2.type != QSE_SED_ADR_LINE)
{
SETERR0 (sed, QSE_SED_EA2MOI, &sed->src.loc);
goto oops;
}
if (cmd->a2.u.lno > 0)
{
cmd->a2.type = QSE_SED_ADR_STEP;
}
else
{
/* transform 'X~0' to 'X' */
cmd->a2.type = QSE_SED_ADR_NONE;
}
}
c = CURSC (sed);
}
else cmd->a2.type = QSE_SED_ADR_NONE;
}
if (cmd->a1.type == QSE_SED_ADR_LINE && cmd->a1.u.lno <= 0)
{
if (cmd->a2.type == QSE_SED_ADR_STEP ||
((sed->opt.trait & QSE_SED_EXTENDEDADR) &&
cmd->a2.type == QSE_SED_ADR_REX))
{
/* 0 as the first address is allowed in these two contexts.
* 0~step
* 0,/regex/
* '0~0' is not allowed. but at this point '0~0'
* is already transformed to '0'. and disallowing it is
* achieved gratuitously.
*/
/* nothing to do - adding negation to the condition dropped
* code readability so i decided to write this part of code
* this way.
*/
}
else
{
/* report the error at the location of the first address */
SETERR0 (sed, QSE_SED_EA1MOI, &a1_loc);
goto oops;
}
}
/* skip white spaces */
while (IS_SPACE(c)) NXTSC_GOTO (sed, c, oops);
if (c == QSE_T('!'))
{
/* allow any number of the negation indicators */
do {
cmd->negated = !cmd->negated;
NXTSC_GOTO (sed, c, oops);
}
while (c == QSE_T('!'));
while (IS_SPACE(c)) NXTSC_GOTO (sed, c, oops);
}
n = get_command (sed, cmd);
if (n <= -1) goto oops;
c = CURSC (sed);
/* cmd's end of life */
cmd = QSE_NULL;
/* increment the total numbers of complete commands */
sed->cmd.lb->len++;
if (sed->cmd.lb->len >= QSE_COUNTOF(sed->cmd.lb->buf))
{
/* the number of commands in the block has
* reached the maximum. add a new command block */
if (add_command_block (sed) <= -1) goto oops;
}
}
/* every '{' must have been closed by the end of the script */
if (sed->tmp.grp.level != 0)
{
SETERR0 (sed, QSE_SED_EGRNBA, &sed->src.loc);
goto oops;
}
close_script_stream (sed);
return 0;
oops:
/* release the addresses of the command that was being built */
if (cmd) free_address (sed, cmd);
close_script_stream (sed);
return -1;
}
/* Fetch the next input character into *c.
 *
 * A character held in the one-slot look-ahead buffer (xbuf) is returned
 * first. A negative xbuf_len means the end of input has already been
 * detected. Otherwise the character comes from the input buffer, which is
 * refilled through the input handler when exhausted.
 *
 * Returns 1 if a character is stored in *c, 0 on end of input and
 * -1 on failure. */
static int read_char (qse_sed_t* sed, qse_char_t* c)
{
	if (sed->e.in.xbuf_len > 0)
	{
		/* hand out the character pushed back previously */
		QSE_ASSERT (sed->e.in.xbuf_len == 1);
		sed->e.in.xbuf_len--;
		*c = sed->e.in.xbuf[sed->e.in.xbuf_len];
		return 1;
	}

	if (sed->e.in.xbuf_len < 0)
	{
		/* end of input has been seen already */
		QSE_ASSERT (sed->e.in.xbuf_len == -1);
		return 0;
	}

	/* nothing is pushed back. use the regular input buffer */
	if (sed->e.in.pos >= sed->e.in.len)
	{
		qse_ssize_t got;

		sed->errnum = QSE_SED_ENOERR;
		got = sed->e.in.fun (
			sed, QSE_SED_IO_READ, &sed->e.in.arg,
			sed->e.in.buf, QSE_COUNTOF(sed->e.in.buf)
		);
		if (got <= -1)
		{
			/* supply a generic error if the handler has set none */
			if (sed->errnum == QSE_SED_ENOERR)
				SETERR0 (sed, QSE_SED_EIOUSR, QSE_NULL);
			return -1;
		}

		if (got == 0) return 0; /* end of file */

		sed->e.in.len = got;
		sed->e.in.pos = 0;
	}

	*c = sed->e.in.buf[sed->e.in.pos++];
	return 1;
}
/* Read one line, including its terminating newline if any, into the
 * input line buffer. The buffer is cleared first unless 'append' is
 * non-zero.
 *
 * Returns 1 if a line has been read, 0 if no more input is available and
 * -1 on failure. Once the end of input has been detected, the eof flag
 * stays set and subsequent calls return 0 without reading. */
static int read_line (qse_sed_t* sed, int append)
{
	qse_size_t nread;

	if (!append) qse_str_clear (&sed->e.in.line);

	/* no more input detected in the previous read */
	if (sed->e.in.eof) return 0;

	for (nread = 0; ; )
	{
		qse_char_t ch;
		int rc;

		rc = read_char (sed, &ch);
		if (rc <= -1) return -1;

		if (rc == 0)
		{
			sed->e.in.eof = 1;
			/* nothing read at all means no line */
			if (nread == 0) return 0;
			/* otherwise, the last line lacks a newline */
			break;
		}

		if (qse_str_ccat (&sed->e.in.line, ch) == (qse_size_t)-1)
		{
			SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
			return -1;
		}
		nread++;

		/* TODO: support different line end convention */
		if (ch == QSE_T('\n')) break;
	}

	/* a new line begins: bump the line number and
	 * forget about any substitution made on the previous one */
	sed->e.in.num++;
	sed->e.subst_done = 0;
	return 1;
}
/* Write out everything held in the output buffer through the output
 * handler, repeating the call until the buffer is drained.
 * Returns 0 on success and -1 on failure. */
static int flush (qse_sed_t* sed)
{
	qse_size_t off;

	for (off = 0; sed->e.out.len > 0; )
	{
		qse_ssize_t wr;

		sed->errnum = QSE_SED_ENOERR;
		wr = sed->e.out.fun (
			sed, QSE_SED_IO_WRITE, &sed->e.out.arg,
			&sed->e.out.buf[off], sed->e.out.len);

		if (wr <= 0)
		{
			/* a negative return is a failure. 0 means the end of
			 * file on the output stream, which is also an error.
			 * supply a generic error if the handler has set none */
			if (sed->errnum == QSE_SED_ENOERR)
				SETERR0 (sed, QSE_SED_EIOUSR, QSE_NULL);
			return -1;
		}

		off += wr;
		sed->e.out.len -= wr;
	}

	return 0;
}
/* Put a character into the output buffer. The buffer is flushed when
 * the character is a newline or when the buffer has become full.
 * Returns 0 on success and -1 if flushing fails. */
static int write_char (qse_sed_t* sed, qse_char_t c)
{
	int full;

	sed->e.out.buf[sed->e.out.len] = c;
	sed->e.out.len++;

	full = (sed->e.out.len >= QSE_COUNTOF(sed->e.out.buf));
	return (c == QSE_T('\n') || full)? flush (sed): 0;
}
/* Put 'len' characters of 'str' into the output buffer. The buffer is
 * flushed whenever it becomes full, and once more at the end if a newline
 * has been buffered since the last flush.
 * Returns 0 on success and -1 if flushing fails. */
static int write_str (qse_sed_t* sed, const qse_char_t* str, qse_size_t len)
{
	qse_size_t k = 0;
	int newline_pending = 0;

	while (k < len)
	{
		qse_char_t ch = str[k++];

		sed->e.out.buf[sed->e.out.len++] = ch;

		if (sed->e.out.len >= QSE_COUNTOF(sed->e.out.buf))
		{
			/* the buffer is full. it must go out right away */
			if (flush (sed) <= -1) return -1;
			newline_pending = 0;
		}
		/* TODO: handle different line ending convention... */
		else if (ch == QSE_T('\n'))
		{
			newline_pending = 1;
		}
	}

	if (!newline_pending) return 0;
	return (flush (sed) <= -1)? -1: 0;
}
/* Write the characters of 'str' up to and including the first newline,
 * or all 'len' characters if there is no newline.
 * Returns 0 on success and -1 on failure. */
static int write_first_line (
	qse_sed_t* sed, const qse_char_t* str, qse_size_t len)
{
	qse_size_t k = 0;

	while (k < len)
	{
		qse_char_t ch = str[k++];

		if (write_char (sed, ch) <= -1) return -1;

		/* TODO: handle different line ending convention... */
		if (ch == QSE_T('\n')) return 0;
	}

	return 0;
}
/* map a digit value between 0 and 35 to its character */
#define NTOC(n) (QSE_T("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")[n])

/* Write the number 'x' in the given base, left-padded with '0' so that at
 * least 'width' digits are produced. 'base' must be between 2 and 36.
 * Returns 0 on success and -1 on failure.
 *
 * The value is unsigned, so no sign handling is needed. */
static int write_num (qse_sed_t* sed, qse_ulong_t x, int base, int width)
{
	qse_ulong_t last;
	qse_ulong_t y = 0;
	int dig = 0;

	/* validate the base before it is used as a divisor */
	QSE_ASSERT (base >= 2 && base <= 36);

	/* the least significant digit is kept aside and written last */
	last = x % base;
	x = x / base;

	/* reverse the remaining digits into y so that they can be
	 * emitted from the most significant one. dig counts them. */
	while (x > 0)
	{
		y = y * base + (x % base);
		x = x / base;
		dig++;
	}

	if (width > 0)
	{
		/* pad with zeros up to the requested width. one position
		 * is reserved for the last digit. */
		while (--width > dig)
		{
			if (write_char (sed, QSE_T('0')) <= -1) return -1;
		}
	}

	while (y > 0)
	{
		if (write_char (sed, NTOC(y % base)) <= -1) return -1;
		y = y / base;
		dig--;
	}

	/* zero digits at the low end of the original number vanish
	 * in the reversed value. emit them here. */
	while (dig > 0)
	{
		dig--;
		if (write_char (sed, QSE_T('0')) <= -1) return -1;
	}

	if (write_char (sed, NTOC(last)) <= -1) return -1;
	return 0;
}
/* Convenience wrappers around write_char(), write_str() and write_num().
 * Each one makes the enclosing function return -1 when the underlying
 * call fails, so they can be used only in functions returning an int. */
#define WRITE_CHAR(sed,c) \
do { if (write_char(sed,c) <= -1) return -1; } while (0)
#define WRITE_STR(sed,str,len) \
do { if (write_str(sed,str,len) <= -1) return -1; } while (0)
#define WRITE_NUM(sed,num,base,width) \
do { if (write_num(sed,num,base,width) <= -1) return -1; } while (0)
/* Write 'len' characters of 'str' in an unambiguous form.
 *
 * A backslash is doubled, the common control characters are written as
 * C-style escape sequences, and other non-printable characters are written
 * as a numeric escape. A newline is written as "$\n". If the text does not
 * end with a newline, "$\n" is appended to mark its end.
 *
 * Returns 0 on success and -1 on failure. */
static int write_str_clearly (
	qse_sed_t* sed, const qse_char_t* str, qse_size_t len)
{
	const qse_char_t* p = str;
	const qse_char_t* end = str + len;

	/* TODO: break down long lines.... */
	while (p < end)
	{
		qse_char_t c = *p++;

		switch (c)
		{
			case QSE_T('\\'):
				WRITE_STR (sed, QSE_T("\\\\"), 2);
				break;
			/*case QSE_T('\0'):
				WRITE_STR (sed, QSE_T("\\0"), 2);
				break;*/
			case QSE_T('\n'):
				WRITE_STR (sed, QSE_T("$\n"), 2);
				break;
			case QSE_T('\a'):
				WRITE_STR (sed, QSE_T("\\a"), 2);
				break;
			case QSE_T('\b'):
				WRITE_STR (sed, QSE_T("\\b"), 2);
				break;
			case QSE_T('\f'):
				WRITE_STR (sed, QSE_T("\\f"), 2);
				break;
			case QSE_T('\r'):
				WRITE_STR (sed, QSE_T("\\r"), 2);
				break;
			case QSE_T('\t'):
				WRITE_STR (sed, QSE_T("\\t"), 2);
				break;
			case QSE_T('\v'):
				WRITE_STR (sed, QSE_T("\\v"), 2);
				break;
			default:
			{
				if (QSE_ISPRINT(c)) WRITE_CHAR (sed, c);
				else
				{
				#if defined(QSE_CHAR_IS_MCHAR)
					/* a backslash followed by octal digits */
					WRITE_CHAR (sed, QSE_T('\\'));
					WRITE_NUM (sed, (unsigned char)c, 8, QSE_SIZEOF(qse_char_t)*3);
				#else
					/* \u or \U followed by hexadecimal digits,
					 * depending on the size of a character */
					if (QSE_SIZEOF(qse_char_t) <= 2)
					{
						WRITE_STR (sed, QSE_T("\\u"), 2);
					}
					else
					{
						WRITE_STR (sed, QSE_T("\\U"), 2);
					}
					WRITE_NUM (sed, c, 16, QSE_SIZEOF(qse_char_t)*2);
				#endif
				}
			}
		}
	}

	/* mark the end of text not terminated by a newline. the check
	 * applies to any non-empty text including a single character;
	 * len > 0 also guarantees that end[-1] is a valid access. */
	if (len > 0 && end[-1] != QSE_T('\n'))
		WRITE_STR (sed, QSE_T("$\n"), 2);

	return 0;
}
/* Write 'len' characters of 'str' to the file named by 'path'.
 *
 * The I/O argument of each output file is kept in the sed->e.out.files
 * map keyed by the path, and the file is opened through the output handler
 * on first use. If writing fails or the handler reports the end of file,
 * the file is closed and its handle is reset.
 *
 * Returns 0 on success and -1 on failure with the error location set to
 * the location of 'cmd'. */
static int write_str_to_file (
	qse_sed_t* sed, qse_sed_cmd_t* cmd,
	const qse_char_t* str, qse_size_t len,
	const qse_char_t* path, qse_size_t plen)
{
	qse_ssize_t n;
	qse_map_pair_t* pair;
	qse_sed_io_arg_t* ap;

	pair = qse_map_search (&sed->e.out.files, path, plen);
	if (pair == QSE_NULL)
	{
		/* first reference to this path. register a blank I/O argument */
		qse_sed_io_arg_t arg;

		QSE_MEMSET (&arg, 0, QSE_SIZEOF(arg));
		pair = qse_map_insert (&sed->e.out.files,
			(void*)path, plen, &arg, QSE_SIZEOF(arg));
		if (pair == QSE_NULL)
		{
			SETERR0 (sed, QSE_SED_ENOMEM, &cmd->loc);
			return -1;
		}
	}

	ap = QSE_MAP_VPTR(pair);
	if (ap->handle == QSE_NULL)
	{
		/* the file is not open yet */
		sed->errnum = QSE_SED_ENOERR;
		ap->path = path;
		n = sed->e.out.fun (sed, QSE_SED_IO_OPEN, ap, QSE_NULL, 0);
		if (n <= -1)
		{
			if (sed->errnum == QSE_SED_ENOERR)
				SETERR1 (sed, QSE_SED_EIOFIL, path, plen, &cmd->loc);
			else sed->errloc = cmd->loc;
			return -1;
		}
	}

	while (len > 0)
	{
		sed->errnum = QSE_SED_ENOERR;
		n = sed->e.out.fun (
			sed, QSE_SED_IO_WRITE, ap, (qse_char_t*)str, len);
		if (n <= -1)
		{
			sed->e.out.fun (sed, QSE_SED_IO_CLOSE, ap, QSE_NULL, 0);
			ap->handle = QSE_NULL;
			if (sed->errnum == QSE_SED_ENOERR)
				SETERR1 (sed, QSE_SED_EIOFIL, path, plen, &cmd->loc);
			sed->errloc = cmd->loc;
			return -1;
		}

		if (n == 0)
		{
			/* eof is returned on the write stream.
			 * it is also an error as it can't write any more */
			sed->e.out.fun (sed, QSE_SED_IO_CLOSE, ap, QSE_NULL, 0);
			ap->handle = QSE_NULL;
			SETERR1 (sed, QSE_SED_EIOFIL, path, plen, &cmd->loc);
			return -1;
		}

		/* the handler may have written only a part of the data.
		 * move past the part written so that the next call
		 * continues with the rest instead of repeating the start */
		str += n;
		len -= n;
	}

	return 0;
}
/* Copy the contents of the file named by cmd->u.file to the output.
 * If 'first_line' is non-zero, only the first line of the file is copied.
 *
 * The file is accessed through the input handler. Failure to open the
 * file is not an error and produces no output.
 *
 * Returns 0 on success and -1 on a read or write failure. The file is
 * closed on every path once it has been opened. */
static int write_file (
	qse_sed_t* sed, qse_sed_cmd_t* cmd, int first_line)
{
	qse_ssize_t n;
	qse_sed_io_arg_t arg;
	int ret = 0;
#if defined(QSE_CHAR_IS_MCHAR)
	qse_char_t buf[1024];
#else
	qse_char_t buf[512];
#endif

	arg.handle = QSE_NULL;
	arg.path = cmd->u.file.ptr;
	sed->errnum = QSE_SED_ENOERR;
	n = sed->e.in.fun (sed, QSE_SED_IO_OPEN, &arg, QSE_NULL, 0);
	if (n <= -1)
	{
		/*if (sed->errnum != QSE_SED_ENOERR)
		 *	SETERR0 (sed, QSE_SED_EIOUSR, &cmd->loc);
		 *return -1;*/
		/* it is ok if it is not able to open a file */
		return 0;
	}

	while (1)
	{
		sed->errnum = QSE_SED_ENOERR;
		n = sed->e.in.fun (
			sed, QSE_SED_IO_READ, &arg, buf, QSE_COUNTOF(buf));
		if (n <= -1)
		{
			sed->e.in.fun (sed, QSE_SED_IO_CLOSE, &arg, QSE_NULL, 0);
			if (sed->errnum == QSE_SED_ENOERR)
				SETERR1 (sed, QSE_SED_EIOFIL, cmd->u.file.ptr, cmd->u.file.len, &cmd->loc);
			else sed->errloc = cmd->loc;
			return -1;
		}
		if (n == 0) break;

		if (first_line)
		{
			qse_size_t i;

			for (i = 0; i < (qse_size_t)n; i++)
			{
				if (write_char (sed, buf[i]) <= -1)
				{
					/* don't leave the file open on a write failure */
					ret = -1;
					goto done;
				}

				/* TODO: support different line end convention */
				if (buf[i] == QSE_T('\n')) goto done;
			}
		}
		else
		{
			if (write_str (sed, buf, n) <= -1)
			{
				/* don't leave the file open on a write failure */
				ret = -1;
				goto done;
			}
		}
	}

done:
	sed->e.in.fun (sed, QSE_SED_IO_CLOSE, &arg, QSE_NULL, 0);
	return ret;
}
/* Register 'cmd' to be emitted by emit_appends().
 * The command goes into the static slot array while it has room, and into
 * a dynamically allocated list node afterwards.
 * Returns 0 on success and -1 if memory allocation fails. */
static int link_append (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_sed_app_t* node;

	if (sed->e.append.count < QSE_COUNTOF(sed->e.append.s))
	{
		/* a static slot is still available */
		sed->e.append.s[sed->e.append.count].cmd = cmd;
		sed->e.append.count++;
		return 0;
	}

	/* the static slots are used up. fall back to the linked list */
	node = qse_sed_allocmem(sed, QSE_SIZEOF(*node));
	if (node == QSE_NULL)
	{
		SETERR0 (sed, QSE_SED_ENOMEM, &cmd->loc);
		return -1;
	}

	node->next = QSE_NULL;
	node->cmd = cmd;

	if (sed->e.append.d.tail != QSE_NULL)
		sed->e.append.d.tail->next = node;
	else
		sed->e.append.d.head = node;
	sed->e.append.d.tail = node;

	/* the count covers the static slots only. it is left untouched here */
	return 0;
}
/* Forget all registered appends: free the dynamically allocated list
 * nodes and reset the list pointers and the static slot count. */
static void free_appends (qse_sed_t* sed)
{
	qse_sed_app_t* node;
	qse_sed_app_t* following;

	for (node = sed->e.append.d.head; node; node = following)
	{
		/* take the link before the node goes away */
		following = node->next;
		qse_sed_freemem (sed, node);
	}

	sed->e.append.d.tail = QSE_NULL;
	sed->e.append.d.head = QSE_NULL;
	sed->e.append.count = 0;
}
/* Produce the output of a single registered append according to the type
 * of its command: the text itself, a whole file, or the first line of
 * a file. Any other command type is an internal error.
 * Returns the result of the underlying writer, or -1 on internal error. */
static int emit_append (qse_sed_t* sed, qse_sed_app_t* app)
{
	qse_sed_cmd_t* owner = app->cmd;

	if (owner->type == QSE_SED_CMD_APPEND)
		return write_str (sed, owner->u.text.ptr, owner->u.text.len);

	if (owner->type == QSE_SED_CMD_READ_FILE)
		return write_file (sed, owner, 0);

	if (owner->type == QSE_SED_CMD_READ_FILELN)
		return write_file (sed, owner, 1);

	QSE_ASSERTX (
		!"should never happen",
		"app->cmd->type must be one of APPEND,READ_FILE,READ_FILELN"
	);
	SETERR0 (sed, QSE_SED_EINTERN, &owner->loc);
	return -1;
}
/* Emit every registered append: the ones held in the static slots first,
 * followed by the ones in the linked list.
 * Returns 0 on success and -1 as soon as one of them fails. */
static int emit_appends (qse_sed_t* sed)
{
	qse_size_t slot = 0;
	qse_sed_app_t* node;

	while (slot < sed->e.append.count)
	{
		if (emit_append (sed, &sed->e.append.s[slot]) <= -1) return -1;
		slot++;
	}

	for (node = sed->e.append.d.head; node; node = node->next)
	{
		if (emit_append (sed, node) <= -1) return -1;
	}

	return 0;
}
/* Make 'str' refer to the current input line with its line terminator
 * excluded. Returns the terminator excluded - "\n" or "\r\n" - or QSE_NULL
 * if the line does not end with a newline. */
static const qse_char_t* trim_line (qse_sed_t* sed, qse_cstr_t* str)
{
	str->ptr = QSE_STR_PTR(&sed->e.in.line);
	str->len = QSE_STR_LEN(&sed->e.in.line);

	/* TODO: support different line end convention */
	if (str->len == 0 || str->ptr[str->len-1] != QSE_T('\n'))
	{
		/* not terminated at all */
		return QSE_NULL;
	}
	str->len--;

	if (str->len == 0 || str->ptr[str->len-1] != QSE_T('\r'))
	{
		/* a newline alone */
		return QSE_T("\n");
	}
	str->len--;

	/* a carriage return followed by a newline */
	return QSE_T("\r\n");
}
/* Execute a substitute command on the current input line.
 *
 * The line without its terminator is scanned for matches of the regular
 * expression. The result is built in the scratch buffer, the terminator
 * is added back, and the scratch buffer is swapped with the input line.
 * If a replacement has been made, the line is additionally written to the
 * output when u.subst.p is set and to the file when u.subst.file is set,
 * and sed->e.subst_done is set.
 *
 * Returns 0 on success and -1 on failure. */
static int do_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
/* mat is the current match; pmat is the previous match */
qse_cstr_t mat, pmat;
int opt = 0, repl = 0, n;
#if defined(USE_REX)
qse_rex_errnum_t errnum;
#endif
const qse_char_t* lineterm;
/* str is the whole line; cur is the part not scanned yet */
qse_cstr_t str, cur;
const qse_char_t* str_end;
qse_size_t m, i, max_count, sub_count;
QSE_ASSERT (cmd->type == QSE_SED_CMD_SUBSTITUTE);
qse_str_clear (&sed->e.txt.scratch);
#if defined(USE_REX)
if (cmd->u.subst.i) opt = QSE_REX_IGNORECASE;
#endif
lineterm = trim_line (sed, &str);
str_end = str.ptr + str.len;
cur = str;
sub_count = 0;
/* max_count of 0 means no limit on the number of matches attempted */
max_count = (cmd->u.subst.g)? 0: cmd->u.subst.occ;
pmat.ptr = QSE_NULL;
pmat.len = 0;
/* perform test when cur_ptr == str_end also because
* end of string($) needs to be tested */
while (cur.ptr <= str_end)
{
#ifndef USE_REX
/* sub-matches referred to by \1 to \9 in the replacement */
qse_cstr_t submat[9];
QSE_MEMSET (submat, 0, QSE_SIZEOF(submat));
#endif
if (max_count == 0 || sub_count < max_count)
{
void* rex;
if (cmd->u.subst.rex == EMPTY_REX)
{
/* an empty regular expression reuses the one used last */
rex = sed->e.last_rex;
if (rex == QSE_NULL)
{
SETERR0 (sed, QSE_SED_ENPREX, &cmd->loc);
return -1;
}
}
else
{
rex = cmd->u.subst.rex;
sed->e.last_rex = rex;
}
#if defined(USE_REX)
n = qse_matchrex (
qse_sed_getmmgr(sed),
sed->opt.depth.rex.match,
rex, opt,
&str, &cur, &mat, &errnum
);
if (n <= -1)
{
SETERR0 (sed, QSE_SED_EREXMA, &cmd->loc);
return -1;
}
#else
/* QSE_TRE_NOTBOL is added unless matching starts at the beginning of the line */
n = matchtre (
sed, rex,
((str.ptr == cur.ptr)? opt: (opt | QSE_TRE_NOTBOL)),
&cur, &mat, submat, &cmd->loc
);
if (n <= -1) return -1;
#endif
}
/* the requested occurrence has been handled. stop matching */
else n = 0;
if (n == 0)
{
/* no more match found or substitution occurrence matched.
* copy the remaining portion and finish */
if (!cmd->u.subst.k)
{
/* copy the remaining portion */
m = qse_str_ncat (&sed->e.txt.scratch, cur.ptr, cur.len);
if (m == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
return -1;
}
}
break;
}
if (mat.len == 0 &&
pmat.ptr && mat.ptr == pmat.ptr + pmat.len)
{
/* match length is 0 and the match is still at the
* end of the previous match */
goto skip_one_char;
}
if (max_count > 0 && sub_count + 1 != max_count)
{
/* substitution occurrence specified.
* but this is not the occurrence yet */
if (!cmd->u.subst.k && cur.ptr < str_end)
{
/* copy the unmatched portion and the matched portion
* together as if the matched portion was not matched */
m = qse_str_ncat (
&sed->e.txt.scratch,
cur.ptr, mat.ptr - cur.ptr + mat.len
);
if (m == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
return -1;
}
}
}
else
{
/* perform actual substitution */
repl = 1;
if (!cmd->u.subst.k && cur.ptr < str_end)
{
/* copy the text between the scan position and the match */
m = qse_str_ncat (
&sed->e.txt.scratch, cur.ptr, mat.ptr - cur.ptr
);
if (m == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
return -1;
}
}
/* expand the replacement text character by character */
for (i = 0; i < cmd->u.subst.rpl.len; i++)
{
if ((i+1) < cmd->u.subst.rpl.len &&
cmd->u.subst.rpl.ptr[i] == QSE_T('\\'))
{
qse_char_t nc = cmd->u.subst.rpl.ptr[i+1];
#ifndef USE_REX
if (nc >= QSE_T('1') && nc <= QSE_T('9'))
{
/* a back-reference to a sub-match */
int smi = nc - QSE_T('1');
m = qse_str_ncat (
&sed->e.txt.scratch,
submat[smi].ptr, submat[smi].len
);
}
else
{
#endif
/* Known special characters have been escaped
* in get_subst(). so i don't call trans_escaped() here.
* It's a normal character that's escaped.
* For example, \1 is just 1. and \M is just M. */
m = qse_str_ccat (&sed->e.txt.scratch, nc);
#ifndef USE_REX
}
#endif
/* the escaped character has been consumed as well */
i++;
}
else if (cmd->u.subst.rpl.ptr[i] == QSE_T('&'))
{
/* an unescaped ampersand stands for the whole match */
m = qse_str_ncat (
&sed->e.txt.scratch,
mat.ptr, mat.len);
}
else
{
m = qse_str_ccat (
&sed->e.txt.scratch,
cmd->u.subst.rpl.ptr[i]);
}
if (m == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
return -1;
}
}
}
sub_count++;
/* resume scanning right after the match */
cur.len = cur.len - ((mat.ptr - cur.ptr) + mat.len);
cur.ptr = mat.ptr + mat.len;
pmat = mat;
if (mat.len == 0)
{
skip_one_char:
if (cur.ptr < str_end)
{
/* special treatment is needed if the match length is 0 */
m = qse_str_ncat (&sed->e.txt.scratch, cur.ptr, 1);
if (m == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
return -1;
}
}
/* step over one character so that the scan makes progress.
* when cur.ptr is already at str_end, this moves it past
* str_end and ends the loop */
cur.ptr++; cur.len--;
}
}
if (lineterm)
{
/* put back the line terminator removed by trim_line() */
m = qse_str_cat (&sed->e.txt.scratch, lineterm);
if (m == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
return -1;
}
}
/* the scratch buffer becomes the new input line */
qse_str_swap (&sed->e.in.line, &sed->e.txt.scratch);
if (repl)
{
if (cmd->u.subst.p)
{
n = write_str (
sed,
QSE_STR_PTR(&sed->e.in.line),
QSE_STR_LEN(&sed->e.in.line)
);
if (n <= -1) return -1;
}
if (cmd->u.subst.file.ptr)
{
n = write_str_to_file (
sed, cmd,
QSE_STR_PTR(&sed->e.in.line),
QSE_STR_LEN(&sed->e.in.line),
cmd->u.subst.file.ptr,
cmd->u.subst.file.len
);
if (n <= -1) return -1;
}
sed->e.subst_done = 1;
}
return 0;
}
/* Split 'str' into fields and record them in sed->e.cutf.
 *
 * With the w option, a run of space characters separates fields.
 * Otherwise, the input delimiter cmd->u.cut.delim[0] separates fields and
 * consecutive delimiters are folded into one if the f option is set.
 * The field array is enlarged on demand. sed->e.cutf.delimited tells
 * whether at least one delimiter has been found.
 *
 * Returns 0 on success and -1 if memory allocation fails. */
static int split_into_fields_for_cut (
	qse_sed_t* sed, qse_sed_cmd_t* cmd, const qse_cstr_t* str)
{
	qse_size_t pos = 0;    /* scan position in str */
	qse_size_t fld = 0;    /* index of the field being collected */
	qse_size_t fldlen = 0; /* length of the field being collected */

	sed->e.cutf.delimited = 0;
	sed->e.cutf.flds[fld].ptr = str->ptr;

	while (pos < str->len)
	{
		qse_char_t ch = str->ptr[pos++];
		int hit = 0;

		if (cmd->u.cut.w)
		{
			/* the w option ignores the d specifier */
			if (QSE_ISSPACE(ch))
			{
				/* the w option assumes the f option */
				while (pos < str->len && QSE_ISSPACE(str->ptr[pos])) pos++;
				hit = 1;
			}
		}
		else if (ch == cmd->u.cut.delim[0])
		{
			if (cmd->u.cut.f)
			{
				/* fold consecutive delimiters */
				while (pos < str->len && str->ptr[pos] == cmd->u.cut.delim[0]) pos++;
			}
			hit = 1;
		}

		if (!hit)
		{
			/* an ordinary character extends the current field */
			fldlen++;
			continue;
		}

		/* close the current field */
		sed->e.cutf.flds[fld].len = fldlen;
		fld++;

		if (fld >= sed->e.cutf.cflds)
		{
			/* no room for the next field. enlarge the array */
			qse_cstr_t* bigger;
			qse_size_t newcapa = sed->e.cutf.cflds;

			if (newcapa > 50000) newcapa += 50000;
			else newcapa *= 2;

			if (sed->e.cutf.flds == sed->e.cutf.sflds)
			{
				/* the static array is in use. it can't be
				 * reallocated, so allocate and copy */
				bigger = qse_sed_allocmem (sed, QSE_SIZEOF(*bigger) * newcapa);
				if (bigger == QSE_NULL) return -1;
				QSE_MEMCPY (bigger, sed->e.cutf.flds, QSE_SIZEOF(*bigger) * sed->e.cutf.cflds);
			}
			else
			{
				bigger = qse_sed_reallocmem (sed, sed->e.cutf.flds, QSE_SIZEOF(*bigger) * newcapa);
				if (bigger == QSE_NULL) return -1;
			}

			sed->e.cutf.flds = bigger;
			sed->e.cutf.cflds = newcapa;
		}

		/* open the next field right after the delimiter(s) */
		fldlen = 0;
		sed->e.cutf.flds[fld].ptr = &str->ptr[pos];

		/* mark that this line is delimited at least once */
		sed->e.cutf.delimited = 1;
	}

	/* close the last field */
	sed->e.cutf.flds[fld].len = fldlen;
	sed->e.cutf.nflds = fld + 1;
	return 0;
}
/* Execute a cut command on the current input line.
 *
 * The pieces of the line chosen by the selectors of the command are
 * collected in the scratch buffer in the order of the selectors. The line
 * terminator is added back and the scratch buffer is swapped with the
 * input line.
 *
 * Returns 1 when the input line has been rebuilt, 0 when the d option is
 * set and the line contains no input delimiter (the input line is cleared
 * in this case), and -1 on failure. */
static int do_cut (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
qse_sed_cut_sel_t* b;
const qse_char_t* lineterm;
qse_cstr_t str;
/* 0: nothing written yet, 1: characters written last, 2: a field written last */
int out_state;
qse_str_clear (&sed->e.txt.scratch);
lineterm = trim_line (sed, &str);
/* an empty line yields an empty result */
if (str.len <= 0) goto done;
/* split the line only if there is a field selector */
if (cmd->u.cut.fcount > 0)
{
if (split_into_fields_for_cut (sed, cmd, &str) <= -1) goto oops;
if (cmd->u.cut.d && !sed->e.cutf.delimited)
{
/* if the 'd' option is set and the line is not
* delimited by the input delimiter, delete the pattern
* space and finish the current cycle */
qse_str_clear (&sed->e.in.line);
return 0;
}
}
out_state = 0;
for (b = cmd->u.cut.fb; b; b = b->next)
{
qse_size_t i, s, e;
for (i = 0; i < b->len; i++)
{
if (b->range[i].id == QSE_SED_CUT_SEL_CHAR)
{
s = b->range[i].start;
e = b->range[i].end;
if (s <= e)
{
/* a forward character range, clipped to the line length */
if (s < str.len)
{
if (e >= str.len) e = str.len - 1;
/* the output delimiter is placed between a field and the characters following it */
if ((out_state == 2 && qse_str_ccat (&sed->e.txt.scratch, cmd->u.cut.delim[1]) == (qse_size_t)-1) ||
qse_str_ncat (&sed->e.txt.scratch, &str.ptr[s], e - s + 1) == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
goto oops;
}
out_state = 1;
}
}
else
{
/* start > end: the range is added with qse_str_nrcat()
* - presumably in reverse order; confirm against its documentation */
if (e < str.len)
{
if (s >= str.len) s = str.len - 1;
if ((out_state == 2 && qse_str_ccat (&sed->e.txt.scratch, cmd->u.cut.delim[1]) == (qse_size_t)-1) ||
qse_str_nrcat (&sed->e.txt.scratch, &str.ptr[e], s - e + 1) == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
goto oops;
}
out_state = 1;
}
}
}
else /*if (b->range[i].id == QSE_SED_CUT_SEL_FIELD)*/
{
s = b->range[i].start;
e = b->range[i].end;
if (s <= e)
{
/* NOTE(review): the field index is checked against the line
* length rather than the number of fields. the clipping of e
* below keeps the loop within the fields - confirm the intent */
if (s < str.len)
{
if (e >= sed->e.cutf.nflds) e = sed->e.cutf.nflds - 1;
while (s <= e)
{
/* the output delimiter precedes a field unless it is the first output */
if ((out_state > 0 && qse_str_ccat (&sed->e.txt.scratch, cmd->u.cut.delim[1]) == (qse_size_t)-1) ||
qse_str_ncat (&sed->e.txt.scratch, sed->e.cutf.flds[s].ptr, sed->e.cutf.flds[s].len) == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
goto oops;
}
s++;
out_state = 2;
}
}
}
else
{
/* NOTE(review): for start > end, the fields are written in
* ascending order from e to s, whereas a character range in
* this case goes through qse_str_nrcat() - confirm the intent */
if (e < str.len)
{
if (s >= sed->e.cutf.nflds) s = sed->e.cutf.nflds - 1;
while (e <= s)
{
if ((out_state > 0 && qse_str_ccat (&sed->e.txt.scratch, cmd->u.cut.delim[1]) == (qse_size_t)-1) ||
qse_str_ncat (&sed->e.txt.scratch, sed->e.cutf.flds[e].ptr, sed->e.cutf.flds[e].len) == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
goto oops;
}
e++;
out_state = 2;
}
}
}
}
}
}
done:
if (lineterm)
{
/* put back the line terminator removed by trim_line() */
if (qse_str_cat (&sed->e.txt.scratch, lineterm) == (qse_size_t)-1)
{
SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
return -1;
}
}
/* the scratch buffer becomes the new input line */
qse_str_swap (&sed->e.in.line, &sed->e.txt.scratch);
return 1;
oops:
return -1;
}
/* Test the single address 'a' of the command 'cmd' against the current
 * input state.
 * Returns 1 on match, 0 on no match and -1 on failure. An address of any
 * type not handled explicitly is asserted to be QSE_SED_ADR_NONE and
 * matches. */
static int match_a (qse_sed_t* sed, qse_sed_cmd_t* cmd, qse_sed_adr_t* a)
{
switch (a->type)
{
case QSE_SED_ADR_LINE:
/* matches if the current line number equals the address */
return (sed->e.in.num == a->u.lno)? 1: 0;
case QSE_SED_ADR_REX:
{
#if defined(USE_REX)
int n;
qse_rex_errnum_t errnum;
qse_cstr_t match;
#endif
qse_cstr_t line;
void* rex;
QSE_ASSERT (a->u.rex != QSE_NULL);
line.ptr = QSE_STR_PTR(&sed->e.in.line);
line.len = QSE_STR_LEN(&sed->e.in.line);
/* exclude a trailing "\n" or "\r\n" from the text to match */
if (line.len > 0 &&
line.ptr[line.len-1] == QSE_T('\n'))
{
line.len--;
if (line.len > 0 && line.ptr[line.len-1] == QSE_T('\r')) line.len--;
}
if (a->u.rex == EMPTY_REX)
{
/* an empty regular expression reuses the one used last */
rex = sed->e.last_rex;
if (rex == QSE_NULL)
{
SETERR0 (sed, QSE_SED_ENPREX, &cmd->loc);
return -1;
}
}
else
{
rex = a->u.rex;
sed->e.last_rex = rex;
}
#if defined(USE_REX)
n = qse_matchrex (
qse_sed_getmmgr(sed),
sed->opt.depth.rex.match,
rex, 0,
&line, &line,
&match, &errnum);
if (n <= -1)
{
SETERR0 (sed, QSE_SED_EREXMA, &cmd->loc);
return -1;
}
return n;
#else
return matchtre (sed, rex, 0, &line, QSE_NULL, QSE_NULL, &cmd->loc);
#endif
}
case QSE_SED_ADR_DOL:
{
/* the last line is detected by reading one character ahead */
qse_char_t c;
int n;
if (sed->e.in.xbuf_len < 0)
{
/* we know that we've reached eof as it has
* been done so previously */
return 1;
}
n = read_char (sed, &c);
if (n <= -1) return -1;
QSE_ASSERT (sed->e.in.xbuf_len == 0);
if (n == 0)
{
/* eof has been reached */
/* a negative xbuf_len records the fact for read_char() */
sed->e.in.xbuf_len--;
return 1;
}
else
{
/* push the character back for the next read_char() */
sed->e.in.xbuf[sed->e.in.xbuf_len++] = c;
return 0;
}
}
case QSE_SED_ADR_RELLINE:
/* this address type should be seen only when matching
* the second address */
QSE_ASSERT (cmd->state.a1_matched && cmd->state.a1_match_line >= 1);
/* matches once the line the first address matched plus the offset is reached */
return (sed->e.in.num >= cmd->state.a1_match_line + a->u.lno)? 1: 0;
case QSE_SED_ADR_RELLINEM:
{
/* this address type should be seen only when matching
* the second address */
qse_size_t tmp;
QSE_ASSERT (cmd->state.a1_matched && cmd->state.a1_match_line >= 1);
QSE_ASSERT (a->u.lno > 0);
/* TODO: is it better to store this value somewhere in the state
* not to calculate this every time?? */
/* tmp is the smallest multiple of a->u.lno greater than the
* line the first address matched */
tmp = (cmd->state.a1_match_line + a->u.lno) -
(cmd->state.a1_match_line % a->u.lno);
return (sed->e.in.num >= tmp)? 1: 0;
}
default:
QSE_ASSERT (a->type == QSE_SED_ADR_NONE);
return 1; /* match */
}
}
/* Decide whether the address part of a command selects the current input.
 * cmd->state.c_ready is cleared first and set again on the paths below
 * that leave no address range open.
 * return -1 on error, 0 on no match, 1 on match. */
static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	int rc;

	cmd->state.c_ready = 0;

	if (cmd->a1.type == QSE_SED_ADR_NONE)
	{
		/* no address given. every input is selected */
		QSE_ASSERT (cmd->a2.type == QSE_SED_ADR_NONE);
		cmd->state.c_ready = 1;
		return 1;
	}

	if (cmd->a2.type == QSE_SED_ADR_STEP)
	{
		/* stepping address. the first address must be a line number */
		QSE_ASSERT (cmd->a1.type == QSE_SED_ADR_LINE);
		cmd->state.c_ready = 1;

		if (sed->e.in.num < cmd->a1.u.lno) return 0;

		QSE_ASSERT (cmd->a2.u.lno > 0);
		return ((sed->e.in.num - cmd->a1.u.lno) % cmd->a2.u.lno == 0)? 1: 0;
	}

	if (cmd->a2.type == QSE_SED_ADR_NONE)
	{
		/* single address */
		cmd->state.c_ready = 1;
		rc = match_a (sed, cmd, &cmd->a1);
		if (rc <= -1) return -1;
		return (rc == 0)? 0: 1;
	}

	/* two addresses from here on */
	if (!cmd->state.a1_matched)
	{
		/* the range is not open yet. test the first address */
		rc = match_a (sed, cmd, &cmd->a1);
		if (rc <= -1) return -1;
		if (rc == 0) return 0;

		if (cmd->a2.type == QSE_SED_ADR_LINE &&
		    sed->e.in.num >= cmd->a2.u.lno)
		{
			/* the line number in the second address is equal to
			 * or less than the current line number, so no range
			 * is opened. */
			cmd->state.c_ready = 1;
		}
		else
		{
			/* remember that the first address has matched so that
			 * later calls go on to the range test */
			cmd->state.a1_matched = 1;
			cmd->state.a1_match_line = sed->e.in.num;
		}
		return 1;
	}

	/* the range is open. test the second address */
	rc = match_a (sed, cmd, &cmd->a2);
	if (rc <= -1) return -1;

	if (rc != 0)
	{
		/* the second address has matched. exit the range */
		cmd->state.a1_matched = 0;
		cmd->state.c_ready = 1;
		return 1;
	}

	if (cmd->a2.type == QSE_SED_ADR_LINE &&
	    sed->e.in.num > cmd->a2.u.lno)
	{
		/* This check is needed because matching of the second
		 * address could have been skipped while it could match.
		 *
		 * Consider commands like '1,3p;2N'.
		 * '3' in '1,3p' is skipped because 'N' in '2N' triggers
		 * reading of the third line.
		 *
		 * A second address that is not a line number can't be
		 * handled this way. If 'abcxyz' is given as the third line
		 * for the command '1,/abc/p;2N', 'abcxyz' is not matched
		 * against '/abc/', so the range is not exited.
		 */
		cmd->state.a1_matched = 0;
		return 0;
	}

	/* still in the range. report a match despite the actual mismatch */
	return 1;
}
/* Execute a single command against the current pattern space.
 *
 * The return value tells the caller where to continue:
 *  - QSE_NULL if any helper invoked here has failed.
 *  - &sed->cmd.quit or &sed->cmd.quit_quiet for the quit commands.
 *  - &sed->cmd.over to finish the current cycle.
 *  - &sed->cmd.again to start over from the first command.
 *  - the branch target for the branch commands.
 *  - cmd->state.next in all other cases. */
static qse_sed_cmd_t* exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	int n;
	qse_sed_cmd_t* jumpto = QSE_NULL;

	switch (cmd->type)
	{
		case QSE_SED_CMD_NOOP:
			break;

		case QSE_SED_CMD_QUIT:
			jumpto = &sed->cmd.quit;
			break;

		case QSE_SED_CMD_QUIT_QUIET:
			jumpto = &sed->cmd.quit_quiet;
			break;

		case QSE_SED_CMD_APPEND:
			if (link_append (sed, cmd) <= -1) return QSE_NULL;
			break;

		case QSE_SED_CMD_INSERT:
			n = write_str (sed,
				cmd->u.text.ptr,
				cmd->u.text.len
			);
			if (n <= -1) return QSE_NULL;
			break;

		case QSE_SED_CMD_CHANGE:
			if (cmd->state.c_ready)
			{
				/* change the pattern space.
				 * the qse_size_t result is compared directly, like
				 * the hold/release cases below, instead of being
				 * squeezed through the int variable first. */
				if (qse_str_ncpy (
					&sed->e.in.line,
					cmd->u.text.ptr,
					cmd->u.text.len) == (qse_size_t)-1)
				{
					SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
					return QSE_NULL;
				}
			}
			else
			{
				/* c_ready is not set by match_address() while an
				 * address range is still open. just drop the
				 * pattern space. */
				qse_str_clear (&sed->e.in.line);
			}

			/* move past the last command so as to start
			 * the next cycle */
			jumpto = &sed->cmd.over;
			break;

		case QSE_SED_CMD_DELETE_FIRSTLN:
		{
			qse_char_t* nl;

			/* delete the first line from the pattern space */
			nl = qse_strxchr (
				QSE_STR_PTR(&sed->e.in.line),
				QSE_STR_LEN(&sed->e.in.line),
				QSE_T('\n'));
			if (nl)
			{
				/* if a new line is found, delete up to it */
				qse_str_del (&sed->e.in.line, 0,
					nl - QSE_STR_PTR(&sed->e.in.line) + 1);

				if (QSE_STR_LEN(&sed->e.in.line) > 0)
				{
					/* if the pattern space is not empty,
					 * arrange to execute from the first
					 * command */
					jumpto = &sed->cmd.again;
				}
				else
				{
					/* finish the current cycle */
					jumpto = &sed->cmd.over;
				}
				break;
			}

			/* otherwise clear the entire pattern space below */
		}
		/* fall through */

		case QSE_SED_CMD_DELETE:
			/* delete the pattern space */
			qse_str_clear (&sed->e.in.line);

			/* finish the current cycle */
			jumpto = &sed->cmd.over;
			break;

		case QSE_SED_CMD_PRINT_LNNUM:
			/* print the current line number followed by a new line */
			if (write_num (sed, sed->e.in.num, 10, 0) <= -1) return QSE_NULL;
			if (write_char (sed, QSE_T('\n')) <= -1) return QSE_NULL;
			break;

		case QSE_SED_CMD_PRINT:
			n = write_str (
				sed,
				QSE_STR_PTR(&sed->e.in.line),
				QSE_STR_LEN(&sed->e.in.line)
			);
			if (n <= -1) return QSE_NULL;
			break;

		case QSE_SED_CMD_PRINT_FIRSTLN:
			n = write_first_line (
				sed,
				QSE_STR_PTR(&sed->e.in.line),
				QSE_STR_LEN(&sed->e.in.line)
			);
			if (n <= -1) return QSE_NULL;
			break;

		case QSE_SED_CMD_PRINT_CLEARLY:
			/* a formatter set in the options takes precedence
			 * over the built-in one */
			if (sed->opt.lformatter)
			{
				n = sed->opt.lformatter (
					sed,
					QSE_STR_PTR(&sed->e.in.line),
					QSE_STR_LEN(&sed->e.in.line),
					write_char
				);
			}
			else
			{
				n = write_str_clearly (
					sed,
					QSE_STR_PTR(&sed->e.in.line),
					QSE_STR_LEN(&sed->e.in.line)
				);
			}
			if (n <= -1) return QSE_NULL;
			break;

		case QSE_SED_CMD_HOLD:
			/* copy the pattern space to the hold space */
			if (qse_str_ncpy (&sed->e.txt.hold,
				QSE_STR_PTR(&sed->e.in.line),
				QSE_STR_LEN(&sed->e.in.line)) == (qse_size_t)-1)
			{
				SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
				return QSE_NULL;
			}
			break;

		case QSE_SED_CMD_HOLD_APPEND:
			/* append the pattern space to the hold space */
			if (qse_str_ncat (&sed->e.txt.hold,
				QSE_STR_PTR(&sed->e.in.line),
				QSE_STR_LEN(&sed->e.in.line)) == (qse_size_t)-1)
			{
				SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
				return QSE_NULL;
			}
			break;

		case QSE_SED_CMD_RELEASE:
			/* copy the hold space to the pattern space */
			if (qse_str_ncpy (&sed->e.in.line,
				QSE_STR_PTR(&sed->e.txt.hold),
				QSE_STR_LEN(&sed->e.txt.hold)) == (qse_size_t)-1)
			{
				SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
				return QSE_NULL;
			}
			break;

		case QSE_SED_CMD_RELEASE_APPEND:
			/* append the hold space to the pattern space */
			if (qse_str_ncat (&sed->e.in.line,
				QSE_STR_PTR(&sed->e.txt.hold),
				QSE_STR_LEN(&sed->e.txt.hold)) == (qse_size_t)-1)
			{
				SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
				return QSE_NULL;
			}
			break;

		case QSE_SED_CMD_EXCHANGE:
			/* exchange the pattern space and the hold space */
			qse_str_swap (&sed->e.in.line, &sed->e.txt.hold);
			break;

		case QSE_SED_CMD_NEXT:
			if (emit_output (sed, 0) <= -1) return QSE_NULL;

			/* read the next line and fill the pattern space */
			n = read_line (sed, 0);
			if (n <= -1) return QSE_NULL;
			if (n == 0)
			{
				/* EOF is reached. */
				jumpto = &sed->cmd.over;
			}
			break;

		case QSE_SED_CMD_NEXT_APPEND:
			/* append the next line to the pattern space.
			 * emit_output() is told to skip the pattern space here */
			if (emit_output (sed, 1) <= -1) return QSE_NULL;

			n = read_line (sed, 1);
			if (n <= -1) return QSE_NULL;
			if (n == 0)
			{
				/* EOF is reached. */
				jumpto = &sed->cmd.over;
			}
			break;

		case QSE_SED_CMD_READ_FILE:
			if (link_append (sed, cmd) <= -1) return QSE_NULL;
			break;

		case QSE_SED_CMD_READ_FILELN:
			if (link_append (sed, cmd) <= -1) return QSE_NULL;
			break;

		case QSE_SED_CMD_WRITE_FILE:
			n = write_str_to_file (
				sed, cmd,
				QSE_STR_PTR(&sed->e.in.line),
				QSE_STR_LEN(&sed->e.in.line),
				cmd->u.file.ptr,
				cmd->u.file.len
			);
			if (n <= -1) return QSE_NULL;
			break;

		case QSE_SED_CMD_WRITE_FILELN:
		{
			const qse_char_t* ptr = QSE_STR_PTR(&sed->e.in.line);
			qse_size_t i, len = QSE_STR_LEN(&sed->e.in.line);

			/* find the end of the first line. the new line itself
			 * is included in the part to write. if there is no
			 * new line, the whole pattern space is written. */
			for (i = 0; i < len; i++)
			{
				/* TODO: handle different line end conventions */
				if (ptr[i] == QSE_T('\n'))
				{
					i++;
					break;
				}
			}

			n = write_str_to_file (
				sed, cmd, ptr, i,
				cmd->u.file.ptr,
				cmd->u.file.len
			);
			if (n <= -1) return QSE_NULL;
			break;
		}

		case QSE_SED_CMD_BRANCH_COND:
			/* branch only if a substitution has been flagged.
			 * the flag is consumed when the branch is taken. */
			if (!sed->e.subst_done) break;
			sed->e.subst_done = 0;
			/* fall through */

		case QSE_SED_CMD_BRANCH:
			QSE_ASSERT (cmd->u.branch.target != QSE_NULL);
			jumpto = cmd->u.branch.target;
			break;

		case QSE_SED_CMD_SUBSTITUTE:
			if (do_subst (sed, cmd) <= -1) return QSE_NULL;
			break;

		case QSE_SED_CMD_TRANSLATE:
		{
			qse_char_t* ptr = QSE_STR_PTR(&sed->e.in.line);
			qse_size_t i, len = QSE_STR_LEN(&sed->e.in.line);

			/* TODO: sort cmd->u.transet and do binary search.
			 * when sorted, you can, before binary search, check
			 * if ptr[i] < transet[0] || ptr[i] > transet[transet_size-1].
			 * if so, it has no matching translation */

			/* TODO: support different line end conventions */
			if (len > 0 && ptr[len-1] == QSE_T('\n'))
			{
				/* leave the line terminator untranslated */
				len--;
				if (len > 0 && ptr[len-1] == QSE_T('\r')) len--;
			}

			for (i = 0; i < len; i++)
			{
				/* the set is scanned in pairs. the character at
				 * an even index is replaced with the one that
				 * follows it. */
				const qse_char_t* tptr = cmd->u.transet.ptr;
				qse_size_t j, tlen = cmd->u.transet.len;
				for (j = 0; j < tlen; j += 2)
				{
					if (ptr[i] == tptr[j])
					{
						ptr[i] = tptr[j+1];
						break;
					}
				}
			}
			break;
		}

		case QSE_SED_CMD_CLEAR_PATTERN:
			/* clear pattern space */
			qse_str_clear (&sed->e.in.line);
			break;

		case QSE_SED_CMD_CUT:
			n = do_cut (sed, cmd);
			if (n <= -1) return QSE_NULL;
			if (n == 0) jumpto = &sed->cmd.over; /* finish the current cycle */
			break;
	}

	/* no explicit destination. continue with the command that follows */
	if (jumpto == QSE_NULL) jumpto = cmd->state.next;
	return jumpto;
}
/* Callback installed in the style of the output file map by qse_sed_exec().
 * It closes the output stream described by the value of a pair unless its
 * handle is unset. The owning sed object is fetched from the extension
 * area of the map. */
static void close_outfile (qse_map_t* map, void* dptr, qse_size_t dlen)
{
	qse_sed_io_arg_t* io = dptr;
	qse_sed_t* owner;

	QSE_ASSERT (dlen == QSE_SIZEOF(*io));

	/* nothing to close */
	if (!io->handle) return;

	owner = *(qse_sed_t**)QSE_XTN(map);
	owner->e.out.fun (owner, QSE_SED_IO_CLOSE, io, QSE_NULL, 0);
	io->handle = QSE_NULL;
}
/* Prepare every command in a command block for execution:
 * reset the address matching state, chain each command to its successor,
 * resolve branch targets that are still unresolved, and open the output
 * files named by the write commands and substitute commands in advance.
 * return 0 on success, -1 on failure. */
static int init_command_block_for_exec (qse_sed_t* sed, qse_sed_cmd_blk_t* b)
{
	qse_size_t idx;

	QSE_ASSERT (b->len <= QSE_COUNTOF(b->buf));

	for (idx = 0; idx < b->len; idx++)
	{
		qse_sed_cmd_t* cur = &b->buf[idx];
		const qse_cstr_t* outfile = QSE_NULL;
		int is_branch;

		/* clear states */
		cur->state.a1_matched = 0;
		if ((sed->opt.trait & QSE_SED_EXTENDEDADR) &&
		    cur->a2.type == QSE_SED_ADR_REX &&
		    cur->a1.type == QSE_SED_ADR_LINE &&
		    cur->a1.u.lno <= 0)
		{
			/* special handling for 0,/regex/.
			 * the range is treated as open from the start */
			cur->state.a1_matched = 1;
			cur->state.a1_match_line = 0;
		}
		cur->state.c_ready = 0;

		/* chain to the following command. the last command of the
		 * last non-empty block is followed by the 'over' sentinel */
		if (idx + 1 < b->len)
		{
			cur->state.next = &b->buf[idx + 1];
		}
		else if (b->next != QSE_NULL && b->next->len > 0)
		{
			cur->state.next = &b->next->buf[0];
		}
		else
		{
			cur->state.next = &sed->cmd.over;
		}

		is_branch = (cur->type == QSE_SED_CMD_BRANCH ||
		             cur->type == QSE_SED_CMD_BRANCH_COND);
		if (is_branch && cur->u.branch.target == QSE_NULL)
		{
			/* resolve unresolved branch targets */
			qse_cstr_t* label = &cur->u.branch.label;

			if (label->ptr == QSE_NULL)
			{
				/* no label. arrange to branch past the last */
				cur->u.branch.target = &sed->cmd.over;
			}
			else
			{
				qse_map_pair_t* found;

				found = qse_map_search (
					&sed->tmp.labs, label->ptr, label->len);
				if (found == QSE_NULL)
				{
					SETERR1 (
						sed, QSE_SED_ELABNF,
						label->ptr, label->len, &cur->loc
					);
					return -1;
				}

				cur->u.branch.target = QSE_MAP_VPTR(found);

				/* the label name is not needed once resolved */
				qse_sed_freemem (sed, label->ptr);
				label->ptr = QSE_NULL;
				label->len = 0;
			}

			continue;
		}

		/* pick the output file to open in advance, if any */
		if (cur->type == QSE_SED_CMD_WRITE_FILE ||
		    cur->type == QSE_SED_CMD_WRITE_FILELN)
		{
			outfile = &cur->u.file;
		}
		else if (cur->type == QSE_SED_CMD_SUBSTITUTE &&
		         cur->u.subst.file.ptr)
		{
			outfile = &cur->u.subst.file;
		}

		if (outfile)
		{
			/* writing nothing is enough to get the file opened */
			int n = write_str_to_file (
				sed, cur, QSE_NULL, 0,
				outfile->ptr, outfile->len
			);
			if (n <= -1) return -1;
		}
	}

	return 0;
}
/* Prepare all command blocks for execution, starting with the first
 * block embedded in the sed object and following the 'next' links.
 * return 0 on success, -1 as soon as a block fails to initialize. */
static int init_all_commands_for_exec (qse_sed_t* sed)
{
	qse_sed_cmd_blk_t* blk = &sed->cmd.fb;

	while (blk != QSE_NULL)
	{
		if (init_command_block_for_exec (sed, blk) <= -1) return -1;
		blk = blk->next;
	}

	return 0;
}
/* Produce the output due at this point: the pattern space, unless
 * skipline is non-zero or the QSE_SED_QUIET trait is set, followed by
 * the pending appends. The appends are freed and the output stream is
 * flushed afterwards.
 * return 0 on success, -1 on failure. */
static int emit_output (qse_sed_t* sed, int skipline)
{
	int print_pattern_space;

	print_pattern_space = !skipline && !(sed->opt.trait & QSE_SED_QUIET);
	if (print_pattern_space)
	{
		/* write the pattern space */
		if (write_str (sed,
			QSE_STR_PTR(&sed->e.in.line),
			QSE_STR_LEN(&sed->e.in.line)) <= -1) return -1;
	}

	if (emit_appends (sed) <= -1) return -1;
	free_appends (sed);

	/* flush the output stream in case it's not flushed
	 * in write functions */
	return (flush (sed) <= -1)? -1: 0;
}
/* Execute the compiled commands.
 *
 * Input is obtained through 'inf' and output is produced through 'outf'.
 * Both handlers are opened before the first line is read and closed
 * before returning. A line is read, the commands whose addresses match
 * are executed, and the resulting output is emitted. This repeats until
 * the input is exhausted, a quit command is executed, an error occurs,
 * or a halt is requested with qse_sed_halt().
 *
 * All execution state kept in the sed object is reset first, so the
 * function can be called again on the same object.
 *
 * return 0 on success, -1 on failure. */
int qse_sed_exec (qse_sed_t* sed, qse_sed_io_impl_t inf, qse_sed_io_impl_t outf)
{
	qse_ssize_t n;
	int ret = 0;

	/* style of the map of named output files. close_outfile is
	 * installed so that a file is closed when its pair goes away. */
	static qse_map_style_t style =
	{
		{
			QSE_MAP_COPIER_INLINE,
			QSE_MAP_COPIER_INLINE
		},
		{
			QSE_MAP_FREEER_DEFAULT,
			close_outfile
		},
		QSE_MAP_COMPER_DEFAULT,
		QSE_MAP_KEEPER_DEFAULT
#if defined(QSE_MAP_AS_HTB)
		,
		QSE_MAP_SIZER_DEFAULT,
		QSE_MAP_HASHER_DEFAULT
#endif
	};

	sed->e.haltreq = 0;
	sed->e.last_rex = QSE_NULL;

	sed->e.subst_done = 0;
	free_appends (sed);
	qse_str_clear (&sed->e.txt.scratch);
	qse_str_clear (&sed->e.txt.hold);
	/* the hold space starts out holding a single new line */
	if (qse_str_ccat (&sed->e.txt.hold, QSE_T('\n')) == (qse_size_t)-1)
	{
		SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
		return -1;
	}

	sed->e.out.fun = outf;
	sed->e.out.eof = 0;
	sed->e.out.len = 0;
	if (qse_map_init (
		&sed->e.out.files, qse_sed_getmmgr(sed),
		128, 70, QSE_SIZEOF(qse_char_t), 1) <= -1)
	{
		SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
		return -1;
	}
	/* close_outfile() finds the sed object through the map extension */
	*(qse_sed_t**)QSE_XTN(&sed->e.out.files) = sed;
	qse_map_setstyle (&sed->e.out.files, &style);

	sed->e.in.fun = inf;
	sed->e.in.eof = 0;
	sed->e.in.len = 0;
	sed->e.in.pos = 0;
	sed->e.in.num = 0;
	/* the '$' address test leaves a pushed-back character (1) or an
	 * end-of-input mark (-1) here. don't let a previous run leak it
	 * into this one. */
	sed->e.in.xbuf_len = 0;
	if (qse_str_init (&sed->e.in.line, qse_sed_getmmgr(sed), 256) <= -1)
	{
		qse_map_fini (&sed->e.out.files);
		SETERR0 (sed, QSE_SED_ENOMEM, QSE_NULL);
		return -1;
	}

	/* open the input stream. the error number is cleared beforehand
	 * to tell if the handler has set one on failure. */
	sed->errnum = QSE_SED_ENOERR;
	sed->e.in.arg.path = QSE_NULL;
	n = sed->e.in.fun (sed, QSE_SED_IO_OPEN, &sed->e.in.arg, QSE_NULL, 0);
	if (n <= -1)
	{
		ret = -1;
		if (sed->errnum == QSE_SED_ENOERR)
			SETERR0 (sed, QSE_SED_EIOUSR, QSE_NULL);
		goto done3;
	}

	/* open the output stream in the same manner */
	sed->errnum = QSE_SED_ENOERR;
	sed->e.out.arg.path = QSE_NULL;
	n = sed->e.out.fun (sed, QSE_SED_IO_OPEN, &sed->e.out.arg, QSE_NULL, 0);
	if (n <= -1)
	{
		ret = -1;
		if (sed->errnum == QSE_SED_ENOERR)
			SETERR0 (sed, QSE_SED_EIOUSR, QSE_NULL);
		goto done2;
	}

	if (init_all_commands_for_exec (sed) <= -1)
	{
		ret = -1;
		goto done;
	}

	while (!sed->e.haltreq)
	{
#if defined(QSE_ENABLE_SED_TRACER)
		if (sed->opt.tracer) sed->opt.tracer (sed, QSE_SED_TRACER_READ, QSE_NULL);
#endif

		n = read_line (sed, 0);
		if (n <= -1) { ret = -1; goto done; }
		if (n == 0) goto done; /* no more input */

		if (sed->cmd.fb.len > 0)
		{
			/* the first command block contains at least 1 command
			 * to execute. an empty script like ' ' has no commands,
			 * so we execute no commands */

			qse_sed_cmd_t* c, * j;

		again:
			c = &sed->cmd.fb.buf[0];

			while (c != &sed->cmd.over)
			{
#if defined(QSE_ENABLE_SED_TRACER)
				if (sed->opt.tracer) sed->opt.tracer (sed, QSE_SED_TRACER_MATCH, c);
#endif

				n = match_address (sed, c);
				if (n <= -1) { ret = -1; goto done; }

				if (c->negated) n = !n;
				if (n == 0)
				{
					/* not selected. try the next command */
					c = c->state.next;
					continue;
				}

#if defined(QSE_ENABLE_SED_TRACER)
				if (sed->opt.tracer) sed->opt.tracer (sed, QSE_SED_TRACER_EXEC, c);
#endif
				j = exec_cmd (sed, c);
				if (j == QSE_NULL) { ret = -1; goto done; }

				/* the quiet variant quits without emitting output */
				if (j == &sed->cmd.quit_quiet) goto done;
				if (j == &sed->cmd.quit)
				{
					if (emit_output (sed, 0) <= -1) ret = -1;
					goto done;
				}
				if (sed->e.haltreq) goto done;
				if (j == &sed->cmd.again) goto again;

				/* go to the next command */
				c = j;
			}
		}

#if defined(QSE_ENABLE_SED_TRACER)
		if (sed->opt.tracer) sed->opt.tracer (sed, QSE_SED_TRACER_WRITE, QSE_NULL);
#endif
		if (emit_output (sed, 0) <= -1) { ret = -1; goto done; }
	}

	/* the labels below undo the set-up in the reverse order.
	 * a failure jumps to the label matching what has been set up. */
done:
	qse_map_clear (&sed->e.out.files);
	sed->e.out.fun (sed, QSE_SED_IO_CLOSE, &sed->e.out.arg, QSE_NULL, 0);
done2:
	sed->e.in.fun (sed, QSE_SED_IO_CLOSE, &sed->e.in.arg, QSE_NULL, 0);
done3:
	qse_str_fini (&sed->e.in.line);
	qse_map_fini (&sed->e.out.files);
	return ret;
}
/* Raise the halt request flag. qse_sed_exec() tests the flag before
 * reading each line and after executing each command. */
void qse_sed_halt (qse_sed_t* sed)
{
	sed->e.haltreq = 1;
}
/* Return the current value of the halt request flag. */
int qse_sed_ishalt (qse_sed_t* sed)
{
	return sed->e.haltreq;
}
/* Return the text of the most recently set compile identifier,
 * or QSE_NULL if none has been set. The text is located right
 * after the qse_sed_cid_t header of the entry. */
const qse_char_t* qse_sed_getcompid (qse_sed_t* sed)
{
	if (!sed->src.cid) return QSE_NULL;
	return (const qse_char_t*)(sed->src.cid + 1);
}
/* Set the compile identifier to a copy of 'id' and return the copy.
 * QSE_NULL is taken as an empty string. The new entry is put at the
 * head of the list of identifiers kept in the sed object.
 *
 * If memory can't be allocated, the built-in 'unknown' entry is emptied
 * and put in place instead. Once that has happened, further calls change
 * nothing and return the text of that entry. */
const qse_char_t* qse_sed_setcompid (qse_sed_t* sed, const qse_char_t* id)
{
	qse_sed_cid_t* node;
	qse_size_t idlen;

	if (sed->src.cid == (qse_sed_cid_t*)&sed->src.unknown_cid)
	{
		/* an allocation failure has occurred previously.
		 * the identifier can't be set any more */
		return (const qse_char_t*)(sed->src.cid + 1);
	}

	if (id == QSE_NULL) id = QSE_T("");
	idlen = qse_strlen (id);

	/* the header is followed by the text and its terminator */
	node = qse_sed_allocmem(sed, QSE_SIZEOF(*node) + ((idlen + 1) * QSE_SIZEOF(*id)));
	if (node != QSE_NULL)
	{
		qse_strcpy ((qse_char_t*)(node + 1), id);
	}
	else
	{
		/* mark that an error has occurred */
		sed->src.unknown_cid.buf[0] = QSE_T('\0');
		node = (qse_sed_cid_t*)&sed->src.unknown_cid;
	}

	node->next = sed->src.cid;
	sed->src.cid = node;
	return (const qse_char_t*)(node + 1);
}
/* Return the current input line number kept in the sed object. */
qse_size_t qse_sed_getlinenum (qse_sed_t* sed)
{
	return sed->e.in.num;
}
/* Overwrite the current input line number kept in the sed object. */
void qse_sed_setlinenum (qse_sed_t* sed, qse_size_t num)
{
	sed->e.in.num = num;
}
/* Detach the callback entry at the top of the list and return it.
 * QSE_NULL is returned if the list is empty. */
qse_sed_ecb_t* qse_sed_popecb (qse_sed_t* sed)
{
	qse_sed_ecb_t* popped = sed->ecb;

	if (popped != QSE_NULL)
	{
		sed->ecb = popped->next;
	}

	return popped;
}
/* Put a callback entry on top of the list. The entry itself is linked
 * in, not a copy of it. */
void qse_sed_pushecb (qse_sed_t* sed, qse_sed_ecb_t* ecb)
{
	qse_sed_ecb_t* old_top = sed->ecb;

	ecb->next = old_top;
	sed->ecb = ecb;
}
/* Allocate 'size' bytes with the memory manager of the sed object.
 * On failure, QSE_SED_ENOMEM is set on the object and QSE_NULL is
 * returned. */
void* qse_sed_allocmem (qse_sed_t* sed, qse_size_t size)
{
	void* mem = QSE_MMGR_ALLOC(qse_sed_getmmgr(sed), size);

	if (mem == QSE_NULL)
	{
		qse_sed_seterrnum (sed, QSE_SED_ENOMEM, QSE_NULL);
	}

	return mem;
}
/* Allocate 'size' bytes with the memory manager of the sed object and
 * fill them with zeros. On failure, QSE_SED_ENOMEM is set on the object
 * and QSE_NULL is returned. */
void* qse_sed_callocmem (qse_sed_t* sed, qse_size_t size)
{
	void* mem = QSE_MMGR_ALLOC(qse_sed_getmmgr(sed), size);

	if (mem == QSE_NULL)
	{
		qse_sed_seterrnum (sed, QSE_SED_ENOMEM, QSE_NULL);
		return mem;
	}

	QSE_MEMSET (mem, 0, size);
	return mem;
}
/* Resize the block at 'ptr' to 'size' bytes with the memory manager of
 * the sed object. QSE_SED_ENOMEM is set on the object whenever the
 * memory manager returns QSE_NULL, and that result is returned as is. */
void* qse_sed_reallocmem (qse_sed_t* sed, void* ptr, qse_size_t size)
{
	void* resized = QSE_MMGR_REALLOC(qse_sed_getmmgr(sed), ptr, size);

	if (resized == QSE_NULL)
	{
		qse_sed_seterrnum (sed, QSE_SED_ENOMEM, QSE_NULL);
	}

	return resized;
}
/* Hand the block at 'ptr' back to the memory manager of the sed object. */
void qse_sed_freemem (qse_sed_t* sed, void* ptr)
{
	QSE_MMGR_FREE (qse_sed_getmmgr(sed), ptr);
}
/* Make 'str' refer to the current contents of the hold space or the
 * pattern space. Only the pointer and the length are stored; nothing is
 * copied. 'str' is left untouched for any other value of 'space'. */
void qse_sed_getspace (qse_sed_t* sed, qse_sed_space_t space, qse_cstr_t* str)
{
	if (space == QSE_SED_SPACE_HOLD)
	{
		str->ptr = QSE_STR_PTR(&sed->e.txt.hold);
		str->len = QSE_STR_LEN(&sed->e.txt.hold);
	}
	else if (space == QSE_SED_SPACE_PATTERN)
	{
		str->ptr = QSE_STR_PTR(&sed->e.in.line);
		str->len = QSE_STR_LEN(&sed->e.in.line);
	}
}