1689 lines
32 KiB
C
1689 lines
32 KiB
C
/*
|
|
* $Id$
|
|
*
|
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
#include "sed.h"
|
|
#include "../cmn/mem.h"
|
|
#include <qse/cmn/rex.h>
|
|
|
|
/* TODO: delete stdio.h */
|
|
#include <qse/utl/stdio.h>
|
|
|
|
QSE_IMPLEMENT_COMMON_FUNCTIONS (sed)
|
|
|
|
static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd);
|
|
|
|
/*
 * Allocates a sed object with 'xtn' extra bytes after it and initializes
 * it with qse_sed_init(). When 'mmgr' is QSE_NULL the default memory
 * manager is used. Returns QSE_NULL if no memory manager is available,
 * if the allocation fails, or if the initialization fails.
 */
qse_sed_t* qse_sed_open (qse_mmgr_t* mmgr, qse_size_t xtn, qse_sed_prm_t* prm)
{
	qse_sed_t* obj;

	if (mmgr == QSE_NULL)
	{
		/* fall back to the default memory manager */
		mmgr = QSE_MMGR_GETDFL();

		QSE_ASSERTX (mmgr != QSE_NULL,
			"Set the memory manager with QSE_MMGR_SETDFL()");

		if (mmgr == QSE_NULL) return QSE_NULL;
	}

	obj = (qse_sed_t*) QSE_MMGR_ALLOC (mmgr, QSE_SIZEOF(qse_sed_t) + xtn);
	if (obj != QSE_NULL && qse_sed_init (obj, mmgr, prm) == QSE_NULL)
	{
		/* qse_sed_init() has stored the memory manager in the object
		 * before failing, so it can be used to release the object */
		QSE_MMGR_FREE (obj->mmgr, obj);
		obj = QSE_NULL;
	}

	return obj;
}
|
|
|
|
/*
 * Finalizes a sed object with qse_sed_fini() and releases the object
 * itself with the memory manager stored in it.
 */
void qse_sed_close (qse_sed_t* sed)
{
	qse_mmgr_t* mmgr;

	qse_sed_fini (sed);

	/* fetch the manager only after finalization, then free the object */
	mmgr = sed->mmgr;
	QSE_MMGR_FREE (mmgr, sed);
}
|
|
|
|
/*
 * Initializes a sed object in place: zeroes it, remembers the memory
 * manager, and sets up the regular expression buffer, the label map,
 * the command buffer and the appended-text array. It also builds the
 * character classifier from the primitives given in 'prm'.
 *
 * Returns 'sed' on success. On failure, everything acquired so far is
 * released, sed->errnum is set to QSE_SED_ENOMEM and QSE_NULL is returned.
 */
qse_sed_t* qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr, qse_sed_prm_t* prm)
{
	/* TODO: use different data structure... */
	enum { CMD_CAPA = 1000 }; /* number of slots in the command buffer */

	QSE_MEMSET (sed, 0, QSE_SIZEOF(*sed));
	sed->mmgr = mmgr;

	if (qse_str_init (&sed->rexbuf, mmgr, 0) == QSE_NULL) goto oops_0;

	if (qse_map_init (&sed->labs, mmgr, 128, 70) == QSE_NULL) goto oops_1;
	qse_map_setcopier (&sed->labs, QSE_MAP_KEY, QSE_MAP_COPIER_INLINE);
	qse_map_setscale (&sed->labs, QSE_MAP_KEY, QSE_SIZEOF(qse_char_t));

	sed->cmd.buf = QSE_MMGR_ALLOC (
		sed->mmgr, QSE_SIZEOF(qse_sed_cmd_t) * CMD_CAPA);
	if (sed->cmd.buf == QSE_NULL) goto oops_2;
	sed->cmd.cur = sed->cmd.buf;
	/* 'end' points to the last slot, not one past it */
	sed->cmd.end = sed->cmd.buf + CMD_CAPA - 1;

	if (qse_lda_init (&sed->text_appended, mmgr, 32) == QSE_NULL) goto oops_3;

	/* build a character classifier from the primitive functions */
	sed->ccls.is = (qse_ccls_is_t) prm->isccls;
	sed->ccls.to = (qse_ccls_to_t) prm->toccls;
	sed->ccls.data = sed;

	return sed;

	/* release in the reverse order of acquisition */
oops_3:
	QSE_MMGR_FREE (sed->mmgr, sed->cmd.buf);
oops_2:
	qse_map_fini (&sed->labs);
oops_1:
	qse_str_fini (&sed->rexbuf);
oops_0:
	/* every failure above is an allocation failure */
	sed->errnum = QSE_SED_ENOMEM;
	return QSE_NULL;
}
|
|
|
|
/*
 * Releases everything acquired by qse_sed_init() and by compilation:
 * the appended-text array, every compiled command along with the command
 * buffer, the label map and the regular expression buffer. The object
 * itself is not freed.
 */
void qse_sed_fini (qse_sed_t* sed)
{
	qse_sed_cmd_t* cur;

	qse_lda_fini (&sed->text_appended);

	/* TODO: use a different data structure -> look at qse_sed_init */
	/* only the slots before sed->cmd.cur hold compiled commands */
	cur = sed->cmd.buf;
	while (cur != sed->cmd.cur) free_command (sed, cur++);
	QSE_MMGR_FREE (sed->mmgr, sed->cmd.buf);

	qse_map_fini (&sed->labs);
	qse_str_fini (&sed->rexbuf);
}
|
|
|
|
/*
 * Returns a static message describing sed->errnum. The table is indexed
 * directly by the error number; a number outside the table yields
 * "unknown error".
 */
const qse_char_t* qse_sed_geterrmsg (qse_sed_t* sed)
{
	static const qse_char_t* errmsg[] =
	{
		QSE_T("no error"),
		QSE_T("out of memory"),
		QSE_T("too much text"),
		QSE_T("command not recognized"),
		QSE_T("command missing"),
		QSE_T("command garbled"),
		QSE_T("regular expression build error"),
		QSE_T("regular expression match error"),
		QSE_T("address 1 prohibited"),
		QSE_T("address 2 prohibited"),
		QSE_T("a new line expected"),
		QSE_T("a backslash expected"),
		QSE_T("a backslash used as a delimiter"),
		QSE_T("garbage after a backslash"),
		QSE_T("a semicolon expected"),
		QSE_T("label name too long"),
		QSE_T("empty label name"),
		QSE_T("duplicate label name"),
		QSE_T("empty file name"),
		QSE_T("illegal file name"),
		QSE_T("command not terminated properly"),
		QSE_T("strings in translation set not the same length"),
		QSE_T("group brackets not balanced"),
		QSE_T("group nesting too deep"),
		QSE_T("multiple occurrence specifier"),
		QSE_T("occurrence specifier is zero"),
		QSE_T("occurrence specifier too large"),
		QSE_T("error returned by user io handler")
	};

	/* index 0 ("no error") is a valid entry, so the lower bound
	 * must be inclusive */
	return (sed->errnum >= 0 && sed->errnum < QSE_COUNTOF(errmsg))?
		errmsg[sed->errnum]: QSE_T("unknown error");
}
|
|
|
|
/*
 * Stores 'option' as the option value of the sed object. The previous
 * value is overwritten, not merged.
 */
void qse_sed_setoption (qse_sed_t* sed, int option)
{
	sed->option = option;
}
|
|
|
|
/*
 * Returns the option value currently stored in the sed object.
 */
int qse_sed_getoption (qse_sed_t* sed)
{
	int option = sed->option;
	return option;
}
|
|
|
|
/*
 * Returns a pointer to the character classifier embedded in the sed
 * object. The pointer refers to storage inside 'sed'.
 */
qse_ccls_t* qse_sed_getccls (qse_sed_t* sed)
{
	qse_ccls_t* ccls = &sed->ccls;
	return ccls;
}
|
|
|
|
/* get the current character of the source code.
 * it yields QSE_CHAR_EOF once the end of the source is reached */
#define CURSC(sed) \
	(((sed)->src.cur < (sed)->src.end)? (*(sed)->src.cur): QSE_CHAR_EOF)

/* advance the current pointer of the source code by one character.
 * the pointer stops at the end of the source and never moves past it */
#define ADVSCP(sed) \
	((sed)->src.cur += (((sed)->src.cur < (sed)->src.end)? 1: 0))

/* advance the current pointer and get the character it then points to.
 * like ADVSCP, it never moves the pointer past the end of the source, so
 * it can be called repeatedly at the end and keeps yielding QSE_CHAR_EOF */
#define NXTSC(sed) \
	((((sed)->src.cur < (sed)->src.end) && \
	  (++(sed)->src.cur < (sed)->src.end))? \
		(*(sed)->src.cur): QSE_CHAR_EOF)

/* check if c is a space character */
#define IS_SPACE(c) ((c) == QSE_T(' ') || (c) == QSE_T('\t'))
/* check if c is a line terminator */
#define IS_LINTERM(c) ((c) == QSE_T('\n') || (c) == QSE_T('\r'))
/* check if c is a space character or a line terminator */
#define IS_WSPACE(c) (IS_SPACE(c) || IS_LINTERM(c))

/* check if c is a label terminator excluding a space character */
#define IS_CMDTERM(c) \
	((c) == QSE_CHAR_EOF || (c) == QSE_T('#') || \
	 (c) == QSE_T(';') || IS_LINTERM(c))
|
|
|
|
/*
 * Frees the compiled regular expressions held by the two addresses of a
 * command. An address that held one is reset to QSE_SED_A_NONE so that a
 * second call is harmless. Addresses of other types are left untouched.
 */
static void free_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_mmgr_t* mmgr = sed->mmgr;

	/* the second address goes first, then the first one */
	if (cmd->a2.type == QSE_SED_A_REX)
	{
		QSE_ASSERT (cmd->a2.u.rex != QSE_NULL);
		qse_freerex (mmgr, cmd->a2.u.rex);
		cmd->a2.type = QSE_SED_A_NONE;
	}

	if (cmd->a1.type == QSE_SED_A_REX)
	{
		QSE_ASSERT (cmd->a1.u.rex != QSE_NULL);
		qse_freerex (mmgr, cmd->a1.u.rex);
		cmd->a1.type = QSE_SED_A_NONE;
	}
}
|
|
|
|
/*
 * Frees everything a compiled command owns: the address regular
 * expressions first, then the memory that depends on the command type
 * (text, branch label, substitution parts, translation set, file name).
 * Command types not listed below own nothing else.
 */
static void free_command (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	/* the single type-dependent buffer to release, if there is one */
	void* owned = QSE_NULL;

	free_address (sed, cmd);

	switch (cmd->type)
	{
		case QSE_SED_CMD_A:
		case QSE_SED_CMD_C:
		case QSE_SED_CMD_I:
			owned = cmd->u.text.ptr;
			break;

		case QSE_SED_CMD_B:
		case QSE_SED_CMD_T:
			owned = cmd->u.branch.label.ptr;
			break;

		case QSE_SED_CMD_S:
			/* a substitution owns up to three things. the first
			 * two are released here and the regular expression
			 * right after them. */
			if (cmd->u.subst.file.ptr != QSE_NULL)
				QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.file.ptr);
			if (cmd->u.subst.rpl.ptr != QSE_NULL)
				QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.rpl.ptr);
			if (cmd->u.subst.rex != QSE_NULL)
				qse_freerex (sed->mmgr, cmd->u.subst.rex);
			break;

		case QSE_SED_CMD_Y:
			owned = cmd->u.transet.ptr;
			break;

		case QSE_SED_CMD_R:
		case QSE_SED_CMD_RR:
		case QSE_SED_CMD_W:
		case QSE_SED_CMD_WW:
			owned = cmd->u.file.ptr;
			break;
	}

	if (owned != QSE_NULL) QSE_MMGR_FREE (sed->mmgr, owned);
}
|
|
|
|
/*
 * Collects the text of a regular expression from the source code and
 * compiles it. The current source pointer must be at the opening
 * delimiter; collection starts at the next character and stops at the
 * first unescaped 'rxend'. On return the source pointer is at the
 * closing delimiter.
 *
 * Escapes: "\n" becomes a new line character, a backslash followed by
 * the delimiter becomes the delimiter alone, and any other escape is
 * kept as it is, including the backslash, so that the regular expression
 * compiler sees it.
 *
 * Returns the compiled code, which is also stored in sed->lastrex, or
 * QSE_NULL with sed->errnum set on failure.
 */
static void* compile_rex (qse_sed_t* sed, qse_char_t rxend)
{
	void* code;
	qse_cint_t c;

	qse_str_clear (&sed->rexbuf);

	for (;;)
	{
		c = NXTSC (sed);
		if (c == QSE_CHAR_EOF || c == QSE_T('\n'))
		{
			sed->errnum = QSE_SED_ETMTXT;
			return QSE_NULL;
		}

		if (c == rxend) break;

		if (c == QSE_T('\\'))
		{
			c = NXTSC (sed);
			if (c == QSE_CHAR_EOF || c == QSE_T('\n'))
			{
				sed->errnum = QSE_SED_ETMTXT;
				return QSE_NULL;
			}

			if (c == QSE_T('n')) c = QSE_T('\n');
			else if (c != rxend)
			{
				/* the escape is meant for the regular
				 * expression compiler. dropping the backslash
				 * here would turn '\.' into '.' */
				if (qse_str_ccat (&sed->rexbuf, QSE_T('\\')) == (qse_size_t)-1)
				{
					sed->errnum = QSE_SED_ENOMEM;
					return QSE_NULL;
				}
			}
			/* TODO: support more escaped characters?? */
		}

		if (qse_str_ccat (&sed->rexbuf, c) == (qse_size_t)-1)
		{
			sed->errnum = QSE_SED_ENOMEM;
			return QSE_NULL;
		}
	}

	/* TODO: maximum depth - optionize the second parameter */
	qse_printf (QSE_T("rexbuff=>[%.*s]\n"),
		(int)QSE_STR_LEN(&sed->rexbuf),
		QSE_STR_PTR(&sed->rexbuf));

	code = qse_buildrex (
		sed->mmgr, 0,
		QSE_STR_PTR(&sed->rexbuf),
		QSE_STR_LEN(&sed->rexbuf),
		QSE_NULL
	);
	if (code == QSE_NULL)
	{
		sed->errnum = QSE_SED_EREXBL;
		return QSE_NULL;
	}

	sed->lastrex = code;
	return code;
}
|
|
|
|
/*
 * Parses one address at the current source position into 'a'.
 *
 *   $        the last line         (QSE_SED_A_DOL)
 *   digits   a line number         (QSE_SED_A_LINE)
 *   /rex/    a regular expression  (QSE_SED_A_REX)
 *   \crexc   a regular expression delimited by the character c
 *
 * If none of them is found, the address type is set to QSE_SED_A_NONE
 * and nothing is consumed. Returns 'a' on success and QSE_NULL on failure.
 */
static qse_sed_a_t* address (qse_sed_t* sed, qse_sed_a_t* a)
{
	qse_cint_t ch = CURSC (sed);

	if (ch == QSE_T('$'))
	{
		a->type = QSE_SED_A_DOL;
		ADVSCP (sed);
		return a;
	}

	if (ch >= QSE_T('0') && ch <= QSE_T('9'))
	{
		qse_sed_line_t num = 0;

		for (;;)
		{
			num = num * 10 + ch - QSE_T('0');
			ADVSCP (sed);
			ch = CURSC (sed);
			if (ch < QSE_T('0') || ch > QSE_T('9')) break;
		}

		/* line number 0 is illegal */
		/* NOTE(review): sed->errnum is not set on this path */
		if (num == 0) return QSE_NULL;

		a->type = QSE_SED_A_LINE;
		a->u.line = num;
		return a;
	}

	if (ch == QSE_T('\\'))
	{
		/* the character after the backslash is the delimiter */
		ch = NXTSC (sed);
		if (ch == QSE_CHAR_EOF || ch == QSE_T('\n'))
		{
			/* TODO: change error code -
			 * unterminated address regular expression */
			sed->errnum = QSE_SED_ETMTXT;
			return QSE_NULL;
		}
	}
	else if (ch != QSE_T('/'))
	{
		/* no address is given */
		a->type = QSE_SED_A_NONE;
		return a;
	}

	/* 'ch' is the delimiter of a regular expression at this point */
	if (compile_rex (sed, ch) == QSE_NULL) return QSE_NULL;

	a->u.rex = sed->lastrex;
	a->type = QSE_SED_A_REX;
	ADVSCP (sed); /* skip the closing delimiter */

	return a;
}
|
|
|
|
|
|
/* get the text for the 'a', 'i', and 'c' commands.
|
|
* POSIX:
|
|
* The argument text shall consist of one or more lines. Each embedded
|
|
* <newline> in the text shall be preceded by a backslash. Other backslashes
|
|
* in text shall be removed, and the following character shall be treated
|
|
* literally. */
|
|
static int get_text (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|
{
|
|
#define ADD(sed,str,c,errlabel) \
|
|
do { \
|
|
if (qse_str_ccat (str, c) == (qse_size_t)-1) \
|
|
{ \
|
|
sed->errnum = QSE_SED_ENOMEM; \
|
|
goto errlabel; \
|
|
} \
|
|
} while (0)
|
|
|
|
qse_cint_t c;
|
|
qse_str_t* t = QSE_NULL;
|
|
|
|
t = qse_str_open (sed->mmgr, 0, 128);
|
|
if (t == QSE_NULL) goto oops;
|
|
|
|
do
|
|
{
|
|
c = CURSC (sed);
|
|
|
|
if (sed->option & QSE_SED_STRIPLS)
|
|
{
|
|
/* get the first non-space character */
|
|
while (IS_SPACE(c)) c = NXTSC (sed);
|
|
}
|
|
|
|
while (c != QSE_CHAR_EOF)
|
|
{
|
|
int nl = 0;
|
|
|
|
if (c == QSE_T('\\'))
|
|
{
|
|
c = NXTSC (sed);
|
|
if (c == QSE_CHAR_EOF)
|
|
{
|
|
if (sed->option & QSE_SED_KEEPTBS)
|
|
ADD (sed, t, QSE_T('\\'), oops);
|
|
|
|
break;
|
|
}
|
|
}
|
|
else if (c == QSE_T('\n')) nl = 1;
|
|
|
|
ADD (sed, t, c, oops);
|
|
|
|
if (c == QSE_T('\n'))
|
|
{
|
|
ADVSCP (sed);
|
|
if (nl) goto done;
|
|
break;
|
|
}
|
|
|
|
c = NXTSC (sed);
|
|
}
|
|
}
|
|
while (c != QSE_CHAR_EOF);
|
|
|
|
done:
|
|
if ((sed->option & QSE_SED_ENSURENL) && c != QSE_T('\n'))
|
|
{
|
|
ADD (sed, t, QSE_T('\n'), oops);
|
|
}
|
|
|
|
qse_str_yield (t, &cmd->u.text, 0);
|
|
qse_str_close (t);
|
|
return 0;
|
|
|
|
oops:
|
|
if (t != QSE_NULL) qse_str_close (t);
|
|
return -1;
|
|
|
|
#undef ADD
|
|
}
|
|
|
|
/*
 * Reads a label name from the current source position and registers it
 * in the label map with 'cmd' as its value. The name ends at a space or
 * at a command terminator. An empty name and a name already registered
 * are errors.
 *
 * Returns 0 on success and -1 with sed->errnum set on failure.
 */
static int get_label (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_str_t* name = QSE_NULL; /* TODO: move this buffer to sed */
	qse_cint_t ch;

	/* skip white spaces */
	for (ch = CURSC (sed); IS_SPACE(ch); ch = NXTSC (sed)) ;

	if (IS_CMDTERM(ch))
	{
		/* label name is empty */
		sed->errnum = QSE_SED_ELABEM;
		goto oops;
	}

	/* TODO: change name to qse_str_t name; and use qse_str_yield(name)
	 * to remember branch text - in that case make '\0' an illegal
	 * character for the label name or can remember the length for the
	 * text for '\0' to be legal */
	name = qse_str_open (sed->mmgr, 0, 32);
	if (name == QSE_NULL)
	{
		sed->errnum = QSE_SED_ENOMEM;
		goto oops;
	}

	/* the first character is known to belong to the name */
	for (;;)
	{
		if (qse_str_ccat (name, ch) == (qse_size_t)-1)
		{
			sed->errnum = QSE_SED_ENOMEM;
			goto oops;
		}

		ch = NXTSC (sed);
		if (IS_CMDTERM(ch) || IS_SPACE(ch)) break;
	}

	if (qse_map_search (
		&sed->labs, QSE_STR_PTR(name), QSE_STR_LEN(name)) != QSE_NULL)
	{
		sed->errnum = QSE_SED_ELABDU;
		goto oops;
	}

	if (qse_map_insert (
		&sed->labs, QSE_STR_PTR(name), QSE_STR_LEN(name), cmd, 0) == QSE_NULL)
	{
		sed->errnum = QSE_SED_ENOMEM;
		goto oops;
	}

	/* the label can be followed by a command on the same line without
	 * a semicolon as in ':label p'. */
	if (ch != QSE_T('#') && ch != QSE_CHAR_EOF) ADVSCP (sed);

	qse_str_close (name);
	return 0;

oops:
	if (name != QSE_NULL) qse_str_close (name);
	return -1;
}
|
|
|
|
/*
 * Checks that a command is terminated properly. Spaces are skipped and
 * the next character must be a command terminator, which is consumed
 * unless it is '#' or the end of the source.
 *
 * Returns 0 on success. Otherwise, it sets sed->errnum to QSE_SED_ESCEXP
 * and returns -1.
 */
static int terminate_command (qse_sed_t* sed)
{
	qse_cint_t ch;

	for (ch = CURSC (sed); IS_SPACE(ch); ch = NXTSC (sed)) ;

	if (IS_CMDTERM(ch))
	{
		/* if the target is terminated by #, it should let the caller
		 * to skip the comment text. so don't read in the next
		 * character */
		if (ch != QSE_T('#') && ch != QSE_CHAR_EOF) ADVSCP (sed);
		return 0;
	}

	sed->errnum = QSE_SED_ESCEXP;
	return -1;
}
|
|
|
|
/*
 * Reads the target of a branch command.
 *
 * If no label follows, the label and the target of the command are
 * cleared. If the label is already in the label map, the command mapped
 * to it becomes the target. Otherwise, the label text is stored in the
 * command and the target is left as QSE_NULL.
 *
 * Returns 0 on success and -1 with sed->errnum set on failure.
 */
static int get_branch_target (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_cint_t ch;
	qse_str_t* label = QSE_NULL;
	qse_map_pair_t* found;

	/* skip white spaces */
	for (ch = CURSC (sed); IS_SPACE(ch); ch = NXTSC (sed)) ;

	if (IS_CMDTERM(ch))
	{
		/* no branch target is given -
		 * a branch command without a target should cause
		 * sed to jump to the end of a script.
		 */
		cmd->u.branch.label.ptr = QSE_NULL;
		cmd->u.branch.label.len = 0;
		cmd->u.branch.target = QSE_NULL;
		return terminate_command (sed);
	}

	label = qse_str_open (sed->mmgr, 0, 32);
	if (label == QSE_NULL)
	{
		sed->errnum = QSE_SED_ENOMEM;
		goto oops;
	}

	/* collect the label up to a space or a command terminator */
	for (;;)
	{
		if (qse_str_ccat (label, ch) == (qse_size_t)-1)
		{
			sed->errnum = QSE_SED_ENOMEM;
			goto oops;
		}

		ch = NXTSC (sed);
		if (IS_CMDTERM(ch) || IS_SPACE(ch)) break;
	}

	if (terminate_command (sed) <= -1) goto oops;

	found = qse_map_search (
		&sed->labs, QSE_STR_PTR(label), QSE_STR_LEN(label));
	if (found != QSE_NULL)
	{
		/* the label has been seen already */
		cmd->u.branch.label.ptr = QSE_NULL;
		cmd->u.branch.label.len = 0;
		cmd->u.branch.target = QSE_MAP_VPTR(found);
	}
	else
	{
		/* label not resolved yet */
		qse_str_yield (label, &cmd->u.branch.label, 0);
		cmd->u.branch.target = QSE_NULL;
	}

	qse_str_close (label);
	return 0;

oops:
	if (label != QSE_NULL) qse_str_close (label);
	return -1;
}
|
|
|
|
/*
 * Reads a file name from the current source position into 'xstr'.
 *
 * Leading spaces are skipped and the name runs up to a command
 * terminator. A backslash makes the following character part of the
 * name ("\n" stands for a new line character). Unescaped spaces at the
 * end of the name are removed. A '\0' character in the name is illegal.
 *
 * Returns 0 on success and -1 with sed->errnum set on failure.
 */
static int get_file (qse_sed_t* sed, qse_xstr_t* xstr)
{
	qse_cint_t ch;
	qse_str_t* name = QSE_NULL;
	/* the number of unescaped spaces at the end of the name so far */
	qse_size_t nspaces = 0;

	/* skip white spaces */
	for (ch = CURSC (sed); IS_SPACE(ch); ch = NXTSC (sed)) ;

	if (IS_CMDTERM(ch))
	{
		sed->errnum = QSE_SED_EFILEM;
		goto oops;
	}

	name = qse_str_open (sed->mmgr, 0, 32);
	if (name == QSE_NULL)
	{
		sed->errnum = QSE_SED_ENOMEM;
		goto oops;
	}

	for (;;)
	{
		if (ch == QSE_T('\0'))
		{
			/* the file name should not contain '\0' */
			sed->errnum = QSE_SED_EFILIL;
			goto oops;
		}

		nspaces = IS_SPACE(ch)? (nspaces + 1): 0;

		if (ch == QSE_T('\\'))
		{
			ch = NXTSC (sed);
			if (ch == QSE_T('\0') ||
			    ch == QSE_CHAR_EOF ||
			    IS_LINTERM(ch))
			{
				sed->errnum = QSE_SED_EFILIL;
				goto oops;
			}

			if (ch == QSE_T('n')) ch = QSE_T('\n');
		}

		if (qse_str_ccat (name, ch) == (qse_size_t)-1)
		{
			sed->errnum = QSE_SED_ENOMEM;
			goto oops;
		}

		ch = NXTSC (sed);
		if (IS_CMDTERM(ch)) break;
	}

	if (terminate_command (sed) <= -1) goto oops;

	/* cut off the trailing spaces */
	if (nspaces > 0)
	{
		qse_str_setlen (name, QSE_STR_LEN(name) - nspaces);
	}

	qse_str_yield (name, xstr, 0);
	qse_str_close (name);
	return 0;

oops:
	if (name != QSE_NULL) qse_str_close (name);
	return -1;
}
|
|
|
|
/*
 * Parses the body of the 's' command: the pattern, the replacement and
 * the options. The current source pointer must be at the delimiter.
 *
 * Options are 'p', 'i', 'g', an occurrence number and 'w file'. The
 * pattern is compiled into cmd->u.subst.rex and the replacement text is
 * handed over to cmd->u.subst.rpl. If neither 'g' nor an occurrence
 * number is given, the occurrence is set to 1.
 *
 * Escapes: "\n" becomes a new line character in both parts. In the
 * pattern, a backslash followed by the delimiter becomes the delimiter
 * alone and any other escape is kept with its backslash for the regular
 * expression compiler. In the replacement, the backslash is removed.
 * NOTE(review): removing it loses the distinction between '\1', '\&' and
 * the plain characters - confirm against the code that applies the
 * replacement.
 *
 * Returns 0 on success and -1 with sed->errnum set on failure. On
 * failure, nothing allocated here is left behind in 'cmd'.
 */
static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_cint_t c, delim;
	qse_str_t* t[2] = { QSE_NULL, QSE_NULL };
	int i;

	c = CURSC (sed);
	if (c == QSE_CHAR_EOF || IS_LINTERM(c))
	{
		/* not terminated properly */
		sed->errnum = QSE_SED_ENOTRM;
		goto oops;
	}

	delim = c;
	if (delim == QSE_T('\\'))
	{
		/* backslash is an illegal delimiter */
		sed->errnum = QSE_SED_EBSDEL;
		goto oops;
	}

	for (i = 0; i < 2; i++)
	{
		t[i] = qse_str_open (sed->mmgr, 0, 32);
		if (t[i] == QSE_NULL)
		{
			sed->errnum = QSE_SED_ENOMEM;
			goto oops;
		}
	}

	/* t[0] gets the pattern and t[1] gets the replacement */
	for (i = 0; i < 2; i++)
	{
		c = NXTSC (sed);

		while (c != delim)
		{
			if (c == QSE_CHAR_EOF || IS_LINTERM(c))
			{
				sed->errnum = QSE_SED_ENOTRM;
				goto oops;
			}

			if (c == QSE_T('\\'))
			{
				c = NXTSC (sed);
				if (c == QSE_CHAR_EOF || IS_LINTERM(c))
				{
					sed->errnum = QSE_SED_ENOTRM;
					goto oops;
				}

				if (c == QSE_T('n')) c = QSE_T('\n');
				else if (i == 0 && c != delim)
				{
					/* the escape is meant for the regular
					 * expression compiler. dropping the
					 * backslash would turn '\.' into '.' */
					if (qse_str_ccat (t[0], QSE_T('\\')) == (qse_size_t)-1)
					{
						sed->errnum = QSE_SED_ENOMEM;
						goto oops;
					}
				}
			}

			if (qse_str_ccat (t[i], c) == (qse_size_t)-1)
			{
				sed->errnum = QSE_SED_ENOMEM;
				goto oops;
			}

			c = NXTSC (sed);
		}
	}

	/* skip spaces before options */
	do { c = NXTSC(sed); } while (IS_SPACE(c));

	/* get options */
	do
	{
		if (c == QSE_T('p'))
		{
			cmd->u.subst.p = 1;
			c = NXTSC (sed);
		}
		else if (c == QSE_T('i'))
		{
			cmd->u.subst.i = 1;
			c = NXTSC (sed);
		}
		else if (c == QSE_T('g'))
		{
			cmd->u.subst.g = 1;
			c = NXTSC (sed);
		}
		else if (c >= QSE_T('0') && c <= QSE_T('9'))
		{
			unsigned long occ;

			if (cmd->u.subst.occ != 0)
			{
				sed->errnum = QSE_SED_EOCSDU;
				goto oops;
			}

			occ = 0;

			do
			{
				occ = occ * 10 + (c - QSE_T('0'));
				if (occ > QSE_TYPE_MAX(unsigned short))
				{
					sed->errnum = QSE_SED_EOCSTL;
					goto oops;
				}
				c = NXTSC (sed);
			}
			while (c >= QSE_T('0') && c <= QSE_T('9'));

			if (occ == 0)
			{
				sed->errnum = QSE_SED_EOCSZE;
				goto oops;
			}

			cmd->u.subst.occ = occ;
		}
		else if (c == QSE_T('w'))
		{
			ADVSCP (sed);
			/* go through the common exit so that the temporary
			 * strings are closed */
			if (get_file (sed, &cmd->u.subst.file) <= -1) goto oops;
			break;
		}
		else break;
	}
	while (1);

	/* call terminate_command() if the 'w' option is not specified.
	 * if the 'w' option is given, it is called in get_file(). */
	if (cmd->u.subst.file.ptr == QSE_NULL &&
	    terminate_command (sed) <= -1) goto oops;

	QSE_ASSERT (cmd->u.subst.rex == QSE_NULL);
qse_printf (QSE_T("buildrex 222222\n"));
	cmd->u.subst.rex = qse_buildrex (
		sed->mmgr, 0,
		QSE_STR_PTR(t[0]),
		QSE_STR_LEN(t[0]),
		QSE_NULL
	);
	if (cmd->u.subst.rex == QSE_NULL)
	{
		sed->errnum = QSE_SED_EREXBL;
		goto oops;
	}

	qse_str_yield (t[1], &cmd->u.subst.rpl, 0);
	if (cmd->u.subst.g == 0 && cmd->u.subst.occ == 0) cmd->u.subst.occ = 1;

	qse_str_close (t[1]);
	qse_str_close (t[0]);

	return 0;

oops:
	/* the file name of the 'w' option may have been taken already */
	if (cmd->u.subst.file.ptr != QSE_NULL)
	{
		QSE_MMGR_FREE (sed->mmgr, cmd->u.subst.file.ptr);
		cmd->u.subst.file.ptr = QSE_NULL;
		cmd->u.subst.file.len = 0;
	}
	if (t[1] != QSE_NULL) qse_str_close (t[1]);
	if (t[0] != QSE_NULL) qse_str_close (t[0]);
	return -1;
}
|
|
|
|
/*
 * Parses the body of the 'y' command. The current source pointer must be
 * at the delimiter.
 *
 * The result handed over to cmd->u.transet is a string of character
 * pairs: the character at an even index comes from the source set and
 * the character right after it is its counterpart in the target set.
 * "\n" stands for a new line character in both sets and a backslash
 * before any other character is removed. The two sets must have the
 * same length.
 *
 * Returns 0 on success and -1 with sed->errnum set on failure.
 */
static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_cint_t c, delim;
	qse_str_t* t = QSE_NULL;
	qse_size_t pos;

	c = CURSC (sed);
	if (c == QSE_CHAR_EOF || IS_LINTERM(c))
	{
		/* translation set terminated prematurely*/
		sed->errnum = QSE_SED_ENOTRM;
		goto oops;
	}

	delim = c;
	if (delim == QSE_T('\\'))
	{
		/* backslash is an illegal delimiter */
		sed->errnum = QSE_SED_EBSDEL;
		goto oops;
	}

	t = qse_str_open (sed->mmgr, 0, 32);
	if (t == QSE_NULL)
	{
		sed->errnum = QSE_SED_ENOMEM;
		goto oops;
	}

	/* read the source set, leaving a slot after each character */
	c = NXTSC (sed);
	while (c != delim)
	{
		qse_char_t b[2];

		if (c == QSE_CHAR_EOF || IS_LINTERM(c))
		{
			sed->errnum = QSE_SED_ENOTRM;
			goto oops;
		}

		if (c == QSE_T('\\'))
		{
			c = NXTSC (sed);
			if (c == QSE_CHAR_EOF || IS_LINTERM(c))
			{
				sed->errnum = QSE_SED_ENOTRM;
				goto oops;
			}

			if (c == QSE_T('n')) c = QSE_T('\n');
		}

		b[0] = c;
		/* a placeholder to be overwritten with the counterpart in
		 * the target set. it must hold a defined value as both
		 * elements are copied below */
		b[1] = c;
		if (qse_str_ncat (t, b, 2) == (qse_size_t)-1)
		{
			sed->errnum = QSE_SED_ENOMEM;
			goto oops;
		}

		c = NXTSC (sed);
	}

	/* read the target set into the slots at the odd indices */
	c = NXTSC (sed);
	for (pos = 1; c != delim; pos += 2)
	{
		if (c == QSE_CHAR_EOF || IS_LINTERM(c))
		{
			sed->errnum = QSE_SED_ENOTRM;
			goto oops;
		}

		if (c == QSE_T('\\'))
		{
			c = NXTSC (sed);
			if (c == QSE_CHAR_EOF || IS_LINTERM(c))
			{
				sed->errnum = QSE_SED_ENOTRM;
				goto oops;
			}

			if (c == QSE_T('n')) c = QSE_T('\n');
		}

		if (pos >= QSE_STR_LEN(t))
		{
			/* source and target not the same length */
			sed->errnum = QSE_SED_ETSNSL;
			goto oops;
		}

		QSE_STR_CHAR(t,pos) = c;
		c = NXTSC (sed);
	}

	if (pos < QSE_STR_LEN(t))
	{
		/* source and target not the same length */
		sed->errnum = QSE_SED_ETSNSL;
		goto oops;
	}

	ADVSCP (sed); /* skip the closing delimiter */
	if (terminate_command (sed) <= -1) goto oops;

	qse_str_yield (t, &cmd->u.transet, 0);
	qse_str_close (t);
	return 0;

oops:
	if (t != QSE_NULL) qse_str_close (t);
	return -1;
}
|
|
|
|
/*
 * Parses the command at the current source position into the current
 * command slot (sed->cmd.cur). The addresses of the slot are expected to
 * have been filled in by the caller.
 *
 * Returns 1 if a command has been stored in the slot, 0 if the input
 * has been consumed without producing a command (a closing '}'), and
 * -1 with sed->errnum set on failure. A label is not a command; parsing
 * restarts right after it.
 */
static int command (qse_sed_t* sed)
{
	qse_cint_t c;
	qse_sed_cmd_t* cmd = sed->cmd.cur;

restart:
	c = CURSC (sed);
	switch (c)
	{
		default:
qse_printf (QSE_T("command not recognized [%c]\n"), c);
			sed->errnum = QSE_SED_ECMDNR;
			return -1;

		case QSE_CHAR_EOF:
			sed->errnum = QSE_SED_ECMDMS;
			return -1;

		case QSE_T(':'):
			/* label - this is not a command */
			cmd->type = c;
			if (cmd->a1.type != QSE_SED_A_NONE)
			{
				/* label cannot have an address */
				sed->errnum = QSE_SED_EA1PHB;
				return -1;
			}

			ADVSCP (sed);
			if (get_label (sed, cmd) <= -1) return -1;
			goto restart;

		case QSE_T('{'):
			/* insert a negated branch command at the beginning
			 * of a group. this way, all the commands in a group
			 * can be skipped. the branch target is set once a
			 * corresponding } is met. */
			cmd->type = QSE_SED_CMD_B;
			cmd->negated = !cmd->negated;

			if (sed->grplvl >= QSE_COUNTOF(sed->grpcmd))
			{
				/* group nesting too deep */
				sed->errnum = QSE_SED_EGRNTD;
				return -1;
			}

			sed->grpcmd[sed->grplvl++] = cmd;
			ADVSCP (sed);
			break;

		case QSE_T('}'):
			if (sed->grplvl <= 0)
			{
				/* group not balanced */
				sed->errnum = QSE_SED_EGRNBA;
				return -1;
			}

			/* the slot is not consumed. it is where the command
			 * following the group is going to be stored. */
			sed->grpcmd[--sed->grplvl]->u.branch.target = cmd;
			ADVSCP (sed);
			return 0;

		case QSE_T('='):
			cmd->type = c;
			if (sed->option & QSE_SED_CLASSIC &&
			    cmd->a2.type != QSE_SED_A_NONE)
			{
				sed->errnum = QSE_SED_EA2PHB;
				return -1;
			}

			ADVSCP (sed);
			if (terminate_command (sed) <= -1) return -1;
qse_printf (QSE_T("command %c\n"), cmd->type);
			break;

		case QSE_T('q'):
		case QSE_T('Q'):
			cmd->type = c;
			if (cmd->a2.type != QSE_SED_A_NONE)
			{
				sed->errnum = QSE_SED_EA2PHB;
				return -1;
			}

			ADVSCP (sed);
			if (terminate_command (sed) <= -1) return -1;
qse_printf (QSE_T("command %c\n"), cmd->type);
			break;

		case QSE_T('a'):
		case QSE_T('i'):
		case QSE_T('c'):
		{
			cmd->type = c;

			/* TODO: this check for A and I
			if (cmd->a2.type != QSE_SED_A_NONE)
			{
				sed->errnum = QSE_SED_EA2PHB;
				return -1;
			}
			*/

			/* the command character must be followed by
			 * a backslash and a new line */
			c = NXTSC (sed);
			while (IS_SPACE(c)) c = NXTSC (sed);

			if (c != QSE_T('\\'))
			{
				sed->errnum = QSE_SED_EBSEXP;
				return -1;
			}

			c = NXTSC (sed);
			while (IS_SPACE(c)) c = NXTSC (sed);

			if (c != QSE_CHAR_EOF && c != QSE_T('\n'))
			{
				sed->errnum = QSE_SED_EGBABS;
				return -1;
			}

			ADVSCP (sed); /* skip the new line */

			/* get_text() starts from the next line */
			if (get_text (sed, cmd) <= -1) return -1;

			break;
		}

		case QSE_T('d'):
		case QSE_T('D'):
		case QSE_T('h'):
		case QSE_T('H'):
		case QSE_T('g'):
		case QSE_T('G'):
		case QSE_T('l'):
		case QSE_T('n'):
		case QSE_T('N'):
		case QSE_T('p'):
		case QSE_T('P'):
		case QSE_T('x'):
			cmd->type = c;
			ADVSCP (sed);
			if (terminate_command (sed) <= -1) return -1;
qse_printf (QSE_T("command %c\n"), cmd->type);
			break;

		case QSE_T('b'):
		case QSE_T('t'):
			cmd->type = c;
			ADVSCP (sed);
			if (get_branch_target (sed, cmd) <= -1) return -1;
			if (cmd->u.branch.label.ptr != QSE_NULL)
			{
				/* the precision of %.*s must be an int */
qse_printf (QSE_T("cmd->u.branch.label = [%.*s]\n"),
	(int)cmd->u.branch.label.len, cmd->u.branch.label.ptr);
			}
			else
			{
qse_printf (QSE_T("cmd->u.branch.target = [%p]\n"), cmd->u.branch.target);
			}
			break;

		case QSE_T('r'):
		case QSE_T('R'):
		case QSE_T('w'):
		case QSE_T('W'):
			cmd->type = c;
			ADVSCP (sed);
			if (get_file (sed, &cmd->u.file) <= -1) return -1;

qse_printf (QSE_T("cmd->u.file= [%.*s]\n"), (int)cmd->u.file.len, cmd->u.file.ptr);
			break;

		case QSE_T('s'):
			cmd->type = c;
			ADVSCP (sed);
			if (get_subst (sed, cmd) <= -1) return -1;
qse_printf (QSE_T("rpl= [%.*s]\n"), (int)cmd->u.subst.rpl.len, cmd->u.subst.rpl.ptr);
qse_printf (QSE_T("g=%u p=%u i=%u occ=%d\n"),
	cmd->u.subst.g,
	cmd->u.subst.p,
	cmd->u.subst.i,
	cmd->u.subst.occ
);
qse_printf (QSE_T("w=[%.*s]\n"),
	(int)cmd->u.subst.file.len,
	cmd->u.subst.file.ptr
);
			break;

		case QSE_T('y'):
			cmd->type = c;
			ADVSCP (sed);
			if (get_transet (sed, cmd) <= -1) return -1;
			break;
	}

	return 1;
}
|
|
|
|
static int compile_source (
|
|
qse_sed_t* sed, const qse_char_t* ptr, qse_size_t len)
|
|
{
|
|
qse_cint_t c;
|
|
qse_sed_cmd_t* cmd = sed->cmd.cur;
|
|
|
|
/* store the source code pointers */
|
|
sed->src.ptr = ptr;
|
|
sed->src.end = ptr + len;
|
|
sed->src.cur = ptr;
|
|
|
|
/*
|
|
* # comment
|
|
* :label
|
|
* zero-address-command
|
|
* address[!] one-address-command
|
|
* address-range[!] address-range-command
|
|
*/
|
|
while (1)
|
|
{
|
|
int n;
|
|
|
|
c = CURSC (sed);
|
|
|
|
/* skip white spaces and comments*/
|
|
while (IS_WSPACE(c)) c = NXTSC (sed);
|
|
if (c == QSE_T('#'))
|
|
{
|
|
do c = NXTSC (sed); while (!IS_LINTERM(c));
|
|
ADVSCP (sed);
|
|
continue;
|
|
}
|
|
|
|
/* check if it has reached the end or is commented */
|
|
if (c == QSE_CHAR_EOF) break;
|
|
|
|
if (c == QSE_T(';'))
|
|
{
|
|
/* semicolon without a address-command pair */
|
|
ADVSCP (sed);
|
|
continue;
|
|
}
|
|
|
|
/* initialize the current command */
|
|
QSE_MEMSET (cmd, 0, QSE_SIZEOF(*cmd));
|
|
|
|
/* process address */
|
|
if (address (sed, &cmd->a1) == QSE_NULL) return -1;
|
|
|
|
c = CURSC (sed);
|
|
if (cmd->a1.type != QSE_SED_A_NONE)
|
|
{
|
|
/* if (cmd->a1.type == QSE_SED_A_LAST)
|
|
{
|
|
// TODO: ????
|
|
} */
|
|
if (c == QSE_T(',') || c == QSE_T(';'))
|
|
{
|
|
/* maybe an address range */
|
|
ADVSCP (sed);
|
|
|
|
/* TODO: skip white spaces??? */
|
|
if (address (sed, &cmd->a2) == QSE_NULL)
|
|
{
|
|
QSE_ASSERT (cmd->a2.type == QSE_SED_A_NONE);
|
|
free_address (sed, cmd);
|
|
return -1;
|
|
}
|
|
|
|
c = CURSC (sed);
|
|
}
|
|
else cmd->a2.type = QSE_SED_A_NONE;
|
|
}
|
|
|
|
/* skip white spaces */
|
|
while (IS_SPACE(c)) c = NXTSC (sed);
|
|
|
|
if (c == QSE_T('!'))
|
|
{
|
|
/* negate */
|
|
cmd->negated = 1;
|
|
}
|
|
|
|
n = command (sed);
|
|
if (n <= -1)
|
|
{
|
|
free_address (sed, cmd);
|
|
return -1;
|
|
}
|
|
if (n > 0)
|
|
{
|
|
QSE_ASSERT (n == 1);
|
|
|
|
if (sed->cmd.cur >= sed->cmd.end)
|
|
{
|
|
/* TODO: too many commands. change errnum */
|
|
free_command (sed, cmd);
|
|
sed->errnum = QSE_SED_ENOMEM;
|
|
return -1;
|
|
}
|
|
|
|
cmd = ++sed->cmd.cur;
|
|
}
|
|
}
|
|
|
|
if (sed->grplvl != 0)
|
|
{
|
|
sed->errnum = QSE_SED_EGRNBA;
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Compiles the script of 'slen' characters at 'sptr'. It passes its
 * arguments to compile_source() and returns what compile_source()
 * returns.
 */
int qse_sed_compile (qse_sed_t* sed, const qse_char_t* sptr, qse_size_t slen)
{
	int n = compile_source (sed, sptr, slen);
	return n;
}
|
|
|
|
/*
 * Gets the next input character into '*c'. The input buffer is refilled
 * through the user input handler when all characters in it have been
 * consumed.
 *
 * Returns 1 if a character has been stored, 0 if the handler reported
 * the end of input, and -1 with sed->errnum set to QSE_SED_EIOUSR if the
 * handler failed.
 */
static int read_char (qse_sed_t* sed, qse_char_t* c)
{
	if (sed->eio.in.pos >= sed->eio.in.len)
	{
		/* nothing left in the buffer. ask the handler for more */
		qse_ssize_t got = sed->eio.in.f (
			sed, QSE_SED_IO_READ,
			sed->eio.in.buf, QSE_COUNTOF(sed->eio.in.buf)
		);

		if (got <= -1)
		{
			sed->errnum = QSE_SED_EIOUSR;
			return -1;
		}

		if (got == 0) return 0; /* end of file */

		sed->eio.in.len = got;
		sed->eio.in.pos = 0;
	}

	*c = sed->eio.in.buf[sed->eio.in.pos++];
	return 1;
}
|
|
|
|
/*
 * Reads a line of input into sed->eio.in.line, including the new line
 * character if there is one, and increments the line count.
 *
 * Returns 1 if a line has been read, 0 if there is no more input, and
 * -1 with sed->errnum set on failure.
 */
static int read_line (qse_sed_t* sed)
{
	qse_char_t ch;
	int got;

	qse_str_clear (&sed->eio.in.line);

	if (sed->eio.in.eof)
	{
		/* no more input detected in the previous read.
		 * set eof back to 0 here so that read_char() is called
		 * if read_line() is called again. that way, the result
		 * of subsequent calls counts on read_char(). */
		sed->eio.in.eof = 0;
		return 0;
	}

	do
	{
		got = read_char (sed, &ch);
		if (got <= -1) return -1;

		if (got == 0)
		{
			/* the end of input. nothing read means no line.
			 * otherwise, the last line lacks a new line. */
			if (QSE_STR_LEN(&sed->eio.in.line) == 0) return 0;
			sed->eio.in.eof = 1;
			break;
		}

		if (qse_str_ccat (&sed->eio.in.line, ch) == (qse_size_t)-1)
		{
			sed->errnum = QSE_SED_ENOMEM;
			return -1;
		}
	}
	while (ch != QSE_T('\n'));

	sed->eio.in.num++;
	return 1;
}
|
|
|
|
/*
 * Writes the pending characters in the output buffer through the user
 * output handler. The handler is called again for the remainder when it
 * takes only part of what is offered.
 *
 * Returns 0 once the buffer is empty. It returns -1 with sed->errnum set
 * to QSE_SED_EIOUSR if the handler fails or if it takes nothing.
 */
static int flush (qse_sed_t* sed)
{
	qse_size_t pos = 0;
	qse_ssize_t n;

	while (sed->eio.out.len > 0)
	{
		n = sed->eio.out.f (
			sed, QSE_SED_IO_WRITE,
			&sed->eio.out.buf[pos], sed->eio.out.len
		);

		if (n <= 0)
		{
			/* a handler that takes nothing makes no progress.
			 * calling it again with the same data would loop
			 * forever, so it is treated as a failure like a
			 * negative return value. */
			sed->errnum = QSE_SED_EIOUSR;
			return -1;
		}

		pos += n;
		sed->eio.out.len -= n;
	}

	return 0;
}
|
|
|
|
/*
 * Appends a character to the output buffer. The buffer is flushed when
 * the character is a new line or when the buffer has become full.
 *
 * Returns 0 on success and the result of flush() if flushing was done.
 */
static int write_char (qse_sed_t* sed, qse_char_t c)
{
	sed->eio.out.buf[sed->eio.out.len++] = c;

	/* keep buffering unless a line is completed or the room runs out */
	if (c != QSE_T('\n') &&
	    sed->eio.out.len < QSE_COUNTOF(sed->eio.out.buf)) return 0;

	return flush (sed);
}
|
|
|
|
/*
 * Writes 'len' characters at 'str' with write_char(), one by one.
 *
 * Returns 0 on success and -1 as soon as write_char() fails.
 */
static int write_str (qse_sed_t* sed, const qse_char_t* str, qse_size_t len)
{
	for (; len > 0; len--, str++)
	{
		if (write_char (sed, *str) <= -1) return -1;
	}

	return 0;
}
|
|
|
|
/*
 * Writes the decimal representation of 'x' with write_char().
 *
 * 'x' is unsigned, so there is no sign to write.
 *
 * Returns 0 on success and -1 as soon as write_char() fails.
 */
static int write_num (qse_sed_t* sed, qse_size_t x)
{
	/* a byte contributes fewer than 3 decimal digits, so this is
	 * enough room for any value of qse_size_t */
	qse_char_t digits[QSE_SIZEOF(qse_size_t) * 3 + 1];
	qse_size_t count = 0;

	/* produce the digits from the least significant one.
	 * do-while makes sure that 0 gets a digit too. */
	do
	{
		digits[count++] = (qse_char_t)(QSE_T('0') + (x % 10));
		x = x / 10;
	}
	while (x > 0);

	/* write them from the most significant one */
	while (count > 0)
	{
		if (write_char (sed, digits[--count]) <= -1) return -1;
	}

	return 0;
}
|
|
|
|
/*
 * Matches the first address of a command against the current input line.
 *
 * A line number matches only the line of that number when the command
 * has no second address. With a second address, it matches that line and
 * all the lines after it, leaving the upper bound to match_a2().
 *
 * Returns 1 on match, 0 on no match, and -1 with sed->errnum set to
 * QSE_SED_EREXMA if the regular expression matcher fails.
 */
static int match_a1 (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	switch (cmd->a1.type)
	{
		case QSE_SED_A_LINE:
			if (cmd->a2.type == QSE_SED_A_NONE)
			{
				/* a single address selects a single line.
				 * '>=' here would select all the lines
				 * from it to the end */
				return (sed->eio.in.num == cmd->a1.u.line)? 1: 0;
			}
			/* the start of a range */
			return (sed->eio.in.num >= cmd->a1.u.line)? 1: 0;

		case QSE_SED_A_REX:
		{
			qse_str_t match;
			int errnum, n;

			QSE_ASSERT (cmd->a1.u.rex != QSE_NULL);

			/* TODO: trim off trailing newline.... */

			n = qse_matchrex (
				sed->mmgr,
				&sed->ccls,
				0,
				cmd->a1.u.rex,
				0,
				QSE_STR_PTR(&sed->eio.in.line),
				QSE_STR_LEN(&sed->eio.in.line),
				&match.ptr, &match.len, &errnum);
			if (n <= -1)
			{
				sed->errnum = QSE_SED_EREXMA;
				return -1;
			}

qse_printf (QSE_T("matchrex=>%d [%s]\n"), n, QSE_STR_PTR(&sed->eio.in.line));
			return n;
		}

		case QSE_SED_A_DOL:
			/* NOTE(review): the last line address never
			 * matches here - looks unimplemented */
			return 0;
	}

	QSE_ASSERT (cmd->a1.type == QSE_SED_A_NONE);
	return 1; /* match */
}
|
|
|
|
/*
 * Tests the current input line against the second address of a command.
 *
 * A line address matches while the current line number has not gone
 * past it. A regular expression address returns the result of
 * qse_matchrex() over the current input line. QSE_SED_A_DOL never
 * matches at the moment, and a missing address always matches.
 *
 * Returns -1 with sed->errnum set to QSE_SED_EREXMA if the regular
 * expression matcher fails, 0 on no match, and a positive value on match.
 */
static int match_a2 (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	if (cmd->a2.type == QSE_SED_A_LINE)
	{
		if (sed->eio.in.num <= cmd->a2.u.line) return 1;
		return 0;
	}

	if (cmd->a2.type == QSE_SED_A_REX)
	{
		/* only ptr and len are used, as out-parameters of the matcher */
		qse_str_t found;
		int rexerr;
		int matched;

		QSE_ASSERT (cmd->a2.u.rex != QSE_NULL);

		matched = qse_matchrex (
			sed->mmgr, &sed->ccls, 0,
			cmd->a2.u.rex, 0,
			QSE_STR_PTR(&sed->eio.in.line),
			QSE_STR_LEN(&sed->eio.in.line),
			&found.ptr, &found.len, &rexerr
		);
		if (matched <= -1)
		{
			sed->errnum = QSE_SED_EREXMA;
			return -1;
		}

		return matched;
	}

	if (cmd->a2.type == QSE_SED_A_DOL) return 0;

	QSE_ASSERT (cmd->a2.type == QSE_SED_A_NONE);
	return 1; /* match */
}
|
|
|
|
/* match an address against input.
|
|
* return -1 on error, 0 on no match, 1 on match. */
|
|
static int match_address (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|
{
|
|
int a1, a2;
|
|
|
|
a1 = match_a1 (sed, cmd);
|
|
if (a1 <= -1) return -1;
|
|
a2 = match_a2 (sed, cmd);
|
|
if (a2 <= -1) return -1;
|
|
|
|
//qse_printf (QSE_T("a1 = %d, a2 = %d\n"), a1, a2);
|
|
return (a1 >= 1 && a2 >= 1)? 1: 0;
|
|
}
|
|
|
|
/*
 * Executes a single command against the current input line.
 *
 * Returns -1 on error, 0 if processing must stop (quit commands),
 * and 1 if the caller should carry on with the next command.
 */
static int exec_cmd (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	switch (cmd->type)
	{
		case QSE_SED_CMD_Q:
			/* print the current line before quitting */
			if (write_str (sed,
				QSE_STR_PTR(&sed->eio.in.line),
				QSE_STR_LEN(&sed->eio.in.line)) <= -1) return -1;
			/* fall through */
		case QSE_SED_CMD_QQ:
			/* quit without printing anything */
			return 0;

		case QSE_SED_CMD_EQ:
			/* print the current line number followed by a newline */
			if (write_num (sed, sed->eio.in.num) <= -1 ||
			    write_char (sed, QSE_T('\n')) <= -1) return -1;
			return 1;

		case QSE_SED_CMD_A:
		{
			/* queue the text at the end of the appended-text list.
			 * the caller writes it out after the current line. */
			qse_size_t at = QSE_LDA_SIZE(&sed->text_appended);

			if (qse_lda_insert (
				&sed->text_appended, at,
				&cmd->u.text, 0) == (qse_size_t)-1)
			{
				sed->errnum = QSE_SED_ENOMEM;
				return -1;
			}
			return 1;
		}

		case QSE_SED_CMD_I:
			/* the inserted text goes out immediately */
			if (write_str (sed,
				QSE_STR_PTR(&cmd->u.text),
				QSE_STR_LEN(&cmd->u.text)) <= -1) return -1;
			return 1;
	}

	/* any other command type is a no-op here */
	return 1;
}
|
|
|
|
/*
 * Runs the commands stored in sed->cmd over the input stream.
 *
 * inf is the input handler and outf is the output handler. Each is
 * opened with QSE_SED_IO_OPEN and closed with QSE_SED_IO_CLOSE. Input
 * is read line by line. For each line, every command whose address
 * matches is executed. The line is then written unless QSE_SED_QUIET
 * is set, followed by any text queued for appending.
 *
 * Returns 0 on success and -1 on failure. An input stream that reports
 * end-of-file upon opening is a success with nothing processed.
 */
int qse_sed_execute (qse_sed_t* sed, qse_sed_iof_t inf, qse_sed_iof_t outf)
{
	qse_ssize_t n;
	int ret = 0;

	sed->eio.out.f = outf;
	sed->eio.out.eof = 0;
	sed->eio.out.len = 0;

	sed->eio.in.f = inf;
	sed->eio.in.eof = 0;
	sed->eio.in.len = 0;
	sed->eio.in.pos = 0;
	sed->eio.in.num = 0;
	if (qse_str_init (&sed->eio.in.line, QSE_MMGR(sed), 256) == QSE_NULL)
	{
		sed->errnum = QSE_SED_ENOMEM;
		return -1;
	}

	n = sed->eio.in.f (sed, QSE_SED_IO_OPEN, QSE_NULL, 0);
	if (n <= -1)
	{
		ret = -1;
		sed->errnum = QSE_SED_EIOUSR;
		goto done3;
	}
	if (n == 0)
	{
		/* EOF reached upon opening an input stream.
		 * no data to process. this is success */
		goto done2;
	}

	n = sed->eio.out.f (sed, QSE_SED_IO_OPEN, QSE_NULL, 0);
	if (n <= -1)
	{
		ret = -1;
		sed->errnum = QSE_SED_EIOUSR;
		goto done2;
	}
	if (n == 0)
	{
		/* still don't know if we will write something.
		 * just mark EOF on the output stream and continue */
		sed->eio.out.eof = 1;
	}

	while (1)
	{
		qse_sed_cmd_t* c;
		qse_size_t i;

		n = read_line (sed);
		if (n <= -1) { ret = -1; goto done; }
		if (n == 0) goto done;

		/* text queued by the previous line must not leak into this one */
		qse_lda_clear (&sed->text_appended);

		c = sed->cmd.buf;
		while (c < sed->cmd.cur)
		{
			n = match_address (sed, c);
			if (n <= -1) { ret = -1; goto done; }

			if (n == 0)
			{
				/* the command is not selected for this line */
				c++;
				continue;
			}

			n = exec_cmd (sed, c);
			if (n <= -1) { ret = -1; goto done; }
			if (n == 0) goto done; /* the command asked to quit */

			/* TODO: if exec_cmd jumped change c.... */
			c++;
		}

		if (!(sed->option & QSE_SED_QUIET))
		{
			n = write_str (sed,
				QSE_STR_PTR(&sed->eio.in.line),
				QSE_STR_LEN(&sed->eio.in.line));
			if (n <= -1) { ret = -1; goto done; }
		}

		/* the appended text is written as stored. no newline is added. */
		for (i = 0; i < QSE_LDA_SIZE(&sed->text_appended); i++)
		{
			qse_xstr_t* t = QSE_LDA_DPTR(&sed->text_appended, i);
			n = write_str (sed, t->ptr, t->len);
			if (n <= -1) { ret = -1; goto done; }
		}
	}

done:
	/* write_char() flushes only on a newline or a full buffer, so
	 * output that follows the last newline is still pending here.
	 * write it out before closing the stream or it is lost. this is
	 * skipped on failure to keep the error code already recorded. */
	if (ret == 0 && flush (sed) <= -1) ret = -1;
	sed->eio.out.f (sed, QSE_SED_IO_CLOSE, QSE_NULL, 0);
done2:
	sed->eio.in.f (sed, QSE_SED_IO_CLOSE, QSE_NULL, 0);
done3:
	qse_str_fini (&sed->eio.in.line);
	return ret;
}
|