changed code to compile more command codes in sed
This commit is contained in:
parent
5819dba1f9
commit
f48ae81fa6
@ -37,6 +37,7 @@ enum qse_sed_errnum_t
|
||||
QSE_SED_EA2PHB, /* address 2 prohibited */
|
||||
QSE_SED_ENEWLN, /* a new line is expected */
|
||||
QSE_SED_EBSEXP, /* \ is expected */
|
||||
QSE_SED_ESCEXP, /* ; is expected */
|
||||
QSE_SED_ELABTL, /* label too long */
|
||||
QSE_SED_ELABEM, /* label name is empty */
|
||||
QSE_SED_ELABDU /* duplicate label name */
|
||||
@ -50,7 +51,7 @@ enum qse_sed_option_t
|
||||
};
|
||||
|
||||
typedef struct qse_sed_t qse_sed_t;
|
||||
typedef struct qse_sed_c_t qse_sed_c_t; /* command */
|
||||
typedef struct qse_sed_cmd_t qse_sed_cmd_t; /* command */
|
||||
typedef enum qse_sed_errnum_t qse_sed_errnum_t;
|
||||
|
||||
struct qse_sed_t
|
||||
@ -74,9 +75,9 @@ struct qse_sed_t
|
||||
/*qse_lda_t cmds;*/
|
||||
struct
|
||||
{
|
||||
qse_sed_c_t* buf;
|
||||
qse_sed_c_t* end;
|
||||
qse_sed_c_t* cur;
|
||||
qse_sed_cmd_t* buf;
|
||||
qse_sed_cmd_t* end;
|
||||
qse_sed_cmd_t* cur;
|
||||
} cmd;
|
||||
|
||||
qse_map_t labs; /* label map */
|
||||
@ -152,6 +153,15 @@ void qse_sed_setoption (
|
||||
);
|
||||
/*****/
|
||||
|
||||
/****f* Text Processor/qse_sed_geterrmsg
|
||||
* NAME
|
||||
* qse_sed_geterrmsg - get an error message
|
||||
* SYNOPSIS
|
||||
*/
|
||||
const qse_char_t* qse_sed_geterrmsg (
|
||||
qse_sed_t* sed
|
||||
);
|
||||
/******/
|
||||
|
||||
int qse_sed_compile (
|
||||
qse_sed_t* sed,
|
||||
|
@ -84,7 +84,7 @@ qse_sed_t* qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr)
|
||||
|
||||
/* TODO: use different data structure... */
|
||||
sed->cmd.buf = QSE_MMGR_ALLOC (
|
||||
sed->mmgr, QSE_SIZEOF(qse_sed_c_t) * 1000);
|
||||
sed->mmgr, QSE_SIZEOF(qse_sed_cmd_t) * 1000);
|
||||
if (sed->cmd.buf == QSE_NULL)
|
||||
{
|
||||
qse_map_fini (&sed->labs);
|
||||
@ -99,10 +99,46 @@ qse_sed_t* qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr)
|
||||
|
||||
/* Finalizes a sed object. It closes the branch text still attached to
 * 'b' and 't' commands in the command buffer, frees the command buffer
 * itself, and then finalizes the label map and the rexbuf string. */
void qse_sed_fini (qse_sed_t* sed)
{
	/* TODO: use different data structure -> look at qse_sed_init */
	qse_sed_cmd_t* cmd = sed->cmd.buf;

	/* visit every command in the range [cmd.buf, cmd.cur) */
	while (cmd != sed->cmd.cur)
	{
		int is_branch =
			(cmd->type == QSE_SED_CMD_B || cmd->type == QSE_SED_CMD_T);

		/* only branch commands carry u.branch.text */
		if (is_branch && cmd->u.branch.text != QSE_NULL)
		{
			qse_str_close (cmd->u.branch.text);
		}

		cmd++;
	}

	QSE_MMGR_FREE (sed->mmgr, sed->cmd.buf);

	qse_map_fini (&sed->labs);
	qse_str_fini (&sed->rexbuf);
}
|
||||
|
||||
/* Returns the message text for the error number stored in sed->errnum.
 *
 * The returned pointer refers to a static string and must not be freed
 * or modified. An error number of 0 yields "no error"; a number outside
 * the message table yields "unknown error". */
const qse_char_t* qse_sed_geterrmsg (qse_sed_t* sed)
{
	/* NOTE(review): the table is indexed by the error number, so its
	 * order presumably has to follow qse_sed_errnum_t - confirm against
	 * the enum whenever an error number is added. */
	static const qse_char_t* errmsg[] =
	{
		QSE_T("no error"),
		QSE_T("out of memory"),
		QSE_T("too much text"),
		QSE_T("command not recognized"),
		QSE_T("command garbled"),
		QSE_T("regular expression build error"),
		QSE_T("address 1 prohibited"),
		QSE_T("address 2 prohibited"),
		QSE_T("a new line expected"),
		QSE_T("a backslash expected"),
		QSE_T("a semicolon expected"),
		QSE_T("label name too long"),
		QSE_T("empty label name"),
		QSE_T("duplicate label name")
	};

	/* index 0 is a valid entry ("no error"), so the lower bound has to
	 * be inclusive */
	return (sed->errnum >= 0 && sed->errnum < QSE_COUNTOF(errmsg))?
		errmsg[sed->errnum]: QSE_T("unknown error");
}
|
||||
|
||||
void qse_sed_setoption (qse_sed_t* sed, int option)
|
||||
{
|
||||
sed->option = option;
|
||||
@ -121,15 +157,17 @@ int qse_sed_getoption (qse_sed_t* sed)
|
||||
#define NXTSC(sed) \
|
||||
(((++(sed)->src.cur) < (sed)->src.end)? (*(sed)->src.cur): QSE_CHAR_EOF)
|
||||
|
||||
/* check if c is a space character (a blank or a horizontal tab).
 * the argument is parenthesized so that an expression argument is
 * compared as a whole; note that it is still evaluated twice. */
#define IS_SPACE(c) ((c) == QSE_T(' ') || (c) == QSE_T('\t'))
|
||||
/* check if c is a white space */
|
||||
#define IS_WSPACE(c) (IS_SPACE(c) || c == QSE_T('\n') || c == QSE_T('\r'))
|
||||
/* check if c is a label terminator */
|
||||
#define IS_LABTERM(c) \
|
||||
(c == QSE_CHAR_EOF || c == QSE_T('#') || \
|
||||
c == QSE_T(';') || IS_WSPACE(c))
|
||||
/* check if c is a line terminator (a line feed or a carriage return).
 * the argument is parenthesized so that an expression argument is
 * compared as a whole; note that it is still evaluated twice. */
#define IS_LINTERM(c) ((c) == QSE_T('\n') || (c) == QSE_T('\r'))
|
||||
/* check if c is a white space, that is, a space character or a line
 * terminator. the argument is handed over in parentheses so that the
 * result does not depend on how the helper macros use it. */
#define IS_WSPACE(c) (IS_SPACE((c)) || IS_LINTERM((c)))
|
||||
|
||||
static void* compile_regex (qse_sed_t* sed, qse_char_t seof)
|
||||
/* check if c is a command terminator: the end of the input, the start
 * of a comment (#), a semicolon, or a line terminator. a space
 * character is not a command terminator. the argument is parenthesized
 * so that an expression argument is compared as a whole. */
#define IS_CMDTERM(c) \
	((c) == QSE_CHAR_EOF || (c) == QSE_T('#') || \
	 (c) == QSE_T(';') || IS_LINTERM((c)))
|
||||
|
||||
static void* compile_regex (qse_sed_t* sed, qse_char_t rxend)
|
||||
{
|
||||
void* code;
|
||||
qse_cint_t c;
|
||||
@ -146,7 +184,7 @@ static void* compile_regex (qse_sed_t* sed, qse_char_t seof)
|
||||
return QSE_NULL;
|
||||
}
|
||||
|
||||
if (c == seof) break;
|
||||
if (c == rxend) break;
|
||||
|
||||
if (c == QSE_T('\\'))
|
||||
{
|
||||
@ -239,7 +277,7 @@ static qse_sed_a_t* address (qse_sed_t* sed, qse_sed_a_t* a)
|
||||
* <newline> in the text shall be preceded by a backslash. Other backslashes
|
||||
* in text shall be removed, and the following character shall be treated
|
||||
* literally. */
|
||||
static qse_str_t* get_text (qse_sed_t* sed, qse_sed_c_t* cmd)
|
||||
static qse_str_t* get_text (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
{
|
||||
#define ADD(sed,str,c,errlabel) \
|
||||
do { \
|
||||
@ -312,24 +350,31 @@ oops:
|
||||
#undef ADD
|
||||
}
|
||||
|
||||
static int get_label (qse_sed_t* sed, qse_sed_c_t* cmd)
|
||||
static int get_label (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
{
|
||||
qse_cint_t c;
|
||||
qse_str_t* t = QSE_NULL; /* TODO: move this buffer to sed */
|
||||
|
||||
/* skip white spaces */
|
||||
c = CURSC(sed);
|
||||
c = CURSC (sed);
|
||||
while (IS_SPACE(c)) c = NXTSC (sed);
|
||||
|
||||
if (IS_LABTERM(c))
|
||||
if (IS_CMDTERM(c))
|
||||
{
|
||||
/* label name is empty */
|
||||
sed->errnum = QSE_SED_ELABEM;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* TODO: change t to qse_str_t t; and use qse_str_yield(t) to remember
|
||||
* branch text - in that case make '\0' an illegal character for the label
|
||||
* name or can remember the length for the text for '\0' to be legal */
|
||||
t = qse_str_open (sed->mmgr, 0, 32);
|
||||
if (t == QSE_NULL) goto oops;
|
||||
if (t == QSE_NULL)
|
||||
{
|
||||
sed->errnum = QSE_SED_ENOMEM;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
@ -338,11 +383,10 @@ static int get_label (qse_sed_t* sed, qse_sed_c_t* cmd)
|
||||
sed->errnum = QSE_SED_ENOMEM;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
c = NXTSC (sed);
|
||||
}
|
||||
while (!IS_LABTERM(c));
|
||||
|
||||
while (!IS_CMDTERM(c) && !IS_SPACE(c));
|
||||
|
||||
if (qse_map_search (
|
||||
&sed->labs, QSE_STR_PTR(t), QSE_STR_LEN(t)) != QSE_NULL)
|
||||
{
|
||||
@ -357,7 +401,10 @@ static int get_label (qse_sed_t* sed, qse_sed_c_t* cmd)
|
||||
goto oops;;
|
||||
}
|
||||
|
||||
ADVSCP (sed);
|
||||
/* the label can be followed by a command on the same line without
|
||||
* a semicolon as in ':label p'. */
|
||||
if (c != QSE_T('#') && c != QSE_CHAR_EOF) ADVSCP (sed);
|
||||
|
||||
qse_str_close (t);
|
||||
return 0;
|
||||
|
||||
@ -366,7 +413,25 @@ oops:
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int get_target_label (qse_sed_t* sed, qse_sed_c_t* cmd)
|
||||
/* Checks that the current command is properly terminated.
 *
 * Space characters at the current source position are skipped first.
 * If the character that follows is not a command terminator, errnum is
 * set to QSE_SED_ESCEXP and -1 is returned. Otherwise 0 is returned
 * and the terminator is consumed unless it is '#' or the end of input. */
static int terminate_command (qse_sed_t* sed)
{
	qse_cint_t ch;

	/* skip the space characters in front of the terminator */
	for (ch = CURSC (sed); IS_SPACE(ch); ch = NXTSC (sed))
	{
		/* nothing to do */
	}

	if (IS_CMDTERM(ch))
	{
		/* if the target is terminated by #, it should let the caller
		 * to skip the comment text. so don't read in the next character */
		if (ch != QSE_T('#') && ch != QSE_CHAR_EOF)
		{
			ADVSCP (sed);
		}

		return 0;
	}

	sed->errnum = QSE_SED_ESCEXP;
	return -1;
}
|
||||
|
||||
static int get_branch_target (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
{
|
||||
qse_cint_t c;
|
||||
qse_str_t* t = QSE_NULL;
|
||||
@ -376,15 +441,26 @@ static int get_target_label (qse_sed_t* sed, qse_sed_c_t* cmd)
|
||||
c = CURSC(sed);
|
||||
while (IS_SPACE(c)) c = NXTSC (sed);
|
||||
|
||||
if (IS_LABTERM(c))
|
||||
if (IS_CMDTERM(c))
|
||||
{
|
||||
/* label name is empty */
|
||||
sed->errnum = QSE_SED_ELABEM;
|
||||
return -1;
|
||||
/* no branch target is given -
|
||||
* a branch command without a target should cause
|
||||
* sed to jump to the end of a script.
|
||||
*/
|
||||
cmd->u.branch.text = QSE_NULL;
|
||||
cmd->u.branch.target = QSE_NULL;
|
||||
return terminate_command (sed);
|
||||
}
|
||||
|
||||
/* TODO: change t to qse_str_t t; and use qse_str_yield(t) to remember
|
||||
* branch text - in that case make '\0' an illegal character for the label
|
||||
* name or can remember the length for the text for '\0' to be legal */
|
||||
t = qse_str_open (sed->mmgr, 0, 32);
|
||||
if (t == QSE_NULL) goto oops;
|
||||
if (t == QSE_NULL)
|
||||
{
|
||||
sed->errnum = QSE_SED_ENOMEM;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
@ -396,12 +472,9 @@ static int get_target_label (qse_sed_t* sed, qse_sed_c_t* cmd)
|
||||
|
||||
c = NXTSC (sed);
|
||||
}
|
||||
while (!IS_LABTERM(c));
|
||||
while (!IS_CMDTERM(c) && !IS_SPACE(c));
|
||||
|
||||
/* TODO: what happend for something like b xxx yyy;
|
||||
* SHOULD y be a command? or an error ->
|
||||
* b xxx ; yyy; => should force ; or new line at the end?
|
||||
*/
|
||||
if (terminate_command (sed) == -1) goto oops;
|
||||
|
||||
pair = qse_map_search (&sed->labs, QSE_STR_PTR(t), QSE_STR_LEN(t));
|
||||
if (pair == QSE_NULL)
|
||||
@ -424,12 +497,42 @@ oops:
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Placeholder for compiling the character sets of the 'y' command.
 *
 * For now it only opens two temporary strings and closes them again;
 * nothing is read from the source and cmd is left untouched.
 * Returns 0 on success. If either string cannot be opened, errnum is
 * set to QSE_SED_ENOMEM and -1 is returned. */
static int get_trans_set (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_str_t* first;
	qse_str_t* second;

	first = qse_str_open (sed->mmgr, 0, 32);
	if (first == QSE_NULL)
	{
		sed->errnum = QSE_SED_ENOMEM;
		return -1;
	}

	second = qse_str_open (sed->mmgr, 0, 32);
	if (second == QSE_NULL)
	{
		sed->errnum = QSE_SED_ENOMEM;
		/* the first string is already open and must be released */
		qse_str_close (first);
		return -1;
	}

	/* close in the reverse order of opening */
	qse_str_close (second);
	qse_str_close (first);

	return 0;
}
|
||||
|
||||
static int command (qse_sed_t* sed)
|
||||
{
|
||||
qse_cint_t c;
|
||||
qse_sed_c_t* cmd = sed->cmd.cur;
|
||||
qse_sed_cmd_t* cmd = sed->cmd.cur;
|
||||
|
||||
handle_command:
|
||||
restart:
|
||||
c = CURSC (sed);
|
||||
switch (c)
|
||||
{
|
||||
@ -450,7 +553,7 @@ qse_printf (QSE_T("command not recognized [%c]\n"), c);
|
||||
|
||||
ADVSCP (sed);
|
||||
if (get_label (sed, cmd) == -1) return -1;
|
||||
goto handle_command;
|
||||
goto restart;
|
||||
|
||||
case QSE_T('{'):
|
||||
/* insert a negated branch command at the beginning
|
||||
@ -464,15 +567,6 @@ qse_printf (QSE_T("command not recognized [%c]\n"), c);
|
||||
case QSE_T('}'):
|
||||
break;
|
||||
|
||||
case QSE_T('='):
|
||||
cmd->type = c;
|
||||
if (cmd->a2.type != QSE_SED_A_NONE)
|
||||
{
|
||||
sed->errnum = QSE_SED_EA2PHB;
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
|
||||
case QSE_T('a'):
|
||||
case QSE_T('i'):
|
||||
case QSE_T('c'):
|
||||
@ -522,9 +616,24 @@ qse_printf (QSE_T("%s%s"), ttt, QSE_STR_PTR(cmd->u.text));
|
||||
}
|
||||
|
||||
case QSE_T('D'):
|
||||
//cmd->u.label = pspace;
|
||||
case QSE_T('d'):
|
||||
cmd->type = c;
|
||||
ADVSCP (sed);
|
||||
if (terminate_command (sed) == -1) return -1;
|
||||
printf ("command %c\n", cmd->type);
|
||||
break;
|
||||
|
||||
case QSE_T('='):
|
||||
cmd->type = c;
|
||||
if (cmd->a2.type != QSE_SED_A_NONE)
|
||||
{
|
||||
sed->errnum = QSE_SED_EA2PHB;
|
||||
return -1;
|
||||
}
|
||||
|
||||
ADVSCP (sed);
|
||||
if (terminate_command (sed) == -1) return -1;
|
||||
printf ("command %c\n", cmd->type);
|
||||
break;
|
||||
|
||||
case QSE_T('h'):
|
||||
@ -538,6 +647,9 @@ qse_printf (QSE_T("%s%s"), ttt, QSE_STR_PTR(cmd->u.text));
|
||||
case QSE_T('P'):
|
||||
case QSE_T('x'):
|
||||
cmd->type = c;
|
||||
ADVSCP (sed);
|
||||
if (terminate_command (sed) == -1) return -1;
|
||||
printf ("command %c\n", cmd->type);
|
||||
break;
|
||||
|
||||
|
||||
@ -545,7 +657,15 @@ qse_printf (QSE_T("%s%s"), ttt, QSE_STR_PTR(cmd->u.text));
|
||||
case QSE_T('t'):
|
||||
cmd->type = c;
|
||||
ADVSCP (sed);
|
||||
if (get_target_label (sed, cmd) == -1) return -1;
|
||||
if (get_branch_target (sed, cmd) == -1) return -1;
|
||||
if (cmd->u.branch.text != NULL)
|
||||
{
|
||||
printf ("cmd->u.branch.text = [%s]\n", cmd->u.branch.text->ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf ("cmd->u.branch.target = [%p]\n", cmd->u.branch.target);
|
||||
}
|
||||
break;
|
||||
|
||||
case QSE_T('r'):
|
||||
@ -571,6 +691,9 @@ qse_printf (QSE_T("%s%s"), ttt, QSE_STR_PTR(cmd->u.text));
|
||||
case QSE_T('s'):
|
||||
break;
|
||||
case QSE_T('y'):
|
||||
cmd->type = c;
|
||||
ADVSCP (sed);
|
||||
if (get_trans_set (sed, cmd) == -1) return -1;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -581,7 +704,7 @@ static int compile_source (
|
||||
qse_sed_t* sed, const qse_char_t* ptr, qse_size_t len)
|
||||
{
|
||||
qse_cint_t c;
|
||||
qse_sed_c_t* cmd = sed->cmd.cur;
|
||||
qse_sed_cmd_t* cmd = sed->cmd.cur;
|
||||
|
||||
/* store the source code pointers */
|
||||
sed->src.ptr = ptr;
|
||||
@ -598,12 +721,18 @@ static int compile_source (
|
||||
while (1)
|
||||
{
|
||||
c = CURSC (sed);
|
||||
|
||||
/* skip white spaces */
|
||||
while (IS_SPACE(c)) c = NXTSC (sed);
|
||||
|
||||
/* skip white spaces and comments*/
|
||||
while (IS_WSPACE(c)) c = NXTSC (sed);
|
||||
if (c == QSE_T('#'))
|
||||
{
|
||||
do c = NXTSC (sed); while (!IS_LINTERM(c));
|
||||
ADVSCP (sed);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* check if it has reached the end or is commented */
|
||||
if (c == QSE_CHAR_EOF || c == QSE_T('#')) break;
|
||||
if (c == QSE_CHAR_EOF) break;
|
||||
|
||||
if (c == QSE_T(';'))
|
||||
{
|
||||
|
@ -44,7 +44,7 @@ struct qse_sed_a_t
|
||||
} u;
|
||||
};
|
||||
|
||||
struct qse_sed_c_t
|
||||
struct qse_sed_cmd_t
|
||||
{
|
||||
qse_sed_a_t a1; /* optional start address */
|
||||
qse_sed_a_t a2; /* optional end address */
|
||||
@ -56,7 +56,7 @@ struct qse_sed_c_t
|
||||
struct
|
||||
{
|
||||
qse_str_t* text;
|
||||
qse_sed_c_t* target;
|
||||
qse_sed_cmd_t* target;
|
||||
} branch;
|
||||
} u;
|
||||
|
||||
@ -65,6 +65,8 @@ struct qse_sed_c_t
|
||||
enum
|
||||
{
|
||||
QSE_SED_CMD_B = QSE_T('b'), /* branch */
|
||||
QSE_SED_CMD_T = QSE_T('t'), /* branch */
|
||||
|
||||
/* print current line number */
|
||||
QSE_SED_CMD_EQ = QSE_T('='), /* print current line number */
|
||||
|
||||
@ -105,6 +107,7 @@ struct qse_sed_c_t
|
||||
QSE_SED_CMD_QQ = QSE_T('Q'),
|
||||
|
||||
QSE_SED_CMD_S = QSE_T('s'),
|
||||
/* y/s/d/ - translate characters in s to characters in d */
|
||||
QSE_SED_CMD_Y = QSE_T('y')
|
||||
|
||||
} type;
|
||||
|
@ -47,8 +47,10 @@ int sed_main (int argc, qse_char_t* argv[])
|
||||
|
||||
if (qse_sed_compile (sed, argv[1], qse_strlen(argv[1])) == -1)
|
||||
{
|
||||
qse_fprintf (QSE_STDERR, QSE_T("cannot compile - %d\n"), sed->errnum);
|
||||
//qse_fprintf (QSE_STDERR, QSE_T("cannot compile - %s\n"), qse_sed_geterrstr(sed));
|
||||
qse_fprintf (QSE_STDERR,
|
||||
QSE_T("cannot compile - %s\n"),
|
||||
qse_sed_geterrmsg(sed)
|
||||
);
|
||||
goto oops;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user