changed code to compile more command codes in sed

This commit is contained in:
hyung-hwan 2009-03-19 02:20:33 +00:00
parent 5819dba1f9
commit f48ae81fa6
4 changed files with 200 additions and 56 deletions

View File

@ -37,6 +37,7 @@ enum qse_sed_errnum_t
QSE_SED_EA2PHB, /* address 2 prohibited */ QSE_SED_EA2PHB, /* address 2 prohibited */
QSE_SED_ENEWLN, /* a new line is expected */ QSE_SED_ENEWLN, /* a new line is expected */
QSE_SED_EBSEXP, /* \ is expected */ QSE_SED_EBSEXP, /* \ is expected */
QSE_SED_ESCEXP, /* ; is expected */
QSE_SED_ELABTL, /* label too long */ QSE_SED_ELABTL, /* label too long */
QSE_SED_ELABEM, /* label name is empty */ QSE_SED_ELABEM, /* label name is empty */
QSE_SED_ELABDU /* duplicate label name */ QSE_SED_ELABDU /* duplicate label name */
@ -50,7 +51,7 @@ enum qse_sed_option_t
}; };
typedef struct qse_sed_t qse_sed_t; typedef struct qse_sed_t qse_sed_t;
typedef struct qse_sed_c_t qse_sed_c_t; /* command */ typedef struct qse_sed_cmd_t qse_sed_cmd_t; /* command */
typedef enum qse_sed_errnum_t qse_sed_errnum_t; typedef enum qse_sed_errnum_t qse_sed_errnum_t;
struct qse_sed_t struct qse_sed_t
@ -74,9 +75,9 @@ struct qse_sed_t
/*qse_lda_t cmds;*/ /*qse_lda_t cmds;*/
struct struct
{ {
qse_sed_c_t* buf; qse_sed_cmd_t* buf;
qse_sed_c_t* end; qse_sed_cmd_t* end;
qse_sed_c_t* cur; qse_sed_cmd_t* cur;
} cmd; } cmd;
qse_map_t labs; /* label map */ qse_map_t labs; /* label map */
@ -152,6 +153,15 @@ void qse_sed_setoption (
); );
/*****/ /*****/
/****f* Text Processor/qse_sed_geterrmsg
* NAME
* qse_sed_geterrmsg - get an error message
* DESCRIPTION
* The qse_sed_geterrmsg() function looks at the error number currently
* stored in the sed object and selects the message text for it.
* RETURN
* A pointer to message text held in a static table inside the function.
* The caller must not modify or free it. A generic "unknown error" text
* is returned when no table entry is selected.
* SYNOPSIS
*/
const qse_char_t* qse_sed_geterrmsg (
qse_sed_t* sed
);
/******/
int qse_sed_compile ( int qse_sed_compile (
qse_sed_t* sed, qse_sed_t* sed,

View File

@ -84,7 +84,7 @@ qse_sed_t* qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr)
/* TODO: use different data structure... */ /* TODO: use different data structure... */
sed->cmd.buf = QSE_MMGR_ALLOC ( sed->cmd.buf = QSE_MMGR_ALLOC (
sed->mmgr, QSE_SIZEOF(qse_sed_c_t) * 1000); sed->mmgr, QSE_SIZEOF(qse_sed_cmd_t) * 1000);
if (sed->cmd.buf == QSE_NULL) if (sed->cmd.buf == QSE_NULL)
{ {
qse_map_fini (&sed->labs); qse_map_fini (&sed->labs);
@ -99,10 +99,46 @@ qse_sed_t* qse_sed_init (qse_sed_t* sed, qse_mmgr_t* mmgr)
void qse_sed_fini (qse_sed_t* sed) void qse_sed_fini (qse_sed_t* sed)
{ {
/* TODO: use a different data structure -> look at qse_sed_init */
qse_sed_cmd_t* c;
for (c = sed->cmd.buf; c != sed->cmd.cur; c++)
{
if (c->type == QSE_SED_CMD_B || c->type == QSE_SED_CMD_T)
{
if (c->u.branch.text != QSE_NULL)
qse_str_close (c->u.branch.text);
}
}
QSE_MMGR_FREE (sed->mmgr, sed->cmd.buf);
qse_map_fini (&sed->labs); qse_map_fini (&sed->labs);
qse_str_fini (&sed->rexbuf); qse_str_fini (&sed->rexbuf);
} }
/* qse_sed_geterrmsg - translate the error number stored in sed into text.
 *
 * sed: the sed object whose errnum field is examined.
 *
 * Returns a pointer to a string held in a static table; the caller must
 * not modify or free it. An error number outside the table yields
 * "unknown error".
 */
const qse_char_t* qse_sed_geterrmsg (qse_sed_t* sed)
{
	/* indexed by error number. the entries are expected to follow the
	 * declaration order of qse_sed_errnum_t - keep both in sync when a
	 * new error number is added. */
	static const qse_char_t* errmsg[] =
	{
		QSE_T("no error"),
		QSE_T("out of memory"),
		QSE_T("too much text"),
		QSE_T("command not recognized"),
		QSE_T("command garbled"),
		QSE_T("regular expression build error"),
		QSE_T("address 1 prohibited"),
		QSE_T("address 2 prohibited"),
		QSE_T("a new line expected"),
		QSE_T("a backslash expected"),
		QSE_T("a semicolon expected"),
		QSE_T("label name too long"),
		QSE_T("empty label name"),
		QSE_T("duplicate label name")
	};

	/* the lower bound is inclusive: error number 0 must select the
	 * "no error" entry at index 0 instead of falling through to
	 * "unknown error". */
	return (sed->errnum >= 0 && sed->errnum < QSE_COUNTOF(errmsg))?
		errmsg[sed->errnum]: QSE_T("unknown error");
}
void qse_sed_setoption (qse_sed_t* sed, int option) void qse_sed_setoption (qse_sed_t* sed, int option)
{ {
sed->option = option; sed->option = option;
@ -121,15 +157,17 @@ int qse_sed_getoption (qse_sed_t* sed)
#define NXTSC(sed) \ #define NXTSC(sed) \
(((++(sed)->src.cur) < (sed)->src.end)? (*(sed)->src.cur): QSE_CHAR_EOF) (((++(sed)->src.cur) < (sed)->src.end)? (*(sed)->src.cur): QSE_CHAR_EOF)
/* check if c is a space character */
#define IS_SPACE(c) (c == QSE_T(' ') || c == QSE_T('\t')) #define IS_SPACE(c) (c == QSE_T(' ') || c == QSE_T('\t'))
/* check if c is a white space */ #define IS_LINTERM(c) (c == QSE_T('\n') || c == QSE_T('\r'))
#define IS_WSPACE(c) (IS_SPACE(c) || c == QSE_T('\n') || c == QSE_T('\r')) #define IS_WSPACE(c) (IS_SPACE(c) || IS_LINTERM(c))
/* check if c is a label terminator */
#define IS_LABTERM(c) \
(c == QSE_CHAR_EOF || c == QSE_T('#') || \
c == QSE_T(';') || IS_WSPACE(c))
static void* compile_regex (qse_sed_t* sed, qse_char_t seof) /* check if c is a label terminator excluding a space character */
#define IS_CMDTERM(c) \
(c == QSE_CHAR_EOF || c == QSE_T('#') || \
c == QSE_T(';') || IS_LINTERM(c))
static void* compile_regex (qse_sed_t* sed, qse_char_t rxend)
{ {
void* code; void* code;
qse_cint_t c; qse_cint_t c;
@ -146,7 +184,7 @@ static void* compile_regex (qse_sed_t* sed, qse_char_t seof)
return QSE_NULL; return QSE_NULL;
} }
if (c == seof) break; if (c == rxend) break;
if (c == QSE_T('\\')) if (c == QSE_T('\\'))
{ {
@ -239,7 +277,7 @@ static qse_sed_a_t* address (qse_sed_t* sed, qse_sed_a_t* a)
* <newline> in the text shall be preceded by a backslash. Other backslashes * <newline> in the text shall be preceded by a backslash. Other backslashes
* in text shall be removed, and the following character shall be treated * in text shall be removed, and the following character shall be treated
* literally. */ * literally. */
static qse_str_t* get_text (qse_sed_t* sed, qse_sed_c_t* cmd) static qse_str_t* get_text (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{ {
#define ADD(sed,str,c,errlabel) \ #define ADD(sed,str,c,errlabel) \
do { \ do { \
@ -312,24 +350,31 @@ oops:
#undef ADD #undef ADD
} }
static int get_label (qse_sed_t* sed, qse_sed_c_t* cmd) static int get_label (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{ {
qse_cint_t c; qse_cint_t c;
qse_str_t* t = QSE_NULL; /* TODO: move this buffer to sed */ qse_str_t* t = QSE_NULL; /* TODO: move this buffer to sed */
/* skip white spaces */ /* skip white spaces */
c = CURSC(sed); c = CURSC (sed);
while (IS_SPACE(c)) c = NXTSC (sed); while (IS_SPACE(c)) c = NXTSC (sed);
if (IS_LABTERM(c)) if (IS_CMDTERM(c))
{ {
/* label name is empty */ /* label name is empty */
sed->errnum = QSE_SED_ELABEM; sed->errnum = QSE_SED_ELABEM;
return -1; return -1;
} }
/* TODO: change t to qse_str_t t; and use qse_str_yield(t) to remember
* the branch text - in that case either make '\0' an illegal character
* in a label name, or remember the text length so that '\0' stays legal */
t = qse_str_open (sed->mmgr, 0, 32); t = qse_str_open (sed->mmgr, 0, 32);
if (t == QSE_NULL) goto oops; if (t == QSE_NULL)
{
sed->errnum = QSE_SED_ENOMEM;
goto oops;
}
do do
{ {
@ -338,10 +383,9 @@ static int get_label (qse_sed_t* sed, qse_sed_c_t* cmd)
sed->errnum = QSE_SED_ENOMEM; sed->errnum = QSE_SED_ENOMEM;
goto oops; goto oops;
} }
c = NXTSC (sed); c = NXTSC (sed);
} }
while (!IS_LABTERM(c)); while (!IS_CMDTERM(c) && !IS_SPACE(c));
if (qse_map_search ( if (qse_map_search (
&sed->labs, QSE_STR_PTR(t), QSE_STR_LEN(t)) != QSE_NULL) &sed->labs, QSE_STR_PTR(t), QSE_STR_LEN(t)) != QSE_NULL)
@ -357,7 +401,10 @@ static int get_label (qse_sed_t* sed, qse_sed_c_t* cmd)
goto oops;; goto oops;;
} }
ADVSCP (sed); /* the label can be followed by a command on the same line without
* a semicolon as in ':label p'. */
if (c != QSE_T('#') && c != QSE_CHAR_EOF) ADVSCP (sed);
qse_str_close (t); qse_str_close (t);
return 0; return 0;
@ -366,7 +413,25 @@ oops:
return -1; return -1;
} }
static int get_target_label (qse_sed_t* sed, qse_sed_c_t* cmd) static int terminate_command (qse_sed_t* sed)
{
qse_cint_t c;
c = CURSC (sed);
while (IS_SPACE(c)) c = NXTSC (sed);
if (!IS_CMDTERM(c))
{
sed->errnum = QSE_SED_ESCEXP;
return -1;
}
/* if the target is terminated by #, the caller must be left to skip
* the comment text, so don't read in the next character */
if (c != QSE_T('#') && c != QSE_CHAR_EOF) ADVSCP (sed);
return 0;
}
static int get_branch_target (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{ {
qse_cint_t c; qse_cint_t c;
qse_str_t* t = QSE_NULL; qse_str_t* t = QSE_NULL;
@ -376,15 +441,26 @@ static int get_target_label (qse_sed_t* sed, qse_sed_c_t* cmd)
c = CURSC(sed); c = CURSC(sed);
while (IS_SPACE(c)) c = NXTSC (sed); while (IS_SPACE(c)) c = NXTSC (sed);
if (IS_LABTERM(c)) if (IS_CMDTERM(c))
{ {
/* label name is empty */ /* no branch target is given -
sed->errnum = QSE_SED_ELABEM; * a branch command without a target should cause
return -1; * sed to jump to the end of a script.
*/
cmd->u.branch.text = QSE_NULL;
cmd->u.branch.target = QSE_NULL;
return terminate_command (sed);
} }
/* TODO: change t to qse_str_t t; and use qse_str_yield(t) to remember
* the branch text - in that case either make '\0' an illegal character
* in a label name, or remember the text length so that '\0' stays legal */
t = qse_str_open (sed->mmgr, 0, 32); t = qse_str_open (sed->mmgr, 0, 32);
if (t == QSE_NULL) goto oops; if (t == QSE_NULL)
{
sed->errnum = QSE_SED_ENOMEM;
goto oops;
}
do do
{ {
@ -396,12 +472,9 @@ static int get_target_label (qse_sed_t* sed, qse_sed_c_t* cmd)
c = NXTSC (sed); c = NXTSC (sed);
} }
while (!IS_LABTERM(c)); while (!IS_CMDTERM(c) && !IS_SPACE(c));
/* TODO: what happend for something like b xxx yyy; if (terminate_command (sed) == -1) goto oops;
* SHOULD y be a command? or an error ->
* b xxx ; yyy; => should force ; or new line at the end?
*/
pair = qse_map_search (&sed->labs, QSE_STR_PTR(t), QSE_STR_LEN(t)); pair = qse_map_search (&sed->labs, QSE_STR_PTR(t), QSE_STR_LEN(t));
if (pair == QSE_NULL) if (pair == QSE_NULL)
@ -424,12 +497,42 @@ oops:
return -1; return -1;
} }
/* get_trans_set - placeholder for reading the character sets of a command.
 *
 * Two string buffers are opened and closed again; nothing is read from
 * the script and nothing is stored into cmd at this point.
 *
 * Returns 0 on success. If either buffer cannot be opened, sed->errnum is
 * set to QSE_SED_ENOMEM and -1 is returned.
 */
static int get_trans_set (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{
	qse_str_t* set_a;
	qse_str_t* set_b;

	set_a = qse_str_open (sed->mmgr, 0, 32);
	if (set_a == QSE_NULL)
	{
		sed->errnum = QSE_SED_ENOMEM;
		return -1;
	}

	set_b = qse_str_open (sed->mmgr, 0, 32);
	if (set_b == QSE_NULL)
	{
		/* only the first buffer exists here; release it before failing */
		sed->errnum = QSE_SED_ENOMEM;
		qse_str_close (set_a);
		return -1;
	}

	/* release in reverse order of acquisition */
	qse_str_close (set_b);
	qse_str_close (set_a);
	return 0;
}
static int command (qse_sed_t* sed) static int command (qse_sed_t* sed)
{ {
qse_cint_t c; qse_cint_t c;
qse_sed_c_t* cmd = sed->cmd.cur; qse_sed_cmd_t* cmd = sed->cmd.cur;
handle_command: restart:
c = CURSC (sed); c = CURSC (sed);
switch (c) switch (c)
{ {
@ -450,7 +553,7 @@ qse_printf (QSE_T("command not recognized [%c]\n"), c);
ADVSCP (sed); ADVSCP (sed);
if (get_label (sed, cmd) == -1) return -1; if (get_label (sed, cmd) == -1) return -1;
goto handle_command; goto restart;
case QSE_T('{'): case QSE_T('{'):
/* insert a negated branch command at the beginning /* insert a negated branch command at the beginning
@ -464,15 +567,6 @@ qse_printf (QSE_T("command not recognized [%c]\n"), c);
case QSE_T('}'): case QSE_T('}'):
break; break;
case QSE_T('='):
cmd->type = c;
if (cmd->a2.type != QSE_SED_A_NONE)
{
sed->errnum = QSE_SED_EA2PHB;
return -1;
}
break;
case QSE_T('a'): case QSE_T('a'):
case QSE_T('i'): case QSE_T('i'):
case QSE_T('c'): case QSE_T('c'):
@ -522,9 +616,24 @@ qse_printf (QSE_T("%s%s"), ttt, QSE_STR_PTR(cmd->u.text));
} }
case QSE_T('D'): case QSE_T('D'):
//cmd->u.label = pspace;
case QSE_T('d'): case QSE_T('d'):
cmd->type = c; cmd->type = c;
ADVSCP (sed);
if (terminate_command (sed) == -1) return -1;
printf ("command %c\n", cmd->type);
break;
case QSE_T('='):
cmd->type = c;
if (cmd->a2.type != QSE_SED_A_NONE)
{
sed->errnum = QSE_SED_EA2PHB;
return -1;
}
ADVSCP (sed);
if (terminate_command (sed) == -1) return -1;
printf ("command %c\n", cmd->type);
break; break;
case QSE_T('h'): case QSE_T('h'):
@ -538,6 +647,9 @@ qse_printf (QSE_T("%s%s"), ttt, QSE_STR_PTR(cmd->u.text));
case QSE_T('P'): case QSE_T('P'):
case QSE_T('x'): case QSE_T('x'):
cmd->type = c; cmd->type = c;
ADVSCP (sed);
if (terminate_command (sed) == -1) return -1;
printf ("command %c\n", cmd->type);
break; break;
@ -545,7 +657,15 @@ qse_printf (QSE_T("%s%s"), ttt, QSE_STR_PTR(cmd->u.text));
case QSE_T('t'): case QSE_T('t'):
cmd->type = c; cmd->type = c;
ADVSCP (sed); ADVSCP (sed);
if (get_target_label (sed, cmd) == -1) return -1; if (get_branch_target (sed, cmd) == -1) return -1;
if (cmd->u.branch.text != NULL)
{
printf ("cmd->u.branch.text = [%s]\n", cmd->u.branch.text->ptr);
}
else
{
printf ("cmd->u.branch.target = [%p]\n", cmd->u.branch.target);
}
break; break;
case QSE_T('r'): case QSE_T('r'):
@ -571,6 +691,9 @@ qse_printf (QSE_T("%s%s"), ttt, QSE_STR_PTR(cmd->u.text));
case QSE_T('s'): case QSE_T('s'):
break; break;
case QSE_T('y'): case QSE_T('y'):
cmd->type = c;
ADVSCP (sed);
if (get_trans_set (sed, cmd) == -1) return -1;
break; break;
} }
@ -581,7 +704,7 @@ static int compile_source (
qse_sed_t* sed, const qse_char_t* ptr, qse_size_t len) qse_sed_t* sed, const qse_char_t* ptr, qse_size_t len)
{ {
qse_cint_t c; qse_cint_t c;
qse_sed_c_t* cmd = sed->cmd.cur; qse_sed_cmd_t* cmd = sed->cmd.cur;
/* store the source code pointers */ /* store the source code pointers */
sed->src.ptr = ptr; sed->src.ptr = ptr;
@ -599,11 +722,17 @@ static int compile_source (
{ {
c = CURSC (sed); c = CURSC (sed);
/* skip white spaces */ /* skip white spaces and comments*/
while (IS_SPACE(c)) c = NXTSC (sed); while (IS_WSPACE(c)) c = NXTSC (sed);
if (c == QSE_T('#'))
{
do c = NXTSC (sed); while (!IS_LINTERM(c));
ADVSCP (sed);
continue;
}
/* check if it has reached the end or is commented */ /* check if it has reached the end or is commented */
if (c == QSE_CHAR_EOF || c == QSE_T('#')) break; if (c == QSE_CHAR_EOF) break;
if (c == QSE_T(';')) if (c == QSE_T(';'))
{ {

View File

@ -44,7 +44,7 @@ struct qse_sed_a_t
} u; } u;
}; };
struct qse_sed_c_t struct qse_sed_cmd_t
{ {
qse_sed_a_t a1; /* optional start address */ qse_sed_a_t a1; /* optional start address */
qse_sed_a_t a2; /* optional end address */ qse_sed_a_t a2; /* optional end address */
@ -56,7 +56,7 @@ struct qse_sed_c_t
struct struct
{ {
qse_str_t* text; qse_str_t* text;
qse_sed_c_t* target; qse_sed_cmd_t* target;
} branch; } branch;
} u; } u;
@ -65,6 +65,8 @@ struct qse_sed_c_t
enum enum
{ {
QSE_SED_CMD_B = QSE_T('b'), /* branch */ QSE_SED_CMD_B = QSE_T('b'), /* branch */
QSE_SED_CMD_T = QSE_T('t'), /* branch */
/* print current line number */ /* print current line number */
QSE_SED_CMD_EQ = QSE_T('='), /* print current line number */ QSE_SED_CMD_EQ = QSE_T('='), /* print current line number */
@ -105,6 +107,7 @@ struct qse_sed_c_t
QSE_SED_CMD_QQ = QSE_T('Q'), QSE_SED_CMD_QQ = QSE_T('Q'),
QSE_SED_CMD_S = QSE_T('s'), QSE_SED_CMD_S = QSE_T('s'),
/* y/s/d/ - translate characters in s to characters in d */
QSE_SED_CMD_Y = QSE_T('y') QSE_SED_CMD_Y = QSE_T('y')
} type; } type;

View File

@ -47,8 +47,10 @@ int sed_main (int argc, qse_char_t* argv[])
if (qse_sed_compile (sed, argv[1], qse_strlen(argv[1])) == -1) if (qse_sed_compile (sed, argv[1], qse_strlen(argv[1])) == -1)
{ {
qse_fprintf (QSE_STDERR, QSE_T("cannot compile - %d\n"), sed->errnum); qse_fprintf (QSE_STDERR,
//qse_fprintf (QSE_STDERR, QSE_T("cannot compile - %s\n"), qse_sed_geterrstr(sed)); QSE_T("cannot compile - %s\n"),
qse_sed_geterrmsg(sed)
);
goto oops; goto oops;
} }