added branch target processing to sed

This commit is contained in:
hyung-hwan 2009-03-18 08:44:21 +00:00
parent ad5251570f
commit 5819dba1f9
3 changed files with 108 additions and 48 deletions

View File

@ -27,18 +27,19 @@
enum qse_sed_errnum_t enum qse_sed_errnum_t
{ {
QSE_SED_ENOERR, /* no error */ QSE_SED_ENOERR, /* no error */
QSE_SED_ENOMEM, /* no memory */ QSE_SED_ENOMEM, /* no memory */
QSE_SED_ETMTXT, /* too much text */ QSE_SED_ETMTXT, /* too much text */
QSE_SED_ECMDNR, /* command not recognized */ QSE_SED_ECMDNR, /* command not recognized */
QSE_SED_ECMDGB, /* command garbled */ QSE_SED_ECMDGB, /* command garbled */
QSE_SED_EREXBL, /* regular expression build error */ QSE_SED_EREXBL, /* regular expression build error */
QSE_SED_EA1PHB, /* address 1 prohibited */ QSE_SED_EA1PHB, /* address 1 prohibited */
QSE_SED_EA2PHB, /* address 2 prohibited */ QSE_SED_EA2PHB, /* address 2 prohibited */
QSE_SED_ENEWLN, /* a new line is expected */ QSE_SED_ENEWLN, /* a new line is expected */
QSE_SED_EBSEXP, /* \ is expected */ QSE_SED_EBSEXP, /* \ is expected */
QSE_SED_ELABTL, /* label too long */ QSE_SED_ELABTL, /* label too long */
QSE_SED_ELABEM /* label name is empty */ QSE_SED_ELABEM, /* label name is empty */
QSE_SED_ELABDU /* duplicate label name */
}; };
enum qse_sed_option_t enum qse_sed_option_t

View File

@ -122,10 +122,12 @@ int qse_sed_getoption (qse_sed_t* sed)
(((++(sed)->src.cur) < (sed)->src.end)? (*(sed)->src.cur): QSE_CHAR_EOF) (((++(sed)->src.cur) < (sed)->src.end)? (*(sed)->src.cur): QSE_CHAR_EOF)
#define IS_SPACE(c) (c == QSE_T(' ') || c == QSE_T('\t')) #define IS_SPACE(c) (c == QSE_T(' ') || c == QSE_T('\t'))
#define IS_WHITE_SPACE(c) (IS_SPACE(c) || c == QSE_T('\n') || c == QSE_T('\r')) /* check if c is a white space */
#define IS_LABEL_TERMINATOR(c) \ #define IS_WSPACE(c) (IS_SPACE(c) || c == QSE_T('\n') || c == QSE_T('\r'))
/* check if c is a label terminator */
#define IS_LABTERM(c) \
(c == QSE_CHAR_EOF || c == QSE_T('#') || \ (c == QSE_CHAR_EOF || c == QSE_T('#') || \
c == QSE_T(';') || IS_WHITE_SPACE(c)) c == QSE_T(';') || IS_WSPACE(c))
static void* compile_regex (qse_sed_t* sed, qse_char_t seof) static void* compile_regex (qse_sed_t* sed, qse_char_t seof)
{ {
@ -310,7 +312,7 @@ oops:
#undef ADD #undef ADD
} }
static qse_str_t* get_label (qse_sed_t* sed, qse_sed_c_t* cmd) static int get_label (qse_sed_t* sed, qse_sed_c_t* cmd)
{ {
qse_cint_t c; qse_cint_t c;
qse_str_t* t = QSE_NULL; /* TODO: move this buffer to sed */ qse_str_t* t = QSE_NULL; /* TODO: move this buffer to sed */
@ -319,11 +321,11 @@ static qse_str_t* get_label (qse_sed_t* sed, qse_sed_c_t* cmd)
c = CURSC(sed); c = CURSC(sed);
while (IS_SPACE(c)) c = NXTSC (sed); while (IS_SPACE(c)) c = NXTSC (sed);
if (IS_LABEL_TERMINATER(c)) if (IS_LABTERM(c))
{ {
/* label name is empty */ /* label name is empty */
sed->errnum = QSE_SED_ELABEM; sed->errnum = QSE_SED_ELABEM;
return QSE_NULL; return -1;
} }
t = qse_str_open (sed->mmgr, 0, 32); t = qse_str_open (sed->mmgr, 0, 32);
@ -336,33 +338,90 @@ static qse_str_t* get_label (qse_sed_t* sed, qse_sed_c_t* cmd)
sed->errnum = QSE_SED_ENOMEM; sed->errnum = QSE_SED_ENOMEM;
goto oops; goto oops;
} }
}
while (!IS_LABEL_TERMINATOR(c));
/* TODO: */ c = NXTSC (sed);
search_label_table (c); }
qse_map_insert (&sed->labs, QSE_STR_PTR(t), QSE_STR_LEN(t), cmd, 0); while (!IS_LABTERM(c));
if (qse_map_search (
&sed->labs, QSE_STR_PTR(t), QSE_STR_LEN(t)) != QSE_NULL)
{
sed->errnum = QSE_SED_ELABDU;
goto oops;
}
if (qse_map_insert (
&sed->labs, QSE_STR_PTR(t), QSE_STR_LEN(t), cmd, 0) == QSE_NULL)
{
sed->errnum = QSE_SED_ENOMEM;
goto oops;;
}
ADVSCP (sed); ADVSCP (sed);
return t; qse_str_close (t);
return 0;
oops: oops:
if (t != QSE_NULL) qse_str_close (t); if (t != QSE_NULL) qse_str_close (t);
return QSE_NULL; return -1;
} }
static qse_str_t* get_target (qse_sed_t* sed, qse_sed_c_t* cmd) static int get_target_label (qse_sed_t* sed, qse_sed_c_t* cmd)
{ {
qse_cint_t c; qse_cint_t c;
qse_str_t* t = QSE_NULL; qse_str_t* t = QSE_NULL;
qse_map_pair_t* pair;
/* skip white spaces */
c = CURSC(sed);
while (IS_SPACE(c)) c = NXTSC (sed);
if (IS_LABTERM(c))
{
/* label name is empty */
sed->errnum = QSE_SED_ELABEM;
return -1;
}
t = qse_str_open (sed->mmgr, 0, 32); t = qse_str_open (sed->mmgr, 0, 32);
if (t == QSE_NULL) goto oops; if (t == QSE_NULL) goto oops;
return t; do
{
if (qse_str_ccat (t, c) == (qse_size_t)-1)
{
sed->errnum = QSE_SED_ENOMEM;
goto oops;
}
c = NXTSC (sed);
}
while (!IS_LABTERM(c));
/* TODO: decide what happens for input like "b xxx yyy;".
* Should yyy be treated as a command, or is it an error? i.e.
* "b xxx ; yyy;" => should a ; or a new line be required after the label?
*/
pair = qse_map_search (&sed->labs, QSE_STR_PTR(t), QSE_STR_LEN(t));
if (pair == QSE_NULL)
{
/* label not resolved yet */
cmd->u.branch.text = t;
cmd->u.branch.target = QSE_NULL;
}
else
{
cmd->u.branch.text = QSE_NULL;
cmd->u.branch.target = QSE_MAP_VPTR(pair);
qse_str_close (t);
}
return 0;
oops: oops:
if (t != QSE_NULL) qse_str_close (t); if (t != QSE_NULL) qse_str_close (t);
return QSE_NULL; return -1;
} }
static int command (qse_sed_t* sed) static int command (qse_sed_t* sed)
@ -370,6 +429,7 @@ static int command (qse_sed_t* sed)
qse_cint_t c; qse_cint_t c;
qse_sed_c_t* cmd = sed->cmd.cur; qse_sed_c_t* cmd = sed->cmd.cur;
handle_command:
c = CURSC (sed); c = CURSC (sed);
switch (c) switch (c)
{ {
@ -378,18 +438,6 @@ qse_printf (QSE_T("command not recognized [%c]\n"), c);
sed->errnum = QSE_SED_ECMDNR; sed->errnum = QSE_SED_ECMDNR;
return -1; return -1;
case QSE_T('{'):
/* insert a negated branch command at the beginning
* of a group. this way, all the commands in a group
* can be skipped. the branch target is set once a
* corresponding } is met. */
cmd->type = QSE_SED_CMD_B;
cmd->negfl = !cmd->negfl;
break;
case QSE_T('}'):
break;
case QSE_T(':'): case QSE_T(':'):
/* label */ /* label */
cmd->type = c; cmd->type = c;
@ -401,8 +449,19 @@ qse_printf (QSE_T("command not recognized [%c]\n"), c);
} }
ADVSCP (sed); ADVSCP (sed);
if (get_label (sed, cmd) == -1) return -1;
goto handle_command;
/* TODO: ... */ case QSE_T('{'):
/* insert a negated branch command at the beginning
* of a group. this way, all the commands in a group
* can be skipped. the branch target is set once a
* corresponding } is met. */
cmd->type = QSE_SED_CMD_B;
cmd->negfl = !cmd->negfl;
break;
case QSE_T('}'):
break; break;
case QSE_T('='): case QSE_T('='):
@ -485,14 +544,10 @@ qse_printf (QSE_T("%s%s"), ttt, QSE_STR_PTR(cmd->u.text));
case QSE_T('b'): case QSE_T('b'):
case QSE_T('t'): case QSE_T('t'):
cmd->type = c; cmd->type = c;
ADVSCP (sed); /* skip the new line */ ADVSCP (sed);
if (get_target_label (sed, cmd) == -1) return -1;
//cmd->u.label = get_target (sed, cmd);
if (cmd->u.label == QSE_NULL) return -1;
break; break;
case QSE_T('r'): case QSE_T('r'):
break; break;
case QSE_T('R'): case QSE_T('R'):

View File

@ -53,7 +53,11 @@ struct qse_sed_c_t
{ {
qse_str_t* text; qse_str_t* text;
void* rex; void* rex;
qse_sed_c_t* label; /* branch destination */ struct
{
qse_str_t* text;
qse_sed_c_t* target;
} branch;
} u; } u;
qse_char_t* rhs; /* right-hand side of substitution */ qse_char_t* rhs; /* right-hand side of substitution */