fixed a bug in handling a regular expression starting with a backslash.

- a regular expression like /\// could not be handled properly without 
this fix
This commit is contained in:
hyung-hwan 2009-03-26 08:53:10 +00:00
parent ce46d8f641
commit ba8bd06016
4 changed files with 79 additions and 27 deletions

View File

@ -37,6 +37,7 @@ enum qse_sed_errnum_t
QSE_SED_EA2PHB, /* address 2 prohibited */ QSE_SED_EA2PHB, /* address 2 prohibited */
QSE_SED_ENEWLN, /* a new line is expected */ QSE_SED_ENEWLN, /* a new line is expected */
QSE_SED_EBSEXP, /* \ is expected */ QSE_SED_EBSEXP, /* \ is expected */
QSE_SED_EBSDEL, /* \ used as a delimiter */
QSE_SED_EGBABS, /* garbage after \ */ QSE_SED_EGBABS, /* garbage after \ */
QSE_SED_ESCEXP, /* ; is expected */ QSE_SED_ESCEXP, /* ; is expected */
QSE_SED_ELABTL, /* label too long */ QSE_SED_ELABTL, /* label too long */
@ -44,7 +45,7 @@ enum qse_sed_errnum_t
QSE_SED_ELABDU, /* duplicate label name */ QSE_SED_ELABDU, /* duplicate label name */
QSE_SED_EFILEM, /* file name is empty */ QSE_SED_EFILEM, /* file name is empty */
QSE_SED_EFILIL, /* illegal file name */ QSE_SED_EFILIL, /* illegal file name */
QSE_SED_ETSNTR, /* translation set not terminated */ QSE_SED_ENOTRM, /* not terminated properly */
QSE_SED_ETSNSL, /* translation set not the same length*/ QSE_SED_ETSNSL, /* translation set not the same length*/
QSE_SED_EGRNBA, /* group brackets not balanced */ QSE_SED_EGRNBA, /* group brackets not balanced */
QSE_SED_EGRNTD /* group nested too deeply */ QSE_SED_EGRNTD /* group nested too deeply */

View File

@ -1,5 +1,5 @@
/* /*
* $Id: err.c 75 2009-02-22 14:10:34Z hyunghwan.chung $ * $Id: err.c 113 2009-03-25 14:53:10Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
@ -65,7 +65,7 @@ static const qse_char_t* __geterrstr (int errnum)
QSE_T("unexpected end of source"), QSE_T("unexpected end of source"),
QSE_T("a comment not closed properly"), QSE_T("a comment not closed properly"),
QSE_T("a string not closed with a quote"), QSE_T("a string or a regular expression not closed"),
QSE_T("unexpected end of a regular expression"), QSE_T("unexpected end of a regular expression"),
QSE_T("a left brace expected in place of '${0}'"), QSE_T("a left brace expected in place of '${0}'"),
QSE_T("a left parenthesis expected in place of '${0}'"), QSE_T("a left parenthesis expected in place of '${0}'"),

View File

@ -1,5 +1,5 @@
/* /*
* $Id: parse.c 85 2009-02-26 10:56:12Z hyunghwan.chung $ * $Id: parse.c 113 2009-03-25 14:53:10Z hyunghwan.chung $
* *
Copyright 2006-2009 Chung, Hyung-Hwan. Copyright 2006-2009 Chung, Hyung-Hwan.
@ -220,7 +220,7 @@ static int get_charstr (qse_awk_t* awk);
static int get_rexstr (qse_awk_t* awk); static int get_rexstr (qse_awk_t* awk);
static int get_string ( static int get_string (
qse_awk_t* awk, qse_char_t end_char, qse_awk_t* awk, qse_char_t end_char,
qse_char_t esc_char, qse_bool_t keep_esc_char); qse_char_t esc_char, qse_bool_t keep_esc_char, int preescaped);
static int get_char (qse_awk_t* awk); static int get_char (qse_awk_t* awk);
static int unget_char (qse_awk_t* awk, qse_cint_t c); static int unget_char (qse_awk_t* awk, qse_cint_t c);
static int skip_spaces (qse_awk_t* awk); static int skip_spaces (qse_awk_t* awk);
@ -2930,7 +2930,9 @@ static qse_awk_nde_t* parse_primary (qse_awk_t* awk, qse_size_t line)
int errnum; int errnum;
/* the regular expression is tokenized here because /* the regular expression is tokenized here because
* of the context-sensitivity of the slash symbol */ * of the context-sensitivity of the slash symbol.
* if TOKEN_DIV is seen as a primary, it tries to compile
* it as a regular expression */
SET_TOKEN_TYPE (awk, TOKEN_REX); SET_TOKEN_TYPE (awk, TOKEN_REX);
qse_str_clear (awk->token.name); qse_str_clear (awk->token.name);
@ -4567,7 +4569,6 @@ static qse_awk_nde_t* parse_print (qse_awk_t* awk, qse_size_t line, int type)
return (qse_awk_nde_t*)nde; return (qse_awk_nde_t*)nde;
} }
static int get_token (qse_awk_t* awk) static int get_token (qse_awk_t* awk)
{ {
qse_cint_t c; qse_cint_t c;
@ -5091,7 +5092,7 @@ static int get_charstr (qse_awk_t* awk)
* has been called */ * has been called */
ADD_TOKEN_CHAR (awk, awk->src.lex.curc); ADD_TOKEN_CHAR (awk, awk->src.lex.curc);
} }
return get_string (awk, QSE_T('\"'), QSE_T('\\'), QSE_FALSE); return get_string (awk, QSE_T('\"'), QSE_T('\\'), QSE_FALSE, 0);
} }
static int get_rexstr (qse_awk_t* awk) static int get_rexstr (qse_awk_t* awk)
@ -5099,23 +5100,44 @@ static int get_rexstr (qse_awk_t* awk)
if (awk->src.lex.curc == QSE_T('/')) if (awk->src.lex.curc == QSE_T('/'))
{ {
/* this part of the function is different from get_charstr /* this part of the function is different from get_charstr
* because of the way this function is called */ * because of the way this function is called.
* this condition is met when the input is //.
* the first / has been tokenized to TOKEN_DIV already.
* if TOKEN_DIV is seen as a primary, this function is called.
* as the token buffer has been cleared by the caller and
* the token type is set to TOKEN_REX, this function can
* just return after reading the next character */
GET_CHAR (awk); GET_CHAR (awk);
return 0; return 0;
} }
else else
{ {
int escaped = 0;
if (awk->src.lex.curc == QSE_T('\\'))
{
/* for input like /\//, this condition is met.
* the initial escape character is added when the
* second character is handled in get_string() */
escaped = 1;
}
else
{
/* add other initial characters here as get_string()
* begins with reading the next character */
ADD_TOKEN_CHAR (awk, awk->src.lex.curc); ADD_TOKEN_CHAR (awk, awk->src.lex.curc);
return get_string (awk, QSE_T('/'), QSE_T('\\'), QSE_TRUE); }
return get_string (awk,
QSE_T('/'), QSE_T('\\'), QSE_TRUE, escaped);
} }
} }
static int get_string ( static int get_string (
qse_awk_t* awk, qse_char_t end_char, qse_awk_t* awk, qse_char_t end_char,
qse_char_t esc_char, qse_bool_t keep_esc_char) qse_char_t esc_char, qse_bool_t keep_esc_char,
int preescaped)
{ {
qse_cint_t c; qse_cint_t c;
int escaped = 0; int escaped = preescaped;
int digit_count = 0; int digit_count = 0;
qse_cint_t c_acc = 0; qse_cint_t c_acc = 0;

View File

@ -152,6 +152,7 @@ const qse_char_t* qse_sed_geterrmsg (qse_sed_t* sed)
QSE_T("address 2 prohibited"), QSE_T("address 2 prohibited"),
QSE_T("a new line expected"), QSE_T("a new line expected"),
QSE_T("a backslash expected"), QSE_T("a backslash expected"),
QSE_T("a backslash used as a delimiter"),
QSE_T("garbage after a backslash"), QSE_T("garbage after a backslash"),
QSE_T("a semicolon expected"), QSE_T("a semicolon expected"),
QSE_T("label name too long"), QSE_T("label name too long"),
@ -159,7 +160,7 @@ const qse_char_t* qse_sed_geterrmsg (qse_sed_t* sed)
QSE_T("duplicate label name"), QSE_T("duplicate label name"),
QSE_T("empty file name"), QSE_T("empty file name"),
QSE_T("illegal file name"), QSE_T("illegal file name"),
QSE_T("translation set not terminated"), QSE_T("command not terminated properly"),
QSE_T("strings in translation set not the same length"), QSE_T("strings in translation set not the same length"),
QSE_T("group brackets not balanced"), QSE_T("group brackets not balanced"),
QSE_T("group nesting too deep") QSE_T("group nesting too deep")
@ -227,7 +228,6 @@ static void* compile_regex (qse_sed_t* sed, qse_char_t rxend)
} }
if (c == QSE_T('n')) c = QSE_T('\n'); if (c == QSE_T('n')) c = QSE_T('\n');
else if (c == QSE_T('r')) c = QSE_T('\r');
// TODO: support more escaped characters?? // TODO: support more escaped characters??
} }
@ -576,7 +576,6 @@ static int get_file_name (qse_sed_t* sed, qse_sed_cmd_t* cmd)
} }
if (c == QSE_T('n')) c = QSE_T('\n'); if (c == QSE_T('n')) c = QSE_T('\n');
else if (c == QSE_T('r')) c = QSE_T('\r');
} }
if (qse_str_ccat (t, c) == (qse_size_t)-1) if (qse_str_ccat (t, c) == (qse_size_t)-1)
@ -613,15 +612,16 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
c = CURSC (sed); c = CURSC (sed);
if (c == QSE_CHAR_EOF || IS_LINTERM(c)) if (c == QSE_CHAR_EOF || IS_LINTERM(c))
{ {
//sed->errnum = QSE_SED_ESUNTR; /* not terminated properly */
sed->errnum = QSE_SED_ENOTRM;
goto oops; goto oops;
} }
delim = c; delim = c;
if (delim == QSE_T('\\')) if (delim == QSE_T('\\'))
{ {
/* illegal delimiter */ /* backslash is an illegal delimiter */
//sed->errnum = QSE_SED_ESUILD; sed->errnum = QSE_SED_EBSDEL;
goto oops; goto oops;
} }
@ -635,6 +635,31 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
c = NXTSC (sed); c = NXTSC (sed);
while (c != delim) while (c != delim)
{ {
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
{
sed->errnum = QSE_SED_ENOTRM;
goto oops;
}
if (c == QSE_T('\\'))
{
c = NXTSC (sed);
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
{
sed->errnum = QSE_SED_ENOTRM;
goto oops;
}
if (c == QSE_T('n')) c = QSE_T('\n');
}
if (qse_str_ccat (t, c) == (qse_size_t)-1)
{
sed->errnum = QSE_SED_ENOMEM;
goto oops;
}
c = NXTSC (sed);
} }
oops: oops:
@ -652,11 +677,17 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
if (c == QSE_CHAR_EOF || IS_LINTERM(c)) if (c == QSE_CHAR_EOF || IS_LINTERM(c))
{ {
/* translation set terminated prematurely*/ /* translation set terminated prematurely*/
sed->errnum = QSE_SED_ETSNTR; sed->errnum = QSE_SED_ENOTRM;
goto oops; goto oops;
} }
delim = c; delim = c;
if (delim == QSE_T('\\'))
{
/* backslash is an illegal delimiter */
sed->errnum = QSE_SED_EBSDEL;
goto oops;
}
t = qse_str_open (sed->mmgr, 0, 32); t = qse_str_open (sed->mmgr, 0, 32);
if (t == QSE_NULL) if (t == QSE_NULL)
@ -672,7 +703,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
if (c == QSE_CHAR_EOF || IS_LINTERM(c)) if (c == QSE_CHAR_EOF || IS_LINTERM(c))
{ {
sed->errnum = QSE_SED_ETSNTR; sed->errnum = QSE_SED_ENOTRM;
goto oops; goto oops;
} }
@ -681,12 +712,11 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
c = NXTSC (sed); c = NXTSC (sed);
if (c == QSE_CHAR_EOF || IS_LINTERM(c)) if (c == QSE_CHAR_EOF || IS_LINTERM(c))
{ {
sed->errnum = QSE_SED_ETSNTR; sed->errnum = QSE_SED_ENOTRM;
goto oops; goto oops;
} }
if (c == QSE_T('n')) c = QSE_T('\n'); if (c == QSE_T('n')) c = QSE_T('\n');
else if (c == QSE_T('r')) c = QSE_T('\r');
} }
b[0] = c; b[0] = c;
@ -704,7 +734,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
{ {
if (c == QSE_CHAR_EOF || IS_LINTERM(c)) if (c == QSE_CHAR_EOF || IS_LINTERM(c))
{ {
sed->errnum = QSE_SED_ETSNTR; sed->errnum = QSE_SED_ENOTRM;
goto oops; goto oops;
} }
@ -713,12 +743,11 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
c = NXTSC (sed); c = NXTSC (sed);
if (c == QSE_CHAR_EOF || IS_LINTERM(c)) if (c == QSE_CHAR_EOF || IS_LINTERM(c))
{ {
sed->errnum = QSE_SED_ETSNTR; sed->errnum = QSE_SED_ENOTRM;
goto oops; goto oops;
} }
if (c == QSE_T('n')) c = QSE_T('\n'); if (c == QSE_T('n')) c = QSE_T('\n');
else if (c == QSE_T('r')) c = QSE_T('\r');
} }
if (pos >= QSE_STR_LEN(t)) if (pos >= QSE_STR_LEN(t))