fixed a bug in handling a regular expression starting with a backslash.
- a regular expression like /\// could not be handled properly without this fix
This commit is contained in:
parent
ce46d8f641
commit
ba8bd06016
@ -37,6 +37,7 @@ enum qse_sed_errnum_t
|
||||
QSE_SED_EA2PHB, /* address 2 prohibited */
|
||||
QSE_SED_ENEWLN, /* a new line is expected */
|
||||
QSE_SED_EBSEXP, /* \ is expected */
|
||||
QSE_SED_EBSDEL, /* \ used as a delimiter */
|
||||
QSE_SED_EGBABS, /* garbage after \ */
|
||||
QSE_SED_ESCEXP, /* ; is expected */
|
||||
QSE_SED_ELABTL, /* label too long */
|
||||
@ -44,7 +45,7 @@ enum qse_sed_errnum_t
|
||||
QSE_SED_ELABDU, /* duplicate label name */
|
||||
QSE_SED_EFILEM, /* file name is empty */
|
||||
QSE_SED_EFILIL, /* illegal file name */
|
||||
QSE_SED_ETSNTR, /* translation set not terminated */
|
||||
QSE_SED_ENOTRM, /* not terminated properly */
|
||||
QSE_SED_ETSNSL, /* translation set not the same length*/
|
||||
QSE_SED_EGRNBA, /* group brackets not balanced */
|
||||
QSE_SED_EGRNTD /* group nested too deeply */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: err.c 75 2009-02-22 14:10:34Z hyunghwan.chung $
|
||||
* $Id: err.c 113 2009-03-25 14:53:10Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -65,7 +65,7 @@ static const qse_char_t* __geterrstr (int errnum)
|
||||
|
||||
QSE_T("unexpected end of source"),
|
||||
QSE_T("a comment not closed properly"),
|
||||
QSE_T("a string not closed with a quote"),
|
||||
QSE_T("a string or a regular expression not closed"),
|
||||
QSE_T("unexpected end of a regular expression"),
|
||||
QSE_T("a left brace expected in place of '${0}'"),
|
||||
QSE_T("a left parenthesis expected in place of '${0}'"),
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* $Id: parse.c 85 2009-02-26 10:56:12Z hyunghwan.chung $
|
||||
* $Id: parse.c 113 2009-03-25 14:53:10Z hyunghwan.chung $
|
||||
*
|
||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||
|
||||
@ -220,7 +220,7 @@ static int get_charstr (qse_awk_t* awk);
|
||||
static int get_rexstr (qse_awk_t* awk);
|
||||
static int get_string (
|
||||
qse_awk_t* awk, qse_char_t end_char,
|
||||
qse_char_t esc_char, qse_bool_t keep_esc_char);
|
||||
qse_char_t esc_char, qse_bool_t keep_esc_char, int preescaped);
|
||||
static int get_char (qse_awk_t* awk);
|
||||
static int unget_char (qse_awk_t* awk, qse_cint_t c);
|
||||
static int skip_spaces (qse_awk_t* awk);
|
||||
@ -2930,7 +2930,9 @@ static qse_awk_nde_t* parse_primary (qse_awk_t* awk, qse_size_t line)
|
||||
int errnum;
|
||||
|
||||
/* the regular expression is tokenized here because
|
||||
* of the context-sensitivity of the slash symbol */
|
||||
* of the context-sensitivity of the slash symbol.
|
||||
* if TOKEN_DIV is seen as a primary, it tries to compile
|
||||
* it as a regular expression */
|
||||
SET_TOKEN_TYPE (awk, TOKEN_REX);
|
||||
|
||||
qse_str_clear (awk->token.name);
|
||||
@ -4567,7 +4569,6 @@ static qse_awk_nde_t* parse_print (qse_awk_t* awk, qse_size_t line, int type)
|
||||
return (qse_awk_nde_t*)nde;
|
||||
}
|
||||
|
||||
|
||||
static int get_token (qse_awk_t* awk)
|
||||
{
|
||||
qse_cint_t c;
|
||||
@ -5091,7 +5092,7 @@ static int get_charstr (qse_awk_t* awk)
|
||||
* has been called */
|
||||
ADD_TOKEN_CHAR (awk, awk->src.lex.curc);
|
||||
}
|
||||
return get_string (awk, QSE_T('\"'), QSE_T('\\'), QSE_FALSE);
|
||||
return get_string (awk, QSE_T('\"'), QSE_T('\\'), QSE_FALSE, 0);
|
||||
}
|
||||
|
||||
static int get_rexstr (qse_awk_t* awk)
|
||||
@ -5099,23 +5100,44 @@ static int get_rexstr (qse_awk_t* awk)
|
||||
if (awk->src.lex.curc == QSE_T('/'))
|
||||
{
|
||||
/* this part of the function is different from get_charstr
|
||||
* because of the way this function is called */
|
||||
* because of the way this function is called.
|
||||
* this condition is met when the input is //.
|
||||
* the first / has been tokenized to TOKEN_DIV already.
|
||||
* if TOKEN_DIV is seen as a primary, this function is called.
|
||||
* as the token buffer has been cleared by the caller and
|
||||
* the token type is set to TOKEN_REX, this function can
|
||||
* just return after reading the next character */
|
||||
GET_CHAR (awk);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
ADD_TOKEN_CHAR (awk, awk->src.lex.curc);
|
||||
return get_string (awk, QSE_T('/'), QSE_T('\\'), QSE_TRUE);
|
||||
int escaped = 0;
|
||||
if (awk->src.lex.curc == QSE_T('\\'))
|
||||
{
|
||||
/* for input like /\//, this condition is met.
|
||||
* the initial escape character is added when the
|
||||
* second character is handled in get_string() */
|
||||
escaped = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* add other initial characters here as get_string()
|
||||
* begins with reading the next character */
|
||||
ADD_TOKEN_CHAR (awk, awk->src.lex.curc);
|
||||
}
|
||||
return get_string (awk,
|
||||
QSE_T('/'), QSE_T('\\'), QSE_TRUE, escaped);
|
||||
}
|
||||
}
|
||||
|
||||
static int get_string (
|
||||
qse_awk_t* awk, qse_char_t end_char,
|
||||
qse_char_t esc_char, qse_bool_t keep_esc_char)
|
||||
qse_char_t esc_char, qse_bool_t keep_esc_char,
|
||||
int preescaped)
|
||||
{
|
||||
qse_cint_t c;
|
||||
int escaped = 0;
|
||||
int escaped = preescaped;
|
||||
int digit_count = 0;
|
||||
qse_cint_t c_acc = 0;
|
||||
|
||||
|
@ -152,6 +152,7 @@ const qse_char_t* qse_sed_geterrmsg (qse_sed_t* sed)
|
||||
QSE_T("address 2 prohibited"),
|
||||
QSE_T("a new line expected"),
|
||||
QSE_T("a backslash expected"),
|
||||
QSE_T("a backslash used as a delimiter"),
|
||||
QSE_T("garbage after a backslash"),
|
||||
QSE_T("a semicolon expected"),
|
||||
QSE_T("label name too long"),
|
||||
@ -159,7 +160,7 @@ const qse_char_t* qse_sed_geterrmsg (qse_sed_t* sed)
|
||||
QSE_T("duplicate label name"),
|
||||
QSE_T("empty file name"),
|
||||
QSE_T("illegal file name"),
|
||||
QSE_T("translation set not terminated"),
|
||||
QSE_T("command not terminated properly"),
|
||||
QSE_T("strings in translation set not the same length"),
|
||||
QSE_T("group brackets not balanced"),
|
||||
QSE_T("group nesting too deep")
|
||||
@ -227,7 +228,6 @@ static void* compile_regex (qse_sed_t* sed, qse_char_t rxend)
|
||||
}
|
||||
|
||||
if (c == QSE_T('n')) c = QSE_T('\n');
|
||||
else if (c == QSE_T('r')) c = QSE_T('\r');
|
||||
// TODO: support more escaped characters??
|
||||
}
|
||||
|
||||
@ -576,7 +576,6 @@ static int get_file_name (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
}
|
||||
|
||||
if (c == QSE_T('n')) c = QSE_T('\n');
|
||||
else if (c == QSE_T('r')) c = QSE_T('\r');
|
||||
}
|
||||
|
||||
if (qse_str_ccat (t, c) == (qse_size_t)-1)
|
||||
@ -613,15 +612,16 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
c = CURSC (sed);
|
||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||
{
|
||||
//sed->errnum = QSE_SED_ESUNTR;
|
||||
/* not terminated properly */
|
||||
sed->errnum = QSE_SED_ENOTRM;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
delim = c;
|
||||
if (delim == QSE_T('\\'))
|
||||
{
|
||||
/* illegal delimiter */
|
||||
//sed->errnum = QSE_SED_ESUILD;
|
||||
/* backslash is an illegal delimiter */
|
||||
sed->errnum = QSE_SED_EBSDEL;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
@ -635,6 +635,31 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
c = NXTSC (sed);
|
||||
while (c != delim)
|
||||
{
|
||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||
{
|
||||
sed->errnum = QSE_SED_ENOTRM;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
if (c == QSE_T('\\'))
|
||||
{
|
||||
c = NXTSC (sed);
|
||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||
{
|
||||
sed->errnum = QSE_SED_ENOTRM;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
if (c == QSE_T('n')) c = QSE_T('\n');
|
||||
}
|
||||
|
||||
if (qse_str_ccat (t, c) == (qse_size_t)-1)
|
||||
{
|
||||
sed->errnum = QSE_SED_ENOMEM;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
c = NXTSC (sed);
|
||||
}
|
||||
|
||||
oops:
|
||||
@ -652,11 +677,17 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||
{
|
||||
/* translation set terminated prematurely*/
|
||||
sed->errnum = QSE_SED_ETSNTR;
|
||||
sed->errnum = QSE_SED_ENOTRM;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
delim = c;
|
||||
if (delim == QSE_T('\\'))
|
||||
{
|
||||
/* backslash is an illegal delimiter */
|
||||
sed->errnum = QSE_SED_EBSDEL;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
t = qse_str_open (sed->mmgr, 0, 32);
|
||||
if (t == QSE_NULL)
|
||||
@ -672,7 +703,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
|
||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||
{
|
||||
sed->errnum = QSE_SED_ETSNTR;
|
||||
sed->errnum = QSE_SED_ENOTRM;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
@ -681,12 +712,11 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
c = NXTSC (sed);
|
||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||
{
|
||||
sed->errnum = QSE_SED_ETSNTR;
|
||||
sed->errnum = QSE_SED_ENOTRM;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
if (c == QSE_T('n')) c = QSE_T('\n');
|
||||
else if (c == QSE_T('r')) c = QSE_T('\r');
|
||||
}
|
||||
|
||||
b[0] = c;
|
||||
@ -704,7 +734,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
{
|
||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||
{
|
||||
sed->errnum = QSE_SED_ETSNTR;
|
||||
sed->errnum = QSE_SED_ENOTRM;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
@ -713,12 +743,11 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
||||
c = NXTSC (sed);
|
||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||
{
|
||||
sed->errnum = QSE_SED_ETSNTR;
|
||||
sed->errnum = QSE_SED_ENOTRM;
|
||||
goto oops;
|
||||
}
|
||||
|
||||
if (c == QSE_T('n')) c = QSE_T('\n');
|
||||
else if (c == QSE_T('r')) c = QSE_T('\r');
|
||||
}
|
||||
|
||||
if (pos >= QSE_STR_LEN(t))
|
||||
|
Loading…
Reference in New Issue
Block a user