fixed a bug in handling a regular expression starting with a backslash.
- a regular expression like /\// could not be handled properly without this fix
This commit is contained in:
parent
ce46d8f641
commit
ba8bd06016
@ -37,6 +37,7 @@ enum qse_sed_errnum_t
|
|||||||
QSE_SED_EA2PHB, /* address 2 prohibited */
|
QSE_SED_EA2PHB, /* address 2 prohibited */
|
||||||
QSE_SED_ENEWLN, /* a new line is expected */
|
QSE_SED_ENEWLN, /* a new line is expected */
|
||||||
QSE_SED_EBSEXP, /* \ is expected */
|
QSE_SED_EBSEXP, /* \ is expected */
|
||||||
|
QSE_SED_EBSDEL, /* \ used as a delimiter */
|
||||||
QSE_SED_EGBABS, /* garbage after \ */
|
QSE_SED_EGBABS, /* garbage after \ */
|
||||||
QSE_SED_ESCEXP, /* ; is expected */
|
QSE_SED_ESCEXP, /* ; is expected */
|
||||||
QSE_SED_ELABTL, /* label too long */
|
QSE_SED_ELABTL, /* label too long */
|
||||||
@ -44,7 +45,7 @@ enum qse_sed_errnum_t
|
|||||||
QSE_SED_ELABDU, /* duplicate label name */
|
QSE_SED_ELABDU, /* duplicate label name */
|
||||||
QSE_SED_EFILEM, /* file name is empty */
|
QSE_SED_EFILEM, /* file name is empty */
|
||||||
QSE_SED_EFILIL, /* illegal file name */
|
QSE_SED_EFILIL, /* illegal file name */
|
||||||
QSE_SED_ETSNTR, /* translation set not terminated */
|
QSE_SED_ENOTRM, /* not terminated properly */
|
||||||
QSE_SED_ETSNSL, /* translation set not the same length*/
|
QSE_SED_ETSNSL, /* translation set not the same length*/
|
||||||
QSE_SED_EGRNBA, /* group brackets not balanced */
|
QSE_SED_EGRNBA, /* group brackets not balanced */
|
||||||
QSE_SED_EGRNTD /* group nested too deeply */
|
QSE_SED_EGRNTD /* group nested too deeply */
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: err.c 75 2009-02-22 14:10:34Z hyunghwan.chung $
|
* $Id: err.c 113 2009-03-25 14:53:10Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
|
|
||||||
@ -65,7 +65,7 @@ static const qse_char_t* __geterrstr (int errnum)
|
|||||||
|
|
||||||
QSE_T("unexpected end of source"),
|
QSE_T("unexpected end of source"),
|
||||||
QSE_T("a comment not closed properly"),
|
QSE_T("a comment not closed properly"),
|
||||||
QSE_T("a string not closed with a quote"),
|
QSE_T("a string or a regular expression not closed"),
|
||||||
QSE_T("unexpected end of a regular expression"),
|
QSE_T("unexpected end of a regular expression"),
|
||||||
QSE_T("a left brace expected in place of '${0}'"),
|
QSE_T("a left brace expected in place of '${0}'"),
|
||||||
QSE_T("a left parenthesis expected in place of '${0}'"),
|
QSE_T("a left parenthesis expected in place of '${0}'"),
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* $Id: parse.c 85 2009-02-26 10:56:12Z hyunghwan.chung $
|
* $Id: parse.c 113 2009-03-25 14:53:10Z hyunghwan.chung $
|
||||||
*
|
*
|
||||||
Copyright 2006-2009 Chung, Hyung-Hwan.
|
Copyright 2006-2009 Chung, Hyung-Hwan.
|
||||||
|
|
||||||
@ -220,7 +220,7 @@ static int get_charstr (qse_awk_t* awk);
|
|||||||
static int get_rexstr (qse_awk_t* awk);
|
static int get_rexstr (qse_awk_t* awk);
|
||||||
static int get_string (
|
static int get_string (
|
||||||
qse_awk_t* awk, qse_char_t end_char,
|
qse_awk_t* awk, qse_char_t end_char,
|
||||||
qse_char_t esc_char, qse_bool_t keep_esc_char);
|
qse_char_t esc_char, qse_bool_t keep_esc_char, int preescaped);
|
||||||
static int get_char (qse_awk_t* awk);
|
static int get_char (qse_awk_t* awk);
|
||||||
static int unget_char (qse_awk_t* awk, qse_cint_t c);
|
static int unget_char (qse_awk_t* awk, qse_cint_t c);
|
||||||
static int skip_spaces (qse_awk_t* awk);
|
static int skip_spaces (qse_awk_t* awk);
|
||||||
@ -2930,7 +2930,9 @@ static qse_awk_nde_t* parse_primary (qse_awk_t* awk, qse_size_t line)
|
|||||||
int errnum;
|
int errnum;
|
||||||
|
|
||||||
/* the regular expression is tokenized here because
|
/* the regular expression is tokenized here because
|
||||||
* of the context-sensitivity of the slash symbol */
|
* of the context-sensitivity of the slash symbol.
|
||||||
|
* if TOKEN_DIV is seen as a primary, it tries to compile
|
||||||
|
* it as a regular expression */
|
||||||
SET_TOKEN_TYPE (awk, TOKEN_REX);
|
SET_TOKEN_TYPE (awk, TOKEN_REX);
|
||||||
|
|
||||||
qse_str_clear (awk->token.name);
|
qse_str_clear (awk->token.name);
|
||||||
@ -4567,7 +4569,6 @@ static qse_awk_nde_t* parse_print (qse_awk_t* awk, qse_size_t line, int type)
|
|||||||
return (qse_awk_nde_t*)nde;
|
return (qse_awk_nde_t*)nde;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int get_token (qse_awk_t* awk)
|
static int get_token (qse_awk_t* awk)
|
||||||
{
|
{
|
||||||
qse_cint_t c;
|
qse_cint_t c;
|
||||||
@ -5091,7 +5092,7 @@ static int get_charstr (qse_awk_t* awk)
|
|||||||
* has been called */
|
* has been called */
|
||||||
ADD_TOKEN_CHAR (awk, awk->src.lex.curc);
|
ADD_TOKEN_CHAR (awk, awk->src.lex.curc);
|
||||||
}
|
}
|
||||||
return get_string (awk, QSE_T('\"'), QSE_T('\\'), QSE_FALSE);
|
return get_string (awk, QSE_T('\"'), QSE_T('\\'), QSE_FALSE, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int get_rexstr (qse_awk_t* awk)
|
static int get_rexstr (qse_awk_t* awk)
|
||||||
@ -5099,23 +5100,44 @@ static int get_rexstr (qse_awk_t* awk)
|
|||||||
if (awk->src.lex.curc == QSE_T('/'))
|
if (awk->src.lex.curc == QSE_T('/'))
|
||||||
{
|
{
|
||||||
/* this part of the function is different from get_charstr
|
/* this part of the function is different from get_charstr
|
||||||
* because of the way this function is called */
|
* because of the way this function is called.
|
||||||
|
* this condition is met when the input is //.
|
||||||
|
* the first / has been tokenized to TOKEN_DIV already.
|
||||||
|
* if TOKEN_DIV is seen as a primary, this function is called.
|
||||||
|
* as the token buffer has been cleared by the caller and
|
||||||
|
* the token type is set to TOKEN_REX, this function can
|
||||||
|
* just return after reading the next character */
|
||||||
GET_CHAR (awk);
|
GET_CHAR (awk);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ADD_TOKEN_CHAR (awk, awk->src.lex.curc);
|
int escaped = 0;
|
||||||
return get_string (awk, QSE_T('/'), QSE_T('\\'), QSE_TRUE);
|
if (awk->src.lex.curc == QSE_T('\\'))
|
||||||
|
{
|
||||||
|
/* for input like /\//, this condition is met.
|
||||||
|
* the initial escape character is added when the
|
||||||
|
* second character is handled in get_string() */
|
||||||
|
escaped = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* add other initial characters here as get_string()
|
||||||
|
* begins with reading the next character */
|
||||||
|
ADD_TOKEN_CHAR (awk, awk->src.lex.curc);
|
||||||
|
}
|
||||||
|
return get_string (awk,
|
||||||
|
QSE_T('/'), QSE_T('\\'), QSE_TRUE, escaped);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int get_string (
|
static int get_string (
|
||||||
qse_awk_t* awk, qse_char_t end_char,
|
qse_awk_t* awk, qse_char_t end_char,
|
||||||
qse_char_t esc_char, qse_bool_t keep_esc_char)
|
qse_char_t esc_char, qse_bool_t keep_esc_char,
|
||||||
|
int preescaped)
|
||||||
{
|
{
|
||||||
qse_cint_t c;
|
qse_cint_t c;
|
||||||
int escaped = 0;
|
int escaped = preescaped;
|
||||||
int digit_count = 0;
|
int digit_count = 0;
|
||||||
qse_cint_t c_acc = 0;
|
qse_cint_t c_acc = 0;
|
||||||
|
|
||||||
|
@ -152,6 +152,7 @@ const qse_char_t* qse_sed_geterrmsg (qse_sed_t* sed)
|
|||||||
QSE_T("address 2 prohibited"),
|
QSE_T("address 2 prohibited"),
|
||||||
QSE_T("a new line expected"),
|
QSE_T("a new line expected"),
|
||||||
QSE_T("a backslash expected"),
|
QSE_T("a backslash expected"),
|
||||||
|
QSE_T("a backslash used as a delimiter"),
|
||||||
QSE_T("garbage after a backslash"),
|
QSE_T("garbage after a backslash"),
|
||||||
QSE_T("a semicolon expected"),
|
QSE_T("a semicolon expected"),
|
||||||
QSE_T("label name too long"),
|
QSE_T("label name too long"),
|
||||||
@ -159,7 +160,7 @@ const qse_char_t* qse_sed_geterrmsg (qse_sed_t* sed)
|
|||||||
QSE_T("duplicate label name"),
|
QSE_T("duplicate label name"),
|
||||||
QSE_T("empty file name"),
|
QSE_T("empty file name"),
|
||||||
QSE_T("illegal file name"),
|
QSE_T("illegal file name"),
|
||||||
QSE_T("translation set not terminated"),
|
QSE_T("command not terminated properly"),
|
||||||
QSE_T("strings in translation set not the same length"),
|
QSE_T("strings in translation set not the same length"),
|
||||||
QSE_T("group brackets not balanced"),
|
QSE_T("group brackets not balanced"),
|
||||||
QSE_T("group nesting too deep")
|
QSE_T("group nesting too deep")
|
||||||
@ -227,7 +228,6 @@ static void* compile_regex (qse_sed_t* sed, qse_char_t rxend)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (c == QSE_T('n')) c = QSE_T('\n');
|
if (c == QSE_T('n')) c = QSE_T('\n');
|
||||||
else if (c == QSE_T('r')) c = QSE_T('\r');
|
|
||||||
// TODO: support more escaped characters??
|
// TODO: support more escaped characters??
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -576,7 +576,6 @@ static int get_file_name (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (c == QSE_T('n')) c = QSE_T('\n');
|
if (c == QSE_T('n')) c = QSE_T('\n');
|
||||||
else if (c == QSE_T('r')) c = QSE_T('\r');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (qse_str_ccat (t, c) == (qse_size_t)-1)
|
if (qse_str_ccat (t, c) == (qse_size_t)-1)
|
||||||
@ -613,15 +612,16 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
c = CURSC (sed);
|
c = CURSC (sed);
|
||||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||||
{
|
{
|
||||||
//sed->errnum = QSE_SED_ESUNTR;
|
/* not terminated properly */
|
||||||
|
sed->errnum = QSE_SED_ENOTRM;
|
||||||
goto oops;
|
goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
delim = c;
|
delim = c;
|
||||||
if (delim == QSE_T('\\'))
|
if (delim == QSE_T('\\'))
|
||||||
{
|
{
|
||||||
/* illegal delimiter */
|
/* backslash is an illegal delimiter */
|
||||||
//sed->errnum = QSE_SED_ESUILD;
|
sed->errnum = QSE_SED_EBSDEL;
|
||||||
goto oops;
|
goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -635,6 +635,31 @@ static int get_subst (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
c = NXTSC (sed);
|
c = NXTSC (sed);
|
||||||
while (c != delim)
|
while (c != delim)
|
||||||
{
|
{
|
||||||
|
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||||
|
{
|
||||||
|
sed->errnum = QSE_SED_ENOTRM;
|
||||||
|
goto oops;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == QSE_T('\\'))
|
||||||
|
{
|
||||||
|
c = NXTSC (sed);
|
||||||
|
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||||
|
{
|
||||||
|
sed->errnum = QSE_SED_ENOTRM;
|
||||||
|
goto oops;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == QSE_T('n')) c = QSE_T('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (qse_str_ccat (t, c) == (qse_size_t)-1)
|
||||||
|
{
|
||||||
|
sed->errnum = QSE_SED_ENOMEM;
|
||||||
|
goto oops;
|
||||||
|
}
|
||||||
|
|
||||||
|
c = NXTSC (sed);
|
||||||
}
|
}
|
||||||
|
|
||||||
oops:
|
oops:
|
||||||
@ -652,11 +677,17 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||||
{
|
{
|
||||||
/* translation set terminated prematurely*/
|
/* translation set terminated prematurely*/
|
||||||
sed->errnum = QSE_SED_ETSNTR;
|
sed->errnum = QSE_SED_ENOTRM;
|
||||||
goto oops;
|
goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
delim = c;
|
delim = c;
|
||||||
|
if (delim == QSE_T('\\'))
|
||||||
|
{
|
||||||
|
/* backslash is an illegal delimiter */
|
||||||
|
sed->errnum = QSE_SED_EBSDEL;
|
||||||
|
goto oops;
|
||||||
|
}
|
||||||
|
|
||||||
t = qse_str_open (sed->mmgr, 0, 32);
|
t = qse_str_open (sed->mmgr, 0, 32);
|
||||||
if (t == QSE_NULL)
|
if (t == QSE_NULL)
|
||||||
@ -672,7 +703,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
|
|
||||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||||
{
|
{
|
||||||
sed->errnum = QSE_SED_ETSNTR;
|
sed->errnum = QSE_SED_ENOTRM;
|
||||||
goto oops;
|
goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -681,12 +712,11 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
c = NXTSC (sed);
|
c = NXTSC (sed);
|
||||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||||
{
|
{
|
||||||
sed->errnum = QSE_SED_ETSNTR;
|
sed->errnum = QSE_SED_ENOTRM;
|
||||||
goto oops;
|
goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c == QSE_T('n')) c = QSE_T('\n');
|
if (c == QSE_T('n')) c = QSE_T('\n');
|
||||||
else if (c == QSE_T('r')) c = QSE_T('\r');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
b[0] = c;
|
b[0] = c;
|
||||||
@ -704,7 +734,7 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
{
|
{
|
||||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||||
{
|
{
|
||||||
sed->errnum = QSE_SED_ETSNTR;
|
sed->errnum = QSE_SED_ENOTRM;
|
||||||
goto oops;
|
goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -713,12 +743,11 @@ static int get_transet (qse_sed_t* sed, qse_sed_cmd_t* cmd)
|
|||||||
c = NXTSC (sed);
|
c = NXTSC (sed);
|
||||||
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
if (c == QSE_CHAR_EOF || IS_LINTERM(c))
|
||||||
{
|
{
|
||||||
sed->errnum = QSE_SED_ETSNTR;
|
sed->errnum = QSE_SED_ENOTRM;
|
||||||
goto oops;
|
goto oops;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c == QSE_T('n')) c = QSE_T('\n');
|
if (c == QSE_T('n')) c = QSE_T('\n');
|
||||||
else if (c == QSE_T('r')) c = QSE_T('\r');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pos >= QSE_STR_LEN(t))
|
if (pos >= QSE_STR_LEN(t))
|
||||||
|
Loading…
Reference in New Issue
Block a user