added QSE_XLI_LEADDIGIT.

enhanced keyword and identifier scanning for xli
This commit is contained in:
2013-09-17 01:28:04 +00:00
parent 3d6f9af82b
commit 99d0e04879
4 changed files with 64 additions and 28 deletions

View File

@ -49,6 +49,7 @@ const qse_char_t* qse_xli_dflerrstr (
QSE_T("invalid character '${0}'"),
QSE_T("'${0}' not recognized"),
QSE_T("@ not followed by a valid word"),
QSE_T("invalid identifier '${0}'"),
QSE_T("undefined key '${0}'"),
QSE_T("no alias for '${0}'"),
QSE_T("illegal value for '${0}'"),

View File

@ -104,6 +104,7 @@ struct kwent_t
int type;
};
/* note that the keyword must start with @. */
static kwent_t kwtab[] =
{
/* keep it sorted by the first field for binary search */
@ -420,50 +421,84 @@ retry:
}
else if (c == QSE_T('@'))
{
/* keyword/directive - start with @ */
int type;
ADD_TOKEN_CHAR (xli, tok, c);
GET_CHAR_TO (xli, c);
if (c != QSE_T('_') && !QSE_ISALPHA (c))
if (!QSE_ISALPHA (c))
{
/* this directive is empty,
* not followed by a valid word */
/* this directive is empty, not followed by a valid word */
qse_xli_seterror (xli, QSE_XLI_EXKWEM, QSE_NULL, &xli->tok.loc);
return -1;
}
/* expect normal identifier starting with an alphabet */
/* expect an identifier starting with an alphabetic character. the identifier
* forming a keyword/directive is composed of alphabetic characters only. */
do
{
ADD_TOKEN_CHAR (xli, tok, c);
GET_CHAR_TO (xli, c);
}
while (c == QSE_T('_') || c == QSE_T('-') || QSE_ISALNUM (c));
while (QSE_ISALPHA (c));
type = classify_ident (xli, QSE_STR_CSTR(tok->name));
if (type == TOK_IDENT)
{
/* this directive is not recognized */
/* this keyword/directive is not recognized */
qse_xli_seterror (xli, QSE_XLI_EXKWNR, QSE_STR_CSTR(xli->tok.name), &xli->tok.loc);
return -1;
}
SET_TOKEN_TYPE (xli, tok, type);
}
else if (c == QSE_T('_') || QSE_ISALPHA (c))
else if (c == QSE_T('_') || QSE_ISALPHA (c) ||
(!(xli->tok_status & TOK_STATUS_ENABLE_NSTR) &&
(xli->opt.trait & QSE_XLI_LEADDIGIT) &&
QSE_ISDIGIT(c)))
{
int type;
int lead_digit = QSE_ISDIGIT(c);
int all_digits = 1;
/* identifier */
do
/* a normal identifier can be composed of wider varieties of characters
* than a keyword/directive */
while (1)
{
ADD_TOKEN_CHAR (xli, tok, c);
GET_CHAR_TO (xli, c);
}
while (c == QSE_T('_') || c == QSE_T('-') || QSE_ISALNUM (c));
type = classify_ident (xli, QSE_STR_CSTR(tok->name));
SET_TOKEN_TYPE (xli, tok, type);
if (c == QSE_T('_') || c == QSE_T('-') ||
c == QSE_T(':') || c == QSE_T('*') ||
c == QSE_T('/') || QSE_ISALPHA (c))
{
all_digits = 0;
}
else if (lead_digit && QSE_ISDIGIT(c))
{
/* nothing to do */
}
else break;
}
if (lead_digit && all_digits)
{
/* if an identifier begins with a digit, it must contain at least one non-digit character */
qse_xli_seterror (xli, QSE_XLI_EIDENT, QSE_STR_CSTR(xli->tok.name), &xli->tok.loc);
return -1;
}
SET_TOKEN_TYPE (xli, tok, TOK_IDENT);
}
else if ((xli->tok_status & TOK_STATUS_ENABLE_NSTR) && QSE_ISDIGIT(c))
{
SET_TOKEN_TYPE (xli, tok, TOK_NSTR);
do
{
ADD_TOKEN_CHAR (xli, tok, c);
GET_CHAR_TO (xli, c);
}
while (QSE_ISDIGIT(c));
}
else if (c == QSE_T('\''))
{
@ -533,16 +568,6 @@ retry:
}
}
}
else if ((xli->tok_status & TOK_STATUS_ENABLE_NSTR) && QSE_ISDIGIT(c))
{
SET_TOKEN_TYPE (xli, tok, TOK_NSTR);
do
{
ADD_TOKEN_CHAR (xli, tok, c);
GET_CHAR_TO (xli, c);
}
while (QSE_ISDIGIT(c));
}
else
{
n = get_symbols (xli, c, tok);
@ -667,6 +692,7 @@ static int read_pair (qse_xli_t* xli)
}
}
/* once the key name is read, enable the numeric string for a value */
xli->tok_status |= TOK_STATUS_ENABLE_NSTR;
if (get_token (xli) <= -1) goto oops;
@ -705,7 +731,7 @@ static int read_pair (qse_xli_t* xli)
{
/* SCM_KEYALIAS is specified for this particular item. Let the alias be required.
* If KEYALIAS is globally specified with the specific one, it's optional. */
qse_xli_seterrnum (xli, QSE_XLI_ENOALI, &key);
qse_xli_seterrnum (xli, QSE_XLI_ENOALI, (const qse_cstr_t*)&key);
goto oops;
}
}
@ -771,6 +797,8 @@ static int read_pair (qse_xli_t* xli)
goto oops;
}
/* semicolon read. turn off NSTR */
xli->tok_status &= ~TOK_STATUS_ENABLE_NSTR;
if (get_token (xli) <= -1) goto oops;
}
else