enhanced escaping in xli's json functions

This commit is contained in:
hyung-hwan 2021-06-17 09:51:13 +00:00
parent 7b81290cd0
commit 6f6b82687a
2 changed files with 129 additions and 2 deletions

View File

@ -85,6 +85,23 @@ enum
} \ } \
} while (0) } while (0)
/*
 * ADD_TOKEN_UINT32(xli,tok,c) appends the character whose numeric value
 * is 'c' (e.g. the value accumulated from a \uXXXX escape) to the token.
 *
 * - When qse_char_t is a multibyte character (QSE_CHAR_IS_MCHAR), a value
 *   in the ASCII range is appended as a single character and anything
 *   above it is converted with qse_uctoutf8() and appended unit by unit.
 *   The single-character threshold is 0x7F: values 0x80..0xFF are not
 *   valid one-byte UTF-8 sequences and must be encoded like the rest.
 * - Otherwise the value is handed to ADD_TOKEN_CHAR() unchanged.
 *
 * Both variants expand to a single statement WITHOUT a trailing semicolon
 * so that the macro can be used safely in an unbraced if/else.
 * In the multibyte variant 'c' is evaluated exactly once.
 */
#if defined(QSE_CHAR_IS_MCHAR)
#	define ADD_TOKEN_UINT32(xli,tok,c) \
	do { \
		qse_uint32_t __xc = (c); /* evaluate the argument only once */ \
		if (__xc <= 0x7F) ADD_TOKEN_CHAR(xli, tok, __xc); \
		else \
		{ \
			qse_mchar_t __xbuf[QSE_MBLEN_MAX + 1]; \
			qse_size_t __len, __i; \
			__len = qse_uctoutf8(__xc, __xbuf, QSE_COUNTOF(__xbuf)); /* use utf8 all the time */ \
			for (__i = 0; __i < __len; __i++) ADD_TOKEN_CHAR(xli, tok, __xbuf[__i]); \
		} \
	} while (0)
#else
#	define ADD_TOKEN_UINT32(xli,tok,c) ADD_TOKEN_CHAR(xli,tok,c)
#endif
#define SET_TOKEN_TYPE(xli,tok,code) \ #define SET_TOKEN_TYPE(xli,tok,code) \
do { (tok)->type = (code); } while (0) do { (tok)->type = (code); } while (0)
@ -478,6 +495,8 @@ retry:
{ {
/* double-quoted string - support escaping */ /* double-quoted string - support escaping */
int escaped = 0; int escaped = 0;
qse_size_t digit_count = 0;
qse_uint32_t c_acc = 0;
SET_TOKEN_TYPE (xli, tok, QSE_XLI_TOK_DQSTR); SET_TOKEN_TYPE (xli, tok, QSE_XLI_TOK_DQSTR);
@ -494,6 +513,7 @@ retry:
if (!escaped) if (!escaped)
{ {
not_escaped:
if (c == QSE_T('\\')) if (c == QSE_T('\\'))
{ {
escaped = 1; escaped = 1;
@ -509,8 +529,69 @@ retry:
ADD_TOKEN_CHAR (xli, tok, c); ADD_TOKEN_CHAR (xli, tok, c);
} }
else if (escaped == 4)
{
if (c >= QSE_T('0') && c <= QSE_T('9'))
{
c_acc = c_acc * 16 + c - QSE_T('0');
digit_count++;
if (digit_count >= escaped)
{
ADD_TOKEN_UINT32 (xli, tok, c_acc);
escaped = 0;
}
}
else if (c >= QSE_T('A') && c <= QSE_T('F'))
{
c_acc = c_acc * 16 + c - QSE_T('A') + 10;
digit_count++;
if (digit_count >= escaped)
{
ADD_TOKEN_UINT32 (xli, tok, c_acc);
escaped = 0;
}
}
else if (c >= QSE_T('a') && c <= QSE_T('f'))
{
c_acc = c_acc * 16 + c - QSE_T('a') + 10;
digit_count++;
if (digit_count >= escaped)
{
ADD_TOKEN_UINT32 (xli, tok, c_acc);
escaped = 0;
}
}
else
{
/* not a hexadecimal digit */
if (digit_count == 0)
{
/* no valid character after the escaper. keep the escaper as it is */
ADD_TOKEN_CHAR (xli, tok, 'u');
}
else ADD_TOKEN_UINT32 (xli, tok, c_acc);
escaped = 0;
goto not_escaped;
}
}
else else
{ {
if (c == 'u')
{
escaped = 4;
digit_count = 0;
c_acc = 0;
continue;
}
if (c == 'b') c = '\b';
else if (c == 'f') c = '\f';
else if (c == 'n') c = '\n';
else if (c == 'r') c = '\r';
else if (c == 't') c = '\t';
ADD_TOKEN_CHAR (xli, tok, c); ADD_TOKEN_CHAR (xli, tok, c);
escaped = 0; escaped = 0;
} }

View File

@ -42,16 +42,62 @@ static int write_to_current_stream(qse_xli_t* xli, const qse_char_t* ptr, qse_si
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
{ {
if (escape)
{
if (ptr[i] == QSE_T('\\') || ptr[i] == QSE_T('\"') ||
ptr[i] == QSE_T('\b') || ptr[i] == QSE_T('\f') ||
ptr[i] == QSE_T('\n') || ptr[i] == QSE_T('\r') || ptr[i] == QSE_T('\t'))
{
qse_char_t ac;
if (arg->b.len + 2 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream(xli, arg) <= -1) return -1;
arg->b.buf[arg->b.len++] = QSE_T('\\');
switch (ptr[i])
{
case QSE_T('\b'): ac = 'b'; break;
case QSE_T('\f'): ac = 'f'; break;
case QSE_T('\n'): ac = 'n'; break;
case QSE_T('\r'): ac = 'r'; break;
case QSE_T('\t'): ac = 't'; break;
default: ac = ptr[i]; break;
}
arg->b.buf[arg->b.len++] = ac;
}
else if (ptr[i] >= 0x00 && ptr[i] <= 0x1F)
{
qse_char_t tmp[5];
if (arg->b.len + 6 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream(xli, arg) <= -1) return -1;
arg->b.buf[arg->b.len++] = QSE_T('\\');
arg->b.buf[arg->b.len++] = QSE_T('u');
/* use 'tmp' to avoid null termination beyond the buffer end */
qse_strxfmt(tmp, QSE_COUNTOF(tmp), QSE_T("%04X"), ptr[i]);
arg->b.len += qse_strcpy(&arg->b.buf[arg->b.len], tmp);
}
else
{
goto unescaped;
}
}
else
{
unescaped:
if (arg->b.len + 1 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream(xli, arg) <= -1) return -1;
arg->b.buf[arg->b.len++] = ptr[i];
}
/*
if (escape && (ptr[i] == QSE_T('\\') || ptr[i] == QSE_T('\"'))) if (escape && (ptr[i] == QSE_T('\\') || ptr[i] == QSE_T('\"')))
{ {
if (arg->b.len + 2 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream (xli, arg) <= -1) return -1; if (arg->b.len + 2 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream(xli, arg) <= -1) return -1;
arg->b.buf[arg->b.len++] = QSE_T('\\'); arg->b.buf[arg->b.len++] = QSE_T('\\');
} }
else else
{ {
if (arg->b.len + 1 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream (xli, arg) <= -1) return -1; if (arg->b.len + 1 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream(xli, arg) <= -1) return -1;
} }
arg->b.buf[arg->b.len++] = ptr[i]; arg->b.buf[arg->b.len++] = ptr[i];
*/
} }
return 0; return 0;