From 6f6b82687afe8941c3e65452e9302afa79c4894c Mon Sep 17 00:00:00 2001
From: hyung-hwan
Date: Thu, 17 Jun 2021 09:51:13 +0000
Subject: [PATCH] enhanced escaping in xli's json functions

---
Review notes (free-form text placed between the "---" line and the first
"diff --git" line; it is not part of the commit message):

 * read-json.c, ADD_TOKEN_UINT32 (QSE_CHAR_IS_MCHAR build): only ASCII
   (<= 0x7F) is stored as a single char. U+0080..U+00FF are now utf8-encoded
   like every other non-ASCII code point instead of being stored as one raw
   byte, which produced invalid utf8 for escapes such as \u00e9.
 * read-json.c, ADD_TOKEN_UINT32 (other builds): dropped the trailing
   semicolon from the macro body so the macro expands to a single statement.
 * write-json.c, write_to_current_stream(): the \uXXXX branch reserves 7
   slots instead of 6 because qse_strcpy() is expected to store a terminating
   null after the 4 hex digits; with 6 the null could land one element past
   the end of arg->b.buf.
 * Known limitation, unchanged: \uD8xx\uDCxx surrogate pairs are not combined
   into a single code point by the reader.
 * Line breaks and tab indentation of this patch were reconstructed. Hunk line
   counts are unchanged. Verify whitespace against the tree before applying.

 qse/lib/xli/read-json.c  | 81 ++++++++++++++++++++++++++++++++++++++++
 qse/lib/xli/write-json.c | 50 ++++++++++++++++++++++++-
 2 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/qse/lib/xli/read-json.c b/qse/lib/xli/read-json.c
index 6a0f33f1..1ba4fae1 100644
--- a/qse/lib/xli/read-json.c
+++ b/qse/lib/xli/read-json.c
@@ -85,6 +85,23 @@ enum
 		} \
 	} while (0)
 
+#if defined(QSE_CHAR_IS_MCHAR)
+
+# define ADD_TOKEN_UINT32(xli,tok,c) \
+	do { \
+		if ((c) <= 0x7F) ADD_TOKEN_CHAR(xli, tok, c); /* ascii is stored as it is */ \
+		else \
+		{ \
+			qse_mchar_t __xbuf[QSE_MBLEN_MAX + 1]; \
+			qse_size_t __len, __i; \
+			__len = qse_uctoutf8(c, __xbuf, QSE_COUNTOF(__xbuf)); /* use utf8 all the time */ \
+			for (__i = 0; __i < __len; __i++) ADD_TOKEN_CHAR(xli, tok, __xbuf[__i]); \
+		} \
+	} while (0)
+#else
+# define ADD_TOKEN_UINT32(xli,tok,c) ADD_TOKEN_CHAR(xli,tok,c)
+#endif
+
 #define SET_TOKEN_TYPE(xli,tok,code) \
 	do { (tok)->type = (code); } while (0)
 
@@ -478,6 +495,8 @@ retry:
 	{
 		/* double-quoted string - support escaping */
 		int escaped = 0;
+		qse_size_t digit_count = 0;
+		qse_uint32_t c_acc = 0;
 
 		SET_TOKEN_TYPE (xli, tok, QSE_XLI_TOK_DQSTR);
 
@@ -494,6 +513,7 @@ retry:
 
 			if (!escaped)
 			{
+			not_escaped:
 				if (c == QSE_T('\\'))
 				{
 					escaped = 1;
@@ -509,8 +529,69 @@ retry:
 
 				ADD_TOKEN_CHAR (xli, tok, c);
 			}
+			else if (escaped == 4)
+			{
+				if (c >= QSE_T('0') && c <= QSE_T('9'))
+				{
+					c_acc = c_acc * 16 + c - QSE_T('0');
+					digit_count++;
+					if (digit_count >= escaped)
+					{
+						ADD_TOKEN_UINT32 (xli, tok, c_acc);
+						escaped = 0;
+					}
+				}
+				else if (c >= QSE_T('A') && c <= QSE_T('F'))
+				{
+					c_acc = c_acc * 16 + c - QSE_T('A') + 10;
+					digit_count++;
+					if (digit_count >= escaped)
+					{
+						ADD_TOKEN_UINT32 (xli, tok, c_acc);
+						escaped = 0;
+					}
+				}
+				else if (c >= QSE_T('a') && c <= QSE_T('f'))
+				{
+					c_acc = c_acc * 16 + c - QSE_T('a') + 10;
+					digit_count++;
+					if (digit_count >= escaped)
+					{
+						ADD_TOKEN_UINT32 (xli, tok, c_acc);
+						escaped = 0;
+					}
+				}
+				else
+				{
+					/* not a hexadecimal digit */
+
+					if (digit_count == 0)
+					{
+						/* no valid character after the escaper. keep the escaper as it is */
+						ADD_TOKEN_CHAR (xli, tok, 'u');
+					}
+					else ADD_TOKEN_UINT32 (xli, tok, c_acc);
+
+					escaped = 0;
+					goto not_escaped;
+				}
+			}
 			else
 			{
+				if (c == 'u')
+				{
+					escaped = 4;
+					digit_count = 0;
+					c_acc = 0;
+					continue;
+				}
+
+				if (c == 'b') c = '\b';
+				else if (c == 'f') c = '\f';
+				else if (c == 'n') c = '\n';
+				else if (c == 'r') c = '\r';
+				else if (c == 't') c = '\t';
+
 				ADD_TOKEN_CHAR (xli, tok, c);
 				escaped = 0;
 			}
diff --git a/qse/lib/xli/write-json.c b/qse/lib/xli/write-json.c
index 213ea1eb..84efc084 100644
--- a/qse/lib/xli/write-json.c
+++ b/qse/lib/xli/write-json.c
@@ -42,16 +42,62 @@ static int write_to_current_stream(qse_xli_t* xli, const qse_char_t* ptr, qse_si
 
 	for (i = 0; i < len; i++)
 	{
+
+		if (escape)
+		{
+			if (ptr[i] == QSE_T('\\') || ptr[i] == QSE_T('\"') ||
+			    ptr[i] == QSE_T('\b') || ptr[i] == QSE_T('\f') ||
+			    ptr[i] == QSE_T('\n') || ptr[i] == QSE_T('\r') || ptr[i] == QSE_T('\t'))
+			{
+				qse_char_t ac;
+				if (arg->b.len + 2 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream(xli, arg) <= -1) return -1;
+				arg->b.buf[arg->b.len++] = QSE_T('\\');
+				switch (ptr[i])
+				{
+					case QSE_T('\b'): ac = 'b'; break;
+					case QSE_T('\f'): ac = 'f'; break;
+					case QSE_T('\n'): ac = 'n'; break;
+					case QSE_T('\r'): ac = 'r'; break;
+					case QSE_T('\t'): ac = 't'; break;
+					default: ac = ptr[i]; break;
+				}
+				arg->b.buf[arg->b.len++] = ac;
+			}
+			else if (ptr[i] >= 0x00 && ptr[i] <= 0x1F)
+			{
+				qse_char_t tmp[5];
+				if (arg->b.len + 7 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream(xli, arg) <= -1) return -1;
+				arg->b.buf[arg->b.len++] = QSE_T('\\');
+				arg->b.buf[arg->b.len++] = QSE_T('u');
+				/* format into 'tmp' first. 7 slots are reserved above - 6 for \uXXXX plus 1 for the null qse_strcpy() is expected to append */
+				qse_strxfmt(tmp, QSE_COUNTOF(tmp), QSE_T("%04X"), ptr[i]);
+				arg->b.len += qse_strcpy(&arg->b.buf[arg->b.len], tmp);
+			}
+			else
+			{
+				goto unescaped;
+			}
+		}
+		else
+		{
+		unescaped:
+			if (arg->b.len + 1 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream(xli, arg) <= -1) return -1;
+			arg->b.buf[arg->b.len++] = ptr[i];
+		}
+
+
+		/*
 		if (escape && (ptr[i] == QSE_T('\\') || ptr[i] == QSE_T('\"')))
 		{
-			if (arg->b.len + 2 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream (xli, arg) <= -1) return -1;
+			if (arg->b.len + 2 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream(xli, arg) <= -1) return -1;
 			arg->b.buf[arg->b.len++] = QSE_T('\\');
 		}
 		else
 		{
-			if (arg->b.len + 1 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream (xli, arg) <= -1) return -1;
+			if (arg->b.len + 1 > QSE_COUNTOF(arg->b.buf) && qse_xli_flushwstream(xli, arg) <= -1) return -1;
 		}
 		arg->b.buf[arg->b.len++] = ptr[i];
+		*/
 	}
 
 	return 0;