From af9aad6aae59bbed607d904ca7dbba443dda7fea Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Wed, 14 Jul 2021 11:31:29 +0000 Subject: [PATCH] fixed some more bugs in the json parser --- mio/bin/t03.c | 52 ++++++++++++++++++++++++----- mio/lib/json.c | 91 ++++++++++++++++++++++++++++++++++---------------- 2 files changed, 106 insertions(+), 37 deletions(-) diff --git a/mio/bin/t03.c b/mio/bin/t03.c index c16a27e..e67b751 100644 --- a/mio/bin/t03.c +++ b/mio/bin/t03.c @@ -4,11 +4,12 @@ #include #include +#define DEBUG + static int on_json_inst (mio_json_t* json, mio_json_inst_t inst, mio_oow_t level, mio_oow_t index, mio_json_state_t container_state, const mio_oocs_t* str, void* ctx) { mio_t* mio = mio_json_getmio(json); mio_oow_t i; - int* pending = (int*)ctx; switch (inst) { @@ -18,15 +19,21 @@ static int on_json_inst (mio_json_t* json, mio_json_inst_t inst, mio_oow_t level if (index > 0) mio_logbfmt (mio, MIO_LOG_STDOUT, ",\n"); for (i = 0; i < level; i++) mio_logbfmt (mio, MIO_LOG_STDOUT, "\t"); } + #if defined(DEBUG) + mio_logbfmt (mio, MIO_LOG_STDOUT, "<%lu>[\n", (unsigned long)index); + #else mio_logbfmt (mio, MIO_LOG_STDOUT, "[\n"); - (*pending)++; + #endif break; case MIO_JSON_INST_END_ARRAY: mio_logbfmt (mio, MIO_LOG_STDOUT, "\n"); for (i = 0; i < level; i++) mio_logbfmt (mio, MIO_LOG_STDOUT, "\t"); + #if defined(DEBUG) + mio_logbfmt (mio, MIO_LOG_STDOUT, "<%lu>]", (unsigned long)index); + #else mio_logbfmt (mio, MIO_LOG_STDOUT, "]"); - (*pending)--; + #endif break; case MIO_JSON_INST_START_OBJECT: @@ -35,21 +42,31 @@ static int on_json_inst (mio_json_t* json, mio_json_inst_t inst, mio_oow_t level if (index > 0) mio_logbfmt (mio, MIO_LOG_STDOUT, ",\n"); for (i = 0; i < level; i++) mio_logbfmt (mio, MIO_LOG_STDOUT, "\t"); } + #if defined(DEBUG) + mio_logbfmt (mio, MIO_LOG_STDOUT, "<%lu>{\n", (unsigned long)index); + #else mio_logbfmt (mio, MIO_LOG_STDOUT, "{\n"); - (*pending)++; + #endif break; case MIO_JSON_INST_END_OBJECT: mio_logbfmt (mio, MIO_LOG_STDOUT, "\n"); for (i = 0; i < level; i++) mio_logbfmt (mio, MIO_LOG_STDOUT, "\t"); + #if defined(DEBUG) + mio_logbfmt (mio, MIO_LOG_STDOUT, "<%lu>}", (unsigned long)index); + #else mio_logbfmt (mio, MIO_LOG_STDOUT, "}"); - (*pending)--; + #endif break; case MIO_JSON_INST_KEY: if (index > 0) mio_logbfmt (mio, MIO_LOG_STDOUT, ",\n"); for (i = 0; i < level; i++) mio_logbfmt (mio, MIO_LOG_STDOUT, "\t"); + #if defined(DEBUG) + mio_logbfmt (mio, MIO_LOG_STDOUT, "<%lu>%.*js: ", (unsigned long)index, str->len, str->ptr); + #else mio_logbfmt (mio, MIO_LOG_STDOUT, "%.*js: ", str->len, str->ptr); + #endif break; case MIO_JSON_INST_NIL: @@ -58,7 +75,11 @@ static int on_json_inst (mio_json_t* json, mio_json_inst_t inst, mio_oow_t level if (index > 0) mio_logbfmt (mio, MIO_LOG_STDOUT, ",\n"); for (i = 0; i < level; i++) mio_logbfmt (mio, MIO_LOG_STDOUT, "\t"); } + #if defined(DEBUG) + mio_logbfmt (mio, MIO_LOG_STDOUT, "<%lu>null", (unsigned long)index); + #else mio_logbfmt (mio, MIO_LOG_STDOUT, "null"); + #endif break; case MIO_JSON_INST_TRUE: @@ -67,7 +88,11 @@ static int on_json_inst (mio_json_t* json, mio_json_inst_t inst, mio_oow_t level if (index > 0) mio_logbfmt (mio, MIO_LOG_STDOUT, ",\n"); for (i = 0; i < level; i++) mio_logbfmt (mio, MIO_LOG_STDOUT, "\t"); } + #if defined(DEBUG) + mio_logbfmt (mio, MIO_LOG_STDOUT, "<%lu>true", (unsigned long)index); + #else mio_logbfmt (mio, MIO_LOG_STDOUT, "true"); + #endif break; case MIO_JSON_INST_FALSE: @@ -76,7 +101,11 @@ static int on_json_inst (mio_json_t* json, mio_json_inst_t inst, mio_oow_t level if (index > 0) mio_logbfmt (mio, MIO_LOG_STDOUT, ",\n"); for (i = 0; i < level; i++) mio_logbfmt (mio, MIO_LOG_STDOUT, "\t"); } + #if defined(DEBUG) + mio_logbfmt (mio, MIO_LOG_STDOUT, "<%lu>false", (unsigned long)index); + #else mio_logbfmt (mio, MIO_LOG_STDOUT, "false"); + #endif break; case MIO_JSON_INST_NUMBER: @@ -85,7 +114,11 @@ static int on_json_inst (mio_json_t* json, mio_json_inst_t inst, mio_oow_t level if (index > 0) mio_logbfmt (mio, MIO_LOG_STDOUT, ",\n"); for (i = 0; i < level; i++) mio_logbfmt (mio, MIO_LOG_STDOUT, "\t"); } + #if defined(DEBUG) + mio_logbfmt (mio, MIO_LOG_STDOUT, "<%lu>%.*js", (unsigned long)index, str->len, str->ptr); + #else mio_logbfmt (mio, MIO_LOG_STDOUT, "%.*js", str->len, str->ptr); + #endif break; case MIO_JSON_INST_STRING: @@ -94,7 +127,11 @@ static int on_json_inst (mio_json_t* json, mio_json_inst_t inst, mio_oow_t level if (index > 0) mio_logbfmt (mio, MIO_LOG_STDOUT, ",\n"); for (i = 0; i < level; i++) mio_logbfmt (mio, MIO_LOG_STDOUT, "\t"); } + #if defined(DEBUG) + mio_logbfmt (mio, MIO_LOG_STDOUT, "<%lu>\"%.*js\"", (unsigned long)index, str->len, str->ptr); /* TODO: escaping */ + #else mio_logbfmt (mio, MIO_LOG_STDOUT, "\"%.*js\"", str->len, str->ptr); /* TODO: escaping */ + #endif break; default: @@ -135,12 +172,11 @@ int main (int argc, char* argv[]) char buf[128]; mio_oow_t rem; size_t size; - int pending = 0; json = mio_json_open(mio, 0); mio_json_setoption (json, o); - mio_json_setinstcb (json, on_json_inst, &pending); + mio_json_setinstcb (json, on_json_inst, MIO_NULL); rem = 0; while (!feof(stdin) || rem > 0) @@ -176,13 +212,13 @@ int main (int argc, char* argv[]) } mio_logbfmt (mio, MIO_LOG_STDOUT, "\n"); - //if (pending) mio_logbfmt (mio, MIO_LOG_STDOUT, "**** ERROR - incomplete ****\n"); if (json->state_stack != &json->state_top) mio_logbfmt (mio, MIO_LOG_STDOUT, "**** ERROR - incomplete ****\n"); done: mio_json_close (json); } + mio_logbfmt (mio, MIO_LOG_STDOUT, "\n===================================\n"); { mio_jsonwr_t* jsonwr = MIO_NULL; diff --git a/mio/lib/json.c b/mio/lib/json.c index 6943d54..d1780b7 100644 --- a/mio/lib/json.c +++ b/mio/lib/json.c @@ -176,10 +176,9 @@ static int invoke_data_inst (mio_json_t* json, mio_json_inst_t inst) if (ss->state == MIO_JSON_STATE_IN_OBJECT) { - if (ss->u.io.state == 1) /* got colon */ + if (ss->u.io.state == 1) { - /* this is called after the reader has seen a colon. - * the data item must be used as a key */ + /* just got the key part. the colon has not been seen. */ if (inst != MIO_JSON_INST_STRING && inst != __INST_WORD_STRING) { @@ -189,10 +188,13 @@ static int invoke_data_inst (mio_json_t* json, mio_json_inst_t inst) inst = MIO_JSON_INST_KEY; } - else + else { - /* if this variable is non-zero, level is set to 0 regardless of actual level */ - is_obj_val = 1; + /* if this variable is non-zero, level is set to 0 regardless of actual level. + * this helps the callback to print the value without indentation immediately + * after the key */ + //is_obj_val = 1; + is_obj_val = (ss->u.io.state >= 2); } } @@ -205,23 +207,49 @@ static int invoke_data_inst (mio_json_t* json, mio_json_inst_t inst) switch (inst) { case MIO_JSON_INST_START_ARRAY: + { + mio_json_state_node_t* nss; if (push_read_state(json, MIO_JSON_STATE_IN_ARRAY) <= -1) return -1; - json->state_stack->u.ia.got_value = 0; - json->state_stack->level++; - if (ss->state != MIO_JSON_STATE_IN_OBJECT || ss->u.io.state == 1) ss->index++; - return json->instcb(json, inst, (is_obj_val? 0: json->state_stack->level - 1), ss->index - 1, ss->state, MIO_NULL, json->rctx); + nss = json->state_stack; + nss->u.ia.got_value = 0; + nss->level++; + + MIO_ASSERT (json->mio, nss->level == ss->level + 1); + return json->instcb(json, inst, (is_obj_val? 0: ss->level), ss->index, ss->state, MIO_NULL, json->rctx); + /* no increment on ss->index here. incremented on END */ + } + + case MIO_JSON_INST_END_ARRAY: + if (json->instcb(json, MIO_JSON_INST_END_ARRAY, ss->level, ss->index, ss->state, MIO_NULL, json->rctx) <= -1) return -1; + if (ss->state != MIO_JSON_STATE_IN_OBJECT || ss->u.io.state == 3) ss->index++; + break; case MIO_JSON_INST_START_OBJECT: + { + mio_json_state_node_t* nss; + if (push_read_state(json, MIO_JSON_STATE_IN_OBJECT) <= -1) return -1; - json->state_stack->u.io.state = 0; - json->state_stack->level++; - if (ss->state != MIO_JSON_STATE_IN_OBJECT || ss->u.io.state == 1) ss->index++; - return json->instcb(json, inst, (is_obj_val? 0: json->state_stack->level - 1), ss->index - 1, ss->state, MIO_NULL, json->rctx); + nss = json->state_stack; + nss->u.io.state = 0; + nss->level++; + + MIO_ASSERT (json->mio, nss->level == ss->level + 1); + return json->instcb(json, inst, (is_obj_val? 0: ss->level), ss->index, ss->state, MIO_NULL, json->rctx); + /* no increment on ss->index here. incremented on END */ + } + + case MIO_JSON_INST_END_OBJECT: + if (json->instcb(json, MIO_JSON_INST_END_OBJECT, ss->level, ss->index, ss->state, MIO_NULL, json->rctx) <= -1) return -1; + if (ss->state != MIO_JSON_STATE_IN_OBJECT || ss->u.io.state == 3) ss->index++; + break; default: - if (ss->state != MIO_JSON_STATE_IN_OBJECT || ss->u.io.state == 1) ss->index++; - return json->instcb(json, inst, (is_obj_val? 0: json->state_stack->level), ss->index - 1, ss->state, &json->tok, json->rctx); + if (json->instcb(json, inst, (is_obj_val? 0: ss->level), ss->index, ss->state, &json->tok, json->rctx) <= -1) return -1; + if (ss->state != MIO_JSON_STATE_IN_OBJECT || ss->u.io.state == 3) ss->index++; + break; } + + return 0; } static int handle_string_value_char (mio_json_t* json, mio_ooci_t c) @@ -485,8 +513,7 @@ static int handle_char_in_array (mio_json_t* json, mio_ooci_t c) else if (c == ']') { pop_read_state (json); - /* START_ARRAY incremented index by 1. so subtract 1 from index before invoking instcb for END_ARRAY. */ - if (json->instcb(json, MIO_JSON_INST_END_ARRAY, json->state_stack->level, json->state_stack->index - 1, json->state_stack->state, MIO_NULL, json->rctx) <= -1) return -1; + if (invoke_data_inst(json, MIO_JSON_INST_END_ARRAY) <= -1) return -1; return 1; } else if (c == ',') @@ -562,9 +589,15 @@ static int handle_char_in_object (mio_json_t* json, mio_ooci_t c) } else if (c == '}') { + /* 0 - initial, 1 - got key, 2 -> got colon, 3 -> got value, 0 -> after comma */ + if (json->state_stack->u.io.state == 1 || json->state_stack->u.io.state == 2) + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "no value for a key in object"); + return -1; + } + pop_read_state (json); - /* START_OBJECT incremented index by 1. so subtract 1 from index before invoking instcb for END_OBJECT. */ - if (json->instcb(json, MIO_JSON_INST_END_OBJECT, json->state_stack->level, json->state_stack->index - 1, json->state_stack->state, MIO_NULL, json->rctx) <= -1) return -1; + if (invoke_data_inst(json, MIO_JSON_INST_END_OBJECT) <= -1) return -1; return 1; } else if (c == ':') @@ -581,7 +614,7 @@ static int handle_char_in_object (mio_json_t* json, mio_ooci_t c) { if (json->state_stack->u.io.state != 3) { - mio_seterrbfmt (json->mio, MIO_EINVAL, "redundant comma in object - %jc", (mio_ooch_t)c); + mio_seterrbfmt (json->mio, MIO_EINVAL, "comma without value or redundant comma in object - %jc", (mio_ooch_t)c); return -1; } json->state_stack->u.io.state = 0; @@ -1155,7 +1188,7 @@ static int write_uchars (mio_jsonwr_t* jsonwr, int escape, const mio_uch_t* ptr, #define WRITE_COMMA(jsonwr) do { WRITE_BYTES_NOESC(jsonwr, ",", 1); if (jsonwr->flags & MIO_JSONWR_FLAG_PRETTY) WRITE_LINE_BREAK(jsonwr); } while(0) -#define PREACTION_FOR_VLAUE(jsonwr,sn) do { \ +#define PREACTION_FOR_VALUE(jsonwr,sn) do { \ if (sn->state != MIO_JSON_STATE_IN_ARRAY && !(sn->state == MIO_JSON_STATE_IN_OBJECT && sn->obj_awaiting_val)) goto incompatible_inst; \ if (sn->index > 0 && sn->state == MIO_JSON_STATE_IN_ARRAY) WRITE_COMMA (jsonwr); \ sn->index++; \ @@ -1251,22 +1284,22 @@ int mio_jsonwr_write (mio_jsonwr_t* jsonwr, mio_json_inst_t inst, int is_uchars, break; case MIO_JSON_INST_NIL: - PREACTION_FOR_VLAUE (jsonwr, sn); + PREACTION_FOR_VALUE (jsonwr, sn); WRITE_BYTES_NOESC (jsonwr, "nil", 3); break; case MIO_JSON_INST_TRUE: - PREACTION_FOR_VLAUE (jsonwr, sn); + PREACTION_FOR_VALUE (jsonwr, sn); WRITE_BYTES_NOESC (jsonwr, "true", 4); break; case MIO_JSON_INST_FALSE: - PREACTION_FOR_VLAUE (jsonwr, sn); + PREACTION_FOR_VALUE (jsonwr, sn); WRITE_BYTES_NOESC (jsonwr, "false", 5); break; case MIO_JSON_INST_NUMBER: - PREACTION_FOR_VLAUE (jsonwr, sn); + PREACTION_FOR_VALUE (jsonwr, sn); if (is_uchars) WRITE_UCHARS (jsonwr, 0, dptr, dlen); else @@ -1274,7 +1307,7 @@ int mio_jsonwr_write (mio_jsonwr_t* jsonwr, mio_json_inst_t inst, int is_uchars, break; case MIO_JSON_INST_STRING: - PREACTION_FOR_VLAUE (jsonwr, sn); + PREACTION_FOR_VALUE (jsonwr, sn); WRITE_BYTES_NOESC (jsonwr, "\"", 1); if (is_uchars) WRITE_UCHARS (jsonwr, 1, dptr, dlen); else WRITE_BYTES_ESC (jsonwr, dptr, dlen); @@ -1298,7 +1331,7 @@ int mio_jsonwr_writeintmax (mio_jsonwr_t* jsonwr, mio_intmax_t v) mio_bch_t tmp[((MIO_SIZEOF_UINTMAX_T * MIO_BITS_PER_BYTE) / 3) + 3]; /* there can be a sign. so +3 instead of +2 */ mio_oow_t len; - PREACTION_FOR_VLAUE (jsonwr, sn); + PREACTION_FOR_VALUE (jsonwr, sn); len = mio_fmt_intmax_to_bcstr(tmp, MIO_COUNTOF(tmp), v, 10, 0, '\0', MIO_NULL); WRITE_BYTES_NOESC (jsonwr, tmp, len); return 0; @@ -1315,7 +1348,7 @@ int mio_jsonwr_writeuintmax (mio_jsonwr_t* jsonwr, mio_uintmax_t v) mio_bch_t tmp[((MIO_SIZEOF_UINTMAX_T * MIO_BITS_PER_BYTE) / 3) + 2]; mio_oow_t len; - PREACTION_FOR_VLAUE (jsonwr, sn); + PREACTION_FOR_VALUE (jsonwr, sn); len = mio_fmt_uintmax_to_bcstr(tmp, MIO_COUNTOF(tmp), v, 10, 0, '\0', MIO_NULL); WRITE_BYTES_NOESC (jsonwr, tmp, len); return 0;