From b895566323a5fb0206a1aa0b2e5112c566d3da7e Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Fri, 29 May 2020 11:35:20 +0000 Subject: [PATCH] adding a simple json reader --- mio/lib/Makefile.am | 2 + mio/lib/Makefile.in | 34 +- mio/lib/json.c | 844 ++++++++++++++++++++++++++++++++++++++++++++ mio/lib/mio-json.h | 180 ++++++++++ mio/lib/mio-utl.h | 28 +- mio/lib/mio.h | 2 +- 6 files changed, 1065 insertions(+), 25 deletions(-) create mode 100644 mio/lib/json.c create mode 100644 mio/lib/mio-json.h diff --git a/mio/lib/Makefile.am b/mio/lib/Makefile.am index 1dd065f..a8130cc 100644 --- a/mio/lib/Makefile.am +++ b/mio/lib/Makefile.am @@ -30,6 +30,7 @@ include_HEADERS = \ mio-htrd.h \ mio-htre.h \ mio-http.h \ + mio-json.h \ mio-nwif.h \ mio-pac1.h \ mio-path.h \ @@ -60,6 +61,7 @@ libmio_la_SOURCES = \ http-svr.c \ http-thr.c \ http-txt.c \ + json.c \ mio-prv.h \ mio.c \ nwif.c \ diff --git a/mio/lib/Makefile.in b/mio/lib/Makefile.in index 95ba016..e0229bb 100644 --- a/mio/lib/Makefile.in +++ b/mio/lib/Makefile.in @@ -143,12 +143,13 @@ am_libmio_la_OBJECTS = libmio_la-chr.lo libmio_la-dns.lo \ libmio_la-fmt.lo libmio_la-htb.lo libmio_la-htrd.lo \ libmio_la-htre.lo libmio_la-http.lo libmio_la-http-cgi.lo \ libmio_la-http-svr.lo libmio_la-http-thr.lo \ - libmio_la-http-txt.lo libmio_la-mio.lo libmio_la-nwif.lo \ - libmio_la-path.lo libmio_la-pipe.lo libmio_la-pro.lo \ - libmio_la-sck.lo libmio_la-skad.lo libmio_la-sys.lo \ - libmio_la-sys-ass.lo libmio_la-sys-err.lo libmio_la-sys-log.lo \ - libmio_la-sys-mux.lo libmio_la-sys-tim.lo libmio_la-thr.lo \ - libmio_la-tmr.lo libmio_la-utf8.lo libmio_la-utl.lo + libmio_la-http-txt.lo libmio_la-json.lo libmio_la-mio.lo \ + libmio_la-nwif.lo libmio_la-path.lo libmio_la-pipe.lo \ + libmio_la-pro.lo libmio_la-sck.lo libmio_la-skad.lo \ + libmio_la-sys.lo libmio_la-sys-ass.lo libmio_la-sys-err.lo \ + libmio_la-sys-log.lo libmio_la-sys-mux.lo libmio_la-sys-tim.lo \ + libmio_la-thr.lo libmio_la-tmr.lo libmio_la-utf8.lo \ + 
libmio_la-utl.lo libmio_la_OBJECTS = $(am_libmio_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -182,10 +183,11 @@ am__depfiles_remade = ./$(DEPDIR)/libmio_la-chr.Plo \ ./$(DEPDIR)/libmio_la-http-svr.Plo \ ./$(DEPDIR)/libmio_la-http-thr.Plo \ ./$(DEPDIR)/libmio_la-http-txt.Plo \ - ./$(DEPDIR)/libmio_la-http.Plo ./$(DEPDIR)/libmio_la-mio.Plo \ - ./$(DEPDIR)/libmio_la-nwif.Plo ./$(DEPDIR)/libmio_la-path.Plo \ - ./$(DEPDIR)/libmio_la-pipe.Plo ./$(DEPDIR)/libmio_la-pro.Plo \ - ./$(DEPDIR)/libmio_la-sck.Plo ./$(DEPDIR)/libmio_la-skad.Plo \ + ./$(DEPDIR)/libmio_la-http.Plo ./$(DEPDIR)/libmio_la-json.Plo \ + ./$(DEPDIR)/libmio_la-mio.Plo ./$(DEPDIR)/libmio_la-nwif.Plo \ + ./$(DEPDIR)/libmio_la-path.Plo ./$(DEPDIR)/libmio_la-pipe.Plo \ + ./$(DEPDIR)/libmio_la-pro.Plo ./$(DEPDIR)/libmio_la-sck.Plo \ + ./$(DEPDIR)/libmio_la-skad.Plo \ ./$(DEPDIR)/libmio_la-sys-ass.Plo \ ./$(DEPDIR)/libmio_la-sys-err.Plo \ ./$(DEPDIR)/libmio_la-sys-log.Plo \ @@ -420,6 +422,7 @@ include_HEADERS = \ mio-htrd.h \ mio-htre.h \ mio-http.h \ + mio-json.h \ mio-nwif.h \ mio-pac1.h \ mio-path.h \ @@ -450,6 +453,7 @@ libmio_la_SOURCES = \ http-svr.c \ http-thr.c \ http-txt.c \ + json.c \ mio-prv.h \ mio.c \ nwif.c \ @@ -583,6 +587,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmio_la-http-thr.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmio_la-http-txt.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmio_la-http.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmio_la-json.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmio_la-mio.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmio_la-nwif.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmio_la-path.Plo@am__quote@ # am--include-marker @@ -729,6 +734,13 
@@ libmio_la-http-txt.lo: http-txt.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmio_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libmio_la-http-txt.lo `test -f 'http-txt.c' || echo '$(srcdir)/'`http-txt.c +libmio_la-json.lo: json.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmio_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libmio_la-json.lo -MD -MP -MF $(DEPDIR)/libmio_la-json.Tpo -c -o libmio_la-json.lo `test -f 'json.c' || echo '$(srcdir)/'`json.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libmio_la-json.Tpo $(DEPDIR)/libmio_la-json.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='json.c' object='libmio_la-json.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmio_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libmio_la-json.lo `test -f 'json.c' || echo '$(srcdir)/'`json.c + libmio_la-mio.lo: mio.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmio_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libmio_la-mio.lo -MD -MP -MF $(DEPDIR)/libmio_la-mio.Tpo -c -o libmio_la-mio.lo `test -f 'mio.c' || echo '$(srcdir)/'`mio.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libmio_la-mio.Tpo $(DEPDIR)/libmio_la-mio.Plo @@ -1017,6 +1029,7 @@ distclean: distclean-am -rm -f ./$(DEPDIR)/libmio_la-http-thr.Plo -rm -f 
./$(DEPDIR)/libmio_la-http-txt.Plo -rm -f ./$(DEPDIR)/libmio_la-http.Plo + -rm -f ./$(DEPDIR)/libmio_la-json.Plo -rm -f ./$(DEPDIR)/libmio_la-mio.Plo -rm -f ./$(DEPDIR)/libmio_la-nwif.Plo -rm -f ./$(DEPDIR)/libmio_la-path.Plo @@ -1094,6 +1107,7 @@ maintainer-clean: maintainer-clean-am -rm -f ./$(DEPDIR)/libmio_la-http-thr.Plo -rm -f ./$(DEPDIR)/libmio_la-http-txt.Plo -rm -f ./$(DEPDIR)/libmio_la-http.Plo + -rm -f ./$(DEPDIR)/libmio_la-json.Plo -rm -f ./$(DEPDIR)/libmio_la-mio.Plo -rm -f ./$(DEPDIR)/libmio_la-nwif.Plo -rm -f ./$(DEPDIR)/libmio_la-path.Plo diff --git a/mio/lib/json.c b/mio/lib/json.c new file mode 100644 index 0000000..4d2182f --- /dev/null +++ b/mio/lib/json.c @@ -0,0 +1,844 @@ +/* + * $Id$ + * + Copyright (c) 2016-2018 Chung, Hyung-Hwan. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "mio-prv.h" + +#include +#include + +#define MIO_JSON_TOKEN_NAME_ALIGN 64 + +/* ========================================================================= */ +/* reset the token to zero length; the terminator is written only when a buffer has been allocated */ +static void clear_token (mio_json_t* json) +{ + json->tok.len = 0; + if (json->tok_capa > 0) json->tok.ptr[json->tok.len] = '\0'; +} +/* append one character to the token, growing the buffer when it is full. returns 0 on success, -1 if reallocation fails */ +static int add_char_to_token (mio_json_t* json, mio_ooch_t ch) +{ + if (json->tok.len >= json->tok_capa) + { + mio_ooch_t* tmp; + mio_oow_t newcapa; + + newcapa = MIO_ALIGN_POW2(json->tok.len + 2, MIO_JSON_TOKEN_NAME_ALIGN); /* +2 here because of -1 when setting newcapa */ + tmp = (mio_ooch_t*)mio_reallocmem(json->mio, json->tok.ptr, newcapa * MIO_SIZEOF(*tmp)); + if (!tmp) return -1; + + json->tok_capa = newcapa - 1; /* -1 to secure space for terminating null */ + json->tok.ptr = tmp; + } + + json->tok.ptr[json->tok.len++] = ch; + json->tok.ptr[json->tok.len] = '\0'; + return 0; +} +/* append len characters from ptr to the token, growing the buffer when needed. returns 0 on success, -1 if reallocation fails */ +static int add_chars_to_token (mio_json_t* json, const mio_ooch_t* ptr, mio_oow_t len) +{ + mio_oow_t i; + + if (json->tok_capa - json->tok.len < len) /* grow only when the free space cannot hold len characters; tok_capa already excludes the terminator slot */ + { + mio_ooch_t* tmp; + mio_oow_t newcapa; + + newcapa = MIO_ALIGN_POW2(json->tok.len + len + 1, MIO_JSON_TOKEN_NAME_ALIGN); + tmp = (mio_ooch_t*)mio_reallocmem(json->mio, json->tok.ptr, newcapa * MIO_SIZEOF(*tmp)); + if (!tmp) return -1; + + json->tok_capa = newcapa - 1; /* -1 to secure space for terminating null */ + json->tok.ptr = tmp; + } + + for (i = 0; i < len; i++) + json->tok.ptr[json->tok.len++] = ptr[i]; + json->tok.ptr[json->tok.len] = '\0'; + 
return 0; +} +/* map the character that follows a backslash to the control character it denotes; any other character is returned unchanged */ +static MIO_INLINE mio_ooch_t unescape (mio_ooch_t c) +{ + switch (c) + { + case 'a': return '\a'; + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + default: return c; + } +} + +/* ========================================================================= */ +/* push a zero-filled node carrying 'state' on top of the state stack. returns 0 on success, -1 if the node cannot be allocated */ +static int push_state (mio_json_t* json, mio_json_state_t state) +{ + mio_json_state_node_t* ss; + + ss = (mio_json_state_node_t*)mio_callocmem(json->mio, MIO_SIZEOF(*ss)); + if (MIO_UNLIKELY(!ss)) return -1; + + ss->state = state; + ss->next = json->state_stack; + + json->state_stack = ss; + return 0; +} +/* unlink and free the top state node. the node that becomes the top is updated: an array records that it got a value, a dictionary advances its id.state by one */ +static void pop_state (mio_json_t* json) +{ + mio_json_state_node_t* ss; + + ss = json->state_stack; + MIO_ASSERT (json->mio, ss != MIO_NULL && ss != &json->state_top); /* the embedded bottom node must never be popped */ + json->state_stack = ss->next; + + if (json->state_stack->state == MIO_JSON_STATE_IN_ARRAY) + { + json->state_stack->u.ia.got_value = 1; + } + else if (json->state_stack->state == MIO_JSON_STATE_IN_DIC) + { + json->state_stack->u.id.state++; + } + +/* TODO: don't free this. move it to the free list? 
*/ + mio_freemem (json->mio, ss); +} +/* pop states until only the embedded bottom node (state_top) remains */ +static void pop_all_states (mio_json_t* json) +{ + while (json->state_stack != &json->state_top) pop_state (json); +} + +/* ========================================================================= */ +/* report a completed scalar token. a token completed while the enclosing dictionary is at id.state 1 is the key and must be a string. NOTE(review): the callback invocation below is commented out, so no data instruction is delivered yet. returns 0 on success, -1 on a non-string key */ +static int invoke_data_inst (mio_json_t* json, mio_json_inst_t inst) +{ + if (json->state_stack->state == MIO_JSON_STATE_IN_DIC && json->state_stack->u.id.state == 1) + { + if (inst != MIO_JSON_INST_STRING) + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "dictionary key not a string - %.*js", json->tok.len, json->tok.ptr); + return -1; + } + + inst = MIO_JSON_INST_KEY; + } + +///// XXXXX +///// if (json->instcb(json, inst, &json->tok) <= -1) return -1; +///// + return 0; +} +/* process one character inside a string literal. u.sv.escaped is 0 outside an escape, 1 right after a backslash, and otherwise the number of digits to collect (3 octal, or 2/4/8 hexadecimal). returns 1 if c was consumed, 0 if c must be processed again, -1 on failure */ +static int handle_string_value_char (mio_json_t* json, mio_ooci_t c) +{ + int ret = 1; + + if (json->state_stack->u.sv.escaped == 3) + { + if (c >= '0' && c <= '7') + { + json->state_stack->u.sv.acc = json->state_stack->u.sv.acc * 8 + c - '0'; + json->state_stack->u.sv.digit_count++; + if (json->state_stack->u.sv.digit_count >= json->state_stack->u.sv.escaped) goto add_sv_acc; + } + else + { + ret = 0; /* c is not part of the escape; emit what was collected and let c be processed again */ + goto add_sv_acc; + } + } + else if (json->state_stack->u.sv.escaped >= 2) + { + if (c >= '0' && c <= '9') + { + json->state_stack->u.sv.acc = json->state_stack->u.sv.acc * 16 + c - '0'; + json->state_stack->u.sv.digit_count++; + if (json->state_stack->u.sv.digit_count >= json->state_stack->u.sv.escaped) goto add_sv_acc; + } + else if (c >= 'a' && c <= 'f') + { + json->state_stack->u.sv.acc = json->state_stack->u.sv.acc * 16 + c - 'a' + 10; + json->state_stack->u.sv.digit_count++; + if (json->state_stack->u.sv.digit_count >= json->state_stack->u.sv.escaped) goto add_sv_acc; + } + else if (c >= 'A' && c <= 'F') + { + json->state_stack->u.sv.acc = json->state_stack->u.sv.acc * 16 + c - 'A' + 10; + json->state_stack->u.sv.digit_count++; + if (json->state_stack->u.sv.digit_count >= json->state_stack->u.sv.escaped) goto add_sv_acc; + } + else + { + ret = 0; /* c is not a hexadecimal digit; emit what was collected and let c be processed again */ + 
add_sv_acc: + #if defined(MIO_OOCH_IS_UCH) + if (add_char_to_token(json, json->state_stack->u.sv.acc) <= -1) return -1; + #else + /* convert the character to utf8 */ + { + mio_bch_t bcsbuf[MIO_BCSIZE_MAX]; + mio_oow_t n; + + n = json->mio->_cmgr->uctobc(json->state_stack->u.sv.acc, bcsbuf, MIO_COUNTOF(bcsbuf)); + if (n == 0 || n > MIO_COUNTOF(bcsbuf)) + { + /* illegal character or buffer too small */ + mio_seterrbfmt (json->mio, MIO_EECERR, "unable to convert %jc", json->state_stack->u.sv.acc); + return -1; + } + + if (add_chars_to_token(json, bcsbuf, n) <= -1) return -1; + } + #endif + json->state_stack->u.sv.escaped = 0; /* the escape sequence is complete */ + } + } + else if (json->state_stack->u.sv.escaped == 1) /* c is the character right after a backslash */ + { + if (c >= '0' && c <= '7') /* only 0-7 start an octal escape, matching the range accepted for the digits that follow */ + { + json->state_stack->u.sv.escaped = 3; + json->state_stack->u.sv.digit_count = 0; + json->state_stack->u.sv.acc = c - '0'; + } + else if (c == 'x') + { + json->state_stack->u.sv.escaped = 2; /* collect 2 hexadecimal digits */ + json->state_stack->u.sv.digit_count = 0; + json->state_stack->u.sv.acc = 0; + } + else if (c == 'u') + { + json->state_stack->u.sv.escaped = 4; /* collect 4 hexadecimal digits */ + json->state_stack->u.sv.digit_count = 0; + json->state_stack->u.sv.acc = 0; + } + else if (c == 'U') + { + json->state_stack->u.sv.escaped = 8; /* collect 8 hexadecimal digits */ + json->state_stack->u.sv.digit_count = 0; + json->state_stack->u.sv.acc = 0; + } + else + { + json->state_stack->u.sv.escaped = 0; + if (add_char_to_token(json, unescape(c)) <= -1) return -1; + } + } + else if (c == '\\') + { + json->state_stack->u.sv.escaped = 1; + } + else if (c == '\"') + { + pop_state (json); /* closing quote: leave the string state before reporting the token */ + if (invoke_data_inst(json, MIO_JSON_INST_STRING) <= -1) return -1; + } + else + { + if (add_char_to_token(json, c) <= -1) return -1; + } + + return ret; +} + +static int handle_character_value_char (mio_json_t* json, mio_ooci_t c) +{ + /* The real JSON doesn't support character literal. this is MIO's own extension. 
*/ + int ret = 1; /* 1: c consumed, 0: c must be processed again by the caller */ + + if (json->state_stack->u.cv.escaped == 3) /* collecting octal digits */ + { + if (c >= '0' && c <= '7') + { + json->state_stack->u.cv.acc = json->state_stack->u.cv.acc * 8 + c - '0'; + json->state_stack->u.cv.digit_count++; + if (json->state_stack->u.cv.digit_count >= json->state_stack->u.cv.escaped) goto add_cv_acc; + } + else + { + ret = 0; + goto add_cv_acc; + } + } + else if (json->state_stack->u.cv.escaped >= 2) /* collecting hexadecimal digits; must be 'else if' so that an octal digit is not accumulated a second time as hexadecimal */ + { + if (c >= '0' && c <= '9') + { + json->state_stack->u.cv.acc = json->state_stack->u.cv.acc * 16 + c - '0'; + json->state_stack->u.cv.digit_count++; + if (json->state_stack->u.cv.digit_count >= json->state_stack->u.cv.escaped) goto add_cv_acc; + } + else if (c >= 'a' && c <= 'f') + { + json->state_stack->u.cv.acc = json->state_stack->u.cv.acc * 16 + c - 'a' + 10; + json->state_stack->u.cv.digit_count++; + if (json->state_stack->u.cv.digit_count >= json->state_stack->u.cv.escaped) goto add_cv_acc; + } + else if (c >= 'A' && c <= 'F') + { + json->state_stack->u.cv.acc = json->state_stack->u.cv.acc * 16 + c - 'A' + 10; + json->state_stack->u.cv.digit_count++; + if (json->state_stack->u.cv.digit_count >= json->state_stack->u.cv.escaped) goto add_cv_acc; + } + else + { + ret = 0; + add_cv_acc: + if (add_char_to_token(json, json->state_stack->u.cv.acc) <= -1) return -1; + json->state_stack->u.cv.escaped = 0; /* the escape sequence is complete */ + } + } + else if (json->state_stack->u.cv.escaped == 1) /* c is the character right after a backslash */ + { + if (c >= '0' && c <= '7') /* only 0-7 start an octal escape, matching the range accepted for the digits that follow */ + { + json->state_stack->u.cv.escaped = 3; + json->state_stack->u.cv.digit_count = 0; + json->state_stack->u.cv.acc = c - '0'; + } + else if (c == 'x') + { + json->state_stack->u.cv.escaped = 2; + json->state_stack->u.cv.digit_count = 0; + json->state_stack->u.cv.acc = 0; + } + else if (c == 'u') + { + json->state_stack->u.cv.escaped = 4; + json->state_stack->u.cv.digit_count = 0; + json->state_stack->u.cv.acc = 0; + } + else if (c == 'U') + { + json->state_stack->u.cv.escaped = 8; + json->state_stack->u.cv.digit_count = 0; + json->state_stack->u.cv.acc = 0; + } + else + { + 
json->state_stack->u.cv.escaped = 0; + if (add_char_to_token(json, unescape(c)) <= -1) return -1; + } + } + else if (c == '\\') + { + json->state_stack->u.cv.escaped = 1; + } + else if (c == '\'') + { + pop_state (json); /* closing quote: leave the character state before validating the token */ + + if (json->tok.len < 1) + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "no character in a character literal"); + return -1; + } + if (invoke_data_inst(json, MIO_JSON_INST_CHARACTER) <= -1) return -1; + } + else + { + if (add_char_to_token(json, c) <= -1) return -1; + } + + if (json->tok.len > 1) /* checked after every character, so a second character fails immediately */ + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "too many characters in a character literal - %.*js", json->tok.len, json->tok.ptr); + return -1; + } + + return ret; +} +/* process one character of a number: digits, a sign as the very first character, and at most one '.' directly after a digit are accumulated; any other character ends the number, which must end in a digit. returns 1 if c was consumed, 0 if c must be processed again, -1 on failure */ +static int handle_numeric_value_char (mio_json_t* json, mio_ooci_t c) +{ + if (mio_is_ooch_digit(c) || (json->tok.len == 0 && (c == '+' || c == '-'))) + { + if (add_char_to_token(json, c) <= -1) return -1; + return 1; + } + else if (!json->state_stack->u.nv.dotted && c == '.' && + json->tok.len > 0 && mio_is_ooch_digit(json->tok.ptr[json->tok.len - 1])) + { + if (add_char_to_token(json, c) <= -1) return -1; + json->state_stack->u.nv.dotted = 1; + return 1; + } + + pop_state (json); + + MIO_ASSERT (json->mio, json->tok.len > 0); + if (!mio_is_ooch_digit(json->tok.ptr[json->tok.len - 1])) + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "invalid numeric value - %.*js", json->tok.len, json->tok.ptr); + return -1; + } + if (invoke_data_inst(json, MIO_JSON_INST_NUMBER) <= -1) return -1; + return 0; /* start over */ +} +/* process one character of a bare word: alphabetic characters are accumulated; any other character ends the word, which must be null, true or false. returns 1 if c was consumed, 0 if c must be processed again, -1 on failure */ +static int handle_word_value_char (mio_json_t* json, mio_ooci_t c) +{ + mio_json_inst_t inst; + + if (mio_is_ooch_alpha(c)) + { + if (add_char_to_token(json, c) <= -1) return -1; + return 1; + } + + pop_state (json); + + if (mio_comp_oochars_bcstr(json->tok.ptr, json->tok.len, "null", 0) == 0) inst = MIO_JSON_INST_NIL; + else if (mio_comp_oochars_bcstr(json->tok.ptr, json->tok.len, "true", 0) == 0) inst = MIO_JSON_INST_TRUE; + else if (mio_comp_oochars_bcstr(json->tok.ptr, 
json->tok.len, "false", 0) == 0) inst = MIO_JSON_INST_FALSE; + else + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "invalid word value - %.*js", json->tok.len, json->tok.ptr); + return -1; + } + + if (invoke_data_inst(json, inst) <= -1) return -1; + return 0; /* start over */ +} + +/* ========================================================================= */ +/* process one character before any container has been opened: only '[', '{' and white space are accepted. returns 1 if c was consumed, -1 on failure. NOTE(review): instcb is called unconditionally but no assignment to it is visible in this file - confirm it is set before data is fed */ +static int handle_start_char (mio_json_t* json, mio_ooci_t c) +{ + if (c == '[') + { + if (push_state(json, MIO_JSON_STATE_IN_ARRAY) <= -1) return -1; + json->state_stack->u.ia.got_value = 0; + if (json->instcb(json, MIO_JSON_INST_START_ARRAY, MIO_NULL) <= -1) return -1; + return 1; + } + else if (c == '{') + { + if (push_state(json, MIO_JSON_STATE_IN_DIC) <= -1) return -1; + json->state_stack->u.id.state = 0; + if (json->instcb(json, MIO_JSON_INST_START_DIC, MIO_NULL) <= -1) return -1; + return 1; + } + else if (mio_is_ooch_space(c)) + { + /* do nothing */ + return 1; + } + else + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "not starting with [ or { - %jc", (mio_ooch_t)c); + return -1; + } +} +/* process one character seen directly inside an array. u.ia.got_value tells whether a value has been read since the last comma. returns 1 if c was consumed, 0 if c must be processed again in the newly pushed state, -1 on failure */ +static int handle_char_in_array (mio_json_t* json, mio_ooci_t c) +{ + if (c == ']') + { + if (json->instcb(json, MIO_JSON_INST_END_ARRAY, MIO_NULL) <= -1) return -1; + pop_state (json); + return 1; + } + else if (c == ',') + { + if (!json->state_stack->u.ia.got_value) + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "redundant comma in array - %jc", (mio_ooch_t)c); + return -1; + } + json->state_stack->u.ia.got_value = 0; + return 1; + } + else if (mio_is_ooch_space(c)) + { + /* do nothing */ + return 1; + } + else + { + if (json->state_stack->u.ia.got_value) /* a new value may not start until a comma has been seen */ + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "comma required in array - %jc", (mio_ooch_t)c); + return -1; + } + + if (c == '\"') + { + if (push_state(json, MIO_JSON_STATE_IN_STRING_VALUE) <= -1) return -1; + clear_token (json); + return 1; + } + else if (c == '\'') + { + if (push_state(json, MIO_JSON_STATE_IN_CHARACTER_VALUE) <= -1) return -1; + clear_token (json); 
+ return 1; + } + /* TODO: else if (c == '#') MIO radixed number + */ + else if (mio_is_ooch_digit(c) || c == '+' || c == '-') + { + if (push_state(json, MIO_JSON_STATE_IN_NUMERIC_VALUE) <= -1) return -1; + clear_token (json); + json->state_stack->u.nv.dotted = 0; + return 0; /* start over */ + } + else if (mio_is_ooch_alpha(c)) + { + if (push_state(json, MIO_JSON_STATE_IN_WORD_VALUE) <= -1) return -1; + clear_token (json); + return 0; /* start over */ + } + else if (c == '[') + { + if (push_state(json, MIO_JSON_STATE_IN_ARRAY) <= -1) return -1; + json->state_stack->u.ia.got_value = 0; + if (json->instcb(json, MIO_JSON_INST_START_ARRAY, MIO_NULL) <= -1) return -1; + return 1; + } + else if (c == '{') + { + if (push_state(json, MIO_JSON_STATE_IN_DIC) <= -1) return -1; + json->state_stack->u.id.state = 0; + if (json->instcb(json, MIO_JSON_INST_START_DIC, MIO_NULL) <= -1) return -1; + return 1; + } + else + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "wrong character inside array - %jc[%d]", (mio_ooch_t)c, (int)c); + return -1; + } + } +} +/* process one character seen directly inside a dictionary. u.id.state tracks progress: 0 ready for a key, 1 got key, 2 got colon, 3 got value. returns 1 if c was consumed, 0 if c must be processed again in the newly pushed state, -1 on failure */ +static int handle_char_in_dic (mio_json_t* json, mio_ooci_t c) +{ + if (c == '}') + { + if (json->instcb(json, MIO_JSON_INST_END_DIC, MIO_NULL) <= -1) return -1; + pop_state (json); + return 1; + } + else if (c == ':') + { + if (json->state_stack->u.id.state != 1) /* a colon is valid only right after the key */ + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "redundant colon in dictionary - %jc", (mio_ooch_t)c); + return -1; + } + json->state_stack->u.id.state++; + return 1; + } + else if (c == ',') + { + if (json->state_stack->u.id.state != 3) /* a comma is valid only right after the value */ + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "redundant comma in dicitonary - %jc", (mio_ooch_t)c); + return -1; + } + json->state_stack->u.id.state = 0; + return 1; + } + else if (mio_is_ooch_space(c)) + { + /* do nothing */ + return 1; + } + else + { + if (json->state_stack->u.id.state == 1) + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "colon required in dicitonary - %jc", (mio_ooch_t)c); + return -1; + } + else if 
(json->state_stack->u.id.state == 3) + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "comma required in dicitonary - %jc", (mio_ooch_t)c); + return -1; + } + + if (c == '\"') + { + if (push_state(json, MIO_JSON_STATE_IN_STRING_VALUE) <= -1) return -1; + clear_token (json); + return 1; + } + else if (c == '\'') + { + if (push_state(json, MIO_JSON_STATE_IN_CHARACTER_VALUE) <= -1) return -1; + clear_token (json); + return 1; + } + /* TODO: else if (c == '#') MIO radixed number + */ + else if (mio_is_ooch_digit(c) || c == '+' || c == '-') + { + if (push_state(json, MIO_JSON_STATE_IN_NUMERIC_VALUE) <= -1) return -1; + clear_token (json); + json->state_stack->u.nv.dotted = 0; + return 0; /* start over */ + } + else if (mio_is_ooch_alpha(c)) + { + if (push_state(json, MIO_JSON_STATE_IN_WORD_VALUE) <= -1) return -1; + clear_token (json); + return 0; /* start over */ + } + else if (c == '[') + { + if (push_state(json, MIO_JSON_STATE_IN_ARRAY) <= -1) return -1; + json->state_stack->u.ia.got_value = 0; + if (json->instcb(json, MIO_JSON_INST_START_ARRAY, MIO_NULL) <= -1) return -1; + return 1; + } + else if (c == '{') + { + if (push_state(json, MIO_JSON_STATE_IN_DIC) <= -1) return -1; + json->state_stack->u.id.state = 0; + if (json->instcb(json, MIO_JSON_INST_START_DIC, MIO_NULL) <= -1) return -1; + return 1; + } + else + { + mio_seterrbfmt (json->mio, MIO_EINVAL, "wrong character inside dictionary - %jc[%d]", (mio_ooch_t)c, (int)c); + return -1; + } + } +} + +/* ========================================================================= */ +/* dispatch c to the handler for the state on top of the stack, dispatching again while a handler returns 0. returns 0 on success, -1 on failure */ +static int handle_char (mio_json_t* json, mio_ooci_t c) +{ + int x; + +start_over: + if (c == MIO_OOCI_EOF) + { + if (json->state_stack->state == MIO_JSON_STATE_START) + { + /* no input data */ + return 0; + } + else + { + mio_seterrbfmt (json->mio, MIO_EBADRE, "unexpected end of data"); + return -1; + } + } + + switch (json->state_stack->state) + { + case MIO_JSON_STATE_START: + x = handle_start_char(json, c); + break; + + case 
MIO_JSON_STATE_IN_ARRAY: + x = handle_char_in_array(json, c); + break; + + case MIO_JSON_STATE_IN_DIC: + x = handle_char_in_dic(json, c); + break; + + case MIO_JSON_STATE_IN_WORD_VALUE: + x = handle_word_value_char(json, c); + break; + + case MIO_JSON_STATE_IN_STRING_VALUE: + x = handle_string_value_char(json, c); + break; + + case MIO_JSON_STATE_IN_CHARACTER_VALUE: + x = handle_character_value_char(json, c); + break; + + case MIO_JSON_STATE_IN_NUMERIC_VALUE: + x = handle_numeric_value_char(json, c); + break; + + default: + mio_seterrbfmt (json->mio, MIO_EINTERN, "internal error - must not be called for state %d", (int)json->state_stack->state); + return -1; + } + + if (x <= -1) return -1; + if (x == 0) goto start_over; /* the handler changed the state without consuming c; dispatch c again */ + + return 0; +} + +/* ========================================================================= */ +/* feed len bytes at data to the parser one character at a time. returns 1 with *xlen set to the number of bytes consumed, 0 with *xlen set when the input ends in an incomplete multi-byte sequence, -1 on failure (*xlen is not set in that case) */ +static int feed_json_data (mio_json_t* json, const mio_bch_t* data, mio_oow_t len, mio_oow_t* xlen) +{ + const mio_bch_t* ptr; + const mio_bch_t* end; + + ptr = data; + end = ptr + len; + + while (ptr < end) + { + mio_ooci_t c; + + #if defined(MIO_OOCH_IS_UCH) + mio_ooch_t uc; + mio_oow_t bcslen; + mio_oow_t n; + + bcslen = end - ptr; + n = json->mio->_cmgr->bctouc(ptr, bcslen, &uc); + if (n == 0) + { + /* invalid sequence */ + uc = *ptr; /* pass the offending byte through as a character of its own */ + n = 1; + } + else if (n > bcslen) + { + /* incomplete sequence */ + *xlen = ptr - data; + return 0; /* feed more for incomplete sequence */ + } + + ptr += n; + c = uc; + #else + c = *ptr++; + #endif + + /* handle a single character */ + if (handle_char(json, c) <= -1) goto oops; + } + + *xlen = ptr - data; + return 1; + +oops: + /* TODO: compute the number of processed bytes so far and return it via a parameter??? 
*/ +/*printf ("feed oops....\n");*/ + return -1; +} + +/* create a parser with xtnsize bytes of zero-filled extension space placed right after the structure. returns MIO_NULL if allocation or initialization fails */ +/* ========================================================================= */ + +mio_json_t* mio_json_open (mio_t* mio, mio_oow_t xtnsize) +{ + mio_json_t* json; + + json = (mio_json_t*)mio_allocmem(mio, MIO_SIZEOF(*json) + xtnsize); + if (MIO_LIKELY(json)) + { + if (mio_json_init(json, mio) <= -1) + { + mio_freemem (mio, json); + return MIO_NULL; + } + else + { + MIO_MEMSET (json + 1, 0, xtnsize); + } + } + + return json; +} +/* finalize and free a parser */ +void mio_json_close (mio_json_t* json) +{ + mio_json_fini (json); + mio_freemem (json->mio, json); +} + +/* zero-fill the parser and make the embedded state_top node, in the START state, the only entry of the state stack. always returns 0 */ +int mio_json_init (mio_json_t* json, mio_t* mio) +{ + MIO_MEMSET (json, 0, MIO_SIZEOF(*json)); + + json->mio = mio; + json->state_top.state = MIO_JSON_STATE_START; + json->state_top.next = MIO_NULL; + json->state_stack = &json->state_top; + + return 0; +} +/* release every pushed state node and the token buffer; the structure itself is not freed */ +void mio_json_fini (mio_json_t* json) +{ + pop_all_states (json); + if (json->tok.ptr) + { + mio_freemem (json->mio, json->tok.ptr); + json->tok.ptr = MIO_NULL; + } +} +/* ========================================================================= */ +/* return the state on top of the state stack */ +mio_json_state_t mio_json_getstate (mio_json_t* json) +{ + return json->state_stack->state; +} +/* drop all pushed states and return to the START state; the token buffer is left as it is */ +void mio_json_reset (mio_json_t* json) +{ + /* TODO: reset XXXXXXXXXXXXXXXXXXXXXXXXXXXxxxxx */ + pop_all_states (json); + MIO_ASSERT (json->mio, json->state_stack == &json->state_top); + json->state_stack->state = MIO_JSON_STATE_START; +} +/* feed len bytes at ptr to the parser. returns 0 with *xlen set to the number of bytes consumed, which is less than len when the data ends in an incomplete multi-byte sequence; returns -1 on failure without setting *xlen */ +int mio_json_feed (mio_json_t* json, const void* ptr, mio_oow_t len, mio_oow_t* xlen) +{ + int x; + mio_oow_t total, ylen; + const mio_bch_t* buf; + + buf = (const mio_bch_t*)ptr; + total = 0; + while (total < len) + { + x = feed_json_data(json, &buf[total], len - total, &ylen); + if (x <= -1) return -1; + + total += ylen; + if (x == 0) break; /* incomplete sequence encountered */ + } + + *xlen = total; + return 0; +} diff --git a/mio/lib/mio-json.h b/mio/lib/mio-json.h new file mode 100644 index 0000000..c930bc2 --- /dev/null +++ 
b/mio/lib/mio-json.h @@ -0,0 +1,180 @@ +/* + * $Id$ + * + Copyright (c) 2016-2020 Chung, Hyung-Hwan. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MIO_JSON_H_ +#define _MIO_JSON_H_ + +#include + +/** + * The mio_json_t type defines a simple json parser. 
+ */ +typedef struct mio_json_t mio_json_t; + +/* ========================================================================= */ + +enum mio_json_state_t +{ + MIO_JSON_STATE_START, + MIO_JSON_STATE_IN_ARRAY, + MIO_JSON_STATE_IN_DIC, + + MIO_JSON_STATE_IN_WORD_VALUE, + MIO_JSON_STATE_IN_NUMERIC_VALUE, + MIO_JSON_STATE_IN_STRING_VALUE, + MIO_JSON_STATE_IN_CHARACTER_VALUE +}; +typedef enum mio_json_state_t mio_json_state_t; + +/* ========================================================================= */ +enum mio_json_inst_t +{ + MIO_JSON_INST_START_ARRAY, + MIO_JSON_INST_END_ARRAY, + MIO_JSON_INST_START_DIC, + MIO_JSON_INST_END_DIC, + + MIO_JSON_INST_KEY, + + MIO_JSON_INST_CHARACTER, /* there is no such element as character in real JSON */ + MIO_JSON_INST_STRING, + MIO_JSON_INST_NUMBER, + MIO_JSON_INST_NIL, + MIO_JSON_INST_TRUE, + MIO_JSON_INST_FALSE, +}; +typedef enum mio_json_inst_t mio_json_inst_t; + +typedef int (*mio_json_instcb_t) ( + mio_json_t* json, + mio_json_inst_t inst, + const mio_oocs_t* str +); + + +typedef struct mio_json_state_node_t mio_json_state_node_t; +struct mio_json_state_node_t +{ + mio_json_state_t state; + union + { + struct + { + int got_value; + } ia; /* in array */ + + struct + { + /* 0: ready to get key (at the beginning or got comma), + * 1: got key, 2: got colon, 3: got value */ + int state; + } id; /* in dictionary */ + struct + { + int escaped; + int digit_count; + /* acc is always of unicode type to handle \u and \U. + * in the bch mode, it will get converted to a utf8 stream. 
*/ + mio_uch_t acc; + } sv; + struct + { + int escaped; + int digit_count; + /* for a character, no way to support the unicode character + * in the bch mode */ + mio_ooch_t acc; + } cv; + struct + { + int dotted; + } nv; + } u; + mio_json_state_node_t* next; +}; + +struct mio_json_t +{ + mio_t* mio; + mio_json_instcb_t instcb; + + mio_json_state_node_t state_top; + mio_json_state_node_t* state_stack; + mio_oocs_t tok; + mio_oow_t tok_capa; +}; + +/* ========================================================================= */ + +#if defined(__cplusplus) +extern "C" { +#endif + +MIO_EXPORT mio_json_t* mio_json_open ( + mio_t* mio, + mio_oow_t xtnsize +); + +MIO_EXPORT void mio_json_close ( + mio_json_t* json +); + +MIO_EXPORT int mio_json_init ( + mio_json_t* json, + mio_t* mio +); + +MIO_EXPORT void mio_json_fini ( + mio_json_t* json +); + +MIO_EXPORT void mio_json_reset ( + mio_json_t* json +); + +MIO_EXPORT int mio_json_feed ( + mio_json_t* json, + const void* ptr, + mio_oow_t len, + mio_oow_t* xlen +); + +MIO_EXPORT mio_json_state_t mio_json_getstate ( + mio_json_t* json +); + + +#if defined(MIO_HAVE_INLINE) +static MIO_INLINE void* mio_json_getxtn (mio_json_t* json) { return (void*)(json + 1); } +#else +#define mio_json_getxtn(json) ((void*)((mio_json_t*)(json) + 1)) +#endif + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/mio/lib/mio-utl.h b/mio/lib/mio-utl.h index c9bd994..89dcc63 100644 --- a/mio/lib/mio-utl.h +++ b/mio/lib/mio-utl.h @@ -478,13 +478,13 @@ MIO_EXPORT mio_oow_t mio_count_bcstr ( ); #if defined(MIO_OOCH_IS_UCH) -# define mio_equal_oochars(str1,str2,len) mio_equal_uchars(str1,str2,len) -# define mio_comp_oochars(str1,len1,str2,len2) mio_comp_uchars(str1,len1,str2,len2) -# define mio_comp_oocstr_bcstr(str1,str2) mio_comp_ucstr_bcstr(str1,str2) -# define mio_comp_oochars_bcstr(str1,len1,str2) mio_comp_uchars_bcstr(str1,len1,str2) -# define mio_comp_oochars_ucstr(str1,len1,str2) mio_comp_uchars_ucstr(str1,len1,str2) -# define 
mio_comp_oochars_oocstr(str1,len1,str2) mio_comp_uchars_ucstr(str1,len1,str2) -# define mio_comp_oocstr(str1,str2) mio_comp_ucstr(str1,str2) +# define mio_equal_oochars mio_equal_uchars +# define mio_comp_oochars mio_comp_uchars +# define mio_comp_oocstr_bcstr mio_comp_ucstr_bcstr +# define mio_comp_oochars_bcstr mio_comp_uchars_bcstr +# define mio_comp_oochars_ucstr mio_comp_uchars_ucstr +# define mio_comp_oochars_oocstr mio_comp_uchars_ucstr +# define mio_comp_oocstr mio_comp_ucstr # define mio_copy_oochars mio_copy_uchars # define mio_copy_bchars_to_oochars mio_copy_bchars_to_uchars @@ -506,13 +506,13 @@ MIO_EXPORT mio_oow_t mio_count_bcstr ( # define mio_split_oocstr mio_split_ucstr # define mio_count_oocstr mio_count_ucstr #else -# define mio_equal_oochars(str1,str2,len) mio_equal_bchars(str1,str2,len) -# define mio_comp_oochars(str1,len1,str2,len2) mio_comp_bchars(str1,len1,str2,len2) -# define mio_comp_oocstr_bcstr(str1,str2) mio_comp_bcstr(str1,str2) -# define mio_comp_oochars_bcstr(str1,len1,str2) mio_comp_bchars_bcstr(str1,len1,str2) -# define mio_comp_oochars_ucstr(str1,len1,str2) mio_comp_bchars_ucstr(str1,len1,str2) -# define mio_comp_oochars_oocstr(str1,len1,str2) mio_comp_bchars_bcstr(str1,len1,str2) -# define mio_comp_oocstr(str1,str2) mio_comp_bcstr(str1,str2) +# define mio_equal_oochars mio_equal_bchars +# define mio_comp_oochars mio_comp_bchars +# define mio_comp_oocstr_bcstr mio_comp_bcstr +# define mio_comp_oochars_bcstr mio_comp_bchars_bcstr +# define mio_comp_oochars_ucstr mio_comp_bchars_ucstr +# define mio_comp_oochars_oocstr mio_comp_bchars_bcstr +# define mio_comp_oocstr mio_comp_bcstr # define mio_copy_oochars mio_copy_bchars # define mio_copy_bchars_to_oochars mio_copy_bchars diff --git a/mio/lib/mio.h b/mio/lib/mio.h index dfb9465..0a32d71 100644 --- a/mio/lib/mio.h +++ b/mio/lib/mio.h @@ -975,7 +975,7 @@ MIO_EXPORT int mio_gettmrjobdeadline ( * ========================================================================= */ /** - * the 
mio_gettime() function returns the elapsed time since mio initialization. + * The mio_gettime() function returns the elapsed time since mio initialization. */ MIO_EXPORT void mio_gettime ( mio_t* mio,