From de0715e3027bc0a887a86c91c940387f24624a29 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Sat, 17 Feb 2018 15:42:12 +0000 Subject: [PATCH] implemented the single quote character literal. added the error literal and the smptr literal notations and types --- lib/hcl-prv.h | 2 + lib/hcl.h | 67 +++++++++++++++++++++++++++- lib/print.c | 120 +++++++++++++++++++++++++++++--------------------- lib/read.c | 90 +++++++++++++++++++++++++++++++------ 4 files changed, 213 insertions(+), 66 deletions(-) diff --git a/lib/hcl-prv.h b/lib/hcl-prv.h index d200b4e..db7b6ab 100644 --- a/lib/hcl-prv.h +++ b/lib/hcl-prv.h @@ -146,6 +146,8 @@ struct hcl_iotok_t HCL_IOTOK_STRLIT, HCL_IOTOK_NUMLIT, HCL_IOTOK_RADNUMLIT, + HCL_IOTOK_SMPTRLIT, + HCL_IOTOK_ERRORLIT, HCL_IOTOK_NIL, HCL_IOTOK_TRUE, HCL_IOTOK_FALSE, diff --git a/lib/hcl.h b/lib/hcl.h index 9614f5b..5d6ca27 100644 --- a/lib/hcl.h +++ b/lib/hcl.h @@ -236,6 +236,7 @@ typedef struct hcl_obj_word_t* hcl_oop_word_t; #endif +#if 0 /* * OOP encoding * An object pointer(OOP) is an ordinary pointer value to an object. @@ -266,12 +267,74 @@ typedef struct hcl_obj_word_t* hcl_oop_word_t; #define HCL_CHAR_TO_OOP(num) ((hcl_oop_t)((((hcl_oow_t)(num)) << HCL_OOP_TAG_BITS) | HCL_OOP_TAG_CHAR)) #define HCL_OOP_TO_CHAR(oop) (((hcl_oow_t)oop) >> HCL_OOP_TAG_BITS) +#else +/* + * OOP encoding + * An object pointer(OOP) is an ordinary pointer value to an object. + * but some simple numeric values are also encoded into OOP using a simple + * bit-shifting and masking. + * + * A real OOP is stored without any bit-shifting while a non-pointer value encoded + * in an OOP is bit-shifted to the left by 2 and the 2 least-significant bits + * are set to 1 or 2. + * + * This scheme works because the object allocator aligns the object size to + * a multiple of sizeof(hcl_oop_t). This way, the 2 least-significant bits + * of a real OOP are always 0s. + * + * With 2 bits, I can encode only 3 special types except object pointers. 
+ * Since I need more than 3 special types, I extend the tag bits up to 4 bits + * to represent a special data type that doesn't require a range as wide + * as a small integer. A unicode character, for instance, only requires 21 + * bits at most. An error doesn't need to be as diverse as a small integer. + */ + +#define HCL_OOP_TAG_BITS_LO 2 +#define HCL_OOP_TAG_BITS_HI 2 + +#define HCL_OOP_TAG_SMOOI 1 /* 01 */ +#define HCL_OOP_TAG_SMPTR 2 /* 10 */ +#define HCL_OOP_TAG_EXTENDED 3 /* 11 - internal use only */ +#define HCL_OOP_TAG_CHAR 3 /* 0011 */ +#define HCL_OOP_TAG_ERROR 7 /* 0111 */ +#define HCL_OOP_TAG_RESERVED0 11 /* 1011 */ +#define HCL_OOP_TAG_RESERVED1 15 /* 1111 */ + +#define HCL_OOP_GET_TAG_LO(oop) (((hcl_oow_t)oop) & HCL_LBMASK(hcl_oow_t, HCL_OOP_TAG_BITS_LO)) +#define HCL_OOP_GET_TAG_LOHI(oop) (((hcl_oow_t)oop) & HCL_LBMASK(hcl_oow_t, HCL_OOP_TAG_BITS_LO + HCL_OOP_TAG_BITS_HI)) +#define HCL_OOP_GET_TAG(oop) (HCL_OOP_GET_TAG_LO(oop) == HCL_OOP_TAG_EXTENDED? HCL_OOP_GET_TAG_LOHI(oop): HCL_OOP_GET_TAG_LO(oop)) + +#define HCL_OOP_IS_NUMERIC(oop) (HCL_OOP_GET_TAG_LO(oop) != 0) +#define HCL_OOP_IS_POINTER(oop) (HCL_OOP_GET_TAG_LO(oop) == 0) + +#define HCL_OOP_IS_SMOOI(oop) (HCL_OOP_GET_TAG_LO(oop) == HCL_OOP_TAG_SMOOI) +#define HCL_OOP_IS_SMPTR(oop) (HCL_OOP_GET_TAG_LO(oop) == HCL_OOP_TAG_SMPTR) + +#define HCL_SMOOI_TO_OOP(num) ((hcl_oop_t)((((hcl_ooi_t)(num)) << HCL_OOP_TAG_BITS_LO) | HCL_OOP_TAG_SMOOI)) +#define HCL_OOP_TO_SMOOI(oop) (((hcl_ooi_t)oop) >> HCL_OOP_TAG_BITS_LO) +/* +#define HCL_SMPTR_TO_OOP(num) ((hcl_oop_t)((((hcl_ooi_t)(num)) << HCL_OOP_TAG_BITS_LO) | HCL_OOP_TAG_SMPTR)) +#define HCL_OOP_TO_SMPTR(oop) (((hcl_ooi_t)oop) >> HCL_OOP_TAG_BITS_LO) +*/ +#define HCL_SMPTR_TO_OOP(ptr) ((hcl_oop_t)(((hcl_oow_t)ptr) | HCL_OOP_TAG_SMPTR)) +#define HCL_OOP_TO_SMPTR(oop) ((void*)(((hcl_oow_t)oop) & ~HCL_LBMASK(hcl_oow_t, HCL_OOP_TAG_BITS_LO))) + +#define HCL_OOP_IS_CHAR(oop) (HCL_OOP_GET_TAG(oop) == HCL_OOP_TAG_CHAR) +#define HCL_OOP_IS_ERROR(oop) 
(HCL_OOP_GET_TAG(oop) == HCL_OOP_TAG_ERROR) + +#define HCL_OOP_TO_CHAR(oop) (((hcl_oow_t)oop) >> (HCL_OOP_TAG_BITS_LO + HCL_OOP_TAG_BITS_LO)) +#define HCL_CHAR_TO_OOP(num) ((hcl_oop_t)((((hcl_oow_t)(num)) << (HCL_OOP_TAG_BITS_LO + HCL_OOP_TAG_BITS_LO)) | HCL_OOP_TAG_CHAR)) +#define HCL_OOP_TO_ERROR(oop) (((hcl_oow_t)oop) >> (HCL_OOP_TAG_BITS_LO + HCL_OOP_TAG_BITS_LO)) +#define HCL_ERROR_TO_OOP(num) ((hcl_oop_t)((((hcl_oow_t)(num)) << (HCL_OOP_TAG_BITS_LO + HCL_OOP_TAG_BITS_LO)) | HCL_OOP_TAG_ERROR)) + +#endif + /* SMOOI takes up 62 bit on a 64-bit architecture and 30 bits * on a 32-bit architecture. The absolute value takes up 61 bits and 29 bits * respectively for the 1 sign bit. */ -#define HCL_SMOOI_BITS (HCL_OOI_BITS - HCL_OOP_TAG_BITS) +#define HCL_SMOOI_BITS (HCL_OOI_BITS - HCL_OOP_TAG_BITS_LO) #define HCL_SMOOI_ABS_BITS (HCL_SMOOI_BITS - 1) -#define HCL_SMOOI_MAX ((hcl_ooi_t)(~((hcl_oow_t)0) >> (HCL_OOP_TAG_BITS + 1))) +#define HCL_SMOOI_MAX ((hcl_ooi_t)(~((hcl_oow_t)0) >> (HCL_OOP_TAG_BITS_LO + 1))) /* Sacrificing 1 bit pattern for a negative SMOOI makes * implementation a lot eaisier in many respect. 
*/ /*#define HCL_SMOOI_MIN (-HCL_SMOOI_MAX - 1)*/ diff --git a/lib/print.c b/lib/print.c index 7f35056..35fca77 100644 --- a/lib/print.c +++ b/lib/print.c @@ -114,6 +114,60 @@ static struct }; +static HCL_INLINE int print_single_char (hcl_t* hcl, hcl_oow_t mask, hcl_ooch_t ch, hcl_outbfmt_t outbfmt) +{ + if (ch < ' ') + { + hcl_ooch_t escaped; + + switch (ch) + { + case '\0': + escaped = '0'; + break; + case '\n': + escaped = 'n'; + break; + case '\r': + escaped = 'r'; + break; + case '\t': + escaped = 't'; + break; + case '\f': + escaped = 'f'; + break; + case '\b': + escaped = 'b'; + break; + case '\v': + escaped = 'v'; + break; + case '\a': + escaped = 'a'; + break; + default: + escaped = ch; + break; + } + + if (escaped == ch) + { + if (outbfmt(hcl, mask, "\\x%X", ch) <= -1) return -1; + } + else + { + if (outbfmt(hcl, mask, "\\%jc", escaped) <= -1) return -1; + } + } + else + { + if (outbfmt(hcl, mask, "%jc", ch) <= -1) return -1; + } + + return 0; +} + int hcl_outfmtobj (hcl_t* hcl, hcl_oow_t mask, hcl_oop_t obj, hcl_outbfmt_t outbfmt) { hcl_oop_t cur; @@ -129,7 +183,20 @@ next: } else if (HCL_OOP_IS_CHAR(obj)) { - if (outbfmt(hcl, mask, "$%.1jc", HCL_OOP_TO_CHAR(obj)) <= -1) return -1; + hcl_ooch_t ch = HCL_OOP_TO_CHAR(obj); + if (outbfmt(hcl, mask, "\'") <= -1 || + print_single_char(hcl, mask, ch, outbfmt) <= -1 || + outbfmt(hcl, mask, "\'") <= -1) return -1; + goto done; + } + else if (HCL_OOP_IS_SMPTR(obj)) + { + if (outbfmt(hcl, mask, "#\\p%zu", (hcl_oow_t)HCL_OOP_TO_SMPTR(obj)) <= -1) return -1; + goto done; + } + else if (HCL_OOP_IS_ERROR(obj)) + { + if (outbfmt(hcl, mask, "#\\e%zd", (hcl_ooi_t)HCL_OOP_TO_ERROR(obj)) <= -1) return -1; goto done; } @@ -214,60 +281,13 @@ next: if (escape) { - hcl_ooch_t escaped; - if (outbfmt(hcl, mask, "\"") <= -1) return -1; for (i = 0; i < HCL_OBJ_GET_SIZE(obj); i++) { + ch = ((hcl_oop_char_t)obj)->slot[i]; - if (ch < ' ') - { - switch (ch) - { - case '\0': - escaped = '0'; - break; - case '\n': - escaped = 'n'; - 
break; - case '\r': - escaped = 'r'; - break; - case '\t': - escaped = 't'; - break; - case '\f': - escaped = 'f'; - break; - case '\b': - escaped = 'b'; - break; - case '\v': - escaped = 'v'; - break; - case '\a': - escaped = 'a'; - break; - default: - escaped = ch; - break; - } - - if (escaped == ch) - { - if (outbfmt(hcl, mask, "\\x%X", ch) <= -1) return -1; - } - else - { - if (outbfmt(hcl, mask, "\\%jc", escaped) <= -1) return -1; - } - } - else - { - if (outbfmt(hcl, mask, "%jc", ch) <= -1) return -1; - } + if (print_single_char(hcl, mask, ch, outbfmt) <= -1) return -1; } - if (outbfmt(hcl, mask, "\"") <= -1) return -1; } else diff --git a/lib/read.c b/lib/read.c index ed71855..5ad71ef 100644 --- a/lib/read.c +++ b/lib/read.c @@ -219,7 +219,7 @@ static hcl_oop_t string_to_num (hcl_t* hcl, hcl_oocs_t* str, int radixed) if (*ptr != '#') { hcl_seterrnum (hcl, HCL_EINVAL); - return -1; + return HCL_NULL; } ptr++; /* skip '#' */ @@ -229,7 +229,7 @@ static hcl_oop_t string_to_num (hcl_t* hcl, hcl_oocs_t* str, int radixed) else { hcl_seterrnum (hcl, HCL_EINVAL); - return -1; + return HCL_NULL; } ptr++; } @@ -821,25 +821,44 @@ static int get_sharp_token (hcl_t* hcl) } while (!is_delimiter(c)); - if (hcl->c->tok.name.len >= 4) + if (TOKEN_NAME_LEN(hcl) >= 4) { - if (hcl->c->tok.name.ptr[2] == 'x' || hcl->c->tok.name.ptr[2] == 'u') + if (hcl->c->tok.name.ptr[2] == 'p') + { + SET_TOKEN_TYPE (hcl, HCL_IOTOK_SMPTRLIT); + goto hexcharlit; + } + else if (hcl->c->tok.name.ptr[2] == 'x' || hcl->c->tok.name.ptr[2] == 'u') { hcl_oow_t i; + hexcharlit: c = 0; - for (i = 3; i < hcl->c->tok.name.len; i++) + for (i = 3; i < TOKEN_NAME_LEN(hcl); i++) { - if (!is_xdigitchar(hcl->c->tok.name.ptr[i])) + if (!is_xdigitchar(TOKEN_NAME_CHAR(hcl, i))) { hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), - "invalid hexadecimal character in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); + "invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), 
TOKEN_NAME_PTR(hcl)); return -1; } - - c = c * 16 + CHAR_TO_NUM(hcl->c->tok.name.ptr[i], 16); + c = c * 16 + CHAR_TO_NUM(hcl->c->tok.name.ptr[i], 16); /* don't care if it is for 'p' */ } } + else if (hcl->c->tok.name.ptr[2] == 'e') + { + hcl_oow_t i; + for (i = 3; i < TOKEN_NAME_LEN(hcl); i++) + { + if (!is_digitchar(TOKEN_NAME_CHAR(hcl, i))) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), + "invalid decimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); + return -1; + } + } + SET_TOKEN_TYPE (hcl, HCL_IOTOK_ERRORLIT); + } else if (does_token_name_match(hcl, VOCA_SPACE)) { c = ' '; @@ -891,12 +910,15 @@ static int get_sharp_token (hcl_t* hcl) else { HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 3); - c = TOKEN_NAME_CHAR(hcl,2); + c = TOKEN_NAME_CHAR(hcl, 2); } /* reset the token name to the converted character */ - CLEAR_TOKEN_NAME (hcl); - ADD_TOKEN_CHAR (hcl, c); + if (hcl->c->tok.type == HCL_IOTOK_CHARLIT) + { + CLEAR_TOKEN_NAME (hcl); + ADD_TOKEN_CHAR (hcl, c); + } unget_char (hcl, &hcl->c->lxc); break; @@ -1049,11 +1071,19 @@ retry: if (get_string(hcl, '\"', '\\', 0, 0) <= -1) return -1; break; -#if 0 case '\'': + #if 0 if (get_quoted_token(hcl) <= -1) return -1; + #else + if (get_string(hcl, '\'', '\\', 0, 0) <= -1) return -1; + if (hcl->c->tok.name.len != 1) + { + hcl_setsynerr (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl)); + return -1; + } + SET_TOKEN_TYPE (hcl, HCL_IOTOK_CHARLIT); + #endif break; -#endif case '#': if (get_sharp_token(hcl) <= -1) return -1; @@ -1918,6 +1948,38 @@ static int read_object (hcl_t* hcl) obj = hcl->_false; break; + case HCL_IOTOK_SMPTRLIT: + { + hcl_oow_t i; + hcl_oow_t v = 0; + + HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) >= 4); + for (i = 3; i < TOKEN_NAME_LEN(hcl); i++) + { + HCL_ASSERT (hcl, is_xdigitchar(TOKEN_NAME_CHAR(hcl, i))); + v = v * 16 + CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, i), 16); + } + + obj = HCL_SMPTR_TO_OOP(v); + break; + } + + case HCL_IOTOK_ERRORLIT: + { + 
hcl_oow_t i; + hcl_ooi_t v = 0; + + HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) >= 4); + for (i = 3; i < TOKEN_NAME_LEN(hcl); i++) + { + HCL_ASSERT (hcl, is_digitchar(TOKEN_NAME_CHAR(hcl, i))); + v = v * 10 + CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, i), 10); + } + + obj = HCL_ERROR_TO_OOP(v); + break; + } + case HCL_IOTOK_CHARLIT: obj = HCL_CHAR_TO_OOP(TOKEN_NAME_CHAR(hcl, 0)); break;