diff --git a/lib/bigint.c b/lib/bigint.c index 1e9b777..ffdc5c9 100644 --- a/lib/bigint.c +++ b/lib/bigint.c @@ -38,7 +38,7 @@ #define IS_POW2(ui) (((ui) > 0) && ((ui) & ((ui) - 1)) == 0) /* digit character array */ -static char* _digitc_array[] = +static const char* _digitc_array[] = { "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ", "0123456789abcdefghijklmnopqrstuvwxyz" diff --git a/lib/comp.c b/lib/comp.c index 2d6d538..6c4f56e 100644 --- a/lib/comp.c +++ b/lib/comp.c @@ -4625,11 +4625,12 @@ static HCL_INLINE int compile_dsymbol (hcl_t* hcl, hcl_cnode_t* obj) HCL_UNUSED static int string_to_ooi (hcl_t* hcl, hcl_oocs_t* str, int radixed, hcl_ooi_t* num) { - /* it is not a generic conversion function. + /* [NOTE] + * it is not a generic conversion function. * it assumes a certain pre-sanity check on the string * done by the lexical analyzer */ - int v, negsign, base; + int v, negsign, base = 10; const hcl_ooch_t* ptr, * end; hcl_oow_t value, old_value; @@ -4647,26 +4648,43 @@ HCL_UNUSED static int string_to_ooi (hcl_t* hcl, hcl_oocs_t* str, int radixed, h if (radixed) { + /* 0xFF80, 0b1111 */ HCL_ASSERT (hcl, ptr < end); - if (*ptr != '#') + if (*ptr == '0') { - hcl_seterrbfmt (hcl, HCL_EINVAL, "radixed number not starting with # - %*.js", str->len, str->ptr); - return -1; - } - ptr++; /* skip '#' */ + ptr++; + HCL_ASSERT (hcl, ptr < end); - if (*ptr == 'x') base = 16; - else if (*ptr == 'o') base = 8; - else if (*ptr == 'b') base = 2; - else + if (*ptr == 'x') base = 16; + else if (*ptr == 'o') base = 8; + else if (*ptr == 'b') base = 2; + else goto radix_r; + + ptr++; + } + + if (base == 10) { - hcl_seterrbfmt (hcl, HCL_EINVAL, "invalid radix specifier - %c", *ptr); + radix_r: + base = 0; + do + { + base = base * 10 + HCL_CHAR_TO_NUM(*ptr, 10); + ptr++; + } + while (*ptr != 'r'); + ptr++; + } + + if (base < 2 || base > 36) + { + invalid_radix_value: + hcl_seterrbfmt (hcl, HCL_EINVAL, + "invalid radix value '%d' in radixed number '%.*js'", base, str->len, str->ptr); 
return -1; } - ptr++; } - else base = 10; HCL_ASSERT (hcl, ptr < end); @@ -4705,13 +4723,17 @@ HCL_UNUSED static int string_to_ooi (hcl_t* hcl, hcl_oocs_t* str, int radixed, h static hcl_oop_t string_to_num (hcl_t* hcl, hcl_oocs_t* str, const hcl_loc_t* loc, int radixed) { - int negsign, base; + int negsign, base = 10; const hcl_ooch_t* ptr, * end; negsign = 0; ptr = str->ptr, end = str->ptr + str->len; + /* [NOTE] + * The code here assumes that the reader ensures that + * there is at least 1 valid digit after radix specifier. */ + HCL_ASSERT (hcl, ptr < end); if (*ptr == '+' || *ptr == '-') @@ -4720,48 +4742,46 @@ static hcl_oop_t string_to_num (hcl_t* hcl, hcl_oocs_t* str, const hcl_loc_t* lo ptr++; } -#if 0 - if (radixed) - { - /* 16r1234, 2r1111 */ - HCL_ASSERT (hcl, ptr < end); - - base = 0; - do - { - base = base * 10 + HCL_CHAR_TO_NUM(*ptr, 10); - ptr++; - } - while (*ptr != 'r'); - - ptr++; - } - else base = 10; -#else if (radixed) { /* 0xFF80, 0b1111 */ HCL_ASSERT (hcl, ptr < end); - if (/**ptr != '#' &&*/ *ptr != '0') + if (*ptr == '0') { - hcl_setsynerrbfmt(hcl, HCL_SYNERR_RADIX, loc, str, "radixed number not starting with #"); - return HCL_NULL; - } - ptr++; /* skip '0' */ + ptr++; + HCL_ASSERT (hcl, ptr < end); - if (*ptr == 'x') base = 16; - else if (*ptr == 'o') base = 8; - else if (*ptr == 'b') base = 2; - else + if (*ptr == 'x') base = 16; + else if (*ptr == 'o') base = 8; + else if (*ptr == 'b') base = 2; + else goto radix_r; + + ptr++; + } + + + if (base == 10) { - hcl_setsynerrbfmt (hcl, HCL_SYNERR_RADIX, loc, HCL_NULL, "invalid radix specifier %c in %js", *ptr, str); + radix_r: + base = 0; + do + { + base = base * 10 + HCL_CHAR_TO_NUM(*ptr, 10); + ptr++; + } + while (*ptr != 'r'); + ptr++; + } + + if (base < 2 || base > 36) + { + invalid_radix_value: + hcl_setsynerrbfmt (hcl, HCL_SYNERR_RADIX, loc, HCL_NULL, + "invalid radix value '%d' in radixed number '%.*js'", base, str->len, str->ptr); return HCL_NULL; } - ptr++; } - else base = 10; -#endif 
/* TODO: handle floating point numbers ... etc */ if (negsign) base = -base; diff --git a/lib/err.c b/lib/err.c index 4820db2..cb8847a 100644 --- a/lib/err.c +++ b/lib/err.c @@ -91,7 +91,7 @@ static hcl_ooch_t* errstr[] = }; -static char* synerrstr[] = +static const char* synerrstr[] = { "no error", "internal error", diff --git a/lib/exec.c b/lib/exec.c index e672bdc..9206ac7 100644 --- a/lib/exec.c +++ b/lib/exec.c @@ -3301,7 +3301,7 @@ static hcl_oop_t fetch_numeric_rcv_slot (hcl_t* hcl, hcl_oop_t rcv, hcl_oow_t b1 hcl_oow_t w; hcl_obj_type_t rcv_type; - rcv_type = HCL_OBJ_GET_FLAGS_TYPE(rcv); + rcv_type = (hcl_obj_type_t)HCL_OBJ_GET_FLAGS_TYPE(rcv); switch (HCL_LIKELY(rcv_type)) { case HCL_OBJ_TYPE_CHAR: @@ -3334,7 +3334,7 @@ static int store_into_numeric_rcv_slot (hcl_t* hcl, hcl_oop_t rcv, hcl_oow_t b1, if (HCL_OOP_IS_CHAR(v)) w = HCL_OOP_TO_CHAR(v); else if (hcl_inttooow(hcl, v, &w) <= -1) return -1; - rcv_type = HCL_OBJ_GET_FLAGS_TYPE(rcv); + rcv_type = (hcl_obj_type_t)HCL_OBJ_GET_FLAGS_TYPE(rcv); switch (HCL_LIKELY(rcv_type)) { case HCL_OBJ_TYPE_CHAR: @@ -4172,7 +4172,7 @@ hcl_logbfmt (hcl, HCL_LOG_STDERR, ">>>%O c->sc=%O sc=%O b2=%d b3=%d nivars=%d nc hcl_oop_t _class; hcl_oop_t mdic, blk, name; int mtype; - static hcl_bch_t* pfx[] = { "c", "i", "ci" }; + static const hcl_bch_t* pfx[] = { "c", "i", "ci" }; mtype = (bcode - HCL_CODE_CLASS_CMSTORE) + 1; HCL_ASSERT (hcl, mtype >= 1 && mtype <= 3); diff --git a/lib/hcl-prv.h b/lib/hcl-prv.h index c9012b2..cccb711 100644 --- a/lib/hcl-prv.h +++ b/lib/hcl-prv.h @@ -813,6 +813,9 @@ struct hcl_flx_pn_t /* state data */ int fpdec; int radix; + int radix_cand_overflown; + hcl_oow_t radix_cand; + hcl_tok_type_t tok_type; hcl_oow_t digit_count[2]; hcl_ooch_t start_digit; }; diff --git a/lib/read.c b/lib/read.c index cbe9c93..1173b32 100644 --- a/lib/read.c +++ b/lib/read.c @@ -236,7 +236,6 @@ static HCL_INLINE int is_linebreak (hcl_ooci_t c) static HCL_INLINE int is_digit_char (hcl_ooci_t c) { -/* TODO: support full 
unicode */ return (c >= '0' && c <= '9'); } @@ -286,7 +285,7 @@ int hcl_is_binop_char (hcl_ooci_t c) /* not static HCL_INLINE for shared use wit return c == '&' || c == '*' || c == '+' || c == '-' || c == '/' || c == '%' || c == '<' || c == '>' || c == '=' || c == '@' || c == '|' || c == '~'; } - +#define is_binop_char(c) hcl_is_binop_char(c) static HCL_INLINE int is_lead_ident_char (hcl_ooci_t c) { @@ -1061,12 +1060,12 @@ static int chain_to_list (hcl_t* hcl, hcl_cnode_t* obj, hcl_loc_t* loc) { hcl_rstl_t* rstl; int flagv; - int list_concode; + /*int list_concode;*/ HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL); rstl = hcl->c->r.st; flagv = rstl->flagv; - list_concode = (hcl_concode_t)LIST_FLAG_GET_CONCODE(flagv); + /*list_concode = (hcl_concode_t)LIST_FLAG_GET_CONCODE(flagv);*/ if (flagv & CLOSED) { @@ -2232,6 +2231,9 @@ static HCL_INLINE void init_flx_pn (hcl_flx_pn_t* pn, hcl_ooch_t start_digit) HCL_MEMSET (pn, 0, HCL_SIZEOF(*pn)); pn->start_digit = start_digit; pn->radix = 10; + pn->radix_cand = 0; + pn->radix_cand_overflown = 0; + pn->tok_type = HCL_TOK_NUMLIT; } static HCL_INLINE void init_flx_st (hcl_flx_st_t* st, hcl_ooch_t sign_c) @@ -2349,7 +2351,7 @@ static int flx_start (hcl_t* hcl, hcl_ooci_t c) FEED_CONTINUE (hcl, HCL_FLX_PLAIN_NUMBER); goto not_consumed; - case 'B': + case 'B': /* for character/string prefixed with B,b,C,c */ case 'b': case 'C': case 'c': @@ -2358,7 +2360,7 @@ static int flx_start (hcl_t* hcl, hcl_ooci_t c) goto consumed; default: - if (hcl_is_binop_char(c)) + if (is_binop_char(c)) { init_flx_binop (FLX_BINOP(hcl)); FEED_CONTINUE (hcl, HCL_FLX_BINOP); @@ -2532,7 +2534,7 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c) * #"..." 
symbol literal */ - if (hcl_is_binop_char(c)) + if (is_binop_char(c)) { reset_flx_token (hcl); FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_BINOP); @@ -2551,6 +2553,7 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c) /* --------------------------- */ + #if 0 case 'x': /* hexadecimal number */ init_flx_hn (FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 16); goto radixed_number; @@ -2558,6 +2561,7 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c) case 'o': /* octal number */ init_flx_hn (FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 8); goto radixed_number; + #endif case 'b': /* binary number or byte array */ case 'B': @@ -2568,6 +2572,7 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c) FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_BC); break; + #if 0 case 'e': /* #eXXX - error literal */ init_flx_hn (FLX_HN(hcl), HCL_TOK_ERRLIT, HCL_SYNERR_ERRLIT, 10); goto radixed_number; @@ -2577,6 +2582,7 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c) radixed_number: FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_NUMBER); goto consumed; + #endif /* --------------------------- */ case '\\': @@ -2728,6 +2734,7 @@ static int flx_hmarked_bc (hcl_t* hcl, hcl_ooci_t c) FEED_WRAP_UP_WITH_CHAR (hcl, c, tt); goto consumed; } +#if 0 else if (hb->start_c == 'b' || hb->start_c == 'B') { /* TODO: this part needs to be removed once 0x, 0b, 0o and etc are implemented */ @@ -2735,6 +2742,7 @@ static int flx_hmarked_bc (hcl_t* hcl, hcl_ooci_t c) FEED_CONTINUE (hcl, HCL_FLX_HMARKED_NUMBER); goto not_consumed; } +#endif else { hcl_ooch_t start_c = hb->start_c; @@ -2752,7 +2760,7 @@ not_consumed: static int flx_hmarked_binop (hcl_t* hcl, hcl_ooci_t c) { - if (hcl_is_binop_char(c)) + if (is_binop_char(c)) { ADD_TOKEN_CHAR(hcl, c); goto consumed; @@ -3002,7 +3010,7 @@ static int flx_binop (hcl_t* hcl, hcl_ooci_t c) /* binary operator/selector */ hcl_flx_binop_t* binop = FLX_BINOP(hcl); #endif - if (hcl_is_binop_char(c)) + if (is_binop_char(c)) { 
ADD_TOKEN_CHAR (hcl, c); goto consumed; @@ -3028,33 +3036,15 @@ static int flx_plain_number (hcl_t* hcl, hcl_ooci_t c) /* number */ { ADD_TOKEN_CHAR (hcl, c); pn->digit_count[pn->fpdec]++; + if (pn->tok_type == HCL_TOK_NUMLIT) + { + hcl_oow_t cand = pn->radix_cand * 10 + (c - '0'); + if (cand < pn->radix_cand) pn->radix_cand_overflown = 1; + pn->radix_cand = cand; + } goto consumed; } - else if (c == 'x' || c == 'o' || c == 'b') - { - /* 0x12ab, 0b1010101, 0o12304567 */ - if (!pn->fpdec && pn->digit_count[0] == 1 && pn->start_digit == '0') - { - pn->radix = (c == 'x'? 16: (c == 'o'? 8: 2)); - ADD_TOKEN_CHAR (hcl, c); - pn->digit_count[0] = 0; - goto consumed; - } - else - { - goto non_digit_char; - } - } -#if 0 - else if (c == 'r') - { - /* 16r12ab, 2r1010101 */ - if (!pn->fpdec && !pn->radix) - { - } - } -#endif - else + else if (is_delim_char(c)) { if (!pn->fpdec && c == '.') { @@ -3066,6 +3056,7 @@ static int flx_plain_number (hcl_t* hcl, hcl_ooci_t c) /* number */ return -1; } pn->fpdec = 1; + pn->tok_type = HCL_TOK_FPDECLIT; ADD_TOKEN_CHAR (hcl, c); goto consumed; } @@ -3073,7 +3064,7 @@ static int flx_plain_number (hcl_t* hcl, hcl_ooci_t c) /* number */ if (pn->digit_count[0] == 0) { hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), HCL_NULL, - "invalid numeric literal with no digit '%.*js'", + "invalid numeric literal with no digit after '%.*js'", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); return -1; } @@ -3085,10 +3076,74 @@ static int flx_plain_number (hcl_t* hcl, hcl_ooci_t c) /* number */ return -1; } - non_digit_char: - FEED_WRAP_UP (hcl, (pn->fpdec? HCL_TOK_FPDECLIT: (pn->radix != 10? 
HCL_TOK_RADNUMLIT: HCL_TOK_NUMLIT))); + FEED_WRAP_UP (hcl, pn->tok_type); goto not_consumed; } + else + { + if (!pn->fpdec && pn->digit_count[0] == 1 && pn->start_digit == '0' && pn->tok_type == HCL_TOK_NUMLIT) + { + /* prefixed with 0 */ + switch (c) + { + case 'x': + pn->tok_type = HCL_TOK_RADNUMLIT; + pn->radix = 16; + break; + + case 'o': + pn->tok_type = HCL_TOK_RADNUMLIT; + pn->radix = 8; + break; + + case 'b': + pn->tok_type = HCL_TOK_RADNUMLIT; + pn->radix = 2; + break; + + case 'p': + pn->tok_type = HCL_TOK_SMPTRLIT; + pn->radix = 16; + break; + + case 'e': + pn->tok_type = HCL_TOK_ERRLIT; + pn->radix = 10; + break; + + default: + goto other_char; + } + + ADD_TOKEN_CHAR (hcl, c); + pn->digit_count[0] = 0; + goto consumed; + } + + other_char: + if (!pn->fpdec && pn->tok_type == HCL_TOK_NUMLIT && pn->digit_count[0] > 0 && c == 'r') + { + /* 16rABCD */ + if (pn->radix_cand_overflown) + { + hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), HCL_NULL, + "radix too large '%.*js' before '%jc'", + TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), c); + return -1; + } + + pn->tok_type = HCL_TOK_RADNUMLIT; + pn->radix = pn->radix_cand; + ADD_TOKEN_CHAR (hcl, c); + pn->digit_count[0] = 0; + goto consumed; + } + + hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), HCL_NULL, + "invalid numeric literal character '%jc' after '%.*js'", + c, TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl)); + return -1; + } consumed: return 1; diff --git a/t/feed-01.hcl b/t/feed-01.hcl index 5a54bf8..ec029ac 100644 --- a/t/feed-01.hcl +++ b/t/feed-01.hcl @@ -108,6 +108,9 @@ i := 0xAbCd93481FFAABBCCDDEeFa12837281 j := 0o125715446440377652567463357357502240671201 k := 0b1010101111001101100100110100100000011111111110101010101110111100110011011101111011101111101000010010100000110111001010000001 l := 14272837210234798094990047170340811393 +m := 16rAbCd93481FFAABBCCDDEeFa12837281 +n := 36rMVV36DNKTQ0Z2Q3L027T3NGH +o := +35r18Q4OPTU2KRS7FVR09B5MORY3 if (== i j) { printf "OK: i is equal to j\n" } 
\ else { printf "ERROR: i is not equal to j\n" } @@ -115,4 +118,16 @@ if (== i k) { printf "OK: i is equal to k\n" } \ else { printf "ERROR: i is not equal to k\n" } if (== i l) { printf "OK: i is equal to l\n" } \ else { printf "ERROR: i is not equal to l\n" } +if (== i m) { printf "OK: i is equal to m\n" } \ +else { printf "ERROR: i is not equal to m\n" } +if (== i n) { printf "OK: i is equal to n\n" } \ +else { printf "ERROR: i is not equal to n\n" } +if (== i o) { printf "OK: i is equal to o\n" } \ +else { printf "ERROR: i is not equal to o\n" } + + +i := (- -16r123abcd128738279878172387123aabbea19849d8282882822332332 123) +k := -1919784483373631008405784517212288102153752573650638404319957951405 +if (== i k) { printf "OK: i is %d\n" i k } \ +else { printf "ERROR: i is not equal to %d\n" k } } ## END diff --git a/t/feed-5001.err b/t/feed-5001.err index 2020317..0037698 100644 --- a/t/feed-5001.err +++ b/t/feed-5001.err @@ -14,10 +14,13 @@ $include 10 ##ERROR: syntax error - $include target expected in place of '10' --- -0b ##ERROR: invalid numeric literal with no digit '0b' +0b ##ERROR: syntax error - invalid numeric literal with no digit after '0b' --- +16r ##ERROR: syntax error - invalid numeric literal with no digit after '16r' +--- + ## x := (+ 10 20) "aaaa"; ##ERROR: syntax error - too many rvalues @@ -87,7 +90,28 @@ printf "[%c]\n" b'★'; ##ERROR: syntax error - wrong character literal printf "%O\n" #b[ 10 20 30 ]; printf "%010b\n" 0b0101; -printf "%O\n" #bxy; ##ERROR: syntax error - neither valid radixed number nor valid directive '#bxy' +printf "%O\n" 0bxy; ##ERROR: syntax error - invalid numeric literal character 'x' after '0b' + +--- + +printf "%O\n" 0b12xy ##ERROR: syntax error - invalid numeric literal character '2' after '0b1' + +--- + +printf "%O\n" 0b0b11 ##ERROR: syntax error - invalid numeric literal character 'b' after '0b0' + +--- + +printf "%O\n" 0o0127890 ##ERROR: syntax error - invalid numeric literal character '8' after '0o0127' + +--- + 
+printf "%O\n" 35rabcdefghijklzabcd ##ERROR: syntax error - invalid numeric literal character 'z' after '35rabcdefghijkl' + + +--- + ++ 12389127398127389217382197283197321897r11221 1 ##ERROR: syntax error - radix too large '12389127398127389217382197283197321897' before 'r' ---