updated code to support the radixed number with 'r'
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
hyung-hwan 2024-09-24 19:41:42 +09:00
parent 4651fadcea
commit 2abda37861
8 changed files with 207 additions and 90 deletions

View File

@ -38,7 +38,7 @@
#define IS_POW2(ui) (((ui) > 0) && ((ui) & ((ui) - 1)) == 0)
/* digit character array */
static char* _digitc_array[] =
static const char* _digitc_array[] =
{
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",
"0123456789abcdefghijklmnopqrstuvwxyz"

View File

@ -4625,11 +4625,12 @@ static HCL_INLINE int compile_dsymbol (hcl_t* hcl, hcl_cnode_t* obj)
HCL_UNUSED static int string_to_ooi (hcl_t* hcl, hcl_oocs_t* str, int radixed, hcl_ooi_t* num)
{
/* it is not a generic conversion function.
/* [NOTE]
* it is not a generic conversion function.
* it assumes a certain pre-sanity check on the string
* done by the lexical analyzer */
int v, negsign, base;
int v, negsign, base = 10;
const hcl_ooch_t* ptr, * end;
hcl_oow_t value, old_value;
@ -4647,26 +4648,43 @@ HCL_UNUSED static int string_to_ooi (hcl_t* hcl, hcl_oocs_t* str, int radixed, h
if (radixed)
{
/* 0xFF80, 0b1111 */
HCL_ASSERT (hcl, ptr < end);
if (*ptr != '#')
if (*ptr == '0')
{
hcl_seterrbfmt (hcl, HCL_EINVAL, "radixed number not starting with # - %*.js", str->len, str->ptr);
return -1;
}
ptr++; /* skip '#' */
ptr++;
HCL_ASSERT (hcl, ptr < end);
if (*ptr == 'x') base = 16;
else if (*ptr == 'o') base = 8;
else if (*ptr == 'b') base = 2;
else
if (*ptr == 'x') base = 16;
else if (*ptr == 'o') base = 8;
else if (*ptr == 'b') base = 2;
else goto radix_r;
ptr++;
}
if (base == 10)
{
hcl_seterrbfmt (hcl, HCL_EINVAL, "invalid radix specifier - %c", *ptr);
radix_r:
base = 0;
do
{
base = base * 10 + HCL_CHAR_TO_NUM(*ptr, 10);
ptr++;
}
while (*ptr != 'r');
ptr++;
}
if (base < 2 || base > 36)
{
invalid_radix_value:
hcl_seterrbfmt (hcl, HCL_EINVAL,
"invalid radix value '%d' in radixed number '%.*js'", base, str->len, str->ptr);
return -1;
}
ptr++;
}
else base = 10;
HCL_ASSERT (hcl, ptr < end);
@ -4705,13 +4723,17 @@ HCL_UNUSED static int string_to_ooi (hcl_t* hcl, hcl_oocs_t* str, int radixed, h
static hcl_oop_t string_to_num (hcl_t* hcl, hcl_oocs_t* str, const hcl_loc_t* loc, int radixed)
{
int negsign, base;
int negsign, base = 10;
const hcl_ooch_t* ptr, * end;
negsign = 0;
ptr = str->ptr,
end = str->ptr + str->len;
/* [NOTE]
* The code here assumes that the reader ensures that
* there is at least 1 valid digit after radix specifier. */
HCL_ASSERT (hcl, ptr < end);
if (*ptr == '+' || *ptr == '-')
@ -4720,48 +4742,46 @@ static hcl_oop_t string_to_num (hcl_t* hcl, hcl_oocs_t* str, const hcl_loc_t* lo
ptr++;
}
#if 0
if (radixed)
{
/* 16r1234, 2r1111 */
HCL_ASSERT (hcl, ptr < end);
base = 0;
do
{
base = base * 10 + HCL_CHAR_TO_NUM(*ptr, 10);
ptr++;
}
while (*ptr != 'r');
ptr++;
}
else base = 10;
#else
if (radixed)
{
/* 0xFF80, 0b1111 */
HCL_ASSERT (hcl, ptr < end);
if (/**ptr != '#' &&*/ *ptr != '0')
if (*ptr == '0')
{
hcl_setsynerrbfmt(hcl, HCL_SYNERR_RADIX, loc, str, "radixed number not starting with #");
return HCL_NULL;
}
ptr++; /* skip '0' */
ptr++;
HCL_ASSERT (hcl, ptr < end);
if (*ptr == 'x') base = 16;
else if (*ptr == 'o') base = 8;
else if (*ptr == 'b') base = 2;
else
if (*ptr == 'x') base = 16;
else if (*ptr == 'o') base = 8;
else if (*ptr == 'b') base = 2;
else goto radix_r;
ptr++;
}
if (base == 10)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_RADIX, loc, HCL_NULL, "invalid radix specifier %c in %js", *ptr, str);
radix_r:
base = 0;
do
{
base = base * 10 + HCL_CHAR_TO_NUM(*ptr, 10);
ptr++;
}
while (*ptr != 'r');
ptr++;
}
if (base < 2 || base > 36)
{
invalid_radix_value:
hcl_setsynerrbfmt (hcl, HCL_SYNERR_RADIX, loc, HCL_NULL,
"invalid radix value '%d' in radixed number '%.*js'", base, str->len, str->ptr);
return HCL_NULL;
}
ptr++;
}
else base = 10;
#endif
/* TODO: handle floating point numbers ... etc */
if (negsign) base = -base;

View File

@ -91,7 +91,7 @@ static hcl_ooch_t* errstr[] =
};
static char* synerrstr[] =
static const char* synerrstr[] =
{
"no error",
"internal error",

View File

@ -3301,7 +3301,7 @@ static hcl_oop_t fetch_numeric_rcv_slot (hcl_t* hcl, hcl_oop_t rcv, hcl_oow_t b1
hcl_oow_t w;
hcl_obj_type_t rcv_type;
rcv_type = HCL_OBJ_GET_FLAGS_TYPE(rcv);
rcv_type = (hcl_obj_type_t)HCL_OBJ_GET_FLAGS_TYPE(rcv);
switch (HCL_LIKELY(rcv_type))
{
case HCL_OBJ_TYPE_CHAR:
@ -3334,7 +3334,7 @@ static int store_into_numeric_rcv_slot (hcl_t* hcl, hcl_oop_t rcv, hcl_oow_t b1,
if (HCL_OOP_IS_CHAR(v)) w = HCL_OOP_TO_CHAR(v);
else if (hcl_inttooow(hcl, v, &w) <= -1) return -1;
rcv_type = HCL_OBJ_GET_FLAGS_TYPE(rcv);
rcv_type = (hcl_obj_type_t)HCL_OBJ_GET_FLAGS_TYPE(rcv);
switch (HCL_LIKELY(rcv_type))
{
case HCL_OBJ_TYPE_CHAR:
@ -4172,7 +4172,7 @@ hcl_logbfmt (hcl, HCL_LOG_STDERR, ">>>%O c->sc=%O sc=%O b2=%d b3=%d nivars=%d nc
hcl_oop_t _class;
hcl_oop_t mdic, blk, name;
int mtype;
static hcl_bch_t* pfx[] = { "c", "i", "ci" };
static const hcl_bch_t* pfx[] = { "c", "i", "ci" };
mtype = (bcode - HCL_CODE_CLASS_CMSTORE) + 1;
HCL_ASSERT (hcl, mtype >= 1 && mtype <= 3);

View File

@ -813,6 +813,9 @@ struct hcl_flx_pn_t
/* state data */
int fpdec;
int radix;
int radix_cand_overflown;
hcl_oow_t radix_cand;
hcl_tok_type_t tok_type;
hcl_oow_t digit_count[2];
hcl_ooch_t start_digit;
};

View File

@ -236,7 +236,6 @@ static HCL_INLINE int is_linebreak (hcl_ooci_t c)
static HCL_INLINE int is_digit_char (hcl_ooci_t c)
{
	/* Yields 1 when c is one of the ASCII decimal digits '0'..'9',
	 * and 0 for every other input character.
	 * TODO: support full unicode */
	return !(c < '0' || c > '9');
}
@ -286,7 +285,7 @@ int hcl_is_binop_char (hcl_ooci_t c) /* not static HCL_INLINE for shared use wit
return c == '&' || c == '*' || c == '+' || c == '-' || c == '/' || c == '%' ||
c == '<' || c == '>' || c == '=' || c == '@' || c == '|' || c == '~';
}
#define is_binop_char(c) hcl_is_binop_char(c)
static HCL_INLINE int is_lead_ident_char (hcl_ooci_t c)
{
@ -1061,12 +1060,12 @@ static int chain_to_list (hcl_t* hcl, hcl_cnode_t* obj, hcl_loc_t* loc)
{
hcl_rstl_t* rstl;
int flagv;
int list_concode;
/*int list_concode;*/
HCL_ASSERT (hcl, hcl->c->r.st != HCL_NULL);
rstl = hcl->c->r.st;
flagv = rstl->flagv;
list_concode = (hcl_concode_t)LIST_FLAG_GET_CONCODE(flagv);
/*list_concode = (hcl_concode_t)LIST_FLAG_GET_CONCODE(flagv);*/
if (flagv & CLOSED)
{
@ -2232,6 +2231,9 @@ static HCL_INLINE void init_flx_pn (hcl_flx_pn_t* pn, hcl_ooch_t start_digit)
HCL_MEMSET (pn, 0, HCL_SIZEOF(*pn));
pn->start_digit = start_digit;
pn->radix = 10;
pn->radix_cand = 0;
pn->radix_cand_overflown = 0;
pn->tok_type = HCL_TOK_NUMLIT;
}
static HCL_INLINE void init_flx_st (hcl_flx_st_t* st, hcl_ooch_t sign_c)
@ -2349,7 +2351,7 @@ static int flx_start (hcl_t* hcl, hcl_ooci_t c)
FEED_CONTINUE (hcl, HCL_FLX_PLAIN_NUMBER);
goto not_consumed;
case 'B':
case 'B': /* for character/string prefixed with B,b,C,c */
case 'b':
case 'C':
case 'c':
@ -2358,7 +2360,7 @@ static int flx_start (hcl_t* hcl, hcl_ooci_t c)
goto consumed;
default:
if (hcl_is_binop_char(c))
if (is_binop_char(c))
{
init_flx_binop (FLX_BINOP(hcl));
FEED_CONTINUE (hcl, HCL_FLX_BINOP);
@ -2532,7 +2534,7 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c)
* #"..." symbol literal
*/
if (hcl_is_binop_char(c))
if (is_binop_char(c))
{
reset_flx_token (hcl);
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_BINOP);
@ -2551,6 +2553,7 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c)
/* --------------------------- */
#if 0
case 'x': /* hexadecimal number */
init_flx_hn (FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 16);
goto radixed_number;
@ -2558,6 +2561,7 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c)
case 'o': /* octal number */
init_flx_hn (FLX_HN(hcl), HCL_TOK_RADNUMLIT, HCL_SYNERR_NUMLIT, 8);
goto radixed_number;
#endif
case 'b': /* binary number or byte array */
case 'B':
@ -2568,6 +2572,7 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c)
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_BC);
break;
#if 0
case 'e': /* #eXXX - error literal */
init_flx_hn (FLX_HN(hcl), HCL_TOK_ERRLIT, HCL_SYNERR_ERRLIT, 10);
goto radixed_number;
@ -2577,6 +2582,7 @@ static int flx_hmarked_token (hcl_t* hcl, hcl_ooci_t c)
radixed_number:
FEED_CONTINUE_WITH_CHAR (hcl, c, HCL_FLX_HMARKED_NUMBER);
goto consumed;
#endif
/* --------------------------- */
case '\\':
@ -2728,6 +2734,7 @@ static int flx_hmarked_bc (hcl_t* hcl, hcl_ooci_t c)
FEED_WRAP_UP_WITH_CHAR (hcl, c, tt);
goto consumed;
}
#if 0
else if (hb->start_c == 'b' || hb->start_c == 'B')
{
/* TODO: this part needs to be removed once 0x, 0b, 0o and etc are implemented */
@ -2735,6 +2742,7 @@ static int flx_hmarked_bc (hcl_t* hcl, hcl_ooci_t c)
FEED_CONTINUE (hcl, HCL_FLX_HMARKED_NUMBER);
goto not_consumed;
}
#endif
else
{
hcl_ooch_t start_c = hb->start_c;
@ -2752,7 +2760,7 @@ not_consumed:
static int flx_hmarked_binop (hcl_t* hcl, hcl_ooci_t c)
{
if (hcl_is_binop_char(c))
if (is_binop_char(c))
{
ADD_TOKEN_CHAR(hcl, c);
goto consumed;
@ -3002,7 +3010,7 @@ static int flx_binop (hcl_t* hcl, hcl_ooci_t c) /* binary operator/selector */
hcl_flx_binop_t* binop = FLX_BINOP(hcl);
#endif
if (hcl_is_binop_char(c))
if (is_binop_char(c))
{
ADD_TOKEN_CHAR (hcl, c);
goto consumed;
@ -3028,33 +3036,15 @@ static int flx_plain_number (hcl_t* hcl, hcl_ooci_t c) /* number */
{
ADD_TOKEN_CHAR (hcl, c);
pn->digit_count[pn->fpdec]++;
if (pn->tok_type == HCL_TOK_NUMLIT)
{
hcl_oow_t cand = pn->radix_cand * 10 + (c - '0');
if (cand < pn->radix_cand) pn->radix_cand_overflown = 1;
pn->radix_cand = cand;
}
goto consumed;
}
else if (c == 'x' || c == 'o' || c == 'b')
{
/* 0x12ab, 0b1010101, 0o12304567 */
if (!pn->fpdec && pn->digit_count[0] == 1 && pn->start_digit == '0')
{
pn->radix = (c == 'x'? 16: (c == 'o'? 8: 2));
ADD_TOKEN_CHAR (hcl, c);
pn->digit_count[0] = 0;
goto consumed;
}
else
{
goto non_digit_char;
}
}
#if 0
else if (c == 'r')
{
/* 16r12ab, 2r1010101 */
if (!pn->fpdec && !pn->radix)
{
}
}
#endif
else
else if (is_delim_char(c))
{
if (!pn->fpdec && c == '.')
{
@ -3066,6 +3056,7 @@ static int flx_plain_number (hcl_t* hcl, hcl_ooci_t c) /* number */
return -1;
}
pn->fpdec = 1;
pn->tok_type = HCL_TOK_FPDECLIT;
ADD_TOKEN_CHAR (hcl, c);
goto consumed;
}
@ -3073,7 +3064,7 @@ static int flx_plain_number (hcl_t* hcl, hcl_ooci_t c) /* number */
if (pn->digit_count[0] == 0)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), HCL_NULL,
"invalid numeric literal with no digit '%.*js'",
"invalid numeric literal with no digit after '%.*js'",
TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
@ -3085,10 +3076,74 @@ static int flx_plain_number (hcl_t* hcl, hcl_ooci_t c) /* number */
return -1;
}
non_digit_char:
FEED_WRAP_UP (hcl, (pn->fpdec? HCL_TOK_FPDECLIT: (pn->radix != 10? HCL_TOK_RADNUMLIT: HCL_TOK_NUMLIT)));
FEED_WRAP_UP (hcl, pn->tok_type);
goto not_consumed;
}
else
{
if (!pn->fpdec && pn->digit_count[0] == 1 && pn->start_digit == '0' && pn->tok_type == HCL_TOK_NUMLIT)
{
/* prefixed with 0 */
switch (c)
{
case 'x':
pn->tok_type = HCL_TOK_RADNUMLIT;
pn->radix = 16;
break;
case 'o':
pn->tok_type = HCL_TOK_RADNUMLIT;
pn->radix = 8;
break;
case 'b':
pn->tok_type = HCL_TOK_RADNUMLIT;
pn->radix = 2;
break;
case 'p':
pn->tok_type = HCL_TOK_SMPTRLIT;
pn->radix = 16;
break;
case 'e':
pn->tok_type = HCL_TOK_ERRLIT;
pn->radix = 10;
break;
default:
goto other_char;
}
ADD_TOKEN_CHAR (hcl, c);
pn->digit_count[0] = 0;
goto consumed;
}
other_char:
if (!pn->fpdec && pn->tok_type == HCL_TOK_NUMLIT && pn->digit_count[0] > 0 && c == 'r')
{
/* 16rABCD */
if (pn->radix_cand_overflown)
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), HCL_NULL,
"radix too large '%.*js' before '%jc'",
TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl), c);
return -1;
}
pn->tok_type = HCL_TOK_RADNUMLIT;
pn->radix = pn->radix_cand;
ADD_TOKEN_CHAR (hcl, c);
pn->digit_count[0] = 0;
goto consumed;
}
hcl_setsynerrbfmt (hcl, HCL_SYNERR_NUMLIT, TOKEN_LOC(hcl), HCL_NULL,
"invalid numeric literal character '%jc' after '%.*js'",
c, TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
consumed:
return 1;

View File

@ -108,6 +108,9 @@ i := 0xAbCd93481FFAABBCCDDEeFa12837281
j := 0o125715446440377652567463357357502240671201
k := 0b1010101111001101100100110100100000011111111110101010101110111100110011011101111011101111101000010010100000110111001010000001
l := 14272837210234798094990047170340811393
m := 16rAbCd93481FFAABBCCDDEeFa12837281
n := 36rMVV36DNKTQ0Z2Q3L027T3NGH
o := +35r18Q4OPTU2KRS7FVR09B5MORY3
if (== i j) { printf "OK: i is equal to j\n" } \
else { printf "ERROR: i is not equal to j\n" }
@ -115,4 +118,16 @@ if (== i k) { printf "OK: i is equal to k\n" } \
else { printf "ERROR: i is not equal to k\n" }
if (== i l) { printf "OK: i is equal to l\n" } \
else { printf "ERROR: i is not equal to l\n" }
if (== i m) { printf "OK: i is equal to m\n" } \
else { printf "ERROR: i is not equal to m\n" }
if (== i n) { printf "OK: i is equal to n\n" } \
else { printf "ERROR: i is not equal to n\n" }
if (== i o) { printf "OK: i is equal to o\n" } \
else { printf "ERROR: i is not equal to o\n" }
i := (- -16r123abcd128738279878172387123aabbea19849d8282882822332332 123)
k := -1919784483373631008405784517212288102153752573650638404319957951405
if (== i k) { printf "OK: i is %d\n" i k } \
else { printf "ERROR: i is not equal to %d\n" k }
} ## END

View File

@ -14,10 +14,13 @@ $include 10 ##ERROR: syntax error - $include target expected in place of '10'
---
0b ##ERROR: invalid numeric literal with no digit '0b'
0b ##ERROR: syntax error - invalid numeric literal with no digit after '0b'
---
16r ##ERROR: syntax error - invalid numeric literal with no digit after '16r'
---
##
x := (+ 10 20) "aaaa"; ##ERROR: syntax error - too many rvalues
@ -87,7 +90,28 @@ printf "[%c]\n" b'★'; ##ERROR: syntax error - wrong character literal
printf "%O\n" #b[ 10 20 30 ];
printf "%010b\n" 0b0101;
printf "%O\n" #bxy; ##ERROR: syntax error - neither valid radixed number nor valid directive '#bxy'
printf "%O\n" 0bxy; ##ERROR: syntax error - invalid numeric literal character 'x' after '0b'
---
printf "%O\n" 0b12xy ##ERROR: syntax error - invalid numeric literal character '2' after '0b1'
---
printf "%O\n" 0b0b11 ##ERROR: syntax error - invalid numeric literal character 'b' after '0b0'
---
printf "%O\n" 0o0127890 ##ERROR: syntax error - invalid numeric literal character '8' after '0o0127'
---
printf "%O\n" 35rabcdefghijklzabcd ##ERROR: syntax error - invalid numeric literal character 'z' after '35rabcdefghijkl'
---
+ 12389127398127389217382197283197321897r11221 1 ##ERROR: syntax error - radix too large '12389127398127389217382197283197321897' before 'r'
---