implemented the single quote character literal.

added the error literal and the smptr literal notations and types
This commit is contained in:
hyung-hwan 2018-02-17 15:42:12 +00:00
parent 270a762e32
commit de0715e302
4 changed files with 213 additions and 66 deletions

View File

@ -146,6 +146,8 @@ struct hcl_iotok_t
HCL_IOTOK_STRLIT, HCL_IOTOK_STRLIT,
HCL_IOTOK_NUMLIT, HCL_IOTOK_NUMLIT,
HCL_IOTOK_RADNUMLIT, HCL_IOTOK_RADNUMLIT,
HCL_IOTOK_SMPTRLIT,
HCL_IOTOK_ERRORLIT,
HCL_IOTOK_NIL, HCL_IOTOK_NIL,
HCL_IOTOK_TRUE, HCL_IOTOK_TRUE,
HCL_IOTOK_FALSE, HCL_IOTOK_FALSE,

View File

@ -236,6 +236,7 @@ typedef struct hcl_obj_word_t* hcl_oop_word_t;
#endif #endif
#if 0
/* /*
* OOP encoding * OOP encoding
* An object pointer(OOP) is an ordinary pointer value to an object. * An object pointer(OOP) is an ordinary pointer value to an object.
@ -266,12 +267,74 @@ typedef struct hcl_obj_word_t* hcl_oop_word_t;
#define HCL_CHAR_TO_OOP(num) ((hcl_oop_t)((((hcl_oow_t)(num)) << HCL_OOP_TAG_BITS) | HCL_OOP_TAG_CHAR)) #define HCL_CHAR_TO_OOP(num) ((hcl_oop_t)((((hcl_oow_t)(num)) << HCL_OOP_TAG_BITS) | HCL_OOP_TAG_CHAR))
#define HCL_OOP_TO_CHAR(oop) (((hcl_oow_t)oop) >> HCL_OOP_TAG_BITS) #define HCL_OOP_TO_CHAR(oop) (((hcl_oow_t)oop) >> HCL_OOP_TAG_BITS)
#else
/*
* OOP encoding
* An object pointer(OOP) is an ordinary pointer value to an object.
* but some simple numeric values are also encoded into OOP using a simple
* bit-shifting and masking.
*
* A real OOP is stored without any bit-shifting while a non-pointer value encoded
* in an OOP is bit-shifted to the left by 2 and the 2 least-significant bits
* are set to 1 or 2.
*
* This scheme works because the object allocator aligns the object size to
* a multiple of sizeof(hcl_oop_t). This way, the 2 least-significant bits
* of a real OOP are always 0s.
*
* With 2 bits, I can encode only 3 special types besides object pointers.
* Since I need more than 3 special types, I extend the tag bits up to 4 bits
* to represent a special data type that doesn't require a range as wide
* as a small integer. A unicode character, for instance, only requires 21
* bits at most. An error doesn't need to be as diverse as a small integer.
*/
/* Width of the primary tag (the low bits) and of the extension that
 * follows it when the primary tag is HCL_OOP_TAG_EXTENDED. */
#define HCL_OOP_TAG_BITS_LO 2
#define HCL_OOP_TAG_BITS_HI 2

/* Primary tags stored in the HCL_OOP_TAG_BITS_LO low bits.
 * A low tag of 0 denotes a real object pointer. */
#define HCL_OOP_TAG_SMOOI 1 /* 01 */
#define HCL_OOP_TAG_SMPTR 2 /* 10 */
#define HCL_OOP_TAG_EXTENDED 3 /* 11 - internal use only */

/* Extended tags. They occupy HCL_OOP_TAG_BITS_LO + HCL_OOP_TAG_BITS_HI bits
 * and their low part always equals HCL_OOP_TAG_EXTENDED. */
#define HCL_OOP_TAG_CHAR 3 /* 0011 */
#define HCL_OOP_TAG_ERROR 7 /* 0111 */
#define HCL_OOP_TAG_RESERVED0 11 /* 1011 */
#define HCL_OOP_TAG_RESERVED1 15 /* 1111 */

/* Tag extraction. HCL_OOP_GET_TAG() yields the 4-bit tag for extended
 * values and the 2-bit tag otherwise. It evaluates its argument more than
 * once, so do not pass an expression with side effects. */
#define HCL_OOP_GET_TAG_LO(oop) (((hcl_oow_t)(oop)) & HCL_LBMASK(hcl_oow_t, HCL_OOP_TAG_BITS_LO))
#define HCL_OOP_GET_TAG_LOHI(oop) (((hcl_oow_t)(oop)) & HCL_LBMASK(hcl_oow_t, HCL_OOP_TAG_BITS_LO + HCL_OOP_TAG_BITS_HI))
#define HCL_OOP_GET_TAG(oop) (HCL_OOP_GET_TAG_LO(oop) == HCL_OOP_TAG_EXTENDED? HCL_OOP_GET_TAG_LOHI(oop): HCL_OOP_GET_TAG_LO(oop))

/* Classification by the primary tag only. */
#define HCL_OOP_IS_NUMERIC(oop) (HCL_OOP_GET_TAG_LO(oop) != 0)
#define HCL_OOP_IS_POINTER(oop) (HCL_OOP_GET_TAG_LO(oop) == 0)
#define HCL_OOP_IS_SMOOI(oop) (HCL_OOP_GET_TAG_LO(oop) == HCL_OOP_TAG_SMOOI)
#define HCL_OOP_IS_SMPTR(oop) (HCL_OOP_GET_TAG_LO(oop) == HCL_OOP_TAG_SMPTR)

/* Small integer. The value is converted to the unsigned word type before
 * shifting so that encoding a negative number does not left-shift a
 * negative signed value; the resulting bit pattern is unchanged. */
#define HCL_SMOOI_TO_OOP(num) ((hcl_oop_t)((((hcl_oow_t)(hcl_ooi_t)(num)) << HCL_OOP_TAG_BITS_LO) | HCL_OOP_TAG_SMOOI))
#define HCL_OOP_TO_SMOOI(oop) (((hcl_ooi_t)(oop)) >> HCL_OOP_TAG_BITS_LO)

/* Small pointer. The pointer is stored unshifted with the tag OR'ed into
 * its low bits, and decoding masks those bits off again. The pointer given
 * to HCL_SMPTR_TO_OOP() must therefore have its HCL_OOP_TAG_BITS_LO low
 * bits clear or the tag is corrupted. */
#define HCL_SMPTR_TO_OOP(ptr) ((hcl_oop_t)(((hcl_oow_t)(ptr)) | HCL_OOP_TAG_SMPTR))
#define HCL_OOP_TO_SMPTR(oop) ((void*)(((hcl_oow_t)(oop)) & ~HCL_LBMASK(hcl_oow_t, HCL_OOP_TAG_BITS_LO)))

/* Character and error values carry an extended tag, so the payload is
 * shifted past both the low and the high tag bits. */
#define HCL_OOP_IS_CHAR(oop) (HCL_OOP_GET_TAG(oop) == HCL_OOP_TAG_CHAR)
#define HCL_OOP_IS_ERROR(oop) (HCL_OOP_GET_TAG(oop) == HCL_OOP_TAG_ERROR)
#define HCL_OOP_TO_CHAR(oop) (((hcl_oow_t)(oop)) >> (HCL_OOP_TAG_BITS_LO + HCL_OOP_TAG_BITS_HI))
#define HCL_CHAR_TO_OOP(num) ((hcl_oop_t)((((hcl_oow_t)(num)) << (HCL_OOP_TAG_BITS_LO + HCL_OOP_TAG_BITS_HI)) | HCL_OOP_TAG_CHAR))
#define HCL_OOP_TO_ERROR(oop) (((hcl_oow_t)(oop)) >> (HCL_OOP_TAG_BITS_LO + HCL_OOP_TAG_BITS_HI))
#define HCL_ERROR_TO_OOP(num) ((hcl_oop_t)((((hcl_oow_t)(num)) << (HCL_OOP_TAG_BITS_LO + HCL_OOP_TAG_BITS_HI)) | HCL_OOP_TAG_ERROR))
#endif
/* SMOOI takes up 62 bit on a 64-bit architecture and 30 bits /* SMOOI takes up 62 bit on a 64-bit architecture and 30 bits
* on a 32-bit architecture. The absolute value takes up 61 bits and 29 bits * on a 32-bit architecture. The absolute value takes up 61 bits and 29 bits
* respectively for the 1 sign bit. */ * respectively for the 1 sign bit. */
#define HCL_SMOOI_BITS (HCL_OOI_BITS - HCL_OOP_TAG_BITS) #define HCL_SMOOI_BITS (HCL_OOI_BITS - HCL_OOP_TAG_BITS_LO)
#define HCL_SMOOI_ABS_BITS (HCL_SMOOI_BITS - 1) #define HCL_SMOOI_ABS_BITS (HCL_SMOOI_BITS - 1)
#define HCL_SMOOI_MAX ((hcl_ooi_t)(~((hcl_oow_t)0) >> (HCL_OOP_TAG_BITS + 1))) #define HCL_SMOOI_MAX ((hcl_ooi_t)(~((hcl_oow_t)0) >> (HCL_OOP_TAG_BITS_LO + 1)))
/* Sacrificing 1 bit pattern for a negative SMOOI makes /* Sacrificing 1 bit pattern for a negative SMOOI makes
* implementation a lot eaisier in many respect. */ * implementation a lot eaisier in many respect. */
/*#define HCL_SMOOI_MIN (-HCL_SMOOI_MAX - 1)*/ /*#define HCL_SMOOI_MIN (-HCL_SMOOI_MAX - 1)*/

View File

@ -114,6 +114,60 @@ static struct
}; };
/*
 * Writes one character through outbfmt, escaping control characters.
 *
 * A character that compares below ' ' is written as a backslash followed
 * by a mnemonic letter when one is known (0, n, r, t, f, b, v, a), and as
 * a backslash-x hexadecimal sequence otherwise. Any other character is
 * written as is.
 *
 * Returns 0 on success, or -1 if outbfmt reports a failure (a return
 * value of -1 or less).
 */
static HCL_INLINE int print_single_char (hcl_t* hcl, hcl_oow_t mask, hcl_ooch_t ch, hcl_outbfmt_t outbfmt)
{
	hcl_ooch_t mnemonic;

	/* characters outside the control range need no escaping */
	if (!(ch < ' '))
	{
		return (outbfmt(hcl, mask, "%jc", ch) <= -1)? -1: 0;
	}

	switch (ch)
	{
		case '\0': mnemonic = '0'; break;
		case '\n': mnemonic = 'n'; break;
		case '\r': mnemonic = 'r'; break;
		case '\t': mnemonic = 't'; break;
		case '\f': mnemonic = 'f'; break;
		case '\b': mnemonic = 'b'; break;
		case '\v': mnemonic = 'v'; break;
		case '\a': mnemonic = 'a'; break;

		default:
			/* no mnemonic available - fall back to the hexadecimal form */
			return (outbfmt(hcl, mask, "\\x%X", ch) <= -1)? -1: 0;
	}

	return (outbfmt(hcl, mask, "\\%jc", mnemonic) <= -1)? -1: 0;
}
int hcl_outfmtobj (hcl_t* hcl, hcl_oow_t mask, hcl_oop_t obj, hcl_outbfmt_t outbfmt) int hcl_outfmtobj (hcl_t* hcl, hcl_oow_t mask, hcl_oop_t obj, hcl_outbfmt_t outbfmt)
{ {
hcl_oop_t cur; hcl_oop_t cur;
@ -129,7 +183,20 @@ next:
} }
else if (HCL_OOP_IS_CHAR(obj)) else if (HCL_OOP_IS_CHAR(obj))
{ {
if (outbfmt(hcl, mask, "$%.1jc", HCL_OOP_TO_CHAR(obj)) <= -1) return -1; hcl_ooch_t ch = HCL_OOP_TO_CHAR(obj);
if (outbfmt(hcl, mask, "\'") <= -1 ||
print_single_char(hcl, mask, ch, outbfmt) <= -1 ||
outbfmt(hcl, mask, "\'") <= -1) return -1;
goto done;
}
else if (HCL_OOP_IS_SMPTR(obj))
{
if (outbfmt(hcl, mask, "#\\p%zu", (hcl_oow_t)HCL_OOP_TO_SMPTR(obj)) <= -1) return -1;
goto done;
}
else if (HCL_OOP_IS_ERROR(obj))
{
if (outbfmt(hcl, mask, "#\\e%zd", (hcl_ooi_t)HCL_OOP_TO_ERROR(obj)) <= -1) return -1;
goto done; goto done;
} }
@ -214,60 +281,13 @@ next:
if (escape) if (escape)
{ {
hcl_ooch_t escaped;
if (outbfmt(hcl, mask, "\"") <= -1) return -1; if (outbfmt(hcl, mask, "\"") <= -1) return -1;
for (i = 0; i < HCL_OBJ_GET_SIZE(obj); i++) for (i = 0; i < HCL_OBJ_GET_SIZE(obj); i++)
{ {
ch = ((hcl_oop_char_t)obj)->slot[i]; ch = ((hcl_oop_char_t)obj)->slot[i];
if (ch < ' ') if (print_single_char(hcl, mask, ch, outbfmt) <= -1) return -1;
{
switch (ch)
{
case '\0':
escaped = '0';
break;
case '\n':
escaped = 'n';
break;
case '\r':
escaped = 'r';
break;
case '\t':
escaped = 't';
break;
case '\f':
escaped = 'f';
break;
case '\b':
escaped = 'b';
break;
case '\v':
escaped = 'v';
break;
case '\a':
escaped = 'a';
break;
default:
escaped = ch;
break;
}
if (escaped == ch)
{
if (outbfmt(hcl, mask, "\\x%X", ch) <= -1) return -1;
}
else
{
if (outbfmt(hcl, mask, "\\%jc", escaped) <= -1) return -1;
}
}
else
{
if (outbfmt(hcl, mask, "%jc", ch) <= -1) return -1;
}
} }
if (outbfmt(hcl, mask, "\"") <= -1) return -1; if (outbfmt(hcl, mask, "\"") <= -1) return -1;
} }
else else

View File

@ -219,7 +219,7 @@ static hcl_oop_t string_to_num (hcl_t* hcl, hcl_oocs_t* str, int radixed)
if (*ptr != '#') if (*ptr != '#')
{ {
hcl_seterrnum (hcl, HCL_EINVAL); hcl_seterrnum (hcl, HCL_EINVAL);
return -1; return HCL_NULL;
} }
ptr++; /* skip '#' */ ptr++; /* skip '#' */
@ -229,7 +229,7 @@ static hcl_oop_t string_to_num (hcl_t* hcl, hcl_oocs_t* str, int radixed)
else else
{ {
hcl_seterrnum (hcl, HCL_EINVAL); hcl_seterrnum (hcl, HCL_EINVAL);
return -1; return HCL_NULL;
} }
ptr++; ptr++;
} }
@ -821,25 +821,44 @@ static int get_sharp_token (hcl_t* hcl)
} }
while (!is_delimiter(c)); while (!is_delimiter(c));
if (hcl->c->tok.name.len >= 4) if (TOKEN_NAME_LEN(hcl) >= 4)
{ {
if (hcl->c->tok.name.ptr[2] == 'x' || hcl->c->tok.name.ptr[2] == 'u') if (hcl->c->tok.name.ptr[2] == 'p')
{
SET_TOKEN_TYPE (hcl, HCL_IOTOK_SMPTRLIT);
goto hexcharlit;
}
else if (hcl->c->tok.name.ptr[2] == 'x' || hcl->c->tok.name.ptr[2] == 'u')
{ {
hcl_oow_t i; hcl_oow_t i;
hexcharlit:
c = 0; c = 0;
for (i = 3; i < hcl->c->tok.name.len; i++) for (i = 3; i < TOKEN_NAME_LEN(hcl); i++)
{ {
if (!is_xdigitchar(hcl->c->tok.name.ptr[i])) if (!is_xdigitchar(TOKEN_NAME_CHAR(hcl, i)))
{ {
hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl), hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"invalid hexadecimal character in %.*js", hcl->c->tok.name.len, hcl->c->tok.name.ptr); "invalid hexadecimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1; return -1;
} }
c = c * 16 + CHAR_TO_NUM(hcl->c->tok.name.ptr[i], 16); /* don't care if it is for 'p' */
c = c * 16 + CHAR_TO_NUM(hcl->c->tok.name.ptr[i], 16);
} }
} }
else if (hcl->c->tok.name.ptr[2] == 'e')
{
hcl_oow_t i;
for (i = 3; i < TOKEN_NAME_LEN(hcl); i++)
{
if (!is_digitchar(TOKEN_NAME_CHAR(hcl, i)))
{
hcl_setsynerrbfmt (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl),
"invalid decimal character in %.*js", TOKEN_NAME_LEN(hcl), TOKEN_NAME_PTR(hcl));
return -1;
}
}
SET_TOKEN_TYPE (hcl, HCL_IOTOK_ERRORLIT);
}
else if (does_token_name_match(hcl, VOCA_SPACE)) else if (does_token_name_match(hcl, VOCA_SPACE))
{ {
c = ' '; c = ' ';
@ -891,12 +910,15 @@ static int get_sharp_token (hcl_t* hcl)
else else
{ {
HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 3); HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) == 3);
c = TOKEN_NAME_CHAR(hcl,2); c = TOKEN_NAME_CHAR(hcl, 2);
} }
/* reset the token name to the converted character */ /* reset the token name to the converted character */
CLEAR_TOKEN_NAME (hcl); if (hcl->c->tok.type == HCL_IOTOK_CHARLIT)
ADD_TOKEN_CHAR (hcl, c); {
CLEAR_TOKEN_NAME (hcl);
ADD_TOKEN_CHAR (hcl, c);
}
unget_char (hcl, &hcl->c->lxc); unget_char (hcl, &hcl->c->lxc);
break; break;
@ -1049,11 +1071,19 @@ retry:
if (get_string(hcl, '\"', '\\', 0, 0) <= -1) return -1; if (get_string(hcl, '\"', '\\', 0, 0) <= -1) return -1;
break; break;
#if 0
case '\'': case '\'':
#if 0
if (get_quoted_token(hcl) <= -1) return -1; if (get_quoted_token(hcl) <= -1) return -1;
#else
if (get_string(hcl, '\'', '\\', 0, 0) <= -1) return -1;
if (hcl->c->tok.name.len != 1)
{
hcl_setsynerr (hcl, HCL_SYNERR_CHARLIT, TOKEN_LOC(hcl), TOKEN_NAME(hcl));
return -1;
}
SET_TOKEN_TYPE (hcl, HCL_IOTOK_CHARLIT);
#endif
break; break;
#endif
case '#': case '#':
if (get_sharp_token(hcl) <= -1) return -1; if (get_sharp_token(hcl) <= -1) return -1;
@ -1918,6 +1948,38 @@ static int read_object (hcl_t* hcl)
obj = hcl->_false; obj = hcl->_false;
break; break;
case HCL_IOTOK_SMPTRLIT:
{
hcl_oow_t i;
hcl_oow_t v = 0;
HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) >= 4);
for (i = 3; i < TOKEN_NAME_LEN(hcl); i++)
{
HCL_ASSERT (hcl, is_xdigitchar(TOKEN_NAME_CHAR(hcl, i)));
v = v * 16 + CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, i), 16);
}
obj = HCL_SMPTR_TO_OOP(v);
break;
}
case HCL_IOTOK_ERRORLIT:
{
hcl_oow_t i;
hcl_ooi_t v = 0;
HCL_ASSERT (hcl, TOKEN_NAME_LEN(hcl) >= 4);
for (i = 3; i < TOKEN_NAME_LEN(hcl); i++)
{
HCL_ASSERT (hcl, is_digitchar(TOKEN_NAME_CHAR(hcl, i)));
v = v * 10 + CHAR_TO_NUM(TOKEN_NAME_CHAR(hcl, i), 10);
}
obj = HCL_ERROR_TO_OOP(v);
break;
}
case HCL_IOTOK_CHARLIT: case HCL_IOTOK_CHARLIT:
obj = HCL_CHAR_TO_OOP(TOKEN_NAME_CHAR(hcl, 0)); obj = HCL_CHAR_TO_OOP(TOKEN_NAME_CHAR(hcl, 0));
break; break;