added more compiler code

This commit is contained in:
hyunghwan.chung 2015-06-16 13:29:29 +00:00
parent 13fecb4c70
commit af5e2a543b
5 changed files with 261 additions and 65 deletions

View File

@ -132,6 +132,9 @@ enum voca_id_t
};
typedef enum voca_id_t voca_id_t;
static int compile_method_statement (stix_t* stix);
static int compile_method_expression (stix_t* stix, int pop);
static int add_literal (stix_t* stix, stix_oop_t lit, stix_size_t* index);
static STIX_INLINE int is_spacechar (stix_uci_t c)
{
@ -347,10 +350,14 @@ static stix_ssize_t find_word_in_string (const stix_ucs_t* haystack, const stix_
(c >= 'A' && c <= 'Z')? ((c - 'A' + 10 < base)? (c - 'A' + 10): base): \
(c >= 'a' && c <= 'z')? ((c - 'a' + 10 < base)? (c - 'a' + 10): base): base)
static int string_to_smint (stix_t* stix, stix_ucs_t* str, int base, stix_ooi_t* num)
static int string_to_smint (stix_t* stix, stix_ucs_t* str, int radixed, stix_ooi_t* num)
{
/* it is not a generic conversion function.
* it assumes a certain pre-sanity check on the string
* done by the lexical analyzer */
/* TODO: handle floating point numbers, etc, handle radix */
int v, negsign, overflow;
int v, negsign, overflow, base;
const stix_uch_t* ptr, * end;
stix_oow_t value, old_value;
@ -359,23 +366,34 @@ static int string_to_smint (stix_t* stix, stix_ucs_t* str, int base, stix_ooi_t*
ptr = str->ptr,
end = str->ptr + str->len;
if (ptr < end)
{
STIX_ASSERT (ptr < end);
if (*ptr == '+' || *ptr == '-')
{
negsign = *ptr - '+';
ptr++;
}
}
if (ptr >= end)
if (radixed)
{
stix->errnum = STIX_EINVAL;
return -1;
STIX_ASSERT (ptr < end);
base = 0;
do
{
base = base * 10 + CHAR_TO_NUM(*ptr, 10);
ptr++;
}
while (*ptr != 'r');
ptr++;
}
else base = 10;
STIX_ASSERT (ptr < end);
value = old_value = 0;
while (ptr < end && (v = CHAR_TO_NUM(*ptr, base)) != base)
while (ptr < end && (v = CHAR_TO_NUM(*ptr, base)) < base)
{
value = value * base + v;
if (value < old_value)
@ -387,7 +405,6 @@ static int string_to_smint (stix_t* stix, stix_ucs_t* str, int base, stix_ooi_t*
ptr++;
}
if (ptr < end || overflow)
{
/* trailing garbage or overflow */
@ -702,19 +719,62 @@ static int get_numlit (stix_t* stix, int negated)
* fractionalDigits := decimal-integer
*/
stix_uci_t c = stix->c->lxc.c;
stix_uci_t c;
int radix = 0, r;
c = stix->c->lxc.c;
stix->c->tok.type = STIX_IOTOK_NUMLIT;
/*TODO: support a complex numeric literal */
do
{
if (radix <= 36)
{
/* collect the potential radix specifier */
r = CHAR_TO_NUM (c, 10);
STIX_ASSERT (r < 10);
radix = radix * 10 + r;
}
ADD_TOKEN_CHAR(stix, c);
GET_CHAR (stix);
c = stix->c->lxc.c;
GET_CHAR_TO (stix, c);
}
while (is_digitchar(c));
/* TODO; more */
if (c == 'r')
{
/* radix specifier */
if (radix < 2 || radix > 36)
{
/* the radix is outside the supported range of 2 to 36 */
set_syntax_error (stix, STIX_SYNERR_RADIX, &stix->c->tok.loc, &stix->c->tok.name);
return -1;
}
ADD_TOKEN_CHAR(stix, c);
GET_CHAR_TO (stix, c);
if (CHAR_TO_NUM(c, radix) >= radix)
{
/* no digit after the radix specifier */
set_syntax_error (stix, STIX_SYNERR_RADNUMLIT, &stix->c->tok.loc, &stix->c->tok.name);
return -1;
}
do
{
ADD_TOKEN_CHAR(stix, c);
GET_CHAR_TO (stix, c);
}
while (CHAR_TO_NUM(c, radix) < radix);
stix->c->tok.type = STIX_IOTOK_RADNUMLIT;
}
/*
* TODO: handle floating point number
*/
return 0;
}
@ -1220,13 +1280,33 @@ static int emit_double_positional_instruction (stix_t* stix, int cmd, stix_size_
return 0;
}
/*
 * Emit the code that pushes the small integer 'i'.
 *
 * The values -1, 0 and 1 are emitted as the dedicated byte instructions
 * CODE_PUSH_NEGONE, CODE_PUSH_ZERO and CODE_PUSH_ONE respectively; in
 * these cases the result of emit_byte_instruction() is returned as is.
 *
 * Any other value is registered via add_literal() and pushed with a
 * CMD_PUSH_LITERAL positional instruction that refers to the index
 * add_literal() handed back.
 *
 * Returns 0 once both steps of the generic path have succeeded, or -1
 * if either of them reported failure.
 */
static int emit_push_smint_literal (stix_t* stix, stix_ooi_t i)
{
	stix_size_t lit_pos;

	/* values that have an instruction of their own */
	if (i == -1) return emit_byte_instruction (stix, CODE_PUSH_NEGONE);
	if (i == 0) return emit_byte_instruction (stix, CODE_PUSH_ZERO);
	if (i == 1) return emit_byte_instruction (stix, CODE_PUSH_ONE);

	/* generic path: add the literal first, then push it by its index */
	if (add_literal(stix, STIX_OOP_FROM_SMINT(i), &lit_pos) <= -1) return -1;
	if (emit_positional_instruction(stix, CMD_PUSH_LITERAL, lit_pos) <= -1) return -1;

	return 0;
}
/* ---------------------------------------------------------------------
* Compiler
* --------------------------------------------------------------------- */
static int compile_method_statement (stix_t* stix);
static int compile_method_expression (stix_t* stix, int pop);
static int add_literal (stix_t* stix, stix_oop_t lit, stix_size_t* index)
{
stix_size_t i;
@ -1950,7 +2030,11 @@ static int compile_block_temporaries (stix_t* stix)
static int compile_block_expression (stix_t* stix)
{
stix_size_t code_start_pos;
stix_size_t jump_inst_pos;
stix_size_t saved_tmpr_count;
stix_size_t block_arg_count;
stix_size_t block_code_size;
stix_ioloc_t block_loc, colon_loc;
/*
* block-expression := "[" block-body "]"
@ -1958,23 +2042,17 @@ static int compile_block_expression (stix_t* stix)
* block-argument := ":" identifier
*/
code_start_pos = stix->c->mth.code.len;
/* this function expects [ not to be consumed away */
STIX_ASSERT (stix->c->tok.type = STIX_IOTOK_LBRACK);
block_loc = stix->c->tok.loc;
GET_TOKEN (stix);
#if 0
if (emit_byte_instruction(stix, CODE_PUSH_CONTEXT) <= -1 ||
emit_byte_instruction(stix,
if (emit_byte_instruction(stix, CODE_NOOP) <= -1 ||
emit_byte_instruction(stix, CODE_NOOP) <= -
/* reserve space for JUMP instruction */
if (emit_byte_instruction(stix, CODE_NOOP) <= -1 ||
emit_byte_instruction(stix, CODE_NOOP) <= -1 ||
emit_byte_instruction(stix, CODE_NOOP) <= -1) return -1;
#endif
saved_tmpr_count = stix->c->mth.tmpr_count;
if (stix->c->tok.type == STIX_IOTOK_COLON)
{
colon_loc = stix->c->tok.loc;
/* block temporary variables */
do
{
@ -1987,8 +2065,15 @@ static int compile_block_expression (stix_t* stix)
return -1;
}
/* TODO : store block arguments */
/* TODO: check conflicting names as well */
if (find_temporary_variable(stix, &stix->c->tok.name) >= 0)
{
set_syntax_error (stix, STIX_SYNERR_BLKARGNAMEDUP, &stix->c->tok.loc, &stix->c->tok.name);
return -1;
}
if (add_temporary_variable(stix, &stix->c->tok.name) <= -1) return -1;
GET_TOKEN (stix);
}
while (stix->c->tok.type == STIX_IOTOK_COLON);
@ -2002,7 +2087,23 @@ static int compile_block_expression (stix_t* stix)
GET_TOKEN (stix);
}
/* TODO: create a block closure */
block_arg_count = stix->c->mth.tmpr_count - saved_tmpr_count;
if (block_arg_count > MAX_CODE_NBLKARGS)
{
set_syntax_error (stix, STIX_SYNERR_BLKARGFLOOD, &colon_loc, STIX_NULL);
return -1;
}
if (emit_byte_instruction(stix, CODE_PUSH_CONTEXT) <= -1 ||
emit_push_smint_literal(stix, block_arg_count) <= -1 ||
emit_byte_instruction(stix, CODE_SEND_BLOCK_COPY) <= -1) return -1;
/* insert dummy instructions before replacing them with a jump instruction */
jump_inst_pos = stix->c->mth.code.len;
if (emit_byte_instruction(stix, 0) <= -1 ||
emit_byte_instruction(stix, 0) <= -1 ||
emit_byte_instruction(stix, 0) <= -1) return -1;
if (compile_block_temporaries(stix) <= -1 ||
compile_block_statements(stix) <= -1) return -1;
@ -2012,10 +2113,19 @@ static int compile_block_expression (stix_t* stix)
return -1;
}
GET_TOKEN (stix);
block_code_size = stix->c->mth.code.len - jump_inst_pos + 3;
if (block_code_size > MAX_CODE_BLKCODE)
{
set_syntax_error (stix, STIX_SYNERR_BLKFLOOD, &colon_loc, STIX_NULL);
return -1;
}
/* TODO: do special treatment for block closures */
/* TODO: GENERATE BLOCK CONTEXT CREATION INSTRUCTION */
/* TODO: use CMD_EXTEND if block_code_size is <= 255 */
stix->c->mth.code.ptr[jump_inst_pos] = MAKE_CODE(CMD_EXTEND_DOUBLE, CMD_JUMP);
stix->c->mth.code.ptr[jump_inst_pos + 1] = (block_code_size & 0xFF00u) >> 8;
stix->c->mth.code.ptr[jump_inst_pos + 2] = (block_code_size & 0x00FFu);
GET_TOKEN (stix);
return 0;
}
@ -2114,6 +2224,7 @@ printf ("push false...\n");
break;
case STIX_IOTOK_THIS_CONTEXT:
printf ("push context...\n");
if (emit_byte_instruction(stix, CODE_PUSH_CONTEXT) <= -1) return -1;
GET_TOKEN (stix);
break;
@ -2141,12 +2252,13 @@ printf ("push symbol literal %d\n", (int)index);
break;
case STIX_IOTOK_NUMLIT:
case STIX_IOTOK_RADNUMLIT:
{
/* TODO: other types of numbers, negative numbers, etc */
/* TODO: proper numeric literal handling */
stix_ooi_t tmp;
if (string_to_smint(stix, &stix->c->tok.name, 10, &tmp) <= -1)
if (string_to_smint(stix, &stix->c->tok.name, stix->c->tok.type == STIX_IOTOK_RADNUMLIT, &tmp) <= -1)
{
printf ("NOT IMPLEMENTED LARGE_INTEGER or ERROR?\n");
stix->errnum = STIX_ENOIMPL;
@ -2154,23 +2266,7 @@ printf ("NOT IMPLEMENTED LARGE_INTEGER or ERROR?\n");
}
else
{
switch (tmp)
{
case -1:
if (emit_byte_instruction(stix, CODE_PUSH_NEGONE) <= -1) return -1;
break;
case 0:
if (emit_byte_instruction(stix, CODE_PUSH_ZERO) <= -1) return -1;
break;
case 1:
if (emit_byte_instruction(stix, CODE_PUSH_ONE) <= -1) return -1;
break;
default:
if (add_literal(stix, STIX_OOP_FROM_SMINT(tmp), &index) <= -1) return -1;
}
if (emit_push_smint_literal(stix, tmp) <= -1) return -1;
}
GET_TOKEN (stix);
@ -2186,7 +2282,7 @@ printf ("NOT IMPLEMENTED LARGE_INTEGER or ERROR?\n");
break;
case STIX_IOTOK_LBRACK:
GET_TOKEN (stix);
/*GET_TOKEN (stix);*/
if (compile_block_expression(stix) <= -1) return -1;
break;

View File

@ -96,15 +96,100 @@ void __dump_object (stix_t* stix, stix_oop_t oop, int depth)
s.ptr = ((stix_oop_char_t)c->name)->slot;
s.len = STIX_OBJ_GET_SIZE(c->name);
print_ucs (&s);
printf ("\n");
if (oop == stix->_nil)
{
printf (" nil");
}
else if (oop == stix->_true)
{
printf (" true");
}
else if (oop == stix->_false)
{
printf (" false");
}
else if (STIX_OOP_IS_SMINT(oop))
{
printf (" %ld", (long int)STIX_OOP_TO_SMINT(oop));
}
else if (STIX_OOP_IS_CHAR(oop))
{
stix_bch_t bcs[32];
stix_uch_t uch;
stix_size_t ucslen, bcslen;
uch = STIX_OOP_TO_CHAR(oop);
bcslen = STIX_COUNTOF(bcs);
ucslen = 1;
if (stix_ucstoutf8 (&uch, &ucslen, bcs, &bcslen) >= 0)
{
printf (" $%.*s", (int)bcslen, bcs);
}
}
else if (STIX_OOP_IS_POINTER(oop))
{
if (STIX_OBJ_GET_FLAGS_TYPE(oop) == STIX_OBJ_TYPE_OOP)
{
/* TODO: print _Array specially using #( */
printf ("\n");
for (i = 0; i < STIX_OBJ_GET_SIZE(oop); i++)
{
__dump_object (stix, ((stix_oop_oop_t)oop)->slot[i], depth + 1);
}
}
else if (STIX_OBJ_GET_FLAGS_TYPE(oop) == STIX_OBJ_TYPE_CHAR)
{
if (STIX_CLASSOF(stix,oop) == stix->_symbol)
{
printf (" #'");
}
else if (STIX_CLASSOF(stix,oop) == stix->_string)
{
printf (" '");
}
for (i = 0; i < STIX_OBJ_GET_SIZE(oop); i++)
{
stix_bch_t bcs[32];
stix_uch_t uch;
stix_size_t ucslen, bcslen;
uch = ((stix_oop_char_t)oop)->slot[i];
if (uch == '\'') printf ("''");
else
{
bcslen = STIX_COUNTOF(bcs);
ucslen = 1;
if (stix_ucstoutf8 (&uch, &ucslen, bcs, &bcslen) >= 0)
{
printf ("%.*s", (int)bcslen, bcs);
}
}
}
printf ("'");
}
else if (STIX_OBJ_GET_FLAGS_TYPE(oop) == STIX_OBJ_TYPE_BYTE)
{
printf (" #[");
for (i = 0; i < STIX_OBJ_GET_SIZE(oop); i++)
{
printf (" %d", ((stix_oop_byte_t)oop)->slot[i]);
}
printf ("]");
}
else if (STIX_OBJ_GET_FLAGS_TYPE(oop) == STIX_OBJ_TYPE_WORD)
{
printf (" #["); /* TODO: different symbol for word array ?? */
for (i = 0; i < STIX_OBJ_GET_SIZE(oop); i++)
{
printf (" %ld", ((stix_oop_word_t)oop)->slot[i]);
}
printf ("]");
}
}
printf ("\n");
}
void dump_object (stix_t* stix, stix_oop_t oop, const char* title)

View File

@ -208,7 +208,7 @@ printf ("instance method dictioanry ====\n");
do
{
mthdic = ((stix_oop_class_t)c)->mthdic[dic_no];
STIX_ASSERT (mthdic != stix->_nil);
STIX_ASSERT ((stix_oop_t)mthdic != stix->_nil);
STIX_ASSERT (STIX_CLASSOF(stix, mthdic) == stix->_method_dictionary);
dump_dictionary (stix, mthdic, "Method dictionary");

View File

@ -174,7 +174,9 @@ static char* syntax_error_msg[] =
"no character after $",
"no valid character after #",
"missing colon",
"string expected", /* string expected in place of ${1} */
"string expected",
"invalid radix",
"invalid numeric literal",
"{ expected",
"} expected",
"( expected",
@ -196,12 +198,15 @@ static char* syntax_error_msg[] =
"duplicate argument name",
"duplicate temporary variable name",
"duplicate variable name",
"duplicate block argument name",
"cannot assign to argument",
"undeclared variable",
"unusable variable in compiled code",
"inaccessible variable",
"wrong expression primary",
"too many arguments",
"too many block arguments",
"too large block",
"wrong primitive number"
};

View File

@ -230,6 +230,7 @@ struct stix_iotok_t
STIX_IOTOK_STRLIT,
STIX_IOTOK_SYMLIT,
STIX_IOTOK_NUMLIT,
STIX_IOTOK_RADNUMLIT,
STIX_IOTOK_NIL,
STIX_IOTOK_SELF,
STIX_IOTOK_SUPER,
@ -272,6 +273,8 @@ enum stix_synerrnum_t
STIX_SYNERR_HLTNT, /* hased literal not terminated */
STIX_SYNERR_CLNMS, /* colon missing */
STIX_SYNERR_STRING, /* string expected */
STIX_SYNERR_RADIX, /* invalid radix */
STIX_SYNERR_RADNUMLIT, /* invalid numeric literal with radix */
STIX_SYNERR_LBRACE, /* { expected */
STIX_SYNERR_RBRACE, /* } expected */
STIX_SYNERR_LPAREN, /* ( expected */
@ -293,12 +296,15 @@ enum stix_synerrnum_t
STIX_SYNERR_ARGNAMEDUP, /* duplicate argument name */
STIX_SYNERR_TMPRNAMEDUP, /* duplicate temporary variable name */
STIX_SYNERR_VARNAMEDUP, /* duplicate variable name */
STIX_SYNERR_BLKARGNAMEDUP, /* duplicate block argument name */
STIX_SYNERR_VARARG, /* cannot assign to argument */
STIX_SYNERR_VARUNDCL, /* undeclared variable */
STIX_SYNERR_VARUNUSE, /* unsuable variable in compiled code */
STIX_SYNERR_VARINACC, /* inaccessible variable - e.g. accessing an instance variable from a class method is not allowed. */
STIX_SYNERR_PRIMARY, /* wrong expression primary */
STIX_SYNERR_ARGFLOOD, /* too many arguments */
STIX_SYNERR_BLKARGFLOOD, /* too many block arguments */
STIX_SYNERR_BLKFLOOD, /* too large block */
STIX_SYNERR_PRIMITIVENO /* wrong primitive number */
};
typedef enum stix_synerrnum_t stix_synerrnum_t;
@ -438,6 +444,8 @@ struct stix_compiler_t
/*
 * MAKE_CODE(x,y) combines two values into one code value: 'x' is shifted
 * left by 4 bits and OR'ed with 'y'.
 *
 * Both arguments are parenthesized in the expansion so that an argument
 * containing an operator of lower precedence than '|' (for example a
 * conditional expression) is evaluated as a unit instead of being
 * regrouped with the shift/OR of the macro body.
 */
#define MAKE_CODE(x,y) (((x) << 4) | (y))

/* upper limits checked by the compiler; each is the largest 16-bit value */
#define MAX_CODE_INDEX 0xFFFFu
#define MAX_CODE_NARGS 0xFFFFu
/* maximum number of arguments a block may declare */
#define MAX_CODE_NBLKARGS 0xFFFFu
/* maximum block code size; the size is stored as two bytes in the jump operand */
#define MAX_CODE_BLKCODE 0xFFFFu
enum stix_cmdcode_t
{
@ -500,6 +508,7 @@ enum stix_cmdcode_t
SUBCMD_RETURN_STACKTOP = 0x2,
SUBCMD_RETURN_BLOCK_STACKTOP = 0x3,
SUBCMD_RETURN_RECEIVER = 0x4,
SUBCMD_SEND_BLOCK_COPY = 0xE,
SUBCMD_NOOP = 0xF
};
@ -519,6 +528,7 @@ enum stix_cmdcode_t
#define CODE_RETURN_STACKTOP MAKE_CODE(CMD_DO_SPECIAL, SUBCMD_RETURN_STACKTOP)
#define CODE_RETURN_BLOCK_STACKTOP MAKE_CODE(CMD_DO_SPECIAL, SUBCMD_RETURN_BLOCK_STACKTOP)
#define CODE_RETURN_RECEIVER MAKE_CODE(CMD_DO_SPECIAL, SUBCMD_RETURN_RECEIVER)
#define CODE_SEND_BLOCK_COPY MAKE_CODE(CMD_DO_SPECIAL, SUBCMD_SEND_BLOCK_COPY)
#define CODE_NOOP MAKE_CODE(CMD_DO_SPECIAL, SUBCMD_NOOP)
#if defined(__cplusplus)