From dea9944270ae3185978d09dbe1d526e5e7b6d1f7 Mon Sep 17 00:00:00 2001 From: "hyunghwan.chung" Date: Tue, 23 Jun 2015 14:00:26 +0000 Subject: [PATCH] added array and byte-array literal handling. removed CMD_EXTEND_DOUBLE and cleaned up instruction decoding a bit --- stix/lib/comp.c | 390 ++++++++++++++++++++++++++++++++++++++------ stix/lib/debug.c | 27 ++- stix/lib/exec.c | 141 ++++++++++------ stix/lib/main.c | 1 + stix/lib/stix-prv.h | 85 ++++++---- stix/lib/stix.h | 1 + 6 files changed, 507 insertions(+), 138 deletions(-) diff --git a/stix/lib/comp.c b/stix/lib/comp.c index 6698846..fd12aec 100644 --- a/stix/lib/comp.c +++ b/stix/lib/comp.c @@ -27,9 +27,11 @@ #include "stix-prv.h" #define TOKEN_NAME_ALIGN 256 -#define CLASS_BUFFER_ALIGN 8 /* 256 */ +#define CLASS_BUFFER_ALIGN 8 /* 256 */ /*TODO: change 8 to 256 */ #define LITERAL_BUFFER_ALIGN 8 /* 256 */ #define CODE_BUFFER_ALIGN 8 /* 256 */ +#define BALIT_BUFFER_ALIGN 8 /* 256 */ +#define ARLIT_BUFFER_ALIGN 8 /* 256 */ /* initial method dictionary size */ #define INSTANCE_METHOD_DICTIONARY_SIZE 256 /* TODO: choose the right size */ @@ -358,12 +360,11 @@ static int string_to_smint (stix_t* stix, stix_ucs_t* str, int radixed, stix_ooi * done by the lexical analyzer */ /* TODO: handle floating point numbers, etc, handle radix */ - int v, negsign, overflow, base; + int v, negsign, base; const stix_uch_t* ptr, * end; stix_oow_t value, old_value; negsign = 0; - overflow = 0; ptr = str->ptr, end = str->ptr + str->len; @@ -400,28 +401,38 @@ static int string_to_smint (stix_t* stix, stix_ucs_t* str, int radixed, stix_ooi if (value < old_value) { /* overflow must have occurred */ - overflow = 1; + stix->errnum = STIX_ERANGE; + return -1; } old_value = value; ptr++; } - if (ptr < end || overflow) + if (ptr < end) { - /* trailing garbage or overflow */ + /* trailing garbage? 
*/ stix->errnum = STIX_EINVAL; return -1; } if (negsign) { - if (value > STIX_SMINT_MIN) return -1; + /*if (value > STIX_SMINT_MIN) return -1;*/ + if (value > ((stix_oow_t)STIX_SMINT_MAX + 1)) + { + stix->errnum = STIX_ERANGE; + return -1; + } *num = value; *num *= -1; } else { - if (value > STIX_SMINT_MAX) return -1; + if (value > STIX_SMINT_MAX) + { + stix->errnum = STIX_ERANGE; + return -1; + } *num = value; } @@ -445,6 +456,9 @@ static int string_to_smint (stix_t* stix, stix_ucs_t* str, int radixed, stix_ooi #define GET_TOKEN(stix) \ do { if (get_token(stix) <= -1) return -1; } while (0) +#define GET_TOKEN_WITH_ERRRET(stix, v_ret) \ + do { if (get_token(stix) <= -1) return v_ret; } while (0) + #define ADD_TOKEN_STR(stix,s,l) \ do { if (add_token_str(stix, s, l) <= -1) return -1; } while (0) @@ -651,14 +665,16 @@ static int get_ident (stix_t* stix) * keyword := identifier ":" */ - stix_uci_t c = stix->c->lxc.c; + stix_uci_t c; + + c = stix->c->lxc.c; stix->c->tok.type = STIX_IOTOK_IDENT; +get_more: do { ADD_TOKEN_CHAR (stix, c); - GET_CHAR (stix); - c = stix->c->lxc.c; + GET_CHAR_TO (stix, c); } while (is_alnumchar(c)); @@ -666,7 +682,17 @@ static int get_ident (stix_t* stix) { ADD_TOKEN_CHAR (stix, c); stix->c->tok.type = STIX_IOTOK_KEYWORD; - GET_CHAR (stix); + GET_CHAR_TO (stix, c); + + if (stix->c->in_array && is_alnumchar(c)) + { + /* [NOTE] + * for an input like #(abc:def 1 2 3), abc:def is returned as + * a keyword. it would not be a real keyword even if it were + * prefixed with #. it is because it doesn't end with a colon. 
+ */ + goto get_more; + } } else { @@ -794,6 +820,7 @@ static int get_charlit (stix_t* stix) } stix->c->tok.type = STIX_IOTOK_CHRLIT; + ADD_TOKEN_CHAR(stix, '$'); ADD_TOKEN_CHAR(stix, c); GET_CHAR (stix); return 0; @@ -1210,21 +1237,21 @@ static STIX_INLINE int emit_byte_instruction (stix_t* stix, stix_byte_t code) return 0; } -static int emit_positional_instruction (stix_t* stix, int cmd, stix_size_t index) +static int emit_positional_instruction (stix_t* stix, int cmd, stix_oow_t index) { STIX_ASSERT (cmd <= 0xF); STIX_ASSERT (index <= MAX_CODE_INDEX); - if (index > 0xFF) + if (index > 0xF) { - if (emit_byte_instruction(stix, MAKE_CODE(CMD_EXTEND_DOUBLE, cmd)) <= -1 || + #if (STIX_CODE_EXTEND_SIZE == 2) + if (emit_byte_instruction(stix, MAKE_CODE(CMD_EXTEND, cmd)) <= -1 || emit_byte_instruction(stix, index >> 8) <= -1 || emit_byte_instruction(stix, index & 0xFF) <= -1) return -1; - } - else if (index > 0xF) - { + #else if (emit_byte_instruction(stix, MAKE_CODE(CMD_EXTEND, cmd)) <= -1 || emit_byte_instruction(stix, index) <= -1) return -1; + #endif } else { @@ -1232,7 +1259,6 @@ static int emit_positional_instruction (stix_t* stix, int cmd, stix_size_t index } return 0; - } static int emit_double_positional_instruction (stix_t* stix, int cmd, stix_size_t index_1, stix_size_t index_2) @@ -1242,8 +1268,8 @@ static int emit_double_positional_instruction (stix_t* stix, int cmd, stix_size_ * 1011JJJJ KKKKKKKK Send literal index_2 K with J arguments to super * 00001010 JJJJJJJJ KKKKKKKK * 00001011 JJJJJJJJ KKKKKKKK - * 00011010 JJJJJJJJ JJJJJJJJ KKKKKKKK KKKKKKKK - * 00011011 JJJJJJJJ JJJJJJJJ KKKKKKKK KKKKKKKK + * 00001010 JJJJJJJJ JJJJJJJJ KKKKKKKK KKKKKKKK + * 00001011 JJJJJJJJ JJJJJJJJ KKKKKKKK KKKKKKKK * * Send: * index_1 nargs JJJJ @@ -1258,19 +1284,19 @@ static int emit_double_positional_instruction (stix_t* stix, int cmd, stix_size_ STIX_ASSERT (index_1 <= MAX_CODE_NARGS); STIX_ASSERT (index_2 <= MAX_CODE_INDEX); - if (index_1 > 0xFF || index_2 > 0xFF) + if 
(index_1 > 0xF || index_2 > 0xFF) { - if (emit_byte_instruction(stix, MAKE_CODE(CMD_EXTEND_DOUBLE, cmd)) <= -1 || + #if (STIX_CODE_EXTEND_SIZE == 2) + if (emit_byte_instruction(stix, MAKE_CODE(CMD_EXTEND, cmd)) <= -1 || emit_byte_instruction(stix, index_1 >> 8) <= -1 || emit_byte_instruction(stix, index_1 & 0xFF) <= -1 || emit_byte_instruction(stix, index_2 >> 8) <= -1 || emit_byte_instruction(stix, index_2 & 0xFF) <= -1) return -1; - } - else if (index_1 > 0xF) - { + #else if (emit_byte_instruction(stix, MAKE_CODE(CMD_EXTEND, cmd)) <= -1 || emit_byte_instruction(stix, index_1) <= -1 || emit_byte_instruction(stix, index_2) <= -1) return -1; + #endif } else { @@ -1281,7 +1307,6 @@ static int emit_double_positional_instruction (stix_t* stix, int cmd, stix_size_ return 0; } - static int emit_push_smint_literal (stix_t* stix, stix_ooi_t i) { stix_size_t index; @@ -1296,6 +1321,8 @@ static int emit_push_smint_literal (stix_t* stix, stix_ooi_t i) case 1: return emit_byte_instruction (stix, CODE_PUSH_ONE); + +/* TODO: include some other numbers? 
like 2 */ } @@ -1309,6 +1336,7 @@ static int emit_push_smint_literal (stix_t* stix, stix_ooi_t i) * Compiler * --------------------------------------------------------------------- */ + static int add_literal (stix_t* stix, stix_oop_t lit, stix_size_t* index) { stix_size_t i; @@ -1384,8 +1412,6 @@ static int add_symbol_literal (stix_t* stix, const stix_ucs_t* str, stix_size_t* return add_literal (stix, tmp, index); } -/* TODO: add_array_literal, add_byte_array_literal () */ - static STIX_INLINE int set_class_name (stix_t* stix, const stix_ucs_t* name) { return copy_string_to (stix, name, &stix->c->cls.name, &stix->c->cls.name_capa, 0, '\0'); @@ -1639,7 +1665,6 @@ static int compile_class_level_variables (stix_t* stix) if (find_class_level_variable(stix, STIX_NULL, &stix->c->tok.name, &var) >= 0) { -printf ("duplicate variable name type %d pos %lu\n", var.type, var.pos); set_syntax_error (stix, STIX_SYNERR_VARNAMEDUP, &stix->c->tok.loc, &stix->c->tok.name); return -1; } @@ -2019,7 +2044,6 @@ static int compile_block_temporaries (stix_t* stix) return -1; } -/* TODO: check if tmpr_count exceededs LIMIT (SMINT MAX). 
also bytecode max */ GET_TOKEN (stix); } @@ -2035,9 +2059,9 @@ static int compile_block_temporaries (stix_t* stix) static int compile_block_expression (stix_t* stix) { - stix_size_t i, jump_inst_pos; + stix_size_t jump_inst_pos; stix_size_t saved_tmpr_count, saved_tmprs_len; - stix_size_t block_arg_count/*, block_tmpr_count*/; + stix_size_t block_arg_count, block_tmpr_count; stix_size_t block_code_size; stix_ioloc_t block_loc, colon_loc, tmpr_loc; @@ -2048,7 +2072,7 @@ static int compile_block_expression (stix_t* stix) */ /* this function expects [ not to be consumed away */ - STIX_ASSERT (stix->c->tok.type = STIX_IOTOK_LBRACK); + STIX_ASSERT (stix->c->tok.type == STIX_IOTOK_LBRACK); block_loc = stix->c->tok.loc; GET_TOKEN (stix); @@ -2113,14 +2137,13 @@ static int compile_block_expression (stix_t* stix) tmpr_loc = stix->c->tok.loc; if (compile_block_temporaries(stix) <= -1) return -1; -#if 0 + /* this is a block-local temporary count */ block_tmpr_count = stix->c->mth.tmpr_count - saved_tmpr_count; if (block_tmpr_count > MAX_CODE_NBLKTMPRS) { set_syntax_error (stix, STIX_SYNERR_BLKTMPRFLOOD, &tmpr_loc, STIX_NULL); return -1; } -#endif printf ("\tpush_context nargs %d ntmprs %d\n", (int)block_arg_count, (int)stix->c->mth.tmpr_count /*block_tmpr_count*/); printf ("\tpush smint %d\n", (int)block_arg_count); @@ -2134,9 +2157,14 @@ printf ("\tsend_block_copy\n"); printf ("\tjump\n"); /* insert dummy instructions before replacing them with a jump instruction */ jump_inst_pos = stix->c->mth.code.len; - if (emit_byte_instruction(stix, MAKE_CODE(CMD_EXTEND_DOUBLE, CMD_JUMP)) <= -1 || +#if (STIX_CODE_EXTEND_SIZE == 2) + if (emit_byte_instruction(stix, MAKE_CODE(CMD_EXTEND, CMD_JUMP)) <= -1 || emit_byte_instruction(stix, 0) <= -1 || emit_byte_instruction(stix, 0) <= -1) return -1; +#else + if (emit_byte_instruction(stix, MAKE_CODE(CMD_EXTEND, CMD_JUMP)) <= -1 || + emit_byte_instruction(stix, 0) <= -1) return -1; +#endif /* compile statements inside a block */ if 
(stix->c->tok.type == STIX_IOTOK_RBRACK) @@ -2191,6 +2219,246 @@ printf ("\tjump\n"); } +static int add_to_balit_buffer (stix_t* stix, stix_byte_t b) +{ + if (stix->c->mth.balit_count >= stix->c->mth.balit_capa) + { + stix_byte_t* tmp; + stix_size_t new_capa; + + new_capa = STIX_ALIGN (stix->c->mth.balit_count + 1, BALIT_BUFFER_ALIGN); + tmp = (stix_byte_t*)stix_reallocmem (stix, stix->c->mth.balit, new_capa * STIX_SIZEOF(*tmp)); + if (!tmp) return -1; + + stix->c->mth.balit_capa = new_capa; + stix->c->mth.balit = tmp; + } + + stix->c->mth.balit[stix->c->mth.balit_count++] = b; + return 0; +} + +static int add_to_arlit_buffer (stix_t* stix, stix_oop_t item) +{ + if (stix->c->mth.arlit_count >= stix->c->mth.arlit_capa) + { + stix_oop_t* tmp; + stix_size_t new_capa; + + new_capa = STIX_ALIGN (stix->c->mth.arlit_count + 1, ARLIT_BUFFER_ALIGN); + tmp = (stix_oop_t*)stix_reallocmem (stix, stix->c->mth.arlit, new_capa * STIX_SIZEOF(*tmp)); + if (!tmp) return -1; + + stix->c->mth.arlit_capa = new_capa; + stix->c->mth.arlit = tmp; + } + +/* TODO: overflow check of stix->c->mth.arlit_count */ + stix->c->mth.arlit[stix->c->mth.arlit_count++] = item; + return 0; +} + + +static int __compile_byte_array_literal (stix_t* stix, stix_oop_t* xlit) +{ + stix_ooi_t tmp; + stix_oop_t ba; + + stix->c->mth.balit_count = 0; + + while (stix->c->tok.type == STIX_IOTOK_NUMLIT || stix->c->tok.type == STIX_IOTOK_RADNUMLIT) + { + /* TODO: check if the number is an integer */ + + if (string_to_smint(stix, &stix->c->tok.name, stix->c->tok.type == STIX_IOTOK_RADNUMLIT, &tmp) <= -1) + { + /* the token reader reads a valid token. 
no errors + * other than the range error can occur */ + STIX_ASSERT (stix->errnum == STIX_ERANGE); + +printf ("NOT IMPLEMENTED LARGE_INTEGER or ERROR?\n"); + stix->errnum = STIX_ENOIMPL; + return -1; + } + else if (tmp < 0 || tmp > 255) + { + set_syntax_error (stix, STIX_SYNERR_BYTERANGE, &stix->c->tok.loc, &stix->c->tok.name); + return -1; + } + + if (add_to_balit_buffer(stix, tmp) <= -1) return -1; + GET_TOKEN (stix); + } + + if (stix->c->tok.type != STIX_IOTOK_RBRACK) + { + set_syntax_error (stix, STIX_SYNERR_RBRACK, &stix->c->tok.loc, &stix->c->tok.name); + return -1; + } + + ba = stix_instantiate (stix, stix->_byte_array, stix->c->mth.balit, stix->c->mth.balit_count); + if (!ba) return -1; + + *xlit = ba; + return 0; +} + +struct arlit_info_t +{ + stix_size_t pos; + stix_size_t len; +}; + +typedef struct arlit_info_t arlit_info_t; + +static int __compile_array_literal (stix_t* stix, stix_oop_t* xlit) +{ + stix_oop_t lit, a; + stix_size_t i, saved_arlit_count; + arlit_info_t info; + + info.pos = stix->c->mth.arlit_count; + info.len = 0; + + do + { + switch (stix->c->tok.type) + { +/* TODO: floating-point number */ + + case STIX_IOTOK_NUMLIT: + case STIX_IOTOK_RADNUMLIT: + { + stix_ooi_t tmp; + + if (string_to_smint(stix, &stix->c->tok.name, stix->c->tok.type == STIX_IOTOK_RADNUMLIT, &tmp) <= -1) + { + /* the token reader reads a valid token. 
no errors + * other than the range error can occur */ + STIX_ASSERT (stix->errnum == STIX_ERANGE); + +/* TODO: IMPLEMENT LARGE INTEGER */ +printf ("LARGE NOT IMPLEMENTED IN COMPILE_ARRAY_LITERAL\n"); + stix->errnum = STIX_ENOIMPL; + return -1; + } + + lit = STIX_OOP_FROM_SMINT(tmp); + break; + } + + case STIX_IOTOK_CHRLIT: + STIX_ASSERT (stix->c->tok.name.len == 2); + lit = STIX_OOP_FROM_CHAR(stix->c->tok.name.ptr[1]); + break; + + case STIX_IOTOK_STRLIT: + lit = stix_instantiate (stix, stix->_string, stix->c->tok.name.ptr, stix->c->tok.name.len); + break; + + case STIX_IOTOK_IDENT: + case STIX_IOTOK_BINSEL: + case STIX_IOTOK_KEYWORD: + case STIX_IOTOK_SYMLIT: + case STIX_IOTOK_SELF: + case STIX_IOTOK_SUPER: + case STIX_IOTOK_THIS_CONTEXT: + lit = stix_makesymbol (stix, stix->c->tok.name.ptr, stix->c->tok.name.len); + break; + + case STIX_IOTOK_NIL: + lit = stix->_nil; + break; + + case STIX_IOTOK_TRUE: + lit = stix->_true; + break; + + case STIX_IOTOK_FALSE: + lit = stix->_false; + break; + + case STIX_IOTOK_APAREN: /* #( */ + case STIX_IOTOK_LPAREN: /* ( */ + saved_arlit_count = stix->c->mth.arlit_count; +/* TODO: get rid of recursion?? 
*/ + GET_TOKEN (stix); + if (__compile_array_literal (stix, &lit) <= -1) return -1; + stix->c->mth.arlit_count = saved_arlit_count; + break; + + case STIX_IOTOK_BPAREN: /* #[ */ + case STIX_IOTOK_LBRACK: /* [ */ + GET_TOKEN (stix); + if (__compile_byte_array_literal (stix, &lit) <= -1) return -1; + break; + + default: + goto done; + } + + if (!lit || add_to_arlit_buffer(stix, lit) <= -1) return -1; + info.len++; + + GET_TOKEN (stix); + } + while (1); + +done: + if (stix->c->tok.type != STIX_IOTOK_RPAREN) + { + set_syntax_error (stix, STIX_SYNERR_RPAREN, &stix->c->tok.loc, &stix->c->tok.name); + return -1; + } + + a = stix_instantiate (stix, stix->_array, STIX_NULL, info.len); + if (!a) return -1; + + for (i = 0; i < info.len; i++) + { + ((stix_oop_oop_t)a)->slot[i] = stix->c->mth.arlit[info.pos + i]; + } + + *xlit = a; + return 0; +} + +static int compile_byte_array_literal (stix_t* stix) +{ + stix_oop_t lit; + stix_size_t index; + + GET_TOKEN (stix); /* skip #[ and read the next token */ + if (__compile_byte_array_literal (stix, &lit) <= -1) return -1; + +printf ("\tpush_literal byte_array\n"); + if (add_literal (stix, lit, &index) <= -1 || + emit_positional_instruction (stix, CMD_PUSH_LITERAL, index) <= -1) return -1; + + GET_TOKEN (stix); + return 0; +} + +static int compile_array_literal (stix_t* stix) +{ + stix_oop_t lit; + stix_size_t index; + int x; + + stix->c->in_array = 1; + GET_TOKEN (stix); /* skip #( and read the next token */ + x = __compile_array_literal (stix, &lit); + stix->c->in_array = 0; + if (x <= -1) return -1; + +printf ("\tpush_literal array\n"); + if (add_literal (stix, lit, &index) <= -1 || + emit_positional_instruction (stix, CMD_PUSH_LITERAL, index) <= -1) return -1; + + GET_TOKEN (stix); + return 0; +} + static int compile_expression_primary (stix_t* stix, const stix_ucs_t* ident, const stix_ioloc_t* ident_loc, int* to_super) { /* @@ -2291,8 +2559,8 @@ printf ("\tpush context...\n"); break; case STIX_IOTOK_CHRLIT: - STIX_ASSERT 
(stix->c->tok.name.len == 1); - if (add_character_literal(stix, stix->c->tok.name.ptr[0], &index) <= -1 || + STIX_ASSERT (stix->c->tok.name.len == 2); /* the token includes $ */ + if (add_character_literal(stix, stix->c->tok.name.ptr[1], &index) <= -1 || emit_positional_instruction(stix, CMD_PUSH_LITERAL, index) <= -1) return -1; printf ("\tpush character literal %d\n", (int)index); GET_TOKEN (stix); @@ -2321,9 +2589,13 @@ printf ("\tpush symbol literal %d\n", (int)index); if (string_to_smint(stix, &stix->c->tok.name, stix->c->tok.type == STIX_IOTOK_RADNUMLIT, &tmp) <= -1) { + /* the token reader reads a valid token. no other errors + * than the range error must not occur */ + STIX_ASSERT (stix->errnum == STIX_ERANGE); + printf ("NOT IMPLEMENTED LARGE_INTEGER or ERROR?\n"); - stix->errnum = STIX_ENOIMPL; - return -1; + stix->errnum = STIX_ENOIMPL; + return -1; } else { @@ -2335,15 +2607,19 @@ printf ("\tpush int literal\n"); break; } - case STIX_IOTOK_APAREN: -/* TODO: array literal */ + case STIX_IOTOK_BPAREN: /* #[ */ + /*GET_TOKEN (stix);*/ + if (compile_byte_array_literal(stix) <= -1) return -1; break; - case STIX_IOTOK_BPAREN: -/* TODO: byte array literal */ + case STIX_IOTOK_APAREN: /* #( */ + /*GET_TOKEN (stix);*/ + if (compile_array_literal(stix) <= -1) return -1; break; - case STIX_IOTOK_LBRACK: + /* TODO: dynamic array, non constant array #<> or #{} or what is a better bracket? 
*/ + + case STIX_IOTOK_LBRACK: /* [ */ /*GET_TOKEN (stix);*/ if (compile_block_expression(stix) <= -1) return -1; break; @@ -3019,12 +3295,7 @@ static int add_compiled_method (stix_t* stix) if (cmd == CMD_EXTEND) { cmd = stix->c->mth.code.ptr[0] & 0xF; - index_size = 1; - } - else if (cmd == CMD_EXTEND_DOUBLE) - { - cmd = stix->c->mth.code.ptr[0] & 0xF; - index_size = 2; + index_size = STIX_CODE_EXTEND_SIZE; } else { @@ -3086,6 +3357,8 @@ static int compile_method_definition (stix_t* stix) stix->c->mth.tmpr_count = 0; stix->c->mth.tmpr_nargs = 0; stix->c->mth.literal_count = 0; + stix->c->mth.balit_count = 0; + stix->c->mth.arlit_count = 0; stix->c->mth.code.len = 0; stix->c->mth.prim_no = -1; @@ -3546,7 +3819,8 @@ static int compile_class_definition (stix_t* stix) stix->c->cls.mthdic_oop[MTH_INSTANCE] = STIX_NULL; stix->c->cls.mthdic_oop[MTH_CLASS] = STIX_NULL; stix->c->mth.literal_count = 0; - + stix->c->mth.balit_count = 0; + stix->c->mth.arlit_count = 0; /* do main compilation work */ n = __compile_class_definition (stix); @@ -3557,6 +3831,8 @@ static int compile_class_definition (stix_t* stix) stix->c->cls.mthdic_oop[MTH_INSTANCE] = STIX_NULL; stix->c->cls.mthdic_oop[MTH_CLASS] = STIX_NULL; stix->c->mth.literal_count = 0; + stix->c->mth.balit_count = 0; + stix->c->mth.arlit_count = 0; return n; } @@ -3630,6 +3906,12 @@ static void gc_compiler (stix_t* stix) stix->c->mth.literals[i] = stix_moveoop (stix, stix->c->mth.literals[i]); } } + + for (i = 0; i < stix->c->mth.arlit_count; i++) + { + if (STIX_OOP_IS_POINTER(stix->c->mth.arlit[i])) + stix->c->mth.arlit[i] = stix_moveoop (stix, stix->c->mth.arlit[i]); + } } } @@ -3659,6 +3941,8 @@ static void fini_compiler (stix_t* stix) if (stix->c->mth.tmprs.ptr) stix_freemem (stix, stix->c->mth.tmprs.ptr); if (stix->c->mth.code.ptr) stix_freemem (stix, stix->c->mth.code.ptr); if (stix->c->mth.literals) stix_freemem (stix, stix->c->mth.literals); + if (stix->c->mth.balit) stix_freemem (stix, stix->c->mth.balit); + if 
(stix->c->mth.arlit) stix_freemem (stix, stix->c->mth.arlit); stix_freemem (stix, stix->c); stix->c = STIX_NULL; diff --git a/stix/lib/debug.c b/stix/lib/debug.c index 9d5415d..cca913e 100644 --- a/stix/lib/debug.c +++ b/stix/lib/debug.c @@ -124,8 +124,11 @@ void print_object (stix_t* stix, stix_oop_t oop) stix_size_t ucslen, bcslen; c = (stix_oop_class_t)STIX_CLASSOF(stix, oop); - if ((stix_oop_t)c == stix->_symbol || (stix_oop_t)c == stix->_string) + if (STIX_OBJ_GET_FLAGS_TYPE(oop) == STIX_OBJ_TYPE_CHAR) { + if ((stix_oop_t)c == stix->_symbol) printf ("#"); + else if ((stix_oop_t)c == stix->_string) printf ("'"); + for (i = 0; i < STIX_OBJ_GET_SIZE(oop); i++) { bcslen = STIX_COUNTOF(bcs); @@ -135,6 +138,26 @@ void print_object (stix_t* stix, stix_oop_t oop) printf ("%.*s", (int)bcslen, bcs); } } + if ((stix_oop_t)c == stix->_string) printf ("'"); + } + else if (STIX_OBJ_GET_FLAGS_TYPE(oop) == STIX_OBJ_TYPE_BYTE) + { + printf ("#["); + for (i = 0; i < STIX_OBJ_GET_SIZE(oop); i++) + { + printf (" %d", ((stix_oop_byte_t)oop)->slot[i]); + } + printf ("]"); + } + else if ((stix_oop_t)c == stix->_array) + { + printf ("#("); + for (i = 0; i < STIX_OBJ_GET_SIZE(oop); i++) + { + printf (" "); + print_object (stix, ((stix_oop_oop_t)oop)->slot[i]); + } + printf (")"); } else { @@ -146,7 +169,7 @@ void print_object (stix_t* stix, stix_oop_t oop) } } -void __dump_object (stix_t* stix, stix_oop_t oop, int depth) +static void __dump_object (stix_t* stix, stix_oop_t oop, int depth) { stix_oop_class_t c; stix_ucs_t s; diff --git a/stix/lib/exec.c b/stix/lib/exec.c index 7890fea..48973b2 100644 --- a/stix/lib/exec.c +++ b/stix/lib/exec.c @@ -742,13 +742,35 @@ static primitive_t primitives[] = { 1, primitive_integer_gt } }; + +#if (STIX_CODE_EXTEND_SIZE == 2) + +#define FETCH_UNSIGNED_CODE_TO(stix, v_code, v_ooi) \ + do { \ + v_ooi = (v_code)->slot[(stix)->ip++]; \ + v_ooi = (v_ooi << 8) | (v_code)->slot[stix->ip++]; \ + } while (0) + +#define FETCH_SIGNED_CODE_TO(stix, v_code, 
v_ooi) \ + do { \ + v_ooi = (v_code)->slot[(stix)->ip++]; \ + v_ooi = (stix_int16_t)((v_ooi << 8) | (v_code)->slot[stix->ip++]); \ + } while (0) + +#else /* STIX_CODE_EXTEND_SIZE == 2 */ + +#define FETCH_UNSIGNED_CODE_TO(stix, v_code, v_ooi) (v_ooi = (v_code)->slot[(stix)->ip++]) +#define FETCH_SIGNED_CODE_TO(stix, v_code, v_ooi) (v_ooi = (stix_int8_t)(v_code)->slot[(stix)->ip++]) + +#endif /* STIX_CODE_EXTEND_SIZE == 2 */ + int stix_execute (stix_t* stix) { stix_oop_method_t mth; stix_oop_byte_t code; stix_byte_t bc, cmd; - stix_ooi_t b1; + stix_ooi_t b1, b2; STIX_ASSERT (stix->active_context != STIX_NULL); @@ -761,30 +783,54 @@ int stix_execute (stix_t* stix) printf ("IP => %d ", (int)stix->ip); #endif bc = code->slot[stix->ip++]; - /*if (bc == CODE_NOOP) continue; TODO: DO I NEED THIS???*/ + while (bc == CODE_NOOP) + bc = code->slot[stix->ip++]; cmd = bc >> 4; if (cmd == CMD_EXTEND) { cmd = bc & 0xF; - if (cmd == CMD_JUMP || cmd == CMD_JUMP_IF_FALSE) - b1 = (stix_int8_t)code->slot[stix->ip++]; - else - b1 = code->slot[stix->ip++]; - } - else if (cmd == CMD_EXTEND_DOUBLE) - { - cmd = bc & 0xF; - b1 = code->slot[stix->ip++]; + switch (cmd) + { + case CMD_JUMP: + case CMD_JUMP_IF_FALSE: + FETCH_SIGNED_CODE_TO (stix, code, b1); + break; - if (cmd == CMD_JUMP || cmd == CMD_JUMP_IF_FALSE) - b1 = (stix_int16_t)((b1 << 8) | code->slot[stix->ip++]); /* JUMP encodes a signed offset */ - else - b1 = (b1 << 8) | code->slot[stix->ip++]; + case CMD_PUSH_OBJVAR: + case CMD_STORE_INTO_OBJVAR: + case CMD_SEND_MESSAGE: + case CMD_SEND_MESSAGE_TO_SUPER: + FETCH_UNSIGNED_CODE_TO (stix, code, b1); + FETCH_UNSIGNED_CODE_TO (stix, code, b2); + break; + + default: + FETCH_UNSIGNED_CODE_TO (stix, code, b1); + break; + } } else { - b1 = bc & 0xF; + switch (cmd) + { + case CMD_JUMP: + case CMD_JUMP_IF_FALSE: + b1 = bc & 0xF; /* the short jump offset is unsigned */ + break; + + case CMD_PUSH_OBJVAR: + case CMD_STORE_INTO_OBJVAR: + case CMD_SEND_MESSAGE: + case CMD_SEND_MESSAGE_TO_SUPER: + 
b1 = bc & 0xF; + b2 = code->slot[stix->ip++]; + break; + + default: + b1 = bc & 0xF; + break; + } } #if 0 @@ -800,14 +846,12 @@ printf ("PUSH_INSTVAR %d\n", (int)b1); break; case CMD_PUSH_TEMPVAR: -/* TODO: consider temp offset, block context, etc */ - printf ("PUSH_TEMPVAR idx=%d - ", (int)b1); if (stix->active_context->home != stix->_nil) { /*TODO: improve this slow temporary access */ - /* this code assuments that the method context and - * the block context places some key fields in the + /* this code assumes that the method context and + * the block context place some key fields in the * same offset. such fields include 'home', 'ntmprs' */ stix_oop_context_t ctx; stix_oop_t home; @@ -915,22 +959,15 @@ printf ("JUMP %d\n", (int)b1); case CMD_PUSH_OBJVAR: { -/* COMPACT CODE FOR CMD_PUSH_OBJVAR AND CMD_STORE_INTO_OBJVAR by sharing */ - /* b1 -> variable index */ - stix_ooi_t obj_index; + /* b1 -> variable index to the object indicated by b2. + * b2 -> object index stored in the literal frame. 
*/ + stix_oop_oop_t obj; - - obj_index = code->slot[stix->ip++]; - if (cmd == CMD_EXTEND_DOUBLE) - { - obj_index = (obj_index << 8) | code->slot[stix->ip++]; - } - - obj = (stix_oop_oop_t)stix->active_context->origin->method->slot[obj_index]; -printf ("PUSH OBJVAR index=%d object_index_in_literal_frame=%d - ", (int)b1, (int)obj_index); - +printf ("PUSH OBJVAR index=%d object_index_in_literal_frame=%d - ", (int)b1, (int)b2); + obj = (stix_oop_oop_t)stix->active_context->origin->method->slot[b2]; STIX_ASSERT (STIX_OBJ_GET_FLAGS_TYPE(obj) == STIX_OBJ_TYPE_OOP); - STIX_ASSERT (obj_index < STIX_OBJ_GET_SIZE(obj)); + STIX_ASSERT (b1 < STIX_OBJ_GET_SIZE(obj)); + print_object (stix, obj->slot[b1]); printf ("\n"); ACTIVE_STACK_PUSH (stix, obj->slot[b1]); @@ -939,19 +976,15 @@ printf ("\n"); case CMD_STORE_INTO_OBJVAR: { - stix_ooi_t obj_index; stix_oop_oop_t obj; - obj_index = code->slot[stix->ip++]; - if (cmd == CMD_EXTEND_DOUBLE) - obj_index = (obj_index << 8) | code->slot[stix->ip++]; - -printf ("STORE OBJVAR index=%d object_index_in_literal_frame=%d - ", (int)b1, (int)obj_index); - obj = (stix_oop_oop_t)stix->active_context->origin->method->slot[obj_index]; +printf ("STORE OBJVAR index=%d object_index_in_literal_frame=%d - ", (int)b1, (int)b2); + obj = (stix_oop_oop_t)stix->active_context->origin->method->slot[b2]; STIX_ASSERT (STIX_OBJ_GET_FLAGS_TYPE(obj) == STIX_OBJ_TYPE_OOP); - STIX_ASSERT (obj_index < STIX_OBJ_GET_SIZE(obj)); - obj->slot[b1] = ACTIVE_STACK_GETTOP(stix); -print_object (stix, obj->slot[b1]); + STIX_ASSERT (b1 < STIX_OBJ_GET_SIZE(obj)); + +print_object (stix, ACTIVE_STACK_GETTOP(stix)); printf ("\n"); + obj->slot[b1] = ACTIVE_STACK_GETTOP(stix); break; } @@ -960,23 +993,21 @@ printf ("\n"); case CMD_SEND_MESSAGE_TO_SUPER: { /* TODO: tail call optimization */ + /* b1 -> number of arguments + * b2 -> index to the selector stored in the literal frame */ stix_ucs_t mthname; stix_oop_t newrcv; stix_oop_method_t newmth; stix_oop_char_t selector; - stix_ooi_t 
selector_index; stix_ooi_t preamble; /* the next byte is the message selector index to the * literal frame. */ - selector_index = code->slot[stix->ip++]; - if (cmd == CMD_EXTEND_DOUBLE) - selector_index = (selector_index << 8) | code->slot[stix->ip++]; /* get the selector from the literal frame */ - selector = (stix_oop_char_t)stix->active_context->origin->method->slot[selector_index]; + selector = (stix_oop_char_t)stix->active_context->origin->method->slot[b2]; if (cmd == CMD_SEND_MESSAGE) printf ("SEND_MESSAGE TO RECEIVER AT STACKPOS=%d NARGS=%d RECEIER=", (int)(stix->sp - b1), (int)b1); @@ -1149,11 +1180,10 @@ printf ("RETURN_RECEIVER\n"); case SUBCMD_SEND_BLOCK_COPY: { -printf ("SEND_BLOCK_COPY\n"); stix_ooi_t nargs, ntmprs; stix_oop_t rctx; stix_oop_block_context_t blkctx; - +printf ("SEND_BLOCK_COPY\n"); /* it emulates thisContext blockCopy: nargs ofTmprCount: ntmprs */ STIX_ASSERT (stix->sp >= 2); @@ -1182,8 +1212,13 @@ printf ("SEND_BLOCK_COPY\n"); rctx = ACTIVE_STACK_GETTOP(stix); /* blkctx->caller is left to nil */ - /*blkctx->iip = STIX_OOP_FROM_SMINT(stix->ip + 3); */ - blkctx->ip = STIX_OOP_FROM_SMINT(stix->ip + 3); /* TOOD: change +3 to the configured JUMP SIZE */ + /*blkctx->iip = STIX_OOP_FROM_SMINT(stix->ip + STIX_CODE_EXTEND_SIZE + 1); */ + + /* the extended jump instruction has the format of + * 0000XXXX KKKKKKKK or 0000XXXX KKKKKKKK KKKKKKKK + * depending on STIX_CODE_EXTEND_SIZE. change 'ip' to point to + * the instruction after the jump. 
*/ + blkctx->ip = STIX_OOP_FROM_SMINT(stix->ip + STIX_CODE_EXTEND_SIZE + 1); blkctx->sp = STIX_OOP_FROM_SMINT(-1); /* the number of arguments for a block context is local to the block */ blkctx->nargs = STIX_OOP_FROM_SMINT(nargs); diff --git a/stix/lib/main.c b/stix/lib/main.c index 0892d7e..efb22fb 100644 --- a/stix/lib/main.c +++ b/stix/lib/main.c @@ -177,6 +177,7 @@ static char* syntax_error_msg[] = "string expected", "invalid radix", "invalid numeric literal", + "byte too small or too large", "{ expected", "} expected", "( expected", diff --git a/stix/lib/stix-prv.h b/stix/lib/stix-prv.h index 8aecf26..ff56c10 100644 --- a/stix/lib/stix-prv.h +++ b/stix/lib/stix-prv.h @@ -29,6 +29,9 @@ #include "stix.h" +/* you can define this to either 1 or 2 */ +#define STIX_CODE_EXTEND_SIZE 2 + /* this is useful for debugging. stix_gc() can be called * while stix has not been fully initialized when this is defined*/ #define STIX_SUPPORT_GC_DURING_IGNITION @@ -277,6 +280,7 @@ enum stix_synerrnum_t STIX_SYNERR_STRING, /* string expected */ STIX_SYNERR_RADIX, /* invalid radix */ STIX_SYNERR_RADNUMLIT, /* invalid numeric literal with radix */ + STIX_SYNERR_BYTERANGE, /* byte too small or too large */ STIX_SYNERR_LBRACE, /* { expected */ STIX_SYNERR_RBRACE, /* } expected */ STIX_SYNERR_LPAREN, /* ( expected */ @@ -360,6 +364,7 @@ struct stix_compiler_t /* the last token read */ stix_iotok_t tok; stix_iolink_t* io_names; + int in_array; stix_synerr_t synerr; @@ -433,6 +438,16 @@ struct stix_compiler_t stix_size_t literal_count; stix_size_t literal_capa; + /* byte array elements */ + stix_byte_t* balit; + stix_size_t balit_count; + stix_size_t balit_capa; + + /* array elements */ + stix_oop_t* arlit; + stix_size_t arlit_count; + stix_size_t arlit_capa; + /* primitive number */ stix_ooi_t prim_no; @@ -444,62 +459,72 @@ struct stix_compiler_t #endif +#if defined(STIX_CODE_EXTEND_SIZE) && (STIX_CODE_EXTEND_SIZE == 1) +# define MAX_CODE_INDEX (0xFFu) +# define MAX_CODE_NTMPRS (0xFFu) 
+# define MAX_CODE_NARGS (0xFFu) +# define MAX_CODE_NBLKARGS (0xFFu) +# define MAX_CODE_NBLKTMPRS (0xFFu) +# define MAX_CODE_PRIMNO (0xFFFFu) +# define MIN_CODE_JUMP (-0x80) +# define MAX_CODE_JUMP (0x7F) +#elif defined(STIX_CODE_EXTEND_SIZE) && (STIX_CODE_EXTEND_SIZE == 2) +# define MAX_CODE_INDEX (0xFFFFu) +# define MAX_CODE_NTMPRS (0xFFFFu) +# define MAX_CODE_NARGS (0xFFFFu) +# define MAX_CODE_NBLKARGS (0xFFFFu) +# define MAX_CODE_NBLKTMPRS (0xFFFFu) +# define MAX_CODE_PRIMNO (0xFFFFu) +# define MIN_CODE_JUMP (-0x8000) +# define MAX_CODE_JUMP (0x7FFF) +#else +# error Unsupported STIX_CODE_EXTEND_SIZE +#endif -#define MAKE_CODE(x,y) (((x) << 4) | y) -#define MAX_CODE_INDEX (0xFFFFu) -#define MAX_CODE_NTMPRS (0xFFFFu) -#define MAX_CODE_NARGS (0xFFFFu) -#define MAX_CODE_NBLKARGS (0xFFFFu) -#define MAX_CODE_NBLKTMPRS (0xFFFFu) -#define MAX_CODE_PRIMNO (0xFFFFu) - -#define MIN_CODE_JUMP (-0x8000) -#define MAX_CODE_JUMP (0x7FFF) #define MAX_CODE_BLKCODE MAX_CODE_JUMP +#define MAKE_CODE(x,y) (((x) << 4) | y) enum stix_cmdcode_t { CMD_EXTEND = 0x0, - CMD_EXTEND_DOUBLE = 0x1, /* Single positional instructions * * XXXXJJJJ * 0000XXXX JJJJJJJJ - * 0001XXXX JJJJJJJJ JJJJJJJJ + * 0000XXXX JJJJJJJJ JJJJJJJJ * * XXXX is one of the following positional instructions. * JJJJ or JJJJJJJJ is the position. */ - CMD_PUSH_INSTVAR = 0x2, - CMD_PUSH_TEMPVAR = 0x3, - CMD_PUSH_LITERAL = 0x4, - CMD_STORE_INTO_INSTVAR = 0x5, - CMD_STORE_INTO_TEMPVAR = 0x6, + CMD_PUSH_INSTVAR = 0x1, + CMD_PUSH_TEMPVAR = 0x2, + CMD_PUSH_LITERAL = 0x3, + CMD_STORE_INTO_INSTVAR = 0x4, + CMD_STORE_INTO_TEMPVAR = 0x5, + CMD_POP_INTO_INSTVAR = 0x6, + CMD_POP_INTO_TEMPVAR = 0x7, -/* - * CMD_POP_INTO_INSTVAR - * CMD_POP_INTO_TEMPVAR - */ - - CMD_JUMP = 0x7, - CMD_JUMP_IF_FALSE = 0x8, + /* Jump is a single positional instructions. + * JJJJJJJJ in the extended format is encoded as a signed offset + * while JJJJ in the compact format is an unsigned offset. 
*/ + CMD_JUMP = 0x8, + CMD_JUMP_IF_FALSE = 0x9, /* * Double positional instructions * * XXXXJJJJ KKKKKKKK * 0000XXXX JJJJJJJJ KKKKKKKK - * 0001XXXX JJJJJJJJ JJJJJJJJ KKKKKKKK KKKKKKKK + * 0000XXXX JJJJJJJJ JJJJJJJJ KKKKKKKK KKKKKKKK * * Access instance variable #JJJJ of an object at literal frame #KKKKKKKK * Send message at literal frame #KKKKKKKK with #JJJJ arguments. */ - CMD_PUSH_OBJVAR = 0x9, - CMD_STORE_INTO_OBJVAR = 0xA, - - CMD_SEND_MESSAGE = 0xB, - CMD_SEND_MESSAGE_TO_SUPER = 0xC, + CMD_PUSH_OBJVAR = 0xA, + CMD_STORE_INTO_OBJVAR = 0xB, + CMD_SEND_MESSAGE = 0xC, + CMD_SEND_MESSAGE_TO_SUPER = 0xD, /* * Single byte instructions diff --git a/stix/lib/stix.h b/stix/lib/stix.h index 387cef5..4c095f9 100644 --- a/stix/lib/stix.h +++ b/stix/lib/stix.h @@ -278,6 +278,7 @@ enum stix_errnum_t STIX_EINTERN, /**< internal error */ STIX_ENOMEM, /**< insufficient memory */ STIX_EINVAL, /**< invalid parameter or data */ + STIX_ERANGE, /**< range error. overflow and underflow */ STIX_ENOENT, /**< no matching entry */ STIX_EIOERR, /**< I/O error */ STIX_EECERR, /**< encoding conversion error */