From 42009d3ccecbefe4669665f12cb5486b6f467aa0 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Mon, 7 Oct 2024 21:46:41 +0900 Subject: [PATCH] enhanced compile_class_attr_list() with data table and binary search --- lib/comp.c | 158 +++++++++++++++++++++++++++-------------------- lib/decode.c | 8 +-- lib/exec.c | 12 ++-- lib/gc.c | 2 +- lib/hcl-prv.h | 4 ++ src/kernel.hcl | 10 +-- t/class-5001.err | 24 +++++++ t/var-5004.err | 2 +- 8 files changed, 137 insertions(+), 83 deletions(-) diff --git a/lib/comp.c b/lib/comp.c index c782f90..0913a61 100644 --- a/lib/comp.c +++ b/lib/comp.c @@ -2469,29 +2469,43 @@ static int check_class_attr_list (hcl_t* hcl, hcl_cnode_t* attr_list, unsigned i static struct { const hcl_bch_t* name; - unsigned int flag; - } flag_tab[] = { - { "v", HCL_CLASS_SPEC_FLAG_INDEXED }, - { "var", HCL_CLASS_SPEC_FLAG_INDEXED }, - { "varying", HCL_CLASS_SPEC_FLAG_INDEXED }, - { "immutable", HCL_CLASS_SPEC_FLAG_IMMUTABLE }, - { "uncopyable", HCL_CLASS_SPEC_FLAG_UNCOPYABLE }, - }; + int shifts; + unsigned int mask; + unsigned int value; + } attr_tab[] = { + /* the value with 0xFF in the mask field takes up the whole byte + * the value with 0x00 in the mask field is a bit value. + * + * shifts: 0 for object type, 8 for selfspec bit, 12 for spec bit + * mask: 0xFF for object type, 0x00 for spec/selfspec bit. 
+ * + * keep the table sorted in alphabetical order ascending for + * binary search */ + + { "b", 0, 0xFF, HCL_OBJ_TYPE_BYTE }, + { "byte", 0, 0xFF, HCL_OBJ_TYPE_BYTE }, + { "c", 0, 0xFF, HCL_OBJ_TYPE_CHAR }, + { "char", 0, 0xFF, HCL_OBJ_TYPE_CHAR }, + { "character", 0, 0xFF, HCL_OBJ_TYPE_CHAR }, + + { "final", 8, 0x00, HCL_CLASS_SELFSPEC_FLAG_FINAL }, + + { "halfword", 0, 0xFF, HCL_OBJ_TYPE_HALFWORD }, + { "hw", 0, 0xFF, HCL_OBJ_TYPE_HALFWORD }, + + { "immutable", 12, 0x00, HCL_CLASS_SPEC_FLAG_IMMUTABLE }, + + { "limited", 8, 0x00, HCL_CLASS_SELFSPEC_FLAG_LIMITED }, + + { "uncopyable", 12, 0x00, HCL_CLASS_SPEC_FLAG_UNCOPYABLE }, + + { "v", 12, 0x00, HCL_CLASS_SPEC_FLAG_INDEXED }, + { "var", 12, 0x00, HCL_CLASS_SPEC_FLAG_INDEXED }, + { "varying", 12, 0x00, HCL_CLASS_SPEC_FLAG_INDEXED }, + + { "w", 0, 0xFF, HCL_OBJ_TYPE_WORD }, + { "word", 0, 0xFF, HCL_OBJ_TYPE_WORD } - static struct - { - const hcl_bch_t* name; - hcl_obj_type_t indexed_type; - } type_tab[] = { - { "b", HCL_OBJ_TYPE_BYTE }, - { "byte", HCL_OBJ_TYPE_BYTE }, - { "c", HCL_OBJ_TYPE_CHAR }, - { "char", HCL_OBJ_TYPE_CHAR }, - { "character", HCL_OBJ_TYPE_CHAR }, - { "halfword", HCL_OBJ_TYPE_HALFWORD }, - { "hw", HCL_OBJ_TYPE_HALFWORD }, - { "w", HCL_OBJ_TYPE_WORD }, - { "word", HCL_OBJ_TYPE_WORD } /* TODO: uint32 uint16 .. 
etc */ }; hcl_obj_type_t ct; @@ -2517,7 +2531,7 @@ static int check_class_attr_list (hcl_t* hcl, hcl_cnode_t* attr_list, unsigned i { hcl_setsynerrbfmt ( hcl, HCL_SYNERR_FUN, HCL_CNODE_GET_LOC(attr_list), HCL_NULL, - "empty attribute list on unamed class for '%.*js'", + "empty attribute list on unnamed class for '%.*js'", HCL_CNODE_GET_TOKLEN(cmd), HCL_CNODE_GET_TOKPTR(cmd)); } return -1; @@ -2525,69 +2539,78 @@ static int check_class_attr_list (hcl_t* hcl, hcl_cnode_t* attr_list, unsigned i if (HCL_CNODE_IS_CONS_CONCODED(attr_list, HCL_CONCODE_XLIST)) { - hcl_cnode_t* c, * a; + hcl_cnode_t* c; const hcl_ooch_t* tokptr; - hcl_oow_t toklen, i; + hcl_oow_t toklen; c = attr_list; while (c) { - a = HCL_CNODE_CONS_CAR(c); + /* [NOTE] this algorithm is underflow safe with hcl_oow_t types */ + hcl_oow_t base, lim; + hcl_cnode_t* attr; - tokptr = HCL_CNODE_GET_TOKPTR(a); - toklen = HCL_CNODE_GET_TOKLEN(a); + attr = HCL_CNODE_CONS_CAR(c); - if (!HCL_CNODE_IS_TYPED(a, HCL_CNODE_SYMLIT)) + tokptr = HCL_CNODE_GET_TOKPTR(attr); + toklen = HCL_CNODE_GET_TOKLEN(attr); + + if (!HCL_CNODE_IS_TYPED(attr, HCL_CNODE_SYMLIT)) { hcl_setsynerrbfmt ( - hcl, HCL_SYNERR_FUN, HCL_CNODE_GET_LOC(a), HCL_NULL, + hcl, HCL_SYNERR_FUN, HCL_CNODE_GET_LOC(attr), HCL_NULL, "invalid class attribute name '%.*js'", toklen, tokptr); return -1; } /* - * upper 4 bits: object flags - only 4 bit flags are possible - * lower 4 bits: object type for indexing/variablilty - 16 different combinations are possible - * these are kept as compact as possbile to make use a single byte in encoding this information - * for the bytecode instruction CLASS_ENTER + * 4 bits for spec flags (bit 12 .. 15) + * 4 bits for selfspec flags (bit 8 .. 11) + * 8 bits for object type for indexing/variability (bit 0 .. 
7) */ - for (i = 0; i < HCL_COUNTOF(flag_tab); i++) + + for (base = 0, lim = HCL_COUNTOF(attr_tab); lim > 0; lim >>= 1) /* binary search */ { - if (hcl_comp_oochars_bcstr(tokptr, toklen, flag_tab[i].name) == 0) + hcl_oow_t i; + int n; + + i = base + (lim >> 1); /* mid-point */ + n = hcl_comp_oochars_bcstr(tokptr, toklen, attr_tab[i].name); + if (n == 0) { - if ((ct >> 4) & flag_tab[i].flag) + /* this is to derive the real mask: (attr_tab[i].mask | attr_tab[i].value). + * roughly speaking, it's similar to + * real_mask = attr_tab[i].mask == 0? attr_tab[i].value: attr_tab[i].mask; + * + * To flag out duplicate or conflicting attribute, we check if + * - the same bit is already set for a bit-based item (mask field 0x00). + * - a value is non-zero for a byte-based item (mask field 0xFF) + */ + if (!!((ct >> attr_tab[i].shifts) & (attr_tab[i].mask | attr_tab[i].value))) { - conflict: hcl_setsynerrbfmt ( - hcl, HCL_SYNERR_FUN, HCL_CNODE_GET_LOC(a), HCL_NULL, - "conflicting or duplicate class attribute name '%.*js'", toklen, tokptr); + hcl, HCL_SYNERR_FUN, HCL_CNODE_GET_LOC(attr), HCL_NULL, + "conflicting or duplicate class attribute name '#%.*js'", toklen, tokptr); return -1; } - ct |= (flag_tab[i].flag << 4); - break; + + ct &= ~attr_tab[i].mask; + ct |= (attr_tab[i].value << attr_tab[i].shifts); + goto found; } + + if (n > 0) { base = i + 1; lim--; } } - if (i >= HCL_COUNTOF(flag_tab)) + + if (lim <= 0) { - for (i = 0; i < HCL_COUNTOF(type_tab); i++) - { - if (hcl_comp_oochars_bcstr(tokptr, toklen, type_tab[i].name) == 0) - { - if (ct & 0x0F) goto conflict; - ct = type_tab[i].indexed_type; - break; - } - } - - if (i >= HCL_COUNTOF(type_tab)) - { - hcl_setsynerrbfmt ( - hcl, HCL_SYNERR_FUN, HCL_CNODE_GET_LOC(a), HCL_NULL, - "unrecognized class attribute name '%.*js'", toklen, tokptr); - return -1; - } + hcl_setsynerrbfmt ( + hcl, HCL_SYNERR_FUN, HCL_CNODE_GET_LOC(attr), HCL_NULL, + "unrecognized class attribute name '%.*js'", toklen, tokptr); + return -1; } + found: c 
= HCL_CNODE_CONS_CDR(c); } } @@ -2878,10 +2901,11 @@ static HCL_INLINE int compile_class_p1 (hcl_t* hcl) /* class_enter nsuperclasses, nivars, ncvars */ if (emit_byte_instruction(hcl, HCL_CODE_CLASS_ENTER, &cf->u._class.start_loc) <= -1) goto oops; - if (emit_byte_instruction(hcl, (hcl_oob_t)cf->u._class.indexed_type, &cf->u._class.start_loc) <= -1) goto oops; if (emit_long_param(hcl, cf->u._class.nsuperclasses) <= -1) goto oops; if (emit_long_param(hcl, vardcl.nivars) <= -1) goto oops; if (emit_long_param(hcl, vardcl.ncvars) <= -1) goto oops; + if (emit_byte_instruction(hcl, (hcl_oob_t)((cf->u._class.indexed_type >> 8) & 0xFF), &cf->u._class.start_loc) <= -1) goto oops; + if (emit_byte_instruction(hcl, (hcl_oob_t)(cf->u._class.indexed_type & 0xFF), &cf->u._class.start_loc) <= -1) goto oops; /* remember the first byte code position to be emitted for the body of * this class. this posistion is used for empty class body check at the @@ -2989,7 +3013,7 @@ static int check_fun_attr_list (hcl_t* hcl, hcl_cnode_t* attr_list, unsigned int { hcl_setsynerrbfmt ( hcl, HCL_SYNERR_FUN, HCL_CNODE_GET_LOC(attr_list), HCL_NULL, - "empty attribute list on unamed function for '%.*js'", + "empty attribute list on unnamed function for '%.*js'", HCL_CNODE_GET_TOKLEN(cmd), HCL_CNODE_GET_TOKPTR(cmd)); } return -1; @@ -3025,7 +3049,7 @@ static int check_fun_attr_list (hcl_t* hcl, hcl_cnode_t* attr_list, unsigned int conflicting: hcl_setsynerrbfmt ( hcl, HCL_SYNERR_FUN, HCL_CNODE_GET_LOC(a), HCL_NULL, - "conflicting function attribute name '%.*js'", toklen, tokptr); + "conflicting function attribute name '#%.*js'", toklen, tokptr); return -1; } ft = FUN_CM; @@ -3091,7 +3115,7 @@ static int compile_fun (hcl_t* hcl, hcl_cnode_t* src) /* fun (arg..) * fun name(arg..) * fun(#attr..) name(arg..) ## valid as class method, not valid as plain function - * fun(#attr..) (arg..) ## not valid. not attribute list for unamed functions + * fun(#attr..) (arg..) ## not valid. 
not attribute list for unnamed functions * fun(#attr..) class:name(arg..) */ @@ -3205,7 +3229,7 @@ static int compile_fun (hcl_t* hcl, hcl_cnode_t* src) * another hack is to disallow ELIST as attribute list? */ hcl_setsynerrbfmt ( hcl, HCL_SYNERR_FUN, HCL_CNODE_GET_LOC(cmd), HCL_NULL, - "unamed function not followed by function body for '%.*js'", + "unnamed function not followed by function body for '%.*js'", HCL_CNODE_GET_TOKLEN(cmd), HCL_CNODE_GET_TOKPTR(cmd)); return -1; } @@ -3230,7 +3254,7 @@ static int compile_fun (hcl_t* hcl, hcl_cnode_t* src) { hcl_setsynerrbfmt ( hcl, HCL_SYNERR_FUN, HCL_CNODE_GET_LOC(cmd), HCL_NULL, - "no function body after attribute list and argument list of unamed function for '%.*js'", + "no function body after attribute list and argument list of unnamed function for '%.*js'", HCL_CNODE_GET_TOKLEN(cmd), HCL_CNODE_GET_TOKPTR(cmd)); return -1; } @@ -3320,7 +3344,7 @@ static int compile_fun (hcl_t* hcl, hcl_cnode_t* src) { hcl_setsynerrbfmt ( hcl, HCL_SYNERR_FUN, HCL_CNODE_GET_LOC(attr_list), HCL_NULL, - "attribute list prohibited on unamed function for '%.*js'", + "attribute list prohibited on unnamed function for '%.*js'", HCL_CNODE_GET_TOKLEN(cmd), HCL_CNODE_GET_TOKPTR(cmd)); } return -1; diff --git a/lib/decode.c b/lib/decode.c index e137689..ae22fec 100644 --- a/lib/decode.c +++ b/lib/decode.c @@ -379,13 +379,13 @@ int hcl_decode (hcl_t* hcl, const hcl_code_t* code, hcl_oow_t start, hcl_oow_t e case HCL_CODE_CLASS_ENTER: { - hcl_oow_t b0, b3; - - FETCH_BYTE_CODE_TO (hcl, b0); + hcl_oow_t b3, b4, b5; FETCH_PARAM_CODE_TO (hcl, b1); FETCH_PARAM_CODE_TO (hcl, b2); FETCH_PARAM_CODE_TO (hcl, b3); - LOG_INST_4 (hcl, "class_enter %zu %zu %zu %zu", b0, b1, b2, b3); + FETCH_BYTE_CODE_TO (hcl, b4); /* spec/selfspec */ + FETCH_BYTE_CODE_TO (hcl, b5); /* indexed_type */ + LOG_INST_5 (hcl, "class_enter %zu %zu %zu %#zx %zu", b1, b2, b3, b4, b5); break; } diff --git a/lib/exec.c b/lib/exec.c index 3dd71ad..ba0b13a 100644 --- a/lib/exec.c +++ 
b/lib/exec.c @@ -4037,14 +4037,15 @@ static int execute (hcl_t* hcl) hcl_oop_t superclass, ivars_str, cvars_str, class_name, v; hcl_ooi_t expected_spec, expected_selfspec; hcl_oop_class_t class_obj; - hcl_oow_t b0, b3; + hcl_oow_t b3, b4, b5; - FETCH_BYTE_CODE_TO (hcl, b0); /* indexed_type */ FETCH_PARAM_CODE_TO (hcl, b1); /* nsuperclasses */ FETCH_PARAM_CODE_TO (hcl, b2); /* nivars */ FETCH_PARAM_CODE_TO (hcl, b3); /* ncvars */ + FETCH_BYTE_CODE_TO (hcl, b4); /* spec/selfspec */ + FETCH_BYTE_CODE_TO (hcl, b5); /* indexed_type */ - LOG_INST_4 (hcl, "class_enter %zu %zu %zu %zu", b0, b1, b2, b3); + LOG_INST_5 (hcl, "class_enter %zu %zu %zu %#zx %zu", b1, b2, b3, b4, b5); if (b3 > 0) { @@ -4072,8 +4073,8 @@ static int execute (hcl_t* hcl) } else superclass = hcl->_nil; - expected_spec = HCL_CLASS_SPEC_MAKE(b2, (b0 >> 4), b0 & 0x0F); - expected_selfspec = HCL_CLASS_SELFSPEC_MAKE(b3, 0, 0); + expected_spec = HCL_CLASS_SPEC_MAKE(b2, ((b4 >> 4) & 0x0F), (b5 & 0xFF)); + expected_selfspec = HCL_CLASS_SELFSPEC_MAKE(b3, 0, (b4 & 0x0F)); HCL_STACK_POP_TO(hcl, v); if (HCL_IS_CONS(hcl, v)) @@ -4102,6 +4103,7 @@ static int execute (hcl_t* hcl) hcl_logbfmt (hcl, HCL_LOG_STDERR, ">>>%O c->sc=%O sc=%O b2=%d b3=%d nivars=%d ncvars=%d<<<\n", class_obj, class_obj->superclass, superclass, b2, b3, (int)HCL_CLASS_SPEC_NAMED_INSTVARS(spec), (int)HCL_CLASS_SELFSPEC_CLASSVARS(spec)); #endif +hcl_logbfmt (hcl, HCL_LOG_STDERR, " spec %d %d | selfspec %d %d\n", expected_spec, spec, expected_selfspec, selfspec); if (class_obj->superclass != superclass || expected_spec != spec || expected_selfspec != selfspec || diff --git a/lib/gc.c b/lib/gc.c index 666d8cf..102a81b 100644 --- a/lib/gc.c +++ b/lib/gc.c @@ -166,7 +166,7 @@ static kernel_class_info_t kernel_classes[__KCI_MAX__] = "Class", KCI_APEX, HCL_BRAND_CLASS, - HCL_CLASS_SELFSPEC_FLAG_LIMITED, + HCL_CLASS_SELFSPEC_FLAG_FINAL | HCL_CLASS_SELFSPEC_FLAG_LIMITED, 0, /* ncvars */ HCL_CLASS_NAMED_INSTVARS, /* nivars */ HCL_CLASS_SPEC_FLAG_INDEXED 
| HCL_CLASS_SPEC_FLAG_UNCOPYABLE, diff --git a/lib/hcl-prv.h b/lib/hcl-prv.h index fdbf345..1984cfe 100644 --- a/lib/hcl-prv.h +++ b/lib/hcl-prv.h @@ -210,6 +210,8 @@ #define HCL_CLASS_SPEC_INDEXED_TYPE(spec) \ ((((hcl_oow_t)(spec)) >> HCL_CLASS_SPEC_FLAG_BITS) & HCL_LBMASK(hcl_oow_t, HCL_OBJ_FLAGS_TYPE_BITS)) +/* If you add more than 4 items, you must update code related to CLASS_ENTER instruction + * and class attributes as well as HCL_CLASS_SPEC_FLAG_BITS. */ #define HCL_CLASS_SPEC_FLAG_INDEXED (1 << 0) #define HCL_CLASS_SPEC_FLAG_IMMUTABLE (1 << 1) #define HCL_CLASS_SPEC_FLAG_UNCOPYABLE (1 << 2) @@ -260,6 +262,8 @@ #define HCL_CLASS_SELFSPEC_FLAGS(spec) \ (((hcl_oow_t)spec) & HCL_LBMASK(hcl_oow_t, HCL_CLASS_SELFSPEC_FLAG_BITS)) +/* If you add more than 4 items, you must update code related to CLASS_ENTER instruction + * and class attributes as well as HCL_CLASS_SELFSPEC_FLAG_BITS. */ #define HCL_CLASS_SELFSPEC_FLAG_FINAL (1 << 0) #define HCL_CLASS_SELFSPEC_FLAG_LIMITED (1 << 1) /* not allowed to instantiate normally */ diff --git a/src/kernel.hcl b/src/kernel.hcl index fe57565..bd620cd 100644 --- a/src/kernel.hcl +++ b/src/kernel.hcl @@ -35,7 +35,7 @@ class Apex { class Object: Apex { } -class(#uncopyable #varying #limited) Class: Apex [ +class(#uncopyable #varying #limited #final) Class: Apex [ _name _mdic _spec @@ -89,9 +89,9 @@ class FixedSizedCollection: IndexedCollection { if (self:respondsTo "initValue") { ## TODO: change "initValue" to a symbol once supported i := 0 iv := (self:initValue) - while (i < size) { + while (< i size) { ## TODO: change to i < size after having implemented these methods on integer/smallinteger core.basicAtPut obj i iv - i := (i + 1) + i := (+ i 1) ## TODO: change to i + 1 after having implemented these methods on integer/smallinteger } } return obj @@ -102,10 +102,10 @@ class FixedSizedCollection: IndexedCollection { ##} } -class Array: FixedSizedCollection { +class(#varying) Array: FixedSizedCollection 
{ } -class String: FixedSizedCollection { +class(#char #varying) String: FixedSizedCollection { fun(#class) initValue() { ##return '\0' return ' ' diff --git a/t/class-5001.err b/t/class-5001.err index 00577bf..cccd261 100644 --- a/t/class-5001.err +++ b/t/class-5001.err @@ -248,3 +248,27 @@ class X11 { ##ERROR: exception not handled - "prohibited redefintion of X11" class String { ##ERROR: exception not handled - "incompatible redefintion of String" } + +--- + +class() { ##ERROR: syntax error - empty attribute list on unnamed class for 'class' +} + +--- + +class() Kuduro { ##ERROR: syntax error - empty attribute list on 'Kuduro' for 'class' +} + +--- + +class(#byte #limited #char) Kuduro { ##ERROR: syntax error - conflicting or duplicate class attribute name '#char' +} + +--- +class(#byte #limited #final #limited) Kuduro { ##ERROR: syntax error - conflicting or duplicate class attribute name '#limited' +} + + +--- +class(#byte #bytes) Kuduro { ##ERROR: syntax error - unrecognized class attribute name 'bytes' +} diff --git a/t/var-5004.err b/t/var-5004.err index b7893c1..370b6b9 100644 --- a/t/var-5004.err +++ b/t/var-5004.err @@ -84,7 +84,7 @@ fun(#ci) fun1() { ##ERROR: syntax error - attribute list prohibited on plain fun --- -fun() () { ##ERROR: syntax error - attribute list prohibited on unamed function for 'fun' +fun() () { ##ERROR: syntax error - attribute list prohibited on unnamed function for 'fun' } ---