From 364fe63ac3c072c2f495e881d7f17a0f7e807a2a Mon Sep 17 00:00:00 2001 From: "hyunghwan.chung" Date: Wed, 20 May 2015 14:27:47 +0000 Subject: [PATCH] added #include handling --- stix/lib/comp.c | 280 ++++++++++++++++++++++++-------------------- stix/lib/main.c | 5 +- stix/lib/memo.txt | 2 +- stix/lib/stix-prv.h | 13 +- 4 files changed, 163 insertions(+), 137 deletions(-) diff --git a/stix/lib/comp.c b/stix/lib/comp.c index ee9860b..b57699c 100644 --- a/stix/lib/comp.c +++ b/stix/lib/comp.c @@ -124,7 +124,7 @@ static STIX_INLINE int is_alnumchar (stix_uci_t c) static STIX_INLINE int is_binselchar (stix_uci_t c) { /* - * binarySelectorCharacter ::= + * binary-selector-character := * '!' | '%' | '&' | '*' | '+' | ',' | * '/' | '<' | '>' | '=' | '?' | '@' | * '\' | '~' | '|' | '-' @@ -353,8 +353,8 @@ static int skip_comment (stix_t* stix) static int get_ident (stix_t* stix) { /* - * identifier ::= letter (letter | digit)* - * keyword ::= identifier ':' + * identifier := alpha-char (alpha-char | digit-char)* + * keyword := identifier ":" */ stix_uci_t c = stix->c->lxc.c; @@ -381,27 +381,27 @@ static int get_ident (stix_t* stix) static int get_numlit (stix_t* stix, int negated) { /* - * ::= ['-'] - * ::= integer | float | scaledDecimal - * integer ::= decimalInteger | radixInteger - * decimalInteger ::= digits - * digits ::= digit+ - * radixInteger ::= radixSpecifier 'r' radixDigits - * radixSpecifier := digits - * radixDigits ::= (digit | uppercaseAlphabetic)+ - * float ::= mantissa [exponentLetter exponent] - * mantissa ::= digits'.' digits - * exponent ::= ['-']decimalInteger - * exponentLetter ::= 'e' | 'd' | 'q' - * scaledDecimal ::= scaledMantissa 's' [fractionalDigits] - * scaledMantissa ::= decimalInteger | mantissa - * fractionalDigits ::= decimalInteger + * number-literal := number | ("-" number) + * number := integer | float | scaledDecimal + * integer := decimal-integer | radix-integer + * decimal-integer := digit-char+ + * radix-integer := radix-specifier "r" radix-digit+ + * radix-specifier := digit-char+ + * radix-digit := digit-char | upper-alpha-char + * + * float := mantissa [exponentLetter exponent] + * mantissa := digit-char+ "." digit-char+ + * exponent := ['-'] decimal-integer + * exponentLetter := 'e' | 'd' | 'q' + * scaledDecimal := scaledMantissa 's' [fractionalDigits] + * scaledMantissa := decimal-integer | mantissa + * fractionalDigits := decimal-integer */ stix_uci_t c = stix->c->lxc.c; stix->c->tok.type = STIX_IOTOK_NUMLIT; -/*TODO: support complext numeric literals */ +/*TODO: support a complex numeric literal */ do { ADD_TOKEN_CHAR(stix, c); @@ -417,8 +417,8 @@ static int get_numlit (stix_t* stix, int negated) static int get_charlit (stix_t* stix) { /* - * character_literal ::= '$' character - * character ::= "Any character in the implementation-defined character set" + * character-literal := "$" character + * character := normal-character | "'" */ stix_uci_t c = stix->c->lxc.c; /* even a new-line or white space would be taken */ @@ -437,9 +437,10 @@ static int get_charlit (stix_t* stix) static int get_strlit (stix_t* stix) { /* - * string_literal ::= stringDelimiter stringBody stringDelimiter - * stringBody ::= (nonStringDelimiter | (stringDelimiter stringDelimiter)*) - * stringDelimiter ::= ''' "a single quote" + * string-literal := single-quote string-character* single-quote + * string-character := normal-character | (single-quote single-quote) + * single-quote := "'" + * normal-character := character-except-single-quote */ /* TODO: C-like string */ @@ -452,20 +453,18 @@ static int get_strlit (stix_t* stix) do { ADD_TOKEN_CHAR (stix, c); - GET_CHAR (stix); - c = stix->c->lxc.c; + GET_CHAR_TO (stix, c); if (c == STIX_UCI_EOF) { /* string not closed */ - set_syntax_error (stix, STIX_SYNERR_STRNC, &stix->c->lxc.l, STIX_NULL); + set_syntax_error (stix, STIX_SYNERR_STRNC, &stix->c->tok.loc /*&stix->c->lxc.l*/, STIX_NULL); return -1; } } while (c != '\''); - GET_CHAR (stix); - c = stix->c->lxc.c; + GET_CHAR_TO (stix, c); } while (c == '\''); @@ -475,7 +474,7 @@ static int get_strlit (stix_t* stix) static int get_binsel (stix_t* stix) { /* - * binarySelector ::= binarySelectorCharacter+ + * binary-selector := binary-selector-character+ */ stix_uci_t oc; @@ -597,7 +596,9 @@ retry: goto single_char_token; case '#': - /*ADD_TOKEN_CHAR(stix, c);*/ + /* + * The hash sign is not the part of the token name. + * ADD_TOKEN_CHAR(stix, c); */ GET_CHAR_TO (stix, c); switch (c) { @@ -612,21 +613,26 @@ retry: GET_CHAR (stix); break; + case '[': + /* #[ - byte array literal */ + ADD_TOKEN_CHAR(stix, c); + stix->c->tok.type = STIX_IOTOK_BPAREN; + GET_CHAR (stix); + break; + case '\'': - /* #' - quoted symbol literal */ + /* quoted symbol literal */ GET_CHAR (stix); if (get_strlit(stix) <= -1) return -1; stix->c->tok.type = STIX_IOTOK_SYMLIT; break; - case '[': - /* #[ - byte array literal */ - /* TODO */ - break; - default: - /* unquoted symbol literal */ + /* symbol-literal := "#" symbol-body + * symbol-body := identifier | keyword+ | binary-selector | string-literal + */ + /* unquoted symbol literal */ if (is_binselchar(c)) { do @@ -705,7 +711,6 @@ retry: break; } -/*wprintf (L"TOK: %S\n", stix->c->tok.name.ptr);*/ return 0; } @@ -745,7 +750,7 @@ static int begin_include (stix_t* stix) const stix_uch_t* io_name; io_name = add_io_name (stix, &stix->c->tok.name); - if (!io_name) goto oops; + if (!io_name) return -1; arg = (stix_ioarg_t*) stix_callocmem (stix, STIX_SIZEOF(*arg)); if (!arg) goto oops; @@ -774,9 +779,6 @@ static int begin_include (stix_t* stix) return 0; oops: - /* i don't need to free 'link' since it's linked to - * stix->c->io_names that's freed at the beginning of stix_read() - * or by stix_fini() */ if (arg) stix_freemem (stix, arg); return -1; } @@ -817,11 +819,56 @@ static int end_include (stix_t* stix) } +static struct ksym_t +{ + stix_oow_t len; + stix_uch_t str[10]; +} ksyms[] = { + { 4, { 'b','y','t','e' } }, + { 5, { 'c','l','a','s','s' } }, + { 9, { 'c','l','a','s','s','i','n','s','t' } }, + { 3, { 'd','c','l' } }, + { 7, { 'd','e','c','l','a','r','e' } }, + { 3, { 'f','u','n' } }, + { 8, { 'f','u','n','c','t','i','o','n' } }, + { 7, { 'i','n','c','l','u','d','e' } }, + { 8, { 'i','n','s','t','a','n','c','e' } }, + { 4, { 'm','a','i','n' } }, + { 7, { 'p','o','i','n','t','e','r' } }, + { 4, { 'w','o','r','d' } } +}; + +enum ksym_id_t +{ + KSYM_BYTE, + KSYM_CLASS, + KSYM_CLASSINST, + KSYM_DCL, + KSYM_DECLARE, + KSYM_FUN, + KSYM_FUNCTION, + KSYM_INCLUDE, + KSYM_INSTANCE, + KSYM_MAIN, + KSYM_POINTER, + KSYM_WORD +}; +typedef enum ksym_id_t ksym_id_t; + +static int is_token_ksym (stix_t* stix, ksym_id_t id) +{ + return stix->c->tok.type == STIX_IOTOK_SYMLIT && + stix->c->tok.name.len == ksyms[id].len && + stix_equalchars(stix->c->tok.name.ptr, ksyms[id].str, ksyms[id].len); +} + #if 0 /* --------------------------------------------------------------------- * Parser and Code Generator * --------------------------------------------------------------------- */ + + static STIX_INLINE int is_tok_pseudovar (stix_t* fsc) { return fsc->tok.type == STIX_IOTOK_IDENT && @@ -839,6 +886,7 @@ static STIX_INLINE int is_tok_binsel (stix_t* fsc, const stix_uch_t* sel) stix_strequal (fsc->tok.name.ptr, sel); } + #if 0 #define EMIT_CODE_TEST(fsc,high,low) \ @@ -1193,10 +1241,10 @@ static int finish_method (stix_t* fsc) static int parse_statements (stix_t* fsc) { /* - * ::= (ORIGINAL->maybe wrong) + * := (ORIGINAL->maybe wrong) * ( ['.'] ) | * ( ['.' []]) - * ::= (REVISED->correct?) + * := (REVISED->correct?) * ['. []] */ @@ -1237,9 +1285,9 @@ static int parse_block_statements (stix_t* fsc) static int parse_statement (stix_t* fsc) { /* - * ::= | - * ::= returnOperator - * returnOperator ::= '^' + * := | + * := returnOperator + * returnOperator := '^' */ if (fsc->tok.type == STIX_IOTOK_RETURN) { @@ -1257,11 +1305,11 @@ static int parse_statement (stix_t* fsc) static int parse_expression (stix_t* fsc) { /* - * ::= | - * ::= assignmentOperator - * ::= [ ] - * ::= identifier - * assignmentOperator ::= ':=' + * := | + * := assignmentOperator + * := [ ] + * := identifier + * assignmentOperator := ':=' */ stix_vm_t* stx = fsc->stx; @@ -1301,7 +1349,7 @@ static int parse_basic_expression ( stix_t* fsc, const stix_uch_t* ident) { /* - * ::= [ ] + * := [ ] */ int is_super; @@ -1318,7 +1366,7 @@ static int parse_assignment ( stix_t* fsc, const stix_uch_t* target) { /* - * ::= assignmentOperator + * := assignmentOperator */ stix_word_t i; @@ -1363,7 +1411,7 @@ static int parse_primary ( stix_t* fsc, const stix_uch_t* ident, int* is_super) { /* - * ::= + * := * identifier | | * | ( '('')' ) */ @@ -1495,7 +1543,7 @@ static int parse_primary_ident ( { EMIT_PUSH_RECEIVER_VARIABLE (fsc, i); return 0; - } + } /* TODO: what is the best way to look up a class variable? */ /* 1. Use the class containing it and using its position */ @@ -1520,10 +1568,10 @@ static int parse_primary_ident ( static int parse_block_constructor (stix_t* fsc) { /* - * ::= '[' ']' - * ::= [* '|'] + * := '[' ']' + * := [* '|'] * [] [] - * ::= ':' identifier + * := ':' identifier */ if (fsc->tok.type == STIX_IOTOK_COLON) @@ -1573,11 +1621,11 @@ static int parse_message_continuation ( stix_t* fsc, int is_super) { /* - * ::= + * := * (+ * [] ) | * (+ [] ) | * - * ::= (';' )* + * := (';' )* */ if (parse_keyword_message(fsc, is_super) == -1) return -1; @@ -1596,8 +1644,8 @@ static int parse_message_continuation ( static int parse_keyword_message (stix_t* fsc, int is_super) { /* - * ::= (keyword )+ - * ::= * * + * := (keyword )+ + * := * * */ stix_name_t name; @@ -1661,8 +1709,8 @@ static int parse_keyword_message (stix_t* fsc, int is_super) static int parse_binary_message (stix_t* fsc, int is_super) { /* - * ::= binarySelector - * ::= * + * := binary-selector + * := * */ stix_word_t pos; int is_super2; @@ -1711,7 +1759,7 @@ static int parse_binary_message (stix_t* fsc, int is_super) static int parse_unary_message (stix_t* fsc, int is_super) { - /* ::= unarySelector */ + /* := unarySelector */ stix_word_t pos; int n; @@ -1735,7 +1783,7 @@ static int parse_unary_message (stix_t* fsc, int is_super) static int parse_method (stix_t* fsc, stix_word_t method_class, void* input) { /* - * ::= + * := * [] [] [] */ @@ -1945,10 +1993,10 @@ static int parse_keyword_pattern (stix_t* fsc) static int parse_method_name_pattern (stix_t* fsc) { /* - * ::= | | - * ::= unarySelector - * ::= binarySelector - * ::= (keyword )+ + * := | | + * := unarySelector + * := binary-selector + * := (keyword )+ */ int n; @@ -1982,8 +2030,8 @@ static int parse_method_name_pattern (stix_t* fsc) static int parse_method_temporaries (stix_t* fsc) { /* - * ::= '|' '|' - * ::= identifier* + * := '|' '|' + * := identifier* */ if (!is_tok_binsel (fsc, STIX_T("|"))) return 0; @@ -2030,7 +2078,7 @@ static int parse_method_temporaries (stix_t* fsc) static int parse_method_primitive (stix_t* fsc) { /* - * ::= '<' 'primitive:' number '>' + * := '<' 'primitive:' number '>' */ int prim_no; @@ -2228,47 +2276,7 @@ static int compile_classdef (stix_t* fsc, class_type_t class_type) GET_TOKEN (fsc); return 0; } - - -static int compile_directive (stix_t* fsc) -{ - if (fsc->tok.type == STIX_IOTOK_IDENT) - { - class_type_t class_type; - - if (get_class_type (fsc->tok.name.ptr, &class_type) >= 0) - { - if (get_token (fsc) <= -1) return -1; - return compile_classdef (fsc, class_type); - } - else if (stix_strequal (fsc->tok.name.ptr, STIX_T("include"))) - { - if (get_token (fsc) <= -1) return -1; - - if (fsc->tok.type != STIX_IOTOK_STRLIT) - { - stix_seterror (fsc, STIX_FSC_ESTRLIT, &fsc->tok.name, &fsc->tok.loc); - return -1; - } - - if (begin_include (fsc) <= -1) return -1; - } - else - { - stix_seterror (fsc, STIX_FSC_EILDIR, &fsc->tok.name, &fsc->tok.loc); - return -1; - } - } - else - { - stix_seterror (fsc, STIX_FSC_EILDIR, &fsc->tok.name, &fsc->tok.loc); - return -1; - } - - return 0; -} #endif - static int compile_stream (stix_t* stix) { @@ -2288,25 +2296,39 @@ static int compile_stream (stix_t* stix) while (stix->c->tok.type != STIX_IOTOK_EOF) { - stix_size_t i; - printf ("%d [", stix->c->tok.type); - for (i = 0; i < stix->c->tok.name.len; i++) - printf ("%c", stix->c->tok.name.ptr[i]); - printf ("]\n"); - GET_TOKEN (stix); -#if 0 - if (is_tok_binsel (stix, STIX_T("@"))) + if (is_token_ksym(stix, KSYM_INCLUDE)) { + /* #include 'xxxx' */ GET_TOKEN (stix); - if (compile_directive (stix) <= -1) return -1; + + if (stix->c->tok.type != STIX_IOTOK_STRLIT) + { + set_syntax_error (stix, STIX_SYNERR_STREX, &stix->c->tok.loc, &stix->c->tok.name); + return -1; + } + + if (begin_include(stix) <= -1) return -1; } - /* TODO: normal smalltalk message sending expressions */ - else + +/* + else if (is_token_ksym(stix, KSYM_CLASS)) { - stix_seterror (stix, STIX_FSC_EILTTOK, &stix->tok.name, &stix->tok.loc); - return -1; } -#endif + else if (is_token_ksym(stix, KSYM_MAIN)) + { + } +*/ + + else + { + /* TODO: error */ + stix_size_t i; + printf ("%d [", stix->c->tok.type); + for (i = 0; i < stix->c->tok.name.len; i++) + printf ("%c", stix->c->tok.name.ptr[i]); + printf ("]\n"); + GET_TOKEN (stix); + } } return 0; diff --git a/stix/lib/main.c b/stix/lib/main.c index d6aaed6..85b0609 100644 --- a/stix/lib/main.c +++ b/stix/lib/main.c @@ -78,6 +78,8 @@ static STIX_INLINE stix_ssize_t open_input (stix_t* stix, stix_ioarg_t* arg) stix_seterrnum (stix, STIX_EECERR); return -1; } + + arg->handle = fopen (bcs, "r"); } else { @@ -187,7 +189,8 @@ static char* syntax_error_msg[] = "string not closed", "no character after $", "no valid character after #", - "missing colon" + "missing colon", + "string expected" /* string expected in place of ${1} */ }; int main (int argc, char* argv[]) diff --git a/stix/lib/memo.txt b/stix/lib/memo.txt index d93362a..a55a2dd 100644 --- a/stix/lib/memo.txt +++ b/stix/lib/memo.txt @@ -485,7 +485,7 @@ class Association(Magnitude) -> new Association inheriting Magnitude class Association() -> new Association inheriting Stix class(#byte) Association() -> new Association class inheriting Stix, but it's byte indexed. class(#word) Association() -> new Association class inheriting Stix, but it's word indexed. -class(#oop) Association() -> new Association class inheriting Stix, but it's oop indexed. (it can have the variable part on top of the fixed part. response to the 'new: aSize' message) +class(#pointer) Association() -> new Association class inheriting Stix, but it's oop indexed. (it can have the variable part on top of the fixed part. response to the 'new: aSize' message) class(#word) Association(Magnitude) -> new Association class inheriting Magnitude, but it's word indexed. class Association -> revisit the Association class defined previsously. Revisiting can add new methods. diff --git a/stix/lib/stix-prv.h b/stix/lib/stix-prv.h index 5f2925e..a4c1bfe 100644 --- a/stix/lib/stix-prv.h +++ b/stix/lib/stix-prv.h @@ -248,12 +248,13 @@ typedef struct stix_iotok_t stix_iotok_t; enum stix_synerrnum_t { STIX_SYNERR_NOERR, - STIX_SYNERR_ILCHR, /* illegal character */ - STIX_SYNERR_CMTNC, /* comment not closed */ - STIX_SYNERR_STRNC, /* string not closed */ - STIX_SYNERR_CLTNT, /* character literal not terminated */ - STIX_SYNERR_HLTNT, /* hased literal not terminated */ - STIX_SYNERR_CLNMS, /* colon missing */ + STIX_SYNERR_ILCHR, /* illegal character */ + STIX_SYNERR_CMTNC, /* comment not closed */ + STIX_SYNERR_STRNC, /* string not closed */ + STIX_SYNERR_CLTNT, /* character literal not terminated */ + STIX_SYNERR_HLTNT, /* hased literal not terminated */ + STIX_SYNERR_CLNMS, /* colon missing */ + STIX_SYNERR_STREX /* string expected */ }; typedef enum stix_synerrnum_t stix_synerrnum_t;