From 75bb3e9a40bf4a22d888337d0f91e60d16601f72 Mon Sep 17 00:00:00 2001 From: "hyunghwan.chung" Date: Fri, 15 May 2015 14:55:12 +0000 Subject: [PATCH] added some code for a bootstraping compiler --- stix/lib/comp.c | 2298 +++++++++++++++++++++++++++++++++++++++++++ stix/lib/dic.c | 11 +- stix/lib/ignite.c | 5 +- stix/lib/main.c | 85 +- stix/lib/memo.txt | 620 ++++++++++++ stix/lib/stix-prv.h | 100 ++ stix/lib/stix.c | 45 + stix/lib/stix.h | 136 ++- stix/lib/sym.c | 3 - stix/lib/utf8.c | 184 ++++ 10 files changed, 3392 insertions(+), 95 deletions(-) create mode 100644 stix/lib/comp.c create mode 100644 stix/lib/memo.txt create mode 100644 stix/lib/utf8.c diff --git a/stix/lib/comp.c b/stix/lib/comp.c new file mode 100644 index 0000000..29188dc --- /dev/null +++ b/stix/lib/comp.c @@ -0,0 +1,2298 @@ +/* + * $Id$ + * + Copyright (c) 2014-2015 Chung, Hyung-Hwan. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "stix-prv.h" + +#if 0 +enum class_type_t +{ + CLASS_NORMAL = 0, + CLASS_VARIABLE, + CLASS_VARIABLE_BYTE, + CLASS_VARIABLE_CHAR +}; +typedef enum class_type_t class_type_t; + +enum vardef_type_t +{ + VARDEF_INSTANCE, + VARDEF_CLASS_INSTANCE, + VARDEF_CLASS +}; +typedef enum vardef_type_t vardef_type_t; + +enum stix_send_target_t +{ + STIX_SELF = 0, + STIX_SUPER = 1 +}; +typedef enum stix_send_target_t stix_send_target_t; + +enum stix_stack_operand_t +{ + STIX_RECEIVER_VARIABLE = 0, + STIX_TEMPORARY_LOCATION = 1, + STIX_LITERAL_CONSTANT = 2, + STIX_LITERAL_VARIABLE = 3 +}; +typedef enum stix_stack_operand_t stix_stack_operand_t; + +static void clear_sio_names (stix_t* fsc); +static int begin_include (stix_t* fsc); +static int end_include (stix_t* fsc); + +stix_t* stix_open (stix_mmgr_t* mmgr, stix_size_t xtnsize, stix_size_t vmheapsize) +{ + stix_t* fsc; + + fsc = STIX_MMGR_ALLOC (mmgr, STIX_SIZEOF(*fsc) + xtnsize); + if (fsc) + { + if (stix_init (fsc, mmgr, vmheapsize) <= -1) + { + STIX_MMGR_FREE (mmgr, fsc); + fsc = STIX_NULL; + } + else STIX_MEMSET (fsc + 1, 0, xtnsize); + } + + return fsc; +} + +void stix_close (stix_t* fsc) +{ + stix_fini (fsc); + STIX_MMGR_FREE (fsc->mmgr, fsc); +} + +int stix_init (stix_t* fsc, stix_mmgr_t* mmgr, stix_size_t vmheapsize) +{ + int trait; + + STIX_MEMSET (fsc, 0, STIX_SIZEOF(*fsc)); + fsc->mmgr = mmgr; + fsc->errstr = stix_dflerrstr; + + fsc->vm = stix_vm_open (mmgr, 0, vmheapsize); + if (fsc->vm == STIX_NULL) + { + 
fsc->errnum = STIX_FSC_ENOMEM; + return -1; + } + + /* turn off garbage collection by force */ + stix_vm_getopt (fsc->vm, STIX_VM_TRAIT, &trait); + trait |= STIX_VM_NOGC; + stix_vm_setopt (fsc->vm, STIX_VM_TRAIT, &trait); + + if (stix_vm_boom (fsc->vm) <= -1) + { + /* TODO: translate vm error to fsc error */ +fsc->errnum = STIX_FSC_ENOMEM; + stix_vm_close (fsc->vm); + return -1; + } + +#if 0 + if (stix_tok_open (&fsc->tok, 0) == STIX_NULL) + { + if (fsc->__dynamic) stix_free (fsc); + return STIX_NULL; + } + + if (stix_arr_open ( + &fsc->bcd, 256, + stix_sizeof(stix_uint8_t), STIX_NULL) == STIX_NULL) + { + stix_tok_close (&fsc->tok); + if (fsc->__dynamic) stix_free (fsc); + return STIX_NULL; + } + + fsc->stx = stx; + fsc->errnum = STIX_FSC_ERROR_NONE; + + fsc->met.tmpr.count = 0; + fsc->met.tmpr.nargs = 0; + fsc->literal_count = 0; + + fsc->sio.lxc.c = STIX_CHAR_EOF; + fsc->ungotc_count = 0; + + fsc->input_owner = STIX_NULL; + fsc->input_func = STIX_NULL; + return fsc; +#endif + + return 0; +} + +void stix_fini (stix_t* fsc) +{ + stix_vm_close (fsc->vm); + +#if 0 + stix_arr_close (&fsc->bcd); +#endif + + clear_sio_names (fsc); +} + +/* --------------------------------------------------------------------- + * Tokenizer + * --------------------------------------------------------------------- */ + +static STIX_INLINE int is_binselchar (stix_cint_t c) +{ + /* + * binarySelectorCharacter ::= + * '!' | '%' | '&' | '*' | '+' | ',' | + * '/' | '<' | '>' | '=' | '?' 
| '@' |
+	 *   '\' | '~' | '|' | '-'
+	 */
+
+	switch (c)
+	{
+		case STIX_T('!'):
+		case STIX_T('%'):
+		case STIX_T('&'):
+		case STIX_T('*'):
+		case STIX_T('+'):
+		case STIX_T(','):
+		case STIX_T('/'):
+		case STIX_T('<'):
+		case STIX_T('>'):
+		case STIX_T('='):
+		case STIX_T('?'):
+		case STIX_T('@'):
+		case STIX_T('\\'):
+		case STIX_T('|'):
+		case STIX_T('~'):
+		case STIX_T('-'):
+			return 1;
+
+		default:
+			return 0;
+	}
+}
+
+/* returns non-zero if 'c' is one of  . ] ) ; " '  */
+static STIX_INLINE int is_closing_char (stix_cint_t c)
+{
+	return
+		c == STIX_T('.') || c == STIX_T(']') ||
+		c == STIX_T(')') || c == STIX_T(';') ||
+		c == STIX_T('\"') || c == STIX_T('\'');
+}
+
+/* the helper macros below return -1 from the *calling* function
+ * when the wrapped call fails. use them only in functions that
+ * return int and have nothing to clean up. */
+#define GET_CHAR(fsc) \
+	do { if (get_char (fsc) == -1) return -1; } while (0)
+
+#define GET_TOKEN(fsc) \
+	do { if (get_token (fsc) == -1) return -1; } while (0)
+
+#define ADD_TOKEN_CHAR(fsc,c) \
+	do { if (add_token_char (fsc, c) == -1) return -1; } while (0)
+
+#define ADD_TOKEN_STR(fsc,s) \
+	do { if (add_token_str (fsc, s) == -1) return -1; } while (0)
+
+/* appends a single character to the current token and keeps the
+ * token buffer null-terminated. returns 0 on success. returns -1
+ * with STIX_FSC_ETOKTL set if the buffer has no room left. */
+static STIX_INLINE int add_token_char (stix_t* fsc, stix_char_t c)
+{
+	if (fsc->tok.name.len >= STIX_COUNTOF(fsc->tok.buf) - 1)
+	{
+		/* the tok buffer is full. cannot add more characters to it */
+		fsc->errnum = STIX_FSC_ETOKTL;
+		return -1;
+	}
+	fsc->tok.buf[fsc->tok.name.len++] = c;
+	fsc->tok.buf[fsc->tok.name.len] = STIX_T('\0');
+	return 0;
+}
+
+/* appends a null-terminated string to the current token.
+ * returns 0 on success. returns -1 with STIX_FSC_ETOKTL set if
+ * the string plus the terminating null does not fit. */
+static STIX_INLINE int add_token_str (stix_t* fsc, const stix_char_t* str)
+{
+	stix_size_t len;
+
+	len = stix_strlen (str);
+	/* one slot is reserved for the terminating null. a string that
+	 * fills the remaining slots exactly is accepted, which gives the
+	 * same capacity limit as add_token_char(). */
+	if (fsc->tok.name.len + len > STIX_COUNTOF(fsc->tok.buf) - 1)
+	{
+		/* the tok buffer is full. cannot add more characters to it */
+		fsc->errnum = STIX_FSC_ETOKTL;
+		return -1;
+	}
+
+	/* the return value of stix_strcpy() is used as the number of
+	 * characters copied */
+	fsc->tok.name.len += stix_strcpy (&fsc->tok.buf[fsc->tok.name.len], str);
+	return 0;
+}
+
+/* reads the next character of the current input stream into
+ * fsc->sio.lxc together with its file/line/column, refilling the
+ * stream buffer through the i/o handler when it is exhausted.
+ * returns 0 on success. the end of the stream is also a success
+ * and is signalled by fsc->sio.lxc.c == STIX_CHAR_EOF.
+ * returns -1 if the i/o handler fails. */
+static int get_char (stix_t* fsc)
+{
+	stix_ssize_t n;
+
+	if (fsc->sio.inp->b.pos >= fsc->sio.inp->b.len)
+	{
+		n = fsc->sio.impl (fsc, STIX_FSC_IO_READ, fsc->sio.inp);
+		if (n <= -1) return -1;
+
+		if (n == 0)
+		{
+			fsc->sio.inp->lxc.c = STIX_CHAR_EOF;
+			fsc->sio.inp->lxc.line = fsc->sio.inp->line;
+			fsc->sio.inp->lxc.colm = fsc->sio.inp->colm;
+			fsc->sio.inp->lxc.file = fsc->sio.inp->name;
+			fsc->sio.lxc = fsc->sio.inp->lxc;
+			return 0;
+		}
+
+		fsc->sio.inp->b.pos = 0;
+		fsc->sio.inp->b.len = n;
+	}
+
+	if (fsc->sio.inp->lxc.c == STIX_T('\n'))
+	{
+		/* if the previous character was a newline,
+		 * increment the line counter and reset column to 1.
+		 * incrementing the line number here instead of
+		 * updating inp->lxc causes the line number for
+		 * TOK_EOF to be the same line as the lxc newline. */
+		fsc->sio.inp->line++;
+		fsc->sio.inp->colm = 1;
+	}
+
+	fsc->sio.inp->lxc.c = fsc->sio.inp->buf[fsc->sio.inp->b.pos++];
+	fsc->sio.inp->lxc.line = fsc->sio.inp->line;
+	fsc->sio.inp->lxc.colm = fsc->sio.inp->colm++;
+	fsc->sio.inp->lxc.file = fsc->sio.inp->name;
+	fsc->sio.lxc = fsc->sio.inp->lxc;
+	return 0;
+}
+
+/* advances the input until the current character is not a space.
+ * returns 0 on success, -1 if reading fails. */
+static int skip_spaces (stix_t* fsc)
+{
+	while (STIX_ISSPACE(fsc->sio.lxc.c)) GET_CHAR (fsc);
+	return 0;
+}
+
+/* skips the body of a comment and its closing double quote.
+ * the caller must have consumed the opening quote already.
+ * returns 0 on success. returns -1 if reading fails or if the
+ * input ends before the closing quote is seen. */
+static int skip_comment (stix_t* fsc)
+{
+	/* comment is a double quoted string */
+	while (fsc->sio.lxc.c != STIX_T('"'))
+	{
+		if (fsc->sio.lxc.c == STIX_CHAR_EOF)
+		{
+			/* get_char() keeps succeeding with STIX_CHAR_EOF at
+			 * the end of input. without this check, an unterminated
+			 * comment would spin here forever.
+			 * TODO: introduce a dedicated 'comment not terminated'
+			 *       error number. the 'string not terminated' one
+			 *       is borrowed for now. */
+			stix_seterrnum (fsc, STIX_FSC_ESTRNT, STIX_NULL);
+			return -1;
+		}
+		GET_CHAR (fsc);
+	}
+	GET_CHAR (fsc); /* skip the closing quote */
+	return 0;
+}
+
+static int get_ident (stix_t* fsc)
+{
+	/*
+	 * identifier ::= letter (letter | digit)*
+	 * keyword ::= identifier ':'
+	 */
+
+	stix_cint_t c = fsc->sio.lxc.c;
+	fsc->tok.type = STIX_FSC_TOK_IDENT;
+
+	do
+	{
+		ADD_TOKEN_CHAR(fsc, c);
+		GET_CHAR (fsc);
+		c = fsc->sio.lxc.c;
+	}
+	while (STIX_ISALPHA(c) || STIX_ISDIGIT(c));
+
+	if (c == STIX_T(':'))
+	{
+		ADD_TOKEN_CHAR
(fsc, c);
+		fsc->tok.type = STIX_FSC_TOK_KEYWORD;
+		GET_CHAR (fsc);
+	}
+
+	return 0;
+}
+
+/* scans a number literal starting at the current character.
+ * for now, every following letter or digit is collected into the
+ * token without validation. 'negated' is not used yet.
+ * returns 0 on success, -1 on failure. */
+static int get_numlit (stix_t* fsc, int negated)
+{
+	/*
+	 * <number literal> ::= ['-'] <number>
+	 * <number> ::= integer | float | scaledDecimal
+	 * integer ::= decimalInteger | radixInteger
+	 * decimalInteger ::= digits
+	 * digits ::= digit+
+	 * radixInteger ::= radixSpecifier 'r' radixDigits
+	 * radixSpecifier := digits
+	 * radixDigits ::= (digit | uppercaseAlphabetic)+
+	 * float ::= mantissa [exponentLetter exponent]
+	 * mantissa ::= digits'.' digits
+	 * exponent ::= ['-']decimalInteger
+	 * exponentLetter ::= 'e' | 'd' | 'q'
+	 * scaledDecimal ::= scaledMantissa 's' [fractionalDigits]
+	 * scaledMantissa ::= decimalInteger | mantissa
+	 * fractionalDigits ::= decimalInteger
+	 */
+
+	stix_cint_t c = fsc->sio.lxc.c;
+	fsc->tok.type = STIX_FSC_TOK_NUMLIT;
+
+	do
+	{
+		ADD_TOKEN_CHAR(fsc, c);
+		GET_CHAR (fsc);
+		c = fsc->sio.lxc.c;
+	}
+	while (STIX_ISALPHA(c) || STIX_ISDIGIT(c));
+
+	/* TODO; more */
+	return 0;
+}
+
+/* scans the character following '$' as a character literal.
+ * the caller must have consumed the '$' already.
+ * returns 0 on success. returns -1 with STIX_FSC_ECHRNT set if
+ * the input ends right after the '$'. */
+static int get_charlit (stix_t* fsc)
+{
+	/*
+	 * character_literal ::= '$' character
+	 * character ::= "Any character in the implementation-defined character set"
+	 */
+
+	stix_cint_t c = fsc->sio.lxc.c; /* even a new-line or white space would be taken */
+	if (c == STIX_CHAR_EOF)
+	{
+		stix_seterrnum (fsc, STIX_FSC_ECHRNT, STIX_NULL);
+		return -1;
+	}
+
+	fsc->tok.type = STIX_FSC_TOK_CHRLIT;
+	ADD_TOKEN_CHAR(fsc, c);
+	GET_CHAR (fsc);
+	return 0;
+}
+
+/* scans the body of a string literal into the token buffer.
+ * the caller must have consumed the opening quote already, so the
+ * current character is the first character of the body or the
+ * closing quote of an empty literal. two consecutive quotes inside
+ * the body stand for one literal quote. on success, the closing
+ * quote has been consumed and the current character is the one
+ * following it.
+ * returns 0 on success. returns -1 with STIX_FSC_ESTRNT set if the
+ * input ends before the closing quote, or -1 on a read/buffer failure. */
+static int get_strlit (stix_t* fsc)
+{
+	/*
+	 * string_literal ::= stringDelimiter stringBody stringDelimiter
+	 * stringBody ::= (nonStringDelimiter | (stringDelimiter stringDelimiter)*)
+	 * stringDelimiter ::= ''' "a single quote"
+	 */
+
+	/* TODO: C-like string */
+
+	stix_cint_t c = fsc->sio.lxc.c;
+	fsc->tok.type = STIX_FSC_TOK_STRLIT;
+
+	while (1)
+	{
+		/* test before adding so that neither the end of input nor
+		 * the closing quote of an empty literal ('') ends up in
+		 * the token */
+		if (c == STIX_CHAR_EOF)
+		{
+			stix_seterrnum (fsc, STIX_FSC_ESTRNT, STIX_NULL);
+			return -1;
+		}
+
+		if (c == STIX_T('\''))
+		{
+			GET_CHAR (fsc);
+			c = fsc->sio.lxc.c;
+
+			/* a lone quote closes the literal */
+			if (c != STIX_T('\'')) break;
+
+			/* a doubled quote. fall through to add one quote */
+		}
+
+		ADD_TOKEN_CHAR (fsc, c);
+		GET_CHAR (fsc);
+		c = fsc->sio.lxc.c;
+	}
+
+	return 0;
+}
+
+/* scans a binary selector of up to 2 characters. a minus sign
+ * immediately followed by a digit is handed over to get_numlit()
+ * instead. returns 0 on success, -1 on failure. */
+static int get_binsel (stix_t* fsc)
+{
+	/*
+	 * binarySelector ::= binarySelectorCharacter+
+	 */
+
+	ADD_TOKEN_CHAR (fsc, fsc->sio.lxc.c);
+
+	if (fsc->sio.lxc.c == STIX_T('-'))
+	{
+		/* special case if a minus is followed by a digit immediately */
+		GET_CHAR (fsc);
+		if (STIX_ISDIGIT(fsc->sio.lxc.c)) return get_numlit (fsc, 1);
+	}
+	else GET_CHAR (fsc);
+
+	/* up to 2 characters only */
+	if (is_binselchar (fsc->sio.lxc.c))
+	{
+		ADD_TOKEN_CHAR (fsc, fsc->sio.lxc.c);
+		GET_CHAR (fsc);
+	}
+
+	/* or up to any occurrences */
+	/*
+	while (is_binselchar(fsc->sio.lxc.c))
+	{
+		ADD_TOKEN_CHAR (fsc, c);
+		GET_CHAR (fsc);
+	}
+	*/
+
+	fsc->tok.type = STIX_FSC_TOK_BINSEL;
+	return 0;
+}
+
+static int get_token (stix_t* fsc)
+{
+	stix_cint_t c;
+
+retry:
+	do
+	{
+		if (skip_spaces(fsc) <= -1) return -1;
+		if (fsc->sio.lxc.c != STIX_T('"')) break;
+
+		GET_CHAR (fsc);
+		if (skip_comment(fsc) <= -1) return -1;
+	}
+	while (1);
+
+	/* clear the token resetting its location */
+	fsc->tok.type = 0;
+	fsc->tok.buf[0] = STIX_T('\0');
+	fsc->tok.name.len = 0;
+	fsc->tok.name.ptr = fsc->tok.buf;
+	fsc->tok.loc.file = fsc->sio.lxc.file;
+	fsc->tok.loc.line = fsc->sio.lxc.line;
+	fsc->tok.loc.colm = fsc->sio.lxc.colm;
+	c = fsc->sio.lxc.c;
+
+	switch (c)
+	{
+		case STIX_CHAR_EOF:
+		{
+			int n;
+
+			n = end_include (fsc);
+			if (n <= -1) return -1;
+			if (n >= 1) goto retry;
+
+			fsc->tok.type = STIX_FSC_TOK_EOF;
+			ADD_TOKEN_STR(fsc, STIX_T(""));
+			break;
+		}
+
+		case STIX_T('$'): /* character literal */
+			GET_CHAR (fsc);
+			if (get_charlit(fsc) == -1) return -1;
+			break;
+
+		case STIX_T('\''): /* string literal */
+			GET_CHAR (fsc);
+			if (get_strlit(fsc) == -1) return -1;
+			break;
+
+		case STIX_T(':'):
+			fsc->tok.type = STIX_FSC_TOK_COLON;
+			ADD_TOKEN_CHAR(fsc, c);
+			GET_CHAR (fsc);
+
+			c = fsc->sio.lxc.c;
+			if (c == STIX_T('='))
+			{
+				fsc->tok.type = STIX_FSC_TOK_ASSIGN;
ADD_TOKEN_CHAR(fsc, c); + GET_CHAR (fsc); + } + break; + + case STIX_T('^'): + case STIX_T('{'): /* extension */ + case STIX_T('}'): /* extension */ + case STIX_T('['): + case STIX_T(']'): + case STIX_T('('): + case STIX_T(')'): + case STIX_T('.'): + case STIX_T(';'): + case STIX_T('#'): + switch (c) + { + case STIX_T('^'): + fsc->tok.type = STIX_FSC_TOK_RETURN; + break; + + case STIX_T('{'): + fsc->tok.type = STIX_FSC_TOK_LBRACE; + break; + + case STIX_T('}'): + fsc->tok.type = STIX_FSC_TOK_RBRACE; + break; + + case STIX_T('['): + fsc->tok.type = STIX_FSC_TOK_LBRACK; + break; + + case STIX_T(']'): + fsc->tok.type = STIX_FSC_TOK_RBRACK; + break; + + case STIX_T('('): + fsc->tok.type = STIX_FSC_TOK_LPAREN; + break; + + case STIX_T(')'): + fsc->tok.type = STIX_FSC_TOK_RPAREN; + break; + + case STIX_T('.'): + fsc->tok.type = STIX_FSC_TOK_PERIOD; + break; + + case STIX_T(';'): + fsc->tok.type = STIX_FSC_TOK_SEMICOLON; + break; + + case STIX_T('#'): + fsc->tok.type = STIX_FSC_TOK_HASH; + break; + } + + ADD_TOKEN_CHAR(fsc, c); + GET_CHAR (fsc); + break; + +#if 0 + case STIX_T('#'): + /*ADD_TOKEN_CHAR(fsc, c);*/ + GET_CHAR (fsc); + + c = fsc->sio.lxc.c; + switch (c) + { + case STIX_CHAR_EOF: + fsc->errnum = STIX_FSC_ELITNT; + return -1; + + case STIX_T('('): + /* #( */ + ADD_TOKEN_CHAR(fsc, c); + fsc->tok.type = STIX_FSC_TOK_APAREN; + GET_CHAR (fsc); + break; + + case STIX_T('\''): + /* #' - quoted symbol literal */ + GET_CHAR (fsc); + if (get_strlit(fsc) <= -1) return -1; + fsc->tok.type = STIX_FSC_TOK_SYMLIT; + break; + + case STIX_T('['): + /* #[ - byte array literal */ + /* TODO */ + break; + + default: + /* unquoted symbol literal */ + if (is_closing_char(c) || STIX_ISSPACE(c)) + { + fsc->errnum = STIX_FSC_ELITNT; + return -1; + } + + do + { + ADD_TOKEN_CHAR(fsc, c); + GET_CHAR (fsc); + c = fsc->sio.lxc.c; + } + while (!is_closing_char(c) && !STIX_ISSPACE(c)); + + fsc->tok.type = STIX_FSC_TOK_SYMLIT; + break; + } + + break; +#endif + + + default: + if (STIX_ISALPHA 
(c))
+			{
+				if (get_ident (fsc) <= -1) return -1;
+			}
+			else if (STIX_ISDIGIT (c))
+			{
+				if (get_numlit (fsc, 0) <= -1) return -1;
+			}
+			else if (is_binselchar (c))
+			{
+				/* binary selector */
+				if (get_binsel (fsc) <= -1) return -1;
+			}
+			else
+			{
+				stix_cstr_t ea;
+				stix_char_t cc;
+
+				cc = (stix_char_t)c;
+				ea.ptr = &cc;
+				ea.len = 1;
+
+				stix_seterrnum (fsc, STIX_FSC_EILCHR, &ea);
+				return -1;
+			}
+			break;
+	}
+
+wprintf (L"TOK: %S\n", fsc->tok.name.ptr);
+	return 0;
+}
+
+/* frees every node of the fsc->sio_names list and leaves the
+ * list empty */
+static void clear_sio_names (stix_t* fsc)
+{
+	stix_link_t* cur;
+	while (fsc->sio_names)
+	{
+		cur = fsc->sio_names;
+		fsc->sio_names = cur->link;
+		stix_freemem (fsc, cur);
+	}
+}
+
+/* opens the stream named by the current token as a nested input,
+ * makes it the current input, and reads its first token.
+ * the name is copied into a node linked to fsc->sio_names so that
+ * it stays valid after the stream argument is freed.
+ * returns 0 on success, -1 on failure. */
+static int begin_include (stix_t* fsc)
+{
+	/* 'arg' must start as null. the oops label inspects it and can
+	 * be reached before 'arg' is allocated, when the allocation of
+	 * 'link' fails. */
+	stix_ioarg_t* arg = STIX_NULL;
+	stix_link_t* link;
+
+	link = (stix_link_t*) stix_callocmem (fsc, STIX_SIZEOF(*link) + STIX_SIZEOF(stix_char_t) * (fsc->tok.name.len + 1));
+	if (link == STIX_NULL) goto oops;
+
+	stix_strcpy ((stix_char_t*)(link + 1), fsc->tok.name.ptr);
+	link->link = fsc->sio_names;
+	fsc->sio_names = link;
+
+	arg = (stix_ioarg_t*) stix_callocmem (fsc, STIX_SIZEOF(*arg));
+	if (arg == STIX_NULL) goto oops;
+
+	arg->name = (const stix_char_t*)(link + 1);
+	arg->line = 1;
+	arg->colm = 1;
+	arg->prev = fsc->sio.inp;
+
+	if (fsc->sio.impl (fsc, STIX_FSC_IO_OPEN, arg) <= -1) goto oops;
+
+	fsc->sio.inp = arg;
+	/* fsc->parse.depth.incl++; */
+
+	/* read in the first character in the included file.
+	 * so the next call to get_token() sees the character read
+	 * from this file. */
+	if (get_char (fsc) <= -1 || get_token (fsc) <= -1)
+	{
+		end_include (fsc);
+		/* i don't jump to oops since i've called
+		 * end_include() where fsc->sio.inp/arg is freed. */
+		return -1;
+	}
+
+	return 0;
+
+oops:
+	/* i don't need to free 'link' since it's linked to
+	 * fsc->sio_names that's freed at the beginning of stix_read()
+	 * or by stix_fini() */
+	if (arg) stix_freemem (fsc, arg);
+	return -1;
+}
+
+/* closes the current included stream and resumes the outer one.
+ * returns 0 if the current input is the top-level stream and
+ * nothing has been closed, 1 if an included stream has been ended
+ * successfully, and -1 if closing has failed. the included stream
+ * is unlinked and freed even when closing fails. */
+static int end_include (stix_t* fsc)
+{
+	int x;
+	stix_ioarg_t* cur;
+
+	if (fsc->sio.inp == &fsc->sio.arg) return 0; /* no include */
+
+	/* if it is an included file, close it and
+	 * retry to read a character from an outer file */
+
+	x = fsc->sio.impl (fsc, STIX_FSC_IO_CLOSE, fsc->sio.inp);
+
+	/* if closing has failed, still destroy the
+	 * sio structure first as normal and return
+	 * the failure below. this way, the caller
+	 * does not call STIX_FSC_IO_CLOSE on
+	 * fsc->sio.inp again. */
+
+	cur = fsc->sio.inp;
+	fsc->sio.inp = fsc->sio.inp->prev;
+
+	STIX_ASSERT (cur->name != STIX_NULL);
+	stix_freemem (fsc, cur);
+	/* fsc->parse.depth.incl--; */
+
+	if (x != 0)
+	{
+		/* the failure mentioned above is returned here */
+		return -1;
+	}
+
+	fsc->sio.lxc = fsc->sio.inp->lxc;
+	return 1; /* ended the included file successfully */
+}
+
+
+/* ---------------------------------------------------------------------
+ * Parser and Code Generator
+ * --------------------------------------------------------------------- */
+
+/* returns non-zero if the current token is an identifier spelled
+ * self, super, thisContext, nil, true, or false */
+static STIX_INLINE int is_tok_pseudovar (stix_t* fsc)
+{
+	return fsc->tok.type == STIX_FSC_TOK_IDENT &&
+		(stix_strequal(fsc->tok.name.ptr, STIX_T("self")) ||
+		 stix_strequal(fsc->tok.name.ptr, STIX_T("super")) ||
+		 stix_strequal(fsc->tok.name.ptr, STIX_T("thisContext")) ||
+		 stix_strequal(fsc->tok.name.ptr, STIX_T("nil")) ||
+		 stix_strequal(fsc->tok.name.ptr, STIX_T("true")) ||
+		 stix_strequal(fsc->tok.name.ptr, STIX_T("false")));
+}
+
+/* returns non-zero if the current token is a binary selector
+ * spelled exactly as 'sel' */
+static STIX_INLINE int is_tok_binsel (stix_t* fsc, const stix_char_t* sel)
+{
+	return fsc->tok.type == STIX_FSC_TOK_BINSEL &&
+		stix_strequal (fsc->tok.name.ptr, sel);
+}
+
+#if 0
+
+#define EMIT_CODE_TEST(fsc,high,low) \
+	do { if (emit_code_test(fsc,high,low) == -1) return
-1; } while (0) + +#define EMIT_PUSH_RECEIVER_VARIABLE(fsc,pos) \ + do { \ + if (emit_stack_positional ( \ + fsc, PUSH_RECEIVER_VARIABLE, pos) == -1) return -1; \ + } while (0) + +#define EMIT_PUSH_TEMPORARY_LOCATION(fsc,pos) \ + do { \ + if (emit_stack_positional ( \ + fsc, PUSH_TEMPORARY_LOCATION, pos) == -1) return -1; \ + } while (0) + +#define EMIT_PUSH_LITERAL_CONSTANT(fsc,pos) \ + do { \ + if (emit_stack_positional ( \ + fsc, PUSH_LITERAL_CONSTANT, pos) == -1) return -1; \ + } while (0) + + +#define EMIT_PUSH_LITERAL_VARIABLE(fsc,pos) \ + do { \ + if (emit_stack_positional ( \ + fsc, PUSH_LITERAL_VARIABLE, pos) == -1) return -1; \ + } while (0) + +#define EMIT_STORE_RECEIVER_VARIABLE(fsc,pos) \ + do { \ + if (emit_stack_positional ( \ + fsc, STORE_RECEIVER_VARIABLE, pos) == -1) return -1; \ + } while (0) + +#define EMIT_STORE_TEMPORARY_LOCATION(fsc,pos) \ + do { \ + if (emit_stack_positional ( \ + fsc, STORE_TEMPORARY_LOCATION, pos) == -1) return -1; \ + } while (0) + + +#define EMIT_POP_STACK_TOP(fsc) EMIT_CODE(fsc, POP_STACK_TOP) +#define EMIT_DUPLICATE_STACK_TOP(fsc) EMIT_CODE(fsc, DUPLICATE_STACK_TOP) +#define EMIT_PUSH_ACTIVE_CONTEXT(fsc) EMIT_CODE(fsc, PUSH_ACTIVE_CONTEXT) +#define EMIT_RETURN_FROM_MESSAGE(fsc) EMIT_CODE(fsc, RETURN_FROM_MESSAGE) +#define EMIT_RETURN_FROM_BLOCK(fsc) EMIT_CODE(fsc, RETURN_FROM_BLOCK) + +#define EMIT_SEND_TO_SELF(fsc,nargs,selector) \ + do { \ + if (emit_send_to_self(fsc,nargs,selector) == -1) return -1; \ + } while (0) + +#define EMIT_SEND_TO_SUPER(fsc,nargs,selector) \ + do { \ + if (emit_send_to_super(fsc,nargs,selector) == -1) return -1; \ + } while (0) + +#define EMIT_DO_PRIMITIVE(fsc,no) \ + do { if (emit_do_primitive(fsc,no) == -1) return -1; } while(0) + +#endif + +static STIX_INLINE int emit_code_test ( + stix_t* fsc, const stix_char_t* high, const stix_char_t* low) +{ +wprintf (L"CODE: %s %s\n", high, low); + return 0; +} + +static STIX_INLINE int emit_code (stix_t* fsc, const stix_uint8_t* code, int len) +{ + 
int i; + + if ((fsc->bcd.len + len) > STIX_COUNTOF(fsc->bcd.buf)) + { + stix_seterrnum (fsc, STIX_FSC_EBCDTL, STIX_NULL); + return -1; + } + + for (i = 0; i < len; i++) fsc->bcd.buf[fsc->bcd.len++] = code[i]; + return 0; +} + +static int emit_push_stack (stix_t* fsc, stix_stack_operand_t type, int pos) +{ + /* + * 0-15 0000iiii Push Receiver Variable #iiii + * 16-31 0001iiii Push Temporary Location #iiii + * 32-63 001iiiii Push Literal Constant #iiiii + * 64-95 010iiiii Push Literal Variable #iiiii + * 128 10000000 jjkkkkkk Push (Receiver Variable, Temporary Location, Literal Constant, Literal Variable) [jj] #kkkkkk + */ + + static int bcds[] = + { + STIX_PUSH_RECEIVER_VARIABLE, + STIX_PUSH_TEMPORARY_LOCATION, + STIX_PUSH_LITERAL_CONSTANT, + STIX_PUSH_LITERAL_VARIABLE + }; + static int bounds[] = { 0x0F, 0x0F, 0x1F, 0x1F }; + + stix_uint8_t code[2]; + int len = 0; + + STIX_ASSERT (pos >= 0x0 && pos <= 0x3F); /* 0 to 63 */ + STIX_ASSERT (type >= STIX_RECEIVER_VARIABLE && type <= STIX_LITERAL_VARIABLE); + + if (pos <= bounds[type]) + { + code[len++] = bcds[type] | pos; + } + else + { + code[len++] = STIX_PUSH_EXTENDED; + code[len++] = (type << 6) | pos; + } + + return emit_code (fsc, code, len); +} + +static int emit_store_stack (stix_t* fsc, stix_stack_operand_t type, int pos) +{ + /* + * 129 10000001 jjkkkkkk Store (Receiver Variable, Temporary Location, Illegal, Literal Variable) [jj] #kkkkkk + */ + + stix_uint8_t code[2]; + int len = 0; + + STIX_ASSERT (pos >= 0x0 && pos <= 0x3F); /* 0 to 63 */ + STIX_ASSERT (type >= STIX_RECEIVER_VARIABLE && type <= STIX_LITERAL_VARIABLE); + + code[len++] = STIX_STORE_EXTENDED; + code[len++] = (type << 6) | pos; + + return emit_code (fsc, code, len); +} + +static int emit_pop_store_stack (stix_t* fsc, stix_stack_operand_t type, int pos) +{ + /* + * 96-103 01100iii Pop and Store Receiver Variable #iii + * 104-111 01101iii Pop and Store Temporary Location #iii + * 129 10000001 jjkkkkkk Store (Receiver Variable, Temporary Location, 
Illegal, Literal Variable) [jj] #kkkkkk + * 130 10000010 jjkkkkkk Pop and Store (Receiver Variable, Temporary Location, Illegal, Literal Variable) [jj] #kkkkkk + */ + + stix_uint8_t code[2]; + int len = 0; + + static int bcds[] = + { + STIX_POP_STORE_RECEIVER_VARIABLE, + STIX_POP_STORE_TEMPORARY_LOCATION + }; + + STIX_ASSERT (pos >= 0x0 && pos <= 0x3F); /* 0 to 63 */ + STIX_ASSERT (type >= STIX_RECEIVER_VARIABLE && type <= STIX_LITERAL_VARIABLE && type != STIX_LITERAL_CONSTANT); + + switch (type) + { + case STIX_RECEIVER_VARIABLE: + case STIX_TEMPORARY_LOCATION: + if (pos <= 0x07) + { + code[len++] = bcds[type] | pos; + break; + } + /* fall through */ + + default: + code[len++] = STIX_POP_STORE_EXTENDED; + code[len++] = (type << 6) | pos; + break; + } + + return emit_code (fsc, code, len); +} + +static int emit_send_message (stix_t* fsc, stix_send_target_t target, int selector, int nargs) +{ + /* + * 131 10000011 jjjkkkkk Send Literal Selector #kkkkk With jjj Arguments + * 132 10000100 jjjjjjjj kkkkkkkk Send Literal Selector #kkkkkkkk With jjjjjjjj Arguments + * 133 10000101 jjjkkkkk Send Literal Selector #kkkkk To Superclass With jjj Arguments + * 134 10000110 jjjjjjjj kkkkkkkk Send Literal Selector #kkkkkkkk To Superclass With jjjjjjjj Arguments + */ + + static struct { int basic; int extended; } bcds[] = + { + { STIX_SEND_TO_SELF, STIX_SEND_TO_SELF_EXTENDED }, + { STIX_SEND_TO_SUPER, STIX_SEND_TO_SUPER_EXTENDED } + }; + + stix_uint8_t code[3]; + int len = 0; + + STIX_ASSERT (selector >= 0 && selector <= 0xFF); + STIX_ASSERT (nargs >= 0 && nargs <= 0xFF); + + if (nargs <= 0x7 && selector <= 0x1F) + { + code[len++] = bcds[target].basic; + code[len++] = (nargs << 5) | selector; + } + else + { + code[len++] = bcds[target].extended; + code[len++] = nargs; + code[len++] = selector; + } + + return emit_code (fsc, code, len); +} + +static int emit_do_primitive (stix_t* fsc, int no) +{ + stix_uint8_t code[2]; + int len = 0; + + STIX_ASSERT (no >= 0x0 && no <= 0xFF); + + 
code[len++] = STIX_DO_PRIMITIVE; + code[len++] = no; + + return emit_code (fsc, code, len); +} + +#if 0 +static int __add_literal (stix_t* fsc, stix_word_t literal) +{ + stix_word_t i; + + for (i = 0; i < fsc->literal_count; i++) { + /* + * it would remove redundancy of symbols and small integers. + * more complex redundacy check may be done somewhere else + * like in __add_string_literal. + */ + if (fsc->literals[i] == literal) return i; + } + + if (fsc->literal_count >= STIX_COUNTOF(fsc->literals)) { + fsc->errnum = STIX_FSC_ERROR_TOO_MANY_LITERALS; + return -1; + } + + fsc->literals[fsc->literal_count++] = literal; + return fsc->literal_count - 1; +} + +static int __add_character_literal (stix_t* fsc, stix_char_t ch) +{ + stix_word_t i, c, literal; + stix_vm_t* stx = fsc->stx; + + for (i = 0; i < fsc->literal_count; i++) { + c = STIX_ISSMALLINT(fsc->literals[i])? + stx->class_smallinteger: STIX_CLASS (stx, fsc->literals[i]); + if (c != stx->class_character) continue; + + if (ch == STIX_CHAR_AT(stx,fsc->literals[i],0)) return i; + } + + literal = stix_instantiate ( + stx, stx->class_character, &ch, STIX_NULL, 0); + return __add_literal (fsc, literal); +} + +static int __add_string_literal ( + stix_t* fsc, const stix_char_t* str, stix_word_t size) +{ + stix_word_t i, c, literal; + stix_vm_t* stx = fsc->stx; + + for (i = 0; i < fsc->literal_count; i++) { + c = STIX_ISSMALLINT(fsc->literals[i])? 
+ stx->class_smallinteger: STIX_CLASS (stx, fsc->literals[i]); + if (c != stx->class_string) continue; + + if (stix_strxncmp (str, size, + STIX_DATA(stx,fsc->literals[i]), + STIX_SIZE(stx,fsc->literals[i])) == 0) return i; + } + + literal = stix_instantiate ( + stx, stx->class_string, STIX_NULL, str, size); + return __add_literal (fsc, literal); +} + +static int __add_symbol_literal ( + stix_t* fsc, const stix_char_t* str, stix_word_t size) +{ + stix_vm_t* stx = fsc->stx; + return __add_literal (fsc, stix_new_symbolx(stx, str, size)); +} + +static int finish_method (stix_t* fsc) +{ + stix_vm_t* stx = fsc->stx; + stix_class_t* class_obj; + stix_method_t* method_obj; + stix_word_t method, selector; + + STIX_ASSERT (fsc->bcd.size != 0); + + class_obj = (stix_class_t*) STIX_OBJPTR(stx, fsc->method_class); + + if (class_obj->methods == stx->nil) + { + /* TODO: reconfigure method dictionary size */ + class_obj->methods = stix_instantiate ( + stx, stx->class_system_dictionary, + STIX_NULL, STIX_NULL, 64); + } + STIX_ASSERT (class_obj->methods != stx->nil); + + selector = stix_new_symbolx ( + stx, fsc->met.name.buf, fsc->method_name.size); + + method = stix_instantiate(stx, stx->class_method, + STIX_NULL, fsc->literals, fsc->literal_count); + method_obj = (stix_method_t*)STIX_OBJPTR(stx, method); + + /* TODO: text saving must be optional */ + /*method_obj->text = stix_instantiate ( + stx, stx->class_string, STIX_NULL, + fsc->text, stix_strlen(fsc->text)); + */ + method_obj->selector = selector; + method_obj->bytecodes = stix_instantiate ( + stx, stx->class_bytearray, STIX_NULL, + fsc->bcd.buf, fsc->bcd.size); + + /* TODO: better way to store argument count & temporary count */ + method_obj->tmpcount = STIX_TO_SMALLINT(fsc->met.tmpr.count - fsc->met.tmpr.nargs); + method_obj->argcount = STIX_TO_SMALLINT(fsc->met.tmpr.nargs); + + stix_dict_put (stx, class_obj->methods, selector, method); + return 0; +} +#endif + + + + +#if 0 + +static int parse_statements (stix_t* fsc) +{ + 
/* + * ::= (ORIGINAL->maybe wrong) + * ( ['.'] ) | + * ( ['.' []]) + * ::= (REVISED->correct?) + * ['. []] + */ + + while (fsc->tok.type != STIX_FSC_TOK_EOF) + { + if (parse_statement (fsc) == -1) return -1; + + if (fsc->tok.type == STIX_FSC_TOK_PERIOD) + { + GET_TOKEN (fsc); + continue; + } + + if (fsc->tok.type != STIX_FSC_TOK_EOF) + { + fsc->errnum = STIX_FSC_ERROR_NO_PERIOD; + return -1; + } + } + + EMIT_CODE (fsc, RETURN_RECEIVER); + return 0; +} + +static int parse_block_statements (stix_t* fsc) +{ + while (fsc->tok.type != STIX_FSC_TOK_RBRACK && + fsc->tok.type != STIX_FSC_TOK_EOF) { + + if (parse_statement(fsc) == -1) return -1; + if (fsc->tok.type != STIX_FSC_TOK_PERIOD) break; + GET_TOKEN (fsc); + } + + return 0; +} + +static int parse_statement (stix_t* fsc) +{ + /* + * ::= | + * ::= returnOperator + * returnOperator ::= '^' + */ + + if (fsc->tok.type == STIX_FSC_TOK_RETURN) { + GET_TOKEN (fsc); + if (parse_expression(fsc) == -1) return -1; + EMIT_RETURN_FROM_MESSAGE (fsc); + } + else { + if (parse_expression(fsc) == -1) return -1; + } + + return 0; +} + +static int parse_expression (stix_t* fsc) +{ + /* + * ::= | + * ::= assignmentOperator + * ::= [ ] + * ::= identifier + * assignmentOperator ::= ':=' + */ + stix_vm_t* stx = fsc->stx; + + if (fsc->tok.type == STIX_FSC_TOK_IDENT) { + stix_char_t* ident = stix_tok_yield (&fsc->tok, 0); + if (ident == STIX_NULL) { + fsc->errnum = STIX_FSC_ERROR_MEMORY; + return -1; + } + + GET_TOKEN (fsc); + if (fsc->tok.type == STIX_FSC_TOK_ASSIGN) { + GET_TOKEN (fsc); + if (parse_assignment(fsc, ident) == -1) { + stix_free (ident); + return -1; + } + } + else { + if (parse_basic_expression(fsc, ident) == -1) { + stix_free (ident); + return -1; + } + } + + stix_free (ident); + } + else + { + if (parse_basic_expression(fsc, STIX_NULL) == -1) return -1; + } + + return 0; +} + +static int parse_basic_expression ( + stix_t* fsc, const stix_char_t* ident) +{ + /* + * ::= [ ] + */ + int is_super; + + if (parse_primary(fsc, 
ident, &is_super) == -1) return -1; + if (fsc->tok.type != STIX_FSC_TOK_EOF && + fsc->tok.type != STIX_FSC_TOK_PERIOD) + { + if (parse_message_continuation(fsc, is_super) == -1) return -1; + } + return 0; +} + +static int parse_assignment ( + stix_t* fsc, const stix_char_t* target) +{ + /* + * ::= assignmentOperator + */ + + stix_word_t i; + stix_vm_t* stx = fsc->stx; + + for (i = fsc->met.tmpr.nargs; i < fsc->met.tmpr.count; i++) + { + if (stix_strequal (target, fsc->met.tmpr.names[i])) + { + if (parse_expression(fsc) == -1) return -1; + EMIT_STORE_TEMPORARY_LOCATION (fsc, i); + return 0; + } + } + + if (stix_get_instance_variable_index (stx, fsc->method_class, target, &i) == 0) + { + if (parse_expression(fsc) == -1) return -1; + EMIT_STORE_RECEIVER_VARIABLE (fsc, i); + return 0; + } + + if (stix_lookup_class_variable (stx, fsc->method_class, target) != stx->nil) + { + if (parse_expression(fsc) == -1) return -1; + + /* TODO */ + EMIT_CODE_TEST (fsc, STIX_T("ASSIGN_CLASSVAR #"), target); + //EMIT_STORE_CLASS_VARIABLE (fsc, target); + return 0; + } + + /* TODO: IMPLEMENT POOL DICTIONARIES */ + + /* TODO: IMPLEMENT GLOBLAS, but i don't like this idea */ + + fsc->errnum = STIX_FSC_ERROR_UNDECLARED_NAME; + return -1; +} + +static int parse_primary ( + stix_t* fsc, const stix_char_t* ident, int* is_super) +{ + /* + * ::= + * identifier | | + * | ( '('')' ) + */ + + stix_vm_t* stx = fsc->stx; + + if (ident == STIX_NULL) + { + int pos; + stix_word_t literal; + + *is_super = stix_false; + + if (fsc->tok.type == STIX_FSC_TOK_IDENT) + { + if (parse_primary_ident(fsc, + fsc->tok.name.buffer, is_super) == -1) return -1; + GET_TOKEN (fsc); + } + else if (fsc->tok.type == STIX_FSC_TOK_CHRLIT) { + pos = __add_character_literal( + fsc, fsc->tok.name.buffer[0]); + if (pos == -1) return -1; + EMIT_PUSH_LITERAL_CONSTANT (fsc, pos); + GET_TOKEN (fsc); + } + else if (fsc->tok.type == STIX_FSC_TOK_STRLIT) { + pos = __add_string_literal (fsc, + fsc->tok.name.buffer, fsc->tok.name.size); 
+ if (pos == -1) return -1; + EMIT_PUSH_LITERAL_CONSTANT (fsc, pos); + GET_TOKEN (fsc); + } + else if (fsc->tok.type == STIX_FSC_TOK_NUMLIT) + { + /* TODO: other types of numbers, negative numbers, etc */ + stix_word_t tmp; + STIX_STRTOI (tmp, fsc->tok.name.buffer, STIX_NULL, 10); + literal = STIX_TO_SMALLINT(tmp); + pos = __add_literal(fsc, literal); + if (pos == -1) return -1; + EMIT_PUSH_LITERAL_CONSTANT (fsc, pos); + GET_TOKEN (fsc); + } + else if (fsc->tok.type == STIX_FSC_TOK_SYMLIT) { + pos = __add_symbol_literal (fsc, + fsc->tok.name.buffer, fsc->tok.name.size); + if (pos == -1) return -1; + EMIT_PUSH_LITERAL_CONSTANT (fsc, pos); + GET_TOKEN (fsc); + } + else if (fsc->tok.type == STIX_FSC_TOK_LBRACK) { + GET_TOKEN (fsc); + if (parse_block_constructor(fsc) == -1) return -1; + } + else if (fsc->tok.type == STIX_FSC_TOK_APAREN) { + /* TODO: array literal */ + } + else if (fsc->tok.type == STIX_FSC_TOK_LPAREN) { + GET_TOKEN (fsc); + if (parse_expression(fsc) == -1) return -1; + if (fsc->tok.type != STIX_FSC_TOK_RPAREN) { + fsc->errnum = STIX_FSC_ERROR_NO_RPAREN; + return -1; + } + GET_TOKEN (fsc); + } + else { + fsc->errnum = STIX_FSC_ERROR_PRIMARY; + return -1; + } + } + else { + /*if (parse_primary_ident(fsc, fsc->tok.name.buffer) == -1) return -1;*/ + if (parse_primary_ident(fsc, ident, is_super) == -1) return -1; + } + + return 0; +} + +static int parse_primary_ident ( + stix_t* fsc, const stix_char_t* ident, int* is_super) +{ + stix_word_t i; + stix_vm_t* stx = fsc->stx; + + *is_super = stix_false; + + if (stix_strequal(ident, STIX_T("self"))) + { + EMIT_CODE (fsc, PUSH_RECEIVER); + return 0; + } + else if (stix_strequal(ident, STIX_T("super"))) + { + *is_super = stix_true; + EMIT_CODE (fsc, PUSH_RECEIVER); + return 0; + } + else if (stix_strequal(ident, STIX_T("nil"))) + { + EMIT_CODE (fsc, PUSH_NIL); + return 0; + } + else if (stix_strequal(ident, STIX_T("true"))) + { + EMIT_CODE (fsc, PUSH_TRUE); + return 0; + } + else if (stix_strequal(ident, 
STIX_T("false"))) + { + EMIT_CODE (fsc, PUSH_FALSE); + return 0; + } + + /* Refer to parse_assignment for identifier lookup */ + + for (i = 0; i < fsc->met.tmpr.count; i++) + { + if (stix_strequal(ident, fsc->met.tmpr.names[i])) + { + EMIT_PUSH_TEMPORARY_LOCATION (fsc, i); + return 0; + } + } + + if (get_instance_variable_index ( + stx, fsc->method_class, ident, &i) == 0) + { + EMIT_PUSH_RECEIVER_VARIABLE (fsc, i); + return 0; + } + + /* TODO: what is the best way to look up a class variable? */ + /* 1. Use the class containing it and using its position */ + /* 2. Use a primitive method after pushing the name as a symbol */ + /* 3. Implement a vm instruction to do it */ +/* + if (stix_lookup_class_variable ( + stx, fsc->method_class, ident) != stx->nil) { + //EMIT_LOOKUP_CLASS_VARIABLE (fsc, ident); + return 0; + } +*/ + + /* TODO: IMPLEMENT POOL DICTIONARIES */ + + /* TODO: IMPLEMENT GLOBLAS, but i don't like this idea */ + + fsc->errnum = STIX_FSC_ERROR_UNDECLARED_NAME; + return -1; +} + +static int parse_block_constructor (stix_t* fsc) +{ + /* + * ::= '[' ']' + * ::= [* '|'] + * [] [] + * ::= ':' identifier + */ + + if (fsc->tok.type == STIX_FSC_TOK_COLON) + { + do + { + GET_TOKEN (fsc); + + if (fsc->tok.type != STIX_FSC_TOK_IDENT) + { + fsc->errnum = STIX_FSC_ERROR_BLOCK_ARGUMENT_NAME; + return -1; + } + + /* TODO : store block arguments */ + GET_TOKEN (fsc); + } + while (fsc->tok.type == STIX_FSC_TOK_COLON); + + if (!is_vbar_tok(&fsc->tok)) + { + fsc->errnum = STIX_FSC_ERROR_BLOCK_ARGUMENT_LIST; + return -1; + } + + GET_TOKEN (fsc); + } + + /* TODO: create a block closure */ + if (parse_method_temporaries(fsc) == -1) return -1; + if (parse_block_statements(fsc) == -1) return -1; + + if (fsc->tok.type != STIX_FSC_TOK_RBRACK) + { + fsc->errnum = STIX_FSC_ERROR_BLOCK_NOT_CLOSED; + return -1; + } + + GET_TOKEN (fsc); + + /* TODO: do special treatment for block closures */ + + return 0; +} + +static int parse_message_continuation ( + stix_t* fsc, int is_super) +{ + 
/* + * ::= + * (+ * [] ) | + * (+ [] ) | + * + * ::= (';' )* + */ + if (parse_keyword_message(fsc, is_super) == -1) return -1; + + while (fsc->tok.type == STIX_FSC_TOK_SEMICOLON) + { + EMIT_CODE_TEST (fsc, STIX_T("DoSpecial(DUP_RECEIVER(CASCADE))"), STIX_T("")); + GET_TOKEN (fsc); + + if (parse_keyword_message(fsc, stix_false) == -1) return -1; + EMIT_CODE_TEST (fsc, STIX_T("DoSpecial(POP_TOP)"), STIX_T("")); + } + + return 0; +} + +static int parse_keyword_message (stix_t* fsc, int is_super) +{ + /* + * ::= (keyword )+ + * ::= * * + */ + + stix_name_t name; + stix_word_t pos; + int is_super2; + int nargs = 0, n; + + if (parse_binary_message (fsc, is_super) == -1) return -1; + if (fsc->tok.type != STIX_FSC_TOK_KEYWORD) return 0; + + if (stix_name_open(&name, 0) == STIX_NULL) { + fsc->errnum = STIX_FSC_ERROR_MEMORY; + return -1; + } + + do + { + if (stix_name_adds(&name, fsc->tok.name.buffer) == -1) + { + fsc->errnum = STIX_FSC_ERROR_MEMORY; + stix_name_close (&name); + return -1; + } + + GET_TOKEN (fsc); + if (parse_primary(fsc, STIX_NULL, &is_super2) == -1) + { + stix_name_close (&name); + return -1; + } + + if (parse_binary_message(fsc, is_super2) == -1) + { + stix_name_close (&name); + return -1; + } + + nargs++; + /* TODO: check if it has too many arguments.. */ + } + while (fsc->tok.type == STIX_FSC_TOK_KEYWORD); + + pos = __add_symbol_literal (fsc, name.buffer, name.size); + if (pos == -1) + { + stix_name_close (&name); + return -1; + } + + n = (is_super)? 
emit_send_to_super(fsc,nargs,pos): + emit_send_to_self(fsc,nargs,pos); + if (n == -1) { + stix_name_close (&name); + return -1; + } + + stix_name_close (&name); + return 0; +} + +static int parse_binary_message (stix_t* fsc, int is_super) +{ + /* + * ::= binarySelector + * ::= * + */ + stix_word_t pos; + int is_super2; + int n; + + if (parse_unary_message (fsc, is_super) == -1) return -1; + + while (fsc->tok.type == STIX_FSC_TOK_BINSEL) + { + stix_char_t* op = stix_tok_yield (&fsc->tok, 0); + if (op == STIX_NULL) { + fsc->errnum = STIX_FSC_ERROR_MEMORY; + return -1; + } + + GET_TOKEN (fsc); + if (parse_primary(fsc, STIX_NULL, &is_super2) == -1) { + stix_free (op); + return -1; + } + + if (parse_unary_message(fsc, is_super2) == -1) { + stix_free (op); + return -1; + } + + pos = __add_symbol_literal (fsc, op, stix_strlen(op)); + if (pos == -1) { + stix_free (op); + return -1; + } + + n = (is_super)? + emit_send_to_super(fsc,2,pos): + emit_send_to_self(fsc,2,pos); + if (n == -1) { + stix_free (op); + return -1; + } + + stix_free (op); + } + + return 0; +} + +static int parse_unary_message (stix_t* fsc, int is_super) +{ + /* ::= unarySelector */ + + stix_word_t pos; + int n; + + while (fsc->tok.type == STIX_FSC_TOK_IDENT) + { + pos = __add_symbol_literal (fsc, + fsc->tok.name.buffer, fsc->tok.name.size); + if (pos == -1) return -1; + + n = (is_super)? 
emit_send_to_super (fsc, 0, pos): + emit_send_to_self (fsc, 0, pos); + if (n == -1) return -1; + + GET_TOKEN (fsc); + } + + return 0; +} + +static int parse_method (stix_t* fsc, stix_word_t method_class, void* input) +{ + /* + * ::= + * [] [] [] + */ + + GET_CHAR (fsc); + GET_TOKEN (fsc); + + stix_name_clear (&fsc->method_name); + stix_arr_clear (&fsc->bcd); + + while (fsc->met.tmpr.count > 0) + { + stix_free (fsc->met.tmpr.names[--fsc->met.tmpr.count]); + } + fsc->met.tmpr.nargs = 0; + fsc->literal_count = 0; + + if (parse_method_name_pattern(fsc) <= -1 || + parse_method_temporaries(fsc) <= -1 || + parse_method_primitive(fsc) <= -1 || + parse_statements(fsc) <= -1 || + finish_method (fsc) <= -1) return -1; + + return 0; +} + +#endif + +static int get_class_type (const stix_char_t* str, class_type_t* type) +{ + static struct + { + stix_char_t* word; + class_type_t type; + } tab[] = + { + { STIX_T("class"), CLASS_NORMAL }, + { STIX_T("variableClass"), CLASS_VARIABLE }, + { STIX_T("variableByteClass"), CLASS_VARIABLE_BYTE }, + { STIX_T("variableCharClass"), CLASS_VARIABLE_CHAR } + }; + + int i; + + for (i = 0; i < STIX_COUNTOF(tab); i++) + { + if (stix_strequal(str, tab[i].word)) + { + *type = tab[i].type; + return 0; + } + } + + return -1; +} + +static int get_vardef_type (const stix_char_t* str, vardef_type_t* type) +{ + static struct + { + stix_char_t* word; + class_type_t type; + } tab[] = + { + { STIX_T("|-"), VARDEF_INSTANCE }, + { STIX_T("|+"), VARDEF_CLASS_INSTANCE }, + { STIX_T("|*"), VARDEF_CLASS } + /* TODO: shared pools */ + }; + + int i; + + for (i = 0; i < STIX_COUNTOF(tab); i++) + { + if (stix_strequal(str, tab[i].word)) + { + *type = tab[i].type; + return 0; + } + } + + return -1; +} + +static int compile_vardef (stix_t* fsc, vardef_type_t vardef_type) +{ + return -1; +} + +static int parse_unary_pattern (stix_t* fsc) +{ + STIX_ASSERT (fsc->met.name.len == 0); + STIX_ASSERT (fsc->met.tmpr.nargs == 0); + +/* TODO: check if the method name exists */ + + 
if (fsc->tok.name.len >= STIX_COUNTOF(fsc->met.name.buf)) + { + stix_seterror (fsc, STIX_FSC_EMETNTL, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + /* collect the method name */ + fsc->met.name.len = stix_strcpy (fsc->met.name.buf, fsc->tok.name.ptr); + GET_TOKEN (fsc); + return 0; +} + +static int parse_binary_pattern (stix_t* fsc) +{ + STIX_ASSERT (fsc->met.name.len == 0); + STIX_ASSERT (fsc->met.tmpr.nargs == 0); + +/* TODO: check if the method name exists */ + if (fsc->tok.name.len >= STIX_COUNTOF(fsc->met.name.buf)) + { + stix_seterror (fsc, STIX_FSC_EMETNTL, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + /* collect the method name */ + fsc->met.name.len = stix_strcpy (fsc->met.name.buf, fsc->tok.name.ptr); + GET_TOKEN (fsc); + + /* collect the argument name */ + if (fsc->tok.type != STIX_FSC_TOK_IDENT) + { + stix_seterror (fsc, STIX_FSC_EILARGN, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + STIX_ASSERT (fsc->met.tmpr.nargs == 0); + /* + * check if there are too many arguments defined. + * however, in this function, this condition will never be met. + * so let me just comment out the check. 
+ * + if (fsc->met.tmpr.nargs >= STIX_COUNTOF(fsc->met.tmpr.names)) + { + stix_seterror (fsc, STIX_FSC_ETMARGS, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + */ + +/* TODO: check for duplicate entries...in instvars */ + + if (fsc->tok.name.len >= STIX_COUNTOF(fsc->met.tmpr.names[fsc->met.tmpr.count])) + { + /* argument name is too long */ + stix_seterror (fsc, STIX_FSC_EARGNTL, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + stix_strcpy (fsc->met.tmpr.names[fsc->met.tmpr.nargs], fsc->tok.name.ptr); + fsc->met.tmpr.nargs++; + + GET_TOKEN (fsc); + return 0; +} + +static int parse_keyword_pattern (stix_t* fsc) +{ + STIX_ASSERT (fsc->met.name.len == 0); + STIX_ASSERT (fsc->met.tmpr.nargs == 0); + + do + { + if (fsc->tok.name.len + fsc->met.name.len >= STIX_COUNTOF(fsc->met.name.buf)) + { + stix_seterror (fsc, STIX_FSC_EMETNTL, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + fsc->met.name.len += stix_strcpy (&fsc->met.name.buf[fsc->met.name.len], fsc->tok.name.ptr); + + GET_TOKEN (fsc); + if (fsc->tok.type != STIX_FSC_TOK_IDENT || is_tok_pseudovar(fsc)) + { + stix_seterror (fsc, STIX_FSC_EILARGN, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + if (fsc->met.tmpr.nargs >= STIX_COUNTOF(fsc->met.tmpr.names)) + { + /* too many arguments */ + stix_seterror (fsc, STIX_FSC_ETMARGS, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + +/* TODO: check for duplicate entries...in instvars/arguments */ + if (fsc->tok.name.len >= STIX_COUNTOF(fsc->met.tmpr.names[fsc->met.tmpr.nargs])) + { + /* argument name is too long */ + stix_seterror (fsc, STIX_FSC_EARGNTL, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + stix_strcpy (fsc->met.tmpr.names[fsc->met.tmpr.nargs], fsc->tok.name.ptr); + fsc->met.tmpr.nargs++; + + GET_TOKEN (fsc); + } + while (fsc->tok.type == STIX_FSC_TOK_KEYWORD); + + /* TODO: check if the method name exists */ + /* if it exists, collapse arguments */ + + return 0; +} + +static int parse_method_name_pattern (stix_t* fsc) +{ + /* + * ::= | | + * ::= 
unarySelector + * ::= binarySelector + * ::= (keyword )+ + */ + int n; + + STIX_ASSERT (fsc->met.tmpr.count == 0); + + switch (fsc->tok.type) + { + case STIX_FSC_TOK_IDENT: + n = parse_unary_pattern (fsc); + break; + + case STIX_FSC_TOK_BINSEL: + n = parse_binary_pattern (fsc); + break; + + case STIX_FSC_TOK_KEYWORD: + n = parse_keyword_pattern (fsc); + break; + + default: + stix_seterror (fsc, STIX_FSC_EILMETN, &fsc->tok.name, &fsc->tok.loc); + n = -1; + } + + /* the total number of temporaries is equal to the number of arguments + * after having processed the message pattern */ + fsc->met.tmpr.count = fsc->met.tmpr.nargs; + return n; +} + +static int parse_method_temporaries (stix_t* fsc) +{ + /* + * ::= '|' '|' + * ::= identifier* + */ + + if (!is_tok_binsel (fsc, STIX_T("|"))) return 0; + + GET_TOKEN (fsc); + while (fsc->tok.type == STIX_FSC_TOK_IDENT) + { + if (fsc->met.tmpr.count >= STIX_COUNTOF(fsc->met.tmpr.names)) + { + stix_seterror (fsc, STIX_FSC_ETMTMPS, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + if (is_tok_pseudovar(fsc)) + { + stix_seterror (fsc, STIX_FSC_EILTMPN, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + +/* TODO: check for duplicate entries...in instvars/arguments/temporaries */ + + if (fsc->tok.name.len >= STIX_COUNTOF(fsc->met.tmpr.names[fsc->met.tmpr.count])) + { + /* temporary variable name is too long */ + stix_seterror (fsc, STIX_FSC_ETMPNTL, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + stix_strcpy (fsc->met.tmpr.names[fsc->met.tmpr.count], fsc->tok.name.ptr); + fsc->met.tmpr.count++; + + GET_TOKEN (fsc); + } + + if (!is_tok_binsel (fsc, STIX_T("|"))) return 0; + { + stix_seterror (fsc, STIX_FSC_EVRTBAR, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + GET_TOKEN (fsc); + return 0; +} + +static int parse_method_primitive (stix_t* fsc) +{ + /* + * ::= '<' 'primitive:' number '>' + */ + + int prim_no; + + if (!is_tok_binsel (fsc, STIX_T("<"))) return 0; + + GET_TOKEN (fsc); + if (fsc->tok.type != STIX_FSC_TOK_KEYWORD 
|| + !stix_strequal (fsc->tok.name.ptr, STIX_T("primitive:"))) + { + fsc->errnum = STIX_FSC_ERROR_PRIMITIVE_KEYWORD; + return -1; + } + + GET_TOKEN (fsc); /* TODO: only integer */ + if (fsc->tok.type != STIX_FSC_TOK_NUMLIT) + { + fsc->errnum = STIX_FSC_ERROR_PRIMITIVE_NUMBER; + return -1; + } + +/*TODO: more checks the validity of the primitive number */ + if (!stix_stristype(fsc->tok.name.buffer, stix_isdigit)) + { + fsc->errnum = STIX_FSC_ERROR_PRIMITIVE_NUMBER; + return -1; + } + + STIX_STRTOI (prim_no, fsc->tok.name.buffer, STIX_NULL, 10); + if (prim_no < 0 || prim_no > 0xFF) + { + fsc->errnum = STIX_FSC_ERROR_PRIMITIVE_NUMBER_RANGE; + return -1; + } + + EMIT_DO_PRIMITIVE (fsc, prim_no); + + GET_TOKEN (fsc); + if (!is_tok_binsel (fsc, STIX_T(">"))) return 0; + { + stix_seterror (fsc, STIX_FSC_ERABRCK, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + GET_TOKEN (fsc); + return 0; +} + + +static int compile_method (stix_t* fsc, int instance) +{ + /* clear data required to compile a method */ + STIX_MEMSET (&fsc->met, 0, STIX_SIZEOF(fsc->met)); + +#if 0 + /* clear the byte-code buffer */ + fsc->bcd.len = 0; +#endif + + if (parse_method_name_pattern (fsc) <= -1) return -1; + + if (fsc->tok.type != STIX_FSC_TOK_LBRACE) + { + /* { expected */ + stix_seterror (fsc, STIX_FSC_ELBRACE, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + GET_TOKEN (fsc); + + if (parse_method_temporaries (fsc) <= -1 || + parse_method_primitive (fsc) <= -1 /*|| + parse_statements (fsc) <= -1 || + finish_method (fsc) <= -1*/) return -1; + + if (fsc->tok.type != STIX_FSC_TOK_RBRACE) + { + /* } expected */ + stix_seterror (fsc, STIX_FSC_ERBRACE, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + GET_TOKEN (fsc); + +wprintf (L"METHOD NAME ==> [%S] temporaries => %d\n", fsc->met.name.buf, fsc->met.tmpr.count); + return 0; +} + +static int compile_classdef (stix_t* fsc, class_type_t class_type) +{ + stix_oop_t oop1, oop2; + int extend; + + if (fsc->tok.type != STIX_FSC_TOK_IDENT) + { + /* class 
name expected. */ + stix_seterror (fsc, STIX_FSC_ECLSNAM, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + if (stix_vm_findclass (fsc->vm, fsc->tok.name.ptr, &oop1) <= -1) + { + /* this class is a new class. you can only extend an existing class */ + GET_TOKEN (fsc); + if (fsc->tok.type == STIX_FSC_TOK_IDENT && + stix_strequal (fsc->tok.name.ptr, STIX_T("extend"))) + { + GET_TOKEN (fsc); + if (fsc->tok.type != STIX_FSC_TOK_IDENT) + { + stix_seterror (fsc, STIX_FSC_ECLSNAM, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + if (stix_vm_findclass (fsc->vm, fsc->tok.name.ptr, &oop2) <= -1) + { + stix_seterror (fsc, STIX_FSC_EILBCLS, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + } + else + { + stix_seterror (fsc, STIX_FSC_EEXTEND, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + extend = 1; + } + else + { +/* TODO: check the existing class layout againt class_type (variableByteClass, variableCharClass, class, etc). + * if no match, return -1 */ + extend = 0; + } + + GET_TOKEN (fsc); + if (fsc->tok.type != STIX_FSC_TOK_LBRACE) + { + /* { expected */ + stix_seterror (fsc, STIX_FSC_ELBRACE, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + GET_TOKEN (fsc); + + if (!extend) + { + while (1) + { + vardef_type_t vardef_type; + + /* TODO: compile other components including various pragma statements + * + * + */ + if (fsc->tok.type == STIX_FSC_TOK_BINSEL && + get_vardef_type (fsc->tok.name.ptr, &vardef_type) >= 0) + { + if (compile_vardef (fsc, vardef_type) <= -1) return -1; + } + else + { + break; + } + } + } + + while (1) + { + /* TODO: compile other components including various pragma statements + * + * + */ + if (is_tok_binsel (fsc, STIX_T("-"))) + { + GET_TOKEN (fsc); + if (compile_method (fsc, 1) <= -1) return -1; + } + else if (is_tok_binsel (fsc, STIX_T("+"))) + { + GET_TOKEN (fsc); + if (compile_method (fsc, 0) <= -1) return -1; + } + else + { + break; + } + } + + if (fsc->tok.type != STIX_FSC_TOK_RBRACE) + { + /* TODO: } expected */ + stix_seterror 
(fsc, STIX_FSC_ERBRACE, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + GET_TOKEN (fsc); + return 0; +} + + +static int compile_directive (stix_t* fsc) +{ + if (fsc->tok.type == STIX_FSC_TOK_IDENT) + { + class_type_t class_type; + + if (get_class_type (fsc->tok.name.ptr, &class_type) >= 0) + { + if (get_token (fsc) <= -1) return -1; + return compile_classdef (fsc, class_type); + } + else if (stix_strequal (fsc->tok.name.ptr, STIX_T("include"))) + { + if (get_token (fsc) <= -1) return -1; + + if (fsc->tok.type != STIX_FSC_TOK_STRLIT) + { + stix_seterror (fsc, STIX_FSC_ESTRLIT, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + if (begin_include (fsc) <= -1) return -1; + } + else + { + stix_seterror (fsc, STIX_FSC_EILDIR, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + } + else + { + stix_seterror (fsc, STIX_FSC_EILDIR, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + + return 0; +} + +static int compile_stream (stix_t* fsc) +{ + GET_CHAR (fsc); + GET_TOKEN (fsc); + + while (fsc->tok.type != STIX_FSC_TOK_EOF) + { + if (is_tok_binsel (fsc, STIX_T("@"))) + { + GET_TOKEN (fsc); + if (compile_directive (fsc) <= -1) return -1; + } + /* TODO: normal smalltalk message sending expressions */ + else + { + stix_seterror (fsc, STIX_FSC_EILTTOK, &fsc->tok.name, &fsc->tok.loc); + return -1; + } + } + + return 0; +} +#endif + +int stix_compile (stix_t* stix, stix_ioimpl_t io) +{ + int n; + + if (!io) + { + stix->errnum = STIX_EINVAL; + return -1; + } + + STIX_ASSERT (stix->c == STIX_NULL); + + stix->c = stix_callocmem (stix, STIX_SIZEOF(*stix->c)); + if (!stix->c) return -1; + stix->c->impl = io; + stix->c->arg.line = 1; + stix->c->arg.colm = 1; + stix->c->curinp = &stix->c->arg; + clear_sio_names (stix); + + /* open the top-level stream */ + n = stix->c->impl (stix, STIX_IO_OPEN, stix->c->curinp); + if (n <= -1) return -1; + + if (compile_stream (stix) <= -1) goto oops; + + /* close the stream */ + STIX_ASSERT (stix->c->curinp == &stix->c->arg); + stix->c->impl (stix, 
STIX_IO_CLOSE, stix->c->curinp); + + stix_freemem (stix, stix->c); + stix->c = STIX_NULL; + return 0; + +oops: + /* an error occurred and control has reached here + * probably, some included files might not have been + * closed. close them */ + while (stix->c->curinp != &stix->c->arg) + { + stix_ioarg_t* prev; + + /* nothing much to do about a close error */ + stix->c->impl (stix, STIX_IO_CLOSE, stix->c->curinp); + + prev = stix->c->curinp->includer; + STIX_ASSERT (stix->c->curinp->name != STIX_NULL); + STIX_MMGR_FREE (stix->mmgr, stix->c->curinp); + stix->c->curinp = prev; + } + + stix->c->impl (stix, STIX_IO_CLOSE, stix->c->curinp); + + stix_freemem (stix, stix->c); + stix->c = STIX_NULL; + return -1; +} + + + diff --git a/stix/lib/dic.c b/stix/lib/dic.c index fc02c1f..d0c30dc 100644 --- a/stix/lib/dic.c +++ b/stix/lib/dic.c @@ -74,13 +74,6 @@ static stix_oop_t find_or_insert (stix_t* stix, stix_oop_char_t key, stix_oop_t index = stix_hashchars(key->slot, STIX_OBJ_GET_SIZE(key)) % STIX_OBJ_GET_SIZE(stix->sysdic->bucket); -{ -int i; -printf ("FINDING IN SYSDIC ["); -for (i = 0; i < STIX_OBJ_GET_SIZE(key); i++) printf ("%c", key->slot[i]); -printf ("]\n"); -} - while (stix->sysdic->bucket->slot[index] != stix->_nil) { ass = (stix_oop_association_t)stix->sysdic->bucket->slot[index]; @@ -99,11 +92,9 @@ printf ("]\n"); if (value == STIX_NULL) { - /* + /* when value is STIX_NULL, perform no insertion */ stix->errnum = STIX_ENOENT; return STIX_NULL; - */ - return stix->_nil; } stix_pushtmp (stix, (stix_oop_t*)&key); tmp_count++; diff --git a/stix/lib/ignite.c b/stix/lib/ignite.c index 2cbf3da..059ea7a 100644 --- a/stix/lib/ignite.c +++ b/stix/lib/ignite.c @@ -204,14 +204,15 @@ static int ignite_3 (stix_t* stix) stix_oop_t* stix_ptr; stix_ptr = &stix->_stix; + /* The loop here repies on the proper order of fields in stix_t. 
+ * Be sure to keep in sync the order of items in symnames and + * the releated fields of stix_t */ for (i = 0; i < STIX_COUNTOF(symnames); i++) { sym = stix_makesymbol (stix, symnames[i].str, symnames[i].len); - //sym = stix_makesymbol (stix, symnames[0].str, symnames[0].len); if (!sym) return -1; if (!stix_putatsysdic (stix, sym, *stix_ptr)) return -1; - stix_ptr++; } diff --git a/stix/lib/main.c b/stix/lib/main.c index 23321b2..1f1866b 100644 --- a/stix/lib/main.c +++ b/stix/lib/main.c @@ -29,6 +29,14 @@ #include #include + +typedef struct xtn_t xtn_t; +struct xtn_t +{ + char source_path[1024]; +}; + + static void* sys_alloc (stix_mmgr_t* mmgr, stix_size_t size) { return malloc (size); @@ -52,6 +60,69 @@ static stix_mmgr_t sys_mmgr = STIX_NULL }; + +static STIX_INLINE stix_oow_t open_input (stix_t* stix, stix_ioarg_t* arg) +{ + if (arg->includer) + { + /* includee */ + xtn_t* xtn = stix_getxtn(stix); + + arg->handle = fopen (xtn->source_path, "r"); + if (!arg->handle) + { + stix_seterrnum (stix, STIX_EIOERR); + return -1; + } + } + else + { + /* main stream */ + /*char tmp[PATH_MAX];*/ + } +} + + +static STIX_INLINE stix_oow_t read_input (stix_t* stix, stix_ioarg_t* arg) +{ + STIX_ASSERT (arg->handle != STIX_NULL); + if (fread (arg->buf, STIX_SIZEOF(arg->buf[0]), STIX_COUNTOF(arg->buf), arg->handle) == 0) + { + if (ferror(arg->handle)) + { + } + } + + return 0; +} + +static STIX_INLINE stix_oow_t close_input (stix_t* stix, stix_ioarg_t* arg) +{ + STIX_ASSERT (arg->handle != STIX_NULL); + fclose (arg->handle); + return 0; +} +/* TODO: IMPLEMENT PROPER INPUT HANDLER */ + +static stix_oow_t input_handler (stix_t* stix, stix_iocmd_t cmd, stix_ioarg_t* arg) +{ + switch (cmd) + { + case STIX_IO_OPEN: + return open_input (stix, arg); + + case STIX_IO_CLOSE: + return close_input (stix, arg); + + case STIX_IO_READ: + return read_input (stix, arg); + + default: + stix->errnum = STIX_EINTERN; + return -1; + } +} + static void dump_symbol_table (stix_t* stix) { stix_oow_t 
i, j; @@ -95,7 +166,7 @@ int main (int argc, char* argv[]) (unsigned long int)STIX_CLASS_SPEC_INDEXED_TYPE(x)); } - stix = stix_open (&sys_mmgr, 0, 512000lu, STIX_NULL); + stix = stix_open (&sys_mmgr, STIX_SIZEOF(xtn_t), 512000lu, STIX_NULL); if (!stix) { printf ("cannot open stix\n"); @@ -108,14 +179,13 @@ int main (int argc, char* argv[]) stix_setoption (stix, STIX_DFL_SYSDIC_SIZE, &symtab_size); } - if (stix_ignite(stix) <= -1) + if (stix_ignite (stix) <= -1) { printf ("cannot ignite stix\n"); stix_close (stix); return -1; } - { stix_char_t x[] = { 'S', 't', 'r', 'i', 'n', 'g', '\0' }; stix_char_t y[] = { 'S', 'y', 'm', 'b', 'o', 'l', '\0' }; @@ -135,6 +205,15 @@ a = stix_findsymbol (stix, x, 6); printf ("%p\n", a); dump_symbol_table (stix); } + + + if (stix_compile (stix, input_handler) <= -1) + { + printf ("cannot compile code\n"); + stix_close (stix); + return -1; + } + stix_close (stix); return 0; diff --git a/stix/lib/memo.txt b/stix/lib/memo.txt new file mode 100644 index 0000000..6abbd97 --- /dev/null +++ b/stix/lib/memo.txt @@ -0,0 +1,620 @@ + +/* + * Multi-Process within a single threaded-process. + * How to embed in a single threaded web server + * +* +* stix_exec +* VM(shceduler) ---> Context1(obj1,method1) +* ---> Context2(obj2,method2) +* ---> Context3(obj3,method3) +* +* all functions must be asynchronous +* blocking functions will block scheduler. + */ + + + + + +class Stix::Stix # Namespace name is indicated by :: +{ +} + +class Stix::Array +{ + + makeSymbol: aString + { + | s | + + s := Symbol new: aString. # Symbol belongs to the Stix namespace. + ^s. + } +} + + +A name space is stored in the namespace table of Stix.Namespaces. + + +Stix.Symbols - symbols are global. not affected by namespaces. 
+Stix.Sysdict - + (#QSE => Namespace( Another Sysdict)) + ( + +class Stix::Namespace +{ +} + +class Stix::Class +{ +} + +Stix.Namespaces is a system dictionary + +class QSE::Array +{ +} + +class QSE::Tiara::Array +{ + +} + + +Stix.Namespaces -> 'QSE' + 'QSE::Tiara' + + + +------------------------------------------------------------ + +ARRAY CONSTANT TO ALLOW DYNAMIC VALUES. + +#( ...... ) array literal +in original smalltalk, a block can't be placed inside the array literal + arrayConstant := '#' array + array := "(" { number | string | symbol | array | characterConstant }* ")". +So #(1 2 [^20]) is illegal. + +if a block is there, treat it as a valid stix expression and evaluate it. + +#(1 2 [1 + 2] 5) +t = Array new: 4. +t at: 1 put: 1. +t at: 2 put: 2. +t at: 3 put: (1 + 2). +t at: 4 put: 5. + +Evaluate the expressions in the array first +Create an array +Put the right element. + +----------------------------------------------- +command line + +libstix.a + +stix stix.im Class1.st Class2.st Main.st Main + --> load the image, compile Class1.st, compile Class2.st compile Main.st + --> + + +stix stix.im + --> load the image + +------------------------------------------------------------------------- + +#!/usr/bin/stix + +################################### +## Main.st +################################### + +#include 'Class1.st' +#include 'Class2.st' + +#class(#byte) Association(Magnitude) +{ + declare a, b, c. + declare(#class_instance) x. + declare(#class) MAX_SIZE. + + function(#class) initialize + { + MAX_SIZE := 20. + + true whileTrue: [ + Stdout print: 10. + ]. + } + + function(#class) new: anInteger + { + Stix error: 'invalid message'. + } +} + +#main + | a | + + a := Class1 new. + Stdout format: #( 1 2 [a toInteger] ) with: '%.5d %.6d\n'. + ^0. + +------------------------------------------------------------------------- +The statements after the #main directives are compiled as a class method of Stix. +That is, 'Stix::main'. It becomes the entry point. 
+ +------------------------------------------------------------------------- + +If no #main directive is found, there is no official entry point. +However, if you have the initialize class method, it's invoked when a class +is compiled, the statement in the class is executed before #main. +if the statement creates a certain loop, it can act as an entry point as well. + +-------------------------------------------------------------------------- + +Top level directive +#main, #class, #include, + +#include is available everywhere. It doesn't have to be in the top level. +Do i need #import? + + +--------------------------------------------------------------------------- + +if there are multiple #main, do i need to concatenate all? +or disallow only 1 #main?? + +--------------------------------------------------------------------------- + +#namespace directive? + +#namespace Stix::Compiler + +naming convention for multiple ?? . conflicts with the statement terminator. +:: is ok a single : is used for various purposes but more than 1 is illegal in smalltalk. +so use :: as a namespace separator. + + +Relative naming and absolute naming? + Stix::Compiler <- is Stix the absolute top or a subname class under the current space? + ::Stix::Compiler <- i don't like this + +---------------------------------------------------------------------------- + + + + + + + + + + + + + + + + + + + + + + + + + + +" + Stix + Class + NilObject + Object + NilObject + Collection + IndexedCollection + FixedSizedCollection + Array + ByteArray + String + Symbol + Set + Dictionary + SystemDictionary + SymbolSet + Magnitude + Association + Character + Number + Integer + SmallInteger + LargeInteger +" + + +class Stix +{ + + + + alloc + { + + } + + + new + { + ^self alloc init. + } + + - init + { + ^self. 
+ } + + - finalize + { + } + + + findClass: aString + { + + | a b c | + } + + +} + +class Class extends Stix +{ +} + +class NilObject extends Stix +{ +} + +class Object extends Stix +{ +} + + + +----------------------------------------- +class variable +and class instance variable +----------------------------------------- + +A CV X Y + CIV x y + +B CV Z + civ z + +C civ q + + +A: getX + return x (return instance variable 1) + +B getX + return A'X (return instance variable 3) + +x is index 1. +y is index 2. +z is index 3. +X is index 3 of A. +Y is index 3 of A. +Z is index 2 of B. +q is index 4 of C. + + +A has x y X Y +B has x y z Z +C has x y z q + +place class instance variables before class variables. + +------------------------------------------- + + +class Magnitude extends Stix +{ +} + +%include 'Association.st' + +%class Association(Magnitude) +{ +%category(Association class) + +%constant + ABC := XXX + BCD := KKK + TTT := 20 + +%self(private) + + +%self(instance creation) + + | Key Value | "class variables" <--- index + | xxx yyy | "class instance variables" <--- index + + key: aKey + { + ^self key: aKey value: nil. + } + + key: aKey value: aValue + { + | ass | + ass := self new. + ass key: aKey value: aValue. + ^ass. + } + +%instance(initialization) + | key value | "instance variables" + + key: aKey value: aValue + { + key := aKey. + value := aValue. + } + + key + { + ^key + } + + value + { + ^value + } + + value: value + { + self->value := aValue + } + + = anAssociation + { + ^self->key = anAssociation key. 
+ } + + hash + { + ^self->key hash + } +} + + + +"Creates a new class Association inheriting nil" +%class Association(nil) +{ + %func more + { + ^self + } +} + + +"Extends an existing Association class" +%class Association +{ +} + + +class Character extends Magnitude +{ +} + +class Number extends Magnitude +{ +} + +class Integer extends Number +{ +} + +class SmallInteger extends Integer +{ +} + +class LargeInteger extends Integer +{ + +} + + + +Association +{ +%class + | x y z | + + value: xxx + { + } +} + +Association: Magnitude +{ +} + +Association: <- for extending +{ +} + +Association: +{ +} + +Association key: xxx +{ +} + +Association key: xxx +{ +} + + + + +---------------------------------------------------------------- +class ByteArray(FixedSizeCollection): #byte +{ + fun at: anIndex put: aValue + { + ^self basicAt: anIndex put: aValue. + } +} + +class(#byte) ByteArray(FixedSizedCollection) +{ +} + +class(#byte) ByteArray(Stix) +{ +} + +class Association(Magnitude) -> new Association inheriting Magnitude +class Association() -> new Association inheriting Stix +class(#byte) Association() -> new Association class inheriting Stix, but it's byte indexed. +class(#word) Association() -> new Association class inheriting Stix, but it's word indexed. +class(#oop) Association() -> new Association class inheriting Stix, but it's oop indexed. (it can have the variable part on top of the fixed part. responds to the 'new: aSize' message) +class(#word) Association(Magnitude) -> new Association class inheriting Magnitude, but it's word indexed. + +class Association -> revisit the Association class defined previously. Revisiting can add new methods. + +#include 'Magnitude.st' + +#class(#byte) Association(Magnitude) +{ +## class variables can be accessed by class methods and instance methods. +## methods of subclasses can also access them. + declare(#class) a b c. + +## class instance variable can be accessed inside the class method only. 
+ + declare(#class_instance) d, e, f + +## All instance variables are protected by default. + declare key, value. + + +## +## declare(#class) a, b, c. ## class variables +## declare(#class_instance) a, b, c. ## class instance variables +## declare(#instance) a, b, c. ## instance variables +## declare a,b, c. ## instance variables + +## function(#class) ## class method +## function(#instance) ## instance method +## function ## instance method + +## var and fun are not keywords. they can be a method name or a variable name. +## Casing is not used to differentiate variable kinds like global local temporary etc. + +## other modifiers (EXPERIMENTAL. JUST THINKING). +## declare(#class,#public,#rw) x. x can be accessed by other classes in read-write mode. +## function(#private) xxx xxx is a private method +## function(#class,#private) xxx xxx is a private class method. + + function(#class) initialize + { + ## This is the initializer for the class object. + ## executed when this class is added to the system. + ## initialize the class variables and class instance variables. + SIZE := 20. + } + + function(#class) key: aKey + { + ^self key: aKey value: nil. + } + + function(#class) key: aKey value: aValue + { + | ass | + ass := self new. + ass key: aKey value: aValue. + ^ass. + } + + function key: aKey value: aValue + { + key := aKey. + value := aValue. + } + + function key + { + ^key + } + + function value + { + ^value + } + + function value: value + { + self->value := aValue + } + + function = anAssociation + { + ^self->key = anAssociation key. + } + + function hash + { + ^self->key hash + } + + function value: aBlock + { + |a | + + a := [ :t1 | t1 value ] with: 10. + ^a + 10. + } +} + + +; message cascading +. statement terminator or floating point if in number and followed by a digit. +^ return +[ ] block +# symbol or array +() grouping +$ character constant +| temporary variable or end of block arguments. 
+ +"" comment +'' string +: at the end of the keyword or before block argument name. + +------------------- +available +' ! + +-------------------------------------------------- +#! for comment +## for comment +----------------------------- + +@ binarySelector for coordinate number @ number. + +---------------------------------------------------------------------------- + +Single line comment +## comment text +#! comment text (easy handling to skip hash bang) + +Multi-line comments - double quoted as in Smalltalk +" comment text " diff --git a/stix/lib/stix-prv.h b/stix/lib/stix-prv.h index a2ae4e7..f9c4b32 100644 --- a/stix/lib/stix-prv.h +++ b/stix/lib/stix-prv.h @@ -122,6 +122,96 @@ */ #define STIX_MAX_INDEXED_INSTVARS(named_instvar) ((~(stix_oow_t)0) - named_instvar) + +#if defined(STIX_INCLUDE_COMPILER) + +/* ========================================================================= */ +/* SOURCE CODE I/O FOR COMPILER */ +/* ========================================================================= */ + +enum stix_iocmd_t +{ + STIX_IO_OPEN, + STIX_IO_CLOSE, + STIX_IO_READ +}; +typedef enum stix_iocmd_t stix_iocmd_t; + +typedef struct stix_iolxc_t stix_iolxc_t; +struct stix_iolxc_t +{ + stix_char_t c; /**< character */ + unsigned long line; /**< line */ + unsigned long colm; /**< column */ + const stix_char_t* file; /**< file specified in #include */ +}; + +enum stix_ioarg_flag_t +{ + STIX_IO_INCLUDED = (1 << 0) +}; +typedef enum stix_ioarg_flag_t stix_ioarg_flag_t; + +typedef struct stix_ioarg_t stix_ioarg_t; +struct stix_ioarg_t +{ + /** + * [IN] I/O object name. + * It is #STIX_NULL for the main stream and points to a non-NULL string + * for an included stream. + */ + const stix_char_t* name; + + /** + * [OUT] I/O handle set by a handler. + * The source stream handler can set this field when it opens a stream. + * All subsequent operations on the stream see this field as set + * during opening.
+ */ + void* handle; + + /** + * [OUT] place data here + */ + stix_char_t buf[1024]; + + /** + * [IN] points to the data of the includer. It is #STIX_NULL for the + * main stream. + */ + stix_ioarg_t* includer; + + /*-----------------------------------------------------------------*/ + /*----------- from here down, internal use only -------------------*/ + struct + { + int pos, len; + } b; + + stix_oow_t line; + stix_oow_t colm; + + stix_iolxc_t lxc; + /*-----------------------------------------------------------------*/ +}; +/* signature of the source input handler; it receives the I/O command and the stream data to act on */ +typedef stix_oow_t (*stix_ioimpl_t) ( + stix_t* stix, + stix_iocmd_t cmd, + stix_ioarg_t* arg +); +/* compiler state: the input handler and the input stream data it operates on */ +struct stix_compiler_t +{ + stix_ioimpl_t impl; /* input handler */ + stix_iolxc_t lxc; + stix_ioarg_t arg; /* static top-level data */ + stix_ioarg_t* curinp; /* pointer to the current data */ +}; + +#endif + + #if defined(__cplusplus) extern "C" { #endif @@ -244,6 +334,16 @@ stix_oop_t stix_getatsysdic ( stix_oop_t key ); + + +/* ========================================================================= */ +/* comp.c */ +/* ========================================================================= */ +int stix_compile ( + stix_t* stix, + stix_ioimpl_t io +); + #if defined(__cplusplus) } #endif diff --git a/stix/lib/stix.c b/stix/lib/stix.c index b6b0aa5..4c48bcb 100644 --- a/stix/lib/stix.c +++ b/stix/lib/stix.c @@ -84,6 +84,27 @@ void stix_fini (stix_t* stix) stix_killheap (stix, stix->permheap); } +stix_mmgr_t* stix_getmmgr (stix_t* stix) +{ + return stix->mmgr; /* the memory manager stored in this stix object */ +} +/* returns the address immediately after the stix_t object (stix + 1); presumably the caller extension area - confirm against stix_open */ +void* stix_getxtn (stix_t* stix) +{ + return (void*)(stix + 1); +} + +/* returns the error number currently stored in the stix object */ +stix_errnum_t stix_geterrnum (stix_t* stix) +{ + return stix->errnum; +} +/* overwrites the stored error number with errnum */ +void stix_seterrnum (stix_t* stix, stix_errnum_t errnum) +{ + stix->errnum = errnum; +} + int stix_setoption (stix_t* stix, stix_option_t id, const void* value) { switch (id) @@ -151,3 +172,27 @@ int stix_equalchars (const stix_char_t* str1, const stix_char_t* str2, stix_oow_ return 1; } +/* allocates size bytes through the memory manager of stix; on failure sets errnum to STIX_ENOMEM and returns STIX_NULL */ +void* stix_allocmem (stix_t* stix,
stix_size_t size) +{ + void* ptr; + + ptr = STIX_MMGR_ALLOC (stix->mmgr, size); + if (ptr == STIX_NULL) stix->errnum = STIX_ENOMEM; + return ptr; +} +/* like stix_allocmem, but zero-fills the block when the allocation succeeds */ +void* stix_callocmem (stix_t* stix, stix_size_t size) +{ + void* ptr; + + ptr = STIX_MMGR_ALLOC (stix->mmgr, size); + if (ptr == STIX_NULL) stix->errnum = STIX_ENOMEM; + else STIX_MEMSET (ptr, 0, size); + return ptr; +} +/* hands ptr back to the memory manager of stix */ +void stix_freemem (stix_t* stix, void* ptr) +{ + STIX_MMGR_FREE (stix->mmgr, ptr); +} diff --git a/stix/lib/stix.h b/stix/lib/stix.h index d5cfaac..3170e23 100644 --- a/stix/lib/stix.h +++ b/stix/lib/stix.h @@ -27,6 +27,10 @@ #ifndef _STIX_H_ #define _STIX_H_ + +/* TODO: move this macro out to the build files.... */ +#define STIX_INCLUDE_COMPILER + #if defined(__MSDOS__) # define STIX_INCPTR(type,base,inc) (((type __huge*)base) + (inc)) # define STIX_DECPTR(type,base,inc) (((type __huge*)base) - (inc)) @@ -51,11 +55,16 @@ /* TODO: define these types and macros using autoconf */ typedef unsigned char stix_uint8_t; typedef unsigned short int stix_uint16_t; -/*typedef unsigned int stix_uint32_t;*/ +#if defined(__MSDOS__) + typedef unsigned long int stix_uint32_t; +#else + typedef unsigned int stix_uint32_t; +#endif typedef unsigned long int stix_uintptr_t; typedef unsigned long int stix_size_t; typedef unsigned short int stix_char_t; /* TODO ... wchar_t??? */ +typedef char stix_iochar_t; #define STIX_SIZEOF(x) (sizeof(x)) #define STIX_COUNTOF(x) (sizeof(x) / sizeof(x[0])) @@ -202,7 +211,8 @@ enum stix_errnum_t STIX_EINTERN, /**< internal error */ STIX_ENOMEM, /**< insufficient memory */ STIX_EINVAL, /**< invalid parameter or data */ - STIX_ENOENT /**< no matching entry */ + STIX_ENOENT, /**< no matching entry */ + STIX_EIOERR /**< I/O error */ }; typedef enum stix_errnum_t stix_errnum_t; @@ -405,8 +415,6 @@ enum stix_code_t typedef enum stix_code_t stix_code_t; - - /* * OOP encoding * An object pointer(OOP) is an ordinary pointer value to an object.
@@ -607,77 +615,7 @@ struct stix_association_t typedef struct stix_association_t stix_association_t; typedef struct stix_association_t* stix_oop_association_t; -#if 0 -/* ----------------------------------------- - * class structures for classes known to VM - * ----------------------------------------- */ -enum stix_class_desc_t -{ - /* STIX_XXX_SIZE represents the size of the class. other - * enumerators represent the index of instance variables of - * the class */ - STIX_ASSOCIATION_KEY = 0, - STIX_ASSOCIATION_VALUE, - STIX_ASSOCIATION_SIZE, - - STIX_DICTIONARY_TALLY = 0, - STIX_DICTIONARY_BUCKET, - STIX_DICTIONARY_SIZE, - - STIX_BEHAVIOR_SPEC = 0, - STIX_BEHAVIOR_METHODS, - STIX_BEHAVIOR_SUPERCLASS, - STIX_BEHAVIOR_SUBCLASSES, - STIX_BEHAVIOR_SIZE, - - STIX_CLASS_SPEC = 0, - STIX_CLASS_METHODS, - STIX_CLASS_SUPERCLASS, - STIX_CLASS_SUBCLASSES, - STIX_CLASS_NAME, - STIX_CLASS_INSTANCE_VARIABLES, - STIX_CLASS_CLASS_VARIABLES, - STIX_CLASS_POOL_DICTIONARIES, - STIX_CLASS_SIZE, - - STIX_METACLASS_SPEC = 0, - STIX_METACLASS_METHODS, - STIX_METACLASS_SUPERCLASS, - STIX_METACLASS_SUBCLASSES, - STIX_METACLASS_INSTANCE_CLASS, - STIX_METACLASS_INSTANCE_VARIABLES, - STIX_METACLASS_SIZE, - - STIX_BLOCK_CONTEXT = 0, - STIX_BLOCK_ARG_COUNT, - STIX_BLOCK_ARG_LOC, - STIX_BLOCK_BYTE_POINTER, - STIX_BLOCK_SIZE, - - STIX_CONTEXT_STACK = 0, - STIX_CONTEXT_STACK_TOP, - STIX_CONTEXT_RECEIVER, - STIX_CONTEXT_PC, - STIX_CONTEXT_METHOD, - STIX_CONTEXT_SIZE, - - STIX_METHOD_TEXT = 0, - STIX_METHOD_SELECTOR, - STIX_METHOD_BYTECODES, - STIX_METHOD_TMPCOUNT, - STIX_METHOD_ARGCOUNT, - STIX_METHOD_SIZE, - - STIX_SYMTAB_TALLY = 0, - STIX_SYMTAB_BUCKET, - STIX_SYMTAB_SIZE, - - STIX_SYSDIC_TALLY = STIX_DICTIONARY_TALLY, - STIX_SYSDIC_BUCKET = STIX_DICTIONARY_BUCKET, - STIX_SYSDIC_SIZE = STIX_DICTIONARY_SIZE -}; -#endif /** * The STIX_CLASSOF() macro return the class of an object including a numeric @@ -705,6 +643,10 @@ struct stix_heap_t stix_uint8_t* ptr; /* next allocation pointer */ }; +#if 
defined(STIX_INCLUDE_COMPILER) +typedef struct stix_compiler_t stix_compiler_t; +#endif + typedef struct stix_t stix_t; struct stix_t @@ -731,6 +673,7 @@ struct stix_t stix_oop_t _false; /* == NEVER CHANGE THE ORDER OF FIELDS BELOW == */ + /* stix_ignite() assumes this order */ stix_oop_t _stix; /* Stix */ stix_oop_t _nil_object; /* NilObject */ stix_oop_t _class; /* Class */ @@ -751,6 +694,10 @@ struct stix_t stix_oop_t* tmp_stack[100]; /* stack for temporaries */ stix_oow_t tmp_count; + +#if defined(STIX_INCLUDE_COMPILER) + stix_compiler_t* c; +#endif }; @@ -779,6 +726,25 @@ STIX_EXPORT void stix_fini ( stix_t* vm ); + +STIX_EXPORT stix_mmgr_t* stix_getmmgr ( + stix_t* stix +); + +STIX_EXPORT void* stix_getxtn ( + stix_t* stix +); + + +STIX_EXPORT stix_errnum_t stix_geterrnum ( + stix_t* stix +); + +STIX_EXPORT void stix_seterrnum ( + stix_t* stix, + stix_errnum_t errnum +); + /** * The stix_getoption() function gets the value of an option * specified by \a id into the buffer pointed to by \a value. 
@@ -849,9 +815,7 @@ STIX_EXPORT int stix_ignite ( ); -/** - * Temporary OOP management - */ +/* Temporary OOP management */ STIX_EXPORT void stix_pushtmp ( stix_t* stix, stix_oop_t* oop_ptr @@ -866,6 +830,24 @@ STIX_EXPORT void stix_poptmps ( stix_oow_t count ); + +/* Memory allocation/deallocation functions using stix's MMGR */ + +STIX_EXPORT void* stix_allocmem ( + stix_t* stix, + stix_size_t size +); + +STIX_EXPORT void* stix_callocmem ( + stix_t* stix, + stix_size_t size +); + +STIX_EXPORT void stix_freemem ( + stix_t* stix, + void* ptr +); + #if defined(__cplusplus) } #endif diff --git a/stix/lib/sym.c b/stix/lib/sym.c index 2387170..70adc10 100644 --- a/stix/lib/sym.c +++ b/stix/lib/sym.c @@ -90,11 +90,8 @@ static stix_oop_t find_or_make_symbol (stix_t* stix, const stix_char_t* ptr, sti if (!create) { - /* stix->errnum = STIX_ENOENT; return STIX_NULL; - */ - return stix->_nil; } tally = STIX_OOP_TO_SMINT(stix->symtab->tally); diff --git a/stix/lib/utf8.c b/stix/lib/utf8.c new file mode 100644 index 0000000..4c0ce4f --- /dev/null +++ b/stix/lib/utf8.c @@ -0,0 +1,184 @@ +/* + * $Id$ + * + Copyright (c) 2014-2015 Chung, Hyung-Hwan. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "stix-prv.h" + +/* + * from RFC 2279 UTF-8, a transformation format of ISO 10646 + * + * UCS-4 range (hex.) UTF-8 octet sequence (binary) + * 1:2 00000000-0000007F 0xxxxxxx + * 2:2 00000080-000007FF 110xxxxx 10xxxxxx + * 3:2 00000800-0000FFFF 1110xxxx 10xxxxxx 10xxxxxx + * 4:4 00010000-001FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * inv 00200000-03FFFFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + * inv 04000000-7FFFFFFF 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx + */ +/* describes one UTF-8 encoding form: the character range it covers and the bit layout of its first byte */ +struct __utf8_t +{ + stix_uint32_t lower; /* lowest character value encoded in this form */ + stix_uint32_t upper; /* highest character value encoded in this form */ + stix_uint8_t fbyte; /* marker bits expected in the first utf8 byte */ + stix_uint8_t mask; /* selects the marker bits of the first byte */ + stix_uint8_t fmask; /* selects the payload bits of the first byte */ + int length; /* number of bytes */ +}; + +typedef struct __utf8_t __utf8_t; +/* one row per sequence length (1 to 6 bytes), in the order of the table in the comment above */ +static __utf8_t utf8_table[] = +{ + {0x00000000ul, 0x0000007Ful, 0x00, 0x80, 0x7F, 1}, + {0x00000080ul, 0x000007FFul, 0xC0, 0xE0, 0x1F, 2}, + {0x00000800ul, 0x0000FFFFul, 0xE0, 0xF0, 0x0F, 3}, + {0x00010000ul, 0x001FFFFFul, 0xF0, 0xF8, 0x07, 4}, + {0x00200000ul, 0x03FFFFFFul, 0xF8, 0xFC, 0x03, 5}, + {0x04000000ul, 0x7FFFFFFFul, 0xFC, 0xFE, 0x01, 6} +}; +/* returns the utf8_table row whose lower..upper range contains uc, or STIX_NULL if there is none */ +static STIX_INLINE __utf8_t* get_utf8_slot (stix_char_t uc) +{ + __utf8_t* cur, * end; + + STIX_ASSERT (STIX_SIZEOF(stix_iochar_t) == 1); + STIX_ASSERT (STIX_SIZEOF(stix_char_t) >= 2); + + end = utf8_table + STIX_COUNTOF(utf8_table); + cur = utf8_table; + + while (cur < end) + { + if (uc >= cur->lower && uc <= cur->upper) return cur; + cur++; + } + + return STIX_NULL; /* invalid
character */ +} +/* Encodes uc as UTF-8 into utf8. Returns the number of bytes the encoding takes, or 0 if uc has no table entry. Nothing is written when utf8 is null or size is smaller than that number. */ +stix_size_t stix_uctoutf8 (stix_char_t uc, stix_iochar_t* utf8, stix_size_t size) +{ + __utf8_t* cur = get_utf8_slot (uc); + + if (cur == STIX_NULL) return 0; /* illegal character */ + + if (utf8 && cur->length <= size) + { + int index = cur->length; + while (index > 1) + { + /* + * fill the trailing bytes from last to first: + * 0x3F: 00111111 (low six payload bits) + * 0x80: 10000000 (trailing byte marker) + */ + utf8[--index] = (uc & 0x3F) | 0x80; + uc >>= 6; + } + + utf8[0] = uc | cur->fbyte; + } + + /* small buffer is also indicated by this return value + * greater than 'size'. */ + return (stix_size_t)cur->length; +} +/* Decodes one UTF-8 sequence starting at utf8, with size bytes available. Returns the sequence length implied by the first byte, or 0 if the first byte or a trailing byte is invalid. A return value greater than size means the sequence is incomplete and nothing was decoded. *uc is stored only when uc is not null and the whole sequence is present. */ +stix_size_t stix_utf8touc (const stix_iochar_t* utf8, stix_size_t size, stix_char_t* uc) +{ + __utf8_t* cur, * end; + + STIX_ASSERT (utf8 != STIX_NULL); + STIX_ASSERT (size > 0); + STIX_ASSERT (STIX_SIZEOF(stix_iochar_t) == 1); + STIX_ASSERT (STIX_SIZEOF(stix_char_t) >= 2); + + end = utf8_table + STIX_COUNTOF(utf8_table); + cur = utf8_table; + + while (cur < end) + { + if ((utf8[0] & cur->mask) == cur->fbyte) + { + + /* if size is less than cur->length, the incomplete-sequence + * error is naturally indicated. so validate the string + * only if size is as large as cur->length. */ + /* NOTE(review): the decoded value is accumulated in a stix_char_t, which stix.h in this patch typedefs as unsigned short; where that is 16 bits, sequences of 4 or more bytes lose their upper bits. Overlong forms are not rejected either (the value is never compared with cur->lower). Confirm whether both are intended. */ + if (size >= cur->length) + { + int i; + + if (uc) + { + stix_char_t w; + + w = utf8[0] & cur->fmask; + for (i = 1; i < cur->length; i++) + { + /* in utf8, trailing bytes are all + * set with 0x80. + * + * 10XXXXXX & 11000000 => 10000000 + * + * if not, invalid. */ + if ((utf8[i] & 0xC0) != 0x80) return 0; + w = (w << 6) | (utf8[i] & 0x3F); + } + *uc = w; + } + else + { + for (i = 1; i < cur->length; i++) + { + /* in utf8, trailing bytes are all + * set with 0x80. + * + * 10XXXXXX & 11000000 => 10000000 + * + * if not, invalid. */ + if ((utf8[i] & 0xC0) != 0x80) return 0; + } + } + } + + /* this return value can indicate both + * the correct length (size >= cur->length) + * and + * the incomplete sequence error (size < cur->length).
+ */ + return (stix_size_t)cur->length; + } + cur++; + } + + return 0; /* error - invalid sequence */ +} +/* returns what stix_utf8touc returns for the same input, without storing the decoded character */ +stix_size_t stix_utf8len (const stix_iochar_t* utf8, stix_size_t size) +{ + return stix_utf8touc (utf8, size, STIX_NULL); +} +