added some code for a bootstrapping compiler

This commit is contained in:
hyunghwan.chung 2015-05-15 14:55:12 +00:00
parent ccb232329b
commit 75bb3e9a40
10 changed files with 3392 additions and 95 deletions

2298
stix/lib/comp.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -74,13 +74,6 @@ static stix_oop_t find_or_insert (stix_t* stix, stix_oop_char_t key, stix_oop_t
index = stix_hashchars(key->slot, STIX_OBJ_GET_SIZE(key)) % STIX_OBJ_GET_SIZE(stix->sysdic->bucket);
{
int i;
printf ("FINDING IN SYSDIC [");
for (i = 0; i < STIX_OBJ_GET_SIZE(key); i++) printf ("%c", key->slot[i]);
printf ("]\n");
}
while (stix->sysdic->bucket->slot[index] != stix->_nil)
{
ass = (stix_oop_association_t)stix->sysdic->bucket->slot[index];
@ -99,11 +92,9 @@ printf ("]\n");
if (value == STIX_NULL)
{
/*
/* when value is STIX_NULL, perform no insertion */
stix->errnum = STIX_ENOENT;
return STIX_NULL;
*/
return stix->_nil;
}
stix_pushtmp (stix, (stix_oop_t*)&key); tmp_count++;

View File

@ -204,14 +204,15 @@ static int ignite_3 (stix_t* stix)
stix_oop_t* stix_ptr;
stix_ptr = &stix->_stix;
/* The loop here relies on the proper order of fields in stix_t.
* Be sure to keep in sync the order of items in symnames and
* the related fields of stix_t */
for (i = 0; i < STIX_COUNTOF(symnames); i++)
{
sym = stix_makesymbol (stix, symnames[i].str, symnames[i].len);
//sym = stix_makesymbol (stix, symnames[0].str, symnames[0].len);
if (!sym) return -1;
if (!stix_putatsysdic (stix, sym, *stix_ptr)) return -1;
stix_ptr++;
}

View File

@ -29,6 +29,14 @@
#include <stdio.h>
#include <stdlib.h>
/* Extension data of this program. main() passes STIX_SIZEOF(xtn_t) to
* stix_open() and open_input() retrieves the area with stix_getxtn(). */
typedef struct xtn_t xtn_t;
struct xtn_t
{
/* path given to fopen() by open_input() */
char source_path[1024];
};
static void* sys_alloc (stix_mmgr_t* mmgr, stix_size_t size)
{
return malloc (size);
@ -52,6 +60,69 @@ static stix_mmgr_t sys_mmgr =
STIX_NULL
};
/*
 * Handles STIX_IO_OPEN for input_handler().
 *
 * For an included stream (arg->includer is set), the file named by the
 * source_path field of the extension area is opened for reading and the
 * resulting FILE pointer is stored in arg->handle.
 *
 * Returns 0 on success. On failure, sets the error number to STIX_EIOERR
 * and returns -1.
 *
 * NOTE(review): the main-stream branch is still a stub and leaves
 * arg->handle untouched, while read_input() and close_input() assert a
 * non-null handle. Also, the includee branch opens xtn->source_path rather
 * than arg->name - confirm which stream each branch is meant to open.
 */
static STIX_INLINE stix_oow_t open_input (stix_t* stix, stix_ioarg_t* arg)
{
	if (arg->includer)
	{
		/* includee */
		xtn_t* xtn = stix_getxtn(stix);
		arg->handle = fopen (xtn->source_path, "r");
		if (!arg->handle)
		{
			stix_seterrnum (stix, STIX_EIOERR);
			return -1;
		}
	}
	else
	{
		/* main stream */
		/*char tmp[PATH_MAX];*/
	}

	/* the original fell off the end of this non-void function here,
	 * leaving the value seen by the caller undefined */
	return 0;
}
/*
 * Handles STIX_IO_READ for input_handler().
 *
 * Fills arg->buf from the stream in arg->handle with a single fread() call.
 * Returns 0 unless the stream reports an error, in which case the error
 * number is set to STIX_EIOERR and -1 is returned, mirroring open_input().
 *
 * NOTE(review): the number of items read is discarded and 0 is returned
 * even when data was read; the caller cannot tell how much of arg->buf is
 * valid. Confirm the contract expected by the compiler before relying on it.
 * NOTE(review): raw file contents are read directly into stix_char_t
 * elements without any decoding - confirm this is intended.
 */
static STIX_INLINE stix_oow_t read_input (stix_t* stix, stix_ioarg_t* arg)
{
	STIX_ASSERT (arg->handle != STIX_NULL);
	if (fread (arg->buf, STIX_SIZEOF(arg->buf[0]), STIX_COUNTOF(arg->buf), arg->handle) == 0)
	{
		if (ferror(arg->handle))
		{
			/* previously an empty branch: the read error was silently dropped */
			stix_seterrnum (stix, STIX_EIOERR);
			return -1;
		}
	}
	return 0;
}
/*
 * Handles STIX_IO_CLOSE for input_handler(): closes the stream held in
 * arg->handle and always returns 0. The result of fclose() is not checked.
 */
static STIX_INLINE stix_oow_t close_input (stix_t* stix, stix_ioarg_t* arg)
{
	FILE* stream;

	STIX_ASSERT (arg->handle != STIX_NULL);

	stream = arg->handle;
	fclose (stream);

	return 0;
}
/* TODO: IMPLEMENT PROPER INPUT HANDLER */
/*
 * I/O handler passed to stix_compile(). Dispatches the command to
 * open_input(), close_input() or read_input() and returns that helper's
 * result. Any other command sets the error number to STIX_EINTERN and
 * yields -1.
 */
static stix_oow_t input_handler (stix_t* stix, stix_iocmd_t cmd, stix_ioarg_t* arg)
{
	if (cmd == STIX_IO_OPEN) return open_input (stix, arg);
	if (cmd == STIX_IO_CLOSE) return close_input (stix, arg);
	if (cmd == STIX_IO_READ) return read_input (stix, arg);

	/* unrecognized command */
	stix->errnum = STIX_EINTERN;
	return -1;
}
static void dump_symbol_table (stix_t* stix)
{
stix_oow_t i, j;
@ -95,7 +166,7 @@ int main (int argc, char* argv[])
(unsigned long int)STIX_CLASS_SPEC_INDEXED_TYPE(x));
}
stix = stix_open (&sys_mmgr, 0, 512000lu, STIX_NULL);
stix = stix_open (&sys_mmgr, STIX_SIZEOF(xtn_t), 512000lu, STIX_NULL);
if (!stix)
{
printf ("cannot open stix\n");
@ -108,14 +179,13 @@ int main (int argc, char* argv[])
stix_setoption (stix, STIX_DFL_SYSDIC_SIZE, &symtab_size);
}
if (stix_ignite(stix) <= -1)
if (stix_ignite (stix) <= -1)
{
printf ("cannot ignite stix\n");
stix_close (stix);
return -1;
}
{
stix_char_t x[] = { 'S', 't', 'r', 'i', 'n', 'g', '\0' };
stix_char_t y[] = { 'S', 'y', 'm', 'b', 'o', 'l', '\0' };
@ -135,6 +205,15 @@ a = stix_findsymbol (stix, x, 6);
printf ("%p\n", a);
dump_symbol_table (stix);
}
if (stix_compile (stix, input_handler) <= -1)
{
printf ("cannot compile code\n");
stix_close (stix);
return -1;
}
stix_close (stix);
return 0;

620
stix/lib/memo.txt Normal file
View File

@ -0,0 +1,620 @@
/*
* Multi-Process within a single threaded-process.
* How to embed in a single threaded web server
*
*
* stix_exec
* VM(scheduler) ---> Context1(obj1,method1)
* ---> Context2(obj2,method2)
* ---> Context3(obj3,method3)
*
* all functions must be asynchronous
* blocking functions will block scheduler.
*/
class Stix::Stix # Namespace name is indicated by ::
{
}
class Stix::Array
{
+ makeSymbol: aString
{
| s |
s := Symbol new: aString. # Symbol belongs to the Stix namespace.
^s.
}
}
A name space is stored in the namespace table of Stix.Namespaces.
Stix.Symbols - symbols are global. not affected by namespaces.
Stix.Sysdict -
(#QSE => Namespace( Another Sysdict))
(
class Stix::Namespace
{
}
class Stix::Class
{
}
Stix.Namespaces is a system dictionary
class QSE::Array
{
}
class QSE::Tiara::Array
{
}
Stix.Namespaces -> 'QSE'
'QSE::Tiara'
------------------------------------------------------------
ARRAY CONSTANT TO ALLOW DYNAMIC VALUES.
#( ...... ) array literal
in original smalltalk, a block can't be placed inside the array literal
arrayConstant := '#' array
array := "(" { number | string | symbol | array | characterConstant }* ")".
So #(1 2 [^20]) is illegal.
if a block is there, treat it as a valid stix expression and evaluate it.
#(1 2 [1 + 2] 5)
t = Array new: 4.
t at: 1 put: 1.
t at: 2 put: 2.
t at: 3 put: (1 + 2).
t at: 4 put: 5.
Evaluate the expressions in the array first
Create an array
Put the right element.
-----------------------------------------------
command line
libstix.a
stix stix.im Class1.st Class2.st Main.st Main
--> load the image, compile Class1.st, compile Class2.st compile Main.st
-->
stix stix.im
--> load the image
-------------------------------------------------------------------------
#!/usr/bin/stix
###################################
## Main.st
###################################
#include 'Class1.st'
#include 'Class2.st'
#class(#byte) Association(Magnitude)
{
declare a, b, c.
declare(#class_instance) x.
declare(#class) MAX_SIZE.
function(#class) initialize
{
MAX_SIZE := 20.
true whileTrue: [
Stdout print: 10.
].
}
function(#class) new: anInteger
{
Stix error: 'invalid message'.
}
}
#main
| a |
a := Class1 new.
Stdout format: #( 1 2 [a toInteger] ) with: '%.5d %.6d\n'.
^0.
-------------------------------------------------------------------------
The statements after the #main directives are compiled as a class method of Stix.
That is, 'Stix::main'. It becomes the entry point.
-------------------------------------------------------------------------
If no #main directive is found, there is no official entry point.
However, if you have the initialize class method, it's invoked when a class
is compiled, the statement in the class is executed before #main.
if the statement creates a certain loop, it can act as an entry point as well.
--------------------------------------------------------------------------
Top level directive
#main, #class, #include,
#include is available everywhere. It doesn't have to be at the top level.
Do i need #import?
---------------------------------------------------------------------------
if there are multiple #main directives, do i need to concatenate them all?
or disallow multiple ones and allow only 1 #main??
---------------------------------------------------------------------------
#namespace directive?
#namespace Stix::Compiler
naming convention for multiple ?? . conflicts with the statement terminator.
:: is ok a single : is used for various purpose but more than 1 is illegal in smalltalk.
so use :: as a namespace separator.
Relative naming and absolute naming?
Stix::Compiler <- is Stix the absolute top or a subname class under the current space?
::Stix::Compiler <- i don't like this
----------------------------------------------------------------------------
"
Stix
Class
NilObject
Object
NilObject
Collection
IndexedCollection
FixedSizedCollection
Array
ByteArray
String
Symbol
Set
Dictionary
SystemDictionary
SymbolSet
Magnitude
Association
Character
Number
Integer
SmallInteger
LargeInteger
"
class Stix
{
+ alloc
{
<primitive: 1>
}
+ new
{
^self alloc init.
}
- init
{
^self.
}
- finalize
{
}
+ findClass: aString
{
| a b c |
}
}
class Class extends Stix
{
}
class NilObject extends Stix
{
}
class Object extends Stix
{
}
-----------------------------------------
class variable
and class instance variable
-----------------------------------------
A CV X Y
CIV x y
B CV Z
civ z
C civ q
A: getX
return x (return instance variable 1)
B getX
return A'X (return instance variable 3)
x is index 1.
y is index 2.
z is index 3.
X is index 3 of A.
Y is index 3 of A.
Z is index 2 of B.
q is index 4 of C.
A has x y X Y
B has x y z Z
C has x y z q
place class instance variables before class variables.
-------------------------------------------
class Magnitude extends Stix
{
}
%include 'Association.st'
%class Association(Magnitude)
{
%category(Association class)
%constant
ABC := XXX
BCD := KKK
TTT := 20
%self(private)
%self(instance creation)
| Key Value | "class variables" <--- index
| xxx yyy | "class instance variables" <--- index
key: aKey
{
^self key: aKey value: nil.
}
key: aKey value: aValue
{
| ass |
ass := self new.
ass key: aKey value: aValue.
^ass.
}
%instance(initialization)
| key value | "instance variables"
key: aKey value: aValue
{
key := aKey.
value := aValue.
}
key
{
^key
}
value
{
^value
}
value: value
{
self->value := aValue
}
= anAssociation
{
^self->key = anAssociation key.
}
hash
{
^self->key hash
}
}
"Creates a new class Association inheriting nil"
%class Association(nil)
{
%func more
{
^self
}
}
"Extends an existing Association class"
%class Association
{
}
class Character extends Magnitude
{
}
class Number extends Magnitude
{
}
class Integer extends Number
{
}
class SmallInteger extends Integer
{
}
class LargeInteger extends Integer
{
}
Association
{
%class
| x y z |
value: xxx
{
}
}
Association: Magnitude
{
}
Association: <- for extending
{
}
Association:
{
}
Association key: xxx
{
}
Association key: xxx
{
}
----------------------------------------------------------------
class ByteArray(FixedSizeCollection): #byte
{
fun at: anIndex put: aValue
{
^self basicAt: anIndex put: aValue.
}
}
class(#byte) ByteArray(FixedSizedCollection)
{
}
class(#byte) ByteArray(Stix)
{
}
class Association(Magnitude) -> new Association inheriting Magnitude
class Association() -> new Association inheriting Stix
class(#byte) Association() -> new Association class inheriting Stix, but it's byte indexed.
class(#word) Association() -> new Association class inheriting Stix, but it's word indexed.
class(#oop) Association() -> new Association class inheriting Stix, but it's oop indexed. (it can have the variable part on top of the fixed part. response to the 'new: aSize' message)
class(#word) Association(Magnitude) -> new Association class inheriting Magnitude, but it's word indexed.
class Association -> revisit the Association class defined previously. Revisiting can add new methods.
#include 'Magnitude.st'
#class(#byte) Association(Magnitude)
{
## class variables can be accessed by class methods and instance methods.
## methods of subclasses can also access them.
declare(#class) a b c.
## class instance variable can be accessed inside the class method only.
declare(#class_instance) d, e, f
## All instance variables are protected by default.
declare key, value.
##
## declare(#class) a, b, c. ## class variables
## declare(#class_instance) a, b, c. ## class instance variables
## declare(#instance) a, b, c. ## instance variables
## declare a,b, c. ## instance variables
## function(#class) ## class method
## function(#instance) ## instance method
## function ## instance method
## var and fun are not keywords. they can be a method name or a variable name.
## Casing is not used to differentiate variable kinds like global local temporary etc.
## other modifiers (EXPERIMENTAL. JUST THINKING).
## declare(#class,#public,#rw) x. x can be accessed by other classes in read-write mode.
## function(#private) xxx xxx is a private method
## function(#class,#private) xxx xxx is private class method.
function(#class) initialize
{
## This is the initializer for the class object.
## executed when this class is added to the system.
## initialize the class variables and class instance variables.
SIZE := 20.
}
function(#class) key: aKey
{
^self key: aKey value: nil.
}
function(#class) key: aKey value: aValue
{
| ass |
ass := self new.
ass key: aKey value: aValue.
^ass.
}
function key: aKey value: aValue
{
key := aKey.
value := aValue.
}
function key
{
^key
}
function value
{
^value
}
function value: value
{
self->value := aValue
}
function = anAssociation
{
^self->key = anAssociation key.
}
function hash
{
^self->key hash
}
function value: aBlock
{
|a |
a := [ :t1 | t1 value ] with: 10.
^a + 10.
}
}
; message cascading
. statement terminator or floating point if in number and followed by a digit.
^ return
[ ] block
# symbol or array
() grouping
$ character constant
| temporary variable or end of block arguments.
"" comment
'' string
: at the end of the keyword or before block argument name.
-------------------
available
' !
--------------------------------------------------
#! for comment
## for comment
-----------------------------
@ binarySelector for coordinate number @ number.
----------------------------------------------------------------------------
Single line comment
## comment text
#! comment text (easy handling to skip hash bang)
Multi-line comments - double quoted as in smalltalk
" comment text "

View File

@ -122,6 +122,96 @@
*/
/* named_instvar is parenthesized so that an expression argument such as
 * (a + b) is subtracted as a whole */
#define STIX_MAX_INDEXED_INSTVARS(named_instvar) ((~(stix_oow_t)0) - (named_instvar))
#if defined(STIX_INCLUDE_COMPILER)
/* ========================================================================= */
/* SOURCE CODE I/O FOR COMPILER */
/* ========================================================================= */
/* Commands passed as the cmd argument of a source I/O handler
* (stix_ioimpl_t). */
enum stix_iocmd_t
{
STIX_IO_OPEN, /* open the stream described by the I/O argument */
STIX_IO_CLOSE, /* close the stream */
STIX_IO_READ /* read data from the stream */
};
typedef enum stix_iocmd_t stix_iocmd_t;
/* A character together with the line, column and file recorded for it. */
typedef struct stix_iolxc_t stix_iolxc_t;
struct stix_iolxc_t
{
stix_char_t c; /**< character */
unsigned long line; /**< line */
unsigned long colm; /**< column */
const stix_char_t* file; /**< file specified in #include */
};
/* Flag bits related to stix_ioarg_t.
* NOTE(review): no field of stix_ioarg_t visible in this header stores
* these flags - confirm where STIX_IO_INCLUDED is meant to be used. */
enum stix_ioarg_flag_t
{
STIX_IO_INCLUDED = (1 << 0)
};
typedef enum stix_ioarg_flag_t stix_ioarg_flag_t;
/* Argument block handed to a source I/O handler (stix_ioimpl_t). The
* fields above the "internal use only" marker form the interface with the
* handler; the rest is bookkeeping. */
typedef struct stix_ioarg_t stix_ioarg_t;
struct stix_ioarg_t
{
/**
* [IN] I/O object name.
* It is #STIX_NULL for the main stream and points to a non-NULL string
* for an included stream.
*/
const stix_char_t* name;
/**
* [OUT] I/O handle set by a handler.
* The source stream handler can set this field when it opens a stream.
* All subsequent operations on the stream see this field as set
* during opening.
*/
void* handle;
/**
* [OUT] place data here
*/
stix_char_t buf[1024];
/**
* [IN] points to the data of the includer. It is #STIX_NULL for the
* main stream.
*/
stix_ioarg_t* includer;
/*-----------------------------------------------------------------*/
/*----------- from here down, internal use only -------------------*/
/* presumably the read position within buf and the number of valid
* items in buf - TODO confirm against comp.c */
struct
{
int pos, len;
} b;
/* presumably the current line and column of this stream - TODO confirm */
stix_oow_t line;
stix_oow_t colm;
/* presumably the most recently fetched character with its location -
* TODO confirm */
stix_iolxc_t lxc;
/*-----------------------------------------------------------------*/
};
/* Type of a source I/O handler. It receives the command to perform and the
* argument block of the stream to operate on.
* NOTE(review): the handler in the sample program returns -1 on failure
* through this stix_oow_t return type; if stix_oow_t is unsigned, a caller
* cannot test the result with "<= -1" - confirm the intended return type. */
typedef stix_oow_t (*stix_ioimpl_t) (
stix_t* stix,
stix_iocmd_t cmd,
stix_ioarg_t* arg
);
/* Compiler state. struct stix_t holds a pointer to it in its 'c' field when
* STIX_INCLUDE_COMPILER is defined. */
struct stix_compiler_t
{
stix_ioimpl_t impl; /* input handler */
stix_iolxc_t lxc;
stix_ioarg_t arg; /* static top-level data */
stix_ioarg_t* curinp; /* pointer to the current data */
};
#endif
#if defined(__cplusplus)
extern "C" {
#endif
@ -244,6 +334,16 @@ stix_oop_t stix_getatsysdic (
stix_oop_t key
);
/* ========================================================================= */
/* comp.c */
/* ========================================================================= */
/* Compiles source code supplied through the I/O handler 'io'.
* The sample program treats a return value of -1 or less as failure. */
int stix_compile (
stix_t* stix,
stix_ioimpl_t io
);
#if defined(__cplusplus)
}
#endif

View File

@ -84,6 +84,27 @@ void stix_fini (stix_t* stix)
stix_killheap (stix, stix->permheap);
}
/* Returns the memory manager stored in the mmgr field of stix. */
stix_mmgr_t* stix_getmmgr (stix_t* stix)
{
	stix_mmgr_t* mmgr = stix->mmgr;
	return mmgr;
}
/* Returns the address immediately following the stix_t object, which is
 * where the extension area is located. */
void* stix_getxtn (stix_t* stix)
{
	stix_t* past_object = stix + 1;
	return (void*)past_object;
}
/* Returns the error number currently stored in stix. */
stix_errnum_t stix_geterrnum (stix_t* stix)
{
	stix_errnum_t current = stix->errnum;
	return current;
}
/* Stores errnum as the error number of stix, replacing the previous value. */
void stix_seterrnum (stix_t* stix, stix_errnum_t errnum)
{
stix->errnum = errnum;
}
int stix_setoption (stix_t* stix, stix_option_t id, const void* value)
{
switch (id)
@ -151,3 +172,27 @@ int stix_equalchars (const stix_char_t* str1, const stix_char_t* str2, stix_oow_
return 1;
}
/*
 * Allocates size bytes with the memory manager of stix.
 * Returns the allocated block, or STIX_NULL after setting the error number
 * to STIX_ENOMEM when the allocation fails. The block is not initialized
 * by this function.
 */
void* stix_allocmem (stix_t* stix, stix_size_t size)
{
	void* mem = STIX_MMGR_ALLOC (stix->mmgr, size);

	if (mem != STIX_NULL) return mem;

	stix->errnum = STIX_ENOMEM;
	return mem;
}
/*
 * Allocates size bytes with the memory manager of stix and clears them
 * to zero. Returns the block, or STIX_NULL after setting the error number
 * to STIX_ENOMEM when the allocation fails.
 */
void* stix_callocmem (stix_t* stix, stix_size_t size)
{
	void* mem = STIX_MMGR_ALLOC (stix->mmgr, size);

	if (mem == STIX_NULL)
	{
		stix->errnum = STIX_ENOMEM;
		return mem;
	}

	STIX_MEMSET (mem, 0, size);
	return mem;
}
/* Releases ptr through the memory manager of stix. */
void stix_freemem (stix_t* stix, void* ptr)
{
STIX_MMGR_FREE (stix->mmgr, ptr);
}

View File

@ -27,6 +27,10 @@
#ifndef _STIX_H_
#define _STIX_H_
/* TODO: move this macro out to the build files.... */
#define STIX_INCLUDE_COMPILER
#if defined(__MSDOS__)
# define STIX_INCPTR(type,base,inc) (((type __huge*)base) + (inc))
# define STIX_DECPTR(type,base,inc) (((type __huge*)base) - (inc))
@ -51,11 +55,16 @@
/* TODO: define these types and macros using autoconf */
/* Basic integer and character types. The widths depend on the compiler;
* nothing here verifies them. */
typedef unsigned char stix_uint8_t;
typedef unsigned short int stix_uint16_t;
/*typedef unsigned int stix_uint32_t;*/
/* presumably int is narrower than 32 bits on MS-DOS compilers, hence
* unsigned long there - TODO confirm */
#if defined(__MSDOS__)
typedef unsigned long int stix_uint32_t;
#else
typedef unsigned int stix_uint32_t;
#endif
typedef unsigned long int stix_uintptr_t;
typedef unsigned long int stix_size_t;
typedef unsigned short int stix_char_t; /* TODO ... wchar_t??? */
/* character type used on the UTF-8 side of the conversion functions in
* utf8.c, which assert that its size is 1 */
typedef char stix_iochar_t;
/* STIX_SIZEOF(x) yields the size of x in bytes. */
#define STIX_SIZEOF(x) (sizeof(x))
/* STIX_COUNTOF(x) yields the number of elements of the array x. It is
 * meaningful only for a real array, not for a pointer. The argument is
 * parenthesized before indexing so that any expression designating an
 * array can be passed. */
#define STIX_COUNTOF(x) (sizeof(x) / sizeof((x)[0]))
@ -202,7 +211,8 @@ enum stix_errnum_t
STIX_EINTERN, /**< internal error */
STIX_ENOMEM, /**< insufficient memory */
STIX_EINVAL, /**< invalid parameter or data */
STIX_ENOENT /**< no matching entry */
STIX_ENOENT, /**< no matching entry */
STIX_EIOERR /**< I/O error */
};
typedef enum stix_errnum_t stix_errnum_t;
@ -405,8 +415,6 @@ enum stix_code_t
typedef enum stix_code_t stix_code_t;
/*
* OOP encoding
* An object pointer(OOP) is an ordinary pointer value to an object.
@ -607,77 +615,7 @@ struct stix_association_t
typedef struct stix_association_t stix_association_t;
typedef struct stix_association_t* stix_oop_association_t;
#if 0
/* -----------------------------------------
* class structures for classes known to VM
* ----------------------------------------- */
/* NOTE: this enumeration sits inside an "#if 0" region and is therefore
* not compiled. Each group restarts at 0, so enumerators of different
* groups share values. */
enum stix_class_desc_t
{
/* STIX_XXX_SIZE represents the size of the class. other
* enumerators represent the index of instance variables of
* the class */
STIX_ASSOCIATION_KEY = 0,
STIX_ASSOCIATION_VALUE,
STIX_ASSOCIATION_SIZE,
STIX_DICTIONARY_TALLY = 0,
STIX_DICTIONARY_BUCKET,
STIX_DICTIONARY_SIZE,
STIX_BEHAVIOR_SPEC = 0,
STIX_BEHAVIOR_METHODS,
STIX_BEHAVIOR_SUPERCLASS,
STIX_BEHAVIOR_SUBCLASSES,
STIX_BEHAVIOR_SIZE,
STIX_CLASS_SPEC = 0,
STIX_CLASS_METHODS,
STIX_CLASS_SUPERCLASS,
STIX_CLASS_SUBCLASSES,
STIX_CLASS_NAME,
STIX_CLASS_INSTANCE_VARIABLES,
STIX_CLASS_CLASS_VARIABLES,
STIX_CLASS_POOL_DICTIONARIES,
STIX_CLASS_SIZE,
STIX_METACLASS_SPEC = 0,
STIX_METACLASS_METHODS,
STIX_METACLASS_SUPERCLASS,
STIX_METACLASS_SUBCLASSES,
STIX_METACLASS_INSTANCE_CLASS,
STIX_METACLASS_INSTANCE_VARIABLES,
STIX_METACLASS_SIZE,
STIX_BLOCK_CONTEXT = 0,
STIX_BLOCK_ARG_COUNT,
STIX_BLOCK_ARG_LOC,
STIX_BLOCK_BYTE_POINTER,
STIX_BLOCK_SIZE,
STIX_CONTEXT_STACK = 0,
STIX_CONTEXT_STACK_TOP,
STIX_CONTEXT_RECEIVER,
STIX_CONTEXT_PC,
STIX_CONTEXT_METHOD,
STIX_CONTEXT_SIZE,
STIX_METHOD_TEXT = 0,
STIX_METHOD_SELECTOR,
STIX_METHOD_BYTECODES,
STIX_METHOD_TMPCOUNT,
STIX_METHOD_ARGCOUNT,
STIX_METHOD_SIZE,
STIX_SYMTAB_TALLY = 0,
STIX_SYMTAB_BUCKET,
STIX_SYMTAB_SIZE,
/* the system dictionary reuses the dictionary layout */
STIX_SYSDIC_TALLY = STIX_DICTIONARY_TALLY,
STIX_SYSDIC_BUCKET = STIX_DICTIONARY_BUCKET,
STIX_SYSDIC_SIZE = STIX_DICTIONARY_SIZE
};
#endif
/**
* The STIX_CLASSOF() macro return the class of an object including a numeric
@ -705,6 +643,10 @@ struct stix_heap_t
stix_uint8_t* ptr; /* next allocation pointer */
};
/* Forward declaration of the compiler state type, available only when the
* compiler is included in the build. */
#if defined(STIX_INCLUDE_COMPILER)
typedef struct stix_compiler_t stix_compiler_t;
#endif
typedef struct stix_t stix_t;
struct stix_t
@ -731,6 +673,7 @@ struct stix_t
stix_oop_t _false;
/* == NEVER CHANGE THE ORDER OF FIELDS BELOW == */
/* stix_ignite() assumes this order */
stix_oop_t _stix; /* Stix */
stix_oop_t _nil_object; /* NilObject */
stix_oop_t _class; /* Class */
@ -751,6 +694,10 @@ struct stix_t
stix_oop_t* tmp_stack[100]; /* stack for temporaries */
stix_oow_t tmp_count;
#if defined(STIX_INCLUDE_COMPILER)
stix_compiler_t* c;
#endif
};
@ -779,6 +726,25 @@ STIX_EXPORT void stix_fini (
stix_t* vm
);
/* Returns the memory manager stored in stix. */
STIX_EXPORT stix_mmgr_t* stix_getmmgr (
stix_t* stix
);
/* Returns the address of the extension area, which immediately follows
* the stix_t object. */
STIX_EXPORT void* stix_getxtn (
stix_t* stix
);
/* Returns the error number currently stored in stix. */
STIX_EXPORT stix_errnum_t stix_geterrnum (
stix_t* stix
);
/* Stores errnum as the error number of stix. */
STIX_EXPORT void stix_seterrnum (
stix_t* stix,
stix_errnum_t errnum
);
/**
* The stix_getoption() function gets the value of an option
* specified by \a id into the buffer pointed to by \a value.
@ -849,9 +815,7 @@ STIX_EXPORT int stix_ignite (
);
/**
* Temporary OOP management
*/
/* Temporary OOP management */
STIX_EXPORT void stix_pushtmp (
stix_t* stix,
stix_oop_t* oop_ptr
@ -866,6 +830,24 @@ STIX_EXPORT void stix_poptmps (
stix_oow_t count
);
/* Memory allocation/deallocation functions using stix's MMGR */
/* Allocates size bytes. Returns STIX_NULL and sets the error number to
* STIX_ENOMEM on failure. */
STIX_EXPORT void* stix_allocmem (
stix_t* stix,
stix_size_t size
);
/* Same as stix_allocmem(), but the returned block is cleared to zero. */
STIX_EXPORT void* stix_callocmem (
stix_t* stix,
stix_size_t size
);
/* Releases a block through stix's MMGR. */
STIX_EXPORT void stix_freemem (
stix_t* stix,
void* ptr
);
#if defined(__cplusplus)
}
#endif

View File

@ -90,11 +90,8 @@ static stix_oop_t find_or_make_symbol (stix_t* stix, const stix_char_t* ptr, sti
if (!create)
{
/*
stix->errnum = STIX_ENOENT;
return STIX_NULL;
*/
return stix->_nil;
}
tally = STIX_OOP_TO_SMINT(stix->symtab->tally);

184
stix/lib/utf8.c Normal file
View File

@ -0,0 +1,184 @@
/*
* $Id$
*
Copyright (c) 2014-2015 Chung, Hyung-Hwan. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "stix-prv.h"
/*
* from RFC 2279 UTF-8, a transformation format of ISO 10646
*
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
* 1:2 00000000-0000007F 0xxxxxxx
* 2:2 00000080-000007FF 110xxxxx 10xxxxxx
* 3:2 00000800-0000FFFF 1110xxxx 10xxxxxx 10xxxxxx
* 4:4 00010000-001FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
* inv 00200000-03FFFFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
* inv 04000000-7FFFFFFF 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
/* One row of utf8_table: describes the UTF-8 encoding form used for a
* range of character values. */
struct __utf8_t
{
stix_uint32_t lower; /* lowest character value covered by this form */
stix_uint32_t upper; /* highest character value covered by this form */
stix_uint8_t fbyte; /* marker bits of the first utf8 byte: OR-ed in when encoding, matched after masking when decoding */
stix_uint8_t mask; /* bits of the first byte compared against fbyte */
stix_uint8_t fmask; /* bits of the first byte that carry character data */
int length; /* number of bytes */
};
typedef struct __utf8_t __utf8_t;
/* Encoding forms ordered by sequence length (1 to 6 bytes). The columns
* are: lower, upper, fbyte, mask, fmask, length. The 5- and 6-byte rows
* correspond to the ranges marked "inv" in the table comment of this file,
* yet the lookup code in this file does not treat them specially. */
static __utf8_t utf8_table[] =
{
{0x00000000ul, 0x0000007Ful, 0x00, 0x80, 0x7F, 1},
{0x00000080ul, 0x000007FFul, 0xC0, 0xE0, 0x1F, 2},
{0x00000800ul, 0x0000FFFFul, 0xE0, 0xF0, 0x0F, 3},
{0x00010000ul, 0x001FFFFFul, 0xF0, 0xF8, 0x07, 4},
{0x00200000ul, 0x03FFFFFFul, 0xF8, 0xFC, 0x03, 5},
{0x04000000ul, 0x7FFFFFFFul, 0xFC, 0xFE, 0x01, 6}
};
/*
 * Finds the row of utf8_table whose [lower, upper] range contains uc.
 * Returns a pointer to that row, or STIX_NULL when no row matches.
 */
static STIX_INLINE __utf8_t* get_utf8_slot (stix_char_t uc)
{
	unsigned int i;

	STIX_ASSERT (STIX_SIZEOF(stix_iochar_t) == 1);
	STIX_ASSERT (STIX_SIZEOF(stix_char_t) >= 2);

	for (i = 0; i < STIX_COUNTOF(utf8_table); i++)
	{
		__utf8_t* slot = &utf8_table[i];

		/* skip rows whose range does not cover the character */
		if (uc < slot->lower || uc > slot->upper) continue;

		return slot;
	}

	return STIX_NULL; /* invalid character */
}
/*
 * Encodes the character uc as a UTF-8 sequence.
 *
 * The sequence is written to utf8 only when utf8 is not null and size is
 * large enough to hold it. The return value is the length of the sequence
 * in bytes regardless of whether it was written, so a result greater than
 * size tells the caller that the buffer is too small. 0 is returned when
 * no encoding form exists for uc.
 */
stix_size_t stix_uctoutf8 (stix_char_t uc, stix_iochar_t* utf8, stix_size_t size)
{
	__utf8_t* slot;

	slot = get_utf8_slot (uc);
	if (slot == STIX_NULL) return 0; /* illegal character */

	if (utf8 != STIX_NULL && slot->length <= size)
	{
		int pos;

		/* fill the trailing bytes from the last one backwards; each
		 * carries the low 6 bits (0x3F) tagged with 10xxxxxx (0x80) */
		for (pos = slot->length - 1; pos >= 1; pos--)
		{
			utf8[pos] = (uc & 0x3F) | 0x80;
			uc >>= 6;
		}

		/* the remaining bits go into the first byte with its marker */
		utf8[0] = uc | slot->fbyte;
	}

	return (stix_size_t)slot->length;
}
/*
 * Decodes the UTF-8 sequence at the start of utf8, of which size bytes are
 * available.
 *
 * The first byte selects the encoding form. The return value is the length
 * of that form in bytes. A result greater than size means the sequence is
 * incomplete; in that case nothing is validated or stored. When enough
 * bytes are available, every trailing byte must have the form 10xxxxxx,
 * otherwise 0 is returned. 0 is also returned when the first byte matches
 * no encoding form. The decoded character is stored in *uc when uc is not
 * null.
 *
 * NOTE(review): the value is accumulated in a stix_char_t; if that type is
 * 16 bits wide, sequences longer than 3 bytes lose their upper bits.
 * Overlong encodings are not rejected either - confirm whether callers
 * rely on this.
 */
stix_size_t stix_utf8touc (const stix_iochar_t* utf8, stix_size_t size, stix_char_t* uc)
{
	__utf8_t* slot;
	__utf8_t* limit;

	STIX_ASSERT (utf8 != STIX_NULL);
	STIX_ASSERT (size > 0);
	STIX_ASSERT (STIX_SIZEOF(stix_iochar_t) == 1);
	STIX_ASSERT (STIX_SIZEOF(stix_char_t) >= 2);

	limit = utf8_table + STIX_COUNTOF(utf8_table);
	for (slot = utf8_table; slot < limit; slot++)
	{
		int i;
		stix_char_t w;

		if ((utf8[0] & slot->mask) != slot->fbyte) continue;

		/* if size is less than slot->length, the incomplete-sequence
		 * error is indicated by the returned length itself. */
		if (size < slot->length) return (stix_size_t)slot->length;

		w = utf8[0] & slot->fmask;
		for (i = 1; i < slot->length; i++)
		{
			/* in utf8, trailing bytes are all set with 0x80.
			 *
			 *   10XXXXXX & 11000000 => 10000000
			 *
			 * if not, invalid. */
			if ((utf8[i] & 0xC0) != 0x80) return 0;
			w = (w << 6) | (utf8[i] & 0x3F);
		}

		/* the decoded value is handed out only on request */
		if (uc) *uc = w;

		return (stix_size_t)slot->length;
	}

	return 0; /* error - invalid sequence */
}
/*
 * Returns what stix_utf8touc() returns for the same input when no output
 * character is requested: the length of the leading UTF-8 sequence, or 0
 * for an invalid one.
 */
stix_size_t stix_utf8len (const stix_iochar_t* utf8, stix_size_t size)
{
	stix_size_t seqlen = stix_utf8touc (utf8, size, STIX_NULL);
	return seqlen;
}