added utf8 string conversion functions
This commit is contained in:
parent
090c9ac1bf
commit
b70d9a976a
@ -219,7 +219,7 @@ static STIX_INLINE int is_closing_char (stix_cint_t c)
|
|||||||
#define ADD_TOKEN_STR(fsc,s) \
|
#define ADD_TOKEN_STR(fsc,s) \
|
||||||
do { if (add_token_str (fsc, s) == -1) return -1; } while (0)
|
do { if (add_token_str (fsc, s) == -1) return -1; } while (0)
|
||||||
|
|
||||||
static STIX_INLINE int add_token_char (stix_t* fsc, stix_char_t c)
|
static STIX_INLINE int add_token_char (stix_t* fsc, stix_uch_t c)
|
||||||
{
|
{
|
||||||
if (fsc->tok.name.len >= STIX_COUNTOF(fsc->tok.buf) - 1)
|
if (fsc->tok.name.len >= STIX_COUNTOF(fsc->tok.buf) - 1)
|
||||||
{
|
{
|
||||||
@ -232,7 +232,7 @@ static STIX_INLINE int add_token_char (stix_t* fsc, stix_char_t c)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static STIX_INLINE int add_token_str (stix_t* fsc, const stix_char_t* str)
|
static STIX_INLINE int add_token_str (stix_t* fsc, const stix_uch_t* str)
|
||||||
{
|
{
|
||||||
stix_size_t len;
|
stix_size_t len;
|
||||||
|
|
||||||
@ -247,49 +247,52 @@ static STIX_INLINE int add_token_str (stix_t* fsc, const stix_char_t* str)
|
|||||||
fsc->tok.name.len += stix_strcpy (&fsc->tok.buf[fsc->tok.name.len], str);
|
fsc->tok.name.len += stix_strcpy (&fsc->tok.buf[fsc->tok.name.len], str);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static int get_char (stix_t* fsc)
|
static int get_char (stix_t* stix)
|
||||||
{
|
{
|
||||||
stix_ssize_t n;
|
stix_ssize_t n;
|
||||||
|
|
||||||
if (fsc->sio.inp->b.pos >= fsc->sio.inp->b.len)
|
if (stix->c->curinp->b.pos >= stix->c->curinp->b.len)
|
||||||
{
|
{
|
||||||
n = fsc->sio.impl (fsc, STIX_FSC_IO_READ, fsc->sio.inp);
|
n = stix->c->impl (stix, STIX_IO_READ, stix->c->curinp);
|
||||||
if (n <= -1) return -1;
|
if (n <= -1) return -1;
|
||||||
|
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
{
|
{
|
||||||
fsc->sio.inp->lxc.c = STIX_CHAR_EOF;
|
// stix->c->curinp->lxc.c = STIX_CHAR_EOF;
|
||||||
fsc->sio.inp->lxc.line = fsc->sio.inp->line;
|
stix->c->curinp->lxc.c = 0;
|
||||||
fsc->sio.inp->lxc.colm = fsc->sio.inp->colm;
|
stix->c->curinp->lxc.line = stix->c->curinp->line;
|
||||||
fsc->sio.inp->lxc.file = fsc->sio.inp->name;
|
stix->c->curinp->lxc.colm = stix->c->curinp->colm;
|
||||||
fsc->sio.lxc = fsc->sio.inp->lxc;
|
stix->c->curinp->lxc.file = stix->c->curinp->name;
|
||||||
return 0;
|
stix->c->lxc = stix->c->curinp->lxc;
|
||||||
|
return 0; /* indicate that EOF has been read */
|
||||||
}
|
}
|
||||||
|
|
||||||
fsc->sio.inp->b.pos = 0;
|
stix->c->curinp->b.pos = 0;
|
||||||
fsc->sio.inp->b.len = n;
|
stix->c->curinp->b.len = n;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fsc->sio.inp->lxc.c == STIX_T('\n'))
|
if (stix->c->curinp->lxc.c == '\n')
|
||||||
{
|
{
|
||||||
/* if the previous character was a newline,
|
/* if the previous character was a newline,
|
||||||
* increment the line counter and reset column to 1.
|
* increment the line counter and reset column to 1.
|
||||||
* incrementing the line number here instead of
|
* incrementing the line number here instead of
|
||||||
* updating inp->lxc causes the line number for
|
* updating inp->lxc causes the line number for
|
||||||
* TOK_EOF to be the same line as the lxc newline. */
|
* TOK_EOF to be the same line as the lxc newline. */
|
||||||
fsc->sio.inp->line++;
|
stix->c->curinp->line++;
|
||||||
fsc->sio.inp->colm = 1;
|
stix->c->curinp->colm = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
fsc->sio.inp->lxc.c = fsc->sio.inp->buf[fsc->sio.inp->b.pos++];
|
stix->c->curinp->lxc.c = stix->c->curinp->buf[stix->c->curinp->b.pos++];
|
||||||
fsc->sio.inp->lxc.line = fsc->sio.inp->line;
|
stix->c->curinp->lxc.line = stix->c->curinp->line;
|
||||||
fsc->sio.inp->lxc.colm = fsc->sio.inp->colm++;
|
stix->c->curinp->lxc.colm = stix->c->curinp->colm++;
|
||||||
fsc->sio.inp->lxc.file = fsc->sio.inp->name;
|
stix->c->curinp->lxc.file = stix->c->curinp->name;
|
||||||
fsc->sio.lxc = fsc->sio.inp->lxc;
|
stix->c->lxc = stix->c->curinp->lxc;
|
||||||
return 0;
|
return 1; /* indicate that a normal character has been read */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
static int skip_spaces (stix_t* fsc)
|
static int skip_spaces (stix_t* fsc)
|
||||||
{
|
{
|
||||||
while (STIX_ISSPACE(fsc->sio.lxc.c)) GET_CHAR (fsc);
|
while (STIX_ISSPACE(fsc->sio.lxc.c)) GET_CHAR (fsc);
|
||||||
@ -653,9 +656,9 @@ retry:
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
stix_cstr_t ea;
|
stix_cstr_t ea;
|
||||||
stix_char_t cc;
|
stix_uch_t cc;
|
||||||
|
|
||||||
cc = (stix_char_t)c;
|
cc = (stix_uch_t)c;
|
||||||
ea.ptr = &cc;
|
ea.ptr = &cc;
|
||||||
ea.len = 1;
|
ea.len = 1;
|
||||||
|
|
||||||
@ -685,17 +688,17 @@ static int begin_include (stix_t* fsc)
|
|||||||
stix_ioarg_t* arg;
|
stix_ioarg_t* arg;
|
||||||
stix_link_t* link;
|
stix_link_t* link;
|
||||||
|
|
||||||
link = (stix_link_t*) stix_callocmem (fsc, STIX_SIZEOF(*link) + STIX_SIZEOF(stix_char_t) * (fsc->tok.name.len + 1));
|
link = (stix_link_t*) stix_callocmem (fsc, STIX_SIZEOF(*link) + STIX_SIZEOF(stix_uch_t) * (fsc->tok.name.len + 1));
|
||||||
if (link == STIX_NULL) goto oops;
|
if (link == STIX_NULL) goto oops;
|
||||||
|
|
||||||
stix_strcpy ((stix_char_t*)(link + 1), fsc->tok.name.ptr);
|
stix_strcpy ((stix_uch_t*)(link + 1), fsc->tok.name.ptr);
|
||||||
link->link = fsc->sio_names;
|
link->link = fsc->sio_names;
|
||||||
fsc->sio_names = link;
|
fsc->sio_names = link;
|
||||||
|
|
||||||
arg = (stix_ioarg_t*) stix_callocmem (fsc, STIX_SIZEOF(*arg));
|
arg = (stix_ioarg_t*) stix_callocmem (fsc, STIX_SIZEOF(*arg));
|
||||||
if (arg == STIX_NULL) goto oops;
|
if (arg == STIX_NULL) goto oops;
|
||||||
|
|
||||||
arg->name = (const stix_char_t*)(link + 1);
|
arg->name = (const stix_uch_t*)(link + 1);
|
||||||
arg->line = 1;
|
arg->line = 1;
|
||||||
arg->colm = 1;
|
arg->colm = 1;
|
||||||
arg->prev = fsc->sio.inp;
|
arg->prev = fsc->sio.inp;
|
||||||
@ -777,7 +780,7 @@ static STIX_INLINE int is_tok_pseudovar (stix_t* fsc)
|
|||||||
stix_strequal(fsc->tok.name.ptr, STIX_T("false")));
|
stix_strequal(fsc->tok.name.ptr, STIX_T("false")));
|
||||||
}
|
}
|
||||||
|
|
||||||
static STIX_INLINE int is_tok_binsel (stix_t* fsc, const stix_char_t* sel)
|
static STIX_INLINE int is_tok_binsel (stix_t* fsc, const stix_uch_t* sel)
|
||||||
{
|
{
|
||||||
return fsc->tok.type == STIX_FSC_TOK_BINSEL &&
|
return fsc->tok.type == STIX_FSC_TOK_BINSEL &&
|
||||||
stix_strequal (fsc->tok.name.ptr, sel);
|
stix_strequal (fsc->tok.name.ptr, sel);
|
||||||
@ -848,7 +851,7 @@ static STIX_INLINE int is_tok_binsel (stix_t* fsc, const stix_char_t* sel)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
static STIX_INLINE int emit_code_test (
|
static STIX_INLINE int emit_code_test (
|
||||||
stix_t* fsc, const stix_char_t* high, const stix_char_t* low)
|
stix_t* fsc, const stix_uch_t* high, const stix_uch_t* low)
|
||||||
{
|
{
|
||||||
wprintf (L"CODE: %s %s\n", high, low);
|
wprintf (L"CODE: %s %s\n", high, low);
|
||||||
return 0;
|
return 0;
|
||||||
@ -1037,7 +1040,7 @@ static int __add_literal (stix_t* fsc, stix_word_t literal)
|
|||||||
return fsc->literal_count - 1;
|
return fsc->literal_count - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __add_character_literal (stix_t* fsc, stix_char_t ch)
|
static int __add_character_literal (stix_t* fsc, stix_uch_t ch)
|
||||||
{
|
{
|
||||||
stix_word_t i, c, literal;
|
stix_word_t i, c, literal;
|
||||||
stix_vm_t* stx = fsc->stx;
|
stix_vm_t* stx = fsc->stx;
|
||||||
@ -1056,7 +1059,7 @@ static int __add_character_literal (stix_t* fsc, stix_char_t ch)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int __add_string_literal (
|
static int __add_string_literal (
|
||||||
stix_t* fsc, const stix_char_t* str, stix_word_t size)
|
stix_t* fsc, const stix_uch_t* str, stix_word_t size)
|
||||||
{
|
{
|
||||||
stix_word_t i, c, literal;
|
stix_word_t i, c, literal;
|
||||||
stix_vm_t* stx = fsc->stx;
|
stix_vm_t* stx = fsc->stx;
|
||||||
@ -1077,7 +1080,7 @@ static int __add_string_literal (
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int __add_symbol_literal (
|
static int __add_symbol_literal (
|
||||||
stix_t* fsc, const stix_char_t* str, stix_word_t size)
|
stix_t* fsc, const stix_uch_t* str, stix_word_t size)
|
||||||
{
|
{
|
||||||
stix_vm_t* stx = fsc->stx;
|
stix_vm_t* stx = fsc->stx;
|
||||||
return __add_literal (fsc, stix_new_symbolx(stx, str, size));
|
return __add_literal (fsc, stix_new_symbolx(stx, str, size));
|
||||||
@ -1210,7 +1213,7 @@ static int parse_expression (stix_t* fsc)
|
|||||||
stix_vm_t* stx = fsc->stx;
|
stix_vm_t* stx = fsc->stx;
|
||||||
|
|
||||||
if (fsc->tok.type == STIX_FSC_TOK_IDENT) {
|
if (fsc->tok.type == STIX_FSC_TOK_IDENT) {
|
||||||
stix_char_t* ident = stix_tok_yield (&fsc->tok, 0);
|
stix_uch_t* ident = stix_tok_yield (&fsc->tok, 0);
|
||||||
if (ident == STIX_NULL) {
|
if (ident == STIX_NULL) {
|
||||||
fsc->errnum = STIX_FSC_ERROR_MEMORY;
|
fsc->errnum = STIX_FSC_ERROR_MEMORY;
|
||||||
return -1;
|
return -1;
|
||||||
@ -1242,7 +1245,7 @@ static int parse_expression (stix_t* fsc)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int parse_basic_expression (
|
static int parse_basic_expression (
|
||||||
stix_t* fsc, const stix_char_t* ident)
|
stix_t* fsc, const stix_uch_t* ident)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* <basic expression> ::= <primary> [<messages> <cascaded messages>]
|
* <basic expression> ::= <primary> [<messages> <cascaded messages>]
|
||||||
@ -1259,7 +1262,7 @@ static int parse_basic_expression (
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int parse_assignment (
|
static int parse_assignment (
|
||||||
stix_t* fsc, const stix_char_t* target)
|
stix_t* fsc, const stix_uch_t* target)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* <assignment> ::= <assignment target> assignmentOperator <expression>
|
* <assignment> ::= <assignment target> assignmentOperator <expression>
|
||||||
@ -1304,7 +1307,7 @@ static int parse_assignment (
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int parse_primary (
|
static int parse_primary (
|
||||||
stix_t* fsc, const stix_char_t* ident, int* is_super)
|
stix_t* fsc, const stix_uch_t* ident, int* is_super)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* <primary> ::=
|
* <primary> ::=
|
||||||
@ -1389,7 +1392,7 @@ static int parse_primary (
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int parse_primary_ident (
|
static int parse_primary_ident (
|
||||||
stix_t* fsc, const stix_char_t* ident, int* is_super)
|
stix_t* fsc, const stix_uch_t* ident, int* is_super)
|
||||||
{
|
{
|
||||||
stix_word_t i;
|
stix_word_t i;
|
||||||
stix_vm_t* stx = fsc->stx;
|
stix_vm_t* stx = fsc->stx;
|
||||||
@ -1616,7 +1619,7 @@ static int parse_binary_message (stix_t* fsc, int is_super)
|
|||||||
|
|
||||||
while (fsc->tok.type == STIX_FSC_TOK_BINSEL)
|
while (fsc->tok.type == STIX_FSC_TOK_BINSEL)
|
||||||
{
|
{
|
||||||
stix_char_t* op = stix_tok_yield (&fsc->tok, 0);
|
stix_uch_t* op = stix_tok_yield (&fsc->tok, 0);
|
||||||
if (op == STIX_NULL) {
|
if (op == STIX_NULL) {
|
||||||
fsc->errnum = STIX_FSC_ERROR_MEMORY;
|
fsc->errnum = STIX_FSC_ERROR_MEMORY;
|
||||||
return -1;
|
return -1;
|
||||||
@ -1707,11 +1710,11 @@ static int parse_method (stix_t* fsc, stix_word_t method_class, void* input)
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static int get_class_type (const stix_char_t* str, class_type_t* type)
|
static int get_class_type (const stix_uch_t* str, class_type_t* type)
|
||||||
{
|
{
|
||||||
static struct
|
static struct
|
||||||
{
|
{
|
||||||
stix_char_t* word;
|
stix_uch_t* word;
|
||||||
class_type_t type;
|
class_type_t type;
|
||||||
} tab[] =
|
} tab[] =
|
||||||
{
|
{
|
||||||
@ -1735,11 +1738,11 @@ static int get_class_type (const stix_char_t* str, class_type_t* type)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int get_vardef_type (const stix_char_t* str, vardef_type_t* type)
|
static int get_vardef_type (const stix_uch_t* str, vardef_type_t* type)
|
||||||
{
|
{
|
||||||
static struct
|
static struct
|
||||||
{
|
{
|
||||||
stix_char_t* word;
|
stix_uch_t* word;
|
||||||
class_type_t type;
|
class_type_t type;
|
||||||
} tab[] =
|
} tab[] =
|
||||||
{
|
{
|
||||||
@ -2254,13 +2257,20 @@ int stix_compile (stix_t* stix, stix_ioimpl_t io)
|
|||||||
stix->c->arg.line = 1;
|
stix->c->arg.line = 1;
|
||||||
stix->c->arg.colm = 1;
|
stix->c->arg.colm = 1;
|
||||||
stix->c->curinp = &stix->c->arg;
|
stix->c->curinp = &stix->c->arg;
|
||||||
clear_sio_names (stix);
|
// clear_sio_names (stix);
|
||||||
|
|
||||||
/* open the top-level stream */
|
/* open the top-level stream */
|
||||||
n = stix->c->impl (stix, STIX_IO_OPEN, stix->c->curinp);
|
n = stix->c->impl (stix, STIX_IO_OPEN, stix->c->curinp);
|
||||||
if (n <= -1) return -1;
|
if (n <= -1) return -1;
|
||||||
|
|
||||||
if (compile_stream (stix) <= -1) goto oops;
|
// if (compile_stream (stix) <= -1) goto oops;
|
||||||
|
while (get_char(stix) > 0)
|
||||||
|
{
|
||||||
|
stix_bch_t buf[16];
|
||||||
|
stix_size_t len;
|
||||||
|
len = stix_uctoutf8 (stix->c->curinp->lxc.c, buf, STIX_COUNTOF(buf));
|
||||||
|
printf ("%.*s", (int)len, buf);
|
||||||
|
}
|
||||||
|
|
||||||
/* close the stream */
|
/* close the stream */
|
||||||
STIX_ASSERT (stix->c->curinp == &stix->c->arg);
|
STIX_ASSERT (stix->c->curinp == &stix->c->arg);
|
||||||
|
@ -183,7 +183,7 @@ static int ignite_3 (stix_t* stix)
|
|||||||
static struct symbol_name_t
|
static struct symbol_name_t
|
||||||
{
|
{
|
||||||
stix_oow_t len;
|
stix_oow_t len;
|
||||||
stix_char_t str[16];
|
stix_uch_t str[16];
|
||||||
} symnames[] = {
|
} symnames[] = {
|
||||||
{ 4, { 'S','t','i','x' } },
|
{ 4, { 'S','t','i','x' } },
|
||||||
{ 6, { 'O','b','j','e','c','t' } },
|
{ 6, { 'O','b','j','e','c','t' } },
|
||||||
|
@ -28,7 +28,7 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
typedef struct xtn_t xtn_t;
|
typedef struct xtn_t xtn_t;
|
||||||
struct xtn_t
|
struct xtn_t
|
||||||
@ -64,31 +64,41 @@ static stix_mmgr_t sys_mmgr =
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static STIX_INLINE stix_oow_t open_input (stix_t* stix, stix_ioarg_t* arg)
|
static STIX_INLINE stix_ssize_t open_input (stix_t* stix, stix_ioarg_t* arg)
|
||||||
{
|
{
|
||||||
if (arg->includer)
|
if (arg->includer)
|
||||||
{
|
{
|
||||||
/* includee */
|
/* includee */
|
||||||
xtn_t* xtn = stix_getxtn(stix);
|
stix_bch_t bcs[1024]; /* TODO: right buffer size */
|
||||||
|
stix_size_t bcslen = STIX_COUNTOF(bcs);
|
||||||
|
stix_size_t ucslen = ~(stix_size_t)0;
|
||||||
|
|
||||||
arg->handle = fopen (xtn->source_path, "r");
|
if (stix_ucstoutf8 (arg->name, &ucslen, bcs, &bcslen) <= -1)
|
||||||
if (!arg->handle)
|
|
||||||
{
|
{
|
||||||
stix_seterrnum (stix, STIX_EIOERR);
|
stix_seterrnum (stix, STIX_EECERR);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* main stream */
|
/* main stream */
|
||||||
/*char tmp[PATH_MAX];*/
|
xtn_t* xtn = stix_getxtn(stix);
|
||||||
|
arg->handle = fopen (xtn->source_path, "r");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!arg->handle)
|
||||||
|
{
|
||||||
|
stix_seterrnum (stix, STIX_EIOERR);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static STIX_INLINE stix_oow_t read_input (stix_t* stix, stix_ioarg_t* arg)
|
static STIX_INLINE stix_ssize_t read_input (stix_t* stix, stix_ioarg_t* arg)
|
||||||
{
|
{
|
||||||
xtn_t* xtn = stix_getxtn(stix);
|
xtn_t* xtn = stix_getxtn(stix);
|
||||||
stix_size_t n, bcslen, cslen;
|
stix_size_t n, bcslen, ucslen, remlen;
|
||||||
int x;
|
int x;
|
||||||
|
|
||||||
STIX_ASSERT (arg->handle != STIX_NULL);
|
STIX_ASSERT (arg->handle != STIX_NULL);
|
||||||
@ -100,32 +110,32 @@ static STIX_INLINE stix_oow_t read_input (stix_t* stix, stix_ioarg_t* arg)
|
|||||||
stix_seterrnum (stix, STIX_EIOERR);
|
stix_seterrnum (stix, STIX_EIOERR);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
xtn->bchar_len += n;
|
xtn->bchar_len += n;
|
||||||
bcslen = xtn->bchar_len;
|
bcslen = xtn->bchar_len;
|
||||||
cslen = STIX_COUNTOF(arg->buf);
|
ucslen = STIX_COUNTOF(arg->buf);
|
||||||
x = stix_utf8toucs (xtn->bchar_buf, &bcslen, arg->buf, &cslen);
|
x = stix_utf8toucs (xtn->bchar_buf, &bcslen, arg->buf, &ucslen);
|
||||||
if (x == -2)
|
if (x <= -1 && ucslen <= 0)
|
||||||
{
|
|
||||||
/* buffer to small */
|
|
||||||
}
|
|
||||||
if (x <= -1)
|
|
||||||
{
|
{
|
||||||
|
stix_seterrnum (stix, STIX_EECERR);
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
remlen = xtn->bchar_len - bcslen;
|
||||||
|
if (remlen > 0) memmove (xtn->bchar_buf, &xtn->bchar_buf[bcslen], remlen);
|
||||||
|
xtn->bchar_len = remlen;
|
||||||
|
return ucslen;
|
||||||
}
|
}
|
||||||
|
|
||||||
static STIX_INLINE stix_oow_t close_input (stix_t* stix, stix_ioarg_t* arg)
|
static STIX_INLINE stix_ssize_t close_input (stix_t* stix, stix_ioarg_t* arg)
|
||||||
{
|
{
|
||||||
STIX_ASSERT (arg->handle != STIX_NULL);
|
STIX_ASSERT (arg->handle != STIX_NULL);
|
||||||
fclose (arg->handle);
|
fclose (arg->handle);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
/* TODO: IMPLEMENT PROPER INPUT HANDLER */
|
|
||||||
|
|
||||||
static stix_oow_t input_handler (stix_t* stix, stix_iocmd_t cmd, stix_ioarg_t* arg)
|
static stix_ssize_t input_handler (stix_t* stix, stix_iocmd_t cmd, stix_ioarg_t* arg)
|
||||||
{
|
{
|
||||||
switch (cmd)
|
switch (cmd)
|
||||||
{
|
{
|
||||||
@ -217,8 +227,8 @@ int main (int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
stix_char_t x[] = { 'S', 't', 'r', 'i', 'n', 'g', '\0' };
|
stix_uch_t x[] = { 'S', 't', 'r', 'i', 'n', 'g', '\0' };
|
||||||
stix_char_t y[] = { 'S', 'y', 'm', 'b', 'o', 'l', '\0' };
|
stix_uch_t y[] = { 'S', 'y', 'm', 'b', 'o', 'l', '\0' };
|
||||||
stix_oop_t a, b;
|
stix_oop_t a, b;
|
||||||
|
|
||||||
a = stix_makesymbol (stix, x, 6);
|
a = stix_makesymbol (stix, x, 6);
|
||||||
|
@ -617,4 +617,5 @@ Single line comment
|
|||||||
#! comment text (easy handling to skip hash bang)
|
#! comment text (easy handling to skip hash bang)
|
||||||
|
|
||||||
Multi-line comments - double quoted as in smalltalk
|
Multi-line comments - double quoted as in smalltalk
|
||||||
" comment text "
|
" comment text 설명이라지요. "
|
||||||
|
|
||||||
|
@ -114,9 +114,9 @@ static stix_oop_t alloc_numeric_array (stix_t* stix, const void* ptr, stix_oow_t
|
|||||||
return hdr;
|
return hdr;
|
||||||
}
|
}
|
||||||
|
|
||||||
stix_oop_t stix_alloccharobj (stix_t* stix, const stix_char_t* ptr, stix_oow_t len)
|
stix_oop_t stix_alloccharobj (stix_t* stix, const stix_uch_t* ptr, stix_oow_t len)
|
||||||
{
|
{
|
||||||
return alloc_numeric_array (stix, ptr, len, STIX_OBJ_TYPE_CHAR, STIX_SIZEOF(stix_char_t), 1);
|
return alloc_numeric_array (stix, ptr, len, STIX_OBJ_TYPE_CHAR, STIX_SIZEOF(stix_uch_t), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
stix_oop_t stix_allocuint8obj (stix_t* stix, const stix_uint8_t* ptr, stix_oow_t len)
|
stix_oop_t stix_allocuint8obj (stix_t* stix, const stix_uint8_t* ptr, stix_oow_t len)
|
||||||
|
@ -140,10 +140,10 @@ typedef enum stix_iocmd_t stix_iocmd_t;
|
|||||||
typedef struct stix_iolxc_t stix_iolxc_t;
|
typedef struct stix_iolxc_t stix_iolxc_t;
|
||||||
struct stix_iolxc_t
|
struct stix_iolxc_t
|
||||||
{
|
{
|
||||||
stix_char_t c; /**< character */
|
stix_uch_t c; /**< character */
|
||||||
unsigned long line; /**< line */
|
unsigned long line; /**< line */
|
||||||
unsigned long colm; /**< column */
|
unsigned long colm; /**< column */
|
||||||
const stix_char_t* file; /**< file specified in #include */
|
const stix_uch_t* file; /**< file specified in #include */
|
||||||
};
|
};
|
||||||
|
|
||||||
enum stix_ioarg_flag_t
|
enum stix_ioarg_flag_t
|
||||||
@ -160,7 +160,7 @@ struct stix_ioarg_t
|
|||||||
* It is #STIX_NULL for the main stream and points to a non-NULL string
|
* It is #STIX_NULL for the main stream and points to a non-NULL string
|
||||||
* for an included stream.
|
* for an included stream.
|
||||||
*/
|
*/
|
||||||
const stix_char_t* name;
|
const stix_uch_t* name;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* [OUT] I/O handle set by a handler.
|
* [OUT] I/O handle set by a handler.
|
||||||
@ -173,7 +173,7 @@ struct stix_ioarg_t
|
|||||||
/**
|
/**
|
||||||
* [OUT] place data here
|
* [OUT] place data here
|
||||||
*/
|
*/
|
||||||
stix_char_t buf[1024];
|
stix_uch_t buf[1024];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* [IN] points to the data of the includer. It is #STIX_NULL for the
|
* [IN] points to the data of the includer. It is #STIX_NULL for the
|
||||||
@ -195,7 +195,7 @@ struct stix_ioarg_t
|
|||||||
/*-----------------------------------------------------------------*/
|
/*-----------------------------------------------------------------*/
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef stix_oow_t (*stix_ioimpl_t) (
|
typedef stix_ssize_t (*stix_ioimpl_t) (
|
||||||
stix_t* stix,
|
stix_t* stix,
|
||||||
stix_iocmd_t cmd,
|
stix_iocmd_t cmd,
|
||||||
stix_ioarg_t* arg
|
stix_ioarg_t* arg
|
||||||
@ -260,13 +260,13 @@ stix_oow_t stix_hashbytes (
|
|||||||
);
|
);
|
||||||
|
|
||||||
stix_oow_t stix_hashchars (
|
stix_oow_t stix_hashchars (
|
||||||
const stix_char_t* ptr,
|
const stix_uch_t* ptr,
|
||||||
stix_oow_t len
|
stix_oow_t len
|
||||||
);
|
);
|
||||||
|
|
||||||
int stix_equalchars (
|
int stix_equalchars (
|
||||||
const stix_char_t* str1,
|
const stix_uch_t* str1,
|
||||||
const stix_char_t* str2,
|
const stix_uch_t* str2,
|
||||||
stix_oow_t len
|
stix_oow_t len
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -289,7 +289,7 @@ stix_oop_t stix_allocoopobj (
|
|||||||
|
|
||||||
stix_oop_t stix_alloccharobj (
|
stix_oop_t stix_alloccharobj (
|
||||||
stix_t* stix,
|
stix_t* stix,
|
||||||
const stix_char_t* ptr,
|
const stix_uch_t* ptr,
|
||||||
stix_oow_t len
|
stix_oow_t len
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -310,13 +310,13 @@ stix_oop_t stix_allocuint16obj (
|
|||||||
/* ========================================================================= */
|
/* ========================================================================= */
|
||||||
stix_oop_t stix_makesymbol (
|
stix_oop_t stix_makesymbol (
|
||||||
stix_t* stix,
|
stix_t* stix,
|
||||||
const stix_char_t* ptr,
|
const stix_uch_t* ptr,
|
||||||
stix_oow_t len
|
stix_oow_t len
|
||||||
);
|
);
|
||||||
|
|
||||||
stix_oop_t stix_findsymbol (
|
stix_oop_t stix_findsymbol (
|
||||||
stix_t* stix,
|
stix_t* stix,
|
||||||
const stix_char_t* ptr,
|
const stix_uch_t* ptr,
|
||||||
stix_oow_t len
|
stix_oow_t len
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -338,22 +338,21 @@ stix_oop_t stix_getatsysdic (
|
|||||||
/* utf8.c */
|
/* utf8.c */
|
||||||
/* ========================================================================= */
|
/* ========================================================================= */
|
||||||
stix_size_t stix_uctoutf8 (
|
stix_size_t stix_uctoutf8 (
|
||||||
stix_char_t uc,
|
stix_uch_t uc,
|
||||||
stix_bchar_t* utf8,
|
stix_bch_t* utf8,
|
||||||
stix_size_t size
|
stix_size_t size
|
||||||
);
|
);
|
||||||
|
|
||||||
stix_size_t stix_utf8touc (
|
stix_size_t stix_utf8touc (
|
||||||
const stix_bchar_t* utf8,
|
const stix_bch_t* utf8,
|
||||||
stix_size_t size,
|
stix_size_t size,
|
||||||
stix_char_t* uc
|
stix_uch_t* uc
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
int stix_ucstoutf8 (
|
int stix_ucstoutf8 (
|
||||||
const stix_char_t* ucs,
|
const stix_uch_t* ucs,
|
||||||
stix_size_t* ucslen,
|
stix_size_t* ucslen,
|
||||||
stix_bchar_t* bcs,
|
stix_bch_t* bcs,
|
||||||
stix_size_t* bcslen
|
stix_size_t* bcslen
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -363,27 +362,46 @@ int stix_ucstoutf8 (
|
|||||||
* It never returns -2 if \a ucs is #STIX_NULL.
|
* It never returns -2 if \a ucs is #STIX_NULL.
|
||||||
*
|
*
|
||||||
* \code
|
* \code
|
||||||
* const stix_bchar_t* bcs = "a multibyte string";
|
* const stix_bch_t* bcs = "test string";
|
||||||
* stix_char_t ucs[100];
|
* stix_uch_t ucs[100];
|
||||||
* stix_size_t ucslen = STIX_COUNTOF(ucs);
|
* stix_size_t ucslen = STIX_COUNTOF(ucs);
|
||||||
* stix_size_t bcslen = strlen(bcs);
|
* stix_size_t bcslen = 11;
|
||||||
* int n;
|
* int n;
|
||||||
* n = stix_utf8toucs (bcs, &bcslen, ucs, &ucslen);
|
* n = stix_utf8toucs (bcs, &bcslen, ucs, &ucslen);
|
||||||
* if (n <= -1) { invalid/incomplete sequence or buffer too small }
|
* if (n <= -1) { invalid/incomplete sequence or buffer too small }
|
||||||
* \endcode
|
* \endcode
|
||||||
*
|
*
|
||||||
|
* For a null-terminated string, you can specify ~(stix_size_t)0 in
|
||||||
|
* \a bcslen. The destination buffer \a ucs also must be large enough to
|
||||||
|
* store a terminating null. Otherwise, -2 is returned.
|
||||||
|
*
|
||||||
|
* The resulting \a ucslen can still be greater than 0 even if the return
|
||||||
|
* value is negative. The value indiates the number of characters converted
|
||||||
|
* before the error has occurred.
|
||||||
|
*
|
||||||
* \return 0 on success.
|
* \return 0 on success.
|
||||||
* -1 if \a bcs contains an illegal character.
|
* -1 if \a bcs contains an illegal character.
|
||||||
* -2 if the wide-character string buffer is too small.
|
* -2 if the wide-character string buffer is too small.
|
||||||
* -3 if \a bcs is not a complete sequence.
|
* -3 if \a bcs is not a complete sequence.
|
||||||
*/
|
*/
|
||||||
int stix_utf8toucs (
|
int stix_utf8toucs (
|
||||||
const stix_bchar_t* bcs,
|
const stix_bch_t* bcs,
|
||||||
stix_size_t* bcslen,
|
stix_size_t* bcslen,
|
||||||
stix_char_t* ucs,
|
stix_uch_t* ucs,
|
||||||
stix_size_t* ucslen
|
stix_size_t* ucslen
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The stix_ucslen() function returns the number of characters before
|
||||||
|
* a terminating null.
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
stix_size_t stix_ucslen (
|
||||||
|
const stix_uch_t* ucs
|
||||||
|
);
|
||||||
|
*/
|
||||||
|
|
||||||
/* ========================================================================= */
|
/* ========================================================================= */
|
||||||
/* comp.c */
|
/* comp.c */
|
||||||
/* ========================================================================= */
|
/* ========================================================================= */
|
||||||
|
@ -155,12 +155,12 @@ stix_oow_t stix_hashbytes (const stix_uint8_t* ptr, stix_oow_t len)
|
|||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
stix_oow_t stix_hashchars (const stix_char_t* ptr, stix_oow_t len)
|
stix_oow_t stix_hashchars (const stix_uch_t* ptr, stix_oow_t len)
|
||||||
{
|
{
|
||||||
return stix_hashbytes ((const stix_uint8_t *)ptr, len * STIX_SIZEOF(*ptr));
|
return stix_hashbytes ((const stix_uint8_t *)ptr, len * STIX_SIZEOF(*ptr));
|
||||||
}
|
}
|
||||||
|
|
||||||
int stix_equalchars (const stix_char_t* str1, const stix_char_t* str2, stix_oow_t len)
|
int stix_equalchars (const stix_uch_t* str1, const stix_uch_t* str2, stix_oow_t len)
|
||||||
{
|
{
|
||||||
stix_oow_t i;
|
stix_oow_t i;
|
||||||
|
|
||||||
|
@ -44,9 +44,26 @@ typedef unsigned short int stix_uint16_t;
|
|||||||
#endif
|
#endif
|
||||||
typedef unsigned long int stix_uintptr_t;
|
typedef unsigned long int stix_uintptr_t;
|
||||||
typedef unsigned long int stix_size_t;
|
typedef unsigned long int stix_size_t;
|
||||||
|
typedef long int stix_ssize_t;
|
||||||
|
|
||||||
typedef unsigned short int stix_char_t; /* TODO ... wchar_t??? */
|
typedef unsigned short int stix_uch_t; /* TODO ... wchar_t??? */
|
||||||
typedef char stix_bchar_t;
|
typedef char stix_bch_t;
|
||||||
|
|
||||||
|
|
||||||
|
struct stix_ucs_t
|
||||||
|
{
|
||||||
|
stix_uch_t* ptr;
|
||||||
|
stix_size_t len;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct stix_bcs_t
|
||||||
|
{
|
||||||
|
stix_bch_t* ptr;
|
||||||
|
stix_size_t len;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct stix_ucs_t stix_ucs_t;
|
||||||
|
typedef struct stix_bcs_t stix_bcs_t;
|
||||||
|
|
||||||
/* =========================================================================
|
/* =========================================================================
|
||||||
* PRIMITIVE MACROS
|
* PRIMITIVE MACROS
|
||||||
@ -185,15 +202,15 @@ struct stix_mmgr_t
|
|||||||
|
|
||||||
typedef struct stix_cmgr_t stix_cmgr_t;
|
typedef struct stix_cmgr_t stix_cmgr_t;
|
||||||
|
|
||||||
typedef stix_size_t (*stix_cmgr_bctoc_t) (
|
typedef stix_size_t (*stix_cmgr_bctouc_t) (
|
||||||
const stix_bchar_t* mb,
|
const stix_bch_t* mb,
|
||||||
stix_size_t size,
|
stix_size_t size,
|
||||||
stix_char_t* wc
|
stix_uch_t* wc
|
||||||
);
|
);
|
||||||
|
|
||||||
typedef stix_size_t (*stix_cmgr_ctobc_t) (
|
typedef stix_size_t (*stix_cmgr_uctobc_t) (
|
||||||
stix_char_t wc,
|
stix_uch_t wc,
|
||||||
stix_bchar_t* mb,
|
stix_bch_t* mb,
|
||||||
stix_size_t size
|
stix_size_t size
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -206,8 +223,8 @@ typedef stix_size_t (*stix_cmgr_ctobc_t) (
|
|||||||
*/
|
*/
|
||||||
struct stix_cmgr_t
|
struct stix_cmgr_t
|
||||||
{
|
{
|
||||||
stix_cmgr_bctoc_t bctoc;
|
stix_cmgr_bctouc_t bctouc;
|
||||||
stix_cmgr_ctobc_t ctobc;
|
stix_cmgr_uctobc_t uctobc;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* =========================================================================
|
/* =========================================================================
|
||||||
@ -258,7 +275,8 @@ enum stix_errnum_t
|
|||||||
STIX_ENOMEM, /**< insufficient memory */
|
STIX_ENOMEM, /**< insufficient memory */
|
||||||
STIX_EINVAL, /**< invalid parameter or data */
|
STIX_EINVAL, /**< invalid parameter or data */
|
||||||
STIX_ENOENT, /**< no matching entry */
|
STIX_ENOENT, /**< no matching entry */
|
||||||
STIX_EIOERR /**< I/O error */
|
STIX_EIOERR, /**< I/O error */
|
||||||
|
STIX_EECERR /**< encoding conversion error */
|
||||||
};
|
};
|
||||||
typedef enum stix_errnum_t stix_errnum_t;
|
typedef enum stix_errnum_t stix_errnum_t;
|
||||||
|
|
||||||
@ -605,7 +623,7 @@ struct stix_obj_oop_t
|
|||||||
struct stix_obj_char_t
|
struct stix_obj_char_t
|
||||||
{
|
{
|
||||||
STIX_OBJ_HEADER;
|
STIX_OBJ_HEADER;
|
||||||
stix_char_t slot[1];
|
stix_uch_t slot[1];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct stix_obj_uint8_t
|
struct stix_obj_uint8_t
|
||||||
@ -832,7 +850,7 @@ STIX_EXPORT void stix_gc (
|
|||||||
*/
|
*/
|
||||||
STIX_EXPORT int stix_findclass (
|
STIX_EXPORT int stix_findclass (
|
||||||
stix_t* vm,
|
stix_t* vm,
|
||||||
const stix_char_t* name,
|
const stix_uch_t* name,
|
||||||
stix_oop_t* oop
|
stix_oop_t* oop
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ static stix_oop_oop_t expand_bucket (stix_t* stix, stix_oop_oop_t old_bucket)
|
|||||||
return new_bucket;
|
return new_bucket;
|
||||||
}
|
}
|
||||||
|
|
||||||
static stix_oop_t find_or_make_symbol (stix_t* stix, const stix_char_t* ptr, stix_oow_t len, int create)
|
static stix_oop_t find_or_make_symbol (stix_t* stix, const stix_uch_t* ptr, stix_oow_t len, int create)
|
||||||
{
|
{
|
||||||
stix_oow_t index, tally;
|
stix_oow_t index, tally;
|
||||||
stix_oop_char_t symbol;
|
stix_oop_char_t symbol;
|
||||||
@ -130,12 +130,12 @@ static stix_oop_t find_or_make_symbol (stix_t* stix, const stix_char_t* ptr, sti
|
|||||||
return (stix_oop_t)symbol;
|
return (stix_oop_t)symbol;
|
||||||
}
|
}
|
||||||
|
|
||||||
stix_oop_t stix_makesymbol (stix_t* stix, const stix_char_t* ptr, stix_oow_t len)
|
stix_oop_t stix_makesymbol (stix_t* stix, const stix_uch_t* ptr, stix_oow_t len)
|
||||||
{
|
{
|
||||||
return find_or_make_symbol (stix, ptr, len, 1);
|
return find_or_make_symbol (stix, ptr, len, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
stix_oop_t stix_findsymbol (stix_t* stix, const stix_char_t* ptr, stix_oow_t len)
|
stix_oop_t stix_findsymbol (stix_t* stix, const stix_uch_t* ptr, stix_oow_t len)
|
||||||
{
|
{
|
||||||
return find_or_make_symbol (stix, ptr, len, 0);
|
return find_or_make_symbol (stix, ptr, len, 0);
|
||||||
}
|
}
|
||||||
|
232
stix/lib/utf8.c
232
stix/lib/utf8.c
@ -26,7 +26,7 @@
|
|||||||
|
|
||||||
#include "stix-prv.h"
|
#include "stix-prv.h"
|
||||||
|
|
||||||
#define STIX_BCLEN_MAX 16
|
#define STIX_BCLEN_MAX 6
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* from RFC 2279 UTF-8, a transformation format of ISO 10646
|
* from RFC 2279 UTF-8, a transformation format of ISO 10646
|
||||||
@ -62,12 +62,12 @@ static __utf8_t utf8_table[] =
|
|||||||
{0x04000000ul, 0x7FFFFFFFul, 0xFC, 0xFE, 0x01, 6}
|
{0x04000000ul, 0x7FFFFFFFul, 0xFC, 0xFE, 0x01, 6}
|
||||||
};
|
};
|
||||||
|
|
||||||
static STIX_INLINE __utf8_t* get_utf8_slot (stix_char_t uc)
|
static STIX_INLINE __utf8_t* get_utf8_slot (stix_uch_t uc)
|
||||||
{
|
{
|
||||||
__utf8_t* cur, * end;
|
__utf8_t* cur, * end;
|
||||||
|
|
||||||
STIX_ASSERT (STIX_SIZEOF(stix_bchar_t) == 1);
|
STIX_ASSERT (STIX_SIZEOF(stix_bch_t) == 1);
|
||||||
STIX_ASSERT (STIX_SIZEOF(stix_char_t) >= 2);
|
STIX_ASSERT (STIX_SIZEOF(stix_uch_t) >= 2);
|
||||||
|
|
||||||
end = utf8_table + STIX_COUNTOF(utf8_table);
|
end = utf8_table + STIX_COUNTOF(utf8_table);
|
||||||
cur = utf8_table;
|
cur = utf8_table;
|
||||||
@ -81,7 +81,7 @@ static STIX_INLINE __utf8_t* get_utf8_slot (stix_char_t uc)
|
|||||||
return STIX_NULL; /* invalid character */
|
return STIX_NULL; /* invalid character */
|
||||||
}
|
}
|
||||||
|
|
||||||
stix_size_t stix_uctoutf8 (stix_char_t uc, stix_bchar_t* utf8, stix_size_t size)
|
stix_size_t stix_uctoutf8 (stix_uch_t uc, stix_bch_t* utf8, stix_size_t size)
|
||||||
{
|
{
|
||||||
__utf8_t* cur = get_utf8_slot (uc);
|
__utf8_t* cur = get_utf8_slot (uc);
|
||||||
|
|
||||||
@ -108,14 +108,14 @@ stix_size_t stix_uctoutf8 (stix_char_t uc, stix_bchar_t* utf8, stix_size_t size)
|
|||||||
return (stix_size_t)cur->length;
|
return (stix_size_t)cur->length;
|
||||||
}
|
}
|
||||||
|
|
||||||
stix_size_t stix_utf8touc (const stix_bchar_t* utf8, stix_size_t size, stix_char_t* uc)
|
stix_size_t stix_utf8touc (const stix_bch_t* utf8, stix_size_t size, stix_uch_t* uc)
|
||||||
{
|
{
|
||||||
__utf8_t* cur, * end;
|
__utf8_t* cur, * end;
|
||||||
|
|
||||||
STIX_ASSERT (utf8 != STIX_NULL);
|
STIX_ASSERT (utf8 != STIX_NULL);
|
||||||
STIX_ASSERT (size > 0);
|
STIX_ASSERT (size > 0);
|
||||||
STIX_ASSERT (STIX_SIZEOF(stix_bchar_t) == 1);
|
STIX_ASSERT (STIX_SIZEOF(stix_bch_t) == 1);
|
||||||
STIX_ASSERT (STIX_SIZEOF(stix_char_t) >= 2);
|
STIX_ASSERT (STIX_SIZEOF(stix_uch_t) >= 2);
|
||||||
|
|
||||||
end = utf8_table + STIX_COUNTOF(utf8_table);
|
end = utf8_table + STIX_COUNTOF(utf8_table);
|
||||||
cur = utf8_table;
|
cur = utf8_table;
|
||||||
@ -135,7 +135,7 @@ stix_size_t stix_utf8touc (const stix_bchar_t* utf8, stix_size_t size, stix_char
|
|||||||
|
|
||||||
if (uc)
|
if (uc)
|
||||||
{
|
{
|
||||||
stix_char_t w;
|
stix_uch_t w;
|
||||||
|
|
||||||
w = utf8[0] & cur->fmask;
|
w = utf8[0] & cur->fmask;
|
||||||
for (i = 1; i < cur->length; i++)
|
for (i = 1; i < cur->length; i++)
|
||||||
@ -167,9 +167,9 @@ stix_size_t stix_utf8touc (const stix_bchar_t* utf8, stix_size_t size, stix_char
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* this return value can indicate both
|
/* this return value can indicate both
|
||||||
* the correct length (len >= cur->length)
|
* the correct length (size >= cur->length)
|
||||||
* and
|
* and
|
||||||
* the incomplete seqeunce error (len < cur->length).
|
* the incomplete sequence error (size < cur->length).
|
||||||
*/
|
*/
|
||||||
return (stix_size_t)cur->length;
|
return (stix_size_t)cur->length;
|
||||||
}
|
}
|
||||||
@ -179,28 +179,26 @@ stix_size_t stix_utf8touc (const stix_bchar_t* utf8, stix_size_t size, stix_char
|
|||||||
return 0; /* error - invalid sequence */
|
return 0; /* error - invalid sequence */
|
||||||
}
|
}
|
||||||
|
|
||||||
stix_size_t stix_utf8len (const stix_bchar_t* utf8, stix_size_t size)
|
|
||||||
{
|
|
||||||
return stix_utf8touc (utf8, size, STIX_NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------- */
|
/* ----------------------------------------------------------------------- */
|
||||||
|
|
||||||
static int bcsn_to_csn_with_cmgr (
|
static STIX_INLINE int bcsn_to_ucsn_with_cmgr (
|
||||||
const stix_bchar_t* bcs, stix_size_t* bcslen,
|
const stix_bch_t* bcs, stix_size_t* bcslen,
|
||||||
stix_char_t* cs, stix_size_t* cslen, stix_cmgr_t* cmgr, int all)
|
stix_uch_t* ucs, stix_size_t* ucslen, stix_cmgr_t* cmgr, int all)
|
||||||
{
|
{
|
||||||
const stix_bchar_t* p;
|
const stix_bch_t* p;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
stix_size_t mlen;
|
stix_size_t mlen;
|
||||||
|
|
||||||
if (cs)
|
if (ucs)
|
||||||
{
|
{
|
||||||
stix_char_t* q, * qend;
|
/* destination buffer is specified.
|
||||||
|
* copy the conversion result to the buffer */
|
||||||
|
|
||||||
|
stix_uch_t* q, * qend;
|
||||||
|
|
||||||
p = bcs;
|
p = bcs;
|
||||||
q = cs;
|
q = ucs;
|
||||||
qend = cs + *cslen;
|
qend = ucs + *ucslen;
|
||||||
mlen = *bcslen;
|
mlen = *bcslen;
|
||||||
|
|
||||||
while (mlen > 0)
|
while (mlen > 0)
|
||||||
@ -214,7 +212,7 @@ static int bcsn_to_csn_with_cmgr (
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
n = cmgr->bctoc (p, mlen, q);
|
n = cmgr->bctouc (p, mlen, q);
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
{
|
{
|
||||||
/* invalid sequence */
|
/* invalid sequence */
|
||||||
@ -249,12 +247,18 @@ static int bcsn_to_csn_with_cmgr (
|
|||||||
mlen -= n;
|
mlen -= n;
|
||||||
}
|
}
|
||||||
|
|
||||||
*cslen = q - cs;
|
*ucslen = q - ucs;
|
||||||
*bcslen = p - bcs;
|
*bcslen = p - bcs;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
stix_char_t w;
|
/* no destination buffer is specified. perform conversion
|
||||||
|
* but don't copy the result. the caller can call this function
|
||||||
|
* without a buffer to find the required buffer size, allocate
|
||||||
|
* a buffer with the size and call this function again with
|
||||||
|
* the buffer. */
|
||||||
|
|
||||||
|
stix_uch_t w;
|
||||||
stix_size_t wlen = 0;
|
stix_size_t wlen = 0;
|
||||||
|
|
||||||
p = bcs;
|
p = bcs;
|
||||||
@ -264,7 +268,7 @@ static int bcsn_to_csn_with_cmgr (
|
|||||||
{
|
{
|
||||||
stix_size_t n;
|
stix_size_t n;
|
||||||
|
|
||||||
n = cmgr->bctoc (p, mlen, &w);
|
n = cmgr->bctouc (p, mlen, &w);
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
{
|
{
|
||||||
/* invalid sequence */
|
/* invalid sequence */
|
||||||
@ -291,19 +295,42 @@ static int bcsn_to_csn_with_cmgr (
|
|||||||
wlen += 1;
|
wlen += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
*cslen = wlen;
|
*ucslen = wlen;
|
||||||
*bcslen = p - bcs;
|
*bcslen = p - bcs;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int csn_to_bcsn_with_cmgr (
|
/* Converts a null-terminated byte string to wide characters.
 *
 * The byte length is measured by scanning bcs up to its first '\0'; the
 * conversion itself is delegated to bcsn_to_ucsn_with_cmgr().
 *
 *  bcs     source bytes; read until the first '\0'.
 *  bcslen  out only: receives the byte count reported back by
 *          bcsn_to_ucsn_with_cmgr(). Its incoming value is not read here.
 *  ucs     destination buffer, or null to run without storing output.
 *  ucslen  in: value passed on as the destination capacity;
 *          out: the count reported back by bcsn_to_ucsn_with_cmgr().
 *  cmgr    converter table handed through to bcsn_to_ucsn_with_cmgr().
 *  all     handed through unchanged.
 *
 * Returns the result of bcsn_to_ucsn_with_cmgr(), except that when ucs is
 * non-null and there is no free slot left for the terminator the result
 * is replaced with -2.
 * NOTE(review): that replacement also overwrites an earlier error code
 * from the conversion call - confirm this is intended. */
static STIX_INLINE int bcs_to_ucs_with_cmgr (
|
||||||
const stix_char_t* cs, stix_size_t* cslen,
|
const stix_bch_t* bcs, stix_size_t* bcslen,
|
||||||
stix_bchar_t* bcs, stix_size_t* bcslen, stix_cmgr_t* cmgr)
|
stix_uch_t* ucs, stix_size_t* ucslen, stix_cmgr_t* cmgr, int all)
|
||||||
{
|
{
|
||||||
const stix_char_t* p = cs;
|
const stix_bch_t* bp;
|
||||||
const stix_char_t* end = cs + *cslen;
|
stix_size_t mlen, wlen;
|
||||||
|
int n;
|
||||||
|
|
||||||
|
/* empty-bodied loop: only advances bp to the terminating '\0' */
for (bp = bcs; *bp != '\0'; bp++);
|
||||||
|
|
||||||
|
/* work on local copies so *ucslen still holds the caller's capacity below */
mlen = bp - bcs; wlen = *ucslen;
|
||||||
|
n = bcsn_to_ucsn_with_cmgr (bcs, &mlen, ucs, &wlen, cmgr, all);
|
||||||
|
if (ucs)
|
||||||
|
{
|
||||||
|
/* null-terminate the target buffer if it has room for it. */
|
||||||
|
if (wlen < *ucslen) ucs[wlen] = '\0';
|
||||||
|
else n = -2; /* buffer too small */
|
||||||
|
}
|
||||||
|
*bcslen = mlen; *ucslen = wlen;
|
||||||
|
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
static STIX_INLINE int ucsn_to_bcsn_with_cmgr (
|
||||||
|
const stix_uch_t* ucs, stix_size_t* ucslen,
|
||||||
|
stix_bch_t* bcs, stix_size_t* bcslen, stix_cmgr_t* cmgr)
|
||||||
|
{
|
||||||
|
const stix_uch_t* p = ucs;
|
||||||
|
const stix_uch_t* end = ucs + *ucslen;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
if (bcs)
|
if (bcs)
|
||||||
@ -320,7 +347,7 @@ static int csn_to_bcsn_with_cmgr (
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
n = cmgr->ctobc (*p, bcs, rem);
|
n = cmgr->uctobc (*p, bcs, rem);
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
{
|
{
|
||||||
ret = -1;
|
ret = -1;
|
||||||
@ -338,14 +365,96 @@ static int csn_to_bcsn_with_cmgr (
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
stix_bchar_t bcsbuf[STIX_BCLEN_MAX];
|
stix_bch_t bcsbuf[STIX_BCLEN_MAX];
|
||||||
stix_size_t mlen = 0;
|
stix_size_t mlen = 0;
|
||||||
|
|
||||||
while (p < end)
|
while (p < end)
|
||||||
{
|
{
|
||||||
stix_size_t n;
|
stix_size_t n;
|
||||||
|
|
||||||
n = cmgr->ctobc (*p, bcsbuf, STIX_COUNTOF(bcsbuf));
|
n = cmgr->uctobc (*p, bcsbuf, STIX_COUNTOF(bcsbuf));
|
||||||
|
if (n == 0)
|
||||||
|
{
|
||||||
|
ret = -1;
|
||||||
|
break; /* illegal character */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* it assumes that bcsbuf is large enough to hold a character */
|
||||||
|
STIX_ASSERT (n <= STIX_COUNTOF(bcsbuf));
|
||||||
|
|
||||||
|
p++; mlen += n;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* this length excludes the terminating null character.
|
||||||
|
* this function doesn't even null-terminate the result. */
|
||||||
|
*bcslen = mlen;
|
||||||
|
}
|
||||||
|
|
||||||
|
*ucslen = p - ucs;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int ucs_to_bcs_with_cmgr (
|
||||||
|
const stix_uch_t* ucs, stix_size_t* ucslen,
|
||||||
|
stix_bch_t* bcs, stix_size_t* bcslen, stix_cmgr_t* cmgr)
|
||||||
|
{
|
||||||
|
const stix_uch_t* p = ucs;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (bcs)
|
||||||
|
{
|
||||||
|
stix_size_t rem = *bcslen;
|
||||||
|
|
||||||
|
while (*p != '\0')
|
||||||
|
{
|
||||||
|
stix_size_t n;
|
||||||
|
|
||||||
|
if (rem <= 0)
|
||||||
|
{
|
||||||
|
ret = -2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
n = cmgr->uctobc (*p, bcs, rem);
|
||||||
|
if (n == 0)
|
||||||
|
{
|
||||||
|
ret = -1;
|
||||||
|
break; /* illegal character */
|
||||||
|
}
|
||||||
|
if (n > rem)
|
||||||
|
{
|
||||||
|
ret = -2;
|
||||||
|
break; /* buffer too small */
|
||||||
|
}
|
||||||
|
|
||||||
|
bcs += n; rem -= n; p++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* update bcslen to the length of the bcs string converted excluding
|
||||||
|
* terminating null */
|
||||||
|
*bcslen -= rem;
|
||||||
|
|
||||||
|
/* null-terminate the multibyte sequence if it has sufficient space */
|
||||||
|
if (rem > 0) *bcs = '\0';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* if ret is -2 and cs[cslen] == '\0',
|
||||||
|
* this means that the bcs buffer was lacking one
|
||||||
|
* slot for the terminating null */
|
||||||
|
ret = -2; /* buffer too small */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
stix_bch_t bcsbuf[STIX_BCLEN_MAX];
|
||||||
|
stix_size_t mlen = 0;
|
||||||
|
|
||||||
|
while (*p != '\0')
|
||||||
|
{
|
||||||
|
stix_size_t n;
|
||||||
|
|
||||||
|
n = cmgr->uctobc (*p, bcsbuf, STIX_COUNTOF(bcsbuf));
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
{
|
{
|
||||||
ret = -1;
|
ret = -1;
|
||||||
@ -353,22 +462,20 @@ static int csn_to_bcsn_with_cmgr (
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* it assumes that bcs is large enough to hold a character */
|
/* it assumes that bcs is large enough to hold a character */
|
||||||
STIX_ASSERT (n <= STIX_COUNTOF(bcsbuf));
|
/* check against the scratch array uctobc() just wrote into; bcs is a (null) pointer in this branch, not an array */
STIX_ASSERT (n <= STIX_COUNTOF(bcsbuf));
|
||||||
|
|
||||||
p++; mlen += n;
|
p++; mlen += n;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* this length excludes the terminating null character.
|
/* this length holds the number of resulting multi-byte characters
|
||||||
* this function doesn't event null-terminate the result. */
|
* excluding the terminating null character */
|
||||||
*bcslen = mlen;
|
*bcslen = mlen;
|
||||||
}
|
}
|
||||||
|
|
||||||
*cslen = p - cs;
|
*ucslen = p - ucs; /* the number of wide characters handled. */
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static stix_cmgr_t utf8_cmgr =
|
static stix_cmgr_t utf8_cmgr =
|
||||||
{
|
{
|
||||||
stix_utf8touc,
|
stix_utf8touc,
|
||||||
@ -376,15 +483,42 @@ static stix_cmgr_t utf8_cmgr =
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Converts UTF-8 bytes to wide characters using utf8_cmgr.
 *
 * On the new side the incoming *bcslen selects the mode:
 *   - *bcslen == ~(stix_size_t)0 : bcs is treated as null-terminated and
 *     handled by bcs_to_ucs_with_cmgr();
 *   - any other value            : bcs is treated as length-bound and
 *     handled by bcsn_to_ucsn_with_cmgr().
 * Both helpers are called with 'all' set to 0 and their result is returned
 * unchanged. bcslen is dereferenced unconditionally, so it must not be
 * null. The old side always called the length-bound helper. */
int stix_utf8toucs (
|
int stix_utf8toucs (
|
||||||
const stix_bchar_t* bcs, stix_size_t* bcslen,
|
const stix_bch_t* bcs, stix_size_t* bcslen,
|
||||||
stix_char_t* ucs, stix_size_t* ucslen)
|
stix_uch_t* ucs, stix_size_t* ucslen)
|
||||||
{
|
{
|
||||||
return bcsn_to_csn_with_cmgr (bcs, bcslen, ucs, ucslen, &utf8_cmgr, 0);
|
if (*bcslen == ~(stix_size_t)0)
|
||||||
|
{
|
||||||
|
/* the source is null-terminated. */
|
||||||
|
return bcs_to_ucs_with_cmgr (bcs, bcslen, ucs, ucslen, &utf8_cmgr, 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* the source is length bound */
|
||||||
|
return bcsn_to_ucsn_with_cmgr (bcs, bcslen, ucs, ucslen, &utf8_cmgr, 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Converts wide characters to UTF-8 bytes using utf8_cmgr.
 *
 * On the new side the incoming *ucslen selects the mode:
 *   - *ucslen == ~(stix_size_t)0 : ucs is treated as null-terminated and
 *     handled by ucs_to_bcs_with_cmgr();
 *   - any other value            : ucs is treated as length-bound and
 *     handled by ucsn_to_bcsn_with_cmgr().
 * The helper's result is returned unchanged. ucslen is dereferenced
 * unconditionally, so it must not be null. The old side always called the
 * length-bound helper. */
int stix_ucstoutf8 (
|
int stix_ucstoutf8 (
|
||||||
const stix_char_t* ucs, stix_size_t *ucslen,
|
const stix_uch_t* ucs, stix_size_t *ucslen,
|
||||||
stix_bchar_t* bcs, stix_size_t* bcslen)
|
stix_bch_t* bcs, stix_size_t* bcslen)
|
||||||
{
|
{
|
||||||
return csn_to_bcsn_with_cmgr (ucs, ucslen, bcs, bcslen, &utf8_cmgr);
|
if (*ucslen == ~(stix_size_t)0)
|
||||||
|
{
|
||||||
|
/* null-terminated */
|
||||||
|
return ucs_to_bcs_with_cmgr (ucs, ucslen, bcs, bcslen, &utf8_cmgr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* length bound */
|
||||||
|
return ucsn_to_bcsn_with_cmgr (ucs, ucslen, bcs, bcslen, &utf8_cmgr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
stix_size_t stix_ucslen (const stix_uch_t* ucs)
|
||||||
|
{
|
||||||
|
const stix_uch_t* ptr = ucs;
|
||||||
|
while (*ptr) ptr = STIX_INCPTR(const stix_uch_t, ptr, 1);
|
||||||
|
return STIX_SUBPTR(const stix_uch_t, ptr, ucs);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
Loading…
Reference in New Issue
Block a user