added utf8 conversion functions
This commit is contained in:
parent
75bb3e9a40
commit
090c9ac1bf
@ -33,9 +33,12 @@
|
|||||||
typedef struct xtn_t xtn_t;
|
typedef struct xtn_t xtn_t;
|
||||||
struct xtn_t
|
struct xtn_t
|
||||||
{
|
{
|
||||||
char source_path[1024];
|
const char* source_path;
|
||||||
};
|
|
||||||
|
|
||||||
|
char bchar_buf[1024];
|
||||||
|
stix_size_t bchar_pos;
|
||||||
|
stix_size_t bchar_len;
|
||||||
|
};
|
||||||
|
|
||||||
static void* sys_alloc (stix_mmgr_t* mmgr, stix_size_t size)
|
static void* sys_alloc (stix_mmgr_t* mmgr, stix_size_t size)
|
||||||
{
|
{
|
||||||
@ -82,18 +85,36 @@ static STIX_INLINE stix_oow_t open_input (stix_t* stix, stix_ioarg_t* arg)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static STIX_INLINE stix_oow_t read_input (stix_t* stix, stix_ioarg_t* arg)
|
static STIX_INLINE stix_oow_t read_input (stix_t* stix, stix_ioarg_t* arg)
|
||||||
{
|
{
|
||||||
|
xtn_t* xtn = stix_getxtn(stix);
|
||||||
|
stix_size_t n, bcslen, cslen;
|
||||||
|
int x;
|
||||||
|
|
||||||
STIX_ASSERT (arg->handle != STIX_NULL);
|
STIX_ASSERT (arg->handle != STIX_NULL);
|
||||||
if (fread (arg->buf, STIX_SIZEOF(arg->buf[0]), STIX_COUNTOF(arg->buf), arg->handle) == 0)
|
n = fread (&xtn->bchar_buf[xtn->bchar_len], STIX_SIZEOF(xtn->bchar_buf[0]), STIX_COUNTOF(xtn->bchar_buf) - xtn->bchar_len, arg->handle);
|
||||||
|
if (n == 0)
|
||||||
{
|
{
|
||||||
if (ferror(arg->handle))
|
if (ferror(arg->handle))
|
||||||
{
|
{
|
||||||
|
stix_seterrnum (stix, STIX_EIOERR);
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
xtn->bchar_len += n;
|
||||||
|
bcslen = xtn->bchar_len;
|
||||||
|
cslen = STIX_COUNTOF(arg->buf);
|
||||||
|
x = stix_utf8toucs (xtn->bchar_buf, &bcslen, arg->buf, &cslen);
|
||||||
|
if (x == -2)
|
||||||
|
{
|
||||||
|
/* buffer too small */
|
||||||
|
}
|
||||||
|
if (x <= -1)
|
||||||
|
{
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static STIX_INLINE stix_oow_t close_input (stix_t* stix, stix_ioarg_t* arg)
|
static STIX_INLINE stix_oow_t close_input (stix_t* stix, stix_ioarg_t* arg)
|
||||||
@ -151,10 +172,19 @@ static void dump_symbol_table (stix_t* stix)
|
|||||||
int main (int argc, char* argv[])
|
int main (int argc, char* argv[])
|
||||||
{
|
{
|
||||||
stix_t* stix;
|
stix_t* stix;
|
||||||
|
xtn_t* xtn;
|
||||||
|
|
||||||
printf ("Stix 1.0.0 - max named %lu max indexed %lu\n",
|
printf ("Stix 1.0.0 - max named %lu max indexed %lu\n",
|
||||||
(unsigned long int)STIX_MAX_NAMED_INSTVARS, (unsigned long int)STIX_MAX_INDEXED_INSTVARS(STIX_MAX_NAMED_INSTVARS));
|
(unsigned long int)STIX_MAX_NAMED_INSTVARS, (unsigned long int)STIX_MAX_INDEXED_INSTVARS(STIX_MAX_NAMED_INSTVARS));
|
||||||
|
|
||||||
|
|
||||||
|
if (argc != 2)
|
||||||
|
{
|
||||||
|
fprintf (stderr, "Usage: %s filename\n", argv[0]);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
stix_oow_t x;
|
stix_oow_t x;
|
||||||
|
|
||||||
@ -206,7 +236,8 @@ printf ("%p\n", a);
|
|||||||
dump_symbol_table (stix);
|
dump_symbol_table (stix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
xtn = stix_getxtn (stix);
|
||||||
|
xtn->source_path = argv[1];
|
||||||
if (stix_compile (stix, input_handler) <= -1)
|
if (stix_compile (stix, input_handler) <= -1)
|
||||||
{
|
{
|
||||||
printf ("cannot compile code\n");
|
printf ("cannot compile code\n");
|
||||||
|
@ -334,7 +334,55 @@ stix_oop_t stix_getatsysdic (
|
|||||||
stix_oop_t key
|
stix_oop_t key
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/* ========================================================================= */
|
||||||
|
/* utf8.c */
|
||||||
|
/* ========================================================================= */
|
||||||
|
stix_size_t stix_uctoutf8 (
|
||||||
|
stix_char_t uc,
|
||||||
|
stix_bchar_t* utf8,
|
||||||
|
stix_size_t size
|
||||||
|
);
|
||||||
|
|
||||||
|
stix_size_t stix_utf8touc (
|
||||||
|
const stix_bchar_t* utf8,
|
||||||
|
stix_size_t size,
|
||||||
|
stix_char_t* uc
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
int stix_ucstoutf8 (
|
||||||
|
const stix_char_t* ucs,
|
||||||
|
stix_size_t* ucslen,
|
||||||
|
stix_bchar_t* bcs,
|
||||||
|
stix_size_t* bcslen
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The stix_utf8toucs() function converts a UTF-8 string to a unicode string.
|
||||||
|
*
|
||||||
|
* It never returns -2 if \a ucs is #STIX_NULL.
|
||||||
|
*
|
||||||
|
* \code
|
||||||
|
* const stix_bchar_t* bcs = "a multibyte string";
|
||||||
|
* stix_char_t ucs[100];
|
||||||
|
* stix_size_t ucslen = STIX_COUNTOF(ucs);
|
||||||
|
* stix_size_t bcslen = strlen(bcs);
|
||||||
|
* int n;
|
||||||
|
* n = stix_utf8toucs (bcs, &bcslen, ucs, &ucslen);
|
||||||
|
* if (n <= -1) { invalid/incomplete sequence or buffer too small }
|
||||||
|
* \endcode
|
||||||
|
*
|
||||||
|
* \return 0 on success.
|
||||||
|
* -1 if \a bcs contains an illegal character.
|
||||||
|
* -2 if the wide-character string buffer is too small.
|
||||||
|
* -3 if \a bcs is not a complete sequence.
|
||||||
|
*/
|
||||||
|
int stix_utf8toucs (
|
||||||
|
const stix_bchar_t* bcs,
|
||||||
|
stix_size_t* bcslen,
|
||||||
|
stix_char_t* ucs,
|
||||||
|
stix_size_t* ucslen
|
||||||
|
);
|
||||||
|
|
||||||
/* ========================================================================= */
|
/* ========================================================================= */
|
||||||
/* comp.c */
|
/* comp.c */
|
||||||
|
100
stix/lib/stix.h
100
stix/lib/stix.h
@ -31,27 +31,9 @@
|
|||||||
/* TODO: move this macro out to the build files.... */
|
/* TODO: move this macro out to the build files.... */
|
||||||
#define STIX_INCLUDE_COMPILER
|
#define STIX_INCLUDE_COMPILER
|
||||||
|
|
||||||
#if defined(__MSDOS__)
|
/* =========================================================================
|
||||||
# define STIX_INCPTR(type,base,inc) (((type __huge*)base) + (inc))
|
* PRIMITIVE TYPE DEFINITIONS
|
||||||
# define STIX_DECPTR(type,base,inc) (((type __huge*)base) - (inc))
|
* ========================================================================= */
|
||||||
# define STIX_GTPTR(type,ptr1,ptr2) (((type __huge*)ptr1) > ((type __huge*)ptr2))
|
|
||||||
# define STIX_GEPTR(type,ptr1,ptr2) (((type __huge*)ptr1) >= ((type __huge*)ptr2))
|
|
||||||
# define STIX_LTPTR(type,ptr1,ptr2) (((type __huge*)ptr1) < ((type __huge*)ptr2))
|
|
||||||
# define STIX_LEPTR(type,ptr1,ptr2) (((type __huge*)ptr1) <= ((type __huge*)ptr2))
|
|
||||||
# define STIX_EQPTR(type,ptr1,ptr2) (((type __huge*)ptr1) == ((type __huge*)ptr2))
|
|
||||||
# define STIX_SUBPTR(type,ptr1,ptr2) (((type __huge*)ptr1) - ((type __huge*)ptr2))
|
|
||||||
#else
|
|
||||||
# define STIX_INCPTR(type,base,inc) (((type*)base) + (inc))
|
|
||||||
# define STIX_DECPTR(type,base,inc) (((type*)base) - (inc))
|
|
||||||
# define STIX_GTPTR(type,ptr1,ptr2) (((type*)ptr1) > ((type*)ptr2))
|
|
||||||
# define STIX_GEPTR(type,ptr1,ptr2) (((type*)ptr1) >= ((type*)ptr2))
|
|
||||||
# define STIX_LTPTR(type,ptr1,ptr2) (((type*)ptr1) < ((type*)ptr2))
|
|
||||||
# define STIX_LEPTR(type,ptr1,ptr2) (((type*)ptr1) <= ((type*)ptr2))
|
|
||||||
# define STIX_EQPTR(type,ptr1,ptr2) (((type*)ptr1) == ((type*)ptr2))
|
|
||||||
# define STIX_SUBPTR(type,ptr1,ptr2) (((type*)ptr1) - ((type*)ptr2))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* ========================================================================== */
|
|
||||||
/* TODO: define these types and macros using autoconf */
|
/* TODO: define these types and macros using autoconf */
|
||||||
typedef unsigned char stix_uint8_t;
|
typedef unsigned char stix_uint8_t;
|
||||||
typedef unsigned short int stix_uint16_t;
|
typedef unsigned short int stix_uint16_t;
|
||||||
@ -64,8 +46,11 @@ typedef unsigned long int stix_uintptr_t;
|
|||||||
typedef unsigned long int stix_size_t;
|
typedef unsigned long int stix_size_t;
|
||||||
|
|
||||||
typedef unsigned short int stix_char_t; /* TODO ... wchar_t??? */
|
typedef unsigned short int stix_char_t; /* TODO ... wchar_t??? */
|
||||||
typedef char stix_iochar_t;
|
typedef char stix_bchar_t;
|
||||||
|
|
||||||
|
/* =========================================================================
|
||||||
|
* PRIMITIVE MACROS
|
||||||
|
* ========================================================================= */
|
||||||
#define STIX_SIZEOF(x) (sizeof(x))
|
#define STIX_SIZEOF(x) (sizeof(x))
|
||||||
#define STIX_COUNTOF(x) (sizeof(x) / sizeof(x[0]))
|
#define STIX_COUNTOF(x) (sizeof(x) / sizeof(x[0]))
|
||||||
|
|
||||||
@ -92,7 +77,6 @@ typedef char stix_iochar_t;
|
|||||||
# define STIX_NULL ((void*)0)
|
# define STIX_NULL ((void*)0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* make a low bit mask that can mask off low n bits*/
|
/* make a low bit mask that can mask off low n bits*/
|
||||||
#define STIX_LBMASK(type,n) (~(~((type)0) << (n)))
|
#define STIX_LBMASK(type,n) (~(~((type)0) << (n)))
|
||||||
|
|
||||||
@ -114,16 +98,43 @@ typedef char stix_iochar_t;
|
|||||||
/*#define STIX_BITS_MAX(type,nbits) ((((type)1) << (nbits)) - 1)*/
|
/*#define STIX_BITS_MAX(type,nbits) ((((type)1) << (nbits)) - 1)*/
|
||||||
#define STIX_BITS_MAX(type,nbits) ((~(type)0) >> (STIX_SIZEOF(type) * 8 - (nbits)))
|
#define STIX_BITS_MAX(type,nbits) ((~(type)0) >> (STIX_SIZEOF(type) * 8 - (nbits)))
|
||||||
|
|
||||||
|
/* =========================================================================
|
||||||
|
* POINTER MANIPULATION MACROS
|
||||||
|
* ========================================================================= */
|
||||||
|
|
||||||
|
#if defined(__MSDOS__)
|
||||||
|
# define STIX_INCPTR(type,base,inc) (((type __huge*)base) + (inc))
|
||||||
|
# define STIX_DECPTR(type,base,inc) (((type __huge*)base) - (inc))
|
||||||
|
# define STIX_GTPTR(type,ptr1,ptr2) (((type __huge*)ptr1) > ((type __huge*)ptr2))
|
||||||
|
# define STIX_GEPTR(type,ptr1,ptr2) (((type __huge*)ptr1) >= ((type __huge*)ptr2))
|
||||||
|
# define STIX_LTPTR(type,ptr1,ptr2) (((type __huge*)ptr1) < ((type __huge*)ptr2))
|
||||||
|
# define STIX_LEPTR(type,ptr1,ptr2) (((type __huge*)ptr1) <= ((type __huge*)ptr2))
|
||||||
|
# define STIX_EQPTR(type,ptr1,ptr2) (((type __huge*)ptr1) == ((type __huge*)ptr2))
|
||||||
|
# define STIX_SUBPTR(type,ptr1,ptr2) (((type __huge*)ptr1) - ((type __huge*)ptr2))
|
||||||
|
#else
|
||||||
|
# define STIX_INCPTR(type,base,inc) (((type*)base) + (inc))
|
||||||
|
# define STIX_DECPTR(type,base,inc) (((type*)base) - (inc))
|
||||||
|
# define STIX_GTPTR(type,ptr1,ptr2) (((type*)ptr1) > ((type*)ptr2))
|
||||||
|
# define STIX_GEPTR(type,ptr1,ptr2) (((type*)ptr1) >= ((type*)ptr2))
|
||||||
|
# define STIX_LTPTR(type,ptr1,ptr2) (((type*)ptr1) < ((type*)ptr2))
|
||||||
|
# define STIX_LEPTR(type,ptr1,ptr2) (((type*)ptr1) <= ((type*)ptr2))
|
||||||
|
# define STIX_EQPTR(type,ptr1,ptr2) (((type*)ptr1) == ((type*)ptr2))
|
||||||
|
# define STIX_SUBPTR(type,ptr1,ptr2) (((type*)ptr1) - ((type*)ptr2))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* =========================================================================
|
||||||
|
* MMGR
|
||||||
|
* ========================================================================= */
|
||||||
typedef struct stix_mmgr_t stix_mmgr_t;
|
typedef struct stix_mmgr_t stix_mmgr_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* allocate a memory chunk of the size \a n.
|
* allocate a memory chunk of the size \a n.
|
||||||
* @return pointer to a memory chunk on success, #STIX_NULL on failure.
|
* \return pointer to a memory chunk on success, #STIX_NULL on failure.
|
||||||
*/
|
*/
|
||||||
typedef void* (*stix_mmgr_alloc_t) (stix_mmgr_t* mmgr, stix_size_t n);
|
typedef void* (*stix_mmgr_alloc_t) (stix_mmgr_t* mmgr, stix_size_t n);
|
||||||
/**
|
/**
|
||||||
* resize a memory chunk pointed to by \a ptr to the size \a n.
|
* resize a memory chunk pointed to by \a ptr to the size \a n.
|
||||||
* @return pointer to a memory chunk on success, #STIX_NULL on failure.
|
* \return pointer to a memory chunk on success, #STIX_NULL on failure.
|
||||||
*/
|
*/
|
||||||
typedef void* (*stix_mmgr_realloc_t) (stix_mmgr_t* mmgr, void* ptr, stix_size_t n);
|
typedef void* (*stix_mmgr_realloc_t) (stix_mmgr_t* mmgr, void* ptr, stix_size_t n);
|
||||||
/**
|
/**
|
||||||
@ -168,6 +179,41 @@ struct stix_mmgr_t
|
|||||||
#define STIX_MMGR_FREE(mmgr,ptr) ((mmgr)->free(mmgr,ptr))
|
#define STIX_MMGR_FREE(mmgr,ptr) ((mmgr)->free(mmgr,ptr))
|
||||||
|
|
||||||
|
|
||||||
|
/* =========================================================================
|
||||||
|
* CMGR
|
||||||
|
* =========================================================================*/
|
||||||
|
|
||||||
|
typedef struct stix_cmgr_t stix_cmgr_t;
|
||||||
|
|
||||||
|
typedef stix_size_t (*stix_cmgr_bctoc_t) (
|
||||||
|
const stix_bchar_t* mb,
|
||||||
|
stix_size_t size,
|
||||||
|
stix_char_t* wc
|
||||||
|
);
|
||||||
|
|
||||||
|
typedef stix_size_t (*stix_cmgr_ctobc_t) (
|
||||||
|
stix_char_t wc,
|
||||||
|
stix_bchar_t* mb,
|
||||||
|
stix_size_t size
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The stix_cmgr_t type defines the character-level interface to
|
||||||
|
* multibyte/wide-character conversion. This interface doesn't
|
||||||
|
* provide any facility to store conversion state in a context
|
||||||
|
* independent manner. This leads to the limitation that it can
|
||||||
|
* handle a stateless multibyte encoding only.
|
||||||
|
*/
|
||||||
|
struct stix_cmgr_t
|
||||||
|
{
|
||||||
|
stix_cmgr_bctoc_t bctoc;
|
||||||
|
stix_cmgr_ctobc_t ctobc;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* =========================================================================
|
||||||
|
* MACROS THAT CHANGE THE BEHAVIOR OF THE C COMPILER/LINKER
|
||||||
|
* =========================================================================*/
|
||||||
|
|
||||||
#if defined(_WIN32) || defined(__WATCOMC__)
|
#if defined(_WIN32) || defined(__WATCOMC__)
|
||||||
# define STIX_IMPORT __declspec(dllimport)
|
# define STIX_IMPORT __declspec(dllimport)
|
||||||
# define STIX_EXPORT __declspec(dllexport)
|
# define STIX_EXPORT __declspec(dllexport)
|
||||||
@ -467,7 +513,7 @@ enum stix_obj_type_t
|
|||||||
};
|
};
|
||||||
typedef enum stix_obj_type_t stix_obj_type_t;
|
typedef enum stix_obj_type_t stix_obj_type_t;
|
||||||
|
|
||||||
/* -------------------------------------------------------------------------
|
/* =========================================================================
|
||||||
* Object header structure
|
* Object header structure
|
||||||
*
|
*
|
||||||
* _flags:
|
* _flags:
|
||||||
@ -502,7 +548,7 @@ typedef enum stix_obj_type_t stix_obj_type_t;
|
|||||||
* class can have normal instance variables. On the contrary, the actual byte
|
* class can have normal instance variables. On the contrary, the actual byte
|
||||||
* size calculation and the access to the payload fields become more complex.
|
* size calculation and the access to the payload fields become more complex.
|
||||||
* Therefore, i've dropped the idea.
|
* Therefore, i've dropped the idea.
|
||||||
* ------------------------------------------------------------------------- */
|
* ========================================================================= */
|
||||||
#define STIX_OBJ_FLAGS_TYPE_BITS 6
|
#define STIX_OBJ_FLAGS_TYPE_BITS 6
|
||||||
#define STIX_OBJ_FLAGS_UNIT_BITS 5
|
#define STIX_OBJ_FLAGS_UNIT_BITS 5
|
||||||
#define STIX_OBJ_FLAGS_EXTRA_BITS 1
|
#define STIX_OBJ_FLAGS_EXTRA_BITS 1
|
||||||
|
216
stix/lib/utf8.c
216
stix/lib/utf8.c
@ -26,6 +26,8 @@
|
|||||||
|
|
||||||
#include "stix-prv.h"
|
#include "stix-prv.h"
|
||||||
|
|
||||||
|
#define STIX_BCLEN_MAX 16
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* from RFC 2279 UTF-8, a transformation format of ISO 10646
|
* from RFC 2279 UTF-8, a transformation format of ISO 10646
|
||||||
*
|
*
|
||||||
@ -64,7 +66,7 @@ static STIX_INLINE __utf8_t* get_utf8_slot (stix_char_t uc)
|
|||||||
{
|
{
|
||||||
__utf8_t* cur, * end;
|
__utf8_t* cur, * end;
|
||||||
|
|
||||||
STIX_ASSERT (STIX_SIZEOF(stix_iochar_t) == 1);
|
STIX_ASSERT (STIX_SIZEOF(stix_bchar_t) == 1);
|
||||||
STIX_ASSERT (STIX_SIZEOF(stix_char_t) >= 2);
|
STIX_ASSERT (STIX_SIZEOF(stix_char_t) >= 2);
|
||||||
|
|
||||||
end = utf8_table + STIX_COUNTOF(utf8_table);
|
end = utf8_table + STIX_COUNTOF(utf8_table);
|
||||||
@ -79,7 +81,7 @@ static STIX_INLINE __utf8_t* get_utf8_slot (stix_char_t uc)
|
|||||||
return STIX_NULL; /* invalid character */
|
return STIX_NULL; /* invalid character */
|
||||||
}
|
}
|
||||||
|
|
||||||
stix_size_t stix_uctoutf8 (stix_char_t uc, stix_iochar_t* utf8, stix_size_t size)
|
stix_size_t stix_uctoutf8 (stix_char_t uc, stix_bchar_t* utf8, stix_size_t size)
|
||||||
{
|
{
|
||||||
__utf8_t* cur = get_utf8_slot (uc);
|
__utf8_t* cur = get_utf8_slot (uc);
|
||||||
|
|
||||||
@ -106,13 +108,13 @@ stix_size_t stix_uctoutf8 (stix_char_t uc, stix_iochar_t* utf8, stix_size_t size
|
|||||||
return (stix_size_t)cur->length;
|
return (stix_size_t)cur->length;
|
||||||
}
|
}
|
||||||
|
|
||||||
stix_size_t stix_utf8touc (const stix_iochar_t* utf8, stix_size_t size, stix_char_t* uc)
|
stix_size_t stix_utf8touc (const stix_bchar_t* utf8, stix_size_t size, stix_char_t* uc)
|
||||||
{
|
{
|
||||||
__utf8_t* cur, * end;
|
__utf8_t* cur, * end;
|
||||||
|
|
||||||
STIX_ASSERT (utf8 != STIX_NULL);
|
STIX_ASSERT (utf8 != STIX_NULL);
|
||||||
STIX_ASSERT (size > 0);
|
STIX_ASSERT (size > 0);
|
||||||
STIX_ASSERT (STIX_SIZEOF(stix_iochar_t) == 1);
|
STIX_ASSERT (STIX_SIZEOF(stix_bchar_t) == 1);
|
||||||
STIX_ASSERT (STIX_SIZEOF(stix_char_t) >= 2);
|
STIX_ASSERT (STIX_SIZEOF(stix_char_t) >= 2);
|
||||||
|
|
||||||
end = utf8_table + STIX_COUNTOF(utf8_table);
|
end = utf8_table + STIX_COUNTOF(utf8_table);
|
||||||
@ -177,8 +179,212 @@ stix_size_t stix_utf8touc (const stix_iochar_t* utf8, stix_size_t size, stix_cha
|
|||||||
return 0; /* error - invalid sequence */
|
return 0; /* error - invalid sequence */
|
||||||
}
|
}
|
||||||
|
|
||||||
stix_size_t stix_utf8len (const stix_iochar_t* utf8, stix_size_t size)
|
stix_size_t stix_utf8len (const stix_bchar_t* utf8, stix_size_t size)
|
||||||
{
|
{
|
||||||
return stix_utf8touc (utf8, size, STIX_NULL);
|
return stix_utf8touc (utf8, size, STIX_NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
static int bcsn_to_csn_with_cmgr (
|
||||||
|
const stix_bchar_t* bcs, stix_size_t* bcslen,
|
||||||
|
stix_char_t* cs, stix_size_t* cslen, stix_cmgr_t* cmgr, int all)
|
||||||
|
{
|
||||||
|
const stix_bchar_t* p;
|
||||||
|
int ret = 0;
|
||||||
|
stix_size_t mlen;
|
||||||
|
|
||||||
|
if (cs)
|
||||||
|
{
|
||||||
|
stix_char_t* q, * qend;
|
||||||
|
|
||||||
|
p = bcs;
|
||||||
|
q = cs;
|
||||||
|
qend = cs + *cslen;
|
||||||
|
mlen = *bcslen;
|
||||||
|
|
||||||
|
while (mlen > 0)
|
||||||
|
{
|
||||||
|
stix_size_t n;
|
||||||
|
|
||||||
|
if (q >= qend)
|
||||||
|
{
|
||||||
|
/* buffer too small */
|
||||||
|
ret = -2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
n = cmgr->bctoc (p, mlen, q);
|
||||||
|
if (n == 0)
|
||||||
|
{
|
||||||
|
/* invalid sequence */
|
||||||
|
if (all)
|
||||||
|
{
|
||||||
|
n = 1;
|
||||||
|
*q = '?';
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ret = -1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (n > mlen)
|
||||||
|
{
|
||||||
|
/* incomplete sequence */
|
||||||
|
if (all)
|
||||||
|
{
|
||||||
|
n = 1;
|
||||||
|
*q = '?';
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ret = -3;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
q++;
|
||||||
|
p += n;
|
||||||
|
mlen -= n;
|
||||||
|
}
|
||||||
|
|
||||||
|
*cslen = q - cs;
|
||||||
|
*bcslen = p - bcs;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
stix_char_t w;
|
||||||
|
stix_size_t wlen = 0;
|
||||||
|
|
||||||
|
p = bcs;
|
||||||
|
mlen = *bcslen;
|
||||||
|
|
||||||
|
while (mlen > 0)
|
||||||
|
{
|
||||||
|
stix_size_t n;
|
||||||
|
|
||||||
|
n = cmgr->bctoc (p, mlen, &w);
|
||||||
|
if (n == 0)
|
||||||
|
{
|
||||||
|
/* invalid sequence */
|
||||||
|
if (all) n = 1;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ret = -1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (n > mlen)
|
||||||
|
{
|
||||||
|
/* incomplete sequence */
|
||||||
|
if (all) n = 1;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ret = -3;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
p += n;
|
||||||
|
mlen -= n;
|
||||||
|
wlen += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
*cslen = wlen;
|
||||||
|
*bcslen = p - bcs;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int csn_to_bcsn_with_cmgr (
|
||||||
|
const stix_char_t* cs, stix_size_t* cslen,
|
||||||
|
stix_bchar_t* bcs, stix_size_t* bcslen, stix_cmgr_t* cmgr)
|
||||||
|
{
|
||||||
|
const stix_char_t* p = cs;
|
||||||
|
const stix_char_t* end = cs + *cslen;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (bcs)
|
||||||
|
{
|
||||||
|
stix_size_t rem = *bcslen;
|
||||||
|
|
||||||
|
while (p < end)
|
||||||
|
{
|
||||||
|
stix_size_t n;
|
||||||
|
|
||||||
|
if (rem <= 0)
|
||||||
|
{
|
||||||
|
ret = -2; /* buffer too small */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
n = cmgr->ctobc (*p, bcs, rem);
|
||||||
|
if (n == 0)
|
||||||
|
{
|
||||||
|
ret = -1;
|
||||||
|
break; /* illegal character */
|
||||||
|
}
|
||||||
|
if (n > rem)
|
||||||
|
{
|
||||||
|
ret = -2; /* buffer too small */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bcs += n; rem -= n; p++;
|
||||||
|
}
|
||||||
|
|
||||||
|
*bcslen -= rem;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
stix_bchar_t bcsbuf[STIX_BCLEN_MAX];
|
||||||
|
stix_size_t mlen = 0;
|
||||||
|
|
||||||
|
while (p < end)
|
||||||
|
{
|
||||||
|
stix_size_t n;
|
||||||
|
|
||||||
|
n = cmgr->ctobc (*p, bcsbuf, STIX_COUNTOF(bcsbuf));
|
||||||
|
if (n == 0)
|
||||||
|
{
|
||||||
|
ret = -1;
|
||||||
|
break; /* illegal character */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* it assumes that bcsbuf is large enough to hold a character */
|
||||||
|
STIX_ASSERT (n <= STIX_COUNTOF(bcsbuf));
|
||||||
|
|
||||||
|
p++; mlen += n;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* this length excludes the terminating null character.
|
||||||
|
* this function doesn't even null-terminate the result. */
|
||||||
|
*bcslen = mlen;
|
||||||
|
}
|
||||||
|
|
||||||
|
*cslen = p - cs;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static stix_cmgr_t utf8_cmgr =
|
||||||
|
{
|
||||||
|
stix_utf8touc,
|
||||||
|
stix_uctoutf8
|
||||||
|
};
|
||||||
|
|
||||||
|
int stix_utf8toucs (
|
||||||
|
const stix_bchar_t* bcs, stix_size_t* bcslen,
|
||||||
|
stix_char_t* ucs, stix_size_t* ucslen)
|
||||||
|
{
|
||||||
|
return bcsn_to_csn_with_cmgr (bcs, bcslen, ucs, ucslen, &utf8_cmgr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int stix_ucstoutf8 (
|
||||||
|
const stix_char_t* ucs, stix_size_t *ucslen,
|
||||||
|
stix_bchar_t* bcs, stix_size_t* bcslen)
|
||||||
|
{
|
||||||
|
return csn_to_bcsn_with_cmgr (ucs, ucslen, bcs, bcslen, &utf8_cmgr);
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user