added simple utf16 conversion functions
This commit is contained in:
parent
07c11dc9da
commit
8d19c60359
@ -89,6 +89,7 @@ libmoo_la_SOURCES = \
|
||||
rbt.c \
|
||||
sym.c \
|
||||
utf8.c \
|
||||
utf16.c \
|
||||
utl.c \
|
||||
std.c
|
||||
libmoo_la_CPPFLAGS = $(CPPFLAGS_LIB_COMMON) $(CPPFLAGS_PFMOD)
|
||||
|
@ -173,8 +173,8 @@ am_libmoo_la_OBJECTS = libmoo_la-bigint.lo libmoo_la-comp.lo \
|
||||
libmoo_la-gc.lo libmoo_la-heap.lo libmoo_la-moo.lo \
|
||||
libmoo_la-obj.lo libmoo_la-opt.lo libmoo_la-pf-basic.lo \
|
||||
libmoo_la-pf-sys.lo libmoo_la-pf-utf8.lo libmoo_la-rbt.lo \
|
||||
libmoo_la-sym.lo libmoo_la-utf8.lo libmoo_la-utl.lo \
|
||||
libmoo_la-std.lo
|
||||
libmoo_la-sym.lo libmoo_la-utf8.lo libmoo_la-utf16.lo \
|
||||
libmoo_la-utl.lo libmoo_la-std.lo
|
||||
libmoo_la_OBJECTS = $(am_libmoo_la_OBJECTS)
|
||||
AM_V_lt = $(am__v_lt_@AM_V@)
|
||||
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
|
||||
@ -480,6 +480,7 @@ libmoo_la_SOURCES = \
|
||||
rbt.c \
|
||||
sym.c \
|
||||
utf8.c \
|
||||
utf16.c \
|
||||
utl.c \
|
||||
std.c
|
||||
|
||||
@ -663,6 +664,7 @@ distclean-compile:
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-rbt.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-std.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-sym.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-utf16.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-utf8.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-utl.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/moo-main.Po@am__quote@
|
||||
@ -825,6 +827,13 @@ libmoo_la-utf8.lo: utf8.c
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmoo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libmoo_la-utf8.lo `test -f 'utf8.c' || echo '$(srcdir)/'`utf8.c
|
||||
|
||||
libmoo_la-utf16.lo: utf16.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmoo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libmoo_la-utf16.lo -MD -MP -MF $(DEPDIR)/libmoo_la-utf16.Tpo -c -o libmoo_la-utf16.lo `test -f 'utf16.c' || echo '$(srcdir)/'`utf16.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libmoo_la-utf16.Tpo $(DEPDIR)/libmoo_la-utf16.Plo
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='utf16.c' object='libmoo_la-utf16.lo' libtool=yes @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmoo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libmoo_la-utf16.lo `test -f 'utf16.c' || echo '$(srcdir)/'`utf16.c
|
||||
|
||||
libmoo_la-utl.lo: utl.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmoo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libmoo_la-utl.lo -MD -MP -MF $(DEPDIR)/libmoo_la-utl.Tpo -c -o libmoo_la-utl.lo `test -f 'utl.c' || echo '$(srcdir)/'`utl.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libmoo_la-utl.Tpo $(DEPDIR)/libmoo_la-utl.Plo
|
||||
|
@ -17,6 +17,7 @@ struct moo_cfgstd_t
|
||||
|
||||
moo_oow_t memsize;
|
||||
int large_pages;
|
||||
moo_cmgr_t* cmgr;
|
||||
|
||||
union
|
||||
{
|
||||
|
@ -361,6 +361,14 @@ MOO_EXPORT int moo_concatoocharstosbuf (
|
||||
);
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
MOO_EXPORT int moo_ucwidth (
|
||||
moo_uch_t uc
|
||||
);
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
#if defined(MOO_OOCH_IS_UCH)
|
||||
# define moo_conv_oocs_to_bcs_with_cmgr(oocs,oocslen,bcs,bcslen,cmgr) moo_conv_ucs_to_bcs_with_cmgr(oocs,oocslen,bcs,bcslen,cmgr)
|
||||
# define moo_conv_oochars_to_bchars_with_cmgr(oocs,oocslen,bcs,bcslen,cmgr) moo_conv_uchars_to_bchars_with_cmgr(oocs,oocslen,bcs,bcslen,cmgr)
|
||||
@ -378,7 +386,7 @@ MOO_EXPORT int moo_conv_bcs_to_ucs_with_cmgr (
|
||||
moo_cmgr_t* cmgr,
|
||||
int all
|
||||
);
|
||||
|
||||
|
||||
MOO_EXPORT int moo_conv_bchars_to_uchars_with_cmgr (
|
||||
const moo_bch_t* bcs,
|
||||
moo_oow_t* bcslen,
|
||||
@ -404,6 +412,8 @@ MOO_EXPORT int moo_conv_uchars_to_bchars_with_cmgr (
|
||||
moo_cmgr_t* cmgr
|
||||
);
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
MOO_EXPORT moo_cmgr_t* moo_get_utf8_cmgr (
|
||||
void
|
||||
);
|
||||
@ -503,10 +513,52 @@ MOO_EXPORT moo_oow_t moo_utf8_to_uc (
|
||||
moo_uch_t* uc
|
||||
);
|
||||
|
||||
MOO_EXPORT int moo_ucwidth (
|
||||
moo_uch_t uc
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/*
 * Returns the character manager that pairs moo_utf16_to_uc() with
 * moo_uc_to_utf16().
 */
MOO_EXPORT moo_cmgr_t* moo_get_utf16_cmgr (void);
|
||||
|
||||
MOO_EXPORT int moo_conv_uchars_to_utf16 (
|
||||
const moo_uch_t* ucs,
|
||||
moo_oow_t* ucslen,
|
||||
moo_bch_t* bcs,
|
||||
moo_oow_t* bcslen
|
||||
);
|
||||
|
||||
MOO_EXPORT int moo_conv_utf16_to_uchars (
|
||||
const moo_bch_t* bcs,
|
||||
moo_oow_t* bcslen,
|
||||
moo_uch_t* ucs,
|
||||
moo_oow_t* ucslen
|
||||
);
|
||||
|
||||
|
||||
MOO_EXPORT int moo_conv_ucstr_to_utf16 (
|
||||
const moo_uch_t* ucs,
|
||||
moo_oow_t* ucslen,
|
||||
moo_bch_t* bcs,
|
||||
moo_oow_t* bcslen
|
||||
);
|
||||
|
||||
MOO_EXPORT int moo_conv_utf16_to_ucstr (
|
||||
const moo_bch_t* bcs,
|
||||
moo_oow_t* bcslen,
|
||||
moo_uch_t* ucs,
|
||||
moo_oow_t* ucslen
|
||||
);
|
||||
|
||||
/*
 * Encodes the character 'uc' as UTF-16 into the buffer 'utf16', whose
 * capacity in bytes is 'size'.
 * Returns 2 for a character up to U+FFFF, 4 for a character up to U+10FFFF
 * (only when moo_uch_t is wider than 2 bytes), and 0 for anything else.
 */
MOO_EXPORT moo_oow_t moo_uc_to_utf16 (moo_uch_t uc, moo_bch_t* utf16, moo_oow_t size);
|
||||
|
||||
/*
 * Decodes one character from the 'size' bytes at 'utf16' and stores it
 * in '*uc'.
 * Returns the number of bytes consumed (2, or 4 for a surrogate pair when
 * moo_uch_t is wider than 2 bytes). Returns 0, leaving '*uc' untouched, when
 * too few bytes are available or the bytes do not form a decodable sequence.
 */
MOO_EXPORT moo_oow_t moo_utf16_to_uc (const moo_bch_t* utf16, moo_oow_t size, moo_uch_t* uc);
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
|
@ -26,7 +26,7 @@
|
||||
|
||||
#include "moo-prv.h"
|
||||
|
||||
moo_t* moo_open (moo_mmgr_t* mmgr, moo_oow_t xtnsize, moo_oow_t heapsize, const moo_vmprim_t* vmprim, moo_errinf_t* errinfo)
|
||||
moo_t* moo_open (moo_mmgr_t* mmgr, moo_oow_t xtnsize, moo_oow_t heapsize, moo_cmgr_t* cmgr, const moo_vmprim_t* vmprim, moo_errinf_t* errinfo)
|
||||
{
|
||||
moo_t* moo;
|
||||
|
||||
@ -36,7 +36,7 @@ moo_t* moo_open (moo_mmgr_t* mmgr, moo_oow_t xtnsize, moo_oow_t heapsize, const
|
||||
moo = (moo_t*)MOO_MMGR_ALLOC(mmgr, MOO_SIZEOF(*moo) + xtnsize);
|
||||
if (moo)
|
||||
{
|
||||
if (moo_init(moo, mmgr, heapsize, vmprim) <= -1)
|
||||
if (moo_init(moo, mmgr, heapsize, cmgr, vmprim) <= -1)
|
||||
{
|
||||
if (errinfo) moo_geterrinf (moo, errinfo);
|
||||
MOO_MMGR_FREE (mmgr, moo);
|
||||
@ -96,7 +96,7 @@ static void free_heap (moo_t* moo, void* ptr)
|
||||
MOO_MMGR_FREE(moo->mmgr, ptr);
|
||||
}
|
||||
|
||||
int moo_init (moo_t* moo, moo_mmgr_t* mmgr, moo_oow_t heapsz, const moo_vmprim_t* vmprim)
|
||||
int moo_init (moo_t* moo, moo_mmgr_t* mmgr, moo_oow_t heapsz, moo_cmgr_t* cmgr, const moo_vmprim_t* vmprim)
|
||||
{
|
||||
int modtab_inited = 0;
|
||||
|
||||
@ -108,7 +108,7 @@ int moo_init (moo_t* moo, moo_mmgr_t* mmgr, moo_oow_t heapsz, const moo_vmprim_t
|
||||
|
||||
MOO_MEMSET (moo, 0, MOO_SIZEOF(*moo));
|
||||
moo->mmgr = mmgr;
|
||||
moo->cmgr = moo_get_utf8_cmgr();
|
||||
moo->cmgr = cmgr;
|
||||
moo->vmprim = *vmprim;
|
||||
if (!moo->vmprim.alloc_heap) moo->vmprim.alloc_heap = alloc_heap;
|
||||
if (!moo->vmprim.free_heap) moo->vmprim.free_heap = free_heap;
|
||||
@ -129,7 +129,7 @@ int moo_init (moo_t* moo, moo_mmgr_t* mmgr, moo_oow_t heapsz, const moo_vmprim_t
|
||||
* routine still function despite some side-effects when
|
||||
* reallocation fails */
|
||||
/* +1 required for consistency with put_oocs and put_ooch in logfmt.c */
|
||||
moo->log.ptr = moo_allocmem (moo, (moo->log.capa + 1) * MOO_SIZEOF(*moo->log.ptr));
|
||||
moo->log.ptr = moo_allocmem(moo, (moo->log.capa + 1) * MOO_SIZEOF(*moo->log.ptr));
|
||||
if (!moo->log.ptr) goto oops;
|
||||
|
||||
if (moo_rbt_init (&moo->modtab, moo, MOO_SIZEOF(moo_ooch_t), 1) <= -1) goto oops;
|
||||
|
@ -1834,6 +1834,7 @@ MOO_EXPORT moo_t* moo_open (
|
||||
moo_mmgr_t* mmgr,
|
||||
moo_oow_t xtnsize,
|
||||
moo_oow_t heapsize,
|
||||
moo_cmgr_t* cmgr,
|
||||
const moo_vmprim_t* vmprim,
|
||||
moo_errinf_t* errinfo
|
||||
);
|
||||
@ -1846,6 +1847,7 @@ MOO_EXPORT int moo_init (
|
||||
moo_t* moo,
|
||||
moo_mmgr_t* mmgr,
|
||||
moo_oow_t heapsize,
|
||||
moo_cmgr_t* cmgr,
|
||||
const moo_vmprim_t* vmprim
|
||||
);
|
||||
|
||||
|
@ -94,5 +94,6 @@ moo_pfrc_t moo_pf_utf8_to_uc (moo_t* moo, moo_mod_t* mod, moo_ooi_t nargs)
|
||||
/*
 * Primitive function stub for the character-to-UTF-8 conversion.
 * No conversion is performed yet: the return slot is set to nil and
 * success is reported.
 */
moo_pfrc_t moo_pf_uc_to_utf8 (moo_t* moo, moo_mod_t* mod, moo_ooi_t nargs)
{
	/* TODO: produce the real result; nil is only a placeholder */
	MOO_STACK_SETRET (moo, nargs, moo->_nil);

	return MOO_PF_SUCCESS;
}
|
||||
|
@ -3203,7 +3203,8 @@ moo_t* moo_openstd (moo_oow_t xtnsize, const moo_cfgstd_t* cfg, moo_errinf_t* er
|
||||
vmprim.vm_muxwait = vm_muxwait;
|
||||
vmprim.vm_sleep = vm_sleep;
|
||||
|
||||
moo = moo_open(&sys_mmgr, MOO_SIZEOF(xtn_t) + xtnsize, cfg->memsize, &vmprim, errinfo);
|
||||
moo = moo_open(&sys_mmgr, MOO_SIZEOF(xtn_t) + xtnsize, cfg->memsize,
|
||||
(cfg->cmgr? cfg->cmgr: moo_get_utf8_cmgr), &vmprim, errinfo);
|
||||
if (!moo) return MOO_NULL;
|
||||
|
||||
xtn = GET_XTN(moo);
|
||||
|
77
moo/lib/utf16.c
Normal file
77
moo/lib/utf16.c
Normal file
@ -0,0 +1,77 @@
|
||||
/*
|
||||
* $Id$
|
||||
*
|
||||
Copyright (c) 2014-2018 Chung, Hyung-Hwan. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "moo-prv.h"
|
||||
|
||||
/* TODO: handle both byte orders - UTF-16BE and UTF-16LE */
|
||||
|
||||
/*
 * Encodes the character 'uc' as UTF-16. Code units are stored as
 * moo_uint16_t values, that is, in the byte order of the host.
 *
 *  uc    - character to encode
 *  utf16 - destination buffer; when it is null nothing is stored
 *  size  - capacity of 'utf16' in bytes
 *
 * Returns the number of bytes the encoding occupies - 2 for a character up
 * to U+FFFF, 4 for a character up to U+10FFFF when moo_uch_t is wider than
 * 2 bytes - or 0 if the character can't be encoded.
 *
 * The buffer is written only when it can hold the whole encoding. The
 * length is returned regardless, so a return value greater than 'size'
 * means that nothing was stored.
 * NOTE(review): callers are expected to compare the return value with
 * 'size' - confirm against the code that invokes the character manager.
 */
moo_oow_t moo_uc_to_utf16 (moo_uch_t uc, moo_bch_t* utf16, moo_oow_t size)
{
	moo_uint16_t* u16 = (moo_uint16_t*)utf16;

	if (uc <= 0xFFFF)
	{
		/* a single code unit carries the value as it is.
		 * NOTE(review): U+D800 - U+DFFF are still emitted verbatim here
		 * although moo_utf16_to_uc() does not decode a lone surrogate. */
		if (u16 && size >= 2) u16[0] = (moo_uint16_t)uc;
		return 2;
	}
#if (MOO_SIZEOF_UCH_T > 2)
	else if (uc <= 0x10FFFF)
	{
		if (u16 && size >= 4)
		{
			/* subtracting 0x10000 leaves a 20-bit value. its upper 10 bits
			 * go to the high surrogate and its lower 10 bits to the low
			 * surrogate - the exact inverse of moo_utf16_to_uc(). */
			moo_uch_t v = uc - 0x10000;

			u16[0] = (moo_uint16_t)(0xD800 | (v >> 10));
			u16[1] = (moo_uint16_t)(0xDC00 | (v & 0x3FF));
		}
		return 4;
	}
#endif

	return 0; /* illegal character */
}
|
||||
|
||||
/*
 * Decodes one character from the 'size' bytes at 'utf16'. The bytes are read
 * as moo_uint16_t code units.
 *
 * On success the character is stored in '*uc' and the number of bytes
 * consumed is returned: 2 for a single code unit, 4 for a surrogate pair.
 * 0 is returned, with '*uc' left untouched, when the input is too short or
 * does not start with a decodable sequence. Surrogate pairs are decoded only
 * if moo_uch_t is wider than 2 bytes.
 */
moo_oow_t moo_utf16_to_uc (const moo_bch_t* utf16, moo_oow_t size, moo_uch_t* uc)
{
	const moo_uint16_t* units = (const moo_uint16_t*)utf16;
	moo_uint16_t lead;

	/* not even one code unit is available */
	if (size < 2) return 0;

	lead = units[0];
	if (lead < 0xD800 || lead > 0xDFFF)
	{
		/* outside the surrogate range - the unit is the character itself */
		*uc = lead;
		return 2;
	}

#if (MOO_SIZEOF_UCH_T > 2)
	/* 'lead' is a surrogate at this point. only a high surrogate that is
	 * followed by a complete second unit can begin a pair */
	if (lead <= 0xDBFF && size >= 4)
	{
		moo_uint16_t trail = units[1];

		if (trail >= 0xDC00 && trail <= 0xDFFF)
		{
			*uc = (((lead & 0x3FF) << 10) | (trail & 0x3FF)) + 0x10000;
			return 4;
		}
	}
#endif

	/* truncated pair, stray low surrogate or a pair that moo_uch_t can't hold */
	return 0;
}
|
@ -178,6 +178,8 @@ moo_oow_t moo_utf8_to_uc (const moo_bch_t* utf8, moo_oow_t size, moo_uch_t* uc)
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
/*
|
||||
* See http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
|
||||
*/
|
||||
|
@ -731,6 +731,43 @@ int moo_conv_ucstr_to_utf8 (const moo_uch_t* ucs, moo_oow_t* ucslen, moo_bch_t*
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
static moo_cmgr_t utf16_cmgr =
|
||||
{
|
||||
moo_utf16_to_uc,
|
||||
moo_uc_to_utf16
|
||||
};
|
||||
|
||||
moo_cmgr_t* moo_get_utf16_cmgr (void)
|
||||
{
|
||||
return &utf16_cmgr;
|
||||
}
|
||||
|
||||
/*
 * Converts a length-bound UTF-16 source to characters by delegating to the
 * generic converter with the UTF-16 character manager. The result of the
 * generic converter is returned unchanged.
 */
int moo_conv_utf16_to_uchars (const moo_bch_t* bcs, moo_oow_t* bcslen, moo_uch_t* ucs, moo_oow_t* ucslen)
{
	moo_cmgr_t* cmgr = &utf16_cmgr;

	/* length-bound source. the trailing 0 is the converter's last argument
	 * NOTE(review): presumably the 'all' flag - confirm with its prototype */
	return moo_conv_bchars_to_uchars_with_cmgr(bcs, bcslen, ucs, ucslen, cmgr, 0);
}
|
||||
|
||||
/*
 * Converts a length-bound character sequence to UTF-16 by delegating to the
 * generic converter with the UTF-16 character manager. The result of the
 * generic converter is returned unchanged.
 */
int moo_conv_uchars_to_utf16 (const moo_uch_t* ucs, moo_oow_t* ucslen, moo_bch_t* bcs, moo_oow_t* bcslen)
{
	moo_cmgr_t* cmgr = &utf16_cmgr;

	/* length-bound source */
	return moo_conv_uchars_to_bchars_with_cmgr(ucs, ucslen, bcs, bcslen, cmgr);
}
|
||||
|
||||
/*
 * Converts a null-terminated UTF-16 source to characters by delegating to
 * the generic converter with the UTF-16 character manager. The result of
 * the generic converter is returned unchanged.
 */
int moo_conv_utf16_to_ucstr (const moo_bch_t* bcs, moo_oow_t* bcslen, moo_uch_t* ucs, moo_oow_t* ucslen)
{
	moo_cmgr_t* cmgr = &utf16_cmgr;

	/* null-terminated source. the trailing 0 goes to the 'all' parameter
	 * of the generic converter */
	return moo_conv_bcs_to_ucs_with_cmgr(bcs, bcslen, ucs, ucslen, cmgr, 0);
}
|
||||
|
||||
/*
 * Converts a null-terminated character string to UTF-16 by delegating to
 * the generic converter with the UTF-16 character manager. The result of
 * the generic converter is returned unchanged.
 */
int moo_conv_ucstr_to_utf16 (const moo_uch_t* ucs, moo_oow_t* ucslen, moo_bch_t* bcs, moo_oow_t* bcslen)
{
	moo_cmgr_t* cmgr = &utf16_cmgr;

	/* null-terminated source */
	return moo_conv_ucs_to_bcs_with_cmgr(ucs, ucslen, bcs, bcslen, cmgr);
}
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
int moo_convbtouchars (moo_t* moo, const moo_bch_t* bcs, moo_oow_t* bcslen, moo_uch_t* ucs, moo_oow_t* ucslen)
|
||||
{
|
||||
/* length bound */
|
||||
|
Loading…
x
Reference in New Issue
Block a user