Add a UTF-16 character manager (utf16.c) to libmoo and make the character
manager a parameter of moo_open()/moo_init(). moo_openstd() passes cfg->cmgr
and falls back to the UTF-8 manager when cfg->cmgr is null.

diff --git a/moo/lib/Makefile.am b/moo/lib/Makefile.am
index 4a0a972..693f99b 100644
--- a/moo/lib/Makefile.am
+++ b/moo/lib/Makefile.am
@@ -89,6 +89,7 @@ libmoo_la_SOURCES = \
 	rbt.c \
 	sym.c \
 	utf8.c \
+	utf16.c \
 	utl.c \
 	std.c
 libmoo_la_CPPFLAGS = $(CPPFLAGS_LIB_COMMON) $(CPPFLAGS_PFMOD)
diff --git a/moo/lib/Makefile.in b/moo/lib/Makefile.in
index 544ec73..57d7a04 100644
--- a/moo/lib/Makefile.in
+++ b/moo/lib/Makefile.in
@@ -173,8 +173,8 @@ am_libmoo_la_OBJECTS = libmoo_la-bigint.lo libmoo_la-comp.lo \
 	libmoo_la-gc.lo libmoo_la-heap.lo libmoo_la-moo.lo \
 	libmoo_la-obj.lo libmoo_la-opt.lo libmoo_la-pf-basic.lo \
 	libmoo_la-pf-sys.lo libmoo_la-pf-utf8.lo libmoo_la-rbt.lo \
-	libmoo_la-sym.lo libmoo_la-utf8.lo libmoo_la-utl.lo \
-	libmoo_la-std.lo
+	libmoo_la-sym.lo libmoo_la-utf8.lo libmoo_la-utf16.lo \
+	libmoo_la-utl.lo libmoo_la-std.lo
 libmoo_la_OBJECTS = $(am_libmoo_la_OBJECTS)
 AM_V_lt = $(am__v_lt_@AM_V@)
 am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
@@ -480,6 +480,7 @@ libmoo_la_SOURCES = \
 	rbt.c \
 	sym.c \
 	utf8.c \
+	utf16.c \
 	utl.c \
 	std.c
 
@@ -663,6 +664,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-rbt.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-std.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-sym.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-utf16.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-utf8.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-utl.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/moo-main.Po@am__quote@
@@ -825,6 +827,13 @@ libmoo_la-utf8.lo: utf8.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmoo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libmoo_la-utf8.lo `test -f 'utf8.c' || echo '$(srcdir)/'`utf8.c
 
+libmoo_la-utf16.lo: utf16.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmoo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libmoo_la-utf16.lo -MD -MP -MF $(DEPDIR)/libmoo_la-utf16.Tpo -c -o libmoo_la-utf16.lo `test -f 'utf16.c' || echo '$(srcdir)/'`utf16.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libmoo_la-utf16.Tpo $(DEPDIR)/libmoo_la-utf16.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='utf16.c' object='libmoo_la-utf16.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmoo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libmoo_la-utf16.lo `test -f 'utf16.c' || echo '$(srcdir)/'`utf16.c
+
 libmoo_la-utl.lo: utl.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmoo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libmoo_la-utl.lo -MD -MP -MF $(DEPDIR)/libmoo_la-utl.Tpo -c -o libmoo_la-utl.lo `test -f 'utl.c' || echo '$(srcdir)/'`utl.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libmoo_la-utl.Tpo $(DEPDIR)/libmoo_la-utl.Plo
diff --git a/moo/lib/moo-std.h b/moo/lib/moo-std.h
index 92c0052..a084a0a 100644
--- a/moo/lib/moo-std.h
+++ b/moo/lib/moo-std.h
@@ -17,6 +17,7 @@ struct moo_cfgstd_t
 
 	moo_oow_t memsize;
 	int large_pages;
+	moo_cmgr_t* cmgr;
 
 	union
 	{
diff --git a/moo/lib/moo-utl.h b/moo/lib/moo-utl.h
index 000e83f..264bd26 100644
--- a/moo/lib/moo-utl.h
+++ b/moo/lib/moo-utl.h
@@ -361,6 +361,14 @@ MOO_EXPORT int moo_concatoocharstosbuf (
 );
 
 
+/* ------------------------------------------------------------------------- */
+
+MOO_EXPORT int moo_ucwidth (
+	moo_uch_t uc
+);
+
+/* ------------------------------------------------------------------------- */
+
 #if defined(MOO_OOCH_IS_UCH)
 #	define moo_conv_oocs_to_bcs_with_cmgr(oocs,oocslen,bcs,bcslen,cmgr) moo_conv_ucs_to_bcs_with_cmgr(oocs,oocslen,bcs,bcslen,cmgr)
 #	define moo_conv_oochars_to_bchars_with_cmgr(oocs,oocslen,bcs,bcslen,cmgr) moo_conv_uchars_to_bchars_with_cmgr(oocs,oocslen,bcs,bcslen,cmgr)
@@ -378,7 +386,7 @@ MOO_EXPORT int moo_conv_bcs_to_ucs_with_cmgr (
 	moo_cmgr_t* cmgr,
 	int all
 );
-	
+
 MOO_EXPORT int moo_conv_bchars_to_uchars_with_cmgr (
 	const moo_bch_t* bcs,
 	moo_oow_t* bcslen,
@@ -404,6 +412,8 @@ MOO_EXPORT int moo_conv_uchars_to_bchars_with_cmgr (
 	moo_cmgr_t* cmgr
 );
 
+/* ------------------------------------------------------------------------- */
+
 MOO_EXPORT moo_cmgr_t* moo_get_utf8_cmgr (
 	void
 );
@@ -503,10 +513,52 @@ MOO_EXPORT moo_oow_t moo_utf8_to_uc (
 	moo_uch_t* uc
 );
 
-MOO_EXPORT int moo_ucwidth (
-	moo_uch_t uc
+/* ------------------------------------------------------------------------- */
+
+MOO_EXPORT moo_cmgr_t* moo_get_utf16_cmgr (
+	void
 );
 
+MOO_EXPORT int moo_conv_uchars_to_utf16 (
+	const moo_uch_t* ucs,
+	moo_oow_t* ucslen,
+	moo_bch_t* bcs,
+	moo_oow_t* bcslen
+);
+
+MOO_EXPORT int moo_conv_utf16_to_uchars (
+	const moo_bch_t* bcs,
+	moo_oow_t* bcslen,
+	moo_uch_t* ucs,
+	moo_oow_t* ucslen
+);
+
+
+MOO_EXPORT int moo_conv_ucstr_to_utf16 (
+	const moo_uch_t* ucs,
+	moo_oow_t* ucslen,
+	moo_bch_t* bcs,
+	moo_oow_t* bcslen
+);
+
+MOO_EXPORT int moo_conv_utf16_to_ucstr (
+	const moo_bch_t* bcs,
+	moo_oow_t* bcslen,
+	moo_uch_t* ucs,
+	moo_oow_t* ucslen
+);
+
+MOO_EXPORT moo_oow_t moo_uc_to_utf16 (
+	moo_uch_t uc,
+	moo_bch_t* utf16,
+	moo_oow_t size
+);
+
+MOO_EXPORT moo_oow_t moo_utf16_to_uc (
+	const moo_bch_t* utf16,
+	moo_oow_t size,
+	moo_uch_t* uc
+);
 
 /* ------------------------------------------------------------------------- */
 
diff --git a/moo/lib/moo.c b/moo/lib/moo.c
index ecf1912..138f92b 100644
--- a/moo/lib/moo.c
+++ b/moo/lib/moo.c
@@ -26,7 +26,7 @@
 
 #include "moo-prv.h"
 
-moo_t* moo_open (moo_mmgr_t* mmgr, moo_oow_t xtnsize, moo_oow_t heapsize, const moo_vmprim_t* vmprim, moo_errinf_t* errinfo)
+moo_t* moo_open (moo_mmgr_t* mmgr, moo_oow_t xtnsize, moo_oow_t heapsize, moo_cmgr_t* cmgr, const moo_vmprim_t* vmprim, moo_errinf_t* errinfo)
 {
 	moo_t* moo;
 
@@ -36,7 +36,7 @@ moo_t* moo_open (moo_mmgr_t* mmgr, moo_oow_t xtnsize, moo_oow_t heapsize, const
 	moo = (moo_t*)MOO_MMGR_ALLOC(mmgr, MOO_SIZEOF(*moo) + xtnsize);
 	if (moo)
 	{
-		if (moo_init(moo, mmgr, heapsize, vmprim) <= -1)
+		if (moo_init(moo, mmgr, heapsize, cmgr, vmprim) <= -1)
 		{
 			if (errinfo) moo_geterrinf (moo, errinfo);
 			MOO_MMGR_FREE (mmgr, moo);
@@ -96,7 +96,7 @@ static void free_heap (moo_t* moo, void* ptr)
 	MOO_MMGR_FREE(moo->mmgr, ptr);
 }
 
-int moo_init (moo_t* moo, moo_mmgr_t* mmgr, moo_oow_t heapsz, const moo_vmprim_t* vmprim)
+int moo_init (moo_t* moo, moo_mmgr_t* mmgr, moo_oow_t heapsz, moo_cmgr_t* cmgr, const moo_vmprim_t* vmprim)
 {
 	int modtab_inited = 0;
 
@@ -108,7 +108,7 @@ int moo_init (moo_t* moo, moo_mmgr_t* mmgr, moo_oow_t heapsz, const moo_vmprim_t
 
 	MOO_MEMSET (moo, 0, MOO_SIZEOF(*moo));
 	moo->mmgr = mmgr;
-	moo->cmgr = moo_get_utf8_cmgr();
+	moo->cmgr = cmgr;
 	moo->vmprim = *vmprim;
 	if (!moo->vmprim.alloc_heap) moo->vmprim.alloc_heap = alloc_heap;
 	if (!moo->vmprim.free_heap) moo->vmprim.free_heap = free_heap;
@@ -129,7 +129,7 @@ int moo_init (moo_t* moo, moo_mmgr_t* mmgr, moo_oow_t heapsz, const moo_vmprim_t
 	 * routine still function despite some side-effects when
 	 * reallocation fails */
 	/* +1 required for consistency with put_oocs and put_ooch in logfmt.c */
-	moo->log.ptr = moo_allocmem (moo, (moo->log.capa + 1) * MOO_SIZEOF(*moo->log.ptr));
+	moo->log.ptr = moo_allocmem(moo, (moo->log.capa + 1) * MOO_SIZEOF(*moo->log.ptr));
 	if (!moo->log.ptr) goto oops;
 
 	if (moo_rbt_init (&moo->modtab, moo, MOO_SIZEOF(moo_ooch_t), 1) <= -1) goto oops;
diff --git a/moo/lib/moo.h b/moo/lib/moo.h
index 0e82aa5..d8e2266 100644
--- a/moo/lib/moo.h
+++ b/moo/lib/moo.h
@@ -1834,6 +1834,7 @@ MOO_EXPORT moo_t* moo_open (
 	moo_mmgr_t* mmgr,
 	moo_oow_t xtnsize,
 	moo_oow_t heapsize,
+	moo_cmgr_t* cmgr,
 	const moo_vmprim_t* vmprim,
 	moo_errinf_t* errinfo
 );
@@ -1846,6 +1847,7 @@ MOO_EXPORT int moo_init (
 	moo_t* moo,
 	moo_mmgr_t* mmgr,
 	moo_oow_t heapsize,
+	moo_cmgr_t* cmgr,
 	const moo_vmprim_t* vmprim
 );
 
diff --git a/moo/lib/pf-utf8.c b/moo/lib/pf-utf8.c
index 43b6d11..1ece9ea 100644
--- a/moo/lib/pf-utf8.c
+++ b/moo/lib/pf-utf8.c
@@ -94,5 +94,6 @@ moo_pfrc_t moo_pf_utf8_to_uc (moo_t* moo, moo_mod_t* mod, moo_ooi_t nargs)
 moo_pfrc_t moo_pf_uc_to_utf8 (moo_t* moo, moo_mod_t* mod, moo_ooi_t nargs)
 {
 	MOO_STACK_SETRET (moo, nargs, moo->_nil);
+	/* TODO: */
 	return MOO_PF_SUCCESS;
 }
diff --git a/moo/lib/std.c b/moo/lib/std.c
index 4bc4b30..ce9c0db 100644
--- a/moo/lib/std.c
+++ b/moo/lib/std.c
@@ -3203,7 +3203,8 @@ moo_t* moo_openstd (moo_oow_t xtnsize, const moo_cfgstd_t* cfg, moo_errinf_t* er
 	vmprim.vm_muxwait = vm_muxwait;
 	vmprim.vm_sleep = vm_sleep;
 
-	moo = moo_open(&sys_mmgr, MOO_SIZEOF(xtn_t) + xtnsize, cfg->memsize, &vmprim, errinfo);
+	moo = moo_open(&sys_mmgr, MOO_SIZEOF(xtn_t) + xtnsize, cfg->memsize,
+		(cfg->cmgr? cfg->cmgr: moo_get_utf8_cmgr()), &vmprim, errinfo);
 	if (!moo) return MOO_NULL;
 
 	xtn = GET_XTN(moo);
diff --git a/moo/lib/utf16.c b/moo/lib/utf16.c
new file mode 100644
index 0000000..1f97ef2
--- /dev/null
+++ b/moo/lib/utf16.c
@@ -0,0 +1,92 @@
+/*
+ * $Id$
+ *
+    Copyright (c) 2014-2018 Chung, Hyung-Hwan. All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+    1. Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
+    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+    IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+    NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+    THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "moo-prv.h"
+
+/* TODO: handle different endians - UTF16BE or UTF16LE */
+
+/*
+ * Encodes the code point uc as UTF-16 code units in native byte order.
+ * Returns the number of bytes the encoding needs (2 or 4), or 0 if uc
+ * can't be encoded. Nothing is stored if utf16 is null or size is too small.
+ */
+moo_oow_t moo_uc_to_utf16 (moo_uch_t uc, moo_bch_t* utf16, moo_oow_t size)
+{
+	moo_uint16_t* u16 = (moo_uint16_t*)utf16;
+
+	if (uc <= 0xFFFF)
+	{
+		if (utf16 && size >= 2) u16[0] = (moo_uint16_t)uc;
+		return 2;
+	}
+#if (MOO_SIZEOF_UCH_T > 2)
+	else if (uc <= 0x10FFFF)
+	{
+		if (utf16 && size >= 4)
+		{
+			/* split the 20-bit offset from U+10000 into two 10-bit halves */
+			moo_uch_t v = uc - 0x10000;
+			u16[0] = (moo_uint16_t)(0xD800 | (v >> 10));
+			u16[1] = (moo_uint16_t)(0xDC00 | (v & 0x3FF));
+		}
+		return 4;
+	}
+#endif
+
+	return 0; /* illegal character */
+}
+
+/*
+ * Decodes one code point from the first size bytes of utf16, read as code
+ * units in native byte order, into *uc. Returns the number of bytes
+ * consumed (2 or 4), or 0 if the input is too short or not a valid sequence.
+ */
+moo_oow_t moo_utf16_to_uc (const moo_bch_t* utf16, moo_oow_t size, moo_uch_t* uc)
+{
+	const moo_uint16_t* u16 = (const moo_uint16_t*)utf16;
+
+	if (size < 2) return 0; /* incomplete sequence */
+
+	if (u16[0] <= 0xD7FF || u16[0] >= 0xE000)
+	{
+		/* BMP - U+0000 - U+D7FF, U+E000 - U+FFFF */
+		*uc = u16[0];
+		return 2;
+	}
+#if (MOO_SIZEOF_UCH_T > 2)
+	else if (u16[0] >= 0xD800 && u16[0] <= 0xDBFF) /* high-surrogate */
+	{
+		if (size < 4) return 0; /* incomplete */
+		if (u16[1] >= 0xDC00 && u16[1] <= 0xDFFF) /* low-surrogate */
+		{
+			*uc = (((u16[0] & 0x3FF) << 10) | (u16[1] & 0x3FF)) + 0x10000;
+			return 4;
+		}
+	}
+#endif
+
+	return 0;
+}
diff --git a/moo/lib/utf8.c b/moo/lib/utf8.c
index c62a4a4..1ca1de4 100644
--- a/moo/lib/utf8.c
+++ b/moo/lib/utf8.c
@@ -178,6 +178,8 @@ moo_oow_t moo_utf8_to_uc (const moo_bch_t* utf8, moo_oow_t size, moo_uch_t* uc)
 }
 
 
+/* ----------------------------------------------------------------------- */
+
 /*
  * See http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
  */
diff --git a/moo/lib/utl.c b/moo/lib/utl.c
index de07132..a9e165c 100644
--- a/moo/lib/utl.c
+++ b/moo/lib/utl.c
@@ -731,6 +731,43 @@ int moo_conv_ucstr_to_utf8 (const moo_uch_t* ucs, moo_oow_t* ucslen, moo_bch_t*
 
 /* ----------------------------------------------------------------------- */
 
+static moo_cmgr_t utf16_cmgr =
+{
+	moo_utf16_to_uc,
+	moo_uc_to_utf16
+};
+
+moo_cmgr_t* moo_get_utf16_cmgr (void)
+{
+	return &utf16_cmgr;
+}
+
+int moo_conv_utf16_to_uchars (const moo_bch_t* bcs, moo_oow_t* bcslen, moo_uch_t* ucs, moo_oow_t* ucslen)
+{
+	/* the source is length bound */
+	return moo_conv_bchars_to_uchars_with_cmgr(bcs, bcslen, ucs, ucslen, &utf16_cmgr, 0);
+}
+
+int moo_conv_uchars_to_utf16 (const moo_uch_t* ucs, moo_oow_t* ucslen, moo_bch_t* bcs, moo_oow_t* bcslen)
+{
+	/* length bound */
+	return moo_conv_uchars_to_bchars_with_cmgr(ucs, ucslen, bcs, bcslen, &utf16_cmgr);
+}
+
+int moo_conv_utf16_to_ucstr (const moo_bch_t* bcs, moo_oow_t* bcslen, moo_uch_t* ucs, moo_oow_t* ucslen)
+{
+	/* null-terminated. */
+	return moo_conv_bcs_to_ucs_with_cmgr(bcs, bcslen, ucs, ucslen, &utf16_cmgr, 0);
+}
+
+int moo_conv_ucstr_to_utf16 (const moo_uch_t* ucs, moo_oow_t* ucslen, moo_bch_t* bcs, moo_oow_t* bcslen)
+{
+	/* null-terminated */
+	return moo_conv_ucs_to_bcs_with_cmgr(ucs, ucslen, bcs, bcslen, &utf16_cmgr);
+}
+
+/* ----------------------------------------------------------------------- */
+
 int moo_convbtouchars (moo_t* moo, const moo_bch_t* bcs, moo_oow_t* bcslen, moo_uch_t* ucs, moo_oow_t* ucslen)
 {
 	/* length bound */