split some common files
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
955b734a79
commit
169c626819
@ -174,7 +174,7 @@ am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/ac/ar-lib \
|
||||
$(top_srcdir)/ac/compile $(top_srcdir)/ac/config.guess \
|
||||
$(top_srcdir)/ac/config.sub $(top_srcdir)/ac/install-sh \
|
||||
$(top_srcdir)/ac/ltmain.sh $(top_srcdir)/ac/missing README.md \
|
||||
ac/ar-lib ac/compile ac/config.guess ac/config.sub ac/depcomp \
|
||||
ac/ar-lib ac/compile ac/config.guess ac/config.sub \
|
||||
ac/install-sh ac/ltmain.sh ac/missing
|
||||
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||
distdir = $(PACKAGE)-$(VERSION)
|
||||
|
@ -30,6 +30,7 @@
|
||||
|
||||
#include <hcl.h>
|
||||
#include <hcl-chr.h>
|
||||
#include <hcl-str.h>
|
||||
#include <hcl-utl.h>
|
||||
#include <hcl-opt.h>
|
||||
|
||||
|
@ -25,6 +25,7 @@
|
||||
*/
|
||||
|
||||
#include <hcl-x.h>
|
||||
#include <hcl-cmgr.h>
|
||||
#include <hcl-json.h>
|
||||
#include <hcl-opt.h>
|
||||
#include <hcl-str.h>
|
||||
|
6
configure
vendored
6
configure
vendored
@ -624,7 +624,7 @@ PACKAGE_TARNAME='hcl'
|
||||
PACKAGE_VERSION='0.1.0'
|
||||
PACKAGE_STRING='hcl 0.1.0'
|
||||
PACKAGE_BUGREPORT='Chung, Hyung-Hwan (hyunghwan.chung@gmail.com)'
|
||||
PACKAGE_URL='http://code.miflux.com/@hcl'
|
||||
PACKAGE_URL='http://code.miflux.com/hyung-hwan/hcl'
|
||||
|
||||
# Factoring default headers for most tests.
|
||||
ac_includes_default="\
|
||||
@ -1536,7 +1536,7 @@ Use these variables to override the choices made by `configure' or to help
|
||||
it to find libraries and programs with nonstandard names/locations.
|
||||
|
||||
Report bugs to <Chung, Hyung-Hwan (hyunghwan.chung@gmail.com)>.
|
||||
hcl home page: <http://code.miflux.com/@hcl>.
|
||||
hcl home page: <http://code.miflux.com/hyung-hwan/hcl>.
|
||||
_ACEOF
|
||||
ac_status=$?
|
||||
fi
|
||||
@ -19511,7 +19511,7 @@ Configuration commands:
|
||||
$config_commands
|
||||
|
||||
Report bugs to <Chung, Hyung-Hwan (hyunghwan.chung@gmail.com)>.
|
||||
hcl home page: <http://code.miflux.com/@hcl>."
|
||||
hcl home page: <http://code.miflux.com/hyung-hwan/hcl>."
|
||||
|
||||
_ACEOF
|
||||
ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"`
|
||||
|
@ -49,6 +49,7 @@ pkglibdir = $(libdir)
|
||||
pkginclude_HEADERS = \
|
||||
hcl.h \
|
||||
hcl-chr.h \
|
||||
hcl-cmgr.h \
|
||||
hcl-cmn.h \
|
||||
hcl-fmt.h \
|
||||
hcl-opt.h \
|
||||
@ -64,6 +65,7 @@ libhcl_la_SOURCES = \
|
||||
hcl-prv.h \
|
||||
bigint.c \
|
||||
chr.c \
|
||||
cmgr.c \
|
||||
cnode.c \
|
||||
comp.c \
|
||||
debug.c \
|
||||
|
@ -150,15 +150,16 @@ am__DEPENDENCIES_5 = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_2) \
|
||||
$(am__DEPENDENCIES_3) $(am__DEPENDENCIES_4)
|
||||
libhcl_la_DEPENDENCIES = $(am__DEPENDENCIES_5) $(am__append_6)
|
||||
am_libhcl_la_OBJECTS = libhcl_la-bigint.lo libhcl_la-chr.lo \
|
||||
libhcl_la-cnode.lo libhcl_la-comp.lo libhcl_la-debug.lo \
|
||||
libhcl_la-decode.lo libhcl_la-dic.lo libhcl_la-err.lo \
|
||||
libhcl_la-exec.lo libhcl_la-fmt.lo libhcl_la-gc.lo \
|
||||
libhcl_la-hcl.lo libhcl_la-heap.lo libhcl_la-mb8.lo \
|
||||
libhcl_la-number.lo libhcl_la-obj.lo libhcl_la-opt.lo \
|
||||
libhcl_la-prim.lo libhcl_la-print.lo libhcl_la-rbt.lo \
|
||||
libhcl_la-read.lo libhcl_la-std.lo libhcl_la-str.lo \
|
||||
libhcl_la-sym.lo libhcl_la-utf16.lo libhcl_la-utf8.lo \
|
||||
libhcl_la-utl.lo libhcl_la-xchg.lo libhcl_la-xma.lo
|
||||
libhcl_la-cmgr.lo libhcl_la-cnode.lo libhcl_la-comp.lo \
|
||||
libhcl_la-debug.lo libhcl_la-decode.lo libhcl_la-dic.lo \
|
||||
libhcl_la-err.lo libhcl_la-exec.lo libhcl_la-fmt.lo \
|
||||
libhcl_la-gc.lo libhcl_la-hcl.lo libhcl_la-heap.lo \
|
||||
libhcl_la-mb8.lo libhcl_la-number.lo libhcl_la-obj.lo \
|
||||
libhcl_la-opt.lo libhcl_la-prim.lo libhcl_la-print.lo \
|
||||
libhcl_la-rbt.lo libhcl_la-read.lo libhcl_la-std.lo \
|
||||
libhcl_la-str.lo libhcl_la-sym.lo libhcl_la-utf16.lo \
|
||||
libhcl_la-utf8.lo libhcl_la-utl.lo libhcl_la-xchg.lo \
|
||||
libhcl_la-xma.lo
|
||||
libhcl_la_OBJECTS = $(am_libhcl_la_OBJECTS)
|
||||
AM_V_lt = $(am__v_lt_@AM_V@)
|
||||
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
|
||||
@ -197,8 +198,9 @@ DEFAULT_INCLUDES =
|
||||
depcomp = $(SHELL) $(top_srcdir)/ac/depcomp
|
||||
am__maybe_remake_depfiles = depfiles
|
||||
am__depfiles_remade = ./$(DEPDIR)/libhcl_la-bigint.Plo \
|
||||
./$(DEPDIR)/libhcl_la-chr.Plo ./$(DEPDIR)/libhcl_la-cnode.Plo \
|
||||
./$(DEPDIR)/libhcl_la-comp.Plo ./$(DEPDIR)/libhcl_la-debug.Plo \
|
||||
./$(DEPDIR)/libhcl_la-chr.Plo ./$(DEPDIR)/libhcl_la-cmgr.Plo \
|
||||
./$(DEPDIR)/libhcl_la-cnode.Plo ./$(DEPDIR)/libhcl_la-comp.Plo \
|
||||
./$(DEPDIR)/libhcl_la-debug.Plo \
|
||||
./$(DEPDIR)/libhcl_la-decode.Plo ./$(DEPDIR)/libhcl_la-dic.Plo \
|
||||
./$(DEPDIR)/libhcl_la-err.Plo ./$(DEPDIR)/libhcl_la-exec.Plo \
|
||||
./$(DEPDIR)/libhcl_la-fmt.Plo ./$(DEPDIR)/libhcl_la-gc.Plo \
|
||||
@ -243,9 +245,9 @@ am__can_run_installinfo = \
|
||||
n|no|NO) false;; \
|
||||
*) (install-info --version) >/dev/null 2>&1;; \
|
||||
esac
|
||||
am__pkginclude_HEADERS_DIST = hcl.h hcl-chr.h hcl-cmn.h hcl-fmt.h \
|
||||
hcl-opt.h hcl-pac1.h hcl-rbt.h hcl-str.h hcl-upac.h hcl-utl.h \
|
||||
hcl-xma.h hcl-x.h hcl-tmr.h hcl-json.h
|
||||
am__pkginclude_HEADERS_DIST = hcl.h hcl-chr.h hcl-cmgr.h hcl-cmn.h \
|
||||
hcl-fmt.h hcl-opt.h hcl-pac1.h hcl-rbt.h hcl-str.h hcl-upac.h \
|
||||
hcl-utl.h hcl-xma.h hcl-x.h hcl-tmr.h hcl-json.h
|
||||
HEADERS = $(pkginclude_HEADERS)
|
||||
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) \
|
||||
hcl-cfg.h.in
|
||||
@ -434,14 +436,15 @@ LIBADD_LIB_COMMON = $(LIBM) $(am__append_1) $(am__append_2) \
|
||||
@WIN32_TRUE@CPPFLAGS_PFMOD = -DHCL_DEFAULT_PFMODPREFIX=\"libhcl-\" \
|
||||
@WIN32_TRUE@ -DHCL_DEFAULT_PFMODPOSTFIX=\"-1.dll\" \
|
||||
@WIN32_TRUE@ $(am__append_4) $(am__append_5)
|
||||
pkginclude_HEADERS = hcl.h hcl-chr.h hcl-cmn.h hcl-fmt.h hcl-opt.h \
|
||||
hcl-pac1.h hcl-rbt.h hcl-str.h hcl-upac.h hcl-utl.h hcl-xma.h \
|
||||
$(am__append_8)
|
||||
pkginclude_HEADERS = hcl.h hcl-chr.h hcl-cmgr.h hcl-cmn.h hcl-fmt.h \
|
||||
hcl-opt.h hcl-pac1.h hcl-rbt.h hcl-str.h hcl-upac.h hcl-utl.h \
|
||||
hcl-xma.h $(am__append_8)
|
||||
pkglib_LTLIBRARIES = libhcl.la $(am__append_7)
|
||||
libhcl_la_SOURCES = \
|
||||
hcl-prv.h \
|
||||
bigint.c \
|
||||
chr.c \
|
||||
cmgr.c \
|
||||
cnode.c \
|
||||
comp.c \
|
||||
debug.c \
|
||||
@ -593,6 +596,7 @@ distclean-compile:
|
||||
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhcl_la-bigint.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhcl_la-chr.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhcl_la-cmgr.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhcl_la-cnode.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhcl_la-comp.Plo@am__quote@ # am--include-marker
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libhcl_la-debug.Plo@am__quote@ # am--include-marker
|
||||
@ -672,6 +676,13 @@ libhcl_la-chr.lo: chr.c
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libhcl_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libhcl_la-chr.lo `test -f 'chr.c' || echo '$(srcdir)/'`chr.c
|
||||
|
||||
libhcl_la-cmgr.lo: cmgr.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libhcl_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libhcl_la-cmgr.lo -MD -MP -MF $(DEPDIR)/libhcl_la-cmgr.Tpo -c -o libhcl_la-cmgr.lo `test -f 'cmgr.c' || echo '$(srcdir)/'`cmgr.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libhcl_la-cmgr.Tpo $(DEPDIR)/libhcl_la-cmgr.Plo
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cmgr.c' object='libhcl_la-cmgr.lo' libtool=yes @AMDEPBACKSLASH@
|
||||
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libhcl_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libhcl_la-cmgr.lo `test -f 'cmgr.c' || echo '$(srcdir)/'`cmgr.c
|
||||
|
||||
libhcl_la-cnode.lo: cnode.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libhcl_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libhcl_la-cnode.lo -MD -MP -MF $(DEPDIR)/libhcl_la-cnode.Tpo -c -o libhcl_la-cnode.lo `test -f 'cnode.c' || echo '$(srcdir)/'`cnode.c
|
||||
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libhcl_la-cnode.Tpo $(DEPDIR)/libhcl_la-cnode.Plo
|
||||
@ -1066,6 +1077,7 @@ clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \
|
||||
distclean: distclean-am
|
||||
-rm -f ./$(DEPDIR)/libhcl_la-bigint.Plo
|
||||
-rm -f ./$(DEPDIR)/libhcl_la-chr.Plo
|
||||
-rm -f ./$(DEPDIR)/libhcl_la-cmgr.Plo
|
||||
-rm -f ./$(DEPDIR)/libhcl_la-cnode.Plo
|
||||
-rm -f ./$(DEPDIR)/libhcl_la-comp.Plo
|
||||
-rm -f ./$(DEPDIR)/libhcl_la-debug.Plo
|
||||
@ -1148,6 +1160,7 @@ installcheck-am:
|
||||
maintainer-clean: maintainer-clean-am
|
||||
-rm -f ./$(DEPDIR)/libhcl_la-bigint.Plo
|
||||
-rm -f ./$(DEPDIR)/libhcl_la-chr.Plo
|
||||
-rm -f ./$(DEPDIR)/libhcl_la-cmgr.Plo
|
||||
-rm -f ./$(DEPDIR)/libhcl_la-cnode.Plo
|
||||
-rm -f ./$(DEPDIR)/libhcl_la-comp.Plo
|
||||
-rm -f ./$(DEPDIR)/libhcl_la-debug.Plo
|
||||
|
165
lib/cmgr.c
Normal file
165
lib/cmgr.c
Normal file
@ -0,0 +1,165 @@
|
||||
/*
|
||||
Copyright (c) 2016-2018 Chung, Hyung-Hwan. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <hcl-cmgr.h>
|
||||
#include <hcl-chr.h>
|
||||
#include <hcl-str.h>
|
||||
|
||||
/* Table of built-in character managers; each entry pairs the <enc>_to_uc
 * and uc_to_<enc> converter functions of one encoding. */
static hcl_cmgr_t builtin_cmgr[] =
|
||||
{
|
||||
/* keep the order aligned with hcl_cmgr_id_t values in <hcl-cmgr.h>;
 * the table is indexed directly with those enumerators */
|
||||
{ hcl_utf8_to_uc, hcl_uc_to_utf8 },
|
||||
{ hcl_utf16_to_uc, hcl_uc_to_utf16 },
|
||||
{ hcl_mb8_to_uc, hcl_uc_to_mb8 }
|
||||
};
|
||||
|
||||
/*
 * Return the built-in character manager selected by id.
 *
 * id is used as an index into builtin_cmgr. A value outside the table
 * (negative, or not smaller than the number of entries) yields HCL_NULL
 * instead of an out-of-bounds pointer, matching the failure convention of
 * hcl_get_cmgr_by_bcstr() and hcl_get_cmgr_by_ucstr().
 */
hcl_cmgr_t* hcl_get_cmgr_by_id (hcl_cmgr_id_t id)
{
	/* the enum may be signed or unsigned depending on the compiler, so
	 * reject both a negative value and an index past the last entry */
	if ((int)id < 0 || (hcl_oow_t)id >= HCL_COUNTOF(builtin_cmgr)) return HCL_NULL;

	return &builtin_cmgr[id];
}
|
||||
|
||||
/* Name-to-id lookup table consulted by hcl_get_cmgr_by_bcstr() and
 * hcl_get_cmgr_by_ucstr(); the id of a matching entry is then used to
 * index builtin_cmgr. */
static struct
|
||||
{
|
||||
const hcl_bch_t* name;
|
||||
hcl_cmgr_id_t id;
|
||||
} builtin_cmgr_tab[] =
|
||||
{
|
||||
{ "utf8", HCL_CMGR_UTF8 },
|
||||
{ "utf16", HCL_CMGR_UTF16 },
|
||||
{ "mb8", HCL_CMGR_MB8 }
|
||||
};
|
||||
|
||||
hcl_cmgr_t* hcl_get_cmgr_by_bcstr (const hcl_bch_t* name)
|
||||
{
|
||||
if (name)
|
||||
{
|
||||
hcl_oow_t i;
|
||||
|
||||
for (i = 0; i < HCL_COUNTOF(builtin_cmgr_tab); i++)
|
||||
{
|
||||
if (hcl_comp_bcstr(name, builtin_cmgr_tab[i].name) == 0)
|
||||
{
|
||||
return &builtin_cmgr[builtin_cmgr_tab[i].id];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return HCL_NULL;
|
||||
}
|
||||
|
||||
hcl_cmgr_t* hcl_get_cmgr_by_ucstr (const hcl_uch_t* name)
|
||||
{
|
||||
if (name)
|
||||
{
|
||||
hcl_oow_t i;
|
||||
|
||||
for (i = 0; i < HCL_COUNTOF(builtin_cmgr_tab); i++)
|
||||
{
|
||||
if (hcl_comp_ucstr_bcstr(name, builtin_cmgr_tab[i].name) == 0)
|
||||
{
|
||||
return &builtin_cmgr[builtin_cmgr_tab[i].id];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return HCL_NULL;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
int hcl_conv_utf8_to_uchars (const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
{
	/* length-bound source; conversion is delegated to the generic routine
	 * with the built-in UTF-8 character manager */
	hcl_cmgr_t* cmgr = &builtin_cmgr[HCL_CMGR_UTF8];

	return hcl_conv_bchars_to_uchars_with_cmgr(bcs, bcslen, ucs, ucslen, cmgr, 0);
}
|
||||
|
||||
int hcl_conv_uchars_to_utf8 (const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_bch_t* bcs, hcl_oow_t* bcslen)
{
	/* length-bound source; conversion is delegated to the generic routine
	 * with the built-in UTF-8 character manager */
	hcl_cmgr_t* cmgr = &builtin_cmgr[HCL_CMGR_UTF8];

	return hcl_conv_uchars_to_bchars_with_cmgr(ucs, ucslen, bcs, bcslen, cmgr);
}
|
||||
|
||||
int hcl_conv_utf8_to_ucstr (const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
{
	/* null-terminated variant; conversion is delegated to the generic
	 * routine with the built-in UTF-8 character manager */
	hcl_cmgr_t* cmgr = &builtin_cmgr[HCL_CMGR_UTF8];

	return hcl_conv_bcstr_to_ucstr_with_cmgr(bcs, bcslen, ucs, ucslen, cmgr, 0);
}
|
||||
|
||||
int hcl_conv_ucstr_to_utf8 (const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_bch_t* bcs, hcl_oow_t* bcslen)
{
	/* null-terminated variant; conversion is delegated to the generic
	 * routine with the built-in UTF-8 character manager */
	hcl_cmgr_t* cmgr = &builtin_cmgr[HCL_CMGR_UTF8];

	return hcl_conv_ucstr_to_bcstr_with_cmgr(ucs, ucslen, bcs, bcslen, cmgr);
}
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
/* NOTE(review): <hcl-cmgr.h> declares only the UTF-8 conversion functions;
 * confirm that the UTF-16 ones are declared in some public header. */
int hcl_conv_utf16_to_uchars (const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
{
	/* length-bound source; conversion is delegated to the generic routine
	 * with the built-in UTF-16 character manager */
	hcl_cmgr_t* cmgr = &builtin_cmgr[HCL_CMGR_UTF16];

	return hcl_conv_bchars_to_uchars_with_cmgr(bcs, bcslen, ucs, ucslen, cmgr, 0);
}
|
||||
|
||||
int hcl_conv_uchars_to_utf16 (const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_bch_t* bcs, hcl_oow_t* bcslen)
{
	/* length-bound source; conversion is delegated to the generic routine
	 * with the built-in UTF-16 character manager */
	hcl_cmgr_t* cmgr = &builtin_cmgr[HCL_CMGR_UTF16];

	return hcl_conv_uchars_to_bchars_with_cmgr(ucs, ucslen, bcs, bcslen, cmgr);
}
|
||||
|
||||
int hcl_conv_utf16_to_ucstr (const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
{
	/* null-terminated variant; conversion is delegated to the generic
	 * routine with the built-in UTF-16 character manager */
	hcl_cmgr_t* cmgr = &builtin_cmgr[HCL_CMGR_UTF16];

	return hcl_conv_bcstr_to_ucstr_with_cmgr(bcs, bcslen, ucs, ucslen, cmgr, 0);
}
|
||||
|
||||
int hcl_conv_ucstr_to_utf16 (const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_bch_t* bcs, hcl_oow_t* bcslen)
{
	/* null-terminated variant; conversion is delegated to the generic
	 * routine with the built-in UTF-16 character manager */
	hcl_cmgr_t* cmgr = &builtin_cmgr[HCL_CMGR_UTF16];

	return hcl_conv_ucstr_to_bcstr_with_cmgr(ucs, ucslen, bcs, bcslen, cmgr);
}
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
/* NOTE(review): <hcl-cmgr.h> declares only the UTF-8 conversion functions;
 * confirm that the MB8 ones are declared in some public header. */
int hcl_conv_mb8_to_uchars (const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
{
	/* length-bound source; conversion is delegated to the generic routine
	 * with the built-in MB8 character manager */
	hcl_cmgr_t* cmgr = &builtin_cmgr[HCL_CMGR_MB8];

	return hcl_conv_bchars_to_uchars_with_cmgr(bcs, bcslen, ucs, ucslen, cmgr, 0);
}
|
||||
|
||||
int hcl_conv_uchars_to_mb8 (const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_bch_t* bcs, hcl_oow_t* bcslen)
{
	/* length-bound source; conversion is delegated to the generic routine
	 * with the built-in MB8 character manager */
	hcl_cmgr_t* cmgr = &builtin_cmgr[HCL_CMGR_MB8];

	return hcl_conv_uchars_to_bchars_with_cmgr(ucs, ucslen, bcs, bcslen, cmgr);
}
|
||||
|
||||
int hcl_conv_mb8_to_ucstr (const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
{
	/* null-terminated variant; conversion is delegated to the generic
	 * routine with the built-in MB8 character manager */
	hcl_cmgr_t* cmgr = &builtin_cmgr[HCL_CMGR_MB8];

	return hcl_conv_bcstr_to_ucstr_with_cmgr(bcs, bcslen, ucs, ucslen, cmgr, 0);
}
|
||||
|
||||
int hcl_conv_ucstr_to_mb8 (const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_bch_t* bcs, hcl_oow_t* bcslen)
{
	/* null-terminated variant; conversion is delegated to the generic
	 * routine with the built-in MB8 character manager */
	hcl_cmgr_t* cmgr = &builtin_cmgr[HCL_CMGR_MB8];

	return hcl_conv_ucstr_to_bcstr_with_cmgr(ucs, ucslen, bcs, bcslen, cmgr);
}
|
157
lib/hcl-cmgr.h
Normal file
157
lib/hcl-cmgr.h
Normal file
@ -0,0 +1,157 @@
|
||||
/*
|
||||
Copyright (c) 2016-2018 Chung, Hyung-Hwan. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _HCL_CMGR_H_
|
||||
#define _HCL_CMGR_H_
|
||||
|
||||
#include <hcl-cmn.h>
|
||||
|
||||
/**
 * The hcl_cmgr_id_t type enumerates the built-in character managers.
 * The enumerators are used as indices into the builtin_cmgr table in
 * cmgr.c, so their order must stay aligned with that table.
 */
enum hcl_cmgr_id_t
|
||||
{
|
||||
HCL_CMGR_UTF8,
|
||||
HCL_CMGR_UTF16,
|
||||
HCL_CMGR_MB8
|
||||
};
|
||||
typedef enum hcl_cmgr_id_t hcl_cmgr_id_t;
|
||||
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * NOTE(review): a function-like macro named hcl_get_utf8_cmgr() is defined
 * further down in this header and expands to hcl_get_cmgr_by_id(HCL_CMGR_UTF8),
 * so ordinary calls never reach a function of this name. cmgr.c does not
 * appear to define one either - confirm a definition exists elsewhere, or
 * drop this prototype.
 */
HCL_EXPORT hcl_cmgr_t* hcl_get_utf8_cmgr (
|
||||
void
|
||||
);
|
||||
|
||||
/**
 * The hcl_get_cmgr_by_id() function returns a pointer to the built-in
 * character manager selected by \a id, one of the #hcl_cmgr_id_t values.
 */
HCL_EXPORT hcl_cmgr_t* hcl_get_cmgr_by_id (
|
||||
hcl_cmgr_id_t id
|
||||
);
|
||||
|
||||
/**
 * The hcl_get_cmgr_by_bcstr() function looks up a built-in character manager
 * by the byte-string \a name. The names registered in cmgr.c are "utf8",
 * "utf16" and "mb8".
 *
 * \return matching character manager, or #HCL_NULL if \a name is #HCL_NULL
 *         or matches no registered name.
 */
HCL_EXPORT hcl_cmgr_t* hcl_get_cmgr_by_bcstr (
|
||||
const hcl_bch_t* name
|
||||
);
|
||||
|
||||
/**
 * The hcl_get_cmgr_by_ucstr() function looks up a built-in character manager
 * by the unicode-string \a name. The names registered in cmgr.c are "utf8",
 * "utf16" and "mb8".
 *
 * \return matching character manager, or #HCL_NULL if \a name is #HCL_NULL
 *         or matches no registered name.
 */
HCL_EXPORT hcl_cmgr_t* hcl_get_cmgr_by_ucstr (
|
||||
const hcl_uch_t* name
|
||||
);
|
||||
|
||||
#if defined(HCL_OOCH_IS_UCH)
|
||||
# define hcl_get_cmgr_by_name(name) hcl_get_cmgr_by_ucstr(name)
|
||||
#else
|
||||
# define hcl_get_cmgr_by_name(name) hcl_get_cmgr_by_bcstr(name)
|
||||
#endif
|
||||
|
||||
#define hcl_get_utf8_cmgr() hcl_get_cmgr_by_id(HCL_CMGR_UTF8)
|
||||
#define hcl_get_utf16_cmgr() hcl_get_cmgr_by_id(HCL_CMGR_UTF16)
|
||||
#define hcl_get_mb8_cmgr() hcl_get_cmgr_by_id(HCL_CMGR_MB8)
|
||||
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
/**
|
||||
* The hcl_conv_uchars_to_utf8() function converts a unicode character string \a ucs
|
||||
* to a UTF8 string and writes it into the buffer pointed to by \a bcs, but
|
||||
* not more than \a bcslen bytes including the terminating null.
|
||||
*
|
||||
* Upon return, \a bcslen is modified to the actual number of bytes written to
|
||||
* \a bcs excluding the terminating null; \a ucslen is modified to the number of
|
||||
* wide characters converted.
|
||||
*
|
||||
* You may pass #HCL_NULL for \a bcs to dry-run conversion or to get the
|
||||
* required buffer size for conversion. -2 is never returned in this case.
|
||||
*
|
||||
* \return
|
||||
* - 0 on full conversion,
|
||||
* - -1 on no or partial conversion for an illegal character encountered,
|
||||
* - -2 on no or partial conversion for a small buffer.
|
||||
*
|
||||
* \code
|
||||
* const hcl_uch_t ucs[] = { 'H', 'e', 'l', 'l', 'o' };
|
||||
* hcl_bch_t bcs[10];
|
||||
* hcl_oow_t ucslen = 5;
|
||||
* hcl_oow_t bcslen = HCL_COUNTOF(bcs);
|
||||
* n = hcl_conv_uchars_to_utf8 (ucs, &ucslen, bcs, &bcslen);
|
||||
* if (n <= -1)
|
||||
* {
|
||||
* // conversion error
|
||||
* }
|
||||
* \endcode
|
||||
*/
|
||||
HCL_EXPORT int hcl_conv_uchars_to_utf8 (
|
||||
const hcl_uch_t* ucs,
|
||||
hcl_oow_t* ucslen,
|
||||
hcl_bch_t* bcs,
|
||||
hcl_oow_t* bcslen
|
||||
);
|
||||
|
||||
/**
|
||||
* The hcl_conv_utf8_to_uchars() function converts a UTF8 string to a unicode string.
|
||||
*
|
||||
* It never returns -2 if \a ucs is #HCL_NULL.
|
||||
*
|
||||
* \code
|
||||
* const hcl_bch_t* bcs = "test string";
|
||||
* hcl_uch_t ucs[100];
|
||||
* hcl_oow_t ucslen = HCL_COUNTOF(ucs);
|
||||
* hcl_oow_t bcslen = 11;
|
||||
* int n;
|
||||
* n = hcl_conv_utf8_to_uchars (bcs, &bcslen, ucs, &ucslen);
|
||||
* if (n <= -1) { invalid/incomplete sequence or buffer too small }
|
||||
* \endcode
|
||||
*
|
||||
* The resulting \a ucslen can still be greater than 0 even if the return
|
||||
* value is negative. The value indicates the number of characters converted
|
||||
* before the error has occurred.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* -1 if \a bcs contains an illegal character.
|
||||
* -2 if the wide-character string buffer is too small.
|
||||
* -3 if \a bcs is not a complete sequence.
|
||||
*/
|
||||
HCL_EXPORT int hcl_conv_utf8_to_uchars (
|
||||
const hcl_bch_t* bcs,
|
||||
hcl_oow_t* bcslen,
|
||||
hcl_uch_t* ucs,
|
||||
hcl_oow_t* ucslen
|
||||
);
|
||||
|
||||
|
||||
/**
 * The hcl_conv_ucstr_to_utf8() function is the null-terminated-string
 * counterpart of hcl_conv_uchars_to_utf8(). In cmgr.c it forwards its
 * arguments to hcl_conv_ucstr_to_bcstr_with_cmgr() together with the
 * built-in UTF-8 character manager and returns that function's result.
 */
HCL_EXPORT int hcl_conv_ucstr_to_utf8 (
|
||||
const hcl_uch_t* ucs,
|
||||
hcl_oow_t* ucslen,
|
||||
hcl_bch_t* bcs,
|
||||
hcl_oow_t* bcslen
|
||||
);
|
||||
|
||||
/**
 * The hcl_conv_utf8_to_ucstr() function is the null-terminated-string
 * counterpart of hcl_conv_utf8_to_uchars(). In cmgr.c it forwards its
 * arguments to hcl_conv_bcstr_to_ucstr_with_cmgr() together with the
 * built-in UTF-8 character manager and returns that function's result.
 */
HCL_EXPORT int hcl_conv_utf8_to_ucstr (
|
||||
const hcl_bch_t* bcs,
|
||||
hcl_oow_t* bcslen,
|
||||
hcl_uch_t* ucs,
|
||||
hcl_oow_t* ucslen
|
||||
);
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
@ -27,6 +27,7 @@
|
||||
|
||||
#include <hcl.h>
|
||||
#include <hcl-chr.h>
|
||||
#include <hcl-cmgr.h>
|
||||
#include <hcl-fmt.h>
|
||||
#include <hcl-str.h>
|
||||
#include <hcl-utl.h>
|
||||
|
138
lib/hcl-utl.h
138
lib/hcl-utl.h
@ -361,18 +361,6 @@
|
||||
#define HCL_HASH_UCSTR(hv, ptr) HCL_HASH_VPTR(hv, ptr, const hcl_uch_t)
|
||||
#define HCL_HASH_MORE_UCSTR(hv, ptr) HCL_HASH_MORE_VPTR(hv, ptr, const hcl_uch_t)
|
||||
|
||||
/* =========================================================================
|
||||
* CMGR
|
||||
* ========================================================================= */
|
||||
enum hcl_cmgr_id_t
|
||||
{
|
||||
HCL_CMGR_UTF8,
|
||||
HCL_CMGR_UTF16,
|
||||
HCL_CMGR_MB8
|
||||
};
|
||||
typedef enum hcl_cmgr_id_t hcl_cmgr_id_t;
|
||||
|
||||
|
||||
/* =========================================================================
|
||||
* PATH-RELATED MACROS
|
||||
* ========================================================================= */
|
||||
@ -452,132 +440,6 @@ static HCL_INLINE hcl_oow_t hcl_hash_liwords(const hcl_liw_t* ptr, hcl_oow_t len
|
||||
# define hcl_hash_oochars(ptr,len) hcl_hash_bchars(ptr,len)
|
||||
#endif
|
||||
|
||||
/* ------------------------------------------------------------------------- */
|
||||
|
||||
HCL_EXPORT hcl_cmgr_t* hcl_get_utf8_cmgr (
|
||||
void
|
||||
);
|
||||
|
||||
HCL_EXPORT hcl_cmgr_t* hcl_get_cmgr_by_id (
|
||||
hcl_cmgr_id_t id
|
||||
);
|
||||
|
||||
HCL_EXPORT hcl_cmgr_t* hcl_get_cmgr_by_bcstr (
|
||||
const hcl_bch_t* name
|
||||
);
|
||||
|
||||
HCL_EXPORT hcl_cmgr_t* hcl_get_cmgr_by_ucstr (
|
||||
const hcl_uch_t* name
|
||||
);
|
||||
|
||||
#if defined(HCL_OOCH_IS_UCH)
|
||||
# define hcl_get_cmgr_by_name(name) hcl_get_cmgr_by_ucstr(name)
|
||||
#else
|
||||
# define hcl_get_cmgr_by_name(name) hcl_get_cmgr_by_bcstr(name)
|
||||
#endif
|
||||
|
||||
#define hcl_get_utf8_cmgr() hcl_get_cmgr_by_id(HCL_CMGR_UTF8)
|
||||
#define hcl_get_utf16_cmgr() hcl_get_cmgr_by_id(HCL_CMGR_UTF16)
|
||||
#define hcl_get_mb8_cmgr() hcl_get_cmgr_by_id(HCL_CMGR_MB8)
|
||||
|
||||
/**
|
||||
* The hcl_conv_uchars_to_utf8() function converts a unicode character string \a ucs
|
||||
* to a UTF8 string and writes it into the buffer pointed to by \a bcs, but
|
||||
* not more than \a bcslen bytes including the terminating null.
|
||||
*
|
||||
* Upon return, \a bcslen is modified to the actual number of bytes written to
|
||||
* \a bcs excluding the terminating null; \a ucslen is modified to the number of
|
||||
* wide characters converted.
|
||||
*
|
||||
* You may pass #HCL_NULL for \a bcs to dry-run conversion or to get the
|
||||
* required buffer size for conversion. -2 is never returned in this case.
|
||||
*
|
||||
* \return
|
||||
* - 0 on full conversion,
|
||||
* - -1 on no or partial conversion for an illegal character encountered,
|
||||
* - -2 on no or partial conversion for a small buffer.
|
||||
*
|
||||
* \code
|
||||
* const hcl_uch_t ucs[] = { 'H', 'e', 'l', 'l', 'o' };
|
||||
* hcl_bch_t bcs[10];
|
||||
* hcl_oow_t ucslen = 5;
|
||||
* hcl_oow_t bcslen = HCL_COUNTOF(bcs);
|
||||
* n = hcl_conv_uchars_to_utf8 (ucs, &ucslen, bcs, &bcslen);
|
||||
* if (n <= -1)
|
||||
* {
|
||||
* // conversion error
|
||||
* }
|
||||
* \endcode
|
||||
*/
|
||||
HCL_EXPORT int hcl_conv_uchars_to_utf8 (
|
||||
const hcl_uch_t* ucs,
|
||||
hcl_oow_t* ucslen,
|
||||
hcl_bch_t* bcs,
|
||||
hcl_oow_t* bcslen
|
||||
);
|
||||
|
||||
/**
|
||||
* The hcl_conv_utf8_to_uchars() function converts a UTF8 string to a uncide string.
|
||||
*
|
||||
* It never returns -2 if \a ucs is #HCL_NULL.
|
||||
*
|
||||
* \code
|
||||
* const hcl_bch_t* bcs = "test string";
|
||||
* hcl_uch_t ucs[100];
|
||||
* hcl_oow_t ucslen = HCL_COUNTOF(buf), n;
|
||||
* hcl_oow_t bcslen = 11;
|
||||
* int n;
|
||||
* n = hcl_conv_utf8_to_uchars (bcs, &bcslen, ucs, &ucslen);
|
||||
* if (n <= -1) { invalid/incomplenete sequence or buffer to small }
|
||||
* \endcode
|
||||
*
|
||||
* The resulting \a ucslen can still be greater than 0 even if the return
|
||||
* value is negative. The value indiates the number of characters converted
|
||||
* before the error has occurred.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* -1 if \a bcs contains an illegal character.
|
||||
* -2 if the wide-character string buffer is too small.
|
||||
* -3 if \a bcs is not a complete sequence.
|
||||
*/
|
||||
HCL_EXPORT int hcl_conv_utf8_to_uchars (
|
||||
const hcl_bch_t* bcs,
|
||||
hcl_oow_t* bcslen,
|
||||
hcl_uch_t* ucs,
|
||||
hcl_oow_t* ucslen
|
||||
);
|
||||
|
||||
|
||||
HCL_EXPORT int hcl_conv_ucstr_to_utf8 (
|
||||
const hcl_uch_t* ucs,
|
||||
hcl_oow_t* ucslen,
|
||||
hcl_bch_t* bcs,
|
||||
hcl_oow_t* bcslen
|
||||
);
|
||||
|
||||
HCL_EXPORT int hcl_conv_utf8_to_ucstr (
|
||||
const hcl_bch_t* bcs,
|
||||
hcl_oow_t* bcslen,
|
||||
hcl_uch_t* ucs,
|
||||
hcl_oow_t* ucslen
|
||||
);
|
||||
|
||||
HCL_EXPORT hcl_oow_t hcl_uc_to_utf8 (
|
||||
hcl_uch_t uc,
|
||||
hcl_bch_t* utf8,
|
||||
hcl_oow_t size
|
||||
);
|
||||
|
||||
HCL_EXPORT hcl_oow_t hcl_utf8_to_uc (
|
||||
const hcl_bch_t* utf8,
|
||||
hcl_oow_t size,
|
||||
hcl_uch_t* uc
|
||||
);
|
||||
|
||||
HCL_EXPORT int hcl_ucwidth (
|
||||
hcl_uch_t uc
|
||||
);
|
||||
|
||||
/* =========================================================================
|
||||
* TIME CALCULATION WITH OVERFLOW/UNDERFLOW DETECTION
|
||||
* ========================================================================= */
|
||||
|
@ -24,7 +24,6 @@
|
||||
|
||||
#include "hcl-prv.h"
|
||||
|
||||
|
||||
hcl_t* hcl_open (hcl_mmgr_t* mmgr, hcl_oow_t xtnsize, const hcl_vmprim_t* vmprim, hcl_errnum_t* errnum)
|
||||
{
|
||||
hcl_t* hcl;
|
||||
|
@ -23,7 +23,7 @@
|
||||
*/
|
||||
|
||||
#include <hcl-opt.h>
|
||||
#include <hcl-utl.h>
|
||||
#include <hcl-str.h>
|
||||
|
||||
#define BADCH '?'
|
||||
#define BADARG ':'
|
||||
|
@ -22,7 +22,7 @@
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "hcl-prv.h"
|
||||
#include "hcl-chr.h"
|
||||
|
||||
/* TODO: handle different endians - UTF16BE or UTF16LE */
|
||||
|
||||
|
150
lib/utf8.c
150
lib/utf8.c
@ -22,7 +22,7 @@
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "hcl-prv.h"
|
||||
#include <hcl-chr.h>
|
||||
|
||||
/*#define RETAIN_RFC2279 1*/
|
||||
|
||||
@ -187,151 +187,3 @@ hcl_oow_t hcl_utf8_to_uc (const hcl_bch_t* utf8, hcl_oow_t size, hcl_uch_t* uc)
|
||||
return 0; /* error - invalid sequence */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* See http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
|
||||
*/
|
||||
struct interval
|
||||
{
|
||||
int first;
|
||||
int last;
|
||||
};
|
||||
|
||||
/* auxiliary function for binary search in interval table */
|
||||
static int bisearch(hcl_uch_t ucs, const struct interval *table, int max)
|
||||
{
|
||||
int min = 0;
|
||||
int mid;
|
||||
|
||||
if (ucs < table[0].first || ucs > table[max].last) return 0;
|
||||
while (max >= min)
|
||||
{
|
||||
mid = (min + max) / 2;
|
||||
if (ucs > table[mid].last) min = mid + 1;
|
||||
else if (ucs < table[mid].first) max = mid - 1;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The following two functions define the column width of an ISO 10646
|
||||
* character as follows:
|
||||
*
|
||||
* - The null character (U+0000) has a column width of 0.
|
||||
*
|
||||
* - Other C0/C1 control characters and DEL will lead to a return
|
||||
* value of -1.
|
||||
*
|
||||
* - Non-spacing and enclosing combining characters (general
|
||||
* category code Mn or Me in the Unicode database) have a
|
||||
* column width of 0.
|
||||
*
|
||||
* - SOFT HYPHEN (U+00AD) has a column width of 1.
|
||||
*
|
||||
* - Other format characters (general category code Cf in the Unicode
|
||||
* database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
|
||||
*
|
||||
* - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
|
||||
* have a column width of 0.
|
||||
*
|
||||
* - Spacing characters in the East Asian Wide (W) or East Asian
|
||||
* Full-width (F) category as defined in Unicode Technical
|
||||
* Report #11 have a column width of 2.
|
||||
*
|
||||
* - All remaining characters (including all printable
|
||||
* ISO 8859-1 and WGL4 characters, Unicode control characters,
|
||||
* etc.) have a column width of 1.
|
||||
*
|
||||
* This implementation assumes that wchar_t characters are encoded
|
||||
* in ISO 10646.
|
||||
*/
|
||||
|
||||
int hcl_ucwidth (hcl_uch_t uc)
{
	/* Ranges of characters that occupy no column. The list is sorted and
	 * non-overlapping because it is looked up with bisearch().
	 * generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c" */
	static const struct interval combining[] = {
		{ 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
		{ 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
		{ 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
		{ 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
		{ 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
		{ 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
		{ 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
		{ 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
		{ 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
		{ 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
		{ 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
		{ 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
		{ 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
		{ 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
		{ 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
		{ 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
		{ 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
		{ 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
		{ 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
		{ 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
		{ 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
		{ 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
		{ 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
		{ 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
		{ 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
		{ 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
		{ 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
		{ 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
		{ 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
		{ 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
		{ 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
		{ 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
		{ 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
		{ 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
		{ 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
		{ 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
		{ 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
		{ 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
		{ 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
		{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
		{ 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
		{ 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
		{ 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
		{ 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
		{ 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
		{ 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
		{ 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
		{ 0xE0100, 0xE01EF }
	};

	/* the null character takes no column */
	if (uc == 0) return 0;

	/* the other C0 controls, DEL and the C1 controls are reported as -1 */
	if (uc < 32 || (uc >= 0x7f && uc < 0xa0)) return -1;

	/* characters listed in the table above take no column either */
	if (bisearch(uc, combining, sizeof(combining) / sizeof(struct interval) - 1)) return 0;

	/* from here on, uc is neither a combining nor a C0/C1 control character;
	 * every wide range checked below starts at U+1100 or later */
	if (uc < 0x1100) return 1;

	if (uc <= 0x115f) return 2;                                  /* Hangul Jamo init. consonants */
	if (uc == 0x2329 || uc == 0x232a) return 2;
	if (uc >= 0x2e80 && uc <= 0xa4cf && uc != 0x303f) return 2;  /* CJK ... Yi */
	if (uc >= 0xac00 && uc <= 0xd7a3) return 2;                  /* Hangul Syllables */
	if (uc >= 0xf900 && uc <= 0xfaff) return 2;                  /* CJK Compatibility Ideographs */
	if (uc >= 0xfe10 && uc <= 0xfe19) return 2;                  /* Vertical forms */
	if (uc >= 0xfe30 && uc <= 0xfe6f) return 2;                  /* CJK Compatibility Forms */
	if (uc >= 0xff00 && uc <= 0xff60) return 2;                  /* Fullwidth Forms */
	if (uc >= 0xffe0 && uc <= 0xffe6) return 2;
#if (HCL_SIZEOF_UCH_T > 2)
	/* these ranges are compiled in only when hcl_uch_t is wider than 2 bytes */
	if (uc >= 0x20000 && uc <= 0x2fffd) return 2;
	if (uc >= 0x30000 && uc <= 0x3fffd) return 2;
#endif

	/* everything else occupies a single column */
	return 1;
}
|
||||
|
143
lib/utl.c
143
lib/utl.c
@ -44,149 +44,6 @@ hcl_oow_t hcl_hash_bytes_ (const hcl_oob_t* ptr, hcl_oow_t len)
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
static hcl_cmgr_t builtin_cmgr[] =
|
||||
{
|
||||
/* keep the order aligned with hcl_cmgr_id_t values in <hcl-utl.h> */
|
||||
{ hcl_utf8_to_uc, hcl_uc_to_utf8 },
|
||||
{ hcl_utf16_to_uc, hcl_uc_to_utf16 },
|
||||
{ hcl_mb8_to_uc, hcl_uc_to_mb8 }
|
||||
};
|
||||
|
||||
/* Returns the built-in character-set manager selected by id.
 *
 * An id that does not index an entry of builtin_cmgr yields HCL_NULL instead
 * of a pointer past the table, which matches what the name-based lookups
 * return for an unknown name. */
hcl_cmgr_t* hcl_get_cmgr_by_id (hcl_cmgr_id_t id)
{
	/* compare as hcl_oow_t, the type this file already uses for
	 * HCL_COUNTOF() bounds; assuming it is unsigned, a negative id wraps to
	 * a large value and is rejected by the same test */
	if ((hcl_oow_t)id >= HCL_COUNTOF(builtin_cmgr)) return HCL_NULL;

	return &builtin_cmgr[id];
}
|
||||
|
||||
static struct
|
||||
{
|
||||
const hcl_bch_t* name;
|
||||
hcl_cmgr_id_t id;
|
||||
} builtin_cmgr_tab[] =
|
||||
{
|
||||
{ "utf8", HCL_CMGR_UTF8 },
|
||||
{ "utf16", HCL_CMGR_UTF16 },
|
||||
{ "mb8", HCL_CMGR_MB8 }
|
||||
};
|
||||
|
||||
|
||||
hcl_cmgr_t* hcl_get_cmgr_by_bcstr (const hcl_bch_t* name)
|
||||
{
|
||||
if (name)
|
||||
{
|
||||
hcl_oow_t i;
|
||||
|
||||
for (i = 0; i < HCL_COUNTOF(builtin_cmgr_tab); i++)
|
||||
{
|
||||
if (hcl_comp_bcstr(name, builtin_cmgr_tab[i].name) == 0)
|
||||
{
|
||||
return &builtin_cmgr[builtin_cmgr_tab[i].id];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return HCL_NULL;
|
||||
}
|
||||
|
||||
hcl_cmgr_t* hcl_get_cmgr_by_ucstr (const hcl_uch_t* name)
|
||||
{
|
||||
if (name)
|
||||
{
|
||||
hcl_oow_t i;
|
||||
|
||||
for (i = 0; i < HCL_COUNTOF(builtin_cmgr_tab); i++)
|
||||
{
|
||||
if (hcl_comp_ucstr_bcstr(name, builtin_cmgr_tab[i].name) == 0)
|
||||
{
|
||||
return &builtin_cmgr[builtin_cmgr_tab[i].id];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return HCL_NULL;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
/* Converts bcs to ucs using the built-in UTF-8 manager by forwarding to
 * hcl_conv_bchars_to_uchars_with_cmgr() and returning its result. */
int hcl_conv_utf8_to_uchars (const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
{
	hcl_cmgr_t* utf8 = &builtin_cmgr[HCL_CMGR_UTF8];

	/* the source is length bound */
	return hcl_conv_bchars_to_uchars_with_cmgr(bcs, bcslen, ucs, ucslen, utf8, 0);
}
|
||||
|
||||
/* Converts ucs to bcs using the built-in UTF-8 manager by forwarding to
 * hcl_conv_uchars_to_bchars_with_cmgr() and returning its result. */
int hcl_conv_uchars_to_utf8 (const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_bch_t* bcs, hcl_oow_t* bcslen)
{
	hcl_cmgr_t* utf8 = &builtin_cmgr[HCL_CMGR_UTF8];

	/* length bound */
	return hcl_conv_uchars_to_bchars_with_cmgr(ucs, ucslen, bcs, bcslen, utf8);
}
|
||||
|
||||
/* Converts the string bcs to ucs using the built-in UTF-8 manager by
 * forwarding to hcl_conv_bcstr_to_ucstr_with_cmgr() and returning its result. */
int hcl_conv_utf8_to_ucstr (const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
{
	hcl_cmgr_t* utf8 = &builtin_cmgr[HCL_CMGR_UTF8];

	/* null-terminated. */
	return hcl_conv_bcstr_to_ucstr_with_cmgr(bcs, bcslen, ucs, ucslen, utf8, 0);
}
|
||||
|
||||
/* Converts the string ucs to bcs using the built-in UTF-8 manager by
 * forwarding to hcl_conv_ucstr_to_bcstr_with_cmgr() and returning its result. */
int hcl_conv_ucstr_to_utf8 (const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_bch_t* bcs, hcl_oow_t* bcslen)
{
	hcl_cmgr_t* utf8 = &builtin_cmgr[HCL_CMGR_UTF8];

	/* null-terminated */
	return hcl_conv_ucstr_to_bcstr_with_cmgr(ucs, ucslen, bcs, bcslen, utf8);
}
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
/* Converts bcs to ucs using the built-in UTF-16 manager by forwarding to
 * hcl_conv_bchars_to_uchars_with_cmgr() and returning its result. */
int hcl_conv_utf16_to_uchars (const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
{
	hcl_cmgr_t* utf16 = &builtin_cmgr[HCL_CMGR_UTF16];

	/* the source is length bound */
	return hcl_conv_bchars_to_uchars_with_cmgr(bcs, bcslen, ucs, ucslen, utf16, 0);
}
|
||||
|
||||
/* Converts ucs to bcs using the built-in UTF-16 manager by forwarding to
 * hcl_conv_uchars_to_bchars_with_cmgr() and returning its result. */
int hcl_conv_uchars_to_utf16 (const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_bch_t* bcs, hcl_oow_t* bcslen)
{
	hcl_cmgr_t* utf16 = &builtin_cmgr[HCL_CMGR_UTF16];

	/* length bound */
	return hcl_conv_uchars_to_bchars_with_cmgr(ucs, ucslen, bcs, bcslen, utf16);
}
|
||||
|
||||
/* Converts the string bcs to ucs using the built-in UTF-16 manager by
 * forwarding to hcl_conv_bcstr_to_ucstr_with_cmgr() and returning its result. */
int hcl_conv_utf16_to_ucstr (const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
{
	hcl_cmgr_t* utf16 = &builtin_cmgr[HCL_CMGR_UTF16];

	/* null-terminated. */
	return hcl_conv_bcstr_to_ucstr_with_cmgr(bcs, bcslen, ucs, ucslen, utf16, 0);
}
|
||||
|
||||
/* Converts the string ucs to bcs using the built-in UTF-16 manager by
 * forwarding to hcl_conv_ucstr_to_bcstr_with_cmgr() and returning its result. */
int hcl_conv_ucstr_to_utf16 (const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_bch_t* bcs, hcl_oow_t* bcslen)
{
	hcl_cmgr_t* utf16 = &builtin_cmgr[HCL_CMGR_UTF16];

	/* null-terminated */
	return hcl_conv_ucstr_to_bcstr_with_cmgr(ucs, ucslen, bcs, bcslen, utf16);
}
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
/* Converts bcs to ucs using the built-in MB8 manager by forwarding to
 * hcl_conv_bchars_to_uchars_with_cmgr() and returning its result. */
int hcl_conv_mb8_to_uchars (const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
{
	hcl_cmgr_t* mb8 = &builtin_cmgr[HCL_CMGR_MB8];

	/* the source is length bound */
	return hcl_conv_bchars_to_uchars_with_cmgr(bcs, bcslen, ucs, ucslen, mb8, 0);
}
|
||||
|
||||
/* Converts ucs to bcs using the built-in MB8 manager by forwarding to
 * hcl_conv_uchars_to_bchars_with_cmgr() and returning its result. */
int hcl_conv_uchars_to_mb8 (const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_bch_t* bcs, hcl_oow_t* bcslen)
{
	hcl_cmgr_t* mb8 = &builtin_cmgr[HCL_CMGR_MB8];

	/* length bound */
	return hcl_conv_uchars_to_bchars_with_cmgr(ucs, ucslen, bcs, bcslen, mb8);
}
|
||||
|
||||
/* Converts the string bcs to ucs using the built-in MB8 manager by
 * forwarding to hcl_conv_bcstr_to_ucstr_with_cmgr() and returning its result. */
int hcl_conv_mb8_to_ucstr (const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
{
	hcl_cmgr_t* mb8 = &builtin_cmgr[HCL_CMGR_MB8];

	/* null-terminated. */
	return hcl_conv_bcstr_to_ucstr_with_cmgr(bcs, bcslen, ucs, ucslen, mb8, 0);
}
|
||||
|
||||
/* Converts the string ucs to bcs using the built-in MB8 manager by
 * forwarding to hcl_conv_ucstr_to_bcstr_with_cmgr() and returning its result. */
int hcl_conv_ucstr_to_mb8 (const hcl_uch_t* ucs, hcl_oow_t* ucslen, hcl_bch_t* bcs, hcl_oow_t* bcslen)
{
	hcl_cmgr_t* mb8 = &builtin_cmgr[HCL_CMGR_MB8];

	/* null-terminated */
	return hcl_conv_ucstr_to_bcstr_with_cmgr(ucs, ucslen, bcs, bcslen, mb8);
}
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
int hcl_convbtouchars (hcl_t* hcl, const hcl_bch_t* bcs, hcl_oow_t* bcslen, hcl_uch_t* ucs, hcl_oow_t* ucslen)
|
||||
{
|
||||
/* length bound */
|
||||
|
Loading…
x
Reference in New Issue
Block a user