added the mb8 cmgr

This commit is contained in:
hyunghwan.chung 2019-05-07 17:29:48 +00:00
parent bbf0f6acd1
commit a3ca71a2b8
6 changed files with 141 additions and 10 deletions

View File

@ -79,6 +79,7 @@ libmoo_la_SOURCES = \
fmtoutv.h \
gc.c \
heap.c \
mb8.c \
moo.c \
number.c \
obj.c \

View File

@ -170,7 +170,7 @@ am__DEPENDENCIES_5 = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_2) \
am_libmoo_la_OBJECTS = libmoo_la-bigint.lo libmoo_la-comp.lo \
libmoo_la-debug.lo libmoo_la-decode.lo libmoo_la-dic.lo \
libmoo_la-err.lo libmoo_la-exec.lo libmoo_la-fmtout.lo \
libmoo_la-gc.lo libmoo_la-heap.lo libmoo_la-moo.lo \
libmoo_la-gc.lo libmoo_la-heap.lo libmoo_la-mb8.lo libmoo_la-moo.lo \
libmoo_la-number.lo libmoo_la-obj.lo libmoo_la-opt.lo \
libmoo_la-pf-basic.lo libmoo_la-pf-sys.lo libmoo_la-pf-utf8.lo \
libmoo_la-rbt.lo libmoo_la-sym.lo libmoo_la-utf8.lo \
@ -470,6 +470,7 @@ libmoo_la_SOURCES = \
fmtoutv.h \
gc.c \
heap.c \
mb8.c \
moo.c \
number.c \
obj.c \
@ -656,6 +657,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-fmtout.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-gc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-heap.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-mb8.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-moo.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-number.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmoo_la-obj.Plo@am__quote@
@ -766,6 +768,13 @@ libmoo_la-heap.lo: heap.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmoo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libmoo_la-heap.lo `test -f 'heap.c' || echo '$(srcdir)/'`heap.c
libmoo_la-mb8.lo: mb8.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmoo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libmoo_la-mb8.lo -MD -MP -MF $(DEPDIR)/libmoo_la-mb8.Tpo -c -o libmoo_la-mb8.lo `test -f 'mb8.c' || echo '$(srcdir)/'`mb8.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libmoo_la-mb8.Tpo $(DEPDIR)/libmoo_la-mb8.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mb8.c' object='libmoo_la-mb8.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmoo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libmoo_la-mb8.lo `test -f 'mb8.c' || echo '$(srcdir)/'`mb8.c
libmoo_la-moo.lo: moo.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libmoo_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libmoo_la-moo.lo -MD -MP -MF $(DEPDIR)/libmoo_la-moo.Tpo -c -o libmoo_la-moo.lo `test -f 'moo.c' || echo '$(srcdir)/'`moo.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libmoo_la-moo.Tpo $(DEPDIR)/libmoo_la-moo.Plo

View File

@ -160,9 +160,25 @@ static int fmtoutv (moo_t* moo, const fmtchar_t* fmt, moo_fmtout_data_t* data, v
while (checkpoint < fmt - 1)
{
moo_oow_t cvlen, bclen;
moo_cmgr_t* cmgr;
bclen = fmt - checkpoint - 1;
cvlen = moo->cmgr->bctouc(checkpoint, bclen, &ch);
if (cvlen == 0 || cvlen > bclen) goto oops;
cmgr = moo->cmgr;
cvlen = cmgr->bctouc(checkpoint, bclen, &ch);
if (cvlen == 0 || cvlen > bclen)
{
/* conversion error. just emit the byte as it is. */
#if defined(FMTOUT_STRICT)
goto oops;
#else
cmgr = moo_get_utf8_cmgr();
if (cmgr == moo->cmgr || (cvlen = cmgr->bctouc(checkpoint, bclen, &ch)) == 0 || cvlen > bclen)
{
cmgr = moo_get_mb8_cmgr();
if (cmgr == moo->cmgr || (cvlen = cmgr->bctouc(checkpoint, bclen, &ch)) == 0 || cvlen > bclen) goto oops;
}
#endif
}
checkpoint += cvlen;
PUT_OOCH (ch, 1);
}
@ -462,7 +478,8 @@ static int fmtoutv (moo_t* moo, const fmtchar_t* fmt, moo_fmtout_data_t* data, v
case 's':
{
const moo_bch_t* bsp;
moo_oow_t bslen, slen;
moo_oow_t bslen, obslen, slen;
moo_cmgr_t* cmgr;
/* zeropad must not take effect for 'S' */
if (flagc & FLAGC_ZEROPAD) padc = ' ';
@ -479,14 +496,31 @@ static int fmtoutv (moo_t* moo, const fmtchar_t* fmt, moo_fmtout_data_t* data, v
/* get the length */
if (flagc & FLAGC_DOT)
{
for (bslen = 0; bslen < precision && bsp[bslen]; bslen++);
for (obslen = 0; obslen < precision && bsp[obslen]; obslen++);
}
else
{
for (bslen = 0; bsp[bslen]; bslen++);
for (obslen = 0; bsp[obslen]; obslen++);
}
if (moo_conv_bchars_to_uchars_with_cmgr(bsp, &bslen, MOO_NULL, &slen, moo->cmgr, 0) <= -1) goto oops;
/* get the required length for successful conversion in a fail-safe manner */
cmgr = moo->cmgr;
bslen = obslen;
if (moo_conv_bchars_to_uchars_with_cmgr(bsp, &bslen, MOO_NULL, &slen, cmgr, 0) <= -1)
{
#if defined(FMTOUT_STRICT)
goto oops;
#else
cmgr = moo_get_utf8_cmgr();
bslen = obslen;
if (cmgr == moo->cmgr || moo_conv_bchars_to_uchars_with_cmgr(bsp, &bslen, MOO_NULL, &slen, cmgr, 0) <= -1)
{
cmgr = moo_get_mb8_cmgr();
bslen = obslen;
if (cmgr == moo->cmgr || moo_conv_bchars_to_uchars_with_cmgr(bsp, &bslen, MOO_NULL, &slen, cmgr, 0) <= -1) goto oops;
}
#endif
}
/* slen holds the length after conversion */
n = slen;
@ -506,7 +540,7 @@ static int fmtoutv (moo_t* moo, const fmtchar_t* fmt, moo_fmtout_data_t* data, v
conv_len = MOO_COUNTOF(conv_buf);
/* this must not fail since the dry-run above was successful */
moo_conv_bchars_to_uchars_with_cmgr(&bsp[tot_len], &src_len, conv_buf, &conv_len, moo->cmgr, 0);
moo_conv_bchars_to_uchars_with_cmgr(&bsp[tot_len], &src_len, conv_buf, &conv_len, cmgr, 0);
tot_len += src_len;
if (conv_len > n) conv_len = n;

View File

@ -736,6 +736,53 @@ MOO_EXPORT moo_oow_t moo_utf16_to_uc (
moo_uch_t* uc
);
/* ------------------------------------------------------------------------- */
/**
 * Returns the built-in mb8 character-conversion manager (cmgr).
 * The definition added alongside this declaration returns the address of a
 * single file-scope object, so every caller receives the same pointer and
 * the result must not be freed.
 */
MOO_EXPORT moo_cmgr_t* moo_get_mb8_cmgr (
void
);
/**
 * Converts a length-bound moo_uch_t sequence to bytes with the mb8 cmgr.
 * The definition forwards all four arguments, plus the mb8 cmgr, to
 * moo_conv_uchars_to_bchars_with_cmgr() and returns its result unchanged.
 * NOTE(review): ucslen/bcslen are passed by pointer - presumably in/out
 * length counters; confirm against the underlying function.
 */
MOO_EXPORT int moo_conv_uchars_to_mb8 (
const moo_uch_t* ucs,
moo_oow_t* ucslen,
moo_bch_t* bcs,
moo_oow_t* bcslen
);
/**
 * Converts a length-bound byte sequence to moo_uch_t characters with the
 * mb8 cmgr. The definition forwards to
 * moo_conv_bchars_to_uchars_with_cmgr() and returns its result unchanged.
 * The formatted-output code calls that underlying function with a null
 * output buffer as a dry run to learn the converted length, and treats a
 * result <= -1 as failure.
 * NOTE(review): bcslen/ucslen are passed by pointer - presumably in/out
 * length counters; confirm against the underlying function.
 */
MOO_EXPORT int moo_conv_mb8_to_uchars (
const moo_bch_t* bcs,
moo_oow_t* bcslen,
moo_uch_t* ucs,
moo_oow_t* ucslen
);
/**
 * Converts a null-terminated moo_uch_t string to bytes with the mb8 cmgr.
 * The definition forwards to moo_conv_ucstr_to_bcstr_with_cmgr() and
 * returns its result unchanged.
 * NOTE(review): ucslen/bcslen are passed by pointer - presumably in/out
 * length counters; confirm against the underlying function.
 */
MOO_EXPORT int moo_conv_ucstr_to_mb8 (
const moo_uch_t* ucs,
moo_oow_t* ucslen,
moo_bch_t* bcs,
moo_oow_t* bcslen
);
/**
 * Converts a null-terminated byte string to moo_uch_t characters with the
 * mb8 cmgr. The definition forwards to
 * moo_conv_bcstr_to_ucstr_with_cmgr() and returns its result unchanged.
 * NOTE(review): bcslen/ucslen are passed by pointer - presumably in/out
 * length counters; confirm against the underlying function.
 */
MOO_EXPORT int moo_conv_mb8_to_ucstr (
const moo_bch_t* bcs,
moo_oow_t* bcslen,
moo_uch_t* ucs,
moo_oow_t* ucslen
);
/**
 * Single-character converter from a moo_uch_t to its mb8 byte form.
 * It is installed as the second member of the mb8 cmgr.
 * NOTE(review): the implementation is not visible from this header -
 * presumably size is the capacity of the mb8 buffer and the return value
 * is the number of bytes produced; confirm against mb8.c.
 */
MOO_EXPORT moo_oow_t moo_uc_to_mb8 (
moo_uch_t uc,
moo_bch_t* mb8,
moo_oow_t size
);
/**
 * Single-character converter from mb8 bytes to a moo_uch_t.
 * It is installed as the first member of the mb8 cmgr, and its parameter
 * list matches the way the formatted-output code invokes cmgr->bctouc().
 * That caller treats a return value of 0, or one greater than the number
 * of bytes supplied, as a conversion error.
 * NOTE(review): presumably the return value is the number of bytes
 * consumed from mb8; confirm against mb8.c.
 */
MOO_EXPORT moo_oow_t moo_mb8_to_uc (
const moo_bch_t* mb8,
moo_oow_t size,
moo_uch_t* uc
);
/* =========================================================================
* BIT SWAP
* ========================================================================= */

View File

@ -573,8 +573,11 @@ static MOO_INLINE moo_ooi_t read_input (moo_t* moo, moo_ioarg_t* arg)
#if defined(MOO_OOCH_IS_UCH)
bcslen = bb->len;
ucslen = MOO_COUNTOF(arg->buf);
/* TODO: use the default cmgr first.
* then fallback to utf8, mb8, etc */
x = moo_convbtooochars(moo, bb->buf, &bcslen, arg->buf, &ucslen);
if (x <= -1 && ucslen <= 0) return -1;
if (x <= -1 /*&& ucslen <= 0 */) return -1;
/* if ucslen is greater than 0, i see that some characters have been
* converted properly */
#else

View File

@ -808,6 +808,43 @@ int moo_conv_ucstr_to_utf16 (const moo_uch_t* ucs, moo_oow_t* ucslen, moo_bch_t*
/* ----------------------------------------------------------------------- */
/* File-scope mb8 conversion manager handed out by moo_get_mb8_cmgr() and
 * used by the moo_conv_*_mb8 / moo_conv_mb8_* wrappers below.
 * The initializer is positional: moo_mb8_to_uc has the same parameter
 * shape as the cmgr->bctouc() calls in the formatted-output code, so the
 * first member is presumably bctouc and the second its reverse direction -
 * confirm against the moo_cmgr_t declaration. */
static moo_cmgr_t mb8_cmgr =
{
moo_mb8_to_uc,
moo_uc_to_mb8
};
/* Accessor for the mb8 conversion manager. Every call yields the address
 * of the same file-scope mb8_cmgr object. */
moo_cmgr_t* moo_get_mb8_cmgr (void)
{
	moo_cmgr_t* shared_cmgr = &mb8_cmgr;

	return shared_cmgr;
}
/* Bytes -> characters using the mb8 cmgr. The input is bounded by *bcslen
 * rather than by a terminator. The result of the generic cmgr-based
 * converter is handed back to the caller untouched. */
int moo_conv_mb8_to_uchars (const moo_bch_t* bcs, moo_oow_t* bcslen, moo_uch_t* ucs, moo_oow_t* ucslen)
{
	int rc;

	rc = moo_conv_bchars_to_uchars_with_cmgr(bcs, bcslen, ucs, ucslen, &mb8_cmgr, 0);
	return rc;
}
/* Characters -> bytes using the mb8 cmgr. The input is bounded by *ucslen
 * rather than by a terminator. The result of the generic cmgr-based
 * converter is handed back to the caller untouched. */
int moo_conv_uchars_to_mb8 (const moo_uch_t* ucs, moo_oow_t* ucslen, moo_bch_t* bcs, moo_oow_t* bcslen)
{
	int rc;

	rc = moo_conv_uchars_to_bchars_with_cmgr(ucs, ucslen, bcs, bcslen, &mb8_cmgr);
	return rc;
}
/* Byte string -> character string using the mb8 cmgr. The input is a
 * null-terminated string. The result of the generic cmgr-based converter
 * is handed back to the caller untouched. */
int moo_conv_mb8_to_ucstr (const moo_bch_t* bcs, moo_oow_t* bcslen, moo_uch_t* ucs, moo_oow_t* ucslen)
{
	int rc;

	rc = moo_conv_bcstr_to_ucstr_with_cmgr(bcs, bcslen, ucs, ucslen, &mb8_cmgr, 0);
	return rc;
}
/* Character string -> byte string using the mb8 cmgr. The input is a
 * null-terminated string. The result of the generic cmgr-based converter
 * is handed back to the caller untouched. */
int moo_conv_ucstr_to_mb8 (const moo_uch_t* ucs, moo_oow_t* ucslen, moo_bch_t* bcs, moo_oow_t* bcslen)
{
	int rc;

	rc = moo_conv_ucstr_to_bcstr_with_cmgr(ucs, ucslen, bcs, bcslen, &mb8_cmgr);
	return rc;
}
/* ----------------------------------------------------------------------- */
int moo_convbtouchars (moo_t* moo, const moo_bch_t* bcs, moo_oow_t* bcslen, moo_uch_t* ucs, moo_oow_t* ucslen)
{
/* length bound */