did some mbwc makeover

This commit is contained in:
2011-12-31 15:24:48 +00:00
parent a0fc992c65
commit d1883d2a72
20 changed files with 501 additions and 683 deletions

View File

@ -20,6 +20,7 @@
#include <qse/awk/StdAwk.hpp>
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include <qse/cmn/time.h>
#include <qse/cmn/fio.h>
#include <qse/cmn/pio.h>

View File

@ -23,6 +23,7 @@
#include <qse/cmn/sio.h>
#include <qse/cmn/pio.h>
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include <qse/cmn/time.h>
#include <qse/cmn/path.h>
#include <qse/cmn/stdio.h> /* TODO: remove dependency on qse_vsprintf */

View File

@ -25,7 +25,6 @@ libqsecmn_la_SOURCES = \
alg-sort.c \
assert.c \
chr.c \
chr-cnv.c \
dll.c \
env.c \
gdl.c \
@ -38,6 +37,9 @@ libqsecmn_la_SOURCES = \
fs-err.c \
fs-move.c \
main.c \
mbwc.c \
mbwc-chr.c \
mbwc-str.c \
mem.c \
oht.c \
opt.c \
@ -66,7 +68,6 @@ libqsecmn_la_SOURCES = \
str-fnmat.c \
str-incl.c \
str-len.c \
str-mbwc.c \
str-pac.c \
str-pbrk.c \
str-put.c \

View File

@ -76,19 +76,19 @@ am__installdirs = "$(DESTDIR)$(libdir)"
LTLIBRARIES = $(lib_LTLIBRARIES)
libqsecmn_la_DEPENDENCIES =
am_libqsecmn_la_OBJECTS = alg-rand.lo alg-search.lo alg-sort.lo \
assert.lo chr.lo chr-cnv.lo dll.lo env.lo gdl.lo htb.lo lda.lo \
fio.lo fma.lo fmt.lo fs.lo fs-err.lo fs-move.lo main.lo mem.lo \
oht.lo opt.lo path-basename.lo path-canon.lo pio.lo pma.lo \
rbt.lo rex.lo sio.lo sll.lo stdio.lo str-beg.lo str-cat.lo \
str-chr.lo str-cnv.lo str-cmp.lo str-cpy.lo str-del.lo \
str-dup.lo str-dynm.lo str-dynw.lo str-end.lo str-excl.lo \
str-fcpy.lo str-fnmat.lo str-incl.lo str-len.lo str-mbwc.lo \
str-pac.lo str-pbrk.lo str-put.lo str-rev.lo str-rot.lo \
str-set.lo str-spl.lo str-spn.lo str-str.lo str-subst.lo \
str-tok.lo str-trm.lo str-word.lo time.lo tio.lo tio-get.lo \
tio-put.lo tre.lo tre-ast.lo tre-compile.lo \
tre-match-backtrack.lo tre-match-parallel.lo tre-parse.lo \
tre-stack.lo utf8.lo xma.lo
assert.lo chr.lo dll.lo env.lo gdl.lo htb.lo lda.lo fio.lo \
fma.lo fmt.lo fs.lo fs-err.lo fs-move.lo main.lo mbwc.lo \
mbwc-chr.lo mbwc-str.lo mem.lo oht.lo opt.lo path-basename.lo \
path-canon.lo pio.lo pma.lo rbt.lo rex.lo sio.lo sll.lo \
stdio.lo str-beg.lo str-cat.lo str-chr.lo str-cnv.lo \
str-cmp.lo str-cpy.lo str-del.lo str-dup.lo str-dynm.lo \
str-dynw.lo str-end.lo str-excl.lo str-fcpy.lo str-fnmat.lo \
str-incl.lo str-len.lo str-pac.lo str-pbrk.lo str-put.lo \
str-rev.lo str-rot.lo str-set.lo str-spl.lo str-spn.lo \
str-str.lo str-subst.lo str-tok.lo str-trm.lo str-word.lo \
time.lo tio.lo tio-get.lo tio-put.lo tre.lo tre-ast.lo \
tre-compile.lo tre-match-backtrack.lo tre-match-parallel.lo \
tre-parse.lo tre-stack.lo utf8.lo xma.lo
libqsecmn_la_OBJECTS = $(am_libqsecmn_la_OBJECTS)
libqsecmn_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
@ -295,7 +295,6 @@ libqsecmn_la_SOURCES = \
alg-sort.c \
assert.c \
chr.c \
chr-cnv.c \
dll.c \
env.c \
gdl.c \
@ -308,6 +307,9 @@ libqsecmn_la_SOURCES = \
fs-err.c \
fs-move.c \
main.c \
mbwc.c \
mbwc-chr.c \
mbwc-str.c \
mem.c \
oht.c \
opt.c \
@ -336,7 +338,6 @@ libqsecmn_la_SOURCES = \
str-fnmat.c \
str-incl.c \
str-len.c \
str-mbwc.c \
str-pac.c \
str-pbrk.c \
str-put.c \
@ -452,7 +453,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alg-search.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alg-sort.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/assert.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chr-cnv.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chr.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dll.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env.Plo@am__quote@
@ -466,6 +466,9 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htb.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lda.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc-chr.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc-str.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oht.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opt.Plo@am__quote@
@ -494,7 +497,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-fnmat.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-incl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-len.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-mbwc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-pac.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-pbrk.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-put.Plo@am__quote@

View File

@ -21,6 +21,7 @@
#include <qse/cmn/env.h>
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include "mem.h"
#if defined(_WIN32)

View File

@ -20,6 +20,7 @@
#include <qse/cmn/fio.h>
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include <qse/cmn/fmt.h>
#include <qse/cmn/alg.h>
#include <qse/cmn/time.h>
@ -100,7 +101,9 @@ int qse_fio_init (
);
temp_no = 0;
for (temp_ptr = path; *temp_ptr; temp_ptr++)
/* if QSE_FIO_TEMPORARY is used, the path name must
* be writable. */
for (temp_ptr = (qse_char_t*)path; *temp_ptr; temp_ptr++)
temp_no += *temp_ptr;
/* The path name template must be at least 4 characters long

View File

@ -19,7 +19,7 @@
*/
#include "fs.h"
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include <qse/cmn/path.h>
#include "mem.h"

View File

@ -20,6 +20,7 @@
#include "fs.h"
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include <qse/cmn/path.h>
#include "mem.h"

View File

@ -18,7 +18,7 @@
License along with QSE. If not, see <http://www.gnu.org/licenses/>.
*/
#include <qse/cmn/chr.h>
#include <qse/cmn/mbwc.h>
#include <qse/cmn/utf8.h>
#include "mem.h"

View File

@ -1,5 +1,5 @@
/*
* $Id: str-cnv.c 556 2011-08-31 15:43:46Z hyunghwan.chung $
* $Id$
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -18,7 +18,7 @@
License along with QSE. If not, see <http://www.gnu.org/licenses/>.
*/
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include "mem.h"
int qse_mbstowcswithcmgr (

102
qse/lib/cmn/mbwc.c Normal file
View File

@ -0,0 +1,102 @@
/*
* $Id$
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
QSE is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.
QSE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with QSE. If not, see <http://www.gnu.org/licenses/>.
*/
#include <qse/cmn/mbwc.h>
#include <qse/cmn/utf8.h>
static qse_cmgr_t builtin_cmgr =
{
qse_utf8touc,
qse_uctoutf8
};
static qse_cmgr_t* dfl_cmgr = &builtin_cmgr;
qse_cmgr_t* qse_getdflcmgr (void)
{
return dfl_cmgr;
}
void qse_setdflcmgr (qse_cmgr_t* cmgr)
{
dfl_cmgr = (cmgr? cmgr: &builtin_cmgr);
}
/* string conversion function using default character conversion manager */
int qse_mbstowcs (
const qse_mchar_t* mbs, qse_size_t* mbslen,
qse_wchar_t* wcs, qse_size_t* wcslen)
{
return qse_mbstowcswithcmgr (mbs, mbslen, wcs, wcslen, dfl_cmgr);
}
int qse_mbsntowcsn (
const qse_mchar_t* mbs, qse_size_t* mbslen,
qse_wchar_t* wcs, qse_size_t* wcslen)
{
return qse_mbsntowcsnwithcmgr (mbs, mbslen, wcs, wcslen, dfl_cmgr);
}
int qse_mbsntowcsnupto (
const qse_mchar_t* mbs, qse_size_t* mbslen,
qse_wchar_t* wcs, qse_size_t* wcslen, qse_wchar_t stopper)
{
return qse_mbsntowcsnuptowithcmgr (mbs, mbslen, wcs, wcslen, stopper, dfl_cmgr);
}
qse_wchar_t* qse_mbstowcsdup (const qse_mchar_t* mbs, qse_mmgr_t* mmgr)
{
return qse_mbstowcsdupwithcmgr (mbs, mmgr, dfl_cmgr);
}
qse_wchar_t* qse_mbsatowcsdup (const qse_mchar_t* mbs[], qse_mmgr_t* mmgr)
{
return qse_mbsatowcsdupwithcmgr (mbs, mmgr, dfl_cmgr);
}
int qse_wcstombs (
const qse_wchar_t* wcs, qse_size_t* wcslen,
qse_mchar_t* mbs, qse_size_t* mbslen)
{
return qse_wcstombswithcmgr (wcs, wcslen, mbs, mbslen, dfl_cmgr);
}
int qse_wcsntombsn (
const qse_wchar_t* wcs, qse_size_t* wcslen,
qse_mchar_t* mbs, qse_size_t* mbslen)
{
return qse_wcsntombsnwithcmgr (wcs, wcslen, mbs, mbslen, dfl_cmgr);
}
qse_mchar_t* qse_wcstombsdup (const qse_wchar_t* wcs, qse_mmgr_t* mmgr)
{
return qse_wcstombsdupwithcmgr (wcs, mmgr, dfl_cmgr);
}
qse_mchar_t* qse_wcsatombsdup (const qse_wchar_t* wcs[], qse_mmgr_t* mmgr)
{
return qse_wcsatombsdupwithcmgr (wcs, mmgr, dfl_cmgr);
}

View File

@ -468,7 +468,7 @@ static qse_mmgr_t builtin_mmgr =
static qse_mmgr_t* dfl_mmgr = &builtin_mmgr;
qse_mmgr_t* qse_getdflmmgr ()
qse_mmgr_t* qse_getdflmmgr (void)
{
return dfl_mmgr;
}

View File

@ -19,7 +19,7 @@
*/
#include <qse/cmn/pio.h>
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include "mem.h"
#if defined(_WIN32)

View File

@ -20,7 +20,7 @@
#include <qse/cmn/stdio.h>
#include <qse/cmn/chr.h>
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include "mem.h"
#include <wchar.h>

View File

@ -134,427 +134,6 @@ qse_ulong_t qse_strxtoulong (const qse_char_t* str, qse_size_t len)
return v;
}
/*
* TODO: fix wrong mbstate handling
*/
int qse_mbstowcs (
const qse_mchar_t* mbs, qse_size_t* mbslen,
qse_wchar_t* wcs, qse_size_t* wcslen)
{
const qse_mchar_t* mp;
qse_size_t mlen, wlen;
int n;
for (mp = mbs; *mp != QSE_MT('\0'); mp++);
mlen = mp - mbs; wlen = *wcslen;
n = qse_mbsntowcsn (mbs, &mlen, wcs, &wlen);
if (wcs)
{
if (wlen < *wcslen) wcs[wlen] = QSE_WT('\0');
else n = -2; /* buffer too small */
}
*mbslen = mlen; *wcslen = wlen;
return n;
}
int qse_mbsntowcsn (
const qse_mchar_t* mbs, qse_size_t* mbslen,
qse_wchar_t* wcs, qse_size_t* wcslen)
{
const qse_mchar_t* p;
qse_mbstate_t state = {{ 0, }};
int ret = 0;
qse_size_t mlen;
if (wcs)
{
qse_wchar_t* q, * qend;
p = mbs;
q = wcs;
qend = wcs + *wcslen;
mlen = *mbslen;
while (mlen > 0)
{
qse_size_t n;
if (q >= qend)
{
/* buffer too small */
ret = -2;
break;
}
n = qse_mbrtowc (p, mlen, q, &state);
if (n == 0)
{
/* invalid sequence */
ret = -1;
break;
}
if (n > mlen)
{
/* incomplete sequence */
ret = -3;
break;
}
q++;
p += n;
mlen -= n;
}
*wcslen = q - wcs;
*mbslen = p - mbs;
}
else
{
qse_wchar_t w;
qse_size_t wlen = 0;
p = mbs;
mlen = *mbslen;
while (mlen > 0)
{
qse_size_t n;
n = qse_mbrtowc (p, mlen, &w, &state);
if (n == 0)
{
/* invalid sequence */
ret = -1;
break;
}
if (n > mlen)
{
/* incomplete sequence */
ret = -3;
break;
}
p += n;
mlen -= n;
wlen += 1;
}
*wcslen = wlen;
*mbslen = p - mbs;
}
return ret;
}
int qse_mbsntowcsnupto (
const qse_mchar_t* mbs, qse_size_t* mbslen,
qse_wchar_t* wcs, qse_size_t* wcslen, qse_wchar_t stopper)
{
const qse_mchar_t* p;
qse_mbstate_t state = {{ 0, }};
int ret = 0;
qse_size_t mlen;
qse_wchar_t w;
qse_size_t wlen = 0;
qse_wchar_t* wend;
p = mbs;
mlen = *mbslen;
if (wcs) wend = wcs + *wcslen;
/* since it needs to break when a stopper is met,
* i can't perform bulky conversion using the buffer
* provided. so conversion is conducted character by
* character */
while (mlen > 0)
{
qse_size_t n;
n = qse_mbrtowc (p, mlen, &w, &state);
if (n == 0)
{
/* invalid sequence */
ret = -1;
break;
}
if (n > mlen)
{
/* incomplete sequence */
ret = -3;
break;
}
if (wcs)
{
if (wcs >= wend) break;
*wcs++ = w;
}
p += n;
mlen -= n;
wlen += 1;
if (w == stopper) break;
}
*wcslen = wlen;
*mbslen = p - mbs;
return ret;
}
qse_wchar_t* qse_mbstowcsdup (const qse_mchar_t* mbs, qse_mmgr_t* mmgr)
{
qse_size_t mbslen, wcslen;
qse_wchar_t* wcs;
if (qse_mbstowcs (mbs, &mbslen, QSE_NULL, &wcslen) <= -1) return QSE_NULL;
wcslen++; /* for terminating null */
wcs = QSE_MMGR_ALLOC (mmgr, wcslen * QSE_SIZEOF(*wcs));
if (wcs == QSE_NULL) return QSE_NULL;
qse_mbstowcs (mbs, &mbslen, wcs, &wcslen);
return wcs;
}
qse_wchar_t* qse_mbsatowcsdup (const qse_mchar_t* mbs[], qse_mmgr_t* mmgr)
{
qse_wchar_t* buf, * ptr;
qse_size_t i;
qse_size_t capa = 0;
qse_size_t wl, ml;
QSE_ASSERT (mmgr != QSE_NULL);
for (i = 0; mbs[i]; i++)
{
if (qse_mbstowcs(mbs[i], &ml, QSE_NULL, &wl) <= -1) return QSE_NULL;
capa += wl;
}
buf = (qse_wchar_t*) QSE_MMGR_ALLOC (
mmgr, (capa + 1) * QSE_SIZEOF(*buf));
if (buf == QSE_NULL) return QSE_NULL;
ptr = buf;
for (i = 0; mbs[i]; i++)
{
wl = capa + 1;
qse_mbstowcs (mbs[i], &ml, ptr, &wl);
ptr += wl;
capa -= wl;
}
return buf;
}
int qse_wcstombs (
const qse_wchar_t* wcs, qse_size_t* wcslen,
qse_mchar_t* mbs, qse_size_t* mbslen)
{
const qse_wchar_t* p = wcs;
qse_mbstate_t state = {{ 0, }};
int ret = 0;
if (mbs)
{
qse_size_t rem = *mbslen;
while (*p != QSE_WT('\0'))
{
qse_size_t n;
if (rem <= 0)
{
ret = -2;
break;
}
n = qse_wcrtomb (*p, mbs, rem, &state);
if (n == 0)
{
ret = -1;
break; /* illegal character */
}
if (n > rem)
{
ret = -2;
break; /* buffer too small */
}
mbs += n; rem -= n; p++;
}
/* update mbslen to the length of the mbs string converted excluding
* terminating null */
*mbslen -= rem;
/* null-terminate the multibyte sequence if it has sufficient space */
if (rem > 0) *mbs = QSE_MT('\0');
else
{
/* if ret is -2 and wcs[wcslen] == QSE_WT('\0'),
* this means that the mbs buffer was lacking one
* slot for the terminating null */
ret = -2; /* buffer too small */
}
}
else
{
qse_mchar_t mbsbuf[QSE_MBLEN_MAX];
qse_size_t mlen = 0;
while (*p != QSE_WT('\0'))
{
qse_size_t n;
n = qse_wcrtomb (*p, mbsbuf, QSE_COUNTOF(mbsbuf), &state);
if (n == 0)
{
ret = -1;
break; /* illegal character */
}
/* it assumes that mbs is large enough to hold a character */
QSE_ASSERT (n <= QSE_COUNTOF(mbs));
p++; mlen += n;
}
/* this length holds the number of resulting multi-byte characters
* excluding the terminating null character */
*mbslen = mlen;
}
*wcslen = p - wcs; /* the number of wide characters handled. */
return ret;
}
int qse_wcsntombsn (
const qse_wchar_t* wcs, qse_size_t* wcslen,
qse_mchar_t* mbs, qse_size_t* mbslen)
{
const qse_wchar_t* p = wcs;
const qse_wchar_t* end = wcs + *wcslen;
qse_mbstate_t state = {{ 0, }};
int ret = 0;
if (mbs)
{
qse_size_t rem = *mbslen;
while (p < end)
{
qse_size_t n;
if (rem <= 0)
{
ret = -2; /* buffer too small */
break;
}
n = qse_wcrtomb (*p, mbs, rem, &state);
if (n == 0)
{
ret = -1;
break; /* illegal character */
}
if (n > rem)
{
ret = -2; /* buffer too small */
break;
}
mbs += n; rem -= n; p++;
}
*mbslen -= rem;
}
else
{
qse_mchar_t mbsbuf[QSE_MBLEN_MAX];
qse_size_t mlen = 0;
while (p < end)
{
qse_size_t n;
n = qse_wcrtomb (*p, mbsbuf, QSE_COUNTOF(mbsbuf), &state);
if (n == 0)
{
ret = -1;
break; /* illegal character */
}
/* it assumes that mbs is large enough to hold a character */
QSE_ASSERT (n <= QSE_COUNTOF(mbsbuf));
p++; mlen += n;
}
/* this length excludes the terminating null character.
* this function doesn't event null-terminate the result. */
*mbslen = mlen;
}
*wcslen = p - wcs;
return ret;
}
qse_mchar_t* qse_wcstombsdup (const qse_wchar_t* wcs, qse_mmgr_t* mmgr)
{
qse_size_t wcslen, mbslen;
qse_mchar_t* mbs;
if (qse_wcstombs (wcs, &wcslen, QSE_NULL, &mbslen) <= -1) return QSE_NULL;
mbslen++; /* for the terminating null character */
mbs = QSE_MMGR_ALLOC (mmgr, mbslen * QSE_SIZEOF(*mbs));
if (mbs == QSE_NULL) return QSE_NULL;
qse_wcstombs (wcs, &wcslen, mbs, &mbslen);
return mbs;
}
qse_mchar_t* qse_wcsatombsdup (const qse_wchar_t* wcs[], qse_mmgr_t* mmgr)
{
qse_mchar_t* buf, * ptr;
qse_size_t i;
qse_size_t wl, ml;
qse_size_t capa = 0;
QSE_ASSERT (mmgr != QSE_NULL);
for (i = 0; wcs[i]; i++)
{
if (qse_wcstombs (wcs[i], &wl, QSE_NULL, &ml) <= -1) return QSE_NULL;
capa += ml;
}
buf = (qse_mchar_t*) QSE_MMGR_ALLOC (
mmgr, (capa + 1) * QSE_SIZEOF(*buf));
if (buf == QSE_NULL) return QSE_NULL;
ptr = buf;
for (i = 0; wcs[i]; i++)
{
ml = capa + 1;
qse_wcstombs (wcs[i], &wl, ptr, &ml);
ptr += ml;
capa -= ml;
}
return buf;
}
/* case conversion */
qse_size_t qse_mbslwr (qse_mchar_t* str)

View File

@ -19,7 +19,7 @@
*/
#include <qse/cmn/tio.h>
#include <qse/cmn/chr.h>
#include <qse/cmn/mbwc.h>
#include "mem.h"
#define STATUS_ILLSEQ (1 << 0)