did some mbwc makeover

This commit is contained in:
hyung-hwan 2011-12-31 15:24:48 +00:00
parent a0fc992c65
commit d1883d2a72
20 changed files with 501 additions and 683 deletions

View File

@ -14,6 +14,7 @@ pkginclude_HEADERS = \
lda.h \
main.h \
map.h \
mbwc.h \
mem.h \
oht.h \
opt.h \

View File

@ -162,20 +162,6 @@ typedef qse_ctype_t qse_wctype_t;
((c) >= QSE_WT('A') && (c) <= QSE_WT('F'))? ((c) - QSE_WT('A') + 10): \
((c) >= QSE_WT('a') && (c) <= QSE_WT('f'))? ((c) - QSE_WT('a') + 10): -1)
/**
* The qse_mbstate_t type defines a structure large enough to hold
* the standard mbstate_t.
*/
typedef struct qse_mbstate_t qse_mbstate_t;
struct qse_mbstate_t
{
#if defined(QSE_SIZEOF_MBSTATE_T) && (QSE_SIZEOF_MBSTATE_T > 0)
char dummy[QSE_SIZEOF_MBSTATE_T];
#else
char dummy[1];
#endif
};
#ifdef __cplusplus
extern "C" {
#endif
@ -244,77 +230,6 @@ qse_mctype_t qse_getmctype (
# define qse_getctype(name) qse_getwctype(name)
#endif
qse_size_t qse_mbrlen (
const qse_mchar_t* mb,
qse_size_t mblen,
qse_mbstate_t* state
);
qse_size_t qse_mbrtowc (
const qse_mchar_t* mb,
qse_size_t mblen,
qse_wchar_t* wc,
qse_mbstate_t* state
);
qse_size_t qse_wcrtomb (
qse_wchar_t wc,
qse_mchar_t* mb,
qse_size_t mblen,
qse_mbstate_t* state
);
/**
* The qse_mblen() function scans a multibyte sequence to get the number of
* bytes needed to form a wide character. It does not scan more than @a mblen
* bytes.
* @return number of bytes processed on success,
* 0 for invalid sequences,
* mblen + 1 for incomplete sequences
* @note This function can not handle conversion producing non-initial
* states. For each call, it assumes initial state.
*/
qse_size_t qse_mblen (
const qse_mchar_t* mb,
qse_size_t mblen
);
/**
* The qse_mbtowc() function converts a multibyte sequence to a wide character.
* It returns 0 if an invalid multibyte sequence is detected, mblen + 1 if the
* sequence is incomplete. It returns the number of bytes processed to form a
* wide character.
* @note This function can not handle conversion producing non-initial
* states. For each call, it assumes initial state.
*/
qse_size_t qse_mbtowc (
const qse_mchar_t* mb,
qse_size_t mblen,
qse_wchar_t* wc
);
/**
* The qse_wctomb() function converts a wide character to a multibyte sequence.
* It returns 0 if the wide character is illegal, mblen + 1 if mblen is not
* large enough to hold the multibyte sequence. On successful conversion, it
* returns the number of bytes in the sequence.
* @note This function can not handle conversion producing non-initial
* states. For each call, it assumes initial state.
*/
qse_size_t qse_wctomb (
qse_wchar_t wc,
qse_mchar_t* mb,
qse_size_t mblen
);
/**
* The qse_getmbcurmax() function returns the value of MB_CUR_MAX.
* Note that QSE_MBLEN_MAX defines MB_LEN_MAX.
*/
int qse_getmbcurmax (
void
);
#ifdef __cplusplus
}
#endif

360
qse/include/qse/cmn/mbwc.h Normal file
View File

@ -0,0 +1,360 @@
/*
* $Id$
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
QSE is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.
QSE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with QSE. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _QSE_CMN_MBWC_H_
#define _QSE_CMN_MBWC_H_
#include <qse/types.h>
#include <qse/macros.h>
/**
 * The qse_mbstate_t type defines a structure large enough to hold
 * the standard mbstate_t. Its only member is a byte array whose size is
 * QSE_SIZEOF_MBSTATE_T when that macro is defined and positive, and
 * a single byte otherwise.
 */
typedef struct qse_mbstate_t qse_mbstate_t;
struct qse_mbstate_t
{
#if defined(QSE_SIZEOF_MBSTATE_T) && (QSE_SIZEOF_MBSTATE_T > 0)
char dummy[QSE_SIZEOF_MBSTATE_T];
#else
char dummy[1]; /* fallback when the size of mbstate_t is unknown */
#endif
};
#ifdef __cplusplus
extern "C" {
#endif
/**
 * The qse_getdflcmgr() function returns the default character conversion
 * manager, that is, the one used by the string conversion functions that
 * take no explicit @a cmgr argument.
 */
qse_cmgr_t* qse_getdflcmgr (
void
);
/**
 * The qse_setdflcmgr() function changes the default character conversion
 * manager to @a cmgr. Passing #QSE_NULL selects the built-in manager again.
 */
void qse_setdflcmgr (
qse_cmgr_t* cmgr
);
/* --------------------------------------------------- */
/* CHARACTER CONVERSION */
/* --------------------------------------------------- */
/**
 * The qse_mbrlen() function takes a multibyte sequence @a mb of up to
 * @a mblen bytes and a conversion state @a state.
 * NOTE(review): the implementation is not visible from this header;
 * presumably it is the state-carrying counterpart of qse_mblen() with the
 * same return convention - confirm against the implementation.
 */
qse_size_t qse_mbrlen (
const qse_mchar_t* mb,
qse_size_t mblen,
qse_mbstate_t* state
);
/**
 * The qse_mbrtowc() function converts a multibyte sequence @a mb of up to
 * @a mblen bytes to a wide character stored into @a wc, using the conversion
 * state @a state.
 * NOTE(review): presumably it follows the return convention of qse_mbtowc()
 * (0 for an invalid sequence, a value greater than @a mblen for an
 * incomplete sequence, otherwise the number of bytes consumed) - confirm
 * against the implementation.
 */
qse_size_t qse_mbrtowc (
const qse_mchar_t* mb,
qse_size_t mblen,
qse_wchar_t* wc,
qse_mbstate_t* state
);
/**
 * The qse_wcrtomb() function converts a wide character @a wc to a multibyte
 * sequence written to the buffer @a mb of @a mblen bytes, using the
 * conversion state @a state.
 * NOTE(review): presumably it follows the return convention of qse_wctomb()
 * (0 for an illegal character, a value greater than @a mblen for a buffer
 * that is too small, otherwise the number of bytes produced) - confirm
 * against the implementation.
 */
qse_size_t qse_wcrtomb (
qse_wchar_t wc,
qse_mchar_t* mb,
qse_size_t mblen,
qse_mbstate_t* state
);
/**
 * The qse_mblen() function scans a multibyte sequence to get the number of
 * bytes needed to form a wide character. It does not scan more than @a mblen
 * bytes.
 * @return number of bytes processed on success,
 *         0 for invalid sequences,
 *         mblen + 1 for incomplete sequences
 * @note This function cannot handle conversion producing non-initial
 *       states. For each call, it assumes the initial state.
 */
qse_size_t qse_mblen (
const qse_mchar_t* mb,
qse_size_t mblen
);
/**
 * The qse_mbtowc() function converts a multibyte sequence to a wide character.
 * @return number of bytes processed to form a wide character on success,
 *         0 if an invalid multibyte sequence is detected,
 *         mblen + 1 if the sequence is incomplete
 * @note This function cannot handle conversion producing non-initial
 *       states. For each call, it assumes the initial state.
 */
qse_size_t qse_mbtowc (
const qse_mchar_t* mb,
qse_size_t mblen,
qse_wchar_t* wc
);
/**
 * The qse_wctomb() function converts a wide character to a multibyte sequence.
 * @return number of bytes in the sequence on successful conversion,
 *         0 if the wide character is illegal,
 *         mblen + 1 if @a mblen is not large enough to hold the multibyte
 *         sequence
 * @note This function cannot handle conversion producing non-initial
 *       states. For each call, it assumes the initial state.
 */
qse_size_t qse_wctomb (
qse_wchar_t wc,
qse_mchar_t* mb,
qse_size_t mblen
);
/**
 * The qse_getmbcurmax() function returns the value of MB_CUR_MAX.
 * Note that QSE_MBLEN_MAX defines MB_LEN_MAX.
 * @return value of MB_CUR_MAX
 */
int qse_getmbcurmax (
void
);
/* --------------------------------------------------- */
/* STRING CONVERSION USING CMGR */
/* --------------------------------------------------- */
/**
 * The qse_mbstowcswithcmgr() function is the variant of qse_mbstowcs() that
 * takes the character conversion manager @a cmgr explicitly. qse_mbstowcs()
 * forwards its arguments to this function with the default manager.
 */
int qse_mbstowcswithcmgr (
const qse_mchar_t* mbs,
qse_size_t* mbslen,
qse_wchar_t* wcs,
qse_size_t* wcslen,
qse_cmgr_t* cmgr
);
/**
 * The qse_mbsntowcsnwithcmgr() function is the variant of qse_mbsntowcsn()
 * that takes the character conversion manager @a cmgr explicitly.
 * qse_mbsntowcsn() forwards its arguments to this function with the default
 * manager.
 */
int qse_mbsntowcsnwithcmgr (
const qse_mchar_t* mbs,
qse_size_t* mbslen,
qse_wchar_t* wcs,
qse_size_t* wcslen,
qse_cmgr_t* cmgr
);
/**
 * The qse_mbsntowcsnuptowithcmgr() function is the variant of
 * qse_mbsntowcsnupto() that takes the character conversion manager @a cmgr
 * explicitly. qse_mbsntowcsnupto() forwards its arguments to this function
 * with the default manager.
 */
int qse_mbsntowcsnuptowithcmgr (
const qse_mchar_t* mbs,
qse_size_t* mbslen,
qse_wchar_t* wcs,
qse_size_t* wcslen,
qse_wchar_t stopper,
qse_cmgr_t* cmgr
);
/**
 * The qse_mbstowcsdupwithcmgr() function is the variant of qse_mbstowcsdup()
 * that takes the character conversion manager @a cmgr explicitly.
 * qse_mbstowcsdup() forwards its arguments to this function with the default
 * manager.
 */
qse_wchar_t* qse_mbstowcsdupwithcmgr (
const qse_mchar_t* mbs,
qse_mmgr_t* mmgr,
qse_cmgr_t* cmgr
);
/**
 * The qse_mbsatowcsdupwithcmgr() function is the variant of
 * qse_mbsatowcsdup() that takes the character conversion manager @a cmgr
 * explicitly. qse_mbsatowcsdup() forwards its arguments to this function
 * with the default manager.
 */
qse_wchar_t* qse_mbsatowcsdupwithcmgr (
const qse_mchar_t* mbs[],
qse_mmgr_t* mmgr,
qse_cmgr_t* cmgr
);
/**
 * The qse_wcstombswithcmgr() function is the variant of qse_wcstombs() that
 * takes the character conversion manager @a cmgr explicitly. qse_wcstombs()
 * forwards its arguments to this function with the default manager.
 */
int qse_wcstombswithcmgr (
const qse_wchar_t* wcs, /**< [in] wide-character string to convert */
qse_size_t* wcslen, /**< [out] number of wide-characters handled */
qse_mchar_t* mbs, /**< [out] #QSE_NULL or buffer pointer */
qse_size_t* mbslen, /**< [in,out] buffer size for in,
actual length for out */
qse_cmgr_t* cmgr
);
/**
 * The qse_wcsntombsnwithcmgr() function is the variant of qse_wcsntombsn()
 * that takes the character conversion manager @a cmgr explicitly.
 * qse_wcsntombsn() forwards its arguments to this function with the default
 * manager.
 */
int qse_wcsntombsnwithcmgr (
const qse_wchar_t* wcs, /**< [in] wide string */
qse_size_t* wcslen, /**< [in,out] wide string length for in,
number of wide characters handled for out */
qse_mchar_t* mbs, /**< [out] #QSE_NULL or buffer pointer */
qse_size_t* mbslen, /**< [in,out] buffer size for in,
actual size for out */
qse_cmgr_t* cmgr
);
/**
 * The qse_wcstombsdupwithcmgr() function is the variant of qse_wcstombsdup()
 * that takes the character conversion manager @a cmgr explicitly.
 * qse_wcstombsdup() forwards its arguments to this function with the default
 * manager.
 */
qse_mchar_t* qse_wcstombsdupwithcmgr (
const qse_wchar_t* wcs,
qse_mmgr_t* mmgr,
qse_cmgr_t* cmgr
);
/**
 * The qse_wcsatombsdupwithcmgr() function is the variant of
 * qse_wcsatombsdup() that takes the character conversion manager @a cmgr
 * explicitly. qse_wcsatombsdup() forwards its arguments to this function
 * with the default manager.
 */
qse_mchar_t* qse_wcsatombsdupwithcmgr (
const qse_wchar_t* wcs[],
qse_mmgr_t* mmgr,
qse_cmgr_t* cmgr
);
/* --------------------------------------------------- */
/* STRING CONVERSION */
/* --------------------------------------------------- */
/**
 * The qse_mbstowcs() function converts a null-terminated multibyte string to
 * a wide character string using the default character conversion manager.
 *
 * It never returns -2 if @a wcs is #QSE_NULL.
 *
 * @code
 * const qse_mchar_t* mbs = QSE_MT("a multibyte string");
 * qse_wchar_t wcs[100];
 * qse_size_t wcslen = QSE_COUNTOF(wcs);
 * qse_size_t mbslen;
 * int n;
 * n = qse_mbstowcs (mbs, &mbslen, wcs, &wcslen);
 * if (n <= -1) { invalid/incomplete sequence or buffer too small }
 * @endcode
 *
 * @return 0 on success.
 *         -1 if @a mbs contains an illegal character.
 *         -2 if the wide-character string buffer is too small.
 *         -3 if @a mbs is not a complete sequence.
 */
int qse_mbstowcs (
const qse_mchar_t* mbs, /**< [in] multibyte string to convert */
qse_size_t* mbslen, /**< [out] number of multibyte characters
handled */
qse_wchar_t* wcs, /**< [out] wide-character string buffer */
qse_size_t* wcslen /**< [in,out] buffer size for in,
number of characters in the buffer for out */
);
/**
 * The qse_mbsntowcsn() function converts a multibyte string of the length
 * given in @a mbslen to a wide character string using the default character
 * conversion manager. Unlike qse_mbstowcs(), the input length is supplied
 * by the caller through @a mbslen.
 *
 * It never returns -2 if @a wcs is #QSE_NULL.
 *
 * @return 0 on success.
 *         -1 if @a mbs contains an illegal character.
 *         -2 if the wide-character string buffer is too small.
 *         -3 if @a mbs is not a complete sequence.
 */
int qse_mbsntowcsn (
const qse_mchar_t* mbs,
qse_size_t* mbslen,
qse_wchar_t* wcs,
qse_size_t* wcslen
);
/**
 * The qse_mbsntowcsnupto() function is the same as qse_mbsntowcsn()
 * except that it stops once it has processed the @a stopper character.
 * The @a stopper character itself is included in the result.
 */
int qse_mbsntowcsnupto (
const qse_mchar_t* mbs,
qse_size_t* mbslen,
qse_wchar_t* wcs,
qse_size_t* wcslen,
qse_wchar_t stopper
);
/**
 * The qse_mbstowcsdup() function converts a null-terminated multibyte string
 * @a mbs using the default character conversion manager and returns the
 * result as a wide character string obtained through @a mmgr.
 * NOTE(review): presumably #QSE_NULL is returned on conversion or allocation
 * failure and the caller releases the result with @a mmgr - confirm against
 * qse_mbstowcsdupwithcmgr().
 */
qse_wchar_t* qse_mbstowcsdup (
const qse_mchar_t* mbs,
qse_mmgr_t* mmgr
);
/**
 * The qse_mbsatowcsdup() function converts the multibyte strings in the
 * array @a mbs using the default character conversion manager and returns
 * the result as a single wide character string obtained through @a mmgr.
 * NOTE(review): presumably the array is terminated by a #QSE_NULL element
 * and #QSE_NULL is returned on failure - confirm against
 * qse_mbsatowcsdupwithcmgr().
 */
qse_wchar_t* qse_mbsatowcsdup (
const qse_mchar_t* mbs[],
qse_mmgr_t* mmgr
);
/**
 * The qse_wcstombs() function converts a null-terminated wide character
 * string @a wcs to a multibyte string and writes it into the buffer pointed to
 * by @a mbs, but not more than @a mbslen bytes including the terminating null.
 *
 * Upon return, @a mbslen is modified to the actual bytes written to @a mbs
 * excluding the terminating null; @a wcslen is modified to the number of
 * wide characters converted.
 *
 * You may pass #QSE_NULL for @a mbs to dry-run conversion or to get the
 * required buffer size for conversion. -2 is never returned in this case.
 *
 * @return
 * - 0 on full conversion,
 * - -1 on no or partial conversion for an illegal character encountered,
 * - -2 on no or partial conversion for a small buffer.
 *
 * @code
 * const qse_wchar_t* wcs = QSE_T("hello");
 * qse_mchar_t mbs[10];
 * qse_size_t wcslen;
 * qse_size_t mbslen = QSE_COUNTOF(mbs);
 * int n;
 * n = qse_wcstombs (wcs, &wcslen, mbs, &mbslen);
 * if (n >= 0)
 * {
 *    // wcs fully scanned and mbs null-terminated
 * }
 * @endcode
 */
int qse_wcstombs (
const qse_wchar_t* wcs, /**< [in] wide-character string to convert */
qse_size_t* wcslen, /**< [out] number of wide-characters handled */
qse_mchar_t* mbs, /**< [out] #QSE_NULL or buffer pointer */
qse_size_t* mbslen /**< [in,out] buffer size for in,
actual length for out */
);
/**
 * The qse_wcsntombsn() function converts the first @a wcslen characters from
 * a wide character string @a wcs to a multibyte string and writes it to a
 * buffer @a mbs not more than @a mbslen bytes.
 *
 * Upon return, it modifies @a mbslen to the actual bytes written to @a mbs
 * and @a wcslen to the number of wide characters converted.
 *
 * You may pass #QSE_NULL for @a mbs to dry-run conversion or to get the
 * required buffer size for conversion.
 *
 * 0 is returned on full conversion. The number of wide characters handled
 * is stored into @a wcslen and the number of produced multibyte characters
 * is stored into @a mbslen. -1 is returned if an illegal character is
 * encountered during conversion and -2 is returned if the buffer is not
 * large enough to perform full conversion. However, the number of wide
 * characters handled so far stored into @a wcslen and the number of produced
 * multibyte characters so far stored into @a mbslen are still valid.
 * If @a mbs is #QSE_NULL, -2 is never returned.
 *
 * @return 0 on success,
 *         -1 if @a wcs contains an illegal character,
 *         -2 if the multibyte string buffer is too small.
 */
int qse_wcsntombsn (
const qse_wchar_t* wcs, /**< [in] wide string */
qse_size_t* wcslen,/**< [in,out] wide string length for in,
number of wide characters handled for out */
qse_mchar_t* mbs, /**< [out] #QSE_NULL or buffer pointer */
qse_size_t* mbslen /**< [in,out] buffer size for in,
actual size for out */
);
/**
 * The qse_wcstombsdup() function converts a null-terminated wide character
 * string @a wcs using the default character conversion manager and returns
 * the result as a multibyte string obtained through @a mmgr.
 * NOTE(review): presumably #QSE_NULL is returned on conversion or allocation
 * failure and the caller releases the result with @a mmgr - confirm against
 * qse_wcstombsdupwithcmgr().
 */
qse_mchar_t* qse_wcstombsdup (
const qse_wchar_t* wcs,
qse_mmgr_t* mmgr
);
/**
 * The qse_wcsatombsdup() function converts the wide character strings in the
 * array @a wcs using the default character conversion manager and returns
 * the result as a single multibyte string obtained through @a mmgr.
 * NOTE(review): presumably the array is terminated by a #QSE_NULL element
 * and #QSE_NULL is returned on failure - confirm against
 * qse_wcsatombsdupwithcmgr().
 */
qse_mchar_t* qse_wcsatombsdup (
const qse_wchar_t* wcs[],
qse_mmgr_t* mmgr
);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -2261,155 +2261,6 @@ int qse_wcsxnfnmat (
# define qse_strxnfnmat(str,slen,ptn,plen,flags) qse_wcsxnfnmat(str,slen,ptn,plen,flags)
#endif
/**
* The qse_mbstowcs() function converts a null-terminated multibyte string to
* a wide character string.
*
* It never returns -2 if @a wcs is #QSE_NULL.
*
* @code
* const qse_mchar_t* mbs = QSE_MT("a multibyte string");
* qse_wchar_t wcs[100];
* qse_size_t wcslen = QSE_COUNTOF(wcs);
* qse_size_t mbslen;
* int n;
* n = qse_mbstowcs (mbs, &mbslen, wcs, &wcslen);
* if (n <= -1) { invalid/incomplete sequence or buffer too small }
* @endcode
*
* @return 0 on success.
* -1 if @a mbs contains an illegal character.
* -2 if the wide-character string buffer is too small.
* -3 if @a mbs is not a complete sequence.
*/
int qse_mbstowcs (
const qse_mchar_t* mbs, /**< [in] multibyte string to convert */
qse_size_t* mbslen, /**< [out] number of multibyte characters
handled */
qse_wchar_t* wcs, /**< [out] wide-character string buffer */
qse_size_t* wcslen /**< [in,out] buffer size for in,
number of characters in the buffer for out */
);
/**
* The qse_mbsntowcsn() function converts a multibyte string to a
* wide character string.
*
* It never returns -2 if @a wcs is #QSE_NULL.
*
* @return 0 on success.
* -1 if @a mbs contains an illegal character.
* -2 if the wide-character string buffer is too small.
* -3 if @a mbs is not a complete sequence.
*/
int qse_mbsntowcsn (
const qse_mchar_t* mbs,
qse_size_t* mbslen,
qse_wchar_t* wcs,
qse_size_t* wcslen
);
/**
* The qse_mbsntowcsnupto() function is the same as qse_mbsntowcsn()
* except that it stops once it has processed the @a stopper character.
*/
int qse_mbsntowcsnupto (
const qse_mchar_t* mbs,
qse_size_t* mbslen,
qse_wchar_t* wcs,
qse_size_t* wcslen,
qse_wchar_t stopper
);
qse_wchar_t* qse_mbstowcsdup (
const qse_mchar_t* mbs,
qse_mmgr_t* mmgr
);
qse_wchar_t* qse_mbsatowcsdup (
const qse_mchar_t* mbs[],
qse_mmgr_t* mmgr
);
/**
* The qse_wcstombs() function converts a null-terminated wide character
* string @a wcs to a multibyte string and writes it into the buffer pointed to
* by @a mbs, but not more than @a mbslen bytes including the terminating null.
*
* Upon return, @a mbslen is modified to the actual bytes written to @a mbs
* excluding the terminating null; @a wcslen is modified to the number of
* wide characters converted.
*
* You may pass #QSE_NULL for @a mbs to dry-run conversion or to get the
* required buffer size for conversion. -2 is never returned in this case.
*
* @return
* - 0 on full conversion,
* - -1 on no or partial conversion for an illegal character encountered,
* - -2 on no or partial conversion for a small buffer.
*
* @code
* const qse_wchar_t* wcs = QSE_T("hello");
* qse_mchar_t mbs[10];
* qse_size_t wcslen;
* qse_size_t mbslen = QSE_COUNTOF(mbs);
* n = qse_wcstombs (wcs, &wcslen, mbs, &mbslen);
* if (n >= 0)
* {
* // wcs fully scanned and mbs null-terminated
* }
* @endcode
*/
int qse_wcstombs (
const qse_wchar_t* wcs, /**< [in] wide-character string to convert*/
qse_size_t* wcslen, /**< [out] number of wide-characters handled */
qse_mchar_t* mbs, /**< [out] #QSE_NULL or buffer pointer */
qse_size_t* mbslen /**< [in,out] buffer size for in,
actual length for out*/
);
/**
* The qse_wcsntombsn() function converts the first @a wcslen characters from
* a wide character string @a wcs to a multibyte string and writes it to a
* buffer @a mbs not more than @a mbslen bytes.
*
* Upon return, it modifies @a mbslen to the actual bytes written to @a mbs
* and @a wcslen to the number of wide characters converted.
*
* You may pass #QSE_NULL for @a mbs to dry-run conversion or to get the
* required buffer size for conversion.
*
* 0 is returned on full conversion. The number of wide characters handled
* is stored into @a wcslen and the number of produced multibyte characters
* is stored into @a mbslen. -1 is returned if an illegal character is
* encountered during conversion and -2 is returned if the buffer is not
* large enough to perform full conversion. however, the number of wide
* characters handled so far stored into @a wcslen and the number of produced
* multibyte characters so far stored into @a mbslen are still valid.
* If @a mbs is #QSE_NULL, -2 is never returned.
*
* @return 0 on success,
* -1 if @a wcs contains an illegal character,
* -2 if the multibyte string buffer is too small.
*/
int qse_wcsntombsn (
const qse_wchar_t* wcs, /**< [in] wide string */
qse_size_t* wcslen,/**< [in,out] wide string length for in,
number of wide characters handled for out */
qse_mchar_t* mbs, /**< [out] #QSE_NULL or buffer pointer */
qse_size_t* mbslen /**< [in,out] buffer size for in,
actual size for out */
);
qse_mchar_t* qse_wcstombsdup (
const qse_wchar_t* wcs,
qse_mmgr_t* mmgr
);
qse_mchar_t* qse_wcsatombsdup (
const qse_wchar_t* wcs[],
qse_mmgr_t* mmgr
);
QSE_DEFINE_COMMON_FUNCTIONS (mbs)

View File

@ -20,6 +20,7 @@
#include <qse/awk/StdAwk.hpp>
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include <qse/cmn/time.h>
#include <qse/cmn/fio.h>
#include <qse/cmn/pio.h>

View File

@ -23,6 +23,7 @@
#include <qse/cmn/sio.h>
#include <qse/cmn/pio.h>
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include <qse/cmn/time.h>
#include <qse/cmn/path.h>
#include <qse/cmn/stdio.h> /* TODO: remove dependency on qse_vsprintf */

View File

@ -25,7 +25,6 @@ libqsecmn_la_SOURCES = \
alg-sort.c \
assert.c \
chr.c \
chr-cnv.c \
dll.c \
env.c \
gdl.c \
@ -38,6 +37,9 @@ libqsecmn_la_SOURCES = \
fs-err.c \
fs-move.c \
main.c \
mbwc.c \
mbwc-chr.c \
mbwc-str.c \
mem.c \
oht.c \
opt.c \
@ -66,7 +68,6 @@ libqsecmn_la_SOURCES = \
str-fnmat.c \
str-incl.c \
str-len.c \
str-mbwc.c \
str-pac.c \
str-pbrk.c \
str-put.c \

View File

@ -76,19 +76,19 @@ am__installdirs = "$(DESTDIR)$(libdir)"
LTLIBRARIES = $(lib_LTLIBRARIES)
libqsecmn_la_DEPENDENCIES =
am_libqsecmn_la_OBJECTS = alg-rand.lo alg-search.lo alg-sort.lo \
assert.lo chr.lo chr-cnv.lo dll.lo env.lo gdl.lo htb.lo lda.lo \
fio.lo fma.lo fmt.lo fs.lo fs-err.lo fs-move.lo main.lo mem.lo \
oht.lo opt.lo path-basename.lo path-canon.lo pio.lo pma.lo \
rbt.lo rex.lo sio.lo sll.lo stdio.lo str-beg.lo str-cat.lo \
str-chr.lo str-cnv.lo str-cmp.lo str-cpy.lo str-del.lo \
str-dup.lo str-dynm.lo str-dynw.lo str-end.lo str-excl.lo \
str-fcpy.lo str-fnmat.lo str-incl.lo str-len.lo str-mbwc.lo \
str-pac.lo str-pbrk.lo str-put.lo str-rev.lo str-rot.lo \
str-set.lo str-spl.lo str-spn.lo str-str.lo str-subst.lo \
str-tok.lo str-trm.lo str-word.lo time.lo tio.lo tio-get.lo \
tio-put.lo tre.lo tre-ast.lo tre-compile.lo \
tre-match-backtrack.lo tre-match-parallel.lo tre-parse.lo \
tre-stack.lo utf8.lo xma.lo
assert.lo chr.lo dll.lo env.lo gdl.lo htb.lo lda.lo fio.lo \
fma.lo fmt.lo fs.lo fs-err.lo fs-move.lo main.lo mbwc.lo \
mbwc-chr.lo mbwc-str.lo mem.lo oht.lo opt.lo path-basename.lo \
path-canon.lo pio.lo pma.lo rbt.lo rex.lo sio.lo sll.lo \
stdio.lo str-beg.lo str-cat.lo str-chr.lo str-cnv.lo \
str-cmp.lo str-cpy.lo str-del.lo str-dup.lo str-dynm.lo \
str-dynw.lo str-end.lo str-excl.lo str-fcpy.lo str-fnmat.lo \
str-incl.lo str-len.lo str-pac.lo str-pbrk.lo str-put.lo \
str-rev.lo str-rot.lo str-set.lo str-spl.lo str-spn.lo \
str-str.lo str-subst.lo str-tok.lo str-trm.lo str-word.lo \
time.lo tio.lo tio-get.lo tio-put.lo tre.lo tre-ast.lo \
tre-compile.lo tre-match-backtrack.lo tre-match-parallel.lo \
tre-parse.lo tre-stack.lo utf8.lo xma.lo
libqsecmn_la_OBJECTS = $(am_libqsecmn_la_OBJECTS)
libqsecmn_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
@ -295,7 +295,6 @@ libqsecmn_la_SOURCES = \
alg-sort.c \
assert.c \
chr.c \
chr-cnv.c \
dll.c \
env.c \
gdl.c \
@ -308,6 +307,9 @@ libqsecmn_la_SOURCES = \
fs-err.c \
fs-move.c \
main.c \
mbwc.c \
mbwc-chr.c \
mbwc-str.c \
mem.c \
oht.c \
opt.c \
@ -336,7 +338,6 @@ libqsecmn_la_SOURCES = \
str-fnmat.c \
str-incl.c \
str-len.c \
str-mbwc.c \
str-pac.c \
str-pbrk.c \
str-put.c \
@ -452,7 +453,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alg-search.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alg-sort.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/assert.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chr-cnv.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chr.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dll.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env.Plo@am__quote@
@ -466,6 +466,9 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htb.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lda.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc-chr.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc-str.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mbwc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oht.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/opt.Plo@am__quote@
@ -494,7 +497,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-fnmat.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-incl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-len.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-mbwc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-pac.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-pbrk.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/str-put.Plo@am__quote@

View File

@ -21,6 +21,7 @@
#include <qse/cmn/env.h>
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include "mem.h"
#if defined(_WIN32)

View File

@ -20,6 +20,7 @@
#include <qse/cmn/fio.h>
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include <qse/cmn/fmt.h>
#include <qse/cmn/alg.h>
#include <qse/cmn/time.h>
@ -100,7 +101,9 @@ int qse_fio_init (
);
temp_no = 0;
for (temp_ptr = path; *temp_ptr; temp_ptr++)
/* if QSE_FIO_TEMPORARY is used, the path name must
* be writable. */
for (temp_ptr = (qse_char_t*)path; *temp_ptr; temp_ptr++)
temp_no += *temp_ptr;
/* The path name template must be at least 4 characters long

View File

@ -19,7 +19,7 @@
*/
#include "fs.h"
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include <qse/cmn/path.h>
#include "mem.h"

View File

@ -20,6 +20,7 @@
#include "fs.h"
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include <qse/cmn/path.h>
#include "mem.h"

View File

@ -18,7 +18,7 @@
License along with QSE. If not, see <http://www.gnu.org/licenses/>.
*/
#include <qse/cmn/chr.h>
#include <qse/cmn/mbwc.h>
#include <qse/cmn/utf8.h>
#include "mem.h"

View File

@ -1,5 +1,5 @@
/*
* $Id: str-cnv.c 556 2011-08-31 15:43:46Z hyunghwan.chung $
* $Id$
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
@ -18,7 +18,7 @@
License along with QSE. If not, see <http://www.gnu.org/licenses/>.
*/
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include "mem.h"
int qse_mbstowcswithcmgr (

102
qse/lib/cmn/mbwc.c Normal file
View File

@ -0,0 +1,102 @@
/*
* $Id$
*
Copyright 2006-2011 Chung, Hyung-Hwan.
This file is part of QSE.
QSE is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.
QSE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with QSE. If not, see <http://www.gnu.org/licenses/>.
*/
#include <qse/cmn/mbwc.h>
#include <qse/cmn/utf8.h>
/* built-in character conversion manager. it is initialized with
 * qse_utf8touc() and qse_uctoutf8(), in that order. */
static qse_cmgr_t builtin_cmgr =
{
qse_utf8touc,
qse_uctoutf8
};
/* conversion manager handed to the ...withcmgr() functions by the wrappers
 * in this file. it starts out as the built-in one. */
static qse_cmgr_t* dfl_cmgr = &builtin_cmgr;
/* returns the current default character conversion manager */
qse_cmgr_t* qse_getdflcmgr (void)
{
return dfl_cmgr;
}
/* changes the default character conversion manager. a null cmgr
 * selects the built-in manager. */
void qse_setdflcmgr (qse_cmgr_t* cmgr)
{
	if (cmgr)
	{
		dfl_cmgr = cmgr;
	}
	else
	{
		dfl_cmgr = &builtin_cmgr;
	}
}
/* string conversion function using default character conversion manager */
/* converts a null-terminated multibyte string to a wide-character string
 * with the default conversion manager. all arguments are forwarded to
 * qse_mbstowcswithcmgr() and its return value is passed back unchanged. */
int qse_mbstowcs (
const qse_mchar_t* mbs, qse_size_t* mbslen,
qse_wchar_t* wcs, qse_size_t* wcslen)
{
return qse_mbstowcswithcmgr (mbs, mbslen, wcs, wcslen, dfl_cmgr);
}
/* length-bounded multibyte to wide-character conversion with the default
 * conversion manager. all arguments are forwarded to
 * qse_mbsntowcsnwithcmgr() and its return value is passed back unchanged. */
int qse_mbsntowcsn (
const qse_mchar_t* mbs, qse_size_t* mbslen,
qse_wchar_t* wcs, qse_size_t* wcslen)
{
return qse_mbsntowcsnwithcmgr (mbs, mbslen, wcs, wcslen, dfl_cmgr);
}
/* same as qse_mbsntowcsn() but with a stopper character. all arguments are
 * forwarded to qse_mbsntowcsnuptowithcmgr() together with the default
 * conversion manager and its return value is passed back unchanged. */
int qse_mbsntowcsnupto (
const qse_mchar_t* mbs, qse_size_t* mbslen,
qse_wchar_t* wcs, qse_size_t* wcslen, qse_wchar_t stopper)
{
return qse_mbsntowcsnuptowithcmgr (mbs, mbslen, wcs, wcslen, stopper, dfl_cmgr);
}
/* forwards to qse_mbstowcsdupwithcmgr() with the default conversion
 * manager and returns its result unchanged. */
qse_wchar_t* qse_mbstowcsdup (const qse_mchar_t* mbs, qse_mmgr_t* mmgr)
{
return qse_mbstowcsdupwithcmgr (mbs, mmgr, dfl_cmgr);
}
/* forwards to qse_mbsatowcsdupwithcmgr() with the default conversion
 * manager and returns its result unchanged. */
qse_wchar_t* qse_mbsatowcsdup (const qse_mchar_t* mbs[], qse_mmgr_t* mmgr)
{
return qse_mbsatowcsdupwithcmgr (mbs, mmgr, dfl_cmgr);
}
/* converts a null-terminated wide-character string to a multibyte string
 * with the default conversion manager. all arguments are forwarded to
 * qse_wcstombswithcmgr() and its return value is passed back unchanged. */
int qse_wcstombs (
const qse_wchar_t* wcs, qse_size_t* wcslen,
qse_mchar_t* mbs, qse_size_t* mbslen)
{
return qse_wcstombswithcmgr (wcs, wcslen, mbs, mbslen, dfl_cmgr);
}
/* length-bounded wide-character to multibyte conversion with the default
 * conversion manager. all arguments are forwarded to
 * qse_wcsntombsnwithcmgr() and its return value is passed back unchanged. */
int qse_wcsntombsn (
const qse_wchar_t* wcs, qse_size_t* wcslen,
qse_mchar_t* mbs, qse_size_t* mbslen)
{
return qse_wcsntombsnwithcmgr (wcs, wcslen, mbs, mbslen, dfl_cmgr);
}
/* forwards to qse_wcstombsdupwithcmgr() with the default conversion
 * manager and returns its result unchanged. */
qse_mchar_t* qse_wcstombsdup (const qse_wchar_t* wcs, qse_mmgr_t* mmgr)
{
return qse_wcstombsdupwithcmgr (wcs, mmgr, dfl_cmgr);
}
/* forwards to qse_wcsatombsdupwithcmgr() with the default conversion
 * manager and returns its result unchanged. */
qse_mchar_t* qse_wcsatombsdup (const qse_wchar_t* wcs[], qse_mmgr_t* mmgr)
{
return qse_wcsatombsdupwithcmgr (wcs, mmgr, dfl_cmgr);
}

View File

@ -468,7 +468,7 @@ static qse_mmgr_t builtin_mmgr =
static qse_mmgr_t* dfl_mmgr = &builtin_mmgr;
qse_mmgr_t* qse_getdflmmgr ()
qse_mmgr_t* qse_getdflmmgr (void)
{
return dfl_mmgr;
}

View File

@ -19,7 +19,7 @@
*/
#include <qse/cmn/pio.h>
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include "mem.h"
#if defined(_WIN32)

View File

@ -20,7 +20,7 @@
#include <qse/cmn/stdio.h>
#include <qse/cmn/chr.h>
#include <qse/cmn/str.h>
#include <qse/cmn/mbwc.h>
#include "mem.h"
#include <wchar.h>

View File

@ -134,427 +134,6 @@ qse_ulong_t qse_strxtoulong (const qse_char_t* str, qse_size_t len)
return v;
}
/*
* TODO: fix wrong mbstate handling
*/
/*
 * converts the null-terminated multibyte string mbs to a wide-character
 * string by measuring mbs and delegating to qse_mbsntowcsn().
 *
 * on entry, *wcslen is the capacity of wcs. on return, *mbslen is the number
 * of bytes handled and *wcslen is the number of wide characters produced.
 * when wcs is given, the result is null-terminated if a slot is left for
 * the terminator; otherwise -2 is returned. a null wcs performs a dry run.
 */
int qse_mbstowcs (
	const qse_mchar_t* mbs, qse_size_t* mbslen,
	qse_wchar_t* wcs, qse_size_t* wcslen)
{
	qse_size_t nbytes = 0;
	qse_size_t nwide = *wcslen;
	int ret;

	/* length of the input excluding the terminating null */
	while (mbs[nbytes] != QSE_MT('\0')) nbytes++;

	ret = qse_mbsntowcsn (mbs, &nbytes, wcs, &nwide);

	if (wcs)
	{
		if (nwide >= *wcslen)
		{
			/* no slot left for the terminating null */
			ret = -2;
		}
		else
		{
			wcs[nwide] = QSE_WT('\0');
		}
	}

	*mbslen = nbytes;
	*wcslen = nwide;
	return ret;
}
/*
 * converts the first *mbslen bytes of mbs to wide characters, one
 * qse_mbrtowc() call per character.
 *
 * if wcs is given, at most *wcslen wide characters are stored and -2 is
 * returned once the buffer is full while input remains. if wcs is null,
 * the characters are only counted. -1 is returned for an invalid sequence
 * and -3 for an incomplete one. on return, *mbslen is the number of bytes
 * consumed and *wcslen is the number of wide characters produced or counted.
 */
int qse_mbsntowcsn (
	const qse_mchar_t* mbs, qse_size_t* mbslen,
	qse_wchar_t* wcs, qse_size_t* wcslen)
{
	const qse_mchar_t* src = mbs;
	qse_size_t left = *mbslen;
	/* the capacity is read only when an output buffer is given */
	qse_size_t capa = (wcs? *wcslen: 0);
	qse_size_t count = 0;
	qse_wchar_t scratch; /* conversion target for a dry run */
	qse_mbstate_t state = {{ 0, }};
	int status = 0;

	while (left > 0)
	{
		qse_size_t n;

		if (wcs && count >= capa)
		{
			/* buffer too small */
			status = -2;
			break;
		}

		n = qse_mbrtowc (src, left, (wcs? &wcs[count]: &scratch), &state);
		if (n == 0)
		{
			/* invalid sequence */
			status = -1;
			break;
		}
		if (n > left)
		{
			/* incomplete sequence */
			status = -3;
			break;
		}

		src += n;
		left -= n;
		count++;
	}

	*wcslen = count;
	*mbslen = src - mbs;
	return status;
}
/*
 * converts the first *mbslen bytes of mbs to wide characters and stops
 * right after the stopper character has been handled.
 *
 * conversion is done character by character because the stopper can be
 * recognized only after each character is decoded. -1 is returned for an
 * invalid sequence and -3 for an incomplete one. on return, *mbslen is the
 * number of bytes consumed and *wcslen is the number of wide characters
 * handled.
 *
 * NOTE(review): when wcs is full, the loop ends with the return value left
 * at 0 rather than -2; callers can tell only by comparing *mbslen with the
 * input length. confirm that this is intended.
 */
int qse_mbsntowcsnupto (
	const qse_mchar_t* mbs, qse_size_t* mbslen,
	qse_wchar_t* wcs, qse_size_t* wcslen, qse_wchar_t stopper)
{
	const qse_mchar_t* src = mbs;
	qse_size_t left = *mbslen;
	qse_size_t count = 0;
	qse_wchar_t* limit = wcs;
	qse_wchar_t ch;
	qse_mbstate_t state = {{ 0, }};
	int status = 0;

	/* the end of the output buffer matters only if there is one */
	if (wcs) limit += *wcslen;

	while (left > 0)
	{
		qse_size_t n = qse_mbrtowc (src, left, &ch, &state);

		if (n == 0)
		{
			/* invalid sequence */
			status = -1;
			break;
		}
		if (n > left)
		{
			/* incomplete sequence */
			status = -3;
			break;
		}

		if (wcs)
		{
			if (wcs >= limit) break;
			*wcs++ = ch;
		}

		src += n;
		left -= n;
		count++;

		if (ch == stopper) break;
	}

	*wcslen = count;
	*mbslen = src - mbs;
	return status;
}
/*
 * converts the null-terminated multibyte string mbs to a newly allocated,
 * null-terminated wide-character string.
 *
 * a dry run of qse_mbstowcs() determines the required length, then the
 * buffer is allocated with mmgr and filled by a second call. QSE_NULL is
 * returned if the dry run fails or the allocation fails.
 */
qse_wchar_t* qse_mbstowcsdup (const qse_mchar_t* mbs, qse_mmgr_t* mmgr)
{
	qse_size_t mbslen;
	/* qse_mbstowcs() reads *wcslen on entry even in a dry run.
	 * give it a defined value instead of an indeterminate one. */
	qse_size_t wcslen = 0;
	qse_wchar_t* wcs;

	if (qse_mbstowcs (mbs, &mbslen, QSE_NULL, &wcslen) <= -1) return QSE_NULL;

	wcslen++; /* for terminating null */
	wcs = QSE_MMGR_ALLOC (mmgr, wcslen * QSE_SIZEOF(*wcs));
	if (wcs == QSE_NULL) return QSE_NULL;

	qse_mbstowcs (mbs, &mbslen, wcs, &wcslen);
	return wcs;
}
/*
 * converts every string in the null-terminated array mbs and concatenates
 * the results into one newly allocated, null-terminated wide-character
 * string.
 *
 * the first pass sums up the lengths with dry runs of qse_mbstowcs(); the
 * second pass converts each string into place. QSE_NULL is returned if any
 * dry run fails or the allocation fails. an empty array yields an empty
 * string.
 */
qse_wchar_t* qse_mbsatowcsdup (const qse_mchar_t* mbs[], qse_mmgr_t* mmgr)
{
	qse_wchar_t* buf, * ptr;
	qse_size_t i;
	qse_size_t capa = 0;
	/* qse_mbstowcs() reads *wcslen on entry even in a dry run.
	 * start from a defined value instead of an indeterminate one. */
	qse_size_t wl = 0, ml;

	QSE_ASSERT (mmgr != QSE_NULL);

	for (i = 0; mbs[i]; i++)
	{
		if (qse_mbstowcs(mbs[i], &ml, QSE_NULL, &wl) <= -1) return QSE_NULL;
		capa += wl;
	}

	buf = (qse_wchar_t*) QSE_MMGR_ALLOC (
		mmgr, (capa + 1) * QSE_SIZEOF(*buf));
	if (buf == QSE_NULL) return QSE_NULL;

	/* the loop below terminates the buffer only when it converts at least
	 * one string. terminate it up front so that an empty array still
	 * produces a valid empty string. */
	buf[0] = '\0';

	ptr = buf;
	for (i = 0; mbs[i]; i++)
	{
		/* remaining capacity including the slot for the terminating null */
		wl = capa + 1;
		qse_mbstowcs (mbs[i], &ml, ptr, &wl);
		ptr += wl;
		capa -= wl;
	}

	return buf;
}
/*
 * converts the null-terminated wide-character string wcs to a multibyte
 * string, one qse_wcrtomb() call per character.
 *
 * if mbs is given, *mbslen is its capacity on entry and the number of bytes
 * written excluding the terminating null on return. the result is
 * null-terminated only if a byte is left for the terminator; otherwise -2
 * is returned. if mbs is null, the conversion is a dry run that stores the
 * required length excluding the terminating null into *mbslen.
 * *wcslen receives the number of wide characters handled.
 *
 * returns 0 on full conversion, -1 if an illegal character is met and
 * -2 if the buffer is too small.
 */
int qse_wcstombs (
	const qse_wchar_t* wcs, qse_size_t* wcslen,
	qse_mchar_t* mbs, qse_size_t* mbslen)
{
	const qse_wchar_t* p = wcs;
	qse_mbstate_t state = {{ 0, }};
	int ret = 0;

	if (mbs)
	{
		qse_size_t rem = *mbslen;

		while (*p != QSE_WT('\0'))
		{
			qse_size_t n;

			if (rem <= 0)
			{
				ret = -2;
				break;
			}

			n = qse_wcrtomb (*p, mbs, rem, &state);
			if (n == 0)
			{
				ret = -1;
				break; /* illegal character */
			}
			if (n > rem)
			{
				ret = -2;
				break; /* buffer too small */
			}

			mbs += n; rem -= n; p++;
		}

		/* update mbslen to the length of the mbs string converted excluding
		 * terminating null */
		*mbslen -= rem;

		/* null-terminate the multibyte sequence if it has sufficient space */
		if (rem > 0) *mbs = QSE_MT('\0');
		else
		{
			/* if ret is -2 and wcs[wcslen] == QSE_WT('\0'),
			 * this means that the mbs buffer was lacking one
			 * slot for the terminating null */
			ret = -2; /* buffer too small */
		}
	}
	else
	{
		qse_mchar_t mbsbuf[QSE_MBLEN_MAX];
		qse_size_t mlen = 0;

		while (*p != QSE_WT('\0'))
		{
			qse_size_t n;

			n = qse_wcrtomb (*p, mbsbuf, QSE_COUNTOF(mbsbuf), &state);
			if (n == 0)
			{
				ret = -1;
				break; /* illegal character */
			}

			/* it assumes that mbsbuf is large enough to hold a character.
			 * the check must measure the scratch array. mbs is a null
			 * pointer in this branch and QSE_COUNTOF(mbs) would yield the
			 * size of a pointer. */
			QSE_ASSERT (n <= QSE_COUNTOF(mbsbuf));

			p++; mlen += n;
		}

		/* this length holds the number of resulting multi-byte characters
		 * excluding the terminating null character */
		*mbslen = mlen;
	}

	*wcslen = p - wcs; /* the number of wide characters handled. */
	return ret;
}
/*
 * converts the first *wcslen wide characters of wcs to a multibyte string,
 * one qse_wcrtomb() call per character. the result is not null-terminated.
 *
 * if mbs is given, *mbslen is its capacity on entry and the number of bytes
 * written on return. if mbs is null, the conversion is a dry run that stores
 * the required number of bytes into *mbslen. *wcslen receives the number of
 * wide characters handled.
 *
 * returns 0 on full conversion, -1 if an illegal character is met and
 * -2 if the buffer is too small.
 */
int qse_wcsntombsn (
	const qse_wchar_t* wcs, qse_size_t* wcslen,
	qse_mchar_t* mbs, qse_size_t* mbslen)
{
	const qse_wchar_t* cur = wcs;
	const qse_wchar_t* stop = wcs + *wcslen;
	qse_mbstate_t state = {{ 0, }};
	int status = 0;
	qse_size_t n;

	if (!mbs)
	{
		/* dry run: convert into a scratch buffer and add up the sizes */
		qse_mchar_t mbsbuf[QSE_MBLEN_MAX];
		qse_size_t total = 0;

		for (; cur < stop; cur++)
		{
			n = qse_wcrtomb (*cur, mbsbuf, QSE_COUNTOF(mbsbuf), &state);
			if (n == 0)
			{
				/* illegal character */
				status = -1;
				break;
			}

			/* the scratch buffer is assumed to hold any single character */
			QSE_ASSERT (n <= QSE_COUNTOF(mbsbuf));
			total += n;
		}

		/* the length excludes a terminating null, which this function
		 * never writes */
		*mbslen = total;
	}
	else
	{
		qse_size_t room = *mbslen;

		for (; cur < stop; cur++)
		{
			if (room <= 0)
			{
				/* buffer too small */
				status = -2;
				break;
			}

			n = qse_wcrtomb (*cur, mbs, room, &state);
			if (n == 0)
			{
				/* illegal character */
				status = -1;
				break;
			}
			if (n > room)
			{
				/* buffer too small */
				status = -2;
				break;
			}

			mbs += n;
			room -= n;
		}

		*mbslen -= room;
	}

	*wcslen = cur - wcs;
	return status;
}
/*
 * converts the null-terminated wide-character string wcs to a newly
 * allocated, null-terminated multibyte string.
 *
 * a dry run of qse_wcstombs() determines the required size, then the buffer
 * is allocated with mmgr and filled by a second call. QSE_NULL is returned
 * if the dry run fails or the allocation fails.
 */
qse_mchar_t* qse_wcstombsdup (const qse_wchar_t* wcs, qse_mmgr_t* mmgr)
{
	qse_size_t nwide, nbytes;
	qse_mchar_t* dup;

	if (qse_wcstombs (wcs, &nwide, QSE_NULL, &nbytes) <= -1) return QSE_NULL;

	/* one more byte for the terminating null character */
	nbytes += 1;

	dup = QSE_MMGR_ALLOC (mmgr, nbytes * QSE_SIZEOF(*dup));
	if (dup != QSE_NULL)
	{
		qse_wcstombs (wcs, &nwide, dup, &nbytes);
	}

	return dup;
}
/*
 * converts every string in the null-terminated array wcs and concatenates
 * the results into one newly allocated, null-terminated multibyte string.
 *
 * the first pass sums up the sizes with dry runs of qse_wcstombs(); the
 * second pass converts each string into place. QSE_NULL is returned if any
 * dry run fails or the allocation fails. an empty array yields an empty
 * string.
 */
qse_mchar_t* qse_wcsatombsdup (const qse_wchar_t* wcs[], qse_mmgr_t* mmgr)
{
	qse_mchar_t* buf, * ptr;
	qse_size_t i;
	qse_size_t wl, ml;
	qse_size_t capa = 0;

	QSE_ASSERT (mmgr != QSE_NULL);

	for (i = 0; wcs[i]; i++)
	{
		if (qse_wcstombs (wcs[i], &wl, QSE_NULL, &ml) <= -1) return QSE_NULL;
		capa += ml;
	}

	buf = (qse_mchar_t*) QSE_MMGR_ALLOC (
		mmgr, (capa + 1) * QSE_SIZEOF(*buf));
	if (buf == QSE_NULL) return QSE_NULL;

	/* the loop below terminates the buffer only when it converts at least
	 * one string. terminate it up front so that an empty array still
	 * produces a valid empty string. */
	buf[0] = '\0';

	ptr = buf;
	for (i = 0; wcs[i]; i++)
	{
		/* remaining capacity including the byte for the terminating null */
		ml = capa + 1;
		qse_wcstombs (wcs[i], &wl, ptr, &ml);
		ptr += ml;
		capa -= ml;
	}

	return buf;
}
/* case conversion */
qse_size_t qse_mbslwr (qse_mchar_t* str)

View File

@ -19,7 +19,7 @@
*/
#include <qse/cmn/tio.h>
#include <qse/cmn/chr.h>
#include <qse/cmn/mbwc.h>
#include "mem.h"
#define STATUS_ILLSEQ (1 << 0)