qse/qse/lib/cmn/chr-cnv.c

/*
 * $Id: chr-cnv.c 556 2011-08-31 15:43:46Z hyunghwan.chung $
 *
    Copyright 2006-2011 Chung, Hyung-Hwan.
    This file is part of QSE.

    QSE is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as
    published by the Free Software Foundation, either version 3 of
    the License, or (at your option) any later version.

    QSE is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with QSE. If not, see <http://www.gnu.org/licenses/>.
 */

#include <qse/cmn/chr.h>
#include <qse/cmn/utf8.h>
#include "mem.h"

#if !defined(QSE_HAVE_CONFIG_H)
#	if defined(_WIN32) || defined(__OS2__) || defined(__DOS__)
#		define HAVE_WCHAR_H
#		define HAVE_STDLIB_H
#		define HAVE_MBRLEN
#		define HAVE_MBRTOWC
#		define HAVE_WCRTOMB
#	endif
#endif

#ifdef HAVE_WCHAR_H
#	include <wchar.h>
#endif
#ifdef HAVE_STDLIB_H
#	include <stdlib.h>
#endif

qse_size_t qse_mbrlen (
	const qse_mchar_t* mb, qse_size_t mbl, qse_mbstate_t* state)
{
#if defined(_WIN32)
	/* TODO: provide an option to use windows api */
	return qse_utf8len (mb, mbl);

#elif defined(HAVE_MBRLEN)
	size_t n;

	n = mbrlen (mb, mbl, (mbstate_t*)state);
	if (n == 0) return 1; /* a null character */

	if (n == (size_t)-1) return 0; /* invalid sequence */
	if (n == (size_t)-2) return mbl + 1; /* incomplete sequence */

	return (qse_size_t)n;

	#if 0
	n = mblen (mb, mbl);
	if (n == 0) return 1; /* a null character */
	if (n == (size_t)-1) return 0; /* invalid or incomplete sequence */
	return (qse_size_t)n;
	#endif
#else
	#error #### NOT SUPPORTED ####
#endif
}

qse_size_t qse_mbrtowc (
	const qse_mchar_t* mb, qse_size_t mbl,
	qse_wchar_t* wc, qse_mbstate_t* state)
{
#if defined(_WIN32)
/*
	int n;

	n = MultiByteToWideChar (CP_ACP,  MB_ERR_INVALID_CHARS, mb, mbl, wc, 1);
	if (n == 0) return 0;
	return mbl;
*/
	return qse_utf8touc (mb, mbl, wc);

#elif defined(HAVE_MBRTOWC)
	size_t n;

	n = mbrtowc (wc, mb, mbl, (mbstate_t*)state);
	if (n == 0)
	{
		*wc = QSE_WT('\0');
		return 1;
	}

	if (n == (size_t)-1) return 0; /* invalid sequence */
	if (n == (size_t)-2) return mbl + 1; /* incomplete sequence */
	return (qse_size_t)n;
#else
	#error #### NOT SUPPORTED ####
#endif
}

qse_size_t qse_wcrtomb (
	qse_wchar_t wc, qse_mchar_t* mb,
	qse_size_t mbl, qse_mbstate_t* state)
{
#if defined(_WIN32)
	return qse_uctoutf8 (wc, mb, mbl);

#elif defined(HAVE_WCRTOMB)
	size_t n;

	if (mbl < QSE_MBLEN_MAX)
	{
		/* the buffer given is too small. try conversion on
		 * a temporary buffer large enough to handle all locales
		 * and copy the result to the original buffer.
		 */
		qse_mchar_t buf[QSE_MBLEN_MAX];

		n = wcrtomb (buf, wc, (mbstate_t*)state);
		if (n > mbl) return mbl + 1; /* buffer to small */
		if (n == (size_t)-1) return 0; /* illegal character */

		QSE_MEMCPY (mb, buf, mbl);
	}
	else
	{
		n = wcrtomb (mb, wc, (mbstate_t*)state);
		if (n > mbl) return mbl + 1; /* buffer to small */
		if (n == (size_t)-1) return 0; /* illegal character */
	}

	return n; /* number of bytes written to the buffer */
#else
	#error #### NOT SUPPORTED ####
#endif
}

/* man mbsinit
 * For 8-bit encodings, all states are equivalent to  the  initial  state.
 * For multibyte encodings like UTF-8, EUC-*, BIG5 or SJIS, the wide char‐
 * acter to  multibyte  conversion  functions  never  produce  non-initial
 * states,  but  the multibyte to wide-character conversion functions like
 * mbrtowc(3) do produce non-initial states when interrupted in the middle
 * of a character.
 */
qse_size_t qse_mblen (const qse_mchar_t* mb, qse_size_t mbl)
{
	qse_mbstate_t state = { { 0, } };
	return qse_mbrlen (mb, mbl, &state);
}

qse_size_t qse_mbtowc (const qse_mchar_t* mb, qse_size_t mbl, qse_wchar_t* wc)
{
	qse_mbstate_t state = { { 0, } };
	return qse_mbrtowc (mb, mbl, wc, &state);
}

qse_size_t qse_wctomb (qse_wchar_t wc, qse_mchar_t* mb, qse_size_t mbl)
{
	qse_mbstate_t state = { { 0, } };
	return qse_wcrtomb (wc, mb, mbl, &state);
}

int qse_mbcurmax (void)
{
/* TODO: consider other encodings */
	return (QSE_UTF8LEN_MAX > MB_CUR_MAX)? QSE_UTF8LEN_MAX: MB_CUR_MAX;
}