adding wide string and multibyte string conversion

This commit is contained in:
2008-10-18 05:43:20 +00:00
parent fdf56f1c40
commit fa6dfeddc9
12 changed files with 401 additions and 29 deletions

View File

@ -3,6 +3,7 @@
*/
#include <ase/cmn/chr.h>
#include "mem.h"
#ifdef HAVE_WCHAR_H
#include <wchar.h>
@ -56,12 +57,6 @@ ase_size_t ase_wctomb (ase_wchar_t wc, ase_mchar_t* mb, ase_size_t mblen)
size_t n;
mbstate_t mbs = { 0 };
if (mblen < MB_CUR_MAX)
{
/* buffer too small */
return mblen + 1;
}
/* man mbsinit
* For 8-bit encodings, all states are equivalent to the initial state.
* For multibyte encodings like UTF-8, EUC-*, BIG5 or SJIS, the wide char
@ -71,8 +66,22 @@ ase_size_t ase_wctomb (ase_wchar_t wc, ase_mchar_t* mb, ase_size_t mblen)
* of a character.
*/
n = wcrtomb (mb, wc, &mbs);
if (n == (size_t)-1) n = 0; // illegal character
if (mblen < MB_CUR_MAX)
{
ase_mchar_t buf[MB_CUR_MAX];
n = wcrtomb (buf, wc, &mbs);
if (n > mblen) return mblen + 1; /* buffer to small */
if (n == (size_t)-1) return 0; /* illegal character */
ASE_MEMCPY (mb, buf, mblen);
}
else
{
n = wcrtomb (mb, wc, &mbs);
if (n > mblen) return mblen + 1; /* buffer to small */
if (n == (size_t)-1) return 0; /* illegal character */
}
return n;
#else

View File

@ -1,11 +1,15 @@
/*
* $Id: str_cnv.c 332 2008-08-18 11:21:48Z baconevi $
* $Id: str_cnv.c 430 2008-10-17 11:43:20Z baconevi $
*
* {License}
*/
#include <ase/cmn/str.h>
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif
int ase_strtoi (const ase_char_t* str)
{
int v;
@ -117,3 +121,108 @@ ase_ulong_t ase_strxtoulong (const ase_char_t* str, ase_size_t len)
ASE_STRXTONUM (v, str, len, ASE_NULL, 10);
return v;
}
/*
 * Converts the null-terminated multibyte string 'mbs' into a
 * null-terminated wide-character string stored in 'wcs'.
 *
 * mbs    - null-terminated multibyte input string.
 * wcs    - destination buffer for the wide characters.
 * wcslen - on input, the capacity of 'wcs' in wide characters, including
 *          the slot needed for the terminating null. on output, the number
 *          of wide characters stored, excluding the terminating null.
 *          it is left untouched when the capacity is 0.
 *
 * Returns the number of bytes of 'mbs' that were processed. The caller can
 * compare it with the length of 'mbs' to detect a conversion that stopped
 * early (bad sequence or destination full).
 */
ase_size_t ase_mbstowcs (
	const ase_mchar_t* mbs, ase_wchar_t* wcs, ase_size_t* wcslen)
{
	const ase_mchar_t* mp;
	ase_size_t mlen, wlen;

	/* no room at all, not even for the terminating null */
	if (*wcslen <= 0) return 0;

	if (*wcslen == 1)
	{
		/* room for the terminating null only. report that no
		 * character has been stored, as the general path does. */
		wcs[0] = L'\0';
		*wcslen = 0;
		return 0;
	}

	/* because ase_mbtowc needs the length, we get the length of mbs
	 * and pass it to ase_mbsntowcsn. a separate cursor is used so that
	 * 'mbs' still points to the beginning of the string afterwards. */
	for (mp = mbs; *mp != '\0'; mp++) /* nothing */;
	mlen = mp - mbs;

	/* reserve one slot for the terminating null */
	wlen = *wcslen - 1;
	mlen = ase_mbsntowcsn (mbs, mlen, wcs, &wlen);
	wcs[wlen] = L'\0';
	*wcslen = wlen;

	/* TODO: wcslen should include the length including null? */
	return mlen;
}
/*
 * Converts up to 'mbslen' bytes of the multibyte sequence 'mbs' into wide
 * characters stored in 'wcs'. No terminating null is appended.
 *
 * mbs    - multibyte input, not necessarily null-terminated.
 * mbslen - number of bytes available in 'mbs'.
 * wcs    - destination buffer for the wide characters.
 * wcslen - on input, the capacity of 'wcs' in wide characters.
 *          on output, the number of wide characters stored.
 *
 * Returns the number of bytes of 'mbs' processed.
 */
ase_size_t ase_mbsntowcsn (
	const ase_mchar_t* mbs, ase_size_t mbslen,
	ase_wchar_t* wcs, ase_size_t* wcslen)
{
	const ase_mchar_t* src = mbs;
	ase_wchar_t* dst = wcs;
	ase_wchar_t* const dst_end = wcs + *wcslen;
	ase_size_t remaining = mbslen;

	while (remaining > 0 && dst < dst_end)
	{
		ase_size_t consumed = ase_mbtowc (src, remaining, dst);

		/* stop on a wrong sequence or insufficient input */
		if (consumed == 0 || consumed > remaining) break;

		src += consumed;
		remaining -= consumed;
		dst++;
	}

	*wcslen = dst - wcs;

	/* the number of bytes processed */
	return src - mbs;
}
/*
 * Converts the null-terminated wide-character string 'wcs' into a
 * multibyte string stored in 'mbs'.
 *
 * wcs    - null-terminated wide-character input string.
 * mbs    - destination buffer for the multibyte sequence.
 * mbslen - on input, the capacity of 'mbs' in bytes. on output, the number
 *          of bytes stored, excluding the terminating null.
 *
 * The result is null-terminated whenever the capacity is at least 1.
 *
 * Returns the number of wide characters handled. The caller can check
 * whether the return value is as large as the length of 'wcs' to detect
 * an illegal character or an insufficient buffer.
 *
 * NOTE(review): unlike its siblings this function has no ase_ prefix and
 * shares its name with the C library's wcstombs - confirm against the
 * header before renaming.
 */
ase_size_t wcstombs (
	const ase_wchar_t* wcs, ase_mchar_t* mbs, ase_size_t* mbslen)
{
	const ase_wchar_t* p = wcs;
	ase_size_t len = *mbslen;

	while (*p != L'\0' && len > 1)
	{
		ase_size_t n = ase_wctomb (*p, mbs, len);
		if (n == 0 || n > len)
		{
			/* illegal character or buffer not enough */
			break;
		}

		if (n == len)
		{
			/* the character fills the buffer completely, leaving
			 * no space for the terminating null. stop here and
			 * exclude this last character; the null written below
			 * overwrites its first byte. */
			break;
		}

		mbs += n; len -= n; p++;
	}

	/* the number of bytes stored, excluding the terminating null */
	*mbslen -= len;

	/* null-terminate the output if the buffer has any space at all */
	if (len > 0) *mbs = '\0';

	/* the number of wide characters handled */
	return p - wcs;
}
/*
 * Converts up to 'wcslen' wide characters from 'wcs' into a multibyte
 * sequence stored in 'mbs'. No terminating null is appended.
 *
 * wcs    - wide-character input, not necessarily null-terminated.
 * wcslen - number of wide characters available in 'wcs'.
 * mbs    - destination buffer for the multibyte sequence.
 * mbslen - on input, the capacity of 'mbs' in bytes.
 *          on output, the number of bytes stored.
 *
 * Returns the number of wide characters handled. A return value smaller
 * than 'wcslen' tells the caller that the conversion stopped early.
 */
ase_size_t ase_wcsntombsn (
	const ase_wchar_t* wcs, ase_size_t wcslen,
	ase_mchar_t* mbs, ase_size_t* mbslen)
{
	const ase_wchar_t* cur;
	const ase_wchar_t* const limit = wcs + wcslen;
	ase_size_t room = *mbslen;

	for (cur = wcs; cur < limit && room > 0; cur++)
	{
		ase_size_t used = ase_wctomb (*cur, mbs, room);

		/* stop on an illegal character or when the buffer
		 * can't hold the converted character */
		if (used == 0 || used > room) break;

		mbs += used;
		room -= used;
	}

	/* capacity minus what is left gives the bytes stored */
	*mbslen -= room;

	return cur - wcs;
}

View File

@ -69,6 +69,21 @@ int ase_tio_fini (ase_tio_t* tio)
return 0;
}
/*
 * Returns the address immediately following the ase_tio_t object.
 * NOTE(review): presumably the extension area is allocated right after
 * the object itself - confirm against the allocation code.
 */
void* ase_tio_getextension (ase_tio_t* tio)
{
	ase_tio_t* past = tio + 1;
	return past;
}
/* Returns the memory manager currently stored in 'tio'. */
ase_mmgr_t* ase_tio_getmmgr (ase_tio_t* tio)
{
	ase_mmgr_t* current = tio->mmgr;
	return current;
}
/* Stores 'mmgr' as the memory manager of 'tio', replacing the old one. */
void ase_tio_setmmgr (ase_tio_t* tio, ase_mmgr_t* mmgr)
{
	ase_mmgr_t** slot = &tio->mmgr;
	*slot = mmgr;
}
int ase_tio_geterrnum (ase_tio_t* tio)
{
return tio->errnum;
@ -82,6 +97,7 @@ const ase_char_t* ase_tio_geterrstr (ase_tio_t* tio)
ASE_T("out of memory"),
ASE_T("no more space"),
ASE_T("illegal multibyte sequence"),
ASE_T("incomplete multibyte sequence"),
ASE_T("illegal wide character"),
ASE_T("no input function attached"),
ASE_T("input function returned an error"),

View File

@ -39,7 +39,17 @@ ase_ssize_t ase_tio_getc (ase_tio_t* tio, ase_char_t* c)
n = tio->input_func (
ASE_TIO_IO_DATA, tio->input_arg,
&tio->inbuf[left], ASE_COUNTOF(tio->inbuf) - left);
if (n == 0) return 0;
if (n == 0)
{
if (tio->inbuf_curp < tio->inbuf_len)
{
/* garbage left in the buffer */
tio->errnum = ASE_TIO_EICSEQ;
return -1;
}
return 0;
}
if (n <= -1)
{
tio->errnum = ASE_TIO_EINPUT;
@ -100,6 +110,7 @@ ase_ssize_t ase_tio_getc (ase_tio_t* tio, ase_char_t* c)
goto getc_conv;
}
#endif
n = ase_mbtowc (&tio->inbuf[tio->inbuf_curp], left, &curc);
if (n == 0)
{
@ -161,6 +172,7 @@ ase_ssize_t ase_tio_getsx (ase_tio_t* tio, ase_char_t* buf, ase_size_t size)
if (n == 0) break;
*p++ = c;
/* TODO: support a different line breaker */
if (c == ASE_T('\n')) break;
}
@ -194,6 +206,7 @@ ase_ssize_t ase_tio_getstr (ase_tio_t* tio, ase_str_t* buf)
return -1;
}
/* TODO: support a different line breaker */
if (c == ASE_T('\n')) break;
}