adding wide string and multibyte string conversion

This commit is contained in:
hyung-hwan 2008-10-18 05:43:20 +00:00
parent fdf56f1c40
commit fa6dfeddc9
12 changed files with 401 additions and 29 deletions

97
ase/configure vendored
View File

@ -21134,6 +21134,103 @@ fi
done
# Probe for the multibyte/wide string conversion functions.
# For each name, a small program that references the function is linked;
# the outcome is stored in ac_cv_func_<name> (reused when already cached)
# and HAVE_<NAME> is appended to confdefs.h when the link succeeds.
for ac_func in mbsnrtowcs mbsrtowcs wcsnrtombs wcsrtombs
do
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ echo "$as_me:$LINENO: checking for $ac_func" >&5
echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
echo $ECHO_N "(cached) $ECHO_C" >&6
else
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
For example, HP-UX 11i <limits.h> declares gettimeofday. */
#define $ac_func innocuous_$ac_func
/* System header to define __stub macros and hopefully few prototypes,
which can conflict with char $ac_func (); below.
Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
<limits.h> exists even on freestanding compilers. */
#ifdef __STDC__
# include <limits.h>
#else
# include <assert.h>
#endif
#undef $ac_func
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char $ac_func ();
/* The GNU C library defines this for functions which it implements
to always fail with ENOSYS. Some functions are actually named
something starting with __ and the normal name is an alias. */
#if defined __stub_$ac_func || defined __stub___$ac_func
choke me
#endif
int
main ()
{
return $ac_func ();
;
return 0;
}
_ACEOF
rm -f conftest.$ac_objext conftest$ac_exeext
if { (ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
*) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
(eval "$ac_link") 2>conftest.er1
ac_status=$?
grep -v '^ *+' conftest.er1 >conftest.err
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } && {
test -z "$ac_c_werror_flag" ||
test ! -s conftest.err
} && test -s conftest$ac_exeext &&
$as_test_x conftest$ac_exeext; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
conftest$ac_exeext conftest.$ac_ext
fi
# Report the (possibly cached) result to the log and to the terminal.
ac_res=`eval echo '${'$as_ac_var'}'`
{ echo "$as_me:$LINENO: result: $ac_res" >&5
echo "${ECHO_T}$ac_res" >&6; }
# Publish HAVE_<NAME> so that later checks and config.h can see it.
if test `eval echo '${'$as_ac_var'}'` = yes; then
cat >>confdefs.h <<_ACEOF
#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
_ACEOF
fi
done
{ echo "$as_me:$LINENO: checking for char" >&5
echo $ECHO_N "checking for char... $ECHO_C" >&6; }
if test "${ac_cv_type_char+set}" = set; then

View File

@ -86,6 +86,7 @@ AC_CHECK_TYPE([wchar_t],
dnl check functions
AC_CHECK_FUNCS([uselocale])
AC_CHECK_FUNCS([mbrlen mbrtowc wcrtomb])
AC_CHECK_FUNCS([mbsnrtowcs mbsrtowcs wcsnrtombs wcsrtombs])
dnl Checks the size of primitive data types
AC_CHECK_SIZEOF(char)

View File

@ -1,5 +1,5 @@
/*
* $Id: Awk.hpp 399 2008-09-29 10:26:26Z baconevi $
* $Id: Awk.hpp 430 2008-10-17 11:43:20Z baconevi $
*
* {License}
*/
@ -11,6 +11,7 @@
#include <ase/cmn/map.h>
#include <ase/cmn/chr.h>
#include <stdarg.h>
#include <stddef.h>
/////////////////////////////////
ASE_BEGIN_NAMESPACE(ASE)
@ -323,8 +324,10 @@ public:
public:
// initialization
void* operator new (size_t n, awk_t* awk) throw ();
void* operator new[] (size_t n, awk_t* awk) throw ();
//void* operator new (size_t n, awk_t* awk) throw ();
//void* operator new[] (size_t n, awk_t* awk) throw ();
void* operator new (::size_t n, awk_t* awk) throw ();
void* operator new[] (::size_t n, awk_t* awk) throw ();
#if !defined(__BORLANDC__)
// deletion when initialization fails

View File

@ -54,17 +54,41 @@ ase_size_t ase_mblen (
ase_size_t mblen
);
/****f* ase.cmn.chr/ase_mbtowc
* NAME
* ase_mbtowc - convert a multibyte sequence to a wide character.
*
* RETURN
* The ase_mbtowc() function returns 0 if an invalid multibyte sequence is
* detected, mblen + 1 if the sequence is incomplete. It returns the number
* of bytes processed to form a wide character.
*
* SYNOPSIS
*/
ase_size_t ase_mbtowc (
const ase_mchar_t* mb,
ase_size_t mblen,
ase_wchar_t* wc
);
/******/
/****f* ase.cmn.chr/ase_wctomb
* NAME
* ase_wctomb - convert a wide character to a multibyte sequence
*
* RETURN
* The ase_wctomb() function returns 0 if the wide character is illegal,
* mblen + 1 if mblen is not large enough to hold the multibyte sequence.
* On successful conversion, it returns the number of bytes in the sequence.
*
* SYNOPSIS
*/
ase_size_t ase_wctomb (
ase_wchar_t wc,
ase_mchar_t* mb,
ase_size_t mblen
);
/******/
#ifdef __cplusplus
}

View File

@ -1,5 +1,5 @@
/*
* $Id: str.h 389 2008-09-26 08:01:24Z baconevi $
* $Id: str.h 430 2008-10-17 11:43:20Z baconevi $
*
* {License}
*/
@ -229,10 +229,6 @@ ase_long_t ase_strxtolong (const ase_char_t* str, ase_size_t len);
ase_uint_t ase_strxtouint (const ase_char_t* str, ase_size_t len);
ase_ulong_t ase_strxtoulong (const ase_char_t* str, ase_size_t len);
/*
* dynamic string
*/
ase_str_t* ase_str_open (
ase_mmgr_t* mmgr,
ase_size_t ext,
@ -346,16 +342,94 @@ ase_size_t ase_str_setcapa (
ase_size_t capa /* a new capacity */
);
void ase_str_clear (ase_str_t* str);
void ase_str_swap (ase_str_t* str, ase_str_t* str2);
void ase_str_clear (
ase_str_t* str
);
ase_size_t ase_str_cpy (ase_str_t* str, const ase_char_t* s);
ase_size_t ase_str_ncpy (ase_str_t* str, const ase_char_t* s, ase_size_t len);
void ase_str_swap (
ase_str_t* str,
ase_str_t* str2
);
ase_size_t ase_str_cat (ase_str_t* str, const ase_char_t* s);
ase_size_t ase_str_ncat (ase_str_t* str, const ase_char_t* s, ase_size_t len);
ase_size_t ase_str_ccat (ase_str_t* str, ase_char_t c);
ase_size_t ase_str_nccat (ase_str_t* str, ase_char_t c, ase_size_t len);
ase_size_t ase_str_cpy (
ase_str_t* str,
const ase_char_t* s
);
ase_size_t ase_str_ncpy (
ase_str_t* str,
const ase_char_t* s,
ase_size_t len
);
ase_size_t ase_str_cat (
ase_str_t* str,
const ase_char_t* s
);
ase_size_t ase_str_ncat (
ase_str_t* str,
const ase_char_t* s,
ase_size_t len
);
ase_size_t ase_str_ccat (
ase_str_t* str,
ase_char_t c
);
ase_size_t ase_str_nccat (
ase_str_t* str,
ase_char_t c,
ase_size_t len
);
ase_size_t ase_mbstowcs (
const ase_mchar_t* mbs,
ase_wchar_t* wcs,
ase_size_t* wcslen
);
/****f* ase.cmn.str/ase_mbsntowcsn
* NAME
* ase_mbsntowcsn - convert a multibyte string to a wide character string
*
* RETURN
* The ase_mbsntowcsn() function returns the number of bytes handled.
*
* SYNOPSIS
*/
ase_size_t ase_mbsntowcsn (
const ase_mchar_t* mbs,
ase_size_t mbslen,
ase_wchar_t* wcs,
ase_size_t* wcslen
);
/******/
ase_size_t ase_wcstombs (
const ase_wchar_t* wcs,
ase_mchar_t* mbs,
ase_size_t* mbslen
);
/****f* ase.cmn.str/ase_wcsntombsn
* NAME
* ase_wcsntombsn - convert a wide character string to a multibyte string
*
* RETURN
* The ase_wcsntombsn() function returns the number of wide characters handled.
*
* SYNOPSIS
*/
ase_size_t ase_wcsntombsn (
const ase_wchar_t* wcs,
ase_size_t wcslen,
ase_mchar_t* mbs,
ase_size_t* mbslen
);
/******/
#ifdef __cplusplus
}

View File

@ -15,6 +15,7 @@ enum
ASE_TIO_ENOMEM, /* out of memory */
ASE_TIO_ENOSPC, /* no more space */
ASE_TIO_EILSEQ, /* illegal sequence */
ASE_TIO_EICSEQ, /* incomplete sequence */
ASE_TIO_EILCHR, /* illegal character */
ASE_TIO_ENOINF, /* no input function attached */
ASE_TIO_EINPUT, /* input function returned an error */
@ -114,6 +115,19 @@ int ase_tio_fini (
ase_tio_t* tio
);
void* ase_tio_getextension (
ase_tio_t* tio
);
ase_mmgr_t* ase_tio_getmmgr (
ase_tio_t* tio
);
void ase_tio_setmmgr (
ase_tio_t* tio,
ase_mmgr_t* mmgr
);
/*
* FUNCTION: ase_tio_geterrnum
* Returns an error code

View File

@ -87,6 +87,12 @@
/* Define to 1 if you have the `mbrtowc' function. */
#undef HAVE_MBRTOWC
/* Define to 1 if you have the `mbsnrtowcs' function. */
#undef HAVE_MBSNRTOWCS
/* Define to 1 if you have the `mbsrtowcs' function. */
#undef HAVE_MBSRTOWCS
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
@ -126,6 +132,12 @@
/* Define to 1 if you have the `wcrtomb' function. */
#undef HAVE_WCRTOMB
/* Define to 1 if you have the `wcsnrtombs' function. */
#undef HAVE_WCSNRTOMBS
/* Define to 1 if you have the `wcsrtombs' function. */
#undef HAVE_WCSRTOMBS
/* Define to 1 if you have the <wctype.h> header file. */
#undef HAVE_WCTYPE_H

View File

@ -1,5 +1,5 @@
/*
* $Id: Awk.cpp 399 2008-09-29 10:26:26Z baconevi $
* $Id: Awk.cpp 430 2008-10-17 11:43:20Z baconevi $
*
* {License}
*/
@ -229,7 +229,7 @@ void Awk::Argument::clear ()
this->inum = 0;
}
void* Awk::Argument::operator new (size_t n, awk_t* awk) throw ()
void* Awk::Argument::operator new (::size_t n, awk_t* awk) throw ()
{
void* ptr = ase_awk_alloc (awk, ASE_SIZEOF(awk) + n);
if (ptr == ASE_NULL) return ASE_NULL;
@ -238,7 +238,7 @@ void* Awk::Argument::operator new (size_t n, awk_t* awk) throw ()
return (char*)ptr+ASE_SIZEOF(awk);
}
void* Awk::Argument::operator new[] (size_t n, awk_t* awk) throw ()
void* Awk::Argument::operator new[] (::size_t n, awk_t* awk) throw ()
{
void* ptr = ase_awk_alloc (awk, ASE_SIZEOF(awk) + n);
if (ptr == ASE_NULL) return ASE_NULL;

View File

@ -3,6 +3,7 @@
*/
#include <ase/cmn/chr.h>
#include "mem.h"
#ifdef HAVE_WCHAR_H
#include <wchar.h>
@ -56,12 +57,6 @@ ase_size_t ase_wctomb (ase_wchar_t wc, ase_mchar_t* mb, ase_size_t mblen)
size_t n;
mbstate_t mbs = { 0 };
if (mblen < MB_CUR_MAX)
{
/* buffer too small */
return mblen + 1;
}
/* man mbsinit
* For 8-bit encodings, all states are equivalent to the initial state.
* For multibyte encodings like UTF-8, EUC-*, BIG5 or SJIS, the wide char
@ -71,8 +66,22 @@ ase_size_t ase_wctomb (ase_wchar_t wc, ase_mchar_t* mb, ase_size_t mblen)
* of a character.
*/
n = wcrtomb (mb, wc, &mbs);
if (n == (size_t)-1) n = 0; // illegal character
if (mblen < MB_CUR_MAX)
{
	/* The caller's buffer may be smaller than the longest possible
	 * sequence, so convert into a scratch buffer first and copy the
	 * result out only when it is known to fit. */
	ase_mchar_t buf[MB_CUR_MAX];

	n = wcrtomb (buf, wc, &mbs);
	/* Check for failure first: (size_t)-1 is greater than any mblen
	 * and would otherwise be mistaken for "buffer too small". */
	if (n == (size_t)-1) return 0; /* illegal character */
	if (n > mblen) return mblen + 1; /* buffer too small */

	/* copy only the bytes wcrtomb() actually produced */
	ASE_MEMCPY (mb, buf, n);
}
else
{
	/* mb can hold any sequence of the current locale; convert in place */
	n = wcrtomb (mb, wc, &mbs);
	if (n == (size_t)-1) return 0; /* illegal character */
	if (n > mblen) return mblen + 1; /* buffer too small */
}
return n;
#else

View File

@ -1,11 +1,15 @@
/*
* $Id: str_cnv.c 332 2008-08-18 11:21:48Z baconevi $
* $Id: str_cnv.c 430 2008-10-17 11:43:20Z baconevi $
*
* {License}
*/
#include <ase/cmn/str.h>
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif
int ase_strtoi (const ase_char_t* str)
{
int v;
@ -117,3 +121,108 @@ ase_ulong_t ase_strxtoulong (const ase_char_t* str, ase_size_t len)
ASE_STRXTONUM (v, str, len, ASE_NULL, 10);
return v;
}
/*
 * Converts the null-terminated multibyte string mbs into the wide
 * character buffer wcs.
 *
 * On entry, *wcslen is the capacity of wcs in wide characters, including
 * room for the terminating null. On return, *wcslen is the number of wide
 * characters stored, not counting the terminating null.
 *
 * Returns the number of bytes of mbs that were converted. Nothing is
 * written and 0 is returned when *wcslen is 0.
 */
ase_size_t ase_mbstowcs (
	const ase_mchar_t* mbs, ase_wchar_t* wcs, ase_size_t* wcslen)
{
	const ase_mchar_t* mp;
	ase_size_t mlen, wlen;

	/* no room even for the terminating null */
	if (*wcslen <= 0) return 0;

	/* ase_mbtowc needs the input length, so measure mbs first.
	 * A separate cursor is used so that mbs still points at the
	 * beginning of the string when it is handed to ase_mbsntowcsn. */
	for (mp = mbs; *mp != '\0'; mp++) /* nothing */;
	mlen = mp - mbs;

	/* Reserve one slot for the terminating null. A capacity of 1
	 * leaves wlen at 0, which yields an empty string and *wcslen
	 * of 0, consistent with every other capacity. */
	wlen = *wcslen - 1;
	mlen = ase_mbsntowcsn (mbs, mlen, wcs, &wlen);
	wcs[wlen] = L'\0';
	*wcslen = wlen;

	/* TODO: wcslen should include the length including null? */
	return mlen;
}
/*
 * Converts up to mbslen bytes of mbs into wide characters stored in wcs.
 *
 * On entry, *wcslen is the capacity of wcs; on return, it is the number
 * of wide characters written. No terminating null is appended.
 *
 * Conversion stops when the input is exhausted, when wcs is full, or when
 * ase_mbtowc reports an invalid or incomplete sequence.
 *
 * Returns the number of bytes processed.
 */
ase_size_t ase_mbsntowcsn (
	const ase_mchar_t* mbs, ase_size_t mbslen,
	ase_wchar_t* wcs, ase_size_t* wcslen)
{
	const ase_mchar_t* src = mbs;
	ase_wchar_t* dst = wcs;
	ase_wchar_t* const limit = wcs + *wcslen;
	ase_size_t remaining = mbslen;

	while (remaining > 0 && dst < limit)
	{
		ase_size_t used = ase_mbtowc (src, remaining, dst);

		/* 0: wrong sequence, more than remaining: insufficient input */
		if (used == 0 || used > remaining) break;

		dst++;
		src += used;
		remaining -= used;
	}

	*wcslen = dst - wcs;
	return src - mbs;
}
/*
 * Converts the null-terminated wide character string wcs into the
 * multibyte buffer mbs.
 *
 * On entry, *mbslen is the capacity of mbs in bytes, including room for
 * the terminating null. On return, *mbslen is the number of bytes stored,
 * not counting the terminating null. The output is null-terminated
 * whenever the capacity is at least 1.
 *
 * Returns the number of wide characters handled. The caller can detect an
 * illegal character or a short buffer by checking whether wcs[return value]
 * is the terminating null.
 *
 * The definition carries the ase_ prefix to match its declaration in
 * <ase/cmn/str.h>; the unprefixed name belongs to the C library.
 */
ase_size_t ase_wcstombs (
	const ase_wchar_t* wcs, ase_mchar_t* mbs, ase_size_t* mbslen)
{
	const ase_wchar_t* p = wcs;
	ase_size_t len = *mbslen;

	while (*p != L'\0' && len > 1)
	{
		/* Offer one byte less than what remains so that the
		 * terminating null always fits after the last sequence. */
		ase_size_t avail = len - 1;
		ase_size_t n = ase_wctomb (*p, mbs, avail);
		if (n == 0 || n > avail)
		{
			/* illegal character or buffer not enough */
			break;
		}
		mbs += n; len -= n; p++;
	}

	/* len is the unused part of the buffer; report the bytes written */
	*mbslen -= len;
	if (len > 0) *mbs = '\0';

	return p - wcs;
}
/*
 * Converts up to wcslen wide characters of wcs into the multibyte
 * buffer mbs.
 *
 * On entry, *mbslen is the capacity of mbs in bytes; on return, it is the
 * number of bytes written. No terminating null is appended.
 *
 * Returns the number of wide characters handled. A value smaller than
 * wcslen tells the caller that conversion stopped early.
 */
ase_size_t ase_wcsntombsn (
	const ase_wchar_t* wcs, ase_size_t wcslen,
	ase_mchar_t* mbs, ase_size_t* mbslen)
{
	const ase_wchar_t* cur = wcs;
	const ase_wchar_t* const stop = wcs + wcslen;
	ase_size_t room = *mbslen;

	for (;;)
	{
		ase_size_t n;

		if (cur >= stop || room <= 0) break;

		n = ase_wctomb (*cur, mbs, room);
		/* illegal character or buffer not enough */
		if (n == 0 || n > room) break;

		mbs += n;
		room -= n;
		cur++;
	}

	/* room is what is left unused; the difference is what was written */
	*mbslen -= room;
	return cur - wcs;
}

View File

@ -69,6 +69,21 @@ int ase_tio_fini (ase_tio_t* tio)
return 0;
}
/*
 * Returns the address immediately following the tio object.
 * NOTE(review): presumably this is the extension area allocated together
 * with the object when it was opened - confirm against ase_tio_open.
 */
void* ase_tio_getextension (ase_tio_t* tio)
{
	ase_tio_t* past = tio + 1;
	return past;
}
/* Returns the memory manager currently stored in the tio object. */
ase_mmgr_t* ase_tio_getmmgr (ase_tio_t* tio)
{
	ase_mmgr_t* current = tio->mmgr;
	return current;
}
/* Stores mmgr as the memory manager of the tio object. */
void ase_tio_setmmgr (ase_tio_t* tio, ase_mmgr_t* mmgr)
{
	(*tio).mmgr = mmgr;
}
int ase_tio_geterrnum (ase_tio_t* tio)
{
return tio->errnum;
@ -82,6 +97,7 @@ const ase_char_t* ase_tio_geterrstr (ase_tio_t* tio)
ASE_T("out of memory"),
ASE_T("no more space"),
ASE_T("illegal multibyte sequence"),
ASE_T("incomplete multibyte sequence"),
ASE_T("illegal wide character"),
ASE_T("no input function attached"),
ASE_T("input function returned an error"),

View File

@ -39,7 +39,17 @@ ase_ssize_t ase_tio_getc (ase_tio_t* tio, ase_char_t* c)
n = tio->input_func (
ASE_TIO_IO_DATA, tio->input_arg,
&tio->inbuf[left], ASE_COUNTOF(tio->inbuf) - left);
if (n == 0) return 0;
if (n == 0)
{
if (tio->inbuf_curp < tio->inbuf_len)
{
/* garbage left in the buffer */
tio->errnum = ASE_TIO_EICSEQ;
return -1;
}
return 0;
}
if (n <= -1)
{
tio->errnum = ASE_TIO_EINPUT;
@ -100,6 +110,7 @@ ase_ssize_t ase_tio_getc (ase_tio_t* tio, ase_char_t* c)
goto getc_conv;
}
#endif
n = ase_mbtowc (&tio->inbuf[tio->inbuf_curp], left, &curc);
if (n == 0)
{
@ -161,6 +172,7 @@ ase_ssize_t ase_tio_getsx (ase_tio_t* tio, ase_char_t* buf, ase_size_t size)
if (n == 0) break;
*p++ = c;
/* TODO: support a different line breaker */
if (c == ASE_T('\n')) break;
}
@ -194,6 +206,7 @@ ase_ssize_t ase_tio_getstr (ase_tio_t* tio, ase_str_t* buf)
return -1;
}
/* TODO: support a different line breaker */
if (c == ASE_T('\n')) break;
}