changed qse_awk_parsestd_t and related code.

changed to use windows API for WIN32 in slmb.c
This commit is contained in:
2012-01-06 14:38:11 +00:00
parent 42431d2642
commit 70090bc117
22 changed files with 665 additions and 521 deletions

View File

@ -37,59 +37,31 @@
#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif
qse_size_t qse_slmbrlen (
const qse_mchar_t* mb, qse_size_t mbl, qse_mbstate_t* state)
{
#if defined(HAVE_MBRLEN)
size_t n;
n = mbrlen (mb, mbl, (mbstate_t*)state);
if (n == 0) return 1; /* a null character */
if (n == (size_t)-1) return 0; /* invalid sequence */
if (n == (size_t)-2) return mbl + 1; /* incomplete sequence */
return (qse_size_t)n;
#if 0
n = mblen (mb, mbl);
if (n == (size_t)-1) return 0; /* invalid or incomplete sequence */
if (n == 0) return 1; /* a null character */
return (qse_size_t)n;
#endif
#else
#error #### NOT SUPPORTED ####
#if defined(_WIN32)
# include <windows.h>
#endif
}
/*
 * Converts the leading multibyte sequence in 'mb' (at most 'mbl' bytes)
 * to a wide character with mbrtowc(), using 'state' as the conversion state.
 *
 * Return value:
 *   0        the sequence is invalid
 *   mbl + 1  the sequence is incomplete
 *   1        a null character was converted
 *   n        otherwise, the value reported by mbrtowc()
 *
 * 'wc' may be QSE_NULL, in which case nothing is stored by this function.
 */
qse_size_t qse_slmbrtoslwc (
	const qse_mchar_t* mb, qse_size_t mbl,
	qse_wchar_t* wc, qse_mbstate_t* state)
{
#if defined(HAVE_MBRTOWC)
	size_t n;

	n = mbrtowc (wc, mb, mbl, (mbstate_t*)state);
	if (n == 0)
	{
		/* mbrtowc() accepts a null destination, so guard the store
		 * rather than dereferencing 'wc' unconditionally. */
		if (wc) *wc = QSE_WT('\0');
		return 1;
	}
	if (n == (size_t)-1) return 0; /* invalid sequence */
	if (n == (size_t)-2) return mbl + 1; /* incomplete sequence */
	return (qse_size_t)n;
#else
	#error #### NOT SUPPORTED ####
#endif
}
qse_size_t qse_slwcrtoslmb (
qse_wchar_t wc, qse_mchar_t* mb,
qse_size_t mbl, qse_mbstate_t* state)
{
#if defined(HAVE_WCRTOMB)
#if defined(_WIN32)
int n;
n = WideCharToMultiByte (
CP_THREAD_ACP, 0 /*WC_ERR_INVALID_CHARS*/,
&wc, 1, mb, mbl, NULL, NULL);
if (n == 0)
{
DWORD e = GetLastError();
if (e == ERROR_INSUFFICIENT_BUFFER) return mbl + 1;
/*if (e == ERROR_NO_UNICODE_TRANSLATION) return 0;*/
/* treat all other errors as an invalid unicode character */
}
return (qse_size_t)n;
#elif defined(HAVE_WCRTOMB)
size_t n;
if (mbl < QSE_MBLEN_MAX)
@ -123,6 +95,90 @@ qse_size_t qse_slwcrtoslmb (
#endif
}
/*
 * Converts the leading multibyte sequence in 'mb' (at most 'mbl' bytes)
 * to a single wide character.
 *
 * On WIN32 the sequence length is decided by IsDBCSLeadByteEx() and the
 * conversion is done by MultiByteToWideChar(); 'state' is not used there.
 * Elsewhere mbrtowc() is called with 'state' as the conversion state.
 *
 * Return value:
 *   0        the sequence is invalid
 *   mbl + 1  the sequence is incomplete
 *   n        otherwise, the number of bytes that make up the character
 *            (1 for a null character on the mbrtowc() path)
 *
 * 'wc' may be QSE_NULL if the caller does not need the converted character.
 */
qse_size_t qse_slmbrtoslwc (
	const qse_mchar_t* mb, qse_size_t mbl,
	qse_wchar_t* wc, qse_mbstate_t* state)
{
#if defined(_WIN32)
	qse_size_t dbcslen;
	qse_wchar_t scratch;
	int n;

	QSE_ASSERT (mb != QSE_NULL);
	QSE_ASSERT (mbl > 0);

	/* a DBCS lead byte announces a 2-byte character. anything else
	 * is treated as a single-byte character. */
	dbcslen = IsDBCSLeadByteEx(CP_THREAD_ACP, *mb)? 2: 1;
	if (mbl < dbcslen) return mbl + 1; /* incomplete sequence */

	/* MultiByteToWideChar() requires a real output buffer when the
	 * output size is non-zero. convert into a scratch character if the
	 * caller passed no destination so that a null 'wc' behaves the same
	 * way as it does on the mbrtowc() path. */
	n = MultiByteToWideChar (
		CP_THREAD_ACP, MB_ERR_INVALID_CHARS,
		mb, (int)dbcslen, (wc? wc: &scratch), 1);
	if (n == 0)
	{
		/* no distinction is made between the possible failure
		 * causes. all of them are reported as an invalid sequence. */
		return 0;
	}

	return dbcslen;
#elif defined(HAVE_MBRTOWC)
	size_t n;

	QSE_ASSERT (mb != QSE_NULL);
	QSE_ASSERT (mbl > 0);

	n = mbrtowc (wc, mb, mbl, (mbstate_t*)state);
	if (n == 0)
	{
		if (wc) *wc = QSE_WT('\0');
		return 1;
	}
	if (n == (size_t)-1) return 0; /* invalid sequence */
	if (n == (size_t)-2) return mbl + 1; /* incomplete sequence */
	return (qse_size_t)n;
#else
	#error #### NOT SUPPORTED ####
#endif
}
/*
 * Determines how many bytes the leading multibyte character in 'mb'
 * occupies, looking at no more than 'mbl' bytes.
 *
 * On WIN32 the length is 2 if the first byte is a DBCS lead byte and 1
 * otherwise; 'state' is not used there. Elsewhere mbrlen() is called with
 * 'state' as the conversion state.
 *
 * Return value:
 *   0        the sequence is invalid (mbrlen() path only)
 *   mbl + 1  the sequence is incomplete
 *   1        a null character (mbrlen() path)
 *   n        otherwise, the length of the character in bytes
 */
qse_size_t qse_slmbrlen (
	const qse_mchar_t* mb, qse_size_t mbl, qse_mbstate_t* state)
{
#if defined(_WIN32)
	qse_size_t dbcslen;

	QSE_ASSERT (mb != QSE_NULL);
	QSE_ASSERT (mbl > 0);

	dbcslen = IsDBCSLeadByteEx(CP_THREAD_ACP, *mb)? 2: 1;
	if (mbl < dbcslen) return mbl + 1; /* incomplete sequence */
	return dbcslen;
#elif defined(HAVE_MBRLEN)
	size_t n;

	QSE_ASSERT (mb != QSE_NULL);
	QSE_ASSERT (mbl > 0);

	n = mbrlen (mb, mbl, (mbstate_t*)state);
	if (n == 0) return 1; /* a null character */
	if (n == (size_t)-1) return 0; /* invalid sequence */
	if (n == (size_t)-2) return mbl + 1; /* incomplete sequence */
	return (qse_size_t)n;
#else
	#error #### NOT SUPPORTED ####
#endif
}
/* man mbsinit
* For 8-bit encodings, all states are equivalent to the initial state.
* For multibyte encodings like UTF-8, EUC-*, BIG5 or SJIS, the wide char
@ -131,11 +187,6 @@ qse_size_t qse_slwcrtoslmb (
* mbrtowc(3) do produce non-initial states when interrupted in the middle
* of a character.
*/
/*
 * Convenience form of qse_slmbrlen() for callers that keep no conversion
 * state: every call starts from a freshly zeroed state object.
 * The return value is whatever qse_slmbrlen() returns for 'mb' and 'mbl'.
 */
qse_size_t qse_slmblen (const qse_mchar_t* mb, qse_size_t mbl)
{
	qse_mbstate_t initial = { { 0, } };
	qse_size_t len;

	len = qse_slmbrlen (mb, mbl, &initial);
	return len;
}
qse_size_t qse_slmbtoslwc (const qse_mchar_t* mb, qse_size_t mbl, qse_wchar_t* wc)
{
@ -149,7 +200,20 @@ qse_size_t qse_slwctoslmb (qse_wchar_t wc, qse_mchar_t* mb, qse_size_t mbl)
return qse_slwcrtoslmb (wc, mb, mbl, &state);
}
int qse_slmblenmax (void)
qse_size_t qse_slmblen (const qse_mchar_t* mb, qse_size_t mbl)
{
return MB_CUR_MAX;
qse_mbstate_t state = { { 0, } };
return qse_slmbrlen (mb, mbl, &state);
}
/*
 * Returns the upper bound used for the byte length of one multibyte
 * character: a fixed 2 on WIN32, MB_CUR_MAX everywhere else.
 */
qse_size_t qse_slmblenmax (void)
{
	qse_size_t maxlen;

#if defined(_WIN32)
	/* Windows doesn't handle UTF-8 properly even when the code page
	 * is CP_UTF8 (65001). Use the functions in utf8.c for UTF-8
	 * handling on Windows. */
	maxlen = 2;
#else
	maxlen = MB_CUR_MAX;
#endif

	return maxlen;
}

View File

@ -73,53 +73,40 @@ static QSE_INLINE __utf8_t* get_utf8_slot (qse_wchar_t uc)
return QSE_NULL; /* invalid character */
}
/*
 * Returns the 'length' recorded in the table slot that get_utf8_slot()
 * selects for 'uc', or 0 if no slot exists for the character.
 */
qse_size_t qse_uctoutf8len (qse_wchar_t uc)
{
	__utf8_t* slot;

	slot = get_utf8_slot (uc);
	if (slot == QSE_NULL) return 0; /* no slot for this character */

	return (qse_size_t)slot->length;
}
/* wctomb for utf8/unicode */
qse_size_t qse_uctoutf8 (qse_wchar_t uc, qse_mchar_t* utf8, qse_size_t size)
{
__utf8_t* cur = get_utf8_slot (uc);
int index;
if (cur == QSE_NULL) return 0; /* illegal character */
if (cur->length > size)
if (utf8 && cur->length <= size)
{
/* buffer not big enough. index indicates the buffer
* size needed */
return size + 1;
int index = cur->length;
while (index > 1)
{
/*
* 0x3F: 00111111
* 0x80: 10000000
*/
utf8[--index] = (uc & 0x3F) | 0x80;
uc >>= 6;
}
utf8[0] = uc | cur->fbyte;
}
index = cur->length;
while (index > 1)
{
/*
* 0x3F: 00111111
* 0x80: 10000000
*/
utf8[--index] = (uc & 0x3F) | 0x80;
uc >>= 6;
}
utf8[0] = uc | cur->fbyte;
/* small buffer is also indicated by this return value
* greater than 'size'. */
return (qse_size_t)cur->length;
}
/* mbtowc for utf8/unicode */
qse_size_t qse_utf8touc (
const qse_mchar_t* utf8, qse_size_t size, qse_wchar_t* uc)
{
__utf8_t* cur, * end;
#if 0
qse_mchar_t c, t;
int count = 0;
#endif
QSE_ASSERT (utf8 != QSE_NULL);
QSE_ASSERT (size > 0);
QSE_ASSERT (QSE_SIZEOF(qse_mchar_t) == 1);
QSE_ASSERT (QSE_SIZEOF(qse_wchar_t) >= 2);
@ -130,81 +117,37 @@ qse_size_t qse_utf8touc (
{
if ((utf8[0] & cur->mask) == cur->fbyte)
{
int i;
qse_wchar_t w;
if (size < cur->length) return size + 1;
w = utf8[0] & cur->fmask;
for (i = 1; i < cur->length; i++)
{
if (!(utf8[i] & 0x80)) return 0;
w = (w << 6) | (utf8[i] & 0x3F);
}
*uc = w;
return (qse_size_t)cur->length;
}
cur++;
}
return 0; /* error - invalid sequence */
#if 0
c = *utf8;
w = c;
while (cur < end)
{
count++;
if ((c & cur->mask) == cur->fbyte)
{
w &= cur->upper;
if (w < cur->lower) break; /* wrong value */
*uc = w;
return (qse_size_t)count;
}
if (size <= count) break; /* insufficient input */
utf8++; /* advance to the next character in the sequence */
t = (*utf8 ^ 0x80) & 0xFF;
if (t & 0xC0) break;
w = (w << 6) | t;
cur++;
}
return 0; /* error - invalid sequence */
#endif
}
/* mblen for utf8 */
qse_size_t qse_utf8len (const qse_mchar_t* utf8, qse_size_t len)
{
__utf8_t* cur, * end;
end = utf8_table + QSE_COUNTOF(utf8_table);
cur = utf8_table;
while (cur < end)
{
if ((utf8[0] & cur->mask) == cur->fbyte)
{
int i;
/* if len is less that cur->length, the incomplete-seqeunce
/* if size is less that cur->length, the incomplete-seqeunce
* error is naturally indicated. so validate the string
* only if len is as large as cur->length. */
* only if size is as large as cur->length. */
if (len >= cur->length)
if (size >= cur->length)
{
for (i = 1; i < cur->length; i++)
int i;
if (uc)
{
/* in utf8, trailing bytes are all
* set with 0x80. if not, invalid */
if (!(utf8[i] & 0x80)) return 0;
qse_wchar_t w;
w = utf8[0] & cur->fmask;
for (i = 1; i < cur->length; i++)
{
/* in utf8, trailing bytes are all
* set with 0x80. if not, invalid */
if (!(utf8[i] & 0x80)) return 0;
w = (w << 6) | (utf8[i] & 0x3F);
}
*uc = w;
}
else
{
for (i = 1; i < cur->length; i++)
{
/* in utf8, trailing bytes are all
* set with 0x80. if not, invalid */
if (!(utf8[i] & 0x80)) return 0;
}
}
}
@ -213,7 +156,7 @@ qse_size_t qse_utf8len (const qse_mchar_t* utf8, qse_size_t len)
* and
* the incomplete seqeunce error (len < cur->length).
*/
return (qse_size_t)cur->length;
return (qse_size_t)cur->length;
}
cur++;
}
@ -221,7 +164,13 @@ qse_size_t qse_utf8len (const qse_mchar_t* utf8, qse_size_t len)
return 0; /* error - invalid sequence */
}
/*
 * mblen for utf8: delegates to qse_utf8touc() with a null output pointer
 * and returns its result unchanged.
 */
qse_size_t qse_utf8len (const qse_mchar_t* utf8, qse_size_t size)
{
	qse_size_t n;

	n = qse_utf8touc (utf8, size, QSE_NULL);
	return n;
}
/* Returns the value of QSE_UTF8LEN_MAX as a qse_size_t. */
qse_size_t qse_utf8lenmax (void)
{
	qse_size_t maxlen = QSE_UTF8LEN_MAX;
	return maxlen;
}