changed qse_awk_parsestd_t and related code.
changed to use windows API for WIN32 in slmb.c
This commit is contained in:
@ -37,59 +37,31 @@
|
||||
#ifdef HAVE_STDLIB_H
|
||||
# include <stdlib.h>
|
||||
#endif
|
||||
|
||||
qse_size_t qse_slmbrlen (
|
||||
const qse_mchar_t* mb, qse_size_t mbl, qse_mbstate_t* state)
|
||||
{
|
||||
#if defined(HAVE_MBRLEN)
|
||||
size_t n;
|
||||
|
||||
n = mbrlen (mb, mbl, (mbstate_t*)state);
|
||||
if (n == 0) return 1; /* a null character */
|
||||
|
||||
if (n == (size_t)-1) return 0; /* invalid sequence */
|
||||
if (n == (size_t)-2) return mbl + 1; /* incomplete sequence */
|
||||
|
||||
return (qse_size_t)n;
|
||||
|
||||
#if 0
|
||||
n = mblen (mb, mbl);
|
||||
if (n == (size_t)-1) return 0; /* invalid or incomplete sequence */
|
||||
if (n == 0) return 1; /* a null character */
|
||||
return (qse_size_t)n;
|
||||
#endif
|
||||
#else
|
||||
#error #### NOT SUPPORTED ####
|
||||
#if defined(_WIN32)
|
||||
# include <windows.h>
|
||||
#endif
|
||||
}
|
||||
|
||||
qse_size_t qse_slmbrtoslwc (
|
||||
const qse_mchar_t* mb, qse_size_t mbl,
|
||||
qse_wchar_t* wc, qse_mbstate_t* state)
|
||||
{
|
||||
#if defined(HAVE_MBRTOWC)
|
||||
size_t n;
|
||||
|
||||
n = mbrtowc (wc, mb, mbl, (mbstate_t*)state);
|
||||
if (n == 0)
|
||||
{
|
||||
*wc = QSE_WT('\0');
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (n == (size_t)-1) return 0; /* invalid sequence */
|
||||
if (n == (size_t)-2) return mbl + 1; /* incomplete sequence */
|
||||
return (qse_size_t)n;
|
||||
#else
|
||||
#error #### NOT SUPPORTED ####
|
||||
#endif
|
||||
}
|
||||
|
||||
qse_size_t qse_slwcrtoslmb (
|
||||
qse_wchar_t wc, qse_mchar_t* mb,
|
||||
qse_size_t mbl, qse_mbstate_t* state)
|
||||
{
|
||||
#if defined(HAVE_WCRTOMB)
|
||||
#if defined(_WIN32)
|
||||
int n;
|
||||
|
||||
n = WideCharToMultiByte (
|
||||
CP_THREAD_ACP, 0 /*WC_ERR_INVALID_CHARS*/,
|
||||
&wc, 1, mb, mbl, NULL, NULL);
|
||||
if (n == 0)
|
||||
{
|
||||
DWORD e = GetLastError();
|
||||
if (e == ERROR_INSUFFICIENT_BUFFER) return mbl + 1;
|
||||
/*if (e == ERROR_NO_UNICODE_TRANSLATION) return 0;*/
|
||||
/* treat all other erros as invalid unicode character */
|
||||
}
|
||||
|
||||
return (qse_size_t)n;
|
||||
|
||||
#elif defined(HAVE_WCRTOMB)
|
||||
size_t n;
|
||||
|
||||
if (mbl < QSE_MBLEN_MAX)
|
||||
@ -123,6 +95,90 @@ qse_size_t qse_slwcrtoslmb (
|
||||
#endif
|
||||
}
|
||||
|
||||
qse_size_t qse_slmbrtoslwc (
|
||||
const qse_mchar_t* mb, qse_size_t mbl,
|
||||
qse_wchar_t* wc, qse_mbstate_t* state)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
qse_size_t dbcslen;
|
||||
int n;
|
||||
|
||||
QSE_ASSERT (mb != QSE_NULL);
|
||||
QSE_ASSERT (mbl > 0);
|
||||
|
||||
dbcslen = IsDBCSLeadByteEx(CP_THREAD_ACP, *mb)? 2: 1;
|
||||
if (mbl < dbcslen) return mbl + 1; /* incomplete sequence */
|
||||
|
||||
n = MultiByteToWideChar (
|
||||
CP_THREAD_ACP, MB_ERR_INVALID_CHARS, mb, dbcslen, wc, 1);
|
||||
if (n == 0)
|
||||
{
|
||||
/*DWORD e = GetLastError();*/
|
||||
/*if (e == ERROR_NO_UNICODE_TRANSLATION) return 0;*/
|
||||
/*if (e == ERROR_INSUFFICIENT_BUFFER) return mbl + 1;*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
return dbcslen;
|
||||
|
||||
#elif defined(HAVE_MBRTOWC)
|
||||
size_t n;
|
||||
|
||||
QSE_ASSERT (mb != QSE_NULL);
|
||||
QSE_ASSERT (mbl > 0);
|
||||
|
||||
n = mbrtowc (wc, mb, mbl, (mbstate_t*)state);
|
||||
if (n == 0)
|
||||
{
|
||||
if (wc) *wc = QSE_WT('\0');
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (n == (size_t)-1) return 0; /* invalid sequence */
|
||||
if (n == (size_t)-2) return mbl + 1; /* incomplete sequence */
|
||||
return (qse_size_t)n;
|
||||
#else
|
||||
#error #### NOT SUPPORTED ####
|
||||
#endif
|
||||
}
|
||||
|
||||
qse_size_t qse_slmbrlen (
|
||||
const qse_mchar_t* mb, qse_size_t mbl, qse_mbstate_t* state)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
qse_size_t dbcslen;
|
||||
|
||||
QSE_ASSERT (mb != QSE_NULL);
|
||||
QSE_ASSERT (mbl > 0);
|
||||
|
||||
dbcslen = IsDBCSLeadByteEx(CP_THREAD_ACP, *mb)? 2: 1;
|
||||
if (mbl < dbcslen) return mbl + 1; /* incomplete sequence */
|
||||
return dbcslen;
|
||||
#elif defined(HAVE_MBRLEN)
|
||||
size_t n;
|
||||
|
||||
QSE_ASSERT (mb != QSE_NULL);
|
||||
QSE_ASSERT (mbl > 0);
|
||||
|
||||
n = mbrlen (mb, mbl, (mbstate_t*)state);
|
||||
if (n == 0) return 1; /* a null character */
|
||||
|
||||
if (n == (size_t)-1) return 0; /* invalid sequence */
|
||||
if (n == (size_t)-2) return mbl + 1; /* incomplete sequence */
|
||||
|
||||
return (qse_size_t)n;
|
||||
|
||||
#if 0
|
||||
n = mblen (mb, mbl);
|
||||
if (n == (size_t)-1) return 0; /* invalid or incomplete sequence */
|
||||
if (n == 0) return 1; /* a null character */
|
||||
return (qse_size_t)n;
|
||||
#endif
|
||||
#else
|
||||
#error #### NOT SUPPORTED ####
|
||||
#endif
|
||||
}
|
||||
|
||||
/* man mbsinit
|
||||
* For 8-bit encodings, all states are equivalent to the initial state.
|
||||
* For multibyte encodings like UTF-8, EUC-*, BIG5 or SJIS, the wide char‐
|
||||
@ -131,11 +187,6 @@ qse_size_t qse_slwcrtoslmb (
|
||||
* mbrtowc(3) do produce non-initial states when interrupted in the middle
|
||||
* of a character.
|
||||
*/
|
||||
qse_size_t qse_slmblen (const qse_mchar_t* mb, qse_size_t mbl)
|
||||
{
|
||||
qse_mbstate_t state = { { 0, } };
|
||||
return qse_slmbrlen (mb, mbl, &state);
|
||||
}
|
||||
|
||||
qse_size_t qse_slmbtoslwc (const qse_mchar_t* mb, qse_size_t mbl, qse_wchar_t* wc)
|
||||
{
|
||||
@ -149,7 +200,20 @@ qse_size_t qse_slwctoslmb (qse_wchar_t wc, qse_mchar_t* mb, qse_size_t mbl)
|
||||
return qse_slwcrtoslmb (wc, mb, mbl, &state);
|
||||
}
|
||||
|
||||
int qse_slmblenmax (void)
|
||||
qse_size_t qse_slmblen (const qse_mchar_t* mb, qse_size_t mbl)
|
||||
{
|
||||
return MB_CUR_MAX;
|
||||
qse_mbstate_t state = { { 0, } };
|
||||
return qse_slmbrlen (mb, mbl, &state);
|
||||
}
|
||||
|
||||
qse_size_t qse_slmblenmax (void)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
/* Windows doesn't handle utf8 properly even when your code page
|
||||
* is CP_UTF8(65001). you should use functions in utf8.c for utf8
|
||||
* handleing on windows. */
|
||||
return 2;
|
||||
#else
|
||||
return MB_CUR_MAX;
|
||||
#endif
|
||||
}
|
||||
|
@ -73,53 +73,40 @@ static QSE_INLINE __utf8_t* get_utf8_slot (qse_wchar_t uc)
|
||||
return QSE_NULL; /* invalid character */
|
||||
}
|
||||
|
||||
qse_size_t qse_uctoutf8len (qse_wchar_t uc)
|
||||
{
|
||||
__utf8_t* cur = get_utf8_slot (uc);
|
||||
return (cur == QSE_NULL)? 0: (qse_size_t)cur->length;
|
||||
}
|
||||
|
||||
/* wctomb for utf8/unicode */
|
||||
qse_size_t qse_uctoutf8 (qse_wchar_t uc, qse_mchar_t* utf8, qse_size_t size)
|
||||
{
|
||||
__utf8_t* cur = get_utf8_slot (uc);
|
||||
int index;
|
||||
|
||||
if (cur == QSE_NULL) return 0; /* illegal character */
|
||||
|
||||
if (cur->length > size)
|
||||
if (utf8 && cur->length <= size)
|
||||
{
|
||||
/* buffer not big enough. index indicates the buffer
|
||||
* size needed */
|
||||
return size + 1;
|
||||
int index = cur->length;
|
||||
while (index > 1)
|
||||
{
|
||||
/*
|
||||
* 0x3F: 00111111
|
||||
* 0x80: 10000000
|
||||
*/
|
||||
utf8[--index] = (uc & 0x3F) | 0x80;
|
||||
uc >>= 6;
|
||||
}
|
||||
|
||||
utf8[0] = uc | cur->fbyte;
|
||||
}
|
||||
|
||||
index = cur->length;
|
||||
while (index > 1)
|
||||
{
|
||||
/*
|
||||
* 0x3F: 00111111
|
||||
* 0x80: 10000000
|
||||
*/
|
||||
utf8[--index] = (uc & 0x3F) | 0x80;
|
||||
uc >>= 6;
|
||||
}
|
||||
|
||||
utf8[0] = uc | cur->fbyte;
|
||||
/* small buffer is also indicated by this return value
|
||||
* greater than 'size'. */
|
||||
return (qse_size_t)cur->length;
|
||||
}
|
||||
|
||||
/* mbtowc for utf8/unicode */
|
||||
qse_size_t qse_utf8touc (
|
||||
const qse_mchar_t* utf8, qse_size_t size, qse_wchar_t* uc)
|
||||
{
|
||||
__utf8_t* cur, * end;
|
||||
#if 0
|
||||
qse_mchar_t c, t;
|
||||
int count = 0;
|
||||
#endif
|
||||
|
||||
QSE_ASSERT (utf8 != QSE_NULL);
|
||||
QSE_ASSERT (size > 0);
|
||||
QSE_ASSERT (QSE_SIZEOF(qse_mchar_t) == 1);
|
||||
QSE_ASSERT (QSE_SIZEOF(qse_wchar_t) >= 2);
|
||||
|
||||
@ -130,81 +117,37 @@ qse_size_t qse_utf8touc (
|
||||
{
|
||||
if ((utf8[0] & cur->mask) == cur->fbyte)
|
||||
{
|
||||
int i;
|
||||
qse_wchar_t w;
|
||||
|
||||
if (size < cur->length) return size + 1;
|
||||
|
||||
w = utf8[0] & cur->fmask;
|
||||
for (i = 1; i < cur->length; i++)
|
||||
{
|
||||
if (!(utf8[i] & 0x80)) return 0;
|
||||
w = (w << 6) | (utf8[i] & 0x3F);
|
||||
}
|
||||
|
||||
*uc = w;
|
||||
return (qse_size_t)cur->length;
|
||||
}
|
||||
cur++;
|
||||
}
|
||||
|
||||
return 0; /* error - invalid sequence */
|
||||
|
||||
#if 0
|
||||
c = *utf8;
|
||||
w = c;
|
||||
|
||||
while (cur < end)
|
||||
{
|
||||
count++;
|
||||
|
||||
if ((c & cur->mask) == cur->fbyte)
|
||||
{
|
||||
w &= cur->upper;
|
||||
if (w < cur->lower) break; /* wrong value */
|
||||
*uc = w;
|
||||
return (qse_size_t)count;
|
||||
}
|
||||
|
||||
if (size <= count) break; /* insufficient input */
|
||||
utf8++; /* advance to the next character in the sequence */
|
||||
|
||||
t = (*utf8 ^ 0x80) & 0xFF;
|
||||
if (t & 0xC0) break;
|
||||
w = (w << 6) | t;
|
||||
|
||||
cur++;
|
||||
}
|
||||
|
||||
return 0; /* error - invalid sequence */
|
||||
#endif
|
||||
}
|
||||
|
||||
/* mblen for utf8 */
|
||||
qse_size_t qse_utf8len (const qse_mchar_t* utf8, qse_size_t len)
|
||||
{
|
||||
__utf8_t* cur, * end;
|
||||
|
||||
end = utf8_table + QSE_COUNTOF(utf8_table);
|
||||
cur = utf8_table;
|
||||
|
||||
while (cur < end)
|
||||
{
|
||||
if ((utf8[0] & cur->mask) == cur->fbyte)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* if len is less that cur->length, the incomplete-seqeunce
|
||||
/* if size is less that cur->length, the incomplete-seqeunce
|
||||
* error is naturally indicated. so validate the string
|
||||
* only if len is as large as cur->length. */
|
||||
* only if size is as large as cur->length. */
|
||||
|
||||
if (len >= cur->length)
|
||||
if (size >= cur->length)
|
||||
{
|
||||
for (i = 1; i < cur->length; i++)
|
||||
int i;
|
||||
|
||||
if (uc)
|
||||
{
|
||||
/* in utf8, trailing bytes are all
|
||||
* set with 0x80. if not, invalid */
|
||||
if (!(utf8[i] & 0x80)) return 0;
|
||||
qse_wchar_t w;
|
||||
|
||||
w = utf8[0] & cur->fmask;
|
||||
for (i = 1; i < cur->length; i++)
|
||||
{
|
||||
/* in utf8, trailing bytes are all
|
||||
* set with 0x80. if not, invalid */
|
||||
if (!(utf8[i] & 0x80)) return 0;
|
||||
w = (w << 6) | (utf8[i] & 0x3F);
|
||||
}
|
||||
*uc = w;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 1; i < cur->length; i++)
|
||||
{
|
||||
/* in utf8, trailing bytes are all
|
||||
* set with 0x80. if not, invalid */
|
||||
if (!(utf8[i] & 0x80)) return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -213,7 +156,7 @@ qse_size_t qse_utf8len (const qse_mchar_t* utf8, qse_size_t len)
|
||||
* and
|
||||
* the incomplete seqeunce error (len < cur->length).
|
||||
*/
|
||||
return (qse_size_t)cur->length;
|
||||
return (qse_size_t)cur->length;
|
||||
}
|
||||
cur++;
|
||||
}
|
||||
@ -221,7 +164,13 @@ qse_size_t qse_utf8len (const qse_mchar_t* utf8, qse_size_t len)
|
||||
return 0; /* error - invalid sequence */
|
||||
}
|
||||
|
||||
qse_size_t qse_utf8len (const qse_mchar_t* utf8, qse_size_t size)
|
||||
{
|
||||
return qse_utf8touc (utf8, size, QSE_NULL);
|
||||
}
|
||||
|
||||
qse_size_t qse_utf8lenmax (void)
|
||||
{
|
||||
return QSE_UTF8LEN_MAX;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user