This commit is contained in:
28
lib/utf8.c
28
lib/utf8.c
@ -56,7 +56,7 @@ struct __utf8_t
|
||||
|
||||
typedef struct __utf8_t __utf8_t;
|
||||
|
||||
static __utf8_t utf8_table[] =
|
||||
static __utf8_t utf8_table[] =
|
||||
{
|
||||
{0x00000000ul, 0x0000007Ful, 0x00, 0x80, 0x7F, 1},
|
||||
{0x00000080ul, 0x000007FFul, 0xC0, 0xE0, 0x1F, 2},
|
||||
@ -80,7 +80,7 @@ static HAWK_INLINE __utf8_t* get_utf8_slot (hawk_uch_t uc)
|
||||
end = utf8_table + HAWK_COUNTOF(utf8_table);
|
||||
cur = utf8_table;
|
||||
|
||||
while (cur < end)
|
||||
while (cur < end)
|
||||
{
|
||||
if (uc >= cur->lower && uc <= cur->upper) return cur;
|
||||
cur++;
|
||||
@ -98,7 +98,7 @@ hawk_oow_t hawk_uc_to_utf8 (hawk_uch_t uc, hawk_bch_t* utf8, hawk_oow_t size)
|
||||
if (utf8 && cur->length <= size)
|
||||
{
|
||||
int index = cur->length;
|
||||
while (index > 1)
|
||||
while (index > 1)
|
||||
{
|
||||
/*
|
||||
* 0x3F: 00111111
|
||||
@ -128,16 +128,16 @@ hawk_oow_t hawk_utf8_to_uc (const hawk_bch_t* utf8, hawk_oow_t size, hawk_uch_t*
|
||||
end = utf8_table + HAWK_COUNTOF(utf8_table);
|
||||
cur = utf8_table;
|
||||
|
||||
while (cur < end)
|
||||
while (cur < end)
|
||||
{
|
||||
if ((utf8[0] & cur->mask) == cur->fbyte)
|
||||
if ((utf8[0] & cur->mask) == cur->fbyte)
|
||||
{
|
||||
|
||||
/* if size is less that cur->length, the incomplete-seqeunce
|
||||
/* if size is less that cur->length, the incomplete-seqeunce
|
||||
* error is naturally indicated. so validate the string
|
||||
* only if size is as large as cur->length. */
|
||||
|
||||
if (size >= cur->length)
|
||||
if (size >= cur->length)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -149,12 +149,12 @@ hawk_oow_t hawk_utf8_to_uc (const hawk_bch_t* utf8, hawk_oow_t size, hawk_uch_t*
|
||||
for (i = 1; i < cur->length; i++)
|
||||
{
|
||||
/* in utf8, trailing bytes are all
|
||||
* set with 0x80.
|
||||
* set with 0x80.
|
||||
*
|
||||
* 10XXXXXX & 11000000 => 10000000
|
||||
*
|
||||
* if not, invalid. */
|
||||
if ((utf8[i] & 0xC0) != 0x80) return 0;
|
||||
if ((utf8[i] & 0xC0) != 0x80) return 0;
|
||||
w = (w << 6) | (utf8[i] & 0x3F);
|
||||
}
|
||||
*uc = w;
|
||||
@ -164,19 +164,19 @@ hawk_oow_t hawk_utf8_to_uc (const hawk_bch_t* utf8, hawk_oow_t size, hawk_uch_t*
|
||||
for (i = 1; i < cur->length; i++)
|
||||
{
|
||||
/* in utf8, trailing bytes are all
|
||||
* set with 0x80.
|
||||
* set with 0x80.
|
||||
*
|
||||
* 10XXXXXX & 11000000 => 10000000
|
||||
*
|
||||
* if not, invalid. */
|
||||
if ((utf8[i] & 0xC0) != 0x80) return 0;
|
||||
if ((utf8[i] & 0xC0) != 0x80) return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* this return value can indicate both
|
||||
* the correct length (size >= cur->length)
|
||||
* and
|
||||
/* this return value can indicate both
|
||||
* the correct length (size >= cur->length)
|
||||
* and
|
||||
* the incomplete seqeunce error (size < cur->length).
|
||||
*/
|
||||
return (hawk_oow_t)cur->length;
|
||||
|
Reference in New Issue
Block a user