Limited the UTF-8 ranges to those permitted by RFC 3629 (up to U+10FFFF)
This commit is contained in:
		| @ -24,6 +24,8 @@ | ||||
|  | ||||
| #include "hio-prv.h" | ||||
|  | ||||
| /*#define RETAIN_RFC2279 1*/ | ||||
|  | ||||
| /* | ||||
|  * from RFC 2279 UTF-8, a transformation format of ISO 10646 | ||||
|  * | ||||
| @ -34,6 +36,12 @@ | ||||
|  * 4:4 00010000-001FFFFF  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | ||||
|  * inv 00200000-03FFFFFF  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | ||||
|  * inv 04000000-7FFFFFFF  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | ||||
|  * | ||||
|  * RFC 3629 restricts UTF-8 to code points up to U+10FFFF, leaving these ranges: | ||||
|  * 1:2 00000000-0000007F  0xxxxxxx | ||||
|  * 2:2 00000080-000007FF  110xxxxx 10xxxxxx | ||||
|  * 3:2 00000800-0000FFFF  1110xxxx 10xxxxxx 10xxxxxx | ||||
|  * 4:4 00010000-0010FFFF  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | ||||
|  */ | ||||
|  | ||||
| struct __utf8_t | ||||
| @ -53,9 +61,13 @@ static __utf8_t utf8_table[] = | ||||
| 	{0x00000000ul, 0x0000007Ful, 0x00, 0x80, 0x7F, 1}, | ||||
| 	{0x00000080ul, 0x000007FFul, 0xC0, 0xE0, 0x1F, 2}, | ||||
| 	{0x00000800ul, 0x0000FFFFul, 0xE0, 0xF0, 0x0F, 3}, | ||||
| #if defined(RETAIN_RFC2279) | ||||
| 	{0x00010000ul, 0x001FFFFFul, 0xF0, 0xF8, 0x07, 4}, | ||||
| 	{0x00200000ul, 0x03FFFFFFul, 0xF8, 0xFC, 0x03, 5}, | ||||
| 	{0x04000000ul, 0x7FFFFFFFul, 0xFC, 0xFE, 0x01, 6} | ||||
| #else | ||||
| 	{0x00010000ul, 0x0010FFFFul, 0xF0, 0xF8, 0x07, 4} | ||||
| #endif | ||||
| }; | ||||
|  | ||||
| static HIO_INLINE __utf8_t* get_utf8_slot (hio_uch_t uc) | ||||
|  | ||||
| @ -50,6 +50,25 @@ int main () | ||||
| 		T_ASSERT1 (v == 0 && *endptr == '\0' && is_sober == 1, "integer in E notation"); | ||||
| 	} | ||||
|  | ||||
| 	{ | ||||
| 		hio_bch_t tmp[10]; | ||||
| 		hio_oow_t x; | ||||
| 		hio_uch_t uc; | ||||
|  | ||||
| 		x = hio_uc_to_utf8(0x2665, tmp, HIO_COUNTOF(tmp)); | ||||
| 		T_ASSERT1 (x == 3 && (hio_uint8_t)tmp[0] == 0xE2 && (hio_uint8_t)tmp[1] == 0x99 && (hio_uint8_t)tmp[2] == 0xA5, "unicode to utf8 conversion"); | ||||
|  | ||||
| 		x = hio_utf8_to_uc(tmp, x, &uc); | ||||
| 		T_ASSERT1 (x == 3 && uc == 0x2665, "utf8 to unicode conversion"); | ||||
|  | ||||
| 	#if (HIO_SIZEOF_UCH_T > 2) | ||||
| 		x = hio_uc_to_utf8(0x1F3E9, tmp, HIO_COUNTOF(tmp)); | ||||
| 		T_ASSERT1 (x == 4 && (hio_uint8_t)tmp[0] == 0xF0 && (hio_uint8_t)tmp[1] == 0x9F && (hio_uint8_t)tmp[2] == 0x8F && (hio_uint8_t)tmp[3] == 0xA9, "unicode to utf8 conversion"); | ||||
|  | ||||
| 		x = hio_utf8_to_uc(tmp, x, &uc); | ||||
| 		T_ASSERT1 (x == 4 && uc == 0x1F3E9, "utf8 to unicode conversion"); | ||||
| 	#endif | ||||
| 	} | ||||
| 	return 0; | ||||
|  | ||||
| oops: | ||||
|  | ||||
		Reference in New Issue
	
	Block a user