/*
    Copyright (c) 2016-2018 Chung, Hyung-Hwan. All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:
    1. Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.
    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.

    THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
    IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
    NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
    THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "hak-chr.h"

/* TODO: handle explicit byte orders (UTF-16BE and UTF-16LE).
 *       Code units are currently read and written in host byte order. */

/* Inclusive bounds of the UTF-16 surrogate code unit ranges. */
enum
{
	HIGH_SURROGATE_START = 0xD800, /* first high (leading) surrogate */
	HIGH_SURROGATE_END   = 0xDBFF, /* last high (leading) surrogate */
	LOW_SURROGATE_START  = 0xDC00, /* first low (trailing) surrogate */
	LOW_SURROGATE_END    = 0xDFFF  /* last low (trailing) surrogate */
};

/*
 * Encodes the character 'uc' as UTF-16 into the byte buffer 'utf16'.
 *
 * uc    - character to encode.
 * utf16 - destination byte buffer. It may be a null pointer, in which case
 *         nothing is stored and only the required length is returned.
 * size  - capacity of 'utf16' in bytes.
 *
 * Returns the number of bytes the encoded form occupies: 2 for a single
 * code unit, 4 for a surrogate pair, or 0 if 'uc' cannot be encoded.
 * The bytes are stored only if 'utf16' is not null and 'size' is large
 * enough; a return value greater than 'size' therefore means that nothing
 * was stored.
 *
 * Code units are stored in host byte order.
 *
 * NOTE: values in the surrogate range (0xD800 - 0xDFFF) are not rejected;
 *       they are stored verbatim as a single code unit.
 */
hak_oow_t hak_uc_to_utf16 (hak_uch_t uc, hak_bch_t* utf16, hak_oow_t size)
{
	hak_uint16_t units[2] = { 0, 0 };
	hak_oow_t len, i;

	if (uc <= 0xFFFF)
	{
		units[0] = (hak_uint16_t)uc;
		len = 2;
	}
#if (HAK_SIZEOF_UCH_T > 2)
	else if (uc <= 0x10FFFF)
	{
		/* a supplementary character is split into two 10-bit halves
		 * after subtracting 0x10000, leaving a 20-bit offset */
		hak_uch_t off = uc - 0x10000;
		units[0] = (hak_uint16_t)(HIGH_SURROGATE_START | ((off >> 10) & 0x3FF));
		units[1] = (hak_uint16_t)(LOW_SURROGATE_START | (off & 0x3FF));
		len = 4;
	}
#endif
	else
	{
		return 0; /* illegal character */
	}

	if (utf16 && size >= len)
	{
		/* copy byte by byte so that 'utf16' doesn't have to be
		 * aligned for hak_uint16_t access */
		const hak_bch_t* src = (const hak_bch_t*)units;
		for (i = 0; i < len; i++) utf16[i] = src[i];
	}

	return len;
}

/*
 * Decodes one character from the UTF-16 byte sequence 'utf16'.
 *
 * utf16 - source bytes holding code units in host byte order.
 * size  - number of bytes available in 'utf16'.
 * uc    - receives the decoded character on success.
 *
 * Returns the number of bytes consumed: 2 for a single code unit or 4 for
 * a surrogate pair. Returns 0 if the sequence is incomplete (fewer bytes
 * than required) or illegal (a low surrogate without a preceding high
 * surrogate, or a high surrogate not followed by a low surrogate).
 * '*uc' is left untouched when 0 is returned.
 *
 * Surrogate pairs are decoded only when HAK_SIZEOF_UCH_T is greater than 2;
 * otherwise any surrogate code unit yields 0.
 */
hak_oow_t hak_utf16_to_uc (const hak_bch_t* utf16, hak_oow_t size, hak_uch_t* uc)
{
	hak_uint16_t units[2] = { 0, 0 };
	/* the code units are assembled byte by byte so that 'utf16' doesn't
	 * have to be aligned for hak_uint16_t access */
	hak_bch_t* raw = (hak_bch_t*)units;
	hak_oow_t i;

	if (size < 2) return 0; /* incomplete sequence */

	for (i = 0; i < 2; i++) raw[i] = utf16[i];

	if (units[0] < HIGH_SURROGATE_START || units[0] > LOW_SURROGATE_END)
	{
		/* BMP - U+0000 - U+D7FF, U+E000 - U+FFFF */
		*uc = units[0];
		return 2;
	}

#if (HAK_SIZEOF_UCH_T > 2)
	if (units[0] <= HIGH_SURROGATE_END) /* high-surrogate */
	{
		if (size < 4) return 0; /* incomplete */

		for (i = 2; i < 4; i++) raw[i] = utf16[i];

		if (units[1] >= LOW_SURROGATE_START && units[1] <= LOW_SURROGATE_END) /* low-surrogate */
		{
			/* widen before shifting so that the arithmetic is
			 * never carried out in a 16-bit int */
			hak_uch_t hi = (hak_uch_t)units[0] & 0x3FF;
			hak_uch_t lo = (hak_uch_t)units[1] & 0x3FF;
			*uc = ((hi << 10) | lo) + 0x10000;
			return 4;
		}
	}
#endif

	/* lone low surrogate, unpaired high surrogate, or surrogates not
	 * representable in hak_uch_t */
	return 0;
}