added utf8 functions
This commit is contained in:
		@ -347,15 +347,15 @@ package body H3.Arrays is
 | 
			
		||||
		return System_Size'First;
 | 
			
		||||
	end Find;
 | 
			
		||||
 | 
			
		||||
	function "=" (Obj: in Elastic_Array; Obj2: in Elastic_Array) return Boolean is
 | 
			
		||||
	function Equals (Obj: in Elastic_Array; Obj2: in Elastic_Array) return Boolean is
 | 
			
		||||
	begin
 | 
			
		||||
		return Obj.Buffer = Obj2.Buffer or else Obj.Buffer.Slot(Get_First_Index(Obj) .. Get_Last_Index(Obj)) = Obj2.Buffer.Slot(Get_First_Index(Obj2) .. Get_Last_Index(Obj2));
 | 
			
		||||
	end "=";
 | 
			
		||||
	end Equals;
 | 
			
		||||
 | 
			
		||||
	function "=" (Obj: in Elastic_Array; Obj2: in Item_Array) return Boolean is
 | 
			
		||||
	function Equals (Obj: in Elastic_Array; Obj2: in Item_Array) return Boolean is
 | 
			
		||||
	begin
 | 
			
		||||
		return Obj.Buffer.Slot(Get_First_Index(Obj) .. Get_Last_Index(Obj)) = Obj2;
 | 
			
		||||
	end "=";
 | 
			
		||||
	end Equals;
 | 
			
		||||
 | 
			
		||||
	-- ---------------------------------------------------------------------
 | 
			
		||||
	-- Controlled Management
 | 
			
		||||
 | 
			
		||||
@ -69,8 +69,11 @@ package H3.Arrays is
 | 
			
		||||
	function Find (Obj: in Elastic_Array; V: in Item_Type; Start_Pos: in System_Index; Find_Dir: in Direction := DIRECTION_FORWARD) return System_Size;
 | 
			
		||||
	function Find (Obj: in Elastic_Array; V: in Item_Array; Start_Pos: in System_Index; Find_Dir: in Direction := DIRECTION_FORWARD) return System_Size;
 | 
			
		||||
 | 
			
		||||
	function "=" (Obj: in Elastic_Array; Obj2: in Elastic_Array) return Boolean;
 | 
			
		||||
	function "=" (Obj: in Elastic_Array; Obj2: in Item_Array) return Boolean;
 | 
			
		||||
	function Equals (Obj: in Elastic_Array; Obj2: in Elastic_Array) return Boolean;
 | 
			
		||||
	function Equals (Obj: in Elastic_Array; Obj2: in Item_Array) return Boolean;
 | 
			
		||||
 | 
			
		||||
	function "=" (Obj: in Elastic_Array; Obj2: in Elastic_Array) return Boolean renames Equals;
 | 
			
		||||
	function "=" (Obj: in Elastic_Array; Obj2: in Item_Array) return Boolean renames Equals;
 | 
			
		||||
 | 
			
		||||
private
 | 
			
		||||
	type Buffer_Record(Capa: System_Size) is limited record
 | 
			
		||||
 | 
			
		||||
@ -1,7 +1,12 @@
 | 
			
		||||
with H3.Utf8;
 | 
			
		||||
with ada.text_io;
 | 
			
		||||
 | 
			
		||||
package body H3.Compilers is
 | 
			
		||||
	type Char_Array is array(System_Index range<>) of Standard.Character;
 | 
			
		||||
	package Utf8 is new H3.Utf8(Standard.Character, S.Rune, Char_Array, S.Rune_Array);
 | 
			
		||||
 | 
			
		||||
	LB_EOF: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_F,R.V.Right_Arrow); -- <EOF>
 | 
			
		||||
	LB_XINCLUDE: constant S.Rune_Array := (R.V.Number_Sign,R.V.LC_I,R.V.LC_N,R.V.LC_C,R.V.LC_L,R.V.LC_U,R.V.LC_D,R.V.LC_E); -- #include
 | 
			
		||||
 | 
			
		||||
	procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State) is
 | 
			
		||||
	begin
 | 
			
		||||
@ -21,21 +26,13 @@ package body H3.Compilers is
 | 
			
		||||
		Set_Lexer_State (C, State, R.To_Rune(Code));
 | 
			
		||||
	end Set_Lexer_State;
 | 
			
		||||
 | 
			
		||||
	procedure Got_Token (C: in out Compiler) is
 | 
			
		||||
	procedure Set_Parser_State (C: in out Compiler; State: in Parser_State) is
 | 
			
		||||
	begin
 | 
			
		||||
		--case C.P.State IS
 | 
			
		||||
		--	when START =>
 | 
			
		||||
		--		null;
 | 
			
		||||
		--end case;
 | 
			
		||||
 | 
			
		||||
ada.text_io.put (C.Tk.Id'Img);
 | 
			
		||||
ada.text_io.put (" ");
 | 
			
		||||
for i in C.Tk.Buf.Get_First_Index .. C.Tk.Buf.Get_Last_Index loop
 | 
			
		||||
	ada.text_io.put (standard.character'val(S.Rune'Pos(C.Tk.Buf.Get_Item(i))));
 | 
			
		||||
end loop;
 | 
			
		||||
ada.text_io.put_line("");
 | 
			
		||||
 | 
			
		||||
		C.Ps.State := State;
 | 
			
		||||
	end Set_Parser_State;
 | 
			
		||||
 | 
			
		||||
	procedure Parse_Start (C: in out Compiler) is
 | 
			
		||||
	begin
 | 
			
		||||
		case C.Tk.Id is
 | 
			
		||||
			when TK_BSTR =>
 | 
			
		||||
				null;
 | 
			
		||||
@ -46,8 +43,11 @@ ada.text_io.put_line("");
 | 
			
		||||
			when TK_CSTR =>
 | 
			
		||||
				null;
 | 
			
		||||
			when TK_DIRECTIVE =>
 | 
			
		||||
				--Push_Feed_Layer (...
 | 
			
		||||
				null;
 | 
			
		||||
				if C.Tk.Buf.Equals(LB_XINCLUDE) then
 | 
			
		||||
					Set_Parser_State (C, PS_INCLUDE);
 | 
			
		||||
				else
 | 
			
		||||
					raise Syntax_Error;
 | 
			
		||||
				end if;
 | 
			
		||||
			when TK_EOF =>
 | 
			
		||||
				null;
 | 
			
		||||
			when TK_EOL =>
 | 
			
		||||
@ -65,6 +65,69 @@ ada.text_io.put_line("");
 | 
			
		||||
			when TK_SEMICOLON =>
 | 
			
		||||
				null;
 | 
			
		||||
		end case;
 | 
			
		||||
	end Parse_Start;
 | 
			
		||||
 | 
			
		||||
	procedure Start_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is
 | 
			
		||||
		Top: System_Index;
 | 
			
		||||
	begin
 | 
			
		||||
		if C.St.Top = C.St.Items'Last then
 | 
			
		||||
			raise Syntax_Error; -- TODO: inclusion depth too deep
 | 
			
		||||
		end if;
 | 
			
		||||
 | 
			
		||||
		Top := C.St.Top + 1;
 | 
			
		||||
		Ada.Text_IO.Open (C.St.Items(Top).Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name)));
 | 
			
		||||
		C.St.Top := Top;
 | 
			
		||||
	end Start_Inclusion;
 | 
			
		||||
 | 
			
		||||
	procedure End_Inclusion (C: in out Compiler) is
 | 
			
		||||
	begin
 | 
			
		||||
		Ada.Text_IO.Close (C.St.Items(C.St.Top).Handle);
 | 
			
		||||
		C.St.Top := C.St.Top - 1;
 | 
			
		||||
	end End_Inclusion;
 | 
			
		||||
 | 
			
		||||
	procedure Parse_Include (C: in out Compiler) is
 | 
			
		||||
	begin
 | 
			
		||||
		if C.Tk.Id = TK_CSTR then
 | 
			
		||||
			-- arrange to feed more data from the included file.
 | 
			
		||||
			Start_Inclusion (C, S.To_Rune_Array(C.Tk.Buf));
 | 
			
		||||
			null;
 | 
			
		||||
		else
 | 
			
		||||
			raise Syntax_Error; -- string literal required
 | 
			
		||||
		end if;
 | 
			
		||||
	end Parse_Include;
 | 
			
		||||
 | 
			
		||||
	procedure Parse_Include_End (C: in out Compiler) is
 | 
			
		||||
	begin
 | 
			
		||||
		if C.Tk.Id /= TK_SEMICOLON then
 | 
			
		||||
			raise Syntax_Error;
 | 
			
		||||
		end if;
 | 
			
		||||
 | 
			
		||||
		-- TODO: put the state back to START???
 | 
			
		||||
	end Parse_Include_End;
 | 
			
		||||
 | 
			
		||||
	procedure Got_Token (C: in out Compiler) is
 | 
			
		||||
	begin
 | 
			
		||||
		--case C.P.State IS
 | 
			
		||||
		--	when START =>
 | 
			
		||||
		--		null;
 | 
			
		||||
		--end case;
 | 
			
		||||
 | 
			
		||||
ada.text_io.put (C.Tk.Id'Img);
 | 
			
		||||
ada.text_io.put (" ");
 | 
			
		||||
for i in C.Tk.Buf.Get_First_Index .. C.Tk.Buf.Get_Last_Index loop
 | 
			
		||||
	ada.text_io.put (standard.character'val(S.Rune'Pos(C.Tk.Buf.Get_Item(i))));
 | 
			
		||||
end loop;
 | 
			
		||||
ada.text_io.put_line("");
 | 
			
		||||
 | 
			
		||||
		case C.Ps.State is
 | 
			
		||||
			when PS_START => 
 | 
			
		||||
				Parse_Start (C);
 | 
			
		||||
			when PS_INCLUDE =>
 | 
			
		||||
				Parse_Include (C);
 | 
			
		||||
			when others =>
 | 
			
		||||
				raise Syntax_Error; -- TODO: change this...
 | 
			
		||||
		end case;
 | 
			
		||||
 | 
			
		||||
	end Got_Token;
 | 
			
		||||
 | 
			
		||||
	procedure Start_Token (C: in out Compiler) is
 | 
			
		||||
@ -208,6 +271,18 @@ end if;
 | 
			
		||||
	begin
 | 
			
		||||
		for i in Data'Range loop
 | 
			
		||||
			Feed_Char_Code (C, R.To_Code(Data(i)));
 | 
			
		||||
 | 
			
		||||
			if C.St.Top > 0 then
 | 
			
		||||
				declare
 | 
			
		||||
					Ch: Standard.Character;
 | 
			
		||||
				begin		
 | 
			
		||||
					while not Ada.Text_IO.End_Of_File(C.St.Items(C.St.Top).Handle) loop
 | 
			
		||||
						Ada.Text_IO.Get (C.St.Items(C.St.Top).Handle, Ch);
 | 
			
		||||
						Feed_Char_Code (C, Standard.Character'Pos(Ch));
 | 
			
		||||
						--if inclusion stack is not Empty???
 | 
			
		||||
					end loop;
 | 
			
		||||
				end;
 | 
			
		||||
			end if;
 | 
			
		||||
		end loop;
 | 
			
		||||
	end Feed;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
with H3.Runes;
 | 
			
		||||
with H3.Strings;
 | 
			
		||||
with Ada.Text_IO;
 | 
			
		||||
 | 
			
		||||
generic
 | 
			
		||||
	type Rune_Type is (<>);
 | 
			
		||||
@ -9,7 +10,7 @@ package H3.Compilers is
 | 
			
		||||
 | 
			
		||||
	Syntax_Error: exception;
 | 
			
		||||
 | 
			
		||||
	type Compiler is tagged private;
 | 
			
		||||
	type Compiler is tagged limited private;
 | 
			
		||||
 | 
			
		||||
	procedure Feed (C: in out Compiler; Data: in S.Rune_Array);
 | 
			
		||||
	procedure End_Feed (C: in out Compiler);
 | 
			
		||||
@ -28,6 +29,17 @@ private
 | 
			
		||||
		State: Lexer_State := LX_START;
 | 
			
		||||
	end record;
 | 
			
		||||
 | 
			
		||||
	type Stream is record
 | 
			
		||||
		Handle: Ada.Text_IO.File_Type;
 | 
			
		||||
		--Handle: System_Size;
 | 
			
		||||
	end record;
 | 
			
		||||
 | 
			
		||||
	type Stream_Array is array(System_Index range <>) of Stream;
 | 
			
		||||
	type Stream_Stack(Capa: System_Index) is record
 | 
			
		||||
		Items: Stream_Array(System_Index'First .. Capa);
 | 
			
		||||
		Top: System_Size := 0;
 | 
			
		||||
	end record;
 | 
			
		||||
	
 | 
			
		||||
	type Token_Id is (
 | 
			
		||||
		TK_BSTR,
 | 
			
		||||
		TK_BYTE,
 | 
			
		||||
@ -48,13 +60,18 @@ private
 | 
			
		||||
		Buf: S.Elastic_String;
 | 
			
		||||
	end record;
 | 
			
		||||
 | 
			
		||||
	type Parser_State is (START, INCLUDE);
 | 
			
		||||
	type Parser_State is (
 | 
			
		||||
		PS_START,
 | 
			
		||||
		PS_INCLUDE
 | 
			
		||||
	);
 | 
			
		||||
	type Parser is record
 | 
			
		||||
		State: Parser_State := START;
 | 
			
		||||
		State: Parser_State := PS_START;
 | 
			
		||||
	end record;
 | 
			
		||||
 | 
			
		||||
	type Compiler is tagged record
 | 
			
		||||
	type Compiler is tagged limited record
 | 
			
		||||
		Lx: Lexer;
 | 
			
		||||
		Tk: Token;
 | 
			
		||||
		Ps: Parser;
 | 
			
		||||
		St: Stream_Stack(32);
 | 
			
		||||
	end record;
 | 
			
		||||
end H3.Compilers;
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										217
									
								
								lib2/h3-utf8.adb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										217
									
								
								lib2/h3-utf8.adb
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,217 @@
 | 
			
		||||
with Interfaces;
 | 
			
		||||
 | 
			
		||||
package body H3.Utf8 is
 | 
			
		||||
 | 
			
		||||
--|----------------------------------------------------------------------------
 | 
			
		||||
--| From RFC 2279 UTF-8, a transformation format of ISO 10646
 | 
			
		||||
--|
 | 
			
		||||
--|    UCS-4 range (hex.) UTF-8 octet sequence (binary)
 | 
			
		||||
--| 1:2 00000000-0000007F 0xxxxxxx
 | 
			
		||||
--| 2:2 00000080-000007FF 110xxxxx 10xxxxxx
 | 
			
		||||
--| 3:2 00000800-0000FFFF 1110xxxx 10xxxxxx 10xxxxxx
 | 
			
		||||
--| 4:4 00010000-001FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 | 
			
		||||
--| inv 00200000-03FFFFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 | 
			
		||||
--| inv 04000000-7FFFFFFF 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 | 
			
		||||
--|----------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
	--type Uint8 is mod 2 ** 8;
 | 
			
		||||
	--type Uint32 is mod 2 ** 32;
 | 
			
		||||
	use type Interfaces.Unsigned_8;
 | 
			
		||||
	use type Interfaces.Unsigned_32;
 | 
			
		||||
	subtype Uint8 is Interfaces.Unsigned_8;
 | 
			
		||||
	subtype Uint32 is Interfaces.Unsigned_32;
 | 
			
		||||
 | 
			
		||||
	type Conv_Record is record
 | 
			
		||||
		Lower: Uint32;
 | 
			
		||||
		Upper: Uint32;
 | 
			
		||||
 | 
			
		||||
		Fbyte: Uint8; 
 | 
			
		||||
		Mask: Uint8; -- Mask for getting the fixed bits in the first byte.
 | 
			
		||||
		             -- (First-Byte and Mask) = Fbyte
 | 
			
		||||
 | 
			
		||||
		Fmask: Uint8; -- Mask for getting the actual values bits off the first byte.
 | 
			
		||||
 | 
			
		||||
		Length: System_Size; -- Number of bytes
 | 
			
		||||
	end record;
 | 
			
		||||
 | 
			
		||||
	type Conv_Record_Array is array(System_Index range<>) of Conv_Record;
 | 
			
		||||
 | 
			
		||||
	Conv_Table: constant Conv_Record_Array := (
 | 
			
		||||
		(16#0000_0000#, 16#0000_007F#, 2#0000_0000#, 2#1000_0000#, 2#0111_1111#, 1),
 | 
			
		||||
		(16#0000_0080#, 16#0000_07FF#, 2#1100_0000#, 2#1110_0000#, 2#0001_1111#, 2),
 | 
			
		||||
		(16#0000_0800#, 16#0000_FFFF#, 2#1110_0000#, 2#1111_0000#, 2#0000_1111#, 3),
 | 
			
		||||
		(16#0001_0000#, 16#001F_FFFF#, 2#1111_0000#, 2#1111_1000#, 2#0000_0111#, 4),
 | 
			
		||||
		(16#0020_0000#, 16#03FF_FFFF#, 2#1111_1000#, 2#1111_1100#, 2#0000_0011#, 5),
 | 
			
		||||
		(16#0400_0000#, 16#7FFF_FFFF#, 2#1111_1100#, 2#1111_1110#, 2#0000_0001#, 6)
 | 
			
		||||
	);
 | 
			
		||||
 | 
			
		||||
	function Get_Utf8_Slot (UV: in Uint32) return System_Size is
 | 
			
		||||
		pragma Inline (Get_Utf8_Slot);
 | 
			
		||||
	begin
 | 
			
		||||
		for I in Conv_Table'Range loop
 | 
			
		||||
			if UV >= Conv_Table(I).Lower and then UV <= Conv_Table(I).Upper then
 | 
			
		||||
				return I;
 | 
			
		||||
			end if;
 | 
			
		||||
		end loop;
 | 
			
		||||
		return System_Size'First;
 | 
			
		||||
	end  Get_Utf8_Slot;
 | 
			
		||||
 | 
			
		||||
	function From_Unicode_Character (Chr: in Unicode_Character) return Utf8_String is
 | 
			
		||||
		UV: Uint32;
 | 
			
		||||
		I: System_Size;
 | 
			
		||||
	begin
 | 
			
		||||
		UV := Unicode_Character'Pos(Chr);
 | 
			
		||||
 | 
			
		||||
		I := Get_Utf8_Slot(UV);
 | 
			
		||||
		if I not in System_Index'Range then
 | 
			
		||||
			raise Invalid_Unicode_Character;
 | 
			
		||||
		end if;
 | 
			
		||||
 | 
			
		||||
		declare
 | 
			
		||||
			Result: Utf8_String (1 .. System_Index(Conv_Table(I).Length));
 | 
			
		||||
		begin
 | 
			
		||||
			for J in reverse Result'First + 1 .. Result'Last loop
 | 
			
		||||
				-- 2#0011_1111#: 16#3F#
 | 
			
		||||
				-- 2#1000_0000#: 16#80#
 | 
			
		||||
				Result(J) := Utf8_Character'Val((UV and Uint32'(2#0011_1111#)) or Uint32'(2#1000_0000#));
 | 
			
		||||
				--UV := UV / (2 ** 6); --UV := UV >> 6;
 | 
			
		||||
				UV := Interfaces.Shift_Right (UV, 6);
 | 
			
		||||
			end loop;
 | 
			
		||||
 | 
			
		||||
			Result(Result'First) := Utf8_Character'Val(UV or Uint32(Conv_Table(I).Fbyte));
 | 
			
		||||
			return Result;
 | 
			
		||||
		end;
 | 
			
		||||
	end From_Unicode_Character;
 | 
			
		||||
 | 
			
		||||
	function From_Unicode_String (Str: in Unicode_String) return Utf8_String is
 | 
			
		||||
		-- this function has high stack pressure if the input string is too long
 | 
			
		||||
		-- TODO: create a procedure to overcome this problem.
 | 
			
		||||
		Tmp: System_Size;
 | 
			
		||||
	begin
 | 
			
		||||
		-- Calculate the length first
 | 
			
		||||
		Tmp := 0;
 | 
			
		||||
		for I in Str'Range loop
 | 
			
		||||
			declare
 | 
			
		||||
				Utf8: Utf8_String := From_Unicode_Character(Chr => Str(I));
 | 
			
		||||
			begin
 | 
			
		||||
				Tmp := Tmp + Utf8'Length;
 | 
			
		||||
			end;
 | 
			
		||||
		end loop;
 | 
			
		||||
 | 
			
		||||
		declare
 | 
			
		||||
			Result: Utf8_String (1 .. Tmp);
 | 
			
		||||
		begin
 | 
			
		||||
			Tmp := Result'First;
 | 
			
		||||
			for I in Str'Range loop
 | 
			
		||||
				declare
 | 
			
		||||
					Utf8: Utf8_String := From_Unicode_Character(Str(I));
 | 
			
		||||
				begin
 | 
			
		||||
					Result(Tmp .. Tmp + Utf8'Length - 1) := Utf8;
 | 
			
		||||
					Tmp := Tmp + Utf8'Length;
 | 
			
		||||
				end;
 | 
			
		||||
			end loop;
 | 
			
		||||
			return Result;
 | 
			
		||||
		end;
 | 
			
		||||
	end From_Unicode_String;
 | 
			
		||||
 | 
			
		||||
	function Sequence_Length (Seq: in Utf8_Character) return System_Size is
 | 
			
		||||
	begin
 | 
			
		||||
		for I in Conv_Table'Range loop
 | 
			
		||||
			if (Utf8_Character'Pos(Seq) and Conv_Table(I).Mask) = Conv_Table(I).Fbyte then
 | 
			
		||||
				return Conv_Table(I).Length;
 | 
			
		||||
			end if;
 | 
			
		||||
		end loop;
 | 
			
		||||
		return System_Size'First;
 | 
			
		||||
	end Sequence_Length;
 | 
			
		||||
 | 
			
		||||
	procedure To_Unicode_Character (Seq:     in  Utf8_String; 
 | 
			
		||||
	                                Seq_Len: out System_Size;
 | 
			
		||||
	                                Chr:     out Unicode_Character) is
 | 
			
		||||
		W: Uint32;
 | 
			
		||||
	begin
 | 
			
		||||
		for I in Conv_Table'Range loop
 | 
			
		||||
 | 
			
		||||
			-- Check if the first byte matches the desired bit patterns.
 | 
			
		||||
			if (Utf8_Character'Pos(Seq(Seq'First)) and Conv_Table(I).Mask) = Conv_Table(I).Fbyte then
 | 
			
		||||
				
 | 
			
		||||
				if Seq'Length < Conv_Table(I).Length then
 | 
			
		||||
					raise Insufficient_Utf8_Sequence;
 | 
			
		||||
				end if;
 | 
			
		||||
 | 
			
		||||
				-- Get the values bits off the first byte.
 | 
			
		||||
				W := Utf8_Character'Pos(Seq(Seq'First)) and Uint32(Conv_Table(I).Fmask);
 | 
			
		||||
 | 
			
		||||
				-- Get the values bits off subsequent bytes.
 | 
			
		||||
				for J in 1 .. Conv_Table(I).Length - 1 loop
 | 
			
		||||
					if (Utf8_Character'Pos(Seq(Seq'First + J)) and Uint32'(2#1100_0000#)) /= Uint32'(2#1000_0000#) then
 | 
			
		||||
						-- Each UTF8 byte except the first must be set with 2#1000_0000.
 | 
			
		||||
						raise Invalid_Utf8_Sequence;
 | 
			
		||||
					end if;
 | 
			
		||||
					W := Interfaces.Shift_Left(W, 6) or (Utf8_Character'Pos(Seq(Seq'First + J)) and Uint32'(2#0011_1111#)); 
 | 
			
		||||
				end loop;
 | 
			
		||||
 | 
			
		||||
				-- Return the character matching the word
 | 
			
		||||
				Chr := Unicode_Character'Val(W);
 | 
			
		||||
				Seq_Len := Conv_Table(I).Length;
 | 
			
		||||
				return;
 | 
			
		||||
			end if;
 | 
			
		||||
		end loop;
 | 
			
		||||
		
 | 
			
		||||
		raise Invalid_Utf8_Sequence;
 | 
			
		||||
	end To_Unicode_Character;
 | 
			
		||||
 | 
			
		||||
	function To_Unicode_Character (Seq: in Utf8_String) return Unicode_Character is
 | 
			
		||||
		Seq_Len: System_Size;
 | 
			
		||||
		Chr: Unicode_Character;
 | 
			
		||||
	begin
 | 
			
		||||
		To_Unicode_Character (Seq, Seq_Len, Chr);
 | 
			
		||||
		return Chr;
 | 
			
		||||
	end To_Unicode_Character;
 | 
			
		||||
 | 
			
		||||
	procedure To_Unicode_String (Seq:     in  Utf8_String; 
 | 
			
		||||
	                             Seq_Len: out System_Size;
 | 
			
		||||
	                             Str:     out Unicode_String;
 | 
			
		||||
	                             Str_Len: out System_Size) is
 | 
			
		||||
		Seq_Pos: System_Index := Seq'First;
 | 
			
		||||
		Str_Pos: System_Index := Str'First;
 | 
			
		||||
		Len: System_Size;
 | 
			
		||||
	begin
 | 
			
		||||
		while Seq_Pos <= Seq'Last and then Str_Pos <= Str'Last loop
 | 
			
		||||
			To_Unicode_Character(Seq(Seq_Pos .. Seq'Last), Len, Str(Str_Pos));
 | 
			
		||||
			Seq_Pos := Seq_Pos + Len;
 | 
			
		||||
			Str_Pos := Str_Pos + 1;
 | 
			
		||||
		end loop;
 | 
			
		||||
 | 
			
		||||
		Seq_Len := Seq_Pos - Seq'First;
 | 
			
		||||
		Str_Len := Str_Pos - Str'First;
 | 
			
		||||
	end To_Unicode_String;
 | 
			
		||||
 | 
			
		||||
	function To_Unicode_String (Seq: in Utf8_String) return Unicode_String is
 | 
			
		||||
		UL: System_Size := 0;
 | 
			
		||||
	begin
 | 
			
		||||
		declare
 | 
			
		||||
			Chr: Unicode_Character;
 | 
			
		||||
			Pos: System_Index := Seq'First;
 | 
			
		||||
			Seq_Len: System_Size;
 | 
			
		||||
		begin
 | 
			
		||||
			while Pos <= Seq'Last loop
 | 
			
		||||
				To_Unicode_Character(Seq(Pos .. Seq'Last), Seq_Len, Chr);
 | 
			
		||||
				UL := UL + 1;
 | 
			
		||||
				Pos := Pos + Seq_Len;
 | 
			
		||||
			end loop;
 | 
			
		||||
		end;
 | 
			
		||||
 | 
			
		||||
		declare
 | 
			
		||||
			Str: Unicode_String (1 .. UL);
 | 
			
		||||
			Pos: System_Index := Seq'First;
 | 
			
		||||
			Seq_Len: System_Size;
 | 
			
		||||
		begin
 | 
			
		||||
			for I in Str'Range loop
 | 
			
		||||
				To_Unicode_Character(Seq(Pos .. Seq'Last), Seq_Len, Str(I));
 | 
			
		||||
				Pos := Pos + Seq_Len;
 | 
			
		||||
			end loop;
 | 
			
		||||
			return Str;
 | 
			
		||||
		end;
 | 
			
		||||
	end To_Unicode_String;
 | 
			
		||||
 | 
			
		||||
end H3.Utf8;
 | 
			
		||||
							
								
								
									
										51
									
								
								lib2/h3-utf8.ads
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								lib2/h3-utf8.ads
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,51 @@
 | 
			
		||||
generic 
 | 
			
		||||
	type Slim_Character is (<>);
 | 
			
		||||
	type Wide_Character is (<>);
 | 
			
		||||
	type Slim_String is array(System_Index range<>) of Slim_Character;
 | 
			
		||||
	type Wide_String is array(System_Index range<>) of Wide_Character;
 | 
			
		||||
package H3.Utf8 is
 | 
			
		||||
	--pragma Preelaborate (Utf8);
 | 
			
		||||
 | 
			
		||||
	--Invalid_Unicode_Character: exception renames Invalid_Wide_Character;
 | 
			
		||||
	--Invalid_Utf8_Sequence: exception renames Invalid_Slim_Sequence;
 | 
			
		||||
	--Insufficient_Utf8_Sequence: exception renames Insifficient_Slim_Sequence;
 | 
			
		||||
	Invalid_Unicode_Character: exception;
 | 
			
		||||
	Invalid_Utf8_Sequence: exception;
 | 
			
		||||
	Insufficient_Utf8_Sequence: exception;
 | 
			
		||||
 | 
			
		||||
	subtype Utf8_Character is Slim_Character;
 | 
			
		||||
	subtype Unicode_Character is Wide_Character;
 | 
			
		||||
	subtype Utf8_String is Slim_String;
 | 
			
		||||
	subtype Unicode_String is Wide_String;
 | 
			
		||||
	subtype Utf8_Sequence is Utf8_String;
 | 
			
		||||
 | 
			
		||||
	--type Unicode_Character_Kit is record
 | 
			
		||||
	--	Seq: System_Size; -- sequence length
 | 
			
		||||
	--	Chr: Unicode_Character;
 | 
			
		||||
	--end record;
 | 
			
		||||
 | 
			
		||||
	--type Unicode_String_Kit(Length: System_Size) is record
 | 
			
		||||
	--	Seq: System_Size;
 | 
			
		||||
	--	Str: Unicode_String(System_Index'First .. Length);
 | 
			
		||||
	--end record;
 | 
			
		||||
 | 
			
		||||
	function From_Unicode_Character (Chr: in Unicode_Character) return Utf8_String;
 | 
			
		||||
	function From_Unicode_String (Str: in Unicode_String) return Utf8_String;
 | 
			
		||||
	--| The Sequence_Length function returns the length of a full UTF8 
 | 
			
		||||
	--| sequence representing a single Unicode character given the first
 | 
			
		||||
	--| sequence byte. It returns 0 if the first byte is invalid.
 | 
			
		||||
	function Sequence_Length (Seq: in Utf8_Character) return System_Size;
 | 
			
		||||
 | 
			
		||||
	procedure To_Unicode_Character (Seq:     in  Utf8_String; 
 | 
			
		||||
	                                Seq_Len: out System_Size;
 | 
			
		||||
	                                Chr:     out Unicode_Character);
 | 
			
		||||
 | 
			
		||||
	procedure To_Unicode_String (Seq:     in  Utf8_String; 
 | 
			
		||||
	                             Seq_Len: out System_Size;
 | 
			
		||||
	                             Str:     out Unicode_String;
 | 
			
		||||
	                             Str_Len: out System_Size);
 | 
			
		||||
 | 
			
		||||
	function To_Unicode_Character (Seq: in Utf8_String) return Unicode_Character;
 | 
			
		||||
	function To_Unicode_String (Seq: in Utf8_String) return Unicode_String;
 | 
			
		||||
 | 
			
		||||
end H3.Utf8;
 | 
			
		||||
@ -6,6 +6,6 @@ procedure hello3 is
 | 
			
		||||
 | 
			
		||||
	Compiler: C.Compiler;
 | 
			
		||||
begin
 | 
			
		||||
	Compiler.Feed ("<<=hello world");
 | 
			
		||||
	Compiler.Feed ("#include 'abc.txt'");
 | 
			
		||||
	Compiler.End_Feed;
 | 
			
		||||
end hello3;
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user