added utf8 functions
This commit is contained in:
parent
84762f7682
commit
de3770b6ef
@ -347,15 +347,15 @@ package body H3.Arrays is
|
|||||||
return System_Size'First;
|
return System_Size'First;
|
||||||
end Find;
|
end Find;
|
||||||
|
|
||||||
function "=" (Obj: in Elastic_Array; Obj2: in Elastic_Array) return Boolean is
|
function Equals (Obj: in Elastic_Array; Obj2: in Elastic_Array) return Boolean is
|
||||||
begin
|
begin
|
||||||
return Obj.Buffer = Obj2.Buffer or else Obj.Buffer.Slot(Get_First_Index(Obj) .. Get_Last_Index(Obj)) = Obj2.Buffer.Slot(Get_First_Index(Obj2) .. Get_Last_Index(Obj2));
|
return Obj.Buffer = Obj2.Buffer or else Obj.Buffer.Slot(Get_First_Index(Obj) .. Get_Last_Index(Obj)) = Obj2.Buffer.Slot(Get_First_Index(Obj2) .. Get_Last_Index(Obj2));
|
||||||
end "=";
|
end Equals;
|
||||||
|
|
||||||
function "=" (Obj: in Elastic_Array; Obj2: in Item_Array) return Boolean is
|
function Equals (Obj: in Elastic_Array; Obj2: in Item_Array) return Boolean is
|
||||||
begin
|
begin
|
||||||
return Obj.Buffer.Slot(Get_First_Index(Obj) .. Get_Last_Index(Obj)) = Obj2;
|
return Obj.Buffer.Slot(Get_First_Index(Obj) .. Get_Last_Index(Obj)) = Obj2;
|
||||||
end "=";
|
end Equals;
|
||||||
|
|
||||||
-- ---------------------------------------------------------------------
|
-- ---------------------------------------------------------------------
|
||||||
-- Controlled Management
|
-- Controlled Management
|
||||||
|
@ -69,8 +69,11 @@ package H3.Arrays is
|
|||||||
function Find (Obj: in Elastic_Array; V: in Item_Type; Start_Pos: in System_Index; Find_Dir: in Direction := DIRECTION_FORWARD) return System_Size;
|
function Find (Obj: in Elastic_Array; V: in Item_Type; Start_Pos: in System_Index; Find_Dir: in Direction := DIRECTION_FORWARD) return System_Size;
|
||||||
function Find (Obj: in Elastic_Array; V: in Item_Array; Start_Pos: in System_Index; Find_Dir: in Direction := DIRECTION_FORWARD) return System_Size;
|
function Find (Obj: in Elastic_Array; V: in Item_Array; Start_Pos: in System_Index; Find_Dir: in Direction := DIRECTION_FORWARD) return System_Size;
|
||||||
|
|
||||||
function "=" (Obj: in Elastic_Array; Obj2: in Elastic_Array) return Boolean;
|
function Equals (Obj: in Elastic_Array; Obj2: in Elastic_Array) return Boolean;
|
||||||
function "=" (Obj: in Elastic_Array; Obj2: in Item_Array) return Boolean;
|
function Equals (Obj: in Elastic_Array; Obj2: in Item_Array) return Boolean;
|
||||||
|
|
||||||
|
function "=" (Obj: in Elastic_Array; Obj2: in Elastic_Array) return Boolean renames Equals;
|
||||||
|
function "=" (Obj: in Elastic_Array; Obj2: in Item_Array) return Boolean renames Equals;
|
||||||
|
|
||||||
private
|
private
|
||||||
type Buffer_Record(Capa: System_Size) is limited record
|
type Buffer_Record(Capa: System_Size) is limited record
|
||||||
|
@ -1,7 +1,12 @@
|
|||||||
|
with H3.Utf8;
|
||||||
with ada.text_io;
|
with ada.text_io;
|
||||||
|
|
||||||
package body H3.Compilers is
|
package body H3.Compilers is
|
||||||
|
type Char_Array is array(System_Index range<>) of Standard.Character;
|
||||||
|
package Utf8 is new H3.Utf8(Standard.Character, S.Rune, Char_Array, S.Rune_Array);
|
||||||
|
|
||||||
LB_EOF: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_F,R.V.Right_Arrow); -- <EOF>
|
LB_EOF: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_F,R.V.Right_Arrow); -- <EOF>
|
||||||
|
LB_XINCLUDE: constant S.Rune_Array := (R.V.Number_Sign,R.V.LC_I,R.V.LC_N,R.V.LC_C,R.V.LC_L,R.V.LC_U,R.V.LC_D,R.V.LC_E); -- #include
|
||||||
|
|
||||||
procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State) is
|
procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State) is
|
||||||
begin
|
begin
|
||||||
@ -21,21 +26,13 @@ package body H3.Compilers is
|
|||||||
Set_Lexer_State (C, State, R.To_Rune(Code));
|
Set_Lexer_State (C, State, R.To_Rune(Code));
|
||||||
end Set_Lexer_State;
|
end Set_Lexer_State;
|
||||||
|
|
||||||
procedure Got_Token (C: in out Compiler) is
|
procedure Set_Parser_State (C: in out Compiler; State: in Parser_State) is
|
||||||
begin
|
begin
|
||||||
--case C.P.State IS
|
C.Ps.State := State;
|
||||||
-- when START =>
|
end Set_Parser_State;
|
||||||
-- null;
|
|
||||||
--end case;
|
|
||||||
|
|
||||||
ada.text_io.put (C.Tk.Id'Img);
|
|
||||||
ada.text_io.put (" ");
|
|
||||||
for i in C.Tk.Buf.Get_First_Index .. C.Tk.Buf.Get_Last_Index loop
|
|
||||||
ada.text_io.put (standard.character'val(S.Rune'Pos(C.Tk.Buf.Get_Item(i))));
|
|
||||||
end loop;
|
|
||||||
ada.text_io.put_line("");
|
|
||||||
|
|
||||||
|
|
||||||
|
procedure Parse_Start (C: in out Compiler) is
|
||||||
|
begin
|
||||||
case C.Tk.Id is
|
case C.Tk.Id is
|
||||||
when TK_BSTR =>
|
when TK_BSTR =>
|
||||||
null;
|
null;
|
||||||
@ -46,8 +43,11 @@ ada.text_io.put_line("");
|
|||||||
when TK_CSTR =>
|
when TK_CSTR =>
|
||||||
null;
|
null;
|
||||||
when TK_DIRECTIVE =>
|
when TK_DIRECTIVE =>
|
||||||
--Push_Feed_Layer (...
|
if C.Tk.Buf.Equals(LB_XINCLUDE) then
|
||||||
null;
|
Set_Parser_State (C, PS_INCLUDE);
|
||||||
|
else
|
||||||
|
raise Syntax_Error;
|
||||||
|
end if;
|
||||||
when TK_EOF =>
|
when TK_EOF =>
|
||||||
null;
|
null;
|
||||||
when TK_EOL =>
|
when TK_EOL =>
|
||||||
@ -65,6 +65,69 @@ ada.text_io.put_line("");
|
|||||||
when TK_SEMICOLON =>
|
when TK_SEMICOLON =>
|
||||||
null;
|
null;
|
||||||
end case;
|
end case;
|
||||||
|
end Parse_Start;
|
||||||
|
|
||||||
|
procedure Start_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is
|
||||||
|
Top: System_Index;
|
||||||
|
begin
|
||||||
|
if C.St.Top = C.St.Items'Last then
|
||||||
|
raise Syntax_Error; -- TODO: inclusion depth too deep
|
||||||
|
end if;
|
||||||
|
|
||||||
|
Top := C.St.Top + 1;
|
||||||
|
Ada.Text_IO.Open (C.St.Items(Top).Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name)));
|
||||||
|
C.St.Top := Top;
|
||||||
|
end Start_Inclusion;
|
||||||
|
|
||||||
|
procedure End_Inclusion (C: in out Compiler) is
|
||||||
|
begin
|
||||||
|
Ada.Text_IO.Close (C.St.Items(C.St.Top).Handle);
|
||||||
|
C.St.Top := C.St.Top - 1;
|
||||||
|
end End_Inclusion;
|
||||||
|
|
||||||
|
procedure Parse_Include (C: in out Compiler) is
|
||||||
|
begin
|
||||||
|
if C.Tk.Id = TK_CSTR then
|
||||||
|
-- arrange to feed more data from the included file.
|
||||||
|
Start_Inclusion (C, S.To_Rune_Array(C.Tk.Buf));
|
||||||
|
null;
|
||||||
|
else
|
||||||
|
raise Syntax_Error; -- string literal required
|
||||||
|
end if;
|
||||||
|
end Parse_Include;
|
||||||
|
|
||||||
|
procedure Parse_Include_End (C: in out Compiler) is
|
||||||
|
begin
|
||||||
|
if C.Tk.Id /= TK_SEMICOLON then
|
||||||
|
raise Syntax_Error;
|
||||||
|
end if;
|
||||||
|
|
||||||
|
-- TODO: put the state back to START???
|
||||||
|
end Parse_Include_End;
|
||||||
|
|
||||||
|
procedure Got_Token (C: in out Compiler) is
|
||||||
|
begin
|
||||||
|
--case C.P.State IS
|
||||||
|
-- when START =>
|
||||||
|
-- null;
|
||||||
|
--end case;
|
||||||
|
|
||||||
|
ada.text_io.put (C.Tk.Id'Img);
|
||||||
|
ada.text_io.put (" ");
|
||||||
|
for i in C.Tk.Buf.Get_First_Index .. C.Tk.Buf.Get_Last_Index loop
|
||||||
|
ada.text_io.put (standard.character'val(S.Rune'Pos(C.Tk.Buf.Get_Item(i))));
|
||||||
|
end loop;
|
||||||
|
ada.text_io.put_line("");
|
||||||
|
|
||||||
|
case C.Ps.State is
|
||||||
|
when PS_START =>
|
||||||
|
Parse_Start (C);
|
||||||
|
when PS_INCLUDE =>
|
||||||
|
Parse_Include (C);
|
||||||
|
when others =>
|
||||||
|
raise Syntax_Error; -- TODO: change this...
|
||||||
|
end case;
|
||||||
|
|
||||||
end Got_Token;
|
end Got_Token;
|
||||||
|
|
||||||
procedure Start_Token (C: in out Compiler) is
|
procedure Start_Token (C: in out Compiler) is
|
||||||
@ -208,6 +271,18 @@ end if;
|
|||||||
begin
|
begin
|
||||||
for i in Data'Range loop
|
for i in Data'Range loop
|
||||||
Feed_Char_Code (C, R.To_Code(Data(i)));
|
Feed_Char_Code (C, R.To_Code(Data(i)));
|
||||||
|
|
||||||
|
if C.St.Top > 0 then
|
||||||
|
declare
|
||||||
|
Ch: Standard.Character;
|
||||||
|
begin
|
||||||
|
while not Ada.Text_IO.End_Of_File(C.St.Items(C.St.Top).Handle) loop
|
||||||
|
Ada.Text_IO.Get (C.St.Items(C.St.Top).Handle, Ch);
|
||||||
|
Feed_Char_Code (C, Standard.Character'Pos(Ch));
|
||||||
|
--if inclusion stack is not Empty???
|
||||||
|
end loop;
|
||||||
|
end;
|
||||||
|
end if;
|
||||||
end loop;
|
end loop;
|
||||||
end Feed;
|
end Feed;
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
with H3.Runes;
|
with H3.Runes;
|
||||||
with H3.Strings;
|
with H3.Strings;
|
||||||
|
with Ada.Text_IO;
|
||||||
|
|
||||||
generic
|
generic
|
||||||
type Rune_Type is (<>);
|
type Rune_Type is (<>);
|
||||||
@ -9,7 +10,7 @@ package H3.Compilers is
|
|||||||
|
|
||||||
Syntax_Error: exception;
|
Syntax_Error: exception;
|
||||||
|
|
||||||
type Compiler is tagged private;
|
type Compiler is tagged limited private;
|
||||||
|
|
||||||
procedure Feed (C: in out Compiler; Data: in S.Rune_Array);
|
procedure Feed (C: in out Compiler; Data: in S.Rune_Array);
|
||||||
procedure End_Feed (C: in out Compiler);
|
procedure End_Feed (C: in out Compiler);
|
||||||
@ -28,6 +29,17 @@ private
|
|||||||
State: Lexer_State := LX_START;
|
State: Lexer_State := LX_START;
|
||||||
end record;
|
end record;
|
||||||
|
|
||||||
|
type Stream is record
|
||||||
|
Handle: Ada.Text_IO.File_Type;
|
||||||
|
--Handle: System_Size;
|
||||||
|
end record;
|
||||||
|
|
||||||
|
type Stream_Array is array(System_Index range <>) of Stream;
|
||||||
|
type Stream_Stack(Capa: System_Index) is record
|
||||||
|
Items: Stream_Array(System_Index'First .. Capa);
|
||||||
|
Top: System_Size := 0;
|
||||||
|
end record;
|
||||||
|
|
||||||
type Token_Id is (
|
type Token_Id is (
|
||||||
TK_BSTR,
|
TK_BSTR,
|
||||||
TK_BYTE,
|
TK_BYTE,
|
||||||
@ -48,13 +60,18 @@ private
|
|||||||
Buf: S.Elastic_String;
|
Buf: S.Elastic_String;
|
||||||
end record;
|
end record;
|
||||||
|
|
||||||
type Parser_State is (START, INCLUDE);
|
type Parser_State is (
|
||||||
|
PS_START,
|
||||||
|
PS_INCLUDE
|
||||||
|
);
|
||||||
type Parser is record
|
type Parser is record
|
||||||
State: Parser_State := START;
|
State: Parser_State := PS_START;
|
||||||
end record;
|
end record;
|
||||||
|
|
||||||
type Compiler is tagged record
|
type Compiler is tagged limited record
|
||||||
Lx: Lexer;
|
Lx: Lexer;
|
||||||
Tk: Token;
|
Tk: Token;
|
||||||
|
Ps: Parser;
|
||||||
|
St: Stream_Stack(32);
|
||||||
end record;
|
end record;
|
||||||
end H3.Compilers;
|
end H3.Compilers;
|
||||||
|
217
lib2/h3-utf8.adb
Normal file
217
lib2/h3-utf8.adb
Normal file
@ -0,0 +1,217 @@
|
|||||||
|
with Interfaces;
|
||||||
|
|
||||||
|
package body H3.Utf8 is
|
||||||
|
|
||||||
|
--|----------------------------------------------------------------------------
|
||||||
|
--| From RFC 2279 UTF-8, a transformation format of ISO 10646
|
||||||
|
--|
|
||||||
|
--| UCS-4 range (hex.) UTF-8 octet sequence (binary)
|
||||||
|
--| 1:2 00000000-0000007F 0xxxxxxx
|
||||||
|
--| 2:2 00000080-000007FF 110xxxxx 10xxxxxx
|
||||||
|
--| 3:2 00000800-0000FFFF 1110xxxx 10xxxxxx 10xxxxxx
|
||||||
|
--| 4:4 00010000-001FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||||
|
--| inv 00200000-03FFFFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||||
|
--| inv 04000000-7FFFFFFF 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||||
|
--|----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
--type Uint8 is mod 2 ** 8;
|
||||||
|
--type Uint32 is mod 2 ** 32;
|
||||||
|
use type Interfaces.Unsigned_8;
|
||||||
|
use type Interfaces.Unsigned_32;
|
||||||
|
subtype Uint8 is Interfaces.Unsigned_8;
|
||||||
|
subtype Uint32 is Interfaces.Unsigned_32;
|
||||||
|
|
||||||
|
type Conv_Record is record
|
||||||
|
Lower: Uint32;
|
||||||
|
Upper: Uint32;
|
||||||
|
|
||||||
|
Fbyte: Uint8;
|
||||||
|
Mask: Uint8; -- Mask for getting the fixed bits in the first byte.
|
||||||
|
-- (First-Byte and Mask) = Fbyte
|
||||||
|
|
||||||
|
Fmask: Uint8; -- Mask for getting the actual values bits off the first byte.
|
||||||
|
|
||||||
|
Length: System_Size; -- Number of bytes
|
||||||
|
end record;
|
||||||
|
|
||||||
|
type Conv_Record_Array is array(System_Index range<>) of Conv_Record;
|
||||||
|
|
||||||
|
Conv_Table: constant Conv_Record_Array := (
|
||||||
|
(16#0000_0000#, 16#0000_007F#, 2#0000_0000#, 2#1000_0000#, 2#0111_1111#, 1),
|
||||||
|
(16#0000_0080#, 16#0000_07FF#, 2#1100_0000#, 2#1110_0000#, 2#0001_1111#, 2),
|
||||||
|
(16#0000_0800#, 16#0000_FFFF#, 2#1110_0000#, 2#1111_0000#, 2#0000_1111#, 3),
|
||||||
|
(16#0001_0000#, 16#001F_FFFF#, 2#1111_0000#, 2#1111_1000#, 2#0000_0111#, 4),
|
||||||
|
(16#0020_0000#, 16#03FF_FFFF#, 2#1111_1000#, 2#1111_1100#, 2#0000_0011#, 5),
|
||||||
|
(16#0400_0000#, 16#7FFF_FFFF#, 2#1111_1100#, 2#1111_1110#, 2#0000_0001#, 6)
|
||||||
|
);
|
||||||
|
|
||||||
|
function Get_Utf8_Slot (UV: in Uint32) return System_Size is
|
||||||
|
pragma Inline (Get_Utf8_Slot);
|
||||||
|
begin
|
||||||
|
for I in Conv_Table'Range loop
|
||||||
|
if UV >= Conv_Table(I).Lower and then UV <= Conv_Table(I).Upper then
|
||||||
|
return I;
|
||||||
|
end if;
|
||||||
|
end loop;
|
||||||
|
return System_Size'First;
|
||||||
|
end Get_Utf8_Slot;
|
||||||
|
|
||||||
|
function From_Unicode_Character (Chr: in Unicode_Character) return Utf8_String is
|
||||||
|
UV: Uint32;
|
||||||
|
I: System_Size;
|
||||||
|
begin
|
||||||
|
UV := Unicode_Character'Pos(Chr);
|
||||||
|
|
||||||
|
I := Get_Utf8_Slot(UV);
|
||||||
|
if I not in System_Index'Range then
|
||||||
|
raise Invalid_Unicode_Character;
|
||||||
|
end if;
|
||||||
|
|
||||||
|
declare
|
||||||
|
Result: Utf8_String (1 .. System_Index(Conv_Table(I).Length));
|
||||||
|
begin
|
||||||
|
for J in reverse Result'First + 1 .. Result'Last loop
|
||||||
|
-- 2#0011_1111#: 16#3F#
|
||||||
|
-- 2#1000_0000#: 16#80#
|
||||||
|
Result(J) := Utf8_Character'Val((UV and Uint32'(2#0011_1111#)) or Uint32'(2#1000_0000#));
|
||||||
|
--UV := UV / (2 ** 6); --UV := UV >> 6;
|
||||||
|
UV := Interfaces.Shift_Right (UV, 6);
|
||||||
|
end loop;
|
||||||
|
|
||||||
|
Result(Result'First) := Utf8_Character'Val(UV or Uint32(Conv_Table(I).Fbyte));
|
||||||
|
return Result;
|
||||||
|
end;
|
||||||
|
end From_Unicode_Character;
|
||||||
|
|
||||||
|
function From_Unicode_String (Str: in Unicode_String) return Utf8_String is
|
||||||
|
-- this function has high stack pressure if the input string is too long
|
||||||
|
-- TODO: create a procedure to overcome this problem.
|
||||||
|
Tmp: System_Size;
|
||||||
|
begin
|
||||||
|
-- Calculate the length first
|
||||||
|
Tmp := 0;
|
||||||
|
for I in Str'Range loop
|
||||||
|
declare
|
||||||
|
Utf8: Utf8_String := From_Unicode_Character(Chr => Str(I));
|
||||||
|
begin
|
||||||
|
Tmp := Tmp + Utf8'Length;
|
||||||
|
end;
|
||||||
|
end loop;
|
||||||
|
|
||||||
|
declare
|
||||||
|
Result: Utf8_String (1 .. Tmp);
|
||||||
|
begin
|
||||||
|
Tmp := Result'First;
|
||||||
|
for I in Str'Range loop
|
||||||
|
declare
|
||||||
|
Utf8: Utf8_String := From_Unicode_Character(Str(I));
|
||||||
|
begin
|
||||||
|
Result(Tmp .. Tmp + Utf8'Length - 1) := Utf8;
|
||||||
|
Tmp := Tmp + Utf8'Length;
|
||||||
|
end;
|
||||||
|
end loop;
|
||||||
|
return Result;
|
||||||
|
end;
|
||||||
|
end From_Unicode_String;
|
||||||
|
|
||||||
|
function Sequence_Length (Seq: in Utf8_Character) return System_Size is
|
||||||
|
begin
|
||||||
|
for I in Conv_Table'Range loop
|
||||||
|
if (Utf8_Character'Pos(Seq) and Conv_Table(I).Mask) = Conv_Table(I).Fbyte then
|
||||||
|
return Conv_Table(I).Length;
|
||||||
|
end if;
|
||||||
|
end loop;
|
||||||
|
return System_Size'First;
|
||||||
|
end Sequence_Length;
|
||||||
|
|
||||||
|
procedure To_Unicode_Character (Seq: in Utf8_String;
|
||||||
|
Seq_Len: out System_Size;
|
||||||
|
Chr: out Unicode_Character) is
|
||||||
|
W: Uint32;
|
||||||
|
begin
|
||||||
|
for I in Conv_Table'Range loop
|
||||||
|
|
||||||
|
-- Check if the first byte matches the desired bit patterns.
|
||||||
|
if (Utf8_Character'Pos(Seq(Seq'First)) and Conv_Table(I).Mask) = Conv_Table(I).Fbyte then
|
||||||
|
|
||||||
|
if Seq'Length < Conv_Table(I).Length then
|
||||||
|
raise Insufficient_Utf8_Sequence;
|
||||||
|
end if;
|
||||||
|
|
||||||
|
-- Get the values bits off the first byte.
|
||||||
|
W := Utf8_Character'Pos(Seq(Seq'First)) and Uint32(Conv_Table(I).Fmask);
|
||||||
|
|
||||||
|
-- Get the values bits off subsequent bytes.
|
||||||
|
for J in 1 .. Conv_Table(I).Length - 1 loop
|
||||||
|
if (Utf8_Character'Pos(Seq(Seq'First + J)) and Uint32'(2#1100_0000#)) /= Uint32'(2#1000_0000#) then
|
||||||
|
-- Each UTF8 byte except the first must be set with 2#1000_0000.
|
||||||
|
raise Invalid_Utf8_Sequence;
|
||||||
|
end if;
|
||||||
|
W := Interfaces.Shift_Left(W, 6) or (Utf8_Character'Pos(Seq(Seq'First + J)) and Uint32'(2#0011_1111#));
|
||||||
|
end loop;
|
||||||
|
|
||||||
|
-- Return the character matching the word
|
||||||
|
Chr := Unicode_Character'Val(W);
|
||||||
|
Seq_Len := Conv_Table(I).Length;
|
||||||
|
return;
|
||||||
|
end if;
|
||||||
|
end loop;
|
||||||
|
|
||||||
|
raise Invalid_Utf8_Sequence;
|
||||||
|
end To_Unicode_Character;
|
||||||
|
|
||||||
|
function To_Unicode_Character (Seq: in Utf8_String) return Unicode_Character is
|
||||||
|
Seq_Len: System_Size;
|
||||||
|
Chr: Unicode_Character;
|
||||||
|
begin
|
||||||
|
To_Unicode_Character (Seq, Seq_Len, Chr);
|
||||||
|
return Chr;
|
||||||
|
end To_Unicode_Character;
|
||||||
|
|
||||||
|
procedure To_Unicode_String (Seq: in Utf8_String;
|
||||||
|
Seq_Len: out System_Size;
|
||||||
|
Str: out Unicode_String;
|
||||||
|
Str_Len: out System_Size) is
|
||||||
|
Seq_Pos: System_Index := Seq'First;
|
||||||
|
Str_Pos: System_Index := Str'First;
|
||||||
|
Len: System_Size;
|
||||||
|
begin
|
||||||
|
while Seq_Pos <= Seq'Last and then Str_Pos <= Str'Last loop
|
||||||
|
To_Unicode_Character(Seq(Seq_Pos .. Seq'Last), Len, Str(Str_Pos));
|
||||||
|
Seq_Pos := Seq_Pos + Len;
|
||||||
|
Str_Pos := Str_Pos + 1;
|
||||||
|
end loop;
|
||||||
|
|
||||||
|
Seq_Len := Seq_Pos - Seq'First;
|
||||||
|
Str_Len := Str_Pos - Str'First;
|
||||||
|
end To_Unicode_String;
|
||||||
|
|
||||||
|
function To_Unicode_String (Seq: in Utf8_String) return Unicode_String is
|
||||||
|
UL: System_Size := 0;
|
||||||
|
begin
|
||||||
|
declare
|
||||||
|
Chr: Unicode_Character;
|
||||||
|
Pos: System_Index := Seq'First;
|
||||||
|
Seq_Len: System_Size;
|
||||||
|
begin
|
||||||
|
while Pos <= Seq'Last loop
|
||||||
|
To_Unicode_Character(Seq(Pos .. Seq'Last), Seq_Len, Chr);
|
||||||
|
UL := UL + 1;
|
||||||
|
Pos := Pos + Seq_Len;
|
||||||
|
end loop;
|
||||||
|
end;
|
||||||
|
|
||||||
|
declare
|
||||||
|
Str: Unicode_String (1 .. UL);
|
||||||
|
Pos: System_Index := Seq'First;
|
||||||
|
Seq_Len: System_Size;
|
||||||
|
begin
|
||||||
|
for I in Str'Range loop
|
||||||
|
To_Unicode_Character(Seq(Pos .. Seq'Last), Seq_Len, Str(I));
|
||||||
|
Pos := Pos + Seq_Len;
|
||||||
|
end loop;
|
||||||
|
return Str;
|
||||||
|
end;
|
||||||
|
end To_Unicode_String;
|
||||||
|
|
||||||
|
end H3.Utf8;
|
51
lib2/h3-utf8.ads
Normal file
51
lib2/h3-utf8.ads
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
generic
|
||||||
|
type Slim_Character is (<>);
|
||||||
|
type Wide_Character is (<>);
|
||||||
|
type Slim_String is array(System_Index range<>) of Slim_Character;
|
||||||
|
type Wide_String is array(System_Index range<>) of Wide_Character;
|
||||||
|
package H3.Utf8 is
|
||||||
|
--pragma Preelaborate (Utf8);
|
||||||
|
|
||||||
|
--Invalid_Unicode_Character: exception renames Invalid_Wide_Character;
|
||||||
|
--Invalid_Utf8_Sequence: exception renames Invalid_Slim_Sequence;
|
||||||
|
--Insufficient_Utf8_Sequence: exception renames Insifficient_Slim_Sequence;
|
||||||
|
Invalid_Unicode_Character: exception;
|
||||||
|
Invalid_Utf8_Sequence: exception;
|
||||||
|
Insufficient_Utf8_Sequence: exception;
|
||||||
|
|
||||||
|
subtype Utf8_Character is Slim_Character;
|
||||||
|
subtype Unicode_Character is Wide_Character;
|
||||||
|
subtype Utf8_String is Slim_String;
|
||||||
|
subtype Unicode_String is Wide_String;
|
||||||
|
subtype Utf8_Sequence is Utf8_String;
|
||||||
|
|
||||||
|
--type Unicode_Character_Kit is record
|
||||||
|
-- Seq: System_Size; -- sequence length
|
||||||
|
-- Chr: Unicode_Character;
|
||||||
|
--end record;
|
||||||
|
|
||||||
|
--type Unicode_String_Kit(Length: System_Size) is record
|
||||||
|
-- Seq: System_Size;
|
||||||
|
-- Str: Unicode_String(System_Index'First .. Length);
|
||||||
|
--end record;
|
||||||
|
|
||||||
|
function From_Unicode_Character (Chr: in Unicode_Character) return Utf8_String;
|
||||||
|
function From_Unicode_String (Str: in Unicode_String) return Utf8_String;
|
||||||
|
--| The Sequence_Length function returns the length of a full UTF8
|
||||||
|
--| sequence representing a single Unicode character given the first
|
||||||
|
--| sequence byte. It returns 0 if the first byte is invalid.
|
||||||
|
function Sequence_Length (Seq: in Utf8_Character) return System_Size;
|
||||||
|
|
||||||
|
procedure To_Unicode_Character (Seq: in Utf8_String;
|
||||||
|
Seq_Len: out System_Size;
|
||||||
|
Chr: out Unicode_Character);
|
||||||
|
|
||||||
|
procedure To_Unicode_String (Seq: in Utf8_String;
|
||||||
|
Seq_Len: out System_Size;
|
||||||
|
Str: out Unicode_String;
|
||||||
|
Str_Len: out System_Size);
|
||||||
|
|
||||||
|
function To_Unicode_Character (Seq: in Utf8_String) return Unicode_Character;
|
||||||
|
function To_Unicode_String (Seq: in Utf8_String) return Unicode_String;
|
||||||
|
|
||||||
|
end H3.Utf8;
|
@ -6,6 +6,6 @@ procedure hello3 is
|
|||||||
|
|
||||||
Compiler: C.Compiler;
|
Compiler: C.Compiler;
|
||||||
begin
|
begin
|
||||||
Compiler.Feed ("<<=hello world");
|
Compiler.Feed ("#include 'abc.txt'");
|
||||||
Compiler.End_Feed;
|
Compiler.End_Feed;
|
||||||
end hello3;
|
end hello3;
|
||||||
|
Loading…
Reference in New Issue
Block a user