added Unicode_To_Utf8 converter
This commit is contained in:
parent
d839e8c41a
commit
bcf50fe381
@ -6,7 +6,6 @@ with Stream;
|
||||
with Ada.Text_IO;
|
||||
with Ada.Unchecked_Deallocation;
|
||||
|
||||
|
||||
procedure scheme is
|
||||
--package S renames H2.Scheme;
|
||||
--package S is new H2.Scheme (Wide_Character, Wide_String);
|
||||
@ -43,8 +42,13 @@ begin
|
||||
);
|
||||
|
||||
File_Stream.Name := File_Name'Unchecked_Access;
|
||||
S.Set_Input_Stream (SI, File_Stream); -- specify main input stream
|
||||
--S.Set_Input_Stream (SI, String_Stream);
|
||||
begin
|
||||
S.Set_Input_Stream (SI, File_Stream); -- specify main input stream
|
||||
--S.Set_Input_Stream (SI, String_Stream);
|
||||
exception
|
||||
when others =>
|
||||
Ada.Text_IO.Put_Line ("Cannot open Input Stream");
|
||||
end;
|
||||
--S.Set_Output_Stream (SI, Stream); -- specify main output stream.
|
||||
|
||||
Ada.Text_IO.Put_Line ("-------------------------------------------");
|
||||
|
@ -23,7 +23,7 @@ project Scheme is
|
||||
|
||||
package Compiler is
|
||||
for Default_Switches ("Ada") use (
|
||||
"-gnata", "-gnato", "-gnatN", "-gnatwl", "-gnat95",
|
||||
"-gnata", "-gnato", "-gnatN", "-gnatwl", "-gnat95", "-gnatW8",
|
||||
"-I@abs_srcdir@/../lib"
|
||||
);
|
||||
end Compiler;
|
||||
|
@ -1,7 +1,8 @@
|
||||
with H2.Pool;
|
||||
with Ada.Characters.Conversions;
|
||||
with Ada.Unchecked_Conversion;
|
||||
|
||||
with Ada.Text_IO; -- for debugging
|
||||
|
||||
package body Stream is
|
||||
|
||||
------------------------------------------------------------------
|
||||
@ -9,13 +10,13 @@ package body Stream is
|
||||
|
||||
procedure Open (Stream: in out String_Input_Stream_Record) is
|
||||
begin
|
||||
Ada.Wide_Text_IO.Put_Line ("****** OPEN STRING STREAM ******");
|
||||
Ada.Text_IO.Put_Line ("****** OPEN STRING STREAM ******");
|
||||
Stream.Pos := 0;
|
||||
end Open;
|
||||
|
||||
procedure Close (Stream: in out String_Input_Stream_Record) is
|
||||
begin
|
||||
Ada.Wide_Text_IO.Put_Line ("****** CLOSE STRING STREAM ******");
|
||||
Ada.Text_IO.Put_Line ("****** CLOSE STRING STREAM ******");
|
||||
Stream.Pos := Stream.Str'Last;
|
||||
end Close;
|
||||
|
||||
@ -50,18 +51,18 @@ Ada.Wide_Text_IO.Put_Line ("****** CLOSE STRING STREAM ******");
|
||||
------------------------------------------------------------------
|
||||
|
||||
procedure Open (Stream: in out File_Stream_Record) is
|
||||
subtype Wide_String is Standard.Wide_String(1 .. Standard.Natural(Stream.Name'Length));
|
||||
function To_Wide_String is new Ada.Unchecked_Conversion (S.Object_String, Wide_String);
|
||||
begin
|
||||
Ada.Wide_Text_IO.Put_Line (">>>>> OPEN File STREAM <<<<< " & To_Wide_String(Stream.Name.all));
|
||||
Ada.Wide_Text_IO.Open (Stream.Handle, Ada.Wide_Text_IO.In_File, Ada.Characters.Conversions.To_String(To_Wide_String(Stream.Name.all)));
|
||||
Ada.Text_IO.Put_Line (">>>>> OPEN File STREAM <<<<< " & Standard.String(UTF8.Unicode_To_Utf8(UTF8.Unicode_String(Stream.Name.all))));
|
||||
--Ada.Wide_Text_IO.Open (Stream.Handle, Ada.Wide_Text_IO.In_File, Ada.Characters.Conversions.To_String(Standard.Wide_String(Stream.Name.all)));
|
||||
Ada.Wide_Text_IO.Open (Stream.Handle, Ada.Wide_Text_IO.In_File, Standard.String(UTF8.Unicode_To_Utf8(UTF8.Unicode_String(Stream.Name.all))));
|
||||
end Open;
|
||||
|
||||
procedure Close (Stream: in out File_Stream_Record) is
|
||||
subtype Wide_String is Standard.Wide_String(1 .. Standard.Natural(Stream.Name'Length));
|
||||
function To_Wide_String is new Ada.Unchecked_Conversion (S.Object_String, Wide_String);
|
||||
begin
|
||||
Ada.Wide_Text_IO.Put_Line (">>>>> CLOSE File STREAM <<<<< " & To_Wide_String(Stream.Name.all));
|
||||
--Ada.Wide_Text_IO.Put_Line (">>>>> CLOSE File STREAM <<<<< " & Standard.Wide_String(Stream.Name.all));
|
||||
Ada.Text_IO.Put_Line (">>>>> CLOSE File STREAM <<<<< " & Standard.String(UTF8.Unicode_To_Utf8(UTF8.Unicode_String(Stream.Name.all))));
|
||||
Ada.Wide_Text_IO.Close (Stream.Handle);
|
||||
end Close;
|
||||
|
||||
@ -71,6 +72,10 @@ Ada.Wide_Text_IO.Put_Line (">>>>> CLOSE File STREAM <<<<< " & To_Wide_String(Str
|
||||
begin
|
||||
for I in Data'First .. Data'Last loop
|
||||
begin
|
||||
if Ada.Wide_Text_IO.End_Of_File (Stream.Handle) then
|
||||
Last := I - 1;
|
||||
return;
|
||||
end if;
|
||||
Ada.Wide_Text_IO.Get_Immediate (Stream.Handle, Data(I));
|
||||
exception
|
||||
when Ada.Wide_Text_IO.End_Error =>
|
||||
|
@ -1,9 +1,11 @@
|
||||
with H2.Scheme;
|
||||
with H2.UTF8;
|
||||
with Ada.Wide_Text_IO;
|
||||
|
||||
package Stream is
|
||||
|
||||
package S is new H2.Scheme (Standard.Wide_Character);
|
||||
package UTF8 is new H2.UTF8 (Standard.Wide_Character, Standard.Character);
|
||||
|
||||
------------------------------------------------------------
|
||||
--type Object_String_Pointer is access all S.Object_String;
|
||||
|
@ -1,20 +1,13 @@
|
||||
with H2.Ascii;
|
||||
with H2.Pool;
|
||||
with System.Address_To_Access_Conversions;
|
||||
|
||||
with Ada.Unchecked_Deallocation; -- for h2scm c interface. TOOD: move it to a separate file
|
||||
with Interfaces.C;
|
||||
|
||||
|
||||
-- XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXx
|
||||
with Ada.Characters.Handling;
|
||||
with Ada.Wide_Characters.Handling;
|
||||
|
||||
-- TODO: delete these after debugging
|
||||
with Ada.Unchecked_Deallocation; -- for h2scm c interface. TOOD: move it to a separate file
|
||||
with Interfaces.C;
|
||||
with ada.text_io;
|
||||
with ada.wide_text_io;
|
||||
-- TODO: delete above after debugging
|
||||
|
||||
-- XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXx
|
||||
|
||||
package body H2.Scheme is
|
||||
@ -1480,8 +1473,8 @@ Ada.Text_IO.Put_Line ("2222222222222222222222222");
|
||||
Make_Procedure_Objects;
|
||||
Ada.Text_IO.Put_Line ("99999");
|
||||
|
||||
Ada.Text_IO.Put_Line (IO_Character_Record'Size'Img);
|
||||
Ada.Text_IO.Put_Line (IO_Character_Record'Max_Size_In_Storage_Elements'Img);
|
||||
Ada.Text_IO.Put_Line (Object_Size'Image(IO_Character_Record'Size));
|
||||
Ada.Text_IO.Put_Line (Object_Size'Image(IO_Character_Record'Max_Size_In_Storage_Elements));
|
||||
exception
|
||||
when others =>
|
||||
Deinitialize_Heap (Interp);
|
||||
@ -2819,6 +2812,8 @@ Ada.Text_IO.Put_Line ("INFO: NO MORE TOKEN ");
|
||||
Result: out Object_Pointer) is
|
||||
-- standard read-eval-print loop
|
||||
begin
|
||||
pragma Assert (Interp.Base_Input.Stream /= null);
|
||||
|
||||
Result := Nil_Pointer;
|
||||
|
||||
loop
|
||||
|
@ -117,17 +117,20 @@ package H2.Scheme is
|
||||
-- The Object_Size type defines the size of object payload.
|
||||
-- It is the number of payload items for each object kind.
|
||||
--type Object_Size is new Object_Word range 0 .. (2 ** (System.Word_Size - 1)) - 1;
|
||||
--type Object_Size is new Object_Word range 0 .. 1000; -- TODO: remove this line and uncommect the live above
|
||||
type Object_Size is new Object_Word;
|
||||
--type Object_Size is new Object_Word range 0 .. 1000;
|
||||
--type Object_Size is new Object_Word;
|
||||
type Object_Size is new System_Size;
|
||||
for Object_Size'Size use Object_Pointer_Bits; -- for GC
|
||||
subtype Object_Index is Object_Size range Object_Size(System_Index'First) .. Object_Size(System_Index'Last);
|
||||
|
||||
type Object_Byte is mod 2 ** System.Storage_Unit;
|
||||
for Object_Byte'Size use System.Storage_Unit;
|
||||
|
||||
subtype Object_Character is Character_Type;
|
||||
subtype Object_String_Size is Object_Size range 0 .. Object_Size'Last - 1;
|
||||
subtype Object_String_Range is Object_Size range 1 .. Object_Size'Last - 1;
|
||||
type Object_String is array (Object_String_Range range <>) of Object_Character;
|
||||
|
||||
subtype Object_String_Size is Object_Size;
|
||||
subtype Object_String_Index is Object_Index;
|
||||
type Object_String is array (Object_String_Index range <>) of Object_Character;
|
||||
|
||||
type Object_String_Pointer is access all Object_String;
|
||||
for Object_String_Pointer'Size use Object_Pointer_Bits;
|
||||
@ -135,14 +138,14 @@ package H2.Scheme is
|
||||
for Constant_Object_String_Pointer'Size use Object_Pointer_Bits;
|
||||
|
||||
-- TODO: are these Thin_XXXX necessary?
|
||||
subtype Thin_Object_String is Object_String (Object_String_Range'Range);
|
||||
subtype Thin_Object_String is Object_String(Object_Index'Range);
|
||||
type Thin_Object_String_Pointer is access all Thin_Object_String;
|
||||
for Thin_Object_String_Pointer'Size use Object_Pointer_Bits;
|
||||
|
||||
type Object_Byte_Array is array (Object_Size range <>) of Object_Byte;
|
||||
type Object_Byte_Array is array (Object_Index range <>) of Object_Byte;
|
||||
subtype Object_Character_Array is Object_String;
|
||||
type Object_Pointer_Array is array (Object_Size range <>) of Object_Pointer;
|
||||
type Object_Word_Array is array (Object_Size range <>) of Object_Word;
|
||||
type Object_Pointer_Array is array (Object_Index range <>) of Object_Pointer;
|
||||
type Object_Word_Array is array (Object_Index range <>) of Object_Word;
|
||||
|
||||
type Object_Kind is (
|
||||
Moved_Object, -- internal use only
|
||||
|
110
lib/h2-utf8.adb
Normal file
110
lib/h2-utf8.adb
Normal file
@ -0,0 +1,110 @@
|
||||
with ada.text_io;
|
||||
|
||||
package body H2.UTF8 is
|
||||
|
||||
type Uint8 is mod 2 ** 8;
|
||||
type Uint32 is mod 2 ** 32;
|
||||
|
||||
type Conv_Record is record
|
||||
Lower: Uint32;
|
||||
Upper: Uint32;
|
||||
Fbyte: Uint8; -- Mask to the first utf8 byte */
|
||||
Mask: Uint8;
|
||||
Fmask: Uint8;
|
||||
Length: Uint8; -- number of bytes
|
||||
end record;
|
||||
|
||||
type Conv_Record_Array is array(System_Index range<>) of Conv_Record;
|
||||
|
||||
Conv_Table: constant Conv_Record_Array := (
|
||||
(16#0000_0000#, 16#0000_007F#, 16#00#, 16#80#, 16#7F#, 1),
|
||||
(16#0000_0080#, 16#0000_07FF#, 16#C0#, 16#E0#, 16#1F#, 2),
|
||||
(16#0000_0800#, 16#0000_FFFF#, 16#E0#, 16#F0#, 16#0F#, 3),
|
||||
(16#0001_0000#, 16#001F_FFFF#, 16#F0#, 16#F8#, 16#07#, 4),
|
||||
(16#0020_0000#, 16#03FF_FFFF#, 16#F8#, 16#FC#, 16#03#, 5),
|
||||
(16#0400_0000#, 16#7FFF_FFFF#, 16#FC#, 16#FE#, 16#01#, 6)
|
||||
);
|
||||
|
||||
function Get_UTF8_Slot (UV: in Uint32) return System_Size is
|
||||
pragma Inline (Get_UTF8_Slot);
|
||||
begin
|
||||
for I in Conv_Table'Range loop
|
||||
if UV >= Conv_Table(I).Lower and then UV <= Conv_Table(I).Upper then
|
||||
return I;
|
||||
end if;
|
||||
end loop;
|
||||
return System_Size'First;
|
||||
end Get_UTF8_Slot;
|
||||
|
||||
function Unicode_To_UTF8 (UC: in Unicode_Character) return UTF8_String is
|
||||
UV: Uint32;
|
||||
I: System_Size;
|
||||
begin
|
||||
UV := Unicode_Character'Pos(UC);
|
||||
|
||||
I := Get_UTF8_Slot(UV);
|
||||
if I not in System_Index'Range then
|
||||
raise Invalid_Unicode_Character;
|
||||
end if;
|
||||
|
||||
declare
|
||||
subtype Result_String is UTF8_String(1 .. System_Index(Conv_Table(I).Length));
|
||||
Result: Result_String;
|
||||
begin
|
||||
for J in reverse Result_String'First + 1 .. Result_String'Last loop
|
||||
-- 2#0011_1111#: 16#3F#
|
||||
-- 2#1000_0000#: 16#80#
|
||||
Result(J) := UTF8_Character'Val((UV and 2#0011_1111#) or 2#1000_0000#);
|
||||
UV := UV / (2 ** 6); --UV := UV >> 6;
|
||||
end loop;
|
||||
|
||||
Result(Result_String'First) := UTF8_Character'Val(UV or Uint32(Conv_Table(I).Fbyte));
|
||||
return Result;
|
||||
end;
|
||||
end Unicode_To_UTF8;
|
||||
|
||||
|
||||
function Unicode_To_UTF8 (US: in Unicode_String) return UTF8_String is
|
||||
-- this function has high stack pressur if the input string is too long
|
||||
-- TODO: create a procedure to overcome this problem.
|
||||
Tmp: System_Size;
|
||||
begin
|
||||
Tmp := 0;
|
||||
for I in US'Range loop
|
||||
declare
|
||||
UTF8: UTF8_String := Unicode_To_UTF8(US(I));
|
||||
begin
|
||||
Tmp := Tmp + UTF8'Length;
|
||||
end;
|
||||
end loop;
|
||||
|
||||
declare
|
||||
subtype Result_String is UTF8_String(1 .. Tmp);
|
||||
Result: Result_String;
|
||||
begin
|
||||
Tmp := Result'First;
|
||||
for I in US'Range loop
|
||||
declare
|
||||
UTF8: UTF8_String := Unicode_To_UTF8(US(I));
|
||||
begin
|
||||
Result(Tmp .. Tmp + UTF8'Length - 1) := UTF8;
|
||||
Tmp := Tmp + UTF8'Length;
|
||||
end;
|
||||
end loop;
|
||||
return Result;
|
||||
end;
|
||||
end Unicode_To_UTF8;
|
||||
|
||||
procedure UTF8_To_Unicode (UTF8: in UTF8_String;
|
||||
UC: out Unicode_Character) is
|
||||
begin
|
||||
null;
|
||||
end UTF8_To_Unicode;
|
||||
|
||||
procedure UTF8_To_Unicode (UTF8: in UTF8_String;
|
||||
US: in out Unicode_String) is
|
||||
begin
|
||||
null;
|
||||
end UTF8_To_Unicode;
|
||||
|
||||
end H2.UTF8;
|
20
lib/h2-utf8.ads
Normal file
20
lib/h2-utf8.ads
Normal file
@ -0,0 +1,20 @@
|
||||
generic
|
||||
type Unicode_Character_Type is (<>);
|
||||
type UTF8_Character_Type is (<>);
|
||||
package H2.UTF8 is
|
||||
|
||||
Invalid_Unicode_Character: exception;
|
||||
|
||||
subtype Unicode_Character is Unicode_Character_Type;
|
||||
subtype UTF8_Character is UTF8_Character_Type;
|
||||
|
||||
type UTF8_String is array(System_Index range<>) of UTF8_Character;
|
||||
type Unicode_String is array(System_Index range<>) of Unicode_Character;
|
||||
|
||||
function Unicode_To_UTF8 (UC: in Unicode_Character) return UTF8_String;
|
||||
function Unicode_To_UTF8 (US: in Unicode_String) return UTF8_String;
|
||||
|
||||
--procedure UTF8_To_Unicode (UTF8: in UTF8_String;
|
||||
-- UC: out Unicode_Character_Type);
|
||||
|
||||
end H2.UTF8;
|
19
lib/h2.ads
19
lib/h2.ads
@ -1,8 +1,25 @@
|
||||
with System;
|
||||
with System.Storage_Pools;
|
||||
|
||||
package H2 is
|
||||
|
||||
type Storage_Pool_Pointer is
|
||||
System_Word_Bits: constant := System.Word_Size;
|
||||
System_Word_Bytes: constant := System_Word_Bits / System.Storage_Unit;
|
||||
|
||||
--type System_Byte is mod 2 ** System.Storage_Unit;
|
||||
--for System_Byte'Size use System.Storage_Unit;
|
||||
|
||||
type System_Word is mod 2 ** System_Word_Bits;
|
||||
--for System_Word'Size use System_Word_Bits;
|
||||
|
||||
type System_Signed_Word is range -(2 ** (System_Word_Bits - 1)) ..
|
||||
+(2 ** (System_Word_Bits - 1)) - 1;
|
||||
--for System_Signed_Word'Size use System_Word_Bits;
|
||||
|
||||
type System_Size is new System_Word range 0 .. (2 ** System_Word_Bits) - 1;
|
||||
subtype System_Index is System_Size range 1 .. System_Size'Last;
|
||||
|
||||
type Storage_Pool_Pointer is
|
||||
access all System.Storage_Pools.Root_Storage_Pool'Class;
|
||||
|
||||
end H2;
|
||||
|
@ -14,18 +14,21 @@ project Lib is
|
||||
"h2-pool.ads",
|
||||
"h2-scheme.adb",
|
||||
"h2-scheme.ads",
|
||||
"h2-scheme-token.adb"
|
||||
"h2-scheme-token.adb",
|
||||
"h2-utf8.adb",
|
||||
"h2-utf8.ads"
|
||||
);
|
||||
for Library_Interface use (
|
||||
"h2",
|
||||
"h2.ascii",
|
||||
"h2.pool",
|
||||
"h2.scheme"
|
||||
"h2.scheme",
|
||||
"h2.utf8"
|
||||
);
|
||||
|
||||
package Compiler is
|
||||
for Default_Switches ("Ada") use (
|
||||
"-gnata", "-gnato", "-gnatN", "-gnatwl", "-gnat95"
|
||||
"-gnata", "-gnato", "-gnatN", "-gnatwl", "-gnat95", "-gnatW8"
|
||||
);
|
||||
end Compiler;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user