adding experimental changes

This commit is contained in:
hyung-hwan 2021-10-30 01:57:19 +00:00
parent 2625942e08
commit eadeb5af07
10 changed files with 496 additions and 74 deletions

View File

@ -7,6 +7,8 @@ generic
G_Terminator_Value: Item_Type;
package H3.Arrays is
subtype Item is Item_Type;
Terminator_Length: constant System_Zero_Or_One := G_Terminator_Length;
Terminator_Value: constant Item_Type := G_Terminator_Value;

View File

@ -5,86 +5,146 @@ package body H3.CC is
package UC renames System.UTF_32;
use type System.UTF_32.Category;
SP: constant Item_Type := Item_Type'Val(32);
HT: constant Item_Type := Item_Type'Val(9);
SP: constant Rune := Rune'Val(32);
HT: constant Rune := Rune'Val(9);
function Is_Alpha (V: in Item_Type) return Boolean is
function Is_Alpha (V: in Rune) return Boolean is
begin
return UC.Is_UTF_32_Letter(Item_Type'Pos(V));
return UC.Is_UTF_32_Letter(Rune'Pos(V));
end Is_Alpha;
function Is_Alnum (V: in Item_Type) return Boolean is
function Is_Alpha (C: in Code) return Boolean is
begin
return UC.Is_UTF_32_Letter(Item_Type'Pos(V)) or else
UC.Is_UTF_32_Digit(Item_Type'Pos(V));
return not Is_Eof(C) and then Is_Alpha(Rune'Val(C));
end Is_Alpha;
function Is_Alnum (V: in Rune) return Boolean is
begin
return UC.Is_UTF_32_Letter(Rune'Pos(V)) or else
UC.Is_UTF_32_Digit(Rune'Pos(V));
end Is_Alnum;
function Is_Blank (V: in Item_Type) return Boolean is
function Is_Alnum (C: in Code) return Boolean is
begin
return not Is_Eof(C) and then Is_Alnum(Rune'Val(C));
end Is_Alnum;
function Is_Blank (V: in Rune) return Boolean is
begin
return V = SP or else V = HT;
end Is_Blank;
function Is_Cntrl (V: in Item_Type) return Boolean is
function Is_Blank (C: in Code) return Boolean is
begin
return UC.Get_Category(Item_Type'Pos(V)) = UC.Cc;
return not Is_Eof(C) and then Is_Blank(Rune'Val(C));
end Is_Blank;
function Is_Cntrl (V: in Rune) return Boolean is
begin
return UC.Get_Category(Rune'Pos(V)) = UC.Cc;
end Is_Cntrl;
function Is_Digit (V: in Item_Type) return Boolean is
function Is_Cntrl (C: in Code) return Boolean is
begin
return UC.Is_UTF_32_Digit(Item_Type'Pos(V));
return not Is_Eof(C) and then Is_Cntrl(Rune'Val(C));
end Is_Cntrl;
function Is_Digit (V: in Rune) return Boolean is
begin
return UC.Is_UTF_32_Digit(Rune'Pos(V));
end Is_Digit;
function Is_Graph (V: in Item_Type) return Boolean is
function Is_Digit (C: in Code) return Boolean is
begin
return not Is_Eof(C) and then Is_Digit(Rune'Val(C));
end Is_Digit;
function Is_Graph (V: in Rune) return Boolean is
begin
return Is_Print(V) and then V /= SP;
end Is_Graph;
function Is_Lower (V: in Item_Type) return Boolean is
function Is_Graph (C: in Code) return Boolean is
begin
return UC.Get_Category(Item_Type'Pos(V)) = UC.Ll;
return not Is_Eof(C) and then Is_Graph(Rune'Val(C));
end Is_Graph;
function Is_Lower (V: in Rune) return Boolean is
begin
return UC.Get_Category(Rune'Pos(V)) = UC.Ll;
end Is_Lower;
function Is_Print (V: in Item_Type) return Boolean is
function Is_Lower (C: in Code) return Boolean is
begin
return not UC.IS_UTF_32_Non_Graphic(Item_Type'Pos(V));
return not Is_Eof(C) and then Is_Lower(Rune'Val(C));
end Is_Lower;
function Is_Print (V: in Rune) return Boolean is
begin
return not UC.IS_UTF_32_Non_Graphic(Rune'Pos(V));
end Is_Print;
function Is_Punct (V: in Item_Type) return Boolean is
function Is_Print (C: in Code) return Boolean is
begin
--return UC.Is_UTF_32_Punctuation(Item_Type'Pos(V));
return not Is_Eof(C) and then Is_Print(Rune'Val(C));
end Is_Print;
function Is_Punct (V: in Rune) return Boolean is
begin
--return UC.Is_UTF_32_Punctuation(Rune'Pos(V));
return Is_Print(V) and then not Is_Space(V) and then not Is_Alnum(V);
end Is_Punct;
function Is_Space (V: in Item_Type) return Boolean is
function Is_Punct (C: in Code) return Boolean is
begin
return UC.Is_UTF_32_Space(Item_Type'Pos(V)) or else
UC.Is_UTF_32_Line_Terminator(Item_Type'Pos(V)) or else
return not Is_Eof(C) and then Is_Punct(Rune'Val(C));
end Is_Punct;
function Is_Space (V: in Rune) return Boolean is
begin
return UC.Is_UTF_32_Space(Rune'Pos(V)) or else
UC.Is_UTF_32_Line_Terminator(Rune'Pos(V)) or else
V = HT;
end Is_Space;
function Is_Upper (V: in Item_Type) return Boolean is
function Is_Space (C: in Code) return Boolean is
begin
return UC.Get_Category(Item_Type'Pos(V)) = UC.Lu;
return not Is_Eof(C) and then Is_Space(Rune'Val(C));
end Is_Space;
function Is_Upper (V: in Rune) return Boolean is
begin
return UC.Get_Category(Rune'Pos(V)) = UC.Lu;
end Is_Upper;
function Is_Xdigit (V: in Item_Type) return Boolean is
function Is_Upper (C: in Code) return Boolean is
begin
return UC.Is_UTF_32_Digit(Item_Type'Pos(V)) or else
Item_Type'Pos(V) in System_Character'Pos('A') .. System_Character'Pos('F') or else
Item_Type'Pos(V) in System_Character'Pos('a') .. System_Character'Pos('f');
return not Is_Eof(C) and then Is_Upper(Rune'Val(C));
end Is_Upper;
function Is_Xdigit (V: in Rune) return Boolean is
begin
return UC.Is_UTF_32_Digit(Rune'Pos(V)) or else
Rune'Pos(V) in System_Rune'Pos('A') .. System_Rune'Pos('F') or else
Rune'Pos(V) in System_Rune'Pos('a') .. System_Rune'Pos('f');
end Is_Xdigit;
function To_Lower (V: in Item_Type) return Item_Type is
function Is_Xdigit (C: in Code) return Boolean is
begin
return Item_Type'Val(UC.UTF_32_To_Lower_Case(Item_Type'Pos(V)));
return not Is_Eof(C) and then Is_Xdigit(Rune'Val(C));
end Is_Xdigit;
function To_Lower (V: in Rune) return Rune is
begin
return Rune'Val(UC.UTF_32_To_Lower_Case(Rune'Pos(V)));
end To_Lower;
function To_Upper (V: in Item_Type) return Item_Type is
function To_Upper (V: in Rune) return Rune is
begin
return Item_Type'Val(UC.UTF_32_To_Upper_Case(Item_Type'Pos(V)));
return Rune'Val(UC.UTF_32_To_Upper_Case(Rune'Pos(V)));
end To_Upper;
function Is_Class (V: in Item_Type; Class: in Item_Class) return Boolean is
function Is_Class (V: in Rune; Class: in Item_Class) return Boolean is
begin
case Class is
when ALPHA => return Is_Alpha(V);
@ -102,10 +162,37 @@ package body H3.CC is
end case;
end Is_Class;
function Is_Code (V: in Item_Type; Code: in Item_Code) return Boolean is
function Is_Class (C: in Code; Class: in Item_Class) return Boolean is
begin
return not Is_Eof(C) and then Is_Class(To_Rune(C), Class);
end Is_Class;
function Is_Eof (C: in Code) return Boolean is
begin
return C = EOF;
end Is_Eof;
function Is_Code (V: in Rune; C: in Code) return Boolean is
begin
-- a clumsy way to work around strong type checking
-- with unknown Item_Type at the generic level?
return Item_Type'Pos(V) = Code;
-- with unknown Rune at the generic level?
return To_Code(V) = C;
end Is_Code;
function Is_Rune (C: in Code; V: in Rune) return Boolean is
begin
return To_Code(V) = C;
end Is_Rune;
function To_Rune (C: in Code) return Rune is
begin
pragma Assert (not Is_Eof(C));
return Rune'Val(C);
end To_Rune;
function To_Code (V: in Rune) return Code is
begin
return Rune'Pos(V);
end To_Code;
end H3.CC;

View File

@ -1,38 +1,105 @@
generic
type Item_Type is (<>); -- any discrete type
-- any discrete type accepted.
-- can't ada limit type to one of Character, Wide_Character, Wide_Wide_Character?
type Rune_Type is (<>);
package H3.CC is
-- <ctype.h>-like character classification package
-- <ctype.h>-like character classification plus other features.
-- unicode-based. no system locale honored.
subtype Item_Code is H3.Natural;
subtype Rune is Rune_Type;
type Code is range -1 .. 16#7FFF_FFFF#;
Colon: constant Item_Code := System_Character'Pos(':');
Semicolon: constant Item_Code := System_Character'Pos(';');
Tilde: constant Item_Code := System_Character'Pos('~');
Underline: constant Item_Code := System_Character'Pos('_');
Equal: constant Item_Code := System_Character'Pos('=');
L_Arrow: constant Item_Code := System_Character'Pos('<');
R_Arrow: constant Item_Code := System_Character'Pos('>');
-- virtual code to indicate end of input
EOF: constant Code := Code'First;
type Item_Class is (ALPHA, ALNUM, BLANK, CNTRL, DIGIT, GRAPH, LOWER, PRINT, PUNCT, SPACE, UPPER, XDIGIT);
function Is_Class (V: in Item_Type; Class: in Item_Class) return Boolean;
C_Colon: constant Code := System_Rune'Pos(':');
C_Semicolon: constant Code := System_Rune'Pos(';');
C_Tilde: constant Code := System_Rune'Pos('~');
C_Underline: constant Code := System_Rune'Pos('_');
C_Equal: constant Code := System_Rune'Pos('=');
C_Left_Arrow: constant Code := System_Rune'Pos('<');
C_Right_Arrow: constant Code := System_Rune'Pos('>');
function Is_Alpha (V: in Item_Type) return Boolean;
function Is_Alnum (V: in Item_Type) return Boolean;
function Is_Blank (V: in Item_Type) return Boolean;
function Is_Cntrl (V: in Item_Type) return Boolean;
function Is_Digit (V: in Item_Type) return Boolean;
function Is_Graph (V: in Item_Type) return Boolean;
function Is_Print (V: in Item_Type) return Boolean;
function Is_Punct (V: in Item_Type) return Boolean;
function Is_Space (V: in Item_Type) return Boolean;
function Is_Xdigit (V: in Item_Type) return Boolean;
C_A: constant Code := System_Rune'Pos('A');
C_B: constant Code := System_Rune'Pos('B');
C_C: constant Code := System_Rune'Pos('C');
C_D: constant Code := System_Rune'Pos('D');
C_E: constant Code := System_Rune'Pos('E');
function Is_Lower (V: in Item_Type) return Boolean;
function Is_Upper (V: in Item_Type) return Boolean;
Colon: constant Rune := Rune'Val(C_Colon);
Semicolon: constant Rune := Rune'Val(C_Semicolon);
Tilde: constant Rune := Rune'Val(C_Tilde);
Underline: constant Rune := Rune'Val(C_Underline);
Equal: constant Rune := Rune'Val(C_Equal);
Left_Arrow: constant Rune := Rune'Val(C_Left_Arrow);
Right_Arrow: constant Rune := Rune'Val(C_Right_Arrow);
function To_Lower (V: in Item_Type) return Item_Type;
function To_Upper (V: in Item_Type) return Item_Type;
UC_A: constant Rune := Rune'Val(C_A);
UC_B: constant Rune := Rune'Val(C_B);
UC_C: constant Rune := Rune'Val(C_C);
UC_D: constant Rune := Rune'Val(C_D);
UC_E: constant Rune := Rune'Val(C_E);
UC_O: constant Rune := Rune'Val(System_Rune'Pos('O'));
UC_F: constant Rune := Rune'Val(System_Rune'Pos('F'));
type Item_Class is (
ALPHA,
ALNUM,
BLANK,
CNTRL,
DIGIT,
GRAPH,
LOWER,
PRINT,
PUNCT,
SPACE,
UPPER,
XDIGIT
);
function Is_Alpha (V: in Rune) return Boolean;
function Is_Alnum (V: in Rune) return Boolean;
function Is_Blank (V: in Rune) return Boolean;
function Is_Cntrl (V: in Rune) return Boolean;
function Is_Digit (V: in Rune) return Boolean;
function Is_Graph (V: in Rune) return Boolean;
function Is_Print (V: in Rune) return Boolean;
function Is_Punct (V: in Rune) return Boolean;
function Is_Space (V: in Rune) return Boolean;
function Is_Xdigit (V: in Rune) return Boolean;
function Is_Lower (V: in Rune) return Boolean;
function Is_Upper (V: in Rune) return Boolean;
function To_Lower (V: in Rune) return Rune;
function To_Upper (V: in Rune) return Rune;
function Is_Alpha (C: in Code) return Boolean;
function Is_Alnum (C: in Code) return Boolean;
function Is_Blank (C: in Code) return Boolean;
function Is_Cntrl (C: in Code) return Boolean;
function Is_Digit (C: in Code) return Boolean;
function Is_Graph (C: in Code) return Boolean;
function Is_Print (C: in Code) return Boolean;
function Is_Punct (C: in Code) return Boolean;
function Is_Space (C: in Code) return Boolean;
function Is_Xdigit (C: in Code) return Boolean;
function Is_Lower (C: in Code) return Boolean;
function Is_Upper (C: in Code) return Boolean;
function Is_Class (V: in Rune; Class: in Item_Class) return Boolean;
function Is_Class (C: in Code; Class: in Item_Class) return Boolean;
function Is_Eof (C: in Code) return Boolean;
pragma Inline (Is_Eof);
function Is_Code (V: in Rune; C: in Code) return Boolean;
pragma Inline (Is_Code);
function Is_Rune (C: in Code; V: in Rune) return Boolean;
pragma Inline (Is_Rune);
function To_Rune (C: in Code) return Rune;
pragma Inline (To_Rune);
function To_Code (V: in Rune) return Code;
pragma Inline (To_Code);
function Is_Code (V: in Item_Type; Code: in Item_Code) return Boolean;
end H3.CC;

197
lib2/h3-compilers.adb Normal file
View File

@ -0,0 +1,197 @@
with ada.text_io;
package body H3.Compilers is
procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State) is
begin
C.Lx.State := State;
end Set_Lexer_State;
procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State; Ch: in R.Rune) is
begin
-- change the lexer state while storing the first character in the token buffer.
C.Lx.State := State;
S.Clear (C.Tk.Buf);
S.Append (C.Tk.Buf, Ch);
end Set_Lexer_State;
procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State; Code: in R.Code) is
begin
Set_Lexer_State (C, State, R.To_Rune(Code));
end Set_Lexer_State;
procedure Got_Token (C: in out Compiler) is
begin
--case C.P.State IS
-- when START =>
-- null;
--end case;
ada.text_io.put_line (C.Tk.Id'Img);
case C.Tk.Id is
when TK_BSTR =>
null;
when TK_BYTE =>
null;
when TK_CHAR =>
null;
when TK_CSTR =>
null;
when TK_EOF =>
null;
when TK_EOL =>
null;
when TK_GE =>
null;
when TK_GT =>
null;
when TK_IDENT =>
null;
when TK_LE =>
null;
when TK_LT =>
null;
when TK_SEMICOLON =>
null;
end case;
end Got_Token;
procedure Start_Token (C: in out Compiler) is
begin
C.Tk.Id := TK_EOF; -- indicate the token id is not set yet
-- TODO: store token location.
S.Clear (C.Tk.Buf);
end Start_Token;
procedure Start_Token (C: in out Compiler; Ch: in R.Rune) is
begin
Start_Token (C);
S.Append (C.Tk.Buf, Ch);
end Start_Token;
procedure Start_Token (C: in out Compiler; Code: in R.Code) is
begin
Start_Token (C, R.To_Rune(Code));
end Start_Token;
procedure Start_Token (C: in out Compiler; Str: in S.Rune_Array) is
begin
Start_Token (C);
S.Append (C.Tk.Buf, Str);
end Start_Token;
procedure Feed_Token (C: in out Compiler; Ch: in R.Rune) is
begin
S.Append (C.Tk.Buf, Ch);
end Feed_Token;
procedure Feed_Token (C: in out Compiler; Code: in R.Code) is
begin
Feed_Token(C, R.To_Rune(Code));
end Feed_Token;
procedure End_Token (C: in out Compiler; Id: in Token_Id) is
begin
C.Tk.Id := Id;
Got_Token (C);
Set_Lexer_State (C, LX_START);
end End_Token;
procedure End_Token (C: in out Compiler; Id: in Token_Id; Ch: in R.Rune) is
begin
S.Append (C.Tk.Buf, Ch);
C.Tk.Id := Id;
Got_Token (C);
Set_Lexer_State (C, LX_START);
end End_Token;
procedure End_Token (C: in out Compiler; Id: in Token_Id; Code: in R.Code) is
begin
S.Append (C.Tk.Buf, R.To_Rune(Code));
C.Tk.Id := Id;
Got_Token (C);
Set_Lexer_State (C, LX_START);
end End_Token;
procedure Feed_Char_Code (C: in out Compiler; Code: in R.Code) is
begin
<<Start_Over>>
if R.Is_Eof(Code) then
ada.text_io.put_line ("EOF");
else
ada.text_io.put_line (R.To_Rune(Code)'Img);
end if;
case C.Lx.State is
when LX_START =>
if R.Is_Eof(Code) then
Start_Token (C, S.Rune_Array'(R.Left_Arrow, R.UC_E, R.UC_O, R.UC_F, R.Right_Arrow));
End_Token (C, TK_EOF);
-- this procedure doesn't prevent you from feeding more
-- after EOF. but it's not desirable to feed more after EOF.
elsif R.Is_Space(Code) then
-- ignore. carry on
null;
elsif R.Is_Alpha(Code) then
Set_Lexer_State (C, LX_IDENT, Code);
elsif R.Is_Digit(Code) then
Set_Lexer_State (C, LX_NUMBER, Code);
elsif R.Is_Rune(Code, R.Semicolon) then
Start_Token (C, Code);
End_Token (C, TK_SEMICOLON);
elsif R.Is_Rune(Code, R.Left_Arrow) then
Set_Lexer_State (C, LX_OP_LESS, Code);
elsif R.Is_Rune(Code, R.Right_Arrow) then
Set_Lexer_State (C, LX_OP_GREATER, Code);
else
raise Syntax_Error;
end if;
when LX_OP_GREATER =>
if R.Is_Rune(Code, R.Equal) then
End_Token (C, TK_GE, Code);
else
End_Token (C, TK_GT);
goto Start_Over;
end if;
when LX_OP_LESS =>
if R.Is_Rune(Code, R.Equal) then
End_Token (C, TK_LE, Code);
else
End_Token (C, TK_LT);
goto Start_Over;
end if;
when LX_COMMENT =>
null;
when LX_IDENT =>
if R.Is_Alnum(Code) or else R.Is_Rune(Code, R.Underline) then
Feed_Token (C, Code);
else
End_Token (C, TK_IDENT);
goto Start_Over;
end if;
when LX_NUMBER =>
if R.Is_Digit(Code) then
Feed_Token (C, Code);
else
End_Token (C, TK_IDENT); -- TODO: change this
goto Start_Over;
end if;
end case;
end Feed_Char_Code;
procedure Feed (C: in out Compiler; Data: in S.Rune_Array) is
begin
for i in Data'Range loop
Feed_Char_Code (C, R.To_Code(Data(i)));
end loop;
end Feed;
procedure End_Feed (C: in out Compiler) is
begin
Feed_Char_Code (C, R.EOF);
end End_Feed;
end H3.Compilers;

58
lib2/h3-compilers.ads Normal file
View File

@ -0,0 +1,58 @@
with H3.CC;
with H3.Strings;
generic
type Rune_Type is (<>);
package H3.Compilers is
package R is new H3.CC(Rune_Type);
package S is new H3.Strings(Rune_Type);
Syntax_Error: exception;
type Compiler is tagged private;
procedure Feed (C: in out Compiler; Data: in S.Rune_Array);
procedure End_Feed (C: in out Compiler);
private
type Lexer_State is (
LX_START,
LX_COMMENT,
LX_IDENT,
LX_NUMBER,
LX_OP_GREATER,
LX_OP_LESS
);
type Lexer is record
State: Lexer_State := LX_START;
end record;
type Token_Id is (
TK_BSTR,
TK_BYTE,
TK_CHAR,
TK_CSTR,
TK_EOF,
TK_EOL,
TK_IDENT,
TK_GE,
TK_GT,
TK_LE,
TK_LT,
TK_SEMICOLON
);
type Token is record
Id: Token_Id := TK_EOF;
Buf: S.Elastic_String;
end record;
type Parser_State is (START, INCLUDE);
type Parser is record
State: Parser_State := START;
end record;
type Compiler is tagged record
Lx: Lexer;
Tk: Token;
end record;
end H3.Compilers;

View File

@ -22,10 +22,10 @@ package H3.MM is
procedure Create (R: in out Ref_Counted; V: in Item_Type);
function Get_Item_Pointer (R: in Ref_Counted) return Item_Pointer;
pragma Inline(Get_Item_Pointer);
pragma Inline (Get_Item_Pointer);
function Is_Shared (R: in Ref_Counted) return Boolean;
pragma Inline(Is_Shared);
pragma Inline (Is_Shared);
overriding procedure Initialize (R: in out Ref_Counted);
overriding procedure Adjust (R: in out Ref_Counted);

View File

@ -1,6 +1,6 @@
package body H3.Strings is
procedure Append (Obj: in out Elastic_String; V: in Character_Array) is
procedure Append (Obj: in out Elastic_String; V: in Rune_Array) is
begin
P.Append (P.Elastic_Array(Obj), V);
end;

View File

@ -1,22 +1,23 @@
with H3.Arrays;
generic
type Item_Type is (<>);
type Rune_Type is (<>);
package H3.Strings is
package P is new H3.Arrays(Item_Type, 1, Item_Type'First);
package P is new H3.Arrays(Rune_Type, 1, Rune_Type'First);
subtype Rune is P.Item;
subtype Rune_Array is P.Item_Array;
subtype Thin_Rune_Array_Pointer is P.Thin_Item_Array_Pointer;
Terminator_Length: System_Zero_Or_One renames P.Terminator_Length;
Terminator_Value: Item_Type renames P.Terminator_Value;
subtype Character_Array is P.Item_Array;
subtype Thin_Character_Array_Pointer is P.Thin_Item_Array_Pointer;
Terminator_Value: Rune renames P.Terminator_Value;
type Elastic_String is new P.Elastic_Array with record
--A: standard.integer := 999;
null;
end record;
overriding procedure Append (Obj: in out Elastic_String; V: in Character_Array);
overriding procedure Append (Obj: in out Elastic_String; V: in Rune_Array);
end H3.Strings;

View File

@ -6,7 +6,7 @@ package H3 is
subtype Boolean is Standard.Boolean;
subtype Natural is Standard.Natural;
subtype System_Character is Standard.Wide_Character;
subtype System_Rune is Standard.Wide_Character;
System_Byte_Bits: constant := System.Storage_Unit;
System_Word_Bits: constant := System.Word_Size;

10
lib2/hello3.adb Normal file
View File

@ -0,0 +1,10 @@
with H3.Compilers;
procedure hello3 is
package C is new H3.Compilers(Standard.Wide_Character);
Compiler: C.Compiler;
begin
Compiler.Feed ("<<=hello world");
Compiler.End_Feed;
end hello3;