more structural parsing
This commit is contained in:
parent
b118cb2bd3
commit
0942615c27
@ -7,6 +7,23 @@ package body H3.Compilers is
|
||||
LB_EOF: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_F,R.V.Right_Arrow); -- <EOF>
|
||||
LB_XINCLUDE: constant S.Rune_Array := (R.V.Number_Sign,R.V.LC_I,R.V.LC_N,R.V.LC_C,R.V.LC_L,R.V.LC_U,R.V.LC_D,R.V.LC_E); -- #include
|
||||
|
||||
|
||||
procedure Dump_Token (Tk: in Token) is
|
||||
begin
|
||||
Ada.Text_IO.Put (Tk.Id'Img);
|
||||
Ada.Text_IO.Put (": ");
|
||||
Ada.Text_IO.Put_Line (Standard.String(Utf8.From_Unicode_String(Tk.Buf.To_Rune_Array)));
|
||||
end Dump_Token;
|
||||
|
||||
procedure Dump_Rune (Code: in R.Code) is
|
||||
begin
|
||||
if R.Is_Eof(Code) then
|
||||
Ada.Text_IO.Put_Line ("EOF");
|
||||
else
|
||||
Ada.Text_IO.Put_Line (R.To_Rune(Code)'Img);
|
||||
end if;
|
||||
end Dump_Rune;
|
||||
|
||||
-- -------------------------------------------------------------------
|
||||
|
||||
procedure Start_Token (C: in out Compiler) is
|
||||
@ -86,27 +103,40 @@ package body H3.Compilers is
|
||||
|
||||
procedure End_Token (C: in out Compiler; Id: in Token_Id; Code: in R.Code) is
|
||||
begin
|
||||
S.Append (C.Tk.Buf, R.To_Rune(Code));
|
||||
C.Tk.Id := Id;
|
||||
Got_Token (C);
|
||||
Set_Lexer_State (C, LX_START);
|
||||
End_Token (C, Id, R.To_Rune(Code));
|
||||
end End_Token;
|
||||
|
||||
procedure Dump_Token (Tk: in Token) is
|
||||
begin
|
||||
Ada.Text_IO.Put (Tk.Id'Img);
|
||||
Ada.Text_IO.Put (": ");
|
||||
Ada.Text_IO.Put_Line (Standard.String(Utf8.From_Unicode_String(Tk.Buf.To_Rune_Array)));
|
||||
end Dump_Token;
|
||||
-- -------------------------------------------------------------------
|
||||
|
||||
procedure Set_Parser_State (C: in out Compiler; State: in Parser_State) is
|
||||
procedure Set_Parse_State (C: in out Compiler; Code: in Parse_State_Code) is
|
||||
begin
|
||||
C.Ps.Prev_State := C.Ps.State;
|
||||
C.Ps.State := State;
|
||||
end Set_Parser_State;
|
||||
C.Prs.States(C.Prs.Top).Current := Code;
|
||||
end Set_Parse_State;
|
||||
|
||||
procedure Start_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is
|
||||
procedure Push_Parse_State (C: in out Compiler; Code: in Parse_State_Code) is
|
||||
Top: System_Index;
|
||||
begin
|
||||
if C.Prs.Top = C.Prs.States'Last then
|
||||
raise Syntax_Error with "parse state stack exhausted";
|
||||
end if;
|
||||
|
||||
Top := C.Prs.Top + 1;
|
||||
declare
|
||||
S: Parse_State renames C.Prs.States(Top);
|
||||
begin
|
||||
S.Current := Code;
|
||||
end;
|
||||
C.Prs.Top := Top;
|
||||
end Push_Parse_State;
|
||||
|
||||
procedure Pop_Parse_State (C: in out Compiler) is
|
||||
begin
|
||||
C.Prs.top := C.Prs.Top - 1;
|
||||
end Pop_Parse_State;
|
||||
|
||||
-- -------------------------------------------------------------------
|
||||
|
||||
procedure Push_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is
|
||||
Top: System_Index;
|
||||
begin
|
||||
if C.Inc.Top = C.Inc.Streams'Last then
|
||||
@ -114,31 +144,35 @@ package body H3.Compilers is
|
||||
end if;
|
||||
|
||||
Top := C.Inc.Top + 1;
|
||||
declare
|
||||
St: Stream renames C.Inc.Streams(Top);
|
||||
declare
|
||||
S: Stream renames C.Inc.Streams(Top);
|
||||
begin
|
||||
Ada.Text_IO.Open (St.Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name)));
|
||||
St.Initial_Level := C.Ps.Level;
|
||||
St.Initial_Parser_State := C.Ps.Prev_State;
|
||||
St.Next_Parser_State := PS_INCLUDE_TERMINATOR;
|
||||
Ada.Text_IO.Open (S.Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name)));
|
||||
S.Prs_Level := C.Prs.Top; -- this is the parse state level of this include directive.
|
||||
end;
|
||||
C.Inc.Top := Top;
|
||||
|
||||
-- the parser should resume at the state when the include directive is seen
|
||||
Set_Parser_State (C, C.Ps.Prev_State); -- the state when the include directive is seen
|
||||
end Start_Inclusion;
|
||||
-- Switch the parse state to handle the terminator
|
||||
-- after the new pushed state has been popped out.
|
||||
Set_Parse_State (C, PS_INCLUDE_TERMINATOR);
|
||||
|
||||
-- Let the inner content be handled at the state as the include directive is seen.
|
||||
Push_Parse_State (C, C.Prs.States(C.Prs.Top - 1).Current);
|
||||
end Push_Inclusion;
|
||||
|
||||
procedure End_Inclusion (C: in out Compiler) is
|
||||
Top: constant System_Index := C.Inc.Top;
|
||||
procedure Pop_Inclusion (C: in out Compiler; Check: Boolean) is
|
||||
begin
|
||||
if C.Ps.State /= C.Inc.Streams(Top).Initial_Parser_State or else C.Ps.Level /= C.Inc.Streams(Top).Initial_Level then
|
||||
raise Syntax_Error with "unexpected end of inclusion";
|
||||
end if;
|
||||
Ada.Text_IO.Close (C.Inc.Streams(C.Inc.Top).Handle);
|
||||
Set_Parser_State (C, C.Inc.Streams(C.Inc.Top).Next_Parser_State);
|
||||
if Check then
|
||||
if C.Prs.Top /= C.Inc.Streams(C.Inc.Top).Prs_Level + 1 then
|
||||
raise Syntax_Error with "unblanced inclusion content";
|
||||
end if;
|
||||
end if;
|
||||
C.Inc.Top := C.Inc.Top - 1;
|
||||
end End_Inclusion;
|
||||
Pop_Parse_State (C);
|
||||
end Pop_Inclusion;
|
||||
|
||||
-- -------------------------------------------------------------------
|
||||
|
||||
procedure Parse_Start (C: in out Compiler) is
|
||||
begin
|
||||
@ -151,33 +185,44 @@ package body H3.Compilers is
|
||||
null;
|
||||
when TK_CSTR =>
|
||||
null;
|
||||
|
||||
when TK_DIRECTIVE =>
|
||||
if C.Tk.Buf.Equals(LB_XINCLUDE) then
|
||||
Set_Parser_State (C, PS_INCLUDE_TARGET);
|
||||
--Set_Parse_State (C, PS_INCLUDE_TARGET);
|
||||
Push_Parse_State (C, PS_INCLUDE_TARGET);
|
||||
else
|
||||
raise Syntax_Error with "unknown directive name";
|
||||
end if;
|
||||
|
||||
when TK_EOF =>
|
||||
if C.Inc.Top > 0 then
|
||||
End_Inclusion (C);
|
||||
Pop_Inclusion (C, True);
|
||||
else
|
||||
-- end of really the input??
|
||||
null;
|
||||
end if;
|
||||
|
||||
when TK_EOL =>
|
||||
null;
|
||||
when TK_GE =>
|
||||
null;
|
||||
when TK_GT =>
|
||||
null;
|
||||
|
||||
when TK_IDENT =>
|
||||
null;
|
||||
when TK_LE =>
|
||||
null;
|
||||
when TK_LT =>
|
||||
null;
|
||||
|
||||
--when TK_NUMBER =>
|
||||
-- null;
|
||||
|
||||
--when TK_CLASS =>
|
||||
--when TK_FUNC =>
|
||||
|
||||
-- plus or minus signed may be allowed here too.
|
||||
-- plusplus or miniusminus may be allowed here too.
|
||||
|
||||
when TK_SEMICOLON =>
|
||||
null;
|
||||
|
||||
when others =>
|
||||
raise Syntax_Error with "unexpected token";
|
||||
|
||||
end case;
|
||||
end Parse_Start;
|
||||
|
||||
@ -185,10 +230,10 @@ package body H3.Compilers is
|
||||
begin
|
||||
if C.Tk.Id = TK_CSTR then
|
||||
-- arrange to feed more data from the included file.
|
||||
Start_Inclusion (C, S.To_Rune_Array(C.Tk.Buf));
|
||||
Push_Inclusion (C, S.To_Rune_Array(C.Tk.Buf));
|
||||
else
|
||||
-- the target is not a string.
|
||||
Dump_Token (C.Tk);
|
||||
--Dump_Token (C.Tk);
|
||||
raise Syntax_Error with "string literal required";
|
||||
end if;
|
||||
end Parse_Include_Target;
|
||||
@ -198,23 +243,14 @@ package body H3.Compilers is
|
||||
if C.Tk.Id /= TK_SEMICOLON then
|
||||
raise Syntax_Error with "semicolon required";
|
||||
end if;
|
||||
|
||||
-- it is not safe to access information at the previous stack top.
|
||||
-- no problem in doing that becuase the current implementation uses
|
||||
-- a static array.
|
||||
Set_Parser_State (C, C.Inc.Streams(C.Inc.Top + 1).Initial_Parser_State);
|
||||
Pop_Parse_State (C);
|
||||
end Parse_Include_Terminator;
|
||||
|
||||
procedure Got_Token (C: in out Compiler) is
|
||||
begin
|
||||
ada.text_io.put (C.Tk.Id'Img);
|
||||
ada.text_io.put (" ");
|
||||
for i in C.Tk.Buf.Get_First_Index .. C.Tk.Buf.Get_Last_Index loop
|
||||
ada.text_io.put (Standard.Character'val(S.Rune'Pos(C.Tk.Buf.Get_Item(i))));
|
||||
end loop;
|
||||
ada.text_io.put_line("");
|
||||
Dump_Token (C.Tk);
|
||||
|
||||
case C.Ps.State is
|
||||
case C.Prs.States(C.Prs.Top).Current is
|
||||
when PS_START =>
|
||||
Parse_Start (C);
|
||||
when PS_INCLUDE_TARGET =>
|
||||
@ -230,17 +266,14 @@ ada.text_io.put_line("");
|
||||
procedure Feed_Char_Code (C: in out Compiler; Code: in R.Code) is
|
||||
begin
|
||||
<<Start_Over>>
|
||||
if R.Is_Eof(Code) then
|
||||
ada.text_io.put_line ("EOF");
|
||||
else
|
||||
ada.text_io.put_line (R.To_Rune(Code)'Img);
|
||||
end if;
|
||||
--Dump_Rune (Code);
|
||||
|
||||
case C.Lx.State is
|
||||
when LX_START =>
|
||||
if R.Is_Eof(Code) then
|
||||
Start_Token (C, LB_EOF);
|
||||
End_Token (C, TK_EOF);
|
||||
-- this procedure doesn't prevent you from feeding more
|
||||
-- this procedure doesn't prevent you from feeding more runes
|
||||
-- after EOF. but it's not desirable to feed more after EOF.
|
||||
elsif R.Is_Space(Code) then
|
||||
-- ignore. carry on
|
||||
@ -249,17 +282,25 @@ end if;
|
||||
Set_Lexer_State (C, LX_IDENT, Code);
|
||||
elsif R.Is_Digit(Code) then
|
||||
Set_Lexer_State (C, LX_NUMBER, Code);
|
||||
elsif R.Is_Rune(Code, R.V.Plus_Sign) then -- +
|
||||
Set_Lexer_State (C, LX_OP_PLUS, Code);
|
||||
elsif R.Is_Rune(Code, R.V.Minus_Sign) then -- -
|
||||
Set_Lexer_State (C, LX_OP_MINUS, Code);
|
||||
elsif R.Is_Rune(Code, R.V.Asterisk) then -- *
|
||||
Set_Lexer_State (C, LX_OP_MUL, Code);
|
||||
elsif R.Is_Rune(Code, R.V.Slash) then -- /
|
||||
Set_Lexer_State (C, LX_OP_DIV, Code);
|
||||
elsif R.Is_Rune(Code, R.V.Left_Arrow) then -- <
|
||||
Set_Lexer_State (C, LX_OP_LESS, Code);
|
||||
elsif R.Is_Rune(Code, R.V.Right_Arrow) then -- >
|
||||
Set_Lexer_State (C, LX_OP_GREATER, Code);
|
||||
elsif R.Is_Rune(Code, R.V.Number_Sign) then -- #
|
||||
Set_Lexer_State (C, LX_DIRECTIVE, Code);
|
||||
elsif R.Is_Rune(Code, R.V.Quotation) then -- "
|
||||
Set_Lexer_State (C, LX_CSTR);
|
||||
elsif R.Is_Rune(Code, R.V.Semicolon) then
|
||||
Start_Token (C, Code);
|
||||
End_Token (C, TK_SEMICOLON);
|
||||
elsif R.Is_Rune(Code, R.V.Left_Arrow) then
|
||||
Set_Lexer_State (C, LX_OP_LESS, Code);
|
||||
elsif R.Is_Rune(Code, R.V.Right_Arrow) then
|
||||
Set_Lexer_State (C, LX_OP_GREATER, Code);
|
||||
elsif R.Is_Rune(Code, R.V.Number_Sign) then
|
||||
Set_Lexer_State (C, LX_DIRECTIVE, Code);
|
||||
elsif R.Is_Rune(Code, R.V.Quotation) then -- double quote
|
||||
Set_Lexer_State (C, LX_CSTR);
|
||||
else
|
||||
raise Syntax_Error;
|
||||
end if;
|
||||
@ -299,6 +340,38 @@ end if;
|
||||
goto Start_Over;
|
||||
end if;
|
||||
|
||||
when LX_OP_PLUS =>
|
||||
if R.Is_Rune(Code, R.V.Plus_Sign) then
|
||||
End_Token (C, TK_PLUSPLUS, Code);
|
||||
else
|
||||
End_Token (C, TK_PLUS);
|
||||
goto Start_Over;
|
||||
end if;
|
||||
|
||||
when LX_OP_MINUS =>
|
||||
if R.Is_Rune(Code, R.V.Minus_Sign) then
|
||||
End_Token (C, TK_MINUSMINUS, Code);
|
||||
else
|
||||
End_Token (C, TK_MINUS);
|
||||
goto Start_Over;
|
||||
end if;
|
||||
|
||||
when LX_OP_MUL =>
|
||||
if R.Is_Rune(Code, R.V.Asterisk) then
|
||||
End_Token (C, TK_MULMUL, Code);
|
||||
else
|
||||
End_Token (C, TK_MUL);
|
||||
goto Start_Over;
|
||||
end if;
|
||||
|
||||
when LX_OP_DIV =>
|
||||
if R.Is_Rune(Code, R.V.Slash) then
|
||||
End_Token (C, TK_DIVDIV, Code);
|
||||
else
|
||||
End_Token (C, TK_DIV);
|
||||
goto Start_Over;
|
||||
end if;
|
||||
|
||||
when LX_OP_GREATER =>
|
||||
if R.Is_Rune(Code, R.V.Equal_Sign) then
|
||||
End_Token (C, TK_GE, Code);
|
||||
@ -338,7 +411,7 @@ end if;
|
||||
End_Feed (C);
|
||||
|
||||
if C.Inc.Top < Entry_Top then
|
||||
-- End_Inclusion() is called on EOF which is fed by End_Feed().
|
||||
-- Pop_Inclusion() is called on EOF which is fed by End_Feed().
|
||||
-- It also decrements the stack pointer. The current inclusion
|
||||
-- stack pointer will get less that First_Top if the first inclusion
|
||||
-- level entered is exited.
|
||||
@ -363,4 +436,20 @@ end if;
|
||||
Feed_Char_Code (C, R.P.EOF);
|
||||
end End_Feed;
|
||||
|
||||
|
||||
-- -------------------------------------------------------------------
|
||||
procedure Initialize (C: in out Compiler) is
|
||||
begin
|
||||
Push_Parse_State (C, PS_START);
|
||||
end Initialize;
|
||||
|
||||
procedure Finalize (C: in out Compiler) is
|
||||
begin
|
||||
while C.Inc.Top > 0 loop
|
||||
Pop_Inclusion (C, False);
|
||||
end loop;
|
||||
while C.Prs.Top > 0 loop
|
||||
Pop_Parse_State (C);
|
||||
end loop;
|
||||
end Finalize;
|
||||
end H3.Compilers;
|
||||
|
@ -1,5 +1,6 @@
|
||||
with H3.Runes;
|
||||
with H3.Strings;
|
||||
with Ada.Finalization;
|
||||
with Ada.Text_IO;
|
||||
|
||||
generic
|
||||
@ -10,11 +11,15 @@ package H3.Compilers is
|
||||
|
||||
Syntax_Error: exception;
|
||||
|
||||
type Compiler is tagged limited private;
|
||||
--type Compiler is tagged limited private;
|
||||
type Compiler is new Ada.Finalization.Limited_Controlled with private;
|
||||
|
||||
procedure Feed (C: in out Compiler; Data: in S.Rune_Array);
|
||||
procedure End_Feed (C: in out Compiler);
|
||||
|
||||
overriding procedure Initialize (C: in out Compiler);
|
||||
overriding procedure Finalize (C: in out Compiler);
|
||||
|
||||
private
|
||||
type Lexer_State is (
|
||||
LX_START,
|
||||
@ -24,8 +29,12 @@ private
|
||||
LX_DIRECTIVE,
|
||||
LX_IDENT,
|
||||
LX_NUMBER,
|
||||
LX_OP_DIV,
|
||||
LX_OP_GREATER,
|
||||
LX_OP_LESS
|
||||
LX_OP_LESS,
|
||||
LX_OP_MINUS,
|
||||
LX_OP_MUL,
|
||||
LX_OP_PLUS
|
||||
);
|
||||
type Lexer is record
|
||||
State: Lexer_State := LX_START;
|
||||
@ -37,6 +46,8 @@ private
|
||||
TK_CHAR,
|
||||
TK_CSTR,
|
||||
TK_DIRECTIVE,
|
||||
TK_DIV,
|
||||
TK_DIVDIV,
|
||||
TK_EOF,
|
||||
TK_EOL,
|
||||
TK_IDENT,
|
||||
@ -44,6 +55,12 @@ private
|
||||
TK_GT,
|
||||
TK_LE,
|
||||
TK_LT,
|
||||
TK_MINUS,
|
||||
TK_MINUSMINUS,
|
||||
TK_MUL,
|
||||
TK_MULMUL,
|
||||
TK_PLUS,
|
||||
TK_PLUSPLUS,
|
||||
TK_SEMICOLON
|
||||
);
|
||||
type Token is record
|
||||
@ -51,37 +68,46 @@ private
|
||||
Buf: S.Elastic_String;
|
||||
end record;
|
||||
|
||||
type Parser_State is (
|
||||
-- ------------------------------------------------------------------
|
||||
|
||||
type Parse_State_Code is (
|
||||
PS_START,
|
||||
PS_INCLUDE_TARGET,
|
||||
PS_INCLUDE_TERMINATOR
|
||||
);
|
||||
type Parser is record
|
||||
State: Parser_State := PS_START;
|
||||
Prev_State: Parser_State := PS_START;
|
||||
Level: System_Index := 1;
|
||||
|
||||
type Parse_State is record
|
||||
Current: Parse_State_Code := PS_START;
|
||||
end record;
|
||||
|
||||
type Parse_State_Array is array(System_Index range<>) of Parse_State;
|
||||
|
||||
type Parse_State_Stack(Capa: System_Index) is record
|
||||
States: Parse_State_Array(System_Index'First .. Capa);
|
||||
Top: System_Size := System_Size'First; -- 0
|
||||
end record;
|
||||
|
||||
-- ------------------------------------------------------------------
|
||||
|
||||
type Stream is record
|
||||
Handle: Ada.Text_IO.File_Type;
|
||||
--Handle: System_Size;
|
||||
|
||||
|
||||
Initial_Level: System_Index; -- the block level where this inclusion is entered
|
||||
Initial_Parser_State: Parser_State; -- the parser state before the #include has been seen?
|
||||
Next_Parser_State: Parser_State;
|
||||
Prs_Level: System_Index;
|
||||
end record;
|
||||
|
||||
type Stream_Array is array(System_Index range <>) of Stream;
|
||||
|
||||
type Include_Stack(Capa: System_Index) is record
|
||||
Streams: Stream_Array(System_Index'First .. Capa);
|
||||
Top: System_Size := 0;
|
||||
Top: System_Size := System_Size'First; -- 0
|
||||
end record;
|
||||
-- ------------------------------------------------------------------
|
||||
|
||||
type Compiler is tagged limited record
|
||||
--type Compiler is tagged limited record
|
||||
type Compiler is new Ada.Finalization.Limited_Controlled with record
|
||||
Lx: Lexer;
|
||||
Tk: Token;
|
||||
Ps: Parser;
|
||||
Inc: Include_Stack(32);
|
||||
Prs: Parse_State_Stack(128); -- TODO: make this dynamic. single access type. dynamic allocation
|
||||
Inc: Include_Stack(32); -- TODO: make this dynamic. single access type. dynamic allocation
|
||||
end record;
|
||||
|
||||
end H3.Compilers;
|
||||
|
@ -5,6 +5,9 @@ package H3 is
|
||||
--pragma Preelaborate (H3);
|
||||
|
||||
subtype Boolean is Standard.Boolean;
|
||||
True: constant Boolean := (1 = 1);
|
||||
False: constant Boolean := (1 /= 1);
|
||||
|
||||
subtype Natural is Standard.Natural;
|
||||
|
||||
subtype System_Rune is Standard.Wide_Character;
|
||||
|
Loading…
x
Reference in New Issue
Block a user