From 0942615c2703866124d744f63229601ba4017ced Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Tue, 16 Nov 2021 13:34:32 +0000 Subject: [PATCH] more structural parsing --- lib2/h3-compilers.adb | 229 +++++++++++++++++++++++++++++------------- lib2/h3-compilers.ads | 60 +++++++---- lib2/h3.ads | 3 + 3 files changed, 205 insertions(+), 87 deletions(-) diff --git a/lib2/h3-compilers.adb b/lib2/h3-compilers.adb index f0dc2d8..9edadae 100644 --- a/lib2/h3-compilers.adb +++ b/lib2/h3-compilers.adb @@ -7,6 +7,23 @@ package body H3.Compilers is LB_EOF: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_F,R.V.Right_Arrow); -- LB_XINCLUDE: constant S.Rune_Array := (R.V.Number_Sign,R.V.LC_I,R.V.LC_N,R.V.LC_C,R.V.LC_L,R.V.LC_U,R.V.LC_D,R.V.LC_E); -- #include + + procedure Dump_Token (Tk: in Token) is + begin + Ada.Text_IO.Put (Tk.Id'Img); + Ada.Text_IO.Put (": "); + Ada.Text_IO.Put_Line (Standard.String(Utf8.From_Unicode_String(Tk.Buf.To_Rune_Array))); + end Dump_Token; + + procedure Dump_Rune (Code: in R.Code) is + begin + if R.Is_Eof(Code) then + Ada.Text_IO.Put_Line ("EOF"); + else + Ada.Text_IO.Put_Line (R.To_Rune(Code)'Img); + end if; + end Dump_Rune; + -- ------------------------------------------------------------------- procedure Start_Token (C: in out Compiler) is @@ -86,27 +103,40 @@ package body H3.Compilers is procedure End_Token (C: in out Compiler; Id: in Token_Id; Code: in R.Code) is begin - S.Append (C.Tk.Buf, R.To_Rune(Code)); - C.Tk.Id := Id; - Got_Token (C); - Set_Lexer_State (C, LX_START); + End_Token (C, Id, R.To_Rune(Code)); end End_Token; - procedure Dump_Token (Tk: in Token) is - begin - Ada.Text_IO.Put (Tk.Id'Img); - Ada.Text_IO.Put (": "); - Ada.Text_IO.Put_Line (Standard.String(Utf8.From_Unicode_String(Tk.Buf.To_Rune_Array))); - end Dump_Token; -- ------------------------------------------------------------------- - procedure Set_Parser_State (C: in out Compiler; State: in Parser_State) is + procedure Set_Parse_State (C: in out Compiler; Code: in Parse_State_Code) is begin - C.Ps.Prev_State := C.Ps.State; - C.Ps.State := State; - end Set_Parser_State; + C.Prs.States(C.Prs.Top).Current := Code; + end Set_Parse_State; - procedure Start_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is + procedure Push_Parse_State (C: in out Compiler; Code: in Parse_State_Code) is + Top: System_Index; + begin + if C.Prs.Top = C.Prs.States'Last then + raise Syntax_Error with "parse state stack exhausted"; + end if; + + Top := C.Prs.Top + 1; + declare + S: Parse_State renames C.Prs.States(Top); + begin + S.Current := Code; + end; + C.Prs.Top := Top; + end Push_Parse_State; + + procedure Pop_Parse_State (C: in out Compiler) is + begin + C.Prs.top := C.Prs.Top - 1; + end Pop_Parse_State; + + -- ------------------------------------------------------------------- + + procedure Push_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is Top: System_Index; begin if C.Inc.Top = C.Inc.Streams'Last then @@ -114,31 +144,35 @@ package body H3.Compilers is end if; Top := C.Inc.Top + 1; - declare - St: Stream renames C.Inc.Streams(Top); + declare + S: Stream renames C.Inc.Streams(Top); begin - Ada.Text_IO.Open (St.Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name))); - St.Initial_Level := C.Ps.Level; - St.Initial_Parser_State := C.Ps.Prev_State; - St.Next_Parser_State := PS_INCLUDE_TERMINATOR; + Ada.Text_IO.Open (S.Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name))); + S.Prs_Level := C.Prs.Top; -- this is the parse state level of this include directive. end; C.Inc.Top := Top; - -- the parser should resume at the state when the include directive is seen - Set_Parser_State (C, C.Ps.Prev_State); -- the state when the include directive is seen - end Start_Inclusion; + -- Switch the parse state to handle the terminator + -- after the new pushed state has been popped out. + Set_Parse_State (C, PS_INCLUDE_TERMINATOR); + -- Let the inner content be handled at the state as the include directive is seen. + Push_Parse_State (C, C.Prs.States(C.Prs.Top - 1).Current); + end Push_Inclusion; - procedure End_Inclusion (C: in out Compiler) is - Top: constant System_Index := C.Inc.Top; + procedure Pop_Inclusion (C: in out Compiler; Check: Boolean) is begin - if C.Ps.State /= C.Inc.Streams(Top).Initial_Parser_State or else C.Ps.Level /= C.Inc.Streams(Top).Initial_Level then - raise Syntax_Error with "unexpected end of inclusion"; - end if; Ada.Text_IO.Close (C.Inc.Streams(C.Inc.Top).Handle); - Set_Parser_State (C, C.Inc.Streams(C.Inc.Top).Next_Parser_State); + if Check then + if C.Prs.Top /= C.Inc.Streams(C.Inc.Top).Prs_Level + 1 then + raise Syntax_Error with "unblanced inclusion content"; + end if; + end if; C.Inc.Top := C.Inc.Top - 1; - end End_Inclusion; + Pop_Parse_State (C); + end Pop_Inclusion; + + -- ------------------------------------------------------------------- procedure Parse_Start (C: in out Compiler) is begin @@ -151,33 +185,44 @@ package body H3.Compilers is null; when TK_CSTR => null; + when TK_DIRECTIVE => if C.Tk.Buf.Equals(LB_XINCLUDE) then - Set_Parser_State (C, PS_INCLUDE_TARGET); + --Set_Parse_State (C, PS_INCLUDE_TARGET); + Push_Parse_State (C, PS_INCLUDE_TARGET); else raise Syntax_Error with "unknown directive name"; end if; + when TK_EOF => if C.Inc.Top > 0 then - End_Inclusion (C); + Pop_Inclusion (C, True); else -- end of really the input?? null; end if; + when TK_EOL => null; - when TK_GE => - null; - when TK_GT => - null; + when TK_IDENT => null; - when TK_LE => - null; - when TK_LT => - null; + + --when TK_NUMBER => + -- null; + + --when TK_CLASS => + --when TK_FUNC => + + -- plus or minus signed may be allowed here too. + -- plusplus or miniusminus may be allowed here too. + when TK_SEMICOLON => null; + + when others => + raise Syntax_Error with "unexpected token"; + end case; end Parse_Start; @@ -185,10 +230,10 @@ package body H3.Compilers is begin if C.Tk.Id = TK_CSTR then -- arrange to feed more data from the included file. - Start_Inclusion (C, S.To_Rune_Array(C.Tk.Buf)); + Push_Inclusion (C, S.To_Rune_Array(C.Tk.Buf)); else -- the target is not a string. - Dump_Token (C.Tk); + --Dump_Token (C.Tk); raise Syntax_Error with "string literal required"; end if; end Parse_Include_Target; @@ -198,23 +243,14 @@ package body H3.Compilers is if C.Tk.Id /= TK_SEMICOLON then raise Syntax_Error with "semicolon required"; end if; - - -- it is not safe to access information at the previous stack top. - -- no problem in doing that becuase the current implementation uses - -- a static array. - Set_Parser_State (C, C.Inc.Streams(C.Inc.Top + 1).Initial_Parser_State); + Pop_Parse_State (C); end Parse_Include_Terminator; procedure Got_Token (C: in out Compiler) is begin -ada.text_io.put (C.Tk.Id'Img); -ada.text_io.put (" "); -for i in C.Tk.Buf.Get_First_Index .. C.Tk.Buf.Get_Last_Index loop - ada.text_io.put (Standard.Character'val(S.Rune'Pos(C.Tk.Buf.Get_Item(i)))); -end loop; -ada.text_io.put_line(""); + Dump_Token (C.Tk); - case C.Ps.State is + case C.Prs.States(C.Prs.Top).Current is when PS_START => Parse_Start (C); when PS_INCLUDE_TARGET => @@ -230,17 +266,14 @@ ada.text_io.put_line(""); procedure Feed_Char_Code (C: in out Compiler; Code: in R.Code) is begin <> -if R.Is_Eof(Code) then - ada.text_io.put_line ("EOF"); -else - ada.text_io.put_line (R.To_Rune(Code)'Img); -end if; + --Dump_Rune (Code); + case C.Lx.State is when LX_START => if R.Is_Eof(Code) then Start_Token (C, LB_EOF); End_Token (C, TK_EOF); - -- this procedure doesn't prevent you from feeding more + -- this procedure doesn't prevent you from feeding more runes -- after EOF. but it's not desirable to feed more after EOF. elsif R.Is_Space(Code) then -- ignore. carry on @@ -249,17 +282,25 @@ end if; Set_Lexer_State (C, LX_IDENT, Code); elsif R.Is_Digit(Code) then Set_Lexer_State (C, LX_NUMBER, Code); + elsif R.Is_Rune(Code, R.V.Plus_Sign) then -- + + Set_Lexer_State (C, LX_OP_PLUS, Code); + elsif R.Is_Rune(Code, R.V.Minus_Sign) then -- - + Set_Lexer_State (C, LX_OP_MINUS, Code); + elsif R.Is_Rune(Code, R.V.Asterisk) then -- * + Set_Lexer_State (C, LX_OP_MUL, Code); + elsif R.Is_Rune(Code, R.V.Slash) then -- / + Set_Lexer_State (C, LX_OP_DIV, Code); + elsif R.Is_Rune(Code, R.V.Left_Arrow) then -- < + Set_Lexer_State (C, LX_OP_LESS, Code); + elsif R.Is_Rune(Code, R.V.Right_Arrow) then -- > + Set_Lexer_State (C, LX_OP_GREATER, Code); + elsif R.Is_Rune(Code, R.V.Number_Sign) then -- # + Set_Lexer_State (C, LX_DIRECTIVE, Code); + elsif R.Is_Rune(Code, R.V.Quotation) then -- " + Set_Lexer_State (C, LX_CSTR); elsif R.Is_Rune(Code, R.V.Semicolon) then Start_Token (C, Code); End_Token (C, TK_SEMICOLON); - elsif R.Is_Rune(Code, R.V.Left_Arrow) then - Set_Lexer_State (C, LX_OP_LESS, Code); - elsif R.Is_Rune(Code, R.V.Right_Arrow) then - Set_Lexer_State (C, LX_OP_GREATER, Code); - elsif R.Is_Rune(Code, R.V.Number_Sign) then - Set_Lexer_State (C, LX_DIRECTIVE, Code); - elsif R.Is_Rune(Code, R.V.Quotation) then -- double quote - Set_Lexer_State (C, LX_CSTR); else raise Syntax_Error; end if; @@ -299,6 +340,38 @@ end if; goto Start_Over; end if; + when LX_OP_PLUS => + if R.Is_Rune(Code, R.V.Plus_Sign) then + End_Token (C, TK_PLUSPLUS, Code); + else + End_Token (C, TK_PLUS); + goto Start_Over; + end if; + + when LX_OP_MINUS => + if R.Is_Rune(Code, R.V.Minus_Sign) then + End_Token (C, TK_MINUSMINUS, Code); + else + End_Token (C, TK_MINUS); + goto Start_Over; + end if; + + when LX_OP_MUL => + if R.Is_Rune(Code, R.V.Asterisk) then + End_Token (C, TK_MULMUL, Code); + else + End_Token (C, TK_MUL); + goto Start_Over; + end if; + + when LX_OP_DIV => + if R.Is_Rune(Code, R.V.Slash) then + End_Token (C, TK_DIVDIV, Code); + else + End_Token (C, TK_DIV); + goto Start_Over; + end if; + when LX_OP_GREATER => if R.Is_Rune(Code, R.V.Equal_Sign) then End_Token (C, TK_GE, Code); @@ -338,7 +411,7 @@ end if; End_Feed (C); if C.Inc.Top < Entry_Top then - -- End_Inclusion() is called on EOF which is fed by End_Feed(). + -- Pop_Inclusion() is called on EOF which is fed by End_Feed(). -- It also decrements the stack pointer. The current inclusion -- stack pointer will get less that First_Top if the first inclusion -- level entered is exited. @@ -363,4 +436,20 @@ end if; Feed_Char_Code (C, R.P.EOF); end End_Feed; + + -- ------------------------------------------------------------------- + procedure Initialize (C: in out Compiler) is + begin + Push_Parse_State (C, PS_START); + end Initialize; + + procedure Finalize (C: in out Compiler) is + begin + while C.Inc.Top > 0 loop + Pop_Inclusion (C, False); + end loop; + while C.Prs.Top > 0 loop + Pop_Parse_State (C); + end loop; + end Finalize; end H3.Compilers; diff --git a/lib2/h3-compilers.ads b/lib2/h3-compilers.ads index 2751e7b..dfe5db4 100644 --- a/lib2/h3-compilers.ads +++ b/lib2/h3-compilers.ads @@ -1,5 +1,6 @@ with H3.Runes; with H3.Strings; +with Ada.Finalization; with Ada.Text_IO; generic @@ -10,11 +11,15 @@ package H3.Compilers is Syntax_Error: exception; - type Compiler is tagged limited private; + --type Compiler is tagged limited private; + type Compiler is new Ada.Finalization.Limited_Controlled with private; procedure Feed (C: in out Compiler; Data: in S.Rune_Array); procedure End_Feed (C: in out Compiler); + overriding procedure Initialize (C: in out Compiler); + overriding procedure Finalize (C: in out Compiler); + private type Lexer_State is ( LX_START, @@ -24,8 +29,12 @@ private LX_DIRECTIVE, LX_IDENT, LX_NUMBER, + LX_OP_DIV, LX_OP_GREATER, - LX_OP_LESS + LX_OP_LESS, + LX_OP_MINUS, + LX_OP_MUL, + LX_OP_PLUS ); type Lexer is record State: Lexer_State := LX_START; @@ -37,6 +46,8 @@ private TK_CHAR, TK_CSTR, TK_DIRECTIVE, + TK_DIV, + TK_DIVDIV, TK_EOF, TK_EOL, TK_IDENT, @@ -44,6 +55,12 @@ private TK_GT, TK_LE, TK_LT, + TK_MINUS, + TK_MINUSMINUS, + TK_MUL, + TK_MULMUL, + TK_PLUS, + TK_PLUSPLUS, TK_SEMICOLON ); type Token is record @@ -51,37 +68,46 @@ private Buf: S.Elastic_String; end record; - type Parser_State is ( + -- ------------------------------------------------------------------ + + type Parse_State_Code is ( PS_START, PS_INCLUDE_TARGET, PS_INCLUDE_TERMINATOR ); - type Parser is record - State: Parser_State := PS_START; - Prev_State: Parser_State := PS_START; - Level: System_Index := 1; + + type Parse_State is record + Current: Parse_State_Code := PS_START; end record; + type Parse_State_Array is array(System_Index range<>) of Parse_State; + + type Parse_State_Stack(Capa: System_Index) is record + States: Parse_State_Array(System_Index'First .. Capa); + Top: System_Size := System_Size'First; -- 0 + end record; + + -- ------------------------------------------------------------------ + type Stream is record Handle: Ada.Text_IO.File_Type; - --Handle: System_Size; - - - Initial_Level: System_Index; -- the block level where this inclusion is entered - Initial_Parser_State: Parser_State; -- the parser state before the #include has been seen? - Next_Parser_State: Parser_State; + Prs_Level: System_Index; end record; type Stream_Array is array(System_Index range <>) of Stream; + type Include_Stack(Capa: System_Index) is record Streams: Stream_Array(System_Index'First .. Capa); - Top: System_Size := 0; + Top: System_Size := System_Size'First; -- 0 end record; + -- ------------------------------------------------------------------ - type Compiler is tagged limited record + --type Compiler is tagged limited record + type Compiler is new Ada.Finalization.Limited_Controlled with record Lx: Lexer; Tk: Token; - Ps: Parser; - Inc: Include_Stack(32); + Prs: Parse_State_Stack(128); -- TODO: make this dynamic. single access type. dynamic allocation + Inc: Include_Stack(32); -- TODO: make this dynamic. single access type. dynamic allocation end record; + end H3.Compilers; diff --git a/lib2/h3.ads b/lib2/h3.ads index edbb3a3..5e8de75 100644 --- a/lib2/h3.ads +++ b/lib2/h3.ads @@ -5,6 +5,9 @@ package H3 is --pragma Preelaborate (H3); subtype Boolean is Standard.Boolean; + True: constant Boolean := (1 = 1); + False: constant Boolean := (1 /= 1); + subtype Natural is Standard.Natural; subtype System_Rune is Standard.Wide_Character;