From b118cb2bd321562c9b1c137d70dc176e54d7d0f5 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Sun, 14 Nov 2021 15:07:41 +0000 Subject: [PATCH] experimenting include handling --- lib2/Makefile | 6 +- lib2/h3-compilers.adb | 143 ++++++++++++++++++++++++++++-------------- lib2/h3-compilers.ads | 34 ++++++---- lib2/h3-runes.ads | 6 +- lib2/h3-utf8.adb | 16 ++--- lib2/h3-utf8.ads | 8 +-- lib2/hello3.adb | 2 +- 7 files changed, 136 insertions(+), 79 deletions(-) diff --git a/lib2/Makefile b/lib2/Makefile index 2952e7a..76683e3 100644 --- a/lib2/Makefile +++ b/lib2/Makefile @@ -2,13 +2,13 @@ OPTS := -gnata -gnatW8 -gnatwa -gnatya -gnatyb -gnatyk -gnatyn -gnatyp ##-gnatyt all: hello hello2 hello3 hello: hello.adb - gnat make $(OPTS) hello && valgrind ./hello + gnat make $(OPTS) hello hello2: hello2.adb - gnat make $(OPTS) hello2 && valgrind ./hello2 + gnat make $(OPTS) hello2 hello3: hello3.adb - gnat make $(OPTS) hello3 && valgrind ./hello3 + gnat make $(OPTS) hello3 clean: rm -rf *.o *.ali hello hello2 hello3 diff --git a/lib2/h3-compilers.adb b/lib2/h3-compilers.adb index a032c49..f0dc2d8 100644 --- a/lib2/h3-compilers.adb +++ b/lib2/h3-compilers.adb @@ -1,5 +1,4 @@ with H3.Utf8; -with ada.text_io; package body H3.Compilers is type Char_Array is array(System_Index range<>) of Standard.Character; @@ -93,13 +92,54 @@ package body H3.Compilers is Set_Lexer_State (C, LX_START); end End_Token; + procedure Dump_Token (Tk: in Token) is + begin + Ada.Text_IO.Put (Tk.Id'Img); + Ada.Text_IO.Put (": "); + Ada.Text_IO.Put_Line (Standard.String(Utf8.From_Unicode_String(Tk.Buf.To_Rune_Array))); + end Dump_Token; -- ------------------------------------------------------------------- procedure Set_Parser_State (C: in out Compiler; State: in Parser_State) is begin + C.Ps.Prev_State := C.Ps.State; C.Ps.State := State; end Set_Parser_State; + procedure Start_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is + Top: System_Index; + begin + if C.Inc.Top = C.Inc.Streams'Last then + raise Syntax_Error with "inclusion depth too deep"; + end if; + + Top := C.Inc.Top + 1; + declare + St: Stream renames C.Inc.Streams(Top); + begin + Ada.Text_IO.Open (St.Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name))); + St.Initial_Level := C.Ps.Level; + St.Initial_Parser_State := C.Ps.Prev_State; + St.Next_Parser_State := PS_INCLUDE_TERMINATOR; + end; + C.Inc.Top := Top; + + -- the parser should resume at the state when the include directive is seen + Set_Parser_State (C, C.Ps.Prev_State); -- the state when the include directive is seen + end Start_Inclusion; + + + procedure End_Inclusion (C: in out Compiler) is + Top: constant System_Index := C.Inc.Top; + begin + if C.Ps.State /= C.Inc.Streams(Top).Initial_Parser_State or else C.Ps.Level /= C.Inc.Streams(Top).Initial_Level then + raise Syntax_Error with "unexpected end of inclusion"; + end if; + Ada.Text_IO.Close (C.Inc.Streams(C.Inc.Top).Handle); + Set_Parser_State (C, C.Inc.Streams(C.Inc.Top).Next_Parser_State); + C.Inc.Top := C.Inc.Top - 1; + end End_Inclusion; + procedure Parse_Start (C: in out Compiler) is begin case C.Tk.Id is @@ -113,12 +153,17 @@ package body H3.Compilers is null; when TK_DIRECTIVE => if C.Tk.Buf.Equals(LB_XINCLUDE) then - Set_Parser_State (C, PS_INCLUDE); + Set_Parser_State (C, PS_INCLUDE_TARGET); else raise Syntax_Error with "unknown directive name"; end if; when TK_EOF => - null; + if C.Inc.Top > 0 then + End_Inclusion (C); + else + -- end of really the input?? + null; + end if; when TK_EOL => null; when TK_GE => @@ -136,65 +181,48 @@ package body H3.Compilers is end case; end Parse_Start; - procedure Start_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is - Top: System_Index; - begin - if C.St.Top = C.St.Items'Last then - raise Syntax_Error with "inclusion depth too deep"; - end if; - - Top := C.St.Top + 1; - Ada.Text_IO.Open (C.St.Items(Top).Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name))); - C.St.Top := Top; - end Start_Inclusion; - - procedure End_Inclusion (C: in out Compiler) is - begin - Ada.Text_IO.Close (C.St.Items(C.St.Top).Handle); - C.St.Top := C.St.Top - 1; - end End_Inclusion; - - procedure Parse_Include (C: in out Compiler) is + procedure Parse_Include_Target (C: in out Compiler) is begin if C.Tk.Id = TK_CSTR then -- arrange to feed more data from the included file. Start_Inclusion (C, S.To_Rune_Array(C.Tk.Buf)); - null; else + -- the target is not a string. + Dump_Token (C.Tk); raise Syntax_Error with "string literal required"; end if; - end Parse_Include; + end Parse_Include_Target; - procedure Parse_Include_End (C: in out Compiler) is + procedure Parse_Include_Terminator (C: in out Compiler) is begin if C.Tk.Id /= TK_SEMICOLON then raise Syntax_Error with "semicolon required"; end if; - -- TODO: put the state back to START??? - end Parse_Include_End; + -- it is not safe to access information at the previous stack top. + -- no problem in doing that becuase the current implementation uses + -- a static array. + Set_Parser_State (C, C.Inc.Streams(C.Inc.Top + 1).Initial_Parser_State); + end Parse_Include_Terminator; procedure Got_Token (C: in out Compiler) is begin - --case C.P.State IS - -- when START => - -- null; - --end case; - ada.text_io.put (C.Tk.Id'Img); ada.text_io.put (" "); for i in C.Tk.Buf.Get_First_Index .. C.Tk.Buf.Get_Last_Index loop - ada.text_io.put (standard.character'val(S.Rune'Pos(C.Tk.Buf.Get_Item(i)))); + ada.text_io.put (Standard.Character'val(S.Rune'Pos(C.Tk.Buf.Get_Item(i)))); end loop; ada.text_io.put_line(""); case C.Ps.State is - when PS_START => + when PS_START => Parse_Start (C); - when PS_INCLUDE => - Parse_Include (C); + when PS_INCLUDE_TARGET => + Parse_Include_Target (C); + when PS_INCLUDE_TERMINATOR => + Parse_Include_Terminator (C); when others => - raise Syntax_Error; -- TODO: change this... + raise Syntax_Error with "unknown parser state"; -- TODO: change this... end case; end Got_Token; @@ -290,21 +318,42 @@ end if; end case; end Feed_Char_Code; + procedure Feed_Inc (C: in out Compiler) is + Entry_Top: constant System_Index := C.Inc.Top; + begin + loop + while not Ada.Text_IO.End_Of_File(C.Inc.Streams(C.Inc.Top).Handle) loop + declare + Ch: Standard.Character; + begin + Ada.Text_IO.Get (C.Inc.Streams(C.Inc.Top).Handle, Ch); + Feed_Char_Code (C, Standard.Character'Pos(Ch)); + end; + -- After each feed, C.Inc.Top may get incremented if an inclusion + -- directive is found. so the while loop iterates over the streams + -- of all inner included levels. End_Feed() below drops C.Inc.Top + -- and the outer loop will resume the inner while loop at the outer + -- inclusion level until all entered inclusion levels are exited. + end loop; + End_Feed (C); + + if C.Inc.Top < Entry_Top then + -- End_Inclusion() is called on EOF which is fed by End_Feed(). + -- It also decrements the stack pointer. The current inclusion + -- stack pointer will get less that First_Top if the first inclusion + -- level entered is exited. + exit; + end if; + end loop; + end Feed_Inc; + procedure Feed (C: in out Compiler; Data: in S.Rune_Array) is begin for i in Data'Range loop Feed_Char_Code (C, R.To_Code(Data(i))); - if C.St.Top > 0 then - declare - Ch: Standard.Character; - begin - while not Ada.Text_IO.End_Of_File(C.St.Items(C.St.Top).Handle) loop - Ada.Text_IO.Get (C.St.Items(C.St.Top).Handle, Ch); - Feed_Char_Code (C, Standard.Character'Pos(Ch)); - --if inclusion stack is not Empty??? - end loop; - end; + if C.Inc.Top > 0 then + Feed_Inc (C); end if; end loop; end Feed; diff --git a/lib2/h3-compilers.ads b/lib2/h3-compilers.ads index 6854b62..2751e7b 100644 --- a/lib2/h3-compilers.ads +++ b/lib2/h3-compilers.ads @@ -31,17 +31,6 @@ private State: Lexer_State := LX_START; end record; - type Stream is record - Handle: Ada.Text_IO.File_Type; - --Handle: System_Size; - end record; - - type Stream_Array is array(System_Index range <>) of Stream; - type Stream_Stack(Capa: System_Index) is record - Items: Stream_Array(System_Index'First .. Capa); - Top: System_Size := 0; - end record; - type Token_Id is ( TK_BSTR, TK_BYTE, @@ -64,16 +53,35 @@ private type Parser_State is ( PS_START, - PS_INCLUDE + PS_INCLUDE_TARGET, + PS_INCLUDE_TERMINATOR ); type Parser is record State: Parser_State := PS_START; + Prev_State: Parser_State := PS_START; + Level: System_Index := 1; + end record; + + type Stream is record + Handle: Ada.Text_IO.File_Type; + --Handle: System_Size; + + + Initial_Level: System_Index; -- the block level where this inclusion is entered + Initial_Parser_State: Parser_State; -- the parser state before the #include has been seen? + Next_Parser_State: Parser_State; + end record; + + type Stream_Array is array(System_Index range <>) of Stream; + type Include_Stack(Capa: System_Index) is record + Streams: Stream_Array(System_Index'First .. Capa); + Top: System_Size := 0; end record; type Compiler is tagged limited record Lx: Lexer; Tk: Token; Ps: Parser; - St: Stream_Stack(32); + Inc: Include_Stack(32); end record; end H3.Compilers; diff --git a/lib2/h3-runes.ads b/lib2/h3-runes.ads index ca00603..1fd2ce9 100644 --- a/lib2/h3-runes.ads +++ b/lib2/h3-runes.ads @@ -1,7 +1,7 @@ generic -- any discrete type accepted. -- can't ada limit type to one of Character, Wide_Character, Wide_Wide_Character? - type Rune_Type is (<>); + type Rune_Type is (<>); package H3.Runes is -- -like character classification plus other features. -- unicode-based. no system locale honored. @@ -46,7 +46,7 @@ package H3.Runes is GS : constant Code := 29; RS : constant Code := 30; US : constant Code := 31; - Space : constant Code := 32; -- + Space : constant Code := 32; Exclamation : constant Code := 33; -- ! Quotation : constant Code := 34; -- " Number_Sign : constant Code := 35; -- # @@ -303,7 +303,7 @@ package H3.Runes is function Is_Xdigit (V: in Rune) return Boolean; function Is_Lower (V: in Rune) return Boolean; function Is_Upper (V: in Rune) return Boolean; - + function To_Lower (V: in Rune) return Rune; function To_Upper (V: in Rune) return Rune; diff --git a/lib2/h3-utf8.adb b/lib2/h3-utf8.adb index 80daa6f..907c0f1 100644 --- a/lib2/h3-utf8.adb +++ b/lib2/h3-utf8.adb @@ -25,7 +25,7 @@ package body H3.Utf8 is Lower: Uint32; Upper: Uint32; - Fbyte: Uint8; + Fbyte: Uint8; Mask: Uint8; -- Mask for getting the fixed bits in the first byte. -- (First-Byte and Mask) = Fbyte @@ -92,7 +92,7 @@ package body H3.Utf8 is Tmp := 0; for I in Str'Range loop declare - Utf8: Utf8_String := From_Unicode_Character(Chr => Str(I)); + Utf8: constant Utf8_String := From_Unicode_Character(Chr => Str(I)); begin Tmp := Tmp + Utf8'Length; end; @@ -104,7 +104,7 @@ package body H3.Utf8 is Tmp := Result'First; for I in Str'Range loop declare - Utf8: Utf8_String := From_Unicode_Character(Str(I)); + Utf8: constant Utf8_String := From_Unicode_Character(Str(I)); begin Result(Tmp .. Tmp + Utf8'Length - 1) := Utf8; Tmp := Tmp + Utf8'Length; @@ -124,7 +124,7 @@ package body H3.Utf8 is return System_Size'First; end Sequence_Length; - procedure To_Unicode_Character (Seq: in Utf8_String; + procedure To_Unicode_Character (Seq: in Utf8_String; Seq_Len: out System_Size; Chr: out Unicode_Character) is W: Uint32; @@ -133,7 +133,7 @@ package body H3.Utf8 is -- Check if the first byte matches the desired bit patterns. if (Utf8_Character'Pos(Seq(Seq'First)) and Conv_Table(I).Mask) = Conv_Table(I).Fbyte then - + if Seq'Length < Conv_Table(I).Length then raise Insufficient_Utf8_Sequence; end if; @@ -147,7 +147,7 @@ package body H3.Utf8 is -- Each UTF8 byte except the first must be set with 2#1000_0000. raise Invalid_Utf8_Sequence; end if; - W := Interfaces.Shift_Left(W, 6) or (Utf8_Character'Pos(Seq(Seq'First + J)) and Uint32'(2#0011_1111#)); + W := Interfaces.Shift_Left(W, 6) or (Utf8_Character'Pos(Seq(Seq'First + J)) and Uint32'(2#0011_1111#)); end loop; -- Return the character matching the word @@ -156,7 +156,7 @@ package body H3.Utf8 is return; end if; end loop; - + raise Invalid_Utf8_Sequence; end To_Unicode_Character; @@ -168,7 +168,7 @@ package body H3.Utf8 is return Chr; end To_Unicode_Character; - procedure To_Unicode_String (Seq: in Utf8_String; + procedure To_Unicode_String (Seq: in Utf8_String; Seq_Len: out System_Size; Str: out Unicode_String; Str_Len: out System_Size) is diff --git a/lib2/h3-utf8.ads b/lib2/h3-utf8.ads index 6d3e2e8..a202e6b 100644 --- a/lib2/h3-utf8.ads +++ b/lib2/h3-utf8.ads @@ -1,4 +1,4 @@ -generic +generic type Slim_Character is (<>); type Wide_Character is (<>); type Slim_String is array(System_Index range<>) of Slim_Character; @@ -31,16 +31,16 @@ package H3.Utf8 is function From_Unicode_Character (Chr: in Unicode_Character) return Utf8_String; function From_Unicode_String (Str: in Unicode_String) return Utf8_String; - --| The Sequence_Length function returns the length of a full UTF8 + --| The Sequence_Length function returns the length of a full UTF8 --| sequence representing a single Unicode character given the first --| sequence byte. It returns 0 if the first byte is invalid. function Sequence_Length (Seq: in Utf8_Character) return System_Size; - procedure To_Unicode_Character (Seq: in Utf8_String; + procedure To_Unicode_Character (Seq: in Utf8_String; Seq_Len: out System_Size; Chr: out Unicode_Character); - procedure To_Unicode_String (Seq: in Utf8_String; + procedure To_Unicode_String (Seq: in Utf8_String; Seq_Len: out System_Size; Str: out Unicode_String; Str_Len: out System_Size); diff --git a/lib2/hello3.adb b/lib2/hello3.adb index 929162a..7073fd6 100644 --- a/lib2/hello3.adb +++ b/lib2/hello3.adb @@ -6,6 +6,6 @@ procedure hello3 is Compiler: C.Compiler; begin - Compiler.Feed ("#include ""abc.txt"""); + Compiler.Feed ("#include ""abc.txt""; donkey"); Compiler.End_Feed; end hello3;