From 5fcc19de278fbebee5c8321ca541dbdca449bb86 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Fri, 12 Nov 2021 23:31:25 +0000 Subject: [PATCH] tackling #include --- lib2/h3-compilers.adb | 303 +++++++++++++++++++++++------------------- lib2/h3-compilers.ads | 2 + lib2/hello3.adb | 2 +- 3 files changed, 166 insertions(+), 141 deletions(-) diff --git a/lib2/h3-compilers.adb b/lib2/h3-compilers.adb index 79dc048..a032c49 100644 --- a/lib2/h3-compilers.adb +++ b/lib2/h3-compilers.adb @@ -8,127 +8,7 @@ package body H3.Compilers is LB_EOF: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_F,R.V.Right_Arrow); -- LB_XINCLUDE: constant S.Rune_Array := (R.V.Number_Sign,R.V.LC_I,R.V.LC_N,R.V.LC_C,R.V.LC_L,R.V.LC_U,R.V.LC_D,R.V.LC_E); -- #include - procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State) is - begin - C.Lx.State := State; - end Set_Lexer_State; - - procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State; Ch: in R.Rune) is - begin - -- change the lexer state while storing the first character in the token buffer. - C.Lx.State := State; - S.Clear (C.Tk.Buf); - S.Append (C.Tk.Buf, Ch); - end Set_Lexer_State; - - procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State; Code: in R.Code) is - begin - Set_Lexer_State (C, State, R.To_Rune(Code)); - end Set_Lexer_State; - - procedure Set_Parser_State (C: in out Compiler; State: in Parser_State) is - begin - C.Ps.State := State; - end Set_Parser_State; - - procedure Parse_Start (C: in out Compiler) is - begin - case C.Tk.Id is - when TK_BSTR => - null; - when TK_BYTE => - null; - when TK_CHAR => - null; - when TK_CSTR => - null; - when TK_DIRECTIVE => - if C.Tk.Buf.Equals(LB_XINCLUDE) then - Set_Parser_State (C, PS_INCLUDE); - else - raise Syntax_Error; - end if; - when TK_EOF => - null; - when TK_EOL => - null; - when TK_GE => - null; - when TK_GT => - null; - when TK_IDENT => - null; - when TK_LE => - null; - when TK_LT => - null; - when TK_SEMICOLON => - null; - end case; - end Parse_Start; - - procedure Start_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is - Top: System_Index; - begin - if C.St.Top = C.St.Items'Last then - raise Syntax_Error; -- TODO: inclusion depth too deep - end if; - - Top := C.St.Top + 1; - Ada.Text_IO.Open (C.St.Items(Top).Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name))); - C.St.Top := Top; - end Start_Inclusion; - - procedure End_Inclusion (C: in out Compiler) is - begin - Ada.Text_IO.Close (C.St.Items(C.St.Top).Handle); - C.St.Top := C.St.Top - 1; - end End_Inclusion; - - procedure Parse_Include (C: in out Compiler) is - begin - if C.Tk.Id = TK_CSTR then - -- arrange to feed more data from the included file. - Start_Inclusion (C, S.To_Rune_Array(C.Tk.Buf)); - null; - else - raise Syntax_Error; -- string literal required - end if; - end Parse_Include; - - procedure Parse_Include_End (C: in out Compiler) is - begin - if C.Tk.Id /= TK_SEMICOLON then - raise Syntax_Error; - end if; - - -- TODO: put the state back to START??? - end Parse_Include_End; - - procedure Got_Token (C: in out Compiler) is - begin - --case C.P.State IS - -- when START => - -- null; - --end case; - -ada.text_io.put (C.Tk.Id'Img); -ada.text_io.put (" "); -for i in C.Tk.Buf.Get_First_Index .. C.Tk.Buf.Get_Last_Index loop - ada.text_io.put (standard.character'val(S.Rune'Pos(C.Tk.Buf.Get_Item(i)))); -end loop; -ada.text_io.put_line(""); - - case C.Ps.State is - when PS_START => - Parse_Start (C); - when PS_INCLUDE => - Parse_Include (C); - when others => - raise Syntax_Error; -- TODO: change this... - end case; - - end Got_Token; + -- ------------------------------------------------------------------- procedure Start_Token (C: in out Compiler) is begin @@ -164,6 +44,32 @@ ada.text_io.put_line(""); Feed_Token(C, R.To_Rune(Code)); end Feed_Token; + procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State) is + begin + C.Lx.State := State; + Start_Token (C); -- empty the token buffer + end Set_Lexer_State; + + procedure Switch_Lexer_State (C: in out Compiler; State: in Lexer_State) is + begin + C.Lx.State := State; + -- don't reset the token buffer; + end Switch_Lexer_State; + + procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State; Ch: in R.Rune) is + begin + -- change the lexer state while storing the first character in the token buffer. + C.Lx.State := State; + Start_Token (C, Ch); + end Set_Lexer_State; + + procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State; Code: in R.Code) is + begin + Set_Lexer_State (C, State, R.To_Rune(Code)); + end Set_Lexer_State; + + procedure Got_Token (C: in out Compiler); -- defined further down + procedure End_Token (C: in out Compiler; Id: in Token_Id) is begin C.Tk.Id := Id; @@ -187,6 +93,112 @@ ada.text_io.put_line(""); Set_Lexer_State (C, LX_START); end End_Token; + -- ------------------------------------------------------------------- + + procedure Set_Parser_State (C: in out Compiler; State: in Parser_State) is + begin + C.Ps.State := State; + end Set_Parser_State; + + procedure Parse_Start (C: in out Compiler) is + begin + case C.Tk.Id is + when TK_BSTR => + null; + when TK_BYTE => + null; + when TK_CHAR => + null; + when TK_CSTR => + null; + when TK_DIRECTIVE => + if C.Tk.Buf.Equals(LB_XINCLUDE) then + Set_Parser_State (C, PS_INCLUDE); + else + raise Syntax_Error with "unknown directive name"; + end if; + when TK_EOF => + null; + when TK_EOL => + null; + when TK_GE => + null; + when TK_GT => + null; + when TK_IDENT => + null; + when TK_LE => + null; + when TK_LT => + null; + when TK_SEMICOLON => + null; + end case; + end Parse_Start; + + procedure Start_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is + Top: System_Index; + begin + if C.St.Top = C.St.Items'Last then + raise Syntax_Error with "inclusion depth too deep"; + end if; + + Top := C.St.Top + 1; + Ada.Text_IO.Open (C.St.Items(Top).Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name))); + C.St.Top := Top; + end Start_Inclusion; + + procedure End_Inclusion (C: in out Compiler) is + begin + Ada.Text_IO.Close (C.St.Items(C.St.Top).Handle); + C.St.Top := C.St.Top - 1; + end End_Inclusion; + + procedure Parse_Include (C: in out Compiler) is + begin + if C.Tk.Id = TK_CSTR then + -- arrange to feed more data from the included file. + Start_Inclusion (C, S.To_Rune_Array(C.Tk.Buf)); + null; + else + raise Syntax_Error with "string literal required"; + end if; + end Parse_Include; + + procedure Parse_Include_End (C: in out Compiler) is + begin + if C.Tk.Id /= TK_SEMICOLON then + raise Syntax_Error with "semicolon required"; + end if; + + -- TODO: put the state back to START??? + end Parse_Include_End; + + procedure Got_Token (C: in out Compiler) is + begin + --case C.P.State IS + -- when START => + -- null; + --end case; + +ada.text_io.put (C.Tk.Id'Img); +ada.text_io.put (" "); +for i in C.Tk.Buf.Get_First_Index .. C.Tk.Buf.Get_Last_Index loop + ada.text_io.put (standard.character'val(S.Rune'Pos(C.Tk.Buf.Get_Item(i)))); +end loop; +ada.text_io.put_line(""); + + case C.Ps.State is + when PS_START => + Parse_Start (C); + when PS_INCLUDE => + Parse_Include (C); + when others => + raise Syntax_Error; -- TODO: change this... + end case; + + end Got_Token; + procedure Feed_Char_Code (C: in out Compiler; Code: in R.Code) is begin <> @@ -217,7 +229,9 @@ end if; elsif R.Is_Rune(Code, R.V.Right_Arrow) then Set_Lexer_State (C, LX_OP_GREATER, Code); elsif R.Is_Rune(Code, R.V.Number_Sign) then - Set_Lexer_State (C, LX_DIRECTIVE); + Set_Lexer_State (C, LX_DIRECTIVE, Code); + elsif R.Is_Rune(Code, R.V.Quotation) then -- double quote + Set_Lexer_State (C, LX_CSTR); else raise Syntax_Error; end if; @@ -230,6 +244,33 @@ end if; goto Start_Over; end if; + when LX_COMMENT => + null; + + when LX_CSTR => + -- TODO: escaping... + if R.Is_Rune(Code, R.V.Quotation) then + End_Token (C, TK_CSTR); + else + Feed_Token (C, Code); + end if; + + when LX_IDENT => + if R.Is_Alnum(Code) or else R.Is_Rune(Code, R.V.Underline) then + Feed_Token (C, Code); + else + End_Token (C, TK_IDENT); + goto Start_Over; + end if; + + when LX_NUMBER => + if R.Is_Digit(Code) then + Feed_Token (C, Code); + else + End_Token (C, TK_IDENT); -- TODO: change this + goto Start_Over; + end if; + when LX_OP_GREATER => if R.Is_Rune(Code, R.V.Equal_Sign) then End_Token (C, TK_GE, Code); @@ -246,24 +287,6 @@ end if; goto Start_Over; end if; - when LX_COMMENT => - null; - - when LX_IDENT => - if R.Is_Alnum(Code) or else R.Is_Rune(Code, R.V.Underline) then - Feed_Token (C, Code); - else - End_Token (C, TK_IDENT); - goto Start_Over; - end if; - - when LX_NUMBER => - if R.Is_Digit(Code) then - Feed_Token (C, Code); - else - End_Token (C, TK_IDENT); -- TODO: change this - goto Start_Over; - end if; end case; end Feed_Char_Code; diff --git a/lib2/h3-compilers.ads b/lib2/h3-compilers.ads index c9aa9a8..6854b62 100644 --- a/lib2/h3-compilers.ads +++ b/lib2/h3-compilers.ads @@ -18,7 +18,9 @@ package H3.Compilers is private type Lexer_State is ( LX_START, + LX_COMMENT, + LX_CSTR, LX_DIRECTIVE, LX_IDENT, LX_NUMBER, diff --git a/lib2/hello3.adb b/lib2/hello3.adb index 30df9aa..929162a 100644 --- a/lib2/hello3.adb +++ b/lib2/hello3.adb @@ -6,6 +6,6 @@ procedure hello3 is Compiler: C.Compiler; begin - Compiler.Feed ("#include 'abc.txt'"); + Compiler.Feed ("#include ""abc.txt"""); Compiler.End_Feed; end hello3;