more structural parsing
This commit is contained in:
		| @ -7,6 +7,23 @@ package body H3.Compilers is | ||||
| 	LB_EOF: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_F,R.V.Right_Arrow); -- <EOF> | ||||
| 	LB_XINCLUDE: constant S.Rune_Array := (R.V.Number_Sign,R.V.LC_I,R.V.LC_N,R.V.LC_C,R.V.LC_L,R.V.LC_U,R.V.LC_D,R.V.LC_E); -- #include | ||||
|  | ||||
|  | ||||
| 	procedure Dump_Token (Tk: in Token) is | ||||
| 	begin | ||||
| 		Ada.Text_IO.Put (Tk.Id'Img); | ||||
| 		Ada.Text_IO.Put (": "); | ||||
| 		Ada.Text_IO.Put_Line (Standard.String(Utf8.From_Unicode_String(Tk.Buf.To_Rune_Array))); | ||||
| 	end Dump_Token; | ||||
|  | ||||
| 	procedure Dump_Rune (Code: in R.Code) is | ||||
| 	begin | ||||
| 		if R.Is_Eof(Code) then | ||||
| 			Ada.Text_IO.Put_Line ("EOF"); | ||||
| 		else | ||||
| 			Ada.Text_IO.Put_Line (R.To_Rune(Code)'Img); | ||||
| 		end if; | ||||
| 	end Dump_Rune; | ||||
|  | ||||
| 	-- ------------------------------------------------------------------- | ||||
|  | ||||
| 	procedure Start_Token (C: in out Compiler) is | ||||
| @ -86,27 +103,40 @@ package body H3.Compilers is | ||||
|  | ||||
| 	procedure End_Token (C: in out Compiler; Id: in Token_Id; Code: in R.Code) is | ||||
| 	begin | ||||
| 		S.Append (C.Tk.Buf, R.To_Rune(Code)); | ||||
| 		C.Tk.Id := Id; | ||||
| 		Got_Token (C); | ||||
| 		Set_Lexer_State (C, LX_START); | ||||
| 		End_Token (C, Id, R.To_Rune(Code)); | ||||
| 	end End_Token; | ||||
|  | ||||
| 	procedure Dump_Token (Tk: in Token) is | ||||
| 	begin | ||||
| 		Ada.Text_IO.Put (Tk.Id'Img); | ||||
| 		Ada.Text_IO.Put (": "); | ||||
| 		Ada.Text_IO.Put_Line (Standard.String(Utf8.From_Unicode_String(Tk.Buf.To_Rune_Array))); | ||||
| 	end Dump_Token; | ||||
| 	-- ------------------------------------------------------------------- | ||||
|  | ||||
| 	procedure Set_Parser_State (C: in out Compiler; State: in Parser_State) is | ||||
| 	procedure Set_Parse_State (C: in out Compiler; Code: in Parse_State_Code) is | ||||
| 	begin | ||||
| 		C.Ps.Prev_State := C.Ps.State; | ||||
| 		C.Ps.State := State; | ||||
| 	end Set_Parser_State; | ||||
| 		C.Prs.States(C.Prs.Top).Current := Code; | ||||
| 	end Set_Parse_State; | ||||
|  | ||||
| 	procedure Start_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is | ||||
| 	procedure Push_Parse_State (C: in out Compiler; Code: in Parse_State_Code) is | ||||
| 		Top: System_Index; | ||||
| 	begin | ||||
| 		if C.Prs.Top = C.Prs.States'Last then | ||||
| 			raise Syntax_Error with "parse state stack exhausted"; | ||||
| 		end if; | ||||
|  | ||||
| 		Top := C.Prs.Top + 1; | ||||
| 		declare | ||||
| 			S: Parse_State renames C.Prs.States(Top); | ||||
| 		begin | ||||
| 			S.Current := Code; | ||||
| 		end; | ||||
| 		C.Prs.Top := Top; | ||||
| 	end Push_Parse_State; | ||||
|  | ||||
| 	procedure Pop_Parse_State (C: in out Compiler) is | ||||
| 	begin | ||||
| 		C.Prs.top := C.Prs.Top - 1; | ||||
| 	end Pop_Parse_State; | ||||
|  | ||||
| 	-- ------------------------------------------------------------------- | ||||
|  | ||||
| 	procedure Push_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is | ||||
| 		Top: System_Index; | ||||
| 	begin | ||||
| 		if C.Inc.Top = C.Inc.Streams'Last then | ||||
| @ -115,30 +145,34 @@ package body H3.Compilers is | ||||
|  | ||||
| 		Top := C.Inc.Top + 1; | ||||
| 		declare | ||||
| 			St: Stream renames C.Inc.Streams(Top); | ||||
| 			S: Stream renames C.Inc.Streams(Top); | ||||
| 		begin | ||||
| 			Ada.Text_IO.Open (St.Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name))); | ||||
| 			St.Initial_Level := C.Ps.Level; | ||||
| 			St.Initial_Parser_State := C.Ps.Prev_State; | ||||
| 			St.Next_Parser_State := PS_INCLUDE_TERMINATOR; | ||||
| 			Ada.Text_IO.Open (S.Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name))); | ||||
| 			S.Prs_Level := C.Prs.Top; -- this is the parse state level of this include directive. | ||||
| 		end; | ||||
| 		C.Inc.Top := Top; | ||||
|  | ||||
| 		-- the parser should resume at the state when the include directive is seen | ||||
| 		Set_Parser_State (C, C.Ps.Prev_State); -- the state when the include directive is seen | ||||
| 	end Start_Inclusion; | ||||
| 		-- Switch the parse state to handle the terminator | ||||
| 		-- after the new pushed state has been popped out. | ||||
| 		Set_Parse_State (C, PS_INCLUDE_TERMINATOR); | ||||
|  | ||||
| 		-- Let the inner content be handled at the state as the include directive is seen. | ||||
| 		Push_Parse_State (C, C.Prs.States(C.Prs.Top - 1).Current); | ||||
| 	end Push_Inclusion; | ||||
|  | ||||
| 	procedure End_Inclusion (C: in out Compiler) is | ||||
| 		Top: constant System_Index := C.Inc.Top; | ||||
| 	procedure Pop_Inclusion (C: in out Compiler; Check: Boolean) is | ||||
| 	begin | ||||
| 		if C.Ps.State /= C.Inc.Streams(Top).Initial_Parser_State or else C.Ps.Level /= C.Inc.Streams(Top).Initial_Level then | ||||
| 			raise Syntax_Error with "unexpected end of inclusion"; | ||||
| 		end if; | ||||
| 		Ada.Text_IO.Close (C.Inc.Streams(C.Inc.Top).Handle); | ||||
| 		Set_Parser_State (C, C.Inc.Streams(C.Inc.Top).Next_Parser_State); | ||||
| 		if Check then | ||||
| 			if C.Prs.Top /= C.Inc.Streams(C.Inc.Top).Prs_Level + 1 then | ||||
| 				raise Syntax_Error with "unblanced inclusion content"; | ||||
| 			end if; | ||||
| 		end if; | ||||
| 		C.Inc.Top := C.Inc.Top - 1; | ||||
| 	end End_Inclusion; | ||||
| 		Pop_Parse_State (C); | ||||
| 	end Pop_Inclusion; | ||||
|  | ||||
| 	-- ------------------------------------------------------------------- | ||||
|  | ||||
| 	procedure Parse_Start (C: in out Compiler) is | ||||
| 	begin | ||||
| @ -151,33 +185,44 @@ package body H3.Compilers is | ||||
| 				null; | ||||
| 			when TK_CSTR => | ||||
| 				null; | ||||
|  | ||||
| 			when TK_DIRECTIVE => | ||||
| 				if C.Tk.Buf.Equals(LB_XINCLUDE) then | ||||
| 					Set_Parser_State (C, PS_INCLUDE_TARGET); | ||||
| 					--Set_Parse_State (C, PS_INCLUDE_TARGET); | ||||
| 					Push_Parse_State (C, PS_INCLUDE_TARGET); | ||||
| 				else | ||||
| 					raise Syntax_Error with "unknown directive name"; | ||||
| 				end if; | ||||
|  | ||||
| 			when TK_EOF => | ||||
| 				if C.Inc.Top > 0 then | ||||
| 					End_Inclusion (C); | ||||
| 					Pop_Inclusion (C, True); | ||||
| 				else | ||||
| 					-- end of really the input?? | ||||
| 					null; | ||||
| 				end if; | ||||
|  | ||||
| 			when TK_EOL => | ||||
| 				null; | ||||
| 			when TK_GE => | ||||
| 				null; | ||||
| 			when TK_GT => | ||||
| 				null; | ||||
|  | ||||
| 			when TK_IDENT => | ||||
| 				null; | ||||
| 			when TK_LE => | ||||
| 				null; | ||||
| 			when TK_LT => | ||||
| 				null; | ||||
|  | ||||
| 			--when TK_NUMBER => | ||||
| 			--	null; | ||||
|  | ||||
| 			--when TK_CLASS => | ||||
| 			--when TK_FUNC => | ||||
|  | ||||
| 			-- plus or minus signed may be allowed here too. | ||||
| 			-- plusplus or miniusminus may be allowed here too. | ||||
|  | ||||
| 			when TK_SEMICOLON => | ||||
| 				null; | ||||
|  | ||||
| 			when others => | ||||
| 				raise Syntax_Error with "unexpected token"; | ||||
|  | ||||
| 		end case; | ||||
| 	end Parse_Start; | ||||
|  | ||||
| @ -185,10 +230,10 @@ package body H3.Compilers is | ||||
| 	begin | ||||
| 		if C.Tk.Id = TK_CSTR then | ||||
| 			-- arrange to feed more data from the included file. | ||||
| 			Start_Inclusion (C, S.To_Rune_Array(C.Tk.Buf)); | ||||
| 			Push_Inclusion (C, S.To_Rune_Array(C.Tk.Buf)); | ||||
| 		else | ||||
| 			-- the target is not a string. | ||||
| 			Dump_Token (C.Tk); | ||||
| 			--Dump_Token (C.Tk); | ||||
| 			raise Syntax_Error with "string literal required"; | ||||
| 		end if; | ||||
| 	end Parse_Include_Target; | ||||
| @ -198,23 +243,14 @@ package body H3.Compilers is | ||||
| 		if C.Tk.Id /= TK_SEMICOLON then | ||||
| 			raise Syntax_Error with "semicolon required"; | ||||
| 		end if; | ||||
|  | ||||
| 		-- it is not safe to access information at the previous stack top. | ||||
| 		-- no problem in doing that becuase the current implementation uses  | ||||
| 		-- a static array.  | ||||
| 		Set_Parser_State (C, C.Inc.Streams(C.Inc.Top + 1).Initial_Parser_State); | ||||
| 		Pop_Parse_State (C); | ||||
| 	end Parse_Include_Terminator; | ||||
|  | ||||
| 	procedure Got_Token (C: in out Compiler) is | ||||
| 	begin | ||||
| ada.text_io.put (C.Tk.Id'Img); | ||||
| ada.text_io.put (" "); | ||||
| for i in C.Tk.Buf.Get_First_Index .. C.Tk.Buf.Get_Last_Index loop | ||||
| 	ada.text_io.put (Standard.Character'val(S.Rune'Pos(C.Tk.Buf.Get_Item(i)))); | ||||
| end loop; | ||||
| ada.text_io.put_line(""); | ||||
| 		Dump_Token (C.Tk); | ||||
|  | ||||
| 		case C.Ps.State is | ||||
| 		case C.Prs.States(C.Prs.Top).Current is | ||||
| 			when PS_START => | ||||
| 				Parse_Start (C); | ||||
| 			when PS_INCLUDE_TARGET => | ||||
| @ -230,17 +266,14 @@ ada.text_io.put_line(""); | ||||
| 	procedure Feed_Char_Code (C: in out Compiler; Code: in R.Code) is | ||||
| 	begin | ||||
| 	<<Start_Over>> | ||||
| if R.Is_Eof(Code) then | ||||
| 	ada.text_io.put_line ("EOF"); | ||||
| else | ||||
| 	ada.text_io.put_line (R.To_Rune(Code)'Img); | ||||
| end if; | ||||
| 		--Dump_Rune (Code); | ||||
|  | ||||
| 		case C.Lx.State is | ||||
| 			when LX_START => | ||||
| 				if R.Is_Eof(Code) then | ||||
| 					Start_Token (C, LB_EOF); | ||||
| 					End_Token (C, TK_EOF); | ||||
| 					-- this procedure doesn't prevent you from feeding more | ||||
| 					-- this procedure doesn't prevent you from feeding more runes | ||||
| 					-- after EOF. but it's not desirable to feed more after EOF. | ||||
| 				elsif R.Is_Space(Code) then | ||||
| 					-- ignore. carry on | ||||
| @ -249,17 +282,25 @@ end if; | ||||
| 					Set_Lexer_State (C, LX_IDENT, Code); | ||||
| 				elsif R.Is_Digit(Code) then | ||||
| 					Set_Lexer_State (C, LX_NUMBER, Code); | ||||
| 				elsif R.Is_Rune(Code, R.V.Plus_Sign) then -- + | ||||
| 					Set_Lexer_State (C, LX_OP_PLUS, Code); | ||||
| 				elsif R.Is_Rune(Code, R.V.Minus_Sign) then -- - | ||||
| 					Set_Lexer_State (C, LX_OP_MINUS, Code); | ||||
| 				elsif R.Is_Rune(Code, R.V.Asterisk) then -- * | ||||
| 					Set_Lexer_State (C, LX_OP_MUL, Code); | ||||
| 				elsif R.Is_Rune(Code, R.V.Slash) then -- / | ||||
| 					Set_Lexer_State (C, LX_OP_DIV, Code); | ||||
| 				elsif R.Is_Rune(Code, R.V.Left_Arrow) then -- < | ||||
| 					Set_Lexer_State (C, LX_OP_LESS, Code); | ||||
| 				elsif R.Is_Rune(Code, R.V.Right_Arrow) then -- > | ||||
| 					Set_Lexer_State (C, LX_OP_GREATER, Code); | ||||
| 				elsif R.Is_Rune(Code, R.V.Number_Sign) then -- # | ||||
| 					Set_Lexer_State (C, LX_DIRECTIVE, Code); | ||||
| 				elsif R.Is_Rune(Code, R.V.Quotation) then -- " | ||||
| 					Set_Lexer_State (C, LX_CSTR); | ||||
| 				elsif R.Is_Rune(Code, R.V.Semicolon) then | ||||
| 					Start_Token (C, Code); | ||||
| 					End_Token (C, TK_SEMICOLON); | ||||
| 				elsif R.Is_Rune(Code, R.V.Left_Arrow) then | ||||
| 					Set_Lexer_State (C, LX_OP_LESS, Code); | ||||
| 				elsif R.Is_Rune(Code, R.V.Right_Arrow) then | ||||
| 					Set_Lexer_State (C, LX_OP_GREATER, Code); | ||||
| 				elsif R.Is_Rune(Code, R.V.Number_Sign) then | ||||
| 					Set_Lexer_State (C, LX_DIRECTIVE, Code); | ||||
| 				elsif R.Is_Rune(Code, R.V.Quotation) then -- double quote | ||||
| 					Set_Lexer_State (C, LX_CSTR); | ||||
| 				else | ||||
| 					raise Syntax_Error; | ||||
| 				end if; | ||||
| @ -299,6 +340,38 @@ end if; | ||||
| 					goto Start_Over; | ||||
| 				end if; | ||||
|  | ||||
| 			when LX_OP_PLUS => | ||||
| 				if R.Is_Rune(Code, R.V.Plus_Sign) then | ||||
| 					End_Token (C, TK_PLUSPLUS, Code); | ||||
| 				else | ||||
| 					End_Token (C, TK_PLUS); | ||||
| 					goto Start_Over; | ||||
| 				end if; | ||||
|  | ||||
| 			when LX_OP_MINUS => | ||||
| 				if R.Is_Rune(Code, R.V.Minus_Sign) then | ||||
| 					End_Token (C, TK_MINUSMINUS, Code); | ||||
| 				else | ||||
| 					End_Token (C, TK_MINUS); | ||||
| 					goto Start_Over; | ||||
| 				end if; | ||||
|  | ||||
| 			when LX_OP_MUL => | ||||
| 				if R.Is_Rune(Code, R.V.Asterisk) then | ||||
| 					End_Token (C, TK_MULMUL, Code); | ||||
| 				else | ||||
| 					End_Token (C, TK_MUL); | ||||
| 					goto Start_Over; | ||||
| 				end if; | ||||
|  | ||||
| 			when LX_OP_DIV => | ||||
| 				if R.Is_Rune(Code, R.V.Slash) then | ||||
| 					End_Token (C, TK_DIVDIV, Code); | ||||
| 				else | ||||
| 					End_Token (C, TK_DIV); | ||||
| 					goto Start_Over; | ||||
| 				end if; | ||||
|  | ||||
| 			when LX_OP_GREATER => | ||||
| 				if R.Is_Rune(Code, R.V.Equal_Sign) then | ||||
| 					End_Token (C, TK_GE, Code); | ||||
| @ -338,7 +411,7 @@ end if; | ||||
| 			End_Feed (C); | ||||
|  | ||||
| 			if C.Inc.Top < Entry_Top then | ||||
| 				-- End_Inclusion() is called on EOF which is fed by End_Feed(). | ||||
| 				-- Pop_Inclusion() is called on EOF which is fed by End_Feed(). | ||||
| 				-- It also decrements the stack pointer. The current inclusion | ||||
| 				-- stack pointer will get less that First_Top if the first inclusion | ||||
| 				-- level entered is exited. | ||||
| @ -363,4 +436,20 @@ end if; | ||||
| 		Feed_Char_Code (C, R.P.EOF); | ||||
| 	end End_Feed; | ||||
|  | ||||
|  | ||||
| 	-- ------------------------------------------------------------------- | ||||
| 	procedure Initialize (C: in out Compiler) is | ||||
| 	begin | ||||
| 		Push_Parse_State (C, PS_START); | ||||
| 	end Initialize; | ||||
|  | ||||
| 	procedure Finalize (C: in out Compiler) is | ||||
| 	begin | ||||
| 		while C.Inc.Top > 0 loop | ||||
| 			Pop_Inclusion (C, False); | ||||
| 		end loop; | ||||
| 		while C.Prs.Top > 0 loop | ||||
| 			Pop_Parse_State (C); | ||||
| 		end loop; | ||||
| 	end Finalize; | ||||
| end H3.Compilers; | ||||
|  | ||||
| @ -1,5 +1,6 @@ | ||||
| with H3.Runes; | ||||
| with H3.Strings; | ||||
| with Ada.Finalization; | ||||
| with Ada.Text_IO; | ||||
|  | ||||
| generic | ||||
| @ -10,11 +11,15 @@ package H3.Compilers is | ||||
|  | ||||
| 	Syntax_Error: exception; | ||||
|  | ||||
| 	type Compiler is tagged limited private; | ||||
| 	--type Compiler is tagged limited private; | ||||
| 	type Compiler is new Ada.Finalization.Limited_Controlled with private; | ||||
|  | ||||
| 	procedure Feed (C: in out Compiler; Data: in S.Rune_Array); | ||||
| 	procedure End_Feed (C: in out Compiler); | ||||
|  | ||||
| 	overriding procedure Initialize (C: in out Compiler); | ||||
| 	overriding procedure Finalize (C: in out Compiler); | ||||
|  | ||||
| private | ||||
| 	type Lexer_State is ( | ||||
| 		LX_START, | ||||
| @ -24,8 +29,12 @@ private | ||||
| 		LX_DIRECTIVE, | ||||
| 		LX_IDENT, | ||||
| 		LX_NUMBER, | ||||
| 		LX_OP_DIV, | ||||
| 		LX_OP_GREATER, | ||||
| 		LX_OP_LESS | ||||
| 		LX_OP_LESS, | ||||
| 		LX_OP_MINUS, | ||||
| 		LX_OP_MUL, | ||||
| 		LX_OP_PLUS | ||||
| 	); | ||||
| 	type Lexer is record | ||||
| 		State: Lexer_State := LX_START; | ||||
| @ -37,6 +46,8 @@ private | ||||
| 		TK_CHAR, | ||||
| 		TK_CSTR, | ||||
| 		TK_DIRECTIVE, | ||||
| 		TK_DIV, | ||||
| 		TK_DIVDIV, | ||||
| 		TK_EOF, | ||||
| 		TK_EOL, | ||||
| 		TK_IDENT, | ||||
| @ -44,6 +55,12 @@ private | ||||
| 		TK_GT, | ||||
| 		TK_LE, | ||||
| 		TK_LT, | ||||
| 		TK_MINUS, | ||||
| 		TK_MINUSMINUS, | ||||
| 		TK_MUL, | ||||
| 		TK_MULMUL, | ||||
| 		TK_PLUS, | ||||
| 		TK_PLUSPLUS, | ||||
| 		TK_SEMICOLON | ||||
| 	); | ||||
| 	type Token is record | ||||
| @ -51,37 +68,46 @@ private | ||||
| 		Buf: S.Elastic_String; | ||||
| 	end record; | ||||
|  | ||||
| 	type Parser_State is ( | ||||
| 	-- ------------------------------------------------------------------ | ||||
|  | ||||
| 	type Parse_State_Code is ( | ||||
| 		PS_START, | ||||
| 		PS_INCLUDE_TARGET, | ||||
| 		PS_INCLUDE_TERMINATOR | ||||
| 	); | ||||
| 	type Parser is record | ||||
| 		State: Parser_State := PS_START; | ||||
| 		Prev_State: Parser_State := PS_START; | ||||
| 		Level: System_Index := 1; | ||||
|  | ||||
| 	type Parse_State is record | ||||
| 		Current: Parse_State_Code := PS_START; | ||||
| 	end record; | ||||
|  | ||||
| 	type Parse_State_Array is array(System_Index range<>) of Parse_State; | ||||
|  | ||||
| 	type Parse_State_Stack(Capa: System_Index) is record | ||||
| 		States: Parse_State_Array(System_Index'First .. Capa); | ||||
| 		Top: System_Size := System_Size'First; -- 0 | ||||
| 	end record; | ||||
|  | ||||
| 	-- ------------------------------------------------------------------ | ||||
|  | ||||
| 	type Stream is record | ||||
| 		Handle: Ada.Text_IO.File_Type; | ||||
| 		--Handle: System_Size; | ||||
|  | ||||
| 		 | ||||
| 		Initial_Level: System_Index; -- the block level where this inclusion is entered | ||||
| 		Initial_Parser_State: Parser_State;  -- the parser state before the #include has been seen? | ||||
| 		Next_Parser_State: Parser_State; | ||||
| 		Prs_Level: System_Index; | ||||
| 	end record; | ||||
|  | ||||
| 	type Stream_Array is array(System_Index range <>) of Stream; | ||||
|  | ||||
| 	type Include_Stack(Capa: System_Index) is record | ||||
| 		Streams: Stream_Array(System_Index'First .. Capa); | ||||
| 		Top: System_Size := 0; | ||||
| 		Top: System_Size := System_Size'First; -- 0 | ||||
| 	end record; | ||||
| 	-- ------------------------------------------------------------------ | ||||
|  | ||||
| 	type Compiler is tagged limited record | ||||
| 	--type Compiler is tagged limited record | ||||
| 	type Compiler is new Ada.Finalization.Limited_Controlled with record | ||||
| 		Lx: Lexer; | ||||
| 		Tk: Token; | ||||
| 		Ps: Parser; | ||||
| 		Inc: Include_Stack(32); | ||||
| 		Prs: Parse_State_Stack(128);  -- TODO: make this dynamic. single access type. dynamic allocation | ||||
| 		Inc: Include_Stack(32); -- TODO: make this dynamic. single access type. dynamic allocation | ||||
| 	end record; | ||||
|  | ||||
| end H3.Compilers; | ||||
|  | ||||
| @ -5,6 +5,9 @@ package H3 is | ||||
| 	--pragma Preelaborate (H3); | ||||
|  | ||||
| 	subtype Boolean is Standard.Boolean; | ||||
| 	True: constant Boolean := (1 = 1); | ||||
| 	False: constant Boolean := (1 /= 1); | ||||
|  | ||||
| 	subtype Natural is Standard.Natural; | ||||
|  | ||||
| 	subtype System_Rune is Standard.Wide_Character; | ||||
|  | ||||
		Reference in New Issue
	
	Block a user