some ada experiments...

This commit is contained in:
2022-08-23 10:13:02 +00:00
parent 3f9d6d9e59
commit 695c55d8a7
17 changed files with 398 additions and 661 deletions

View File

@ -36,326 +36,10 @@ package body H3.Compilers is
end if;
end Dump_Rune;
-- -------------------------------------------------------------------
procedure Start_Token (C: in out Compiler) is
function Is_Line_Break(Code: in R.Code) return Boolean is
begin
C.Tk.Id := TK_EOF; -- indicate the token id is not set yet
-- TODO: store token location.
S.Clear (C.Tk.Buf);
end Start_Token;
procedure Start_Token (C: in out Compiler; Ch: in R.Rune) is
begin
Start_Token (C);
S.Append (C.Tk.Buf, Ch);
end Start_Token;
procedure Start_Token (C: in out Compiler; Code: in R.Code) is
begin
Start_Token (C, R.To_Rune(Code));
end Start_Token;
procedure Start_Token (C: in out Compiler; Str: in S.Rune_Array) is
begin
Start_Token (C);
S.Append (C.Tk.Buf, Str);
end Start_Token;
procedure Feed_Token (C: in out Compiler; Ch: in R.Rune) is
begin
S.Append (C.Tk.Buf, Ch);
end Feed_Token;
procedure Feed_Token (C: in out Compiler; Code: in R.Code) is
begin
Feed_Token(C, R.To_Rune(Code));
end Feed_Token;
procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State) is
begin
C.Lx.State := State;
Start_Token (C); -- empty the token buffer
end Set_Lexer_State;
procedure Switch_Lexer_State (C: in out Compiler; State: in Lexer_State) is
begin
C.Lx.State := State;
-- don't reset the token buffer;
end Switch_Lexer_State;
procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State; Ch: in R.Rune) is
begin
-- change the lexer state while storing the first character in the token buffer.
C.Lx.State := State;
Start_Token (C, Ch);
end Set_Lexer_State;
procedure Set_Lexer_State (C: in out Compiler; State: in Lexer_State; Code: in R.Code) is
begin
Set_Lexer_State (C, State, R.To_Rune(Code));
end Set_Lexer_State;
procedure Got_Token (C: in out Compiler); -- defined further down
procedure End_Token (C: in out Compiler; Id: in Token_Id) is
begin
C.Tk.Id := Id;
Got_Token (C);
Set_Lexer_State (C, LX_START);
end End_Token;
procedure End_Token (C: in out Compiler; Id: in Token_Id; Ch: in R.Rune) is
begin
S.Append (C.Tk.Buf, Ch);
C.Tk.Id := Id;
Got_Token (C);
Set_Lexer_State (C, LX_START);
end End_Token;
procedure End_Token (C: in out Compiler; Id: in Token_Id; Code: in R.Code) is
begin
End_Token (C, Id, R.To_Rune(Code));
end End_Token;
-- -------------------------------------------------------------------
procedure Set_Parse_State (C: in out Compiler; Code: in Parse_State_Code) is
begin
C.Prs.States(C.Prs.Top).Current := Code;
end Set_Parse_State;
procedure Push_Parse_State (C: in out Compiler; Code: in Parse_State_Code) is
Top: System_Index;
begin
if C.Prs.Top = C.Prs.States'Last then
raise Syntax_Error with "parse state stack exhausted";
end if;
Top := C.Prs.Top + 1;
declare
S: Parse_State renames C.Prs.States(Top);
begin
S.Current := Code;
end;
C.Prs.Top := Top;
--ada.text_io.put_line ("Push_Parse_State " & Code'Img);
end Push_Parse_State;
procedure Pop_Parse_State (C: in out Compiler) is
begin
--ada.text_io.put_line ("Pop_Parse_State " & C.Prs.States(C.Prs.Top).Current'Img);
C.Prs.Top := C.Prs.Top - 1;
end Pop_Parse_State;
-- -------------------------------------------------------------------
procedure Push_Inclusion (C: in out Compiler; Name: in S.Rune_Array) is
Top: System_Index;
begin
if C.Inc.Top = C.Inc.Streams'Last then
raise Syntax_Error with "inclusion depth too deep";
end if;
Top := C.Inc.Top + 1;
declare
S: Stream renames C.Inc.Streams(Top);
begin
Ada.Text_IO.Open (S.Handle, Ada.Text_IO.In_File, Standard.String(Utf8.From_Unicode_String(Name)));
S.Prs_Level := C.Prs.Top; -- this is the parse state level of this include directive.
end;
C.Inc.Top := Top;
-- Switch the parse state to handle the terminator
-- after the new pushed state has been popped out.
Set_Parse_State (C, PS_INCLUDE_TERMINATOR);
-- Let the inner content be handled at the state as the include directive is seen.
Push_Parse_State (C, C.Prs.States(C.Prs.Top - 1).Current);
end Push_Inclusion;
procedure Pop_Inclusion (C: in out Compiler; Check: in Boolean) is
begin
Ada.Text_IO.Close (C.Inc.Streams(C.Inc.Top).Handle);
if Check then
if C.Prs.Top /= C.Inc.Streams(C.Inc.Top).Prs_Level + 1 then
ada.text_io.put_line (">>>>>>>>>>> UNBALANCED INCLUSION CONTEXT..." & C.Prs.Top'Img & " " & C.Inc.Streams(C.Inc.Top).Prs_Level'Img);
raise Syntax_Error with "unbalanced inclusion content";
end if;
end if;
C.Inc.Top := C.Inc.Top - 1;
Pop_Parse_State (C);
end Pop_Inclusion;
-- -------------------------------------------------------------------
procedure Parse_Ident (C: in out Compiler) is
begin
if C.Tk.Buf.Equals(LB_CLASS) then
Push_Parse_State (C, PS_CLASS_1);
elsif C.Tk.Buf.Equals(LB_FUN) then
Push_Parse_State (C, PS_FUN_1);
else
-- probably a command name or a variable name?
Push_Parse_State (C, PS_PLAIN_STATEMENT_START);
C.Prs.States(C.Prs.Top).Data := (
Code => PD_STATEMENT,
Stmt_Starter => C.Tk.Buf
);
end if;
end Parse_Ident;
procedure Parse_Class_1 (C: in out Compiler) is
begin
null;
end Parse_Class_1;
procedure Parse_Class_2 (C: in out Compiler) is
begin
null;
end Parse_Class_2;
-- -------------------------------------------------------------------
procedure Parse_Plain_Statement_Start (C: in out Compiler) is
begin
case C.Tk.Id is
when TK_EOL =>
Pop_Parse_State (C);
when TK_EOF =>
Pop_Parse_State (C);
if C.Inc.Top > 0 then
Pop_Inclusion (C, True);
else
-- end of really the input??
null;
end if;
when TK_CSTR =>
null;
when TK_IDENT =>
null;
when TK_DOLLARED_LPAREN => -- $(
Push_Parse_State (C, PS_START);
when TK_RPAREN =>
Pop_Parse_State (C); -- pop as if EOL is seen.
Pop_Parse_State (C); -- pop against TK_DOLLARED_LPAREN
when TK_SEMICOLON =>
-- end of the current statement. go on to the next statement
Set_Parse_State (C, PS_PLAIN_STATEMENT_START);
when others =>
raise Syntax_Error with "invalid token in in plain statement";
end case;
end Parse_Plain_Statement_Start;
-- -------------------------------------------------------------------
procedure Parse_Start (C: in out Compiler) is
begin
case C.Tk.Id is
when TK_BSTR =>
null;
when TK_BYTE =>
null;
when TK_CHAR =>
null;
when TK_CSTR =>
null;
when TK_DIRECTIVE =>
if C.Tk.Buf.Equals(LB_XINCLUDE) then
--Set_Parse_State (C, PS_INCLUDE_TARGET);
Push_Parse_State (C, PS_INCLUDE_TARGET);
else
raise Syntax_Error with "unknown directive name";
end if;
when TK_EOF =>
if C.Inc.Top > 0 then
Pop_Inclusion (C, True);
else
-- end of really the input??
null;
end if;
when TK_EOL =>
null;
when TK_IDENT =>
Parse_Ident (C);
--when TK_NUMBER =>
-- null;
-- plus or minus signed may be allowed here too.
-- plusplus or miniusminus may be allowed here too.
when TK_SEMICOLON =>
null;
--when TK_HASHED_LBRACE =>
-- null;
--when TK_HASHED_LBRACK =>
-- null;
when TK_DOLLARED_LPAREN =>
Push_Parse_State (C, PS_START);
when TK_RPAREN =>
Pop_Parse_State (C);
when others =>
raise Syntax_Error with "unexpected token";
end case;
end Parse_Start;
procedure Parse_Include_Target (C: in out Compiler) is
begin
if C.Tk.Id = TK_CSTR then
-- arrange to feed more data from the included file.
Push_Inclusion (C, S.To_Rune_Array(C.Tk.Buf));
else
-- the target is not a string.
--Dump_Token (C.Tk);
raise Syntax_Error with "string literal required";
end if;
end Parse_Include_Target;
procedure Parse_Include_Terminator (C: in out Compiler) is
begin
if C.Tk.Id /= TK_SEMICOLON then
raise Syntax_Error with "semicolon required";
end if;
Pop_Parse_State (C);
end Parse_Include_Terminator;
procedure Got_Token (C: in out Compiler) is
begin
Dump_Token (C.Tk);
case C.Prs.States(C.Prs.Top).Current is
when PS_START =>
Parse_Start (C);
when PS_INCLUDE_TARGET =>
Parse_Include_Target (C);
when PS_INCLUDE_TERMINATOR =>
Parse_Include_Terminator (C);
when PS_PLAIN_STATEMENT_START =>
Parse_Plain_Statement_Start (C);
when others =>
raise Syntax_Error with "unknown parser state"; -- TODO: change this...
end case;
end Got_Token;
return R.Is_Rune(Code, R.V.LF); -- TODO: consider different line end convention
end Is_Line_Break;
function Is_Ident_Starter(Code: in R.Code) return Boolean is
begin
@ -369,269 +53,122 @@ ada.text_io.put_line (">>>>>>>>>>> UNBALANCED INCLUSION CONTEXT..." & C.Prs.Top'
return Is_Ident_Starter(Code); -- or else R.Is_Rune(Code, R.V.Underline); -- or else R.Is_Rune(C, ...);
end Is_Ident_Char;
procedure Feed_Char_Code (C: in out Compiler; Code: in R.Code) is
-- -------------------------------------------------------------------
package body Feeder is
procedure Switch_To_Start (C: in out Compiler) is
begin
C.F.Lx.Data := Lex_Data'(State => LX_START);
end Switch_To_Start;
procedure Start (C: in out Compiler; Code: in R.Code; Consumed: out Boolean) is
begin
null;
end Start;
procedure Comment (C: in out Compiler; Code: in R.Code; Consumed: out Boolean) is
begin
if Is_Line_Break(Code) then
Switch_To_Start (C);
end if;
end Comment;
procedure Delim_Token (C: in out Compiler; Code: in R.Code; Consumed: out Boolean) is
begin
null;
end Delim_Token;
procedure Hmarked_Token (C: in out Compiler; Code: in R.Code; Consumed: out Boolean) is
begin
null;
end Hmarked_Token;
procedure Update_Location (C: in out Compiler; Code: in R.Code) is
begin
if Is_Line_Break(Code) then
C.F.Lx.Loc.Line := C.F.Lx.Loc.Line + 1;
C.F.Lx.Loc.Colm := 1;
else
C.F.Lx.Loc.Colm := C.F.Lx.Loc.Colm + 1;
end if;
end Update_Location;
procedure Begin_Include (C: in out Compiler) is
begin
null;
end Begin_Include;
procedure Feed_From_Includee (C: in out Compiler) is
begin
null;
end Feed_From_Includee;
end Feeder;
-- -------------------------------------------------------------------
procedure Feed_Char_Code (C: in out Compiler; Code: in R.Code; Consumed: out Boolean) is
begin
<<Start_Over>>
--Dump_Rune (Code);
case C.Lx.State is
when LX_START =>
if R.Is_Eof(Code) then
Start_Token (C, LB_EOF);
End_Token (C, TK_EOF);
-- this procedure doesn't prevent you from feeding more runes
-- after EOF. but it's not desirable to feed more after EOF.
elsif R.Is_Rune(Code, R.V.LF) then -- TODO: support a different EOL scheme
Start_Token (C, LB_EOL);
End_Token (C, TK_EOL);
elsif R.Is_Space(Code) then
-- ignore. carry on
null;
elsif R.Is_Rune(Code, R.V.Number_Sign) then -- #
Set_Lexer_State (C, LX_HASHED, Code);
elsif R.Is_Rune(Code, R.V.Dollar_Sign) then -- $
Set_Lexer_State (C, LX_DOLLARED, Code);
elsif R.Is_Rune(Code, R.V.Left_Curly_Bracket) then -- {
Start_Token (C, Code);
End_Token (C, TK_LBRACE);
elsif R.Is_Rune(Code, R.V.Right_Curly_Bracket) then -- }
Start_Token (C, Code);
End_Token (C, TK_RBRACE);
elsif R.Is_Rune(Code, R.V.Left_Square_Bracket) then -- [
Start_Token (C, Code);
End_Token (C, TK_LBRACK);
elsif R.Is_Rune(Code, R.V.Right_Square_Bracket) then -- ]
Start_Token (C, Code);
End_Token (C, TK_RBRACK);
elsif R.Is_Rune(Code, R.V.Left_Parenthesis) then -- (
Start_Token (C, Code);
End_Token (C, TK_LPAREN);
elsif R.Is_Rune(Code, R.V.Right_Parenthesis) then -- )
Start_Token (C, Code);
End_Token (C, TK_RPAREN);
elsif R.Is_Rune(Code, R.V.Semicolon) then -- ;
Start_Token (C, Code);
End_Token (C, TK_SEMICOLON);
elsif R.Is_Rune(Code, R.V.Colon) then -- :
Set_Lexer_State (C, LX_COLON, Code);
elsif R.Is_Rune(Code, R.V.Quotation) then -- "
Set_Lexer_State (C, LX_CSTR);
elsif Is_Ident_Starter(Code) then
Set_Lexer_State (C, LX_IDENT, Code);
elsif R.Is_Digit(Code) then
Set_Lexer_State (C, LX_NUMBER, Code);
--elsif R.Is_Rune(Code, R.V.Plus_Sign) then -- +
-- Set_Lexer_State (C, LX_OP_PLUS, Code);
--elsif R.Is_Rune(Code, R.V.Minus_Sign) then -- -
-- Set_Lexer_State (C, LX_OP_MINUS, Code);
--elsif R.Is_Rune(Code, R.V.Asterisk) then -- *
-- Set_Lexer_State (C, LX_OP_MUL, Code);
--elsif R.Is_Rune(Code, R.V.Slash) then -- /
-- Set_Lexer_State (C, LX_OP_DIV, Code);
--elsif R.Is_Rune(Code, R.V.Left_Arrow) then -- <
-- Set_Lexer_State (C, LX_OP_LESS, Code);
--elsif R.Is_Rune(Code, R.V.Right_Arrow) then -- >
-- Set_Lexer_State (C, LX_OP_GREATER, Code);
else
raise Syntax_Error;
end if;
when LX_COLON =>
if R.Is_Rune(Code, R.V.Equal_Sign) then -- :=
End_Token (C, TK_ASSIGN, Code);
else
End_Token (C, TK_COLON);
goto Start_Over;
end if;
when LX_COMMENT =>
if R.Is_Eof(Code) then
Set_Lexer_State (C, LX_START);
goto Start_Over;
elsif R.Is_Rune(Code, R.V.LF) then -- TODO: support a different EOL scheme
Start_Token (C, LB_EOL);
End_Token (C, TK_EOL);
end if;
when LX_CSTR =>
-- TODO: escaping...
if R.Is_Rune(Code, R.V.Quotation) then
End_Token (C, TK_CSTR);
else
Feed_Token (C, Code);
end if;
when LX_DIRECTIVE =>
if R.Is_Alnum(Code) or else R.Is_Rune(Code, R.V.Underline) then
Feed_Token (C, Code);
else
End_Token (C, TK_DIRECTIVE);
goto Start_Over;
end if;
when LX_DOLLARED =>
if R.Is_Rune(Code, R.V.Left_Curly_Bracket) then
End_Token (C, TK_DOLLARED_LBRACE, Code);
elsif R.Is_Rune(Code, R.V.Left_Square_Bracket) then
End_Token (C, TK_DOLLARED_LBRACK, Code);
elsif R.Is_Rune(Code, R.V.Left_Parenthesis) then
End_Token (C, TK_DOLLARED_LPAREN, Code);
else
raise Syntax_Error with "invalid dollared token";
end if;
when LX_HASHED =>
if R.Is_Alnum(Code) or else R.Is_Rune(Code, R.V.Underline) then
Feed_Token (C, Code);
Switch_Lexer_State (C, LX_DIRECTIVE);
elsif R.Is_Rune(Code, R.V.Number_Sign) or else R.Is_Rune(Code, R.V.Exclamation) then -- ## or #!
Set_Lexer_State (C, LX_COMMENT);
elsif R.Is_Rune(Code, R.V.Left_Curly_Bracket) then
End_Token (C, TK_HASHED_LBRACE, Code);
elsif R.Is_Rune(Code, R.V.Left_Square_Bracket) then
End_Token (C, TK_HASHED_LBRACK, Code);
elsif R.Is_Rune(Code, R.V.Left_Parenthesis) then
End_Token (C, TK_HASHED_LPAREN, Code);
else
raise Syntax_Error with "invalid hashed token";
end if;
when LX_IDENT =>
if Is_Ident_Char(Code) then
Feed_Token (C, Code);
else
End_Token (C, TK_IDENT);
goto Start_Over;
end if;
when LX_NUMBER =>
if R.Is_Digit(Code) then
Feed_Token (C, Code);
else
End_Token (C, TK_IDENT); -- TODO: change this
goto Start_Over;
end if;
when LX_OP_PLUS =>
if R.Is_Rune(Code, R.V.Plus_Sign) then
End_Token (C, TK_PLUSPLUS, Code);
else
End_Token (C, TK_PLUS);
goto Start_Over;
end if;
when LX_OP_MINUS =>
if R.Is_Rune(Code, R.V.Minus_Sign) then
End_Token (C, TK_MINUSMINUS, Code);
else
End_Token (C, TK_MINUS);
goto Start_Over;
end if;
when LX_OP_MUL =>
if R.Is_Rune(Code, R.V.Asterisk) then
End_Token (C, TK_MULMUL, Code);
else
End_Token (C, TK_MUL);
goto Start_Over;
end if;
when LX_OP_DIV =>
if R.Is_Rune(Code, R.V.Slash) then
End_Token (C, TK_DIVDIV, Code);
else
End_Token (C, TK_DIV);
goto Start_Over;
end if;
when LX_OP_GREATER =>
if R.Is_Rune(Code, R.V.Equal_Sign) then
End_Token (C, TK_GE, Code);
else
End_Token (C, TK_GT);
goto Start_Over;
end if;
when LX_OP_LESS =>
if R.Is_Rune(Code, R.V.Equal_sign) then
End_Token (C, TK_LE, Code);
else
End_Token (C, TK_LT);
goto Start_Over;
end if;
case C.F.Lx.Data.State is
when Feeder.LX_START =>
Feeder.Start(C, Code, Consumed);
when Feeder.LX_COMMENT =>
Feeder.Comment(C, Code, Consumed);
when Feeder.LX_DT =>
Feeder.Delim_Token(C, Code, Consumed);
when Feeder.LX_HC =>
Feeder.Hmarked_Token(C, Code, Consumed);
end case;
--raise Internal_Error with "internal error - unknown flx state";
end Feed_Char_Code;
procedure Feed_Inc (C: in out Compiler) is
-- Feed the contents of a included stream.
Entry_Top: constant System_Index := C.Inc.Top;
Use_Immediate: constant Boolean := True;
begin
loop
while not Ada.Text_IO.End_Of_File(C.Inc.Streams(C.Inc.Top).Handle) loop
declare
Ch: Standard.Character;
begin
-- Get() skips line terminators. End_Of_Line() checks if it reaches EOL
-- but can't handle multiple consecutive EOLs. Get_Immediate() doesn't
-- skip EOLs. As detecting every EOL in the multiple consecutive sequence
-- is not required, End_Of_Line()+Get() is good too.
if Use_Immediate then
Ada.Text_IO.Get_Immediate (C.Inc.Streams(C.Inc.Top).Handle, Ch);
else
if Ada.Text_IO.End_Of_Line(C.Inc.Streams(C.Inc.Top).Handle) then
Feed_Char_Code (C, R.P.LF);
end if;
Ada.Text_IO.Get (C.Inc.Streams(C.Inc.Top).Handle, Ch);
end if;
Feed_Char_Code (C, Standard.Character'Pos(Ch));
end;
-- After each feed, C.Inc.Top may get incremented if an inclusion
-- directive is found. so the while loop iterates over the streams
-- of all inner included levels. End_Feed() below drops C.Inc.Top
-- and the outer loop will resume the inner while loop at the outer
-- inclusion level until all entered inclusion levels are exited.
end loop;
End_Feed (C);
if C.Inc.Top < Entry_Top then
-- Pop_Inclusion() is called on EOF which is fed by End_Feed().
-- It also decrements the stack pointer. The current inclusion
-- stack pointer will get less that First_Top if the first inclusion
-- level entered is exited.
exit;
end if;
end loop;
end Feed_Inc;
procedure Feed (C: in out Compiler; Data: in S.Rune_Array) is
Consumed: Boolean;
CC: R.Code;
I: System_Index := Data'First;
begin
for i in Data'Range loop
Feed_Char_Code (C, R.To_Code(Data(i)));
if C.Inc.Top > 0 then
Feed_Inc (C);
while I <= Data'Last loop
CC := R.To_Code(Data(I));
Feed_Char_Code (C, CC, consumed);
if Consumed then
Feeder.Update_Location (C, CC);
I := I + 1;
end if;
if C.F.Rd.Do_Include_File then
Feeder.Begin_Include (C);
C.F.Rd.Do_Include_File := False;
end if;
--if C->Cur then
-- Feeder.Feed_From_Includee (C);
--end if;
end loop;
end Feed;
procedure End_Feed (C: in out Compiler) is
Consumed: Boolean;
begin
Feed_Char_Code (C, R.P.EOF);
begin
loop
Feed_Char_Code(C, R.P.EOF, Consumed);
exit when Consumed;
end loop;
exception
when others =>
--if C.Feed.Rd.Level <= 0 and then C.Get_Error_Number = HCL_ERR_SYNTAX and then C.Get_Syntax_Error_Number = HCL_SYNERR_EOF then
-- null; -- normal EOF
--else
raise;
--end if;
end;
end End_Feed;
-- -------------------------------------------------------------------
procedure Initialize (C: in out Compiler) is
begin
Push_Parse_State (C, PS_START);
--Push_Parse_State (C, PS_START);
declare
X: H3.Trees.Tree;
@ -642,11 +179,13 @@ ada.text_io.put_line (">>>>>>>>>>> UNBALANCED INCLUSION CONTEXT..." & C.Prs.Top'
procedure Finalize (C: in out Compiler) is
begin
while C.Inc.Top > 0 loop
Pop_Inclusion (C, False);
end loop;
while C.Prs.Top > 0 loop
Pop_Parse_State (C);
end loop;
--while C.Inc.Top > 0 loop
-- Pop_Inclusion (C, False);
--end loop;
--while C.Prs.Top > 0 loop
-- Pop_Parse_State (C);
--end loop;
null;
end Finalize;
end H3.Compilers;