more tokens in the lexer code
This commit is contained in:
parent
c3a14cbfb7
commit
689346a843
@ -5,6 +5,7 @@ package body H3.Compilers is
|
|||||||
package Utf8 is new H3.Utf8(Standard.Character, S.Rune, Char_Array, S.Rune_Array);
|
package Utf8 is new H3.Utf8(Standard.Character, S.Rune, Char_Array, S.Rune_Array);
|
||||||
|
|
||||||
LB_EOF: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_F,R.V.Right_Arrow); -- <EOF>
|
LB_EOF: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_F,R.V.Right_Arrow); -- <EOF>
|
||||||
|
LB_EOL: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_L,R.V.Right_Arrow); -- <EOL>
|
||||||
LB_XINCLUDE: constant S.Rune_Array := (R.V.Number_Sign,R.V.LC_I,R.V.LC_N,R.V.LC_C,R.V.LC_L,R.V.LC_U,R.V.LC_D,R.V.LC_E); -- #include
|
LB_XINCLUDE: constant S.Rune_Array := (R.V.Number_Sign,R.V.LC_I,R.V.LC_N,R.V.LC_C,R.V.LC_L,R.V.LC_U,R.V.LC_D,R.V.LC_E); -- #include
|
||||||
|
|
||||||
|
|
||||||
@ -165,7 +166,7 @@ package body H3.Compilers is
|
|||||||
Ada.Text_IO.Close (C.Inc.Streams(C.Inc.Top).Handle);
|
Ada.Text_IO.Close (C.Inc.Streams(C.Inc.Top).Handle);
|
||||||
if Check then
|
if Check then
|
||||||
if C.Prs.Top /= C.Inc.Streams(C.Inc.Top).Prs_Level + 1 then
|
if C.Prs.Top /= C.Inc.Streams(C.Inc.Top).Prs_Level + 1 then
|
||||||
raise Syntax_Error with "unblanced inclusion content";
|
raise Syntax_Error with "unblanced inclusion content";
|
||||||
end if;
|
end if;
|
||||||
end if;
|
end if;
|
||||||
C.Inc.Top := C.Inc.Top - 1;
|
C.Inc.Top := C.Inc.Top - 1;
|
||||||
@ -263,6 +264,16 @@ package body H3.Compilers is
|
|||||||
|
|
||||||
end Got_Token;
|
end Got_Token;
|
||||||
|
|
||||||
|
-- Returns True when Code is accepted as the first rune of an identifier:
-- any rune for which R.Is_Alnum holds, or the minus sign.
-- NOTE(review): R.Is_Alnum presumably accepts digits as well as letters.
-- If so, a digit also counts as an identifier starter, and a caller that
-- tests Is_Ident_Starter before R.Is_Digit will never reach its digit
-- branch - confirm whether R.Is_Alpha was intended here.
function Is_Ident_Starter(Code: in R.Code) return Boolean is
|
||||||
|
begin
|
||||||
|
return R.Is_Alnum(Code) or else R.Is_Rune(Code, R.V.Minus_Sign);
|
||||||
|
end Is_Ident_Starter;
|
||||||
|
|
||||||
|
-- Returns True when Code is accepted inside an identifier: every rune that
-- Is_Ident_Starter accepts, plus the underline rune.
function Is_Ident_Char(Code: in R.Code) return Boolean is
|
||||||
|
begin
|
||||||
|
return Is_Ident_Starter(Code) or else R.Is_Rune(Code, R.V.Underline); -- or else R.Is_Rune(C, ...);
|
||||||
|
end Is_Ident_Char;
|
||||||
|
|
||||||
procedure Feed_Char_Code (C: in out Compiler; Code: in R.Code) is
|
procedure Feed_Char_Code (C: in out Compiler; Code: in R.Code) is
|
||||||
begin
|
begin
|
||||||
<<Start_Over>>
|
<<Start_Over>>
|
||||||
@ -275,10 +286,41 @@ package body H3.Compilers is
|
|||||||
End_Token (C, TK_EOF);
|
End_Token (C, TK_EOF);
|
||||||
-- this procedure doesn't prevent you from feeding more runes
|
-- this procedure doesn't prevent you from feeding more runes
|
||||||
-- after EOF. but it's not desirable to feed more after EOF.
|
-- after EOF. but it's not desirable to feed more after EOF.
|
||||||
|
elsif R.Is_Rune(Code, R.V.LF) then -- TODO: support a different EOL scheme
|
||||||
|
Start_Token (C, LB_EOL);
|
||||||
|
End_Token (C, TK_EOL);
|
||||||
elsif R.Is_Space(Code) then
|
elsif R.Is_Space(Code) then
|
||||||
-- ignore. carry on
|
-- ignore. carry on
|
||||||
null;
|
null;
|
||||||
elsif R.Is_Alpha(Code) then
|
|
||||||
|
elsif R.Is_Rune(Code, R.V.Number_Sign) then -- #
|
||||||
|
Set_Lexer_State (C, LX_HASHED, Code);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Dollar_Sign) then -- $
|
||||||
|
Set_Lexer_State (C, LX_DOLLARED, Code);
|
||||||
|
|
||||||
|
elsif R.Is_Rune(Code, R.V.Left_Curly_Bracket) then -- {
|
||||||
|
Start_Token (C, Code);
|
||||||
|
End_Token (C, TK_LBRACE);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Right_Curly_Bracket) then -- }
|
||||||
|
Start_Token (C, Code);
|
||||||
|
End_Token (C, TK_RBRACE);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Left_Square_Bracket) then -- [
|
||||||
|
Start_Token (C, Code);
|
||||||
|
End_Token (C, TK_LBRACK);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Right_Square_Bracket) then -- ]
|
||||||
|
Start_Token (C, Code);
|
||||||
|
End_Token (C, TK_RBRACK);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Left_Parenthesis) then -- (
|
||||||
|
Start_Token (C, Code);
|
||||||
|
End_Token (C, TK_LPAREN);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Right_Parenthesis) then -- )
|
||||||
|
Start_Token (C, Code);
|
||||||
|
End_Token (C, TK_RPAREN);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Semicolon) then -- ;
|
||||||
|
Start_Token (C, Code);
|
||||||
|
End_Token (C, TK_SEMICOLON);
|
||||||
|
|
||||||
|
elsif Is_Ident_Starter(Code) then
|
||||||
Set_Lexer_State (C, LX_IDENT, Code);
|
Set_Lexer_State (C, LX_IDENT, Code);
|
||||||
elsif R.Is_Digit(Code) then
|
elsif R.Is_Digit(Code) then
|
||||||
Set_Lexer_State (C, LX_NUMBER, Code);
|
Set_Lexer_State (C, LX_NUMBER, Code);
|
||||||
@ -294,13 +336,8 @@ package body H3.Compilers is
|
|||||||
Set_Lexer_State (C, LX_OP_LESS, Code);
|
Set_Lexer_State (C, LX_OP_LESS, Code);
|
||||||
elsif R.Is_Rune(Code, R.V.Right_Arrow) then -- >
|
elsif R.Is_Rune(Code, R.V.Right_Arrow) then -- >
|
||||||
Set_Lexer_State (C, LX_OP_GREATER, Code);
|
Set_Lexer_State (C, LX_OP_GREATER, Code);
|
||||||
elsif R.Is_Rune(Code, R.V.Number_Sign) then -- #
|
|
||||||
Set_Lexer_State (C, LX_DIRECTIVE, Code);
|
|
||||||
elsif R.Is_Rune(Code, R.V.Quotation) then -- "
|
elsif R.Is_Rune(Code, R.V.Quotation) then -- "
|
||||||
Set_Lexer_State (C, LX_CSTR);
|
Set_Lexer_State (C, LX_CSTR);
|
||||||
elsif R.Is_Rune(Code, R.V.Semicolon) then
|
|
||||||
Start_Token (C, Code);
|
|
||||||
End_Token (C, TK_SEMICOLON);
|
|
||||||
else
|
else
|
||||||
raise Syntax_Error;
|
raise Syntax_Error;
|
||||||
end if;
|
end if;
|
||||||
@ -314,7 +351,13 @@ package body H3.Compilers is
|
|||||||
end if;
|
end if;
|
||||||
|
|
||||||
when LX_COMMENT =>
|
when LX_COMMENT =>
|
||||||
null;
|
if R.Is_Eof(Code) then
|
||||||
|
Set_Lexer_State (C, LX_START);
|
||||||
|
goto Start_Over;
|
||||||
|
elsif R.Is_Rune(Code, R.V.LF) then -- TODO: support a different EOL scheme
|
||||||
|
Start_Token (C, LB_EOL);
|
||||||
|
End_Token (C, TK_EOL);
|
||||||
|
end if;
|
||||||
|
|
||||||
when LX_CSTR =>
|
when LX_CSTR =>
|
||||||
-- TODO: escaping...
|
-- TODO: escaping...
|
||||||
@ -324,9 +367,36 @@ package body H3.Compilers is
|
|||||||
Feed_Token (C, Code);
|
Feed_Token (C, Code);
|
||||||
end if;
|
end if;
|
||||||
|
|
||||||
when LX_IDENT =>
|
when LX_DOLLARED =>
|
||||||
|
if R.Is_Rune(Code, R.V.Left_Curly_Bracket) then
|
||||||
|
End_Token (C, TK_DOLLARED_LBRACE, Code);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Left_Square_Bracket) then
|
||||||
|
End_Token (C, TK_DOLLARED_LBRACK, Code);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Left_Parenthesis) then
|
||||||
|
End_Token (C, TK_DOLLARED_LPAREN, Code);
|
||||||
|
else
|
||||||
|
raise Syntax_Error with "invalid dollared token";
|
||||||
|
end if;
|
||||||
|
|
||||||
|
when LX_HASHED =>
|
||||||
if R.Is_Alnum(Code) or else R.Is_Rune(Code, R.V.Underline) then
|
if R.Is_Alnum(Code) or else R.Is_Rune(Code, R.V.Underline) then
|
||||||
Feed_Token (C, Code);
|
Feed_Token (C, Code);
|
||||||
|
Switch_Lexer_State (C, LX_DIRECTIVE);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Number_Sign) then -- ##
|
||||||
|
Set_Lexer_State (C, LX_COMMENT);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Left_Curly_Bracket) then
|
||||||
|
End_Token (C, TK_HASHED_LBRACE, Code);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Left_Square_Bracket) then
|
||||||
|
End_Token (C, TK_HASHED_LBRACK, Code);
|
||||||
|
elsif R.Is_Rune(Code, R.V.Left_Parenthesis) then
|
||||||
|
End_Token (C, TK_HASHED_LPAREN, Code);
|
||||||
|
else
|
||||||
|
raise Syntax_Error with "invalid hashed token";
|
||||||
|
end if;
|
||||||
|
|
||||||
|
when LX_IDENT =>
|
||||||
|
if Is_Ident_Char(Code) then
|
||||||
|
Feed_Token (C, Code);
|
||||||
else
|
else
|
||||||
End_Token (C, TK_IDENT);
|
End_Token (C, TK_IDENT);
|
||||||
goto Start_Over;
|
goto Start_Over;
|
||||||
@ -392,14 +462,28 @@ package body H3.Compilers is
|
|||||||
end Feed_Char_Code;
|
end Feed_Char_Code;
|
||||||
|
|
||||||
procedure Feed_Inc (C: in out Compiler) is
|
procedure Feed_Inc (C: in out Compiler) is
|
||||||
|
-- Feed the contents of an included stream.
|
||||||
Entry_Top: constant System_Index := C.Inc.Top;
|
Entry_Top: constant System_Index := C.Inc.Top;
|
||||||
|
Use_Immediate: constant Boolean := True;
|
||||||
begin
|
begin
|
||||||
loop
|
loop
|
||||||
while not Ada.Text_IO.End_Of_File(C.Inc.Streams(C.Inc.Top).Handle) loop
|
while not Ada.Text_IO.End_Of_File(C.Inc.Streams(C.Inc.Top).Handle) loop
|
||||||
declare
|
declare
|
||||||
Ch: Standard.Character;
|
Ch: Standard.Character;
|
||||||
begin
|
begin
|
||||||
Ada.Text_IO.Get (C.Inc.Streams(C.Inc.Top).Handle, Ch);
|
-- Get() skips line terminators. End_Of_Line() checks if it reaches EOL
|
||||||
|
-- but can't handle multiple consecutive EOLs. Get_Immediate() doesn't
|
||||||
|
-- skip EOLs. As detecting every EOL in the multiple consecutive sequence
|
||||||
|
-- is not required, End_Of_Line()+Get() is good too.
|
||||||
|
if Use_Immediate then
|
||||||
|
Ada.Text_IO.Get_Immediate (C.Inc.Streams(C.Inc.Top).Handle, Ch);
|
||||||
|
else
|
||||||
|
if Ada.Text_IO.End_Of_Line(C.Inc.Streams(C.Inc.Top).Handle) then
|
||||||
|
Feed_Char_Code (C, R.P.LF);
|
||||||
|
end if;
|
||||||
|
Ada.Text_IO.Get (C.Inc.Streams(C.Inc.Top).Handle, Ch);
|
||||||
|
end if;
|
||||||
|
|
||||||
Feed_Char_Code (C, Standard.Character'Pos(Ch));
|
Feed_Char_Code (C, Standard.Character'Pos(Ch));
|
||||||
end;
|
end;
|
||||||
-- After each feed, C.Inc.Top may get incremented if an inclusion
|
-- After each feed, C.Inc.Top may get incremented if an inclusion
|
||||||
@ -424,7 +508,6 @@ package body H3.Compilers is
|
|||||||
begin
|
begin
|
||||||
for i in Data'Range loop
|
for i in Data'Range loop
|
||||||
Feed_Char_Code (C, R.To_Code(Data(i)));
|
Feed_Char_Code (C, R.To_Code(Data(i)));
|
||||||
|
|
||||||
if C.Inc.Top > 0 then
|
if C.Inc.Top > 0 then
|
||||||
Feed_Inc (C);
|
Feed_Inc (C);
|
||||||
end if;
|
end if;
|
||||||
|
@ -27,6 +27,8 @@ private
|
|||||||
LX_COMMENT,
|
LX_COMMENT,
|
||||||
LX_CSTR,
|
LX_CSTR,
|
||||||
LX_DIRECTIVE,
|
LX_DIRECTIVE,
|
||||||
|
LX_DOLLARED,
|
||||||
|
LX_HASHED,
|
||||||
LX_IDENT,
|
LX_IDENT,
|
||||||
LX_NUMBER,
|
LX_NUMBER,
|
||||||
LX_OP_DIV,
|
LX_OP_DIV,
|
||||||
@ -48,12 +50,21 @@ private
|
|||||||
TK_DIRECTIVE,
|
TK_DIRECTIVE,
|
||||||
TK_DIV,
|
TK_DIV,
|
||||||
TK_DIVDIV,
|
TK_DIVDIV,
|
||||||
|
TK_DOLLARED_LBRACE,
|
||||||
|
TK_DOLLARED_LBRACK,
|
||||||
|
TK_DOLLARED_LPAREN,
|
||||||
TK_EOF,
|
TK_EOF,
|
||||||
TK_EOL,
|
TK_EOL,
|
||||||
|
TK_HASHED_LBRACE,
|
||||||
|
TK_HASHED_LBRACK,
|
||||||
|
TK_HASHED_LPAREN,
|
||||||
TK_IDENT,
|
TK_IDENT,
|
||||||
TK_GE,
|
TK_GE,
|
||||||
TK_GT,
|
TK_GT,
|
||||||
|
TK_LBRACE,
|
||||||
|
TK_LBRACK,
|
||||||
TK_LE,
|
TK_LE,
|
||||||
|
TK_LPAREN,
|
||||||
TK_LT,
|
TK_LT,
|
||||||
TK_MINUS,
|
TK_MINUS,
|
||||||
TK_MINUSMINUS,
|
TK_MINUSMINUS,
|
||||||
@ -61,6 +72,9 @@ private
|
|||||||
TK_MULMUL,
|
TK_MULMUL,
|
||||||
TK_PLUS,
|
TK_PLUS,
|
||||||
TK_PLUSPLUS,
|
TK_PLUSPLUS,
|
||||||
|
TK_RBRACE,
|
||||||
|
TK_RBRACK,
|
||||||
|
TK_RPAREN,
|
||||||
TK_SEMICOLON
|
TK_SEMICOLON
|
||||||
);
|
);
|
||||||
type Token is record
|
type Token is record
|
||||||
|
115
lib2/sample-lang.txt
Normal file
115
lib2/sample-lang.txt
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
ls -laF
|
||||||
|
|
||||||
|
print @get-jobs
|
||||||
|
print $(get-jobs)
|
||||||
|
|
||||||
|
(defun a (a b c)
|
||||||
|
ddddd
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
fun a (a b c) => e f
|
||||||
|
e = 20
|
||||||
|
f = 30
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
class t
|
||||||
|
fun a(a b c) => e f
|
||||||
|
while a < b
|
||||||
|
if a < b
|
||||||
|
else
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
for i = 1 to 20
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
#####################################################
|
||||||
|
|
||||||
|
$() <--- process execution expansion
|
||||||
|
{...} <--- range?
|
||||||
|
[ 1 2 3 ] <--- array
|
||||||
|
|
||||||
|
#() <-- array???
|
||||||
|
#[] <-- hash table??
|
||||||
|
#{} <-- ???
|
||||||
|
#<> <--?
|
||||||
|
|
||||||
|
$() <---
|
||||||
|
$[] <---
|
||||||
|
${} <---
|
||||||
|
$<> <---
|
||||||
|
|
||||||
|
|
||||||
|
if cmd
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
while cmd
|
||||||
|
end
|
||||||
|
|
||||||
|
fun fib a
|
||||||
|
let a = 20 <-- use it as if it's a declaration + init??
|
||||||
|
"let" a = 20 <--- call the command let
|
||||||
|
@a = 20 <-- lvalue
|
||||||
|
$a <-- rvalue
|
||||||
|
return $a
|
||||||
|
end
|
||||||
|
|
||||||
|
"fun" fib a <--- call the command 'fun', fun it not special??
|
||||||
|
|
||||||
|
function name as lvalue? function name as rvalue??
|
||||||
|
fib 10 <--- call the function.
|
||||||
|
let x = fib <--- what is this syntax? assigning the function fib to x? it's not a call?
|
||||||
|
let x = $[fib]
|
||||||
|
let x = $[fib < /dev/null >/dev/null]
|
||||||
|
|
||||||
|
$(fib a) <-- capture
|
||||||
|
|
||||||
|
@a = $[fib 20] <-- capture return value
|
||||||
|
@a = $(fib 20) <-- capture stdout??
|
||||||
|
|
||||||
|
@a = $[fib $[ls -laF]]
|
||||||
|
@a = $[fib $(ls -laF)]
|
||||||
|
|
||||||
|
|
||||||
|
complex expression inside $[] and $()??
|
||||||
|
$(
|
||||||
|
printf "abc";
|
||||||
|
if ...
|
||||||
|
...
|
||||||
|
else
|
||||||
|
...
|
||||||
|
end
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class X ### class X Y <--- Y is a parent class?
|
||||||
|
names := #[] <- array?
|
||||||
|
tools := #{} <- hash table?
|
||||||
|
|
||||||
|
fun __construct name
|
||||||
|
names.add name
|
||||||
|
let k := 20
|
||||||
|
k := $(20 + 30)
|
||||||
|
return k
|
||||||
|
|
||||||
|
@names<20> = "jelly"
|
||||||
|
@tools<"donkey"> = names;
|
||||||
|
@tools.donkey = names?
|
||||||
|
@k = $tools.donkey >>>> "${tools.donkey}" ${tools.donkey} "${tools}.donkey"
|
||||||
|
end
|
||||||
|
|
||||||
|
fun say_hi msg
|
||||||
|
print msg
|
||||||
|
ls -laF << execute external command if the global variable PATH is not null
|
||||||
|
<< external command is disabled if PATH is null
|
||||||
|
/bin/ls -alF << if the command begins with /, it still allows execution if this feature is not disabled
|
||||||
|
return 20
|
||||||
|
end
|
||||||
|
end
|
Loading…
Reference in New Issue
Block a user