more tokens in the lexer code
This commit is contained in:
		| @ -5,6 +5,7 @@ package body H3.Compilers is | |||||||
| 	package Utf8 is new H3.Utf8(Standard.Character, S.Rune, Char_Array, S.Rune_Array); | 	package Utf8 is new H3.Utf8(Standard.Character, S.Rune, Char_Array, S.Rune_Array); | ||||||
|  |  | ||||||
| 	LB_EOF: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_F,R.V.Right_Arrow); -- <EOF> | 	LB_EOF: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_F,R.V.Right_Arrow); -- <EOF> | ||||||
|  | 	LB_EOL: constant S.Rune_Array := (R.V.Left_Arrow,R.V.UC_E,R.V.UC_O,R.V.UC_L,R.V.Right_Arrow); -- <EOL> | ||||||
| 	LB_XINCLUDE: constant S.Rune_Array := (R.V.Number_Sign,R.V.LC_I,R.V.LC_N,R.V.LC_C,R.V.LC_L,R.V.LC_U,R.V.LC_D,R.V.LC_E); -- #include | 	LB_XINCLUDE: constant S.Rune_Array := (R.V.Number_Sign,R.V.LC_I,R.V.LC_N,R.V.LC_C,R.V.LC_L,R.V.LC_U,R.V.LC_D,R.V.LC_E); -- #include | ||||||
|  |  | ||||||
|  |  | ||||||
| @ -165,7 +166,7 @@ package body H3.Compilers is | |||||||
| 		Ada.Text_IO.Close (C.Inc.Streams(C.Inc.Top).Handle); | 		Ada.Text_IO.Close (C.Inc.Streams(C.Inc.Top).Handle); | ||||||
| 		if Check then | 		if Check then | ||||||
| 			if C.Prs.Top /= C.Inc.Streams(C.Inc.Top).Prs_Level + 1 then | 			if C.Prs.Top /= C.Inc.Streams(C.Inc.Top).Prs_Level + 1 then | ||||||
| 				raise Syntax_Error with "unblanced inclusion content"; | 					raise Syntax_Error with "unblanced inclusion content"; | ||||||
| 			end if; | 			end if; | ||||||
| 		end if; | 		end if; | ||||||
| 		C.Inc.Top := C.Inc.Top - 1; | 		C.Inc.Top := C.Inc.Top - 1; | ||||||
| @ -263,6 +264,16 @@ package body H3.Compilers is | |||||||
|  |  | ||||||
| 	end Got_Token; | 	end Got_Token; | ||||||
|  |  | ||||||
|  | 	function Is_Ident_Starter(Code: in R.Code) return Boolean is | ||||||
|  | 	begin | ||||||
|  | 		return R.Is_Alnum(Code) or else R.Is_Rune(Code, R.V.Minus_Sign); | ||||||
|  | 	end Is_Ident_Starter; | ||||||
|  |  | ||||||
|  | 	function Is_Ident_Char(Code: in R.Code) return Boolean is | ||||||
|  | 	begin | ||||||
|  | 		return Is_Ident_Starter(Code) or else R.Is_Rune(Code, R.V.Underline); -- or else R.Is_Rune(C, ...); | ||||||
|  | 	end Is_Ident_Char; | ||||||
|  |  | ||||||
| 	procedure Feed_Char_Code (C: in out Compiler; Code: in R.Code) is | 	procedure Feed_Char_Code (C: in out Compiler; Code: in R.Code) is | ||||||
| 	begin | 	begin | ||||||
| 	<<Start_Over>> | 	<<Start_Over>> | ||||||
| @ -275,10 +286,41 @@ package body H3.Compilers is | |||||||
| 					End_Token (C, TK_EOF); | 					End_Token (C, TK_EOF); | ||||||
| 					-- this procedure doesn't prevent you from feeding more runes | 					-- this procedure doesn't prevent you from feeding more runes | ||||||
| 					-- after EOF. but it's not desirable to feed more after EOF. | 					-- after EOF. but it's not desirable to feed more after EOF. | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.LF) then  -- TODO: support a different EOL scheme | ||||||
|  | 					Start_Token (C, LB_EOL); | ||||||
|  | 					End_Token (C, TK_EOL); | ||||||
| 				elsif R.Is_Space(Code) then | 				elsif R.Is_Space(Code) then | ||||||
| 					-- ignore. carry on | 					-- ignore. carry on | ||||||
| 					null; | 					null; | ||||||
| 				elsif R.Is_Alpha(Code) then |  | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Number_Sign) then -- # | ||||||
|  | 					Set_Lexer_State (C, LX_HASHED, Code); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Dollar_Sign) then -- $ | ||||||
|  | 					Set_Lexer_State (C, LX_DOLLARED, Code); | ||||||
|  |  | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Left_Curly_Bracket) then -- { | ||||||
|  | 					Start_Token (C, Code); | ||||||
|  | 					End_Token (C, TK_LBRACE); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Right_Curly_Bracket) then -- } | ||||||
|  | 					Start_Token (C, Code); | ||||||
|  | 					End_Token (C, TK_RBRACE); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Left_Square_Bracket) then -- [ | ||||||
|  | 					Start_Token (C, Code); | ||||||
|  | 					End_Token (C, TK_LBRACK); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Right_Square_Bracket) then -- ] | ||||||
|  | 					Start_Token (C, Code); | ||||||
|  | 					End_Token (C, TK_RBRACK); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Left_Parenthesis) then -- ( | ||||||
|  | 					Start_Token (C, Code); | ||||||
|  | 					End_Token (C, TK_LPAREN); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Right_Parenthesis) then -- ) | ||||||
|  | 					Start_Token (C, Code); | ||||||
|  | 					End_Token (C, TK_RPAREN); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Semicolon) then -- ; | ||||||
|  | 					Start_Token (C, Code); | ||||||
|  | 					End_Token (C, TK_SEMICOLON); | ||||||
|  |  | ||||||
|  | 				elsif Is_Ident_Starter(Code) then | ||||||
| 					Set_Lexer_State (C, LX_IDENT, Code); | 					Set_Lexer_State (C, LX_IDENT, Code); | ||||||
| 				elsif R.Is_Digit(Code) then | 				elsif R.Is_Digit(Code) then | ||||||
| 					Set_Lexer_State (C, LX_NUMBER, Code); | 					Set_Lexer_State (C, LX_NUMBER, Code); | ||||||
| @ -294,13 +336,8 @@ package body H3.Compilers is | |||||||
| 					Set_Lexer_State (C, LX_OP_LESS, Code); | 					Set_Lexer_State (C, LX_OP_LESS, Code); | ||||||
| 				elsif R.Is_Rune(Code, R.V.Right_Arrow) then -- > | 				elsif R.Is_Rune(Code, R.V.Right_Arrow) then -- > | ||||||
| 					Set_Lexer_State (C, LX_OP_GREATER, Code); | 					Set_Lexer_State (C, LX_OP_GREATER, Code); | ||||||
| 				elsif R.Is_Rune(Code, R.V.Number_Sign) then -- # |  | ||||||
| 					Set_Lexer_State (C, LX_DIRECTIVE, Code); |  | ||||||
| 				elsif R.Is_Rune(Code, R.V.Quotation) then -- " | 				elsif R.Is_Rune(Code, R.V.Quotation) then -- " | ||||||
| 					Set_Lexer_State (C, LX_CSTR); | 					Set_Lexer_State (C, LX_CSTR); | ||||||
| 				elsif R.Is_Rune(Code, R.V.Semicolon) then |  | ||||||
| 					Start_Token (C, Code); |  | ||||||
| 					End_Token (C, TK_SEMICOLON); |  | ||||||
| 				else | 				else | ||||||
| 					raise Syntax_Error; | 					raise Syntax_Error; | ||||||
| 				end if; | 				end if; | ||||||
| @ -314,7 +351,13 @@ package body H3.Compilers is | |||||||
| 				end if; | 				end if; | ||||||
|  |  | ||||||
| 			when LX_COMMENT => | 			when LX_COMMENT => | ||||||
| 				null; | 				if R.Is_Eof(Code) then | ||||||
|  | 					Set_Lexer_State (C, LX_START); | ||||||
|  | 					goto Start_Over; | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.LF) then -- TODO: support a different EOL scheme | ||||||
|  | 					Start_Token (C, LB_EOL); | ||||||
|  | 					End_Token (C, TK_EOL); | ||||||
|  | 				end if; | ||||||
|  |  | ||||||
| 			when LX_CSTR => | 			when LX_CSTR => | ||||||
| 				-- TODO: escaping... | 				-- TODO: escaping... | ||||||
| @ -324,9 +367,36 @@ package body H3.Compilers is | |||||||
| 					Feed_Token (C, Code); | 					Feed_Token (C, Code); | ||||||
| 				end if; | 				end if; | ||||||
|  |  | ||||||
| 			when LX_IDENT => | 			when LX_DOLLARED => | ||||||
|  | 				if R.Is_Rune(Code, R.V.Left_Curly_Bracket) then | ||||||
|  | 					End_Token (C, TK_DOLLARED_LBRACE, Code); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Left_Square_Bracket) then | ||||||
|  | 					End_Token (C, TK_DOLLARED_LBRACK, Code); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Left_Parenthesis) then | ||||||
|  | 					End_Token (C, TK_DOLLARED_LPAREN, Code); | ||||||
|  | 				else | ||||||
|  | 					raise Syntax_Error with "invalid dollared token"; | ||||||
|  | 				end if; | ||||||
|  |  | ||||||
|  | 			when LX_HASHED => | ||||||
| 				if R.Is_Alnum(Code) or else R.Is_Rune(Code, R.V.Underline) then | 				if R.Is_Alnum(Code) or else R.Is_Rune(Code, R.V.Underline) then | ||||||
| 					Feed_Token (C, Code); | 					Feed_Token (C, Code); | ||||||
|  | 					Switch_Lexer_State (C, LX_DIRECTIVE); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Number_Sign) then -- ## | ||||||
|  | 					Set_Lexer_State (C, LX_COMMENT); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Left_Curly_Bracket) then | ||||||
|  | 					End_Token (C, TK_HASHED_LBRACE, Code); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Left_Square_Bracket) then | ||||||
|  | 					End_Token (C, TK_HASHED_LBRACK, Code); | ||||||
|  | 				elsif R.Is_Rune(Code, R.V.Left_Parenthesis) then | ||||||
|  | 					End_Token (C, TK_HASHED_LPAREN, Code); | ||||||
|  | 				else | ||||||
|  | 					raise Syntax_Error with "invalid hashed token"; | ||||||
|  | 				end if; | ||||||
|  |  | ||||||
|  | 			when LX_IDENT => | ||||||
|  | 				if Is_Ident_Char(Code) then | ||||||
|  | 					Feed_Token (C, Code); | ||||||
| 				else | 				else | ||||||
| 					End_Token (C, TK_IDENT); | 					End_Token (C, TK_IDENT); | ||||||
| 					goto Start_Over; | 					goto Start_Over; | ||||||
| @ -392,14 +462,28 @@ package body H3.Compilers is | |||||||
| 	end Feed_Char_Code; | 	end Feed_Char_Code; | ||||||
|  |  | ||||||
| 	procedure Feed_Inc (C: in out Compiler) is | 	procedure Feed_Inc (C: in out Compiler) is | ||||||
|  | 		-- Feed the contents of a included stream.	 | ||||||
| 		Entry_Top: constant System_Index := C.Inc.Top; | 		Entry_Top: constant System_Index := C.Inc.Top; | ||||||
|  | 		Use_Immediate: constant Boolean := True; | ||||||
| 	begin | 	begin | ||||||
| 		loop | 		loop | ||||||
| 			while not Ada.Text_IO.End_Of_File(C.Inc.Streams(C.Inc.Top).Handle) loop | 			while not Ada.Text_IO.End_Of_File(C.Inc.Streams(C.Inc.Top).Handle) loop | ||||||
| 				declare | 				declare | ||||||
| 					Ch: Standard.Character; | 					Ch: Standard.Character; | ||||||
| 				begin | 				begin | ||||||
| 					Ada.Text_IO.Get (C.Inc.Streams(C.Inc.Top).Handle, Ch); | 					-- Get() skips line terminators. End_Of_Line() checks if it reaches EOL  | ||||||
|  | 					-- but can't handle multiple consecutive EOLs. Get_Immediate() doesn't | ||||||
|  | 					-- skip EOLs. As detecting every EOL in the multiple consecutive sequence | ||||||
|  | 					-- is not required, End_Of_Line()+Get() is good too. | ||||||
|  | 					if Use_Immediate then | ||||||
|  | 						Ada.Text_IO.Get_Immediate (C.Inc.Streams(C.Inc.Top).Handle, Ch); | ||||||
|  | 					else | ||||||
|  | 						if Ada.Text_IO.End_Of_Line(C.Inc.Streams(C.Inc.Top).Handle) then | ||||||
|  | 							Feed_Char_Code (C, R.P.LF); | ||||||
|  | 						end if; | ||||||
|  | 						Ada.Text_IO.Get (C.Inc.Streams(C.Inc.Top).Handle, Ch); | ||||||
|  | 					end if; | ||||||
|  |  | ||||||
| 					Feed_Char_Code (C, Standard.Character'Pos(Ch)); | 					Feed_Char_Code (C, Standard.Character'Pos(Ch)); | ||||||
| 				end; | 				end; | ||||||
| 				-- After each feed, C.Inc.Top may get incremented if an inclusion | 				-- After each feed, C.Inc.Top may get incremented if an inclusion | ||||||
| @ -424,7 +508,6 @@ package body H3.Compilers is | |||||||
| 	begin | 	begin | ||||||
| 		for i in Data'Range loop | 		for i in Data'Range loop | ||||||
| 			Feed_Char_Code (C, R.To_Code(Data(i))); | 			Feed_Char_Code (C, R.To_Code(Data(i))); | ||||||
|  |  | ||||||
| 			if C.Inc.Top > 0 then | 			if C.Inc.Top > 0 then | ||||||
| 				Feed_Inc (C); | 				Feed_Inc (C); | ||||||
| 			end if; | 			end if; | ||||||
|  | |||||||
| @ -27,6 +27,8 @@ private | |||||||
| 		LX_COMMENT, | 		LX_COMMENT, | ||||||
| 		LX_CSTR, | 		LX_CSTR, | ||||||
| 		LX_DIRECTIVE, | 		LX_DIRECTIVE, | ||||||
|  | 		LX_DOLLARED, | ||||||
|  | 		LX_HASHED, | ||||||
| 		LX_IDENT, | 		LX_IDENT, | ||||||
| 		LX_NUMBER, | 		LX_NUMBER, | ||||||
| 		LX_OP_DIV, | 		LX_OP_DIV, | ||||||
| @ -48,12 +50,21 @@ private | |||||||
| 		TK_DIRECTIVE, | 		TK_DIRECTIVE, | ||||||
| 		TK_DIV, | 		TK_DIV, | ||||||
| 		TK_DIVDIV, | 		TK_DIVDIV, | ||||||
|  | 		TK_DOLLARED_LBRACE, | ||||||
|  | 		TK_DOLLARED_LBRACK, | ||||||
|  | 		TK_DOLLARED_LPAREN, | ||||||
| 		TK_EOF, | 		TK_EOF, | ||||||
| 		TK_EOL, | 		TK_EOL, | ||||||
|  | 		TK_HASHED_LBRACE, | ||||||
|  | 		TK_HASHED_LBRACK, | ||||||
|  | 		TK_HASHED_LPAREN, | ||||||
| 		TK_IDENT, | 		TK_IDENT, | ||||||
| 		TK_GE, | 		TK_GE, | ||||||
| 		TK_GT, | 		TK_GT, | ||||||
|  | 		TK_LBRACE, | ||||||
|  | 		TK_LBRACK, | ||||||
| 		TK_LE, | 		TK_LE, | ||||||
|  | 		TK_LPAREN, | ||||||
| 		TK_LT, | 		TK_LT, | ||||||
| 		TK_MINUS, | 		TK_MINUS, | ||||||
| 		TK_MINUSMINUS, | 		TK_MINUSMINUS, | ||||||
| @ -61,6 +72,9 @@ private | |||||||
| 		TK_MULMUL, | 		TK_MULMUL, | ||||||
| 		TK_PLUS, | 		TK_PLUS, | ||||||
| 		TK_PLUSPLUS, | 		TK_PLUSPLUS, | ||||||
|  | 		TK_RBRACE, | ||||||
|  | 		TK_RBRACK, | ||||||
|  | 		TK_RPAREN, | ||||||
| 		TK_SEMICOLON | 		TK_SEMICOLON | ||||||
| 	); | 	); | ||||||
| 	type Token is record | 	type Token is record | ||||||
|  | |||||||
							
								
								
									
										115
									
								
								lib2/sample-lang.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								lib2/sample-lang.txt
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,115 @@ | |||||||
|  | ls -laF | ||||||
|  |  | ||||||
|  | print @get-jobs | ||||||
|  | print $(get-jobs) | ||||||
|  |  | ||||||
|  | (defun a (a b c) | ||||||
|  | 	ddddd		 | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | fun a (a b c) => e f | ||||||
|  | 	e = 20 | ||||||
|  | 	f = 30		 | ||||||
|  | end | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class t | ||||||
|  | 	fun a(a b c) => e f | ||||||
|  | 		while a < b | ||||||
|  | 			if a < b | ||||||
|  | 			else | ||||||
|  | 			end | ||||||
|  | 		end | ||||||
|  |  | ||||||
|  | 		for i = 1 to 20 | ||||||
|  | 		end | ||||||
|  | 	end | ||||||
|  | end | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ##################################################### | ||||||
|  |  | ||||||
|  | $() <--- process execution expansion | ||||||
|  | {...} <--- range? | ||||||
|  | [ 1 2 3 ] <--- array | ||||||
|  |  | ||||||
|  | #() <-- array??? | ||||||
|  | #[] <-- hash table?? | ||||||
|  | #{} <-- ??? | ||||||
|  | #<> <--? | ||||||
|  |  | ||||||
|  | $() <--- | ||||||
|  | $[] <--- | ||||||
|  | ${} <--- | ||||||
|  | $<> <--- | ||||||
|  |  | ||||||
|  |  | ||||||
|  | if cmd | ||||||
|  | 	 | ||||||
|  | end | ||||||
|  |  | ||||||
|  | while cmd | ||||||
|  | end | ||||||
|  |  | ||||||
|  | fun fib a | ||||||
|  | 	let a = 20 <-- use it as if it's an declaration + init?? | ||||||
|  | 	"let" a = 20 <--- call the command let | ||||||
|  | 	@a = 20 <-- lvalue | ||||||
|  | 	$a      <-- rvalue | ||||||
|  | 	return $a | ||||||
|  | end | ||||||
|  |  | ||||||
|  | "fun" fib a  <--- call the command 'fun', fun it not special?? | ||||||
|  |  | ||||||
|  | function name as lvalue? function name as rvalue?? | ||||||
|  | 	fib 10 <--- call the function.  | ||||||
|  | 	let x = fib <--- what is this syntax? assigning the function fib to x? it's not call? | ||||||
|  | 	let x = $[fib] | ||||||
|  | 	let x = $[fib < /dev/null >/dev/null] | ||||||
|  |  | ||||||
|  | $(fib a) <-- capture  | ||||||
|  |  | ||||||
|  | @a = $[fib 20]   <-- capture return value | ||||||
|  | @a = $(fib 20)  <-- capture stdout?? | ||||||
|  |  | ||||||
|  | @a = $[fib $[ls -laF]]   | ||||||
|  | @a = $[fib $(ls -laF)] | ||||||
|  |  | ||||||
|  |  | ||||||
|  | complex expression inside $[] and $()?? | ||||||
|  | $( | ||||||
|  | 	printf "abc";  | ||||||
|  | 	if ... | ||||||
|  | 		... | ||||||
|  | 	else | ||||||
|  | 		... | ||||||
|  | 	end | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class X   ### class X Y <--- Y is a parent class? | ||||||
|  | 	names := #[] <- array? | ||||||
|  | 	tools := #{} <- hash table? | ||||||
|  |  | ||||||
|  | 	fun __construct name | ||||||
|  | 		names.add name | ||||||
|  | 		let k := 20 | ||||||
|  | 		k := $(20 + 30) | ||||||
|  | 		return k | ||||||
|  |  | ||||||
|  | 		@names<20> = "jelly" | ||||||
|  | 		@tools<"donkey"> = names; | ||||||
|  | 		@tools.donkey = names? | ||||||
|  | 		@k = $tools.donkey  >>>> "${tools.donkey}"  ${tools.donkey} "${tools}.donkey" | ||||||
|  | 	end | ||||||
|  |  | ||||||
|  | 	fun say_hi msg | ||||||
|  | 		print msg | ||||||
|  | 		ls -laF << execute external command  if the global variable PATH is not null | ||||||
|  | 		        << external command is disabled if PATH is null | ||||||
|  | 		/bin/ls -alF  << if the command begins with /, it still allows execution if this feature is not disabled | ||||||
|  | 		return 20 | ||||||
|  | 	end | ||||||
|  | end | ||||||
		Reference in New Issue
	
	Block a user