From 483ee39dac8776ce35bc80938b6282259f3c4f03 Mon Sep 17 00:00:00 2001 From: hyung-hwan Date: Sun, 31 Oct 2021 05:41:19 +0000 Subject: [PATCH] added the rune code generator and data --- lib2/h3-compilers.adb | 14 +-- lib2/h3-runes.ads | 287 ++++++++++++++++++++++++++++++++++++++---- lib2/rune.hawk | 34 +++++ lib2/rune.txt | 128 +++++++++++++++++++ 4 files changed, 429 insertions(+), 34 deletions(-) create mode 100644 lib2/rune.hawk create mode 100644 lib2/rune.txt diff --git a/lib2/h3-compilers.adb b/lib2/h3-compilers.adb index 3ccb15e..ba99fd5 100644 --- a/lib2/h3-compilers.adb +++ b/lib2/h3-compilers.adb @@ -123,7 +123,7 @@ ada.text_io.put_line (C.Tk.Id'Img); case C.Lx.State is when LX_START => if R.Is_Eof(Code) then - Start_Token (C, S.Rune_Array'(R.Left_Arrow, R.UC_E, R.UC_O, R.UC_F, R.Right_Arrow)); + Start_Token (C, S.Rune_Array'(R.V.Left_Arrow, R.V.UC_E, R.V.UC_O, R.V.UC_F, R.V.Right_Arrow)); End_Token (C, TK_EOF); -- this procedure doesn't prevent you from feeding more -- after EOF. but it's not desirable to feed more after EOF. 
@@ -134,19 +134,19 @@ ada.text_io.put_line (C.Tk.Id'Img);
 					Set_Lexer_State (C, LX_IDENT, Code);
 				elsif R.Is_Digit(Code) then
 					Set_Lexer_State (C, LX_NUMBER, Code);
-				elsif R.Is_Rune(Code, R.Semicolon) then
+				elsif R.Is_Rune(Code, R.V.Semicolon) then
 					Start_Token (C, Code);
 					End_Token (C, TK_SEMICOLON);
-				elsif R.Is_Rune(Code, R.Left_Arrow) then
+				elsif R.Is_Rune(Code, R.V.Left_Arrow) then
 					Set_Lexer_State (C, LX_OP_LESS, Code);
-				elsif R.Is_Rune(Code, R.Right_Arrow) then
+				elsif R.Is_Rune(Code, R.V.Right_Arrow) then
 					Set_Lexer_State (C, LX_OP_GREATER, Code);
 				else
 					raise Syntax_Error;
 				end if;
 
 			when LX_OP_GREATER =>
-				if R.Is_Rune(Code, R.Equal) then
+				if R.Is_Rune(Code, R.V.Equal_Sign) then
 					End_Token (C, TK_GE, Code);
 				else
 					End_Token (C, TK_GT);
@@ -154,7 +154,7 @@ ada.text_io.put_line (C.Tk.Id'Img);
 				end if;
 
 			when LX_OP_LESS =>
-				if R.Is_Rune(Code, R.Equal) then
+				if R.Is_Rune(Code, R.V.Equal_Sign) then
 					End_Token (C, TK_LE, Code);
 				else
 					End_Token (C, TK_LT);
@@ -165,7 +165,7 @@ ada.text_io.put_line (C.Tk.Id'Img);
 				null;
 
 			when LX_IDENT =>
-				if R.Is_Alnum(Code) or else R.Is_Rune(Code, R.Underline) then
+				if R.Is_Alnum(Code) or else R.Is_Rune(Code, R.V.Underline) then
 					Feed_Token (C, Code);
 				else
 					End_Token (C, TK_IDENT);
diff --git a/lib2/h3-runes.ads b/lib2/h3-runes.ads
index 7ee15ef..4fb0b65 100644
--- a/lib2/h3-runes.ads
+++ b/lib2/h3-runes.ads
@@ -12,35 +12,268 @@ package H3.Runes is
 	-- virtual code to indicate end of input
 	EOF: constant Code := Code'First;
 
-	C_Colon: constant Code := System_Rune'Pos(':');
-	C_Semicolon: constant Code := System_Rune'Pos(';');
-	C_Tilde: constant Code := System_Rune'Pos('~');
-	C_Underline: constant Code := System_Rune'Pos('_');
-	C_Equal: constant Code := System_Rune'Pos('=');
-	C_Left_Arrow: constant Code := System_Rune'Pos('<');
-	C_Right_Arrow: constant Code := System_Rune'Pos('>');
+	package P is
+		NUL                 : constant := 0;
+		SOH                 : constant := 1;
+		STX                 : constant := 2;
+		ETX                 : constant := 3;
+		EOT                 : constant := 4;
+		ENQ                 : constant := 5;
+		ACK                 : constant := 6;
+		BEL                 : constant := 7;
+		BS                  : constant := 8;
+		HT                  : constant := 9;
+		LF                  : constant := 10;
+		VT                  : constant := 11;
+		FF                  : constant := 12;
+		CR                  : constant := 13;
+		SO                  : constant := 14;
+		SI                  : constant := 15;
+		DLE                 : constant := 16;
+		DC1                 : constant := 17;
+		DC2                 : constant := 18;
+		DC3                 : constant := 19;
+		DC4                 : constant := 20;
+		NAK                 : constant := 21;
+		SYN                 : constant := 22;
+		ETB                 : constant := 23;
+		CAN                 : constant := 24;
+		EM                  : constant := 25;
+		SUB                 : constant := 26;
+		ESC                 : constant := 27;
+		FS                  : constant := 28;
+		GS                  : constant := 29;
+		RS                  : constant := 30;
+		US                  : constant := 31;
+		Space               : constant := 32; --  
+		Exclamation         : constant := 33; -- !
+		Quotation           : constant := 34; -- "
+		Number_Sign         : constant := 35; -- #
+		Dollar_Sign         : constant := 36; -- $
+		Percent_Sign        : constant := 37; -- %
+		Ampersand           : constant := 38; -- &
+		Apostrophe          : constant := 39; -- '
+		Left_Parenthesis    : constant := 40; -- (
+		Right_Parenthesis   : constant := 41; -- )
+		Asterisk            : constant := 42; -- *
+		Plus_Sign           : constant := 43; -- +
+		Comma               : constant := 44; -- ,
+		Minus_Sign          : constant := 45; -- -
+		Period              : constant := 46; -- .
+		Slash               : constant := 47; -- /
+		Zero                : constant := 48; -- 0
+		One                 : constant := 49; -- 1
+		Two                 : constant := 50; -- 2
+		Three               : constant := 51; -- 3
+		Four                : constant := 52; -- 4
+		Five                : constant := 53; -- 5
+		Six                 : constant := 54; -- 6
+		Seven               : constant := 55; -- 7
+		Eight               : constant := 56; -- 8
+		Nine                : constant := 57; -- 9
+		Colon               : constant := 58; -- :
+		Semicolon           : constant := 59; -- ;
+		Left_Arrow          : constant := 60; -- <
+		Equal_Sign          : constant := 61; -- =
+		Right_Arrow         : constant := 62; -- >
+		Question            : constant := 63; -- ?
+		Commercial_At       : constant := 64; -- @
+		UC_A                : constant := 65; -- A
+		UC_B                : constant := 66; -- B
+		UC_C                : constant := 67; -- C
+		UC_D                : constant := 68; -- D
+		UC_E                : constant := 69; -- E
+		UC_F                : constant := 70; -- F
+		UC_G                : constant := 71; -- G
+		UC_H                : constant := 72; -- H
+		UC_I                : constant := 73; -- I
+		UC_J                : constant := 74; -- J
+		UC_K                : constant := 75; -- K
+		UC_L                : constant := 76; -- L
+		UC_M                : constant := 77; -- M
+		UC_N                : constant := 78; -- N
+		UC_O                : constant := 79; -- O
+		UC_P                : constant := 80; -- P
+		UC_Q                : constant := 81; -- Q
+		UC_R                : constant := 82; -- R
+		UC_S                : constant := 83; -- S
+		UC_T                : constant := 84; -- T
+		UC_U                : constant := 85; -- U
+		UC_V                : constant := 86; -- V
+		UC_W                : constant := 87; -- W
+		UC_X                : constant := 88; -- X
+		UC_Y                : constant := 89; -- Y
+		UC_Z                : constant := 90; -- Z
+		Left_Square_Bracket : constant := 91; -- [
+		Backslash           : constant := 92; -- \
+		Right_Square_Bracket: constant := 93; -- ]
+		Circumflex          : constant := 94; -- ^
+		Underline           : constant := 95; -- _
+		Grave               : constant := 96; -- `
+		LC_A                : constant := 97; -- a
+		LC_B                : constant := 98; -- b
+		LC_C                : constant := 99; -- c
+		LC_D                : constant := 100; -- d
+		LC_E                : constant := 101; -- e
+		LC_F                : constant := 102; -- f
+		LC_G                : constant := 103; -- g
+		LC_H                : constant := 104; -- h
+		LC_I                : constant := 105; -- i
+		LC_J                : constant := 106; -- j
+		LC_K                : constant := 107; -- k
+		LC_L                : constant := 108; -- l
+		LC_M                : constant := 109; -- m
+		LC_N                : constant := 110; -- n
+		LC_O                : constant := 111; -- o
+		LC_P                : constant := 112; -- p
+		LC_Q                : constant := 113; -- q
+		LC_R                : constant := 114; -- r
+		LC_S                : constant := 115; -- s
+		LC_T                : constant := 116; -- t
+		LC_U                : constant := 117; -- u
+		LC_V                : constant := 118; -- v
+		LC_W                : constant := 119; -- w
+		LC_X                : constant := 120; -- x
+		LC_Y                : constant := 121; -- y
+		LC_Z                : constant := 122; -- z
+		Left_Curly_Bracket  : constant := 123; -- {
+		Vertical_Line       : constant := 124; -- |
+		Right_Curly_Bracket : constant := 125; -- }
+		Tilde               : constant := 126; -- ~
+		DEL                 : constant := 127;
+	end P;
 
-	C_A: constant Code := System_Rune'Pos('A');
-	C_B: constant Code := System_Rune'Pos('B');
-	C_C: constant Code := System_Rune'Pos('C');
-	C_D: constant Code := System_Rune'Pos('D');
-	C_E: constant Code := System_Rune'Pos('E');
+	package V is
+		NUL                 : constant Rune := Rune'Val(P.NUL);
+		SOH                 : constant Rune := Rune'Val(P.SOH);
+		STX                 : constant Rune := Rune'Val(P.STX);
+		ETX                 : constant Rune := Rune'Val(P.ETX);
+		EOT                 : constant Rune := Rune'Val(P.EOT);
+		ENQ                 : constant Rune := Rune'Val(P.ENQ);
+		ACK                 : constant Rune := Rune'Val(P.ACK);
+		BEL                 : constant Rune := Rune'Val(P.BEL);
+		BS                  : constant Rune := Rune'Val(P.BS);
+		HT                  : constant Rune := Rune'Val(P.HT);
+		LF                  : constant Rune := Rune'Val(P.LF);
+		VT                  : constant Rune := Rune'Val(P.VT);
+		FF                  : constant Rune := Rune'Val(P.FF);
+		CR                  : constant Rune := Rune'Val(P.CR);
+		SO                  : constant Rune := Rune'Val(P.SO);
+		SI                  : constant Rune := Rune'Val(P.SI);
+		DLE                 : constant Rune := Rune'Val(P.DLE);
+		DC1                 : constant Rune := Rune'Val(P.DC1);
+		DC2                 : constant Rune := Rune'Val(P.DC2);
+		DC3                 : constant Rune := Rune'Val(P.DC3);
+		DC4                 : constant Rune := Rune'Val(P.DC4);
+		NAK                 : constant Rune := Rune'Val(P.NAK);
+		SYN                 : constant Rune := Rune'Val(P.SYN);
+		ETB                 : constant Rune := Rune'Val(P.ETB);
+		CAN                 : constant Rune := Rune'Val(P.CAN);
+		EM                  : constant Rune := Rune'Val(P.EM);
+		SUB                 : constant Rune := Rune'Val(P.SUB);
+		ESC                 : constant Rune := Rune'Val(P.ESC);
+		FS                  : constant Rune := Rune'Val(P.FS);
+		GS                  : constant Rune := Rune'Val(P.GS);
+		RS                  : constant Rune := Rune'Val(P.RS);
+		US                  : constant Rune := Rune'Val(P.US);
+		Space               : constant Rune := Rune'Val(P.Space);
+		Exclamation         : constant Rune := Rune'Val(P.Exclamation);
+		Quotation           : constant Rune := Rune'Val(P.Quotation);
+		Number_Sign         : constant Rune := Rune'Val(P.Number_Sign);
+		Dollar_Sign         : constant Rune := Rune'Val(P.Dollar_Sign);
+		Percent_Sign        : constant Rune := Rune'Val(P.Percent_Sign);
+		Ampersand           : constant Rune := Rune'Val(P.Ampersand);
+		Apostrophe          : constant Rune := Rune'Val(P.Apostrophe);
+		Left_Parenthesis    : constant Rune := Rune'Val(P.Left_Parenthesis);
+		Right_Parenthesis   : constant Rune := Rune'Val(P.Right_Parenthesis);
+		Asterisk            : constant Rune := Rune'Val(P.Asterisk);
+		Plus_Sign           : constant Rune := Rune'Val(P.Plus_Sign);
+		Comma               : constant Rune := Rune'Val(P.Comma);
+		Minus_Sign          : constant Rune := Rune'Val(P.Minus_Sign);
+		Period              : constant Rune := Rune'Val(P.Period);
+		Slash               : constant Rune := Rune'Val(P.Slash);
+		Zero                : constant Rune := Rune'Val(P.Zero);
+		One                 : constant Rune := Rune'Val(P.One);
+		Two                 : constant Rune := Rune'Val(P.Two);
+		Three               : constant Rune := Rune'Val(P.Three);
+		Four                : constant Rune := Rune'Val(P.Four);
+		Five                : constant Rune := Rune'Val(P.Five);
+		Six                 : constant Rune := Rune'Val(P.Six);
+		Seven               : constant Rune := Rune'Val(P.Seven);
+		Eight               : constant Rune := Rune'Val(P.Eight);
+		Nine                : constant Rune := Rune'Val(P.Nine);
+		Colon               : constant Rune := Rune'Val(P.Colon);
+		Semicolon           : constant Rune := Rune'Val(P.Semicolon);
+		Left_Arrow          : constant Rune := Rune'Val(P.Left_Arrow);
+		Equal_Sign          : constant Rune := Rune'Val(P.Equal_Sign);
+		Right_Arrow         : constant Rune := Rune'Val(P.Right_Arrow);
+		Question            : constant Rune := Rune'Val(P.Question);
+		Commercial_At       : constant Rune := Rune'Val(P.Commercial_At);
+		UC_A                : constant Rune := Rune'Val(P.UC_A);
+		UC_B                : constant Rune := Rune'Val(P.UC_B);
+		UC_C                : constant Rune := Rune'Val(P.UC_C);
+		UC_D                : constant Rune := Rune'Val(P.UC_D);
+		UC_E                : constant Rune := Rune'Val(P.UC_E);
+		UC_F                : constant Rune := Rune'Val(P.UC_F);
+		UC_G                : constant Rune := Rune'Val(P.UC_G);
+		UC_H                : constant Rune := Rune'Val(P.UC_H);
+		UC_I                : constant Rune := Rune'Val(P.UC_I);
+		UC_J                : constant Rune := Rune'Val(P.UC_J);
+		UC_K                : constant Rune := Rune'Val(P.UC_K);
+		UC_L                : constant Rune := Rune'Val(P.UC_L);
+		UC_M                : constant Rune := Rune'Val(P.UC_M);
+		UC_N                : constant Rune := Rune'Val(P.UC_N);
+		UC_O                : constant Rune := Rune'Val(P.UC_O);
+		UC_P                : constant Rune := Rune'Val(P.UC_P);
+		UC_Q                : constant Rune := Rune'Val(P.UC_Q);
+		UC_R                : constant Rune := Rune'Val(P.UC_R);
+		UC_S                : constant Rune := Rune'Val(P.UC_S);
+		UC_T                : constant Rune := Rune'Val(P.UC_T);
+		UC_U                : constant Rune := Rune'Val(P.UC_U);
+		UC_V                : constant Rune := Rune'Val(P.UC_V);
+		UC_W                : constant Rune := Rune'Val(P.UC_W);
+		UC_X                : constant Rune := Rune'Val(P.UC_X);
+		UC_Y                : constant Rune := Rune'Val(P.UC_Y);
+		UC_Z                : constant Rune := Rune'Val(P.UC_Z);
+		Left_Square_Bracket : constant Rune := Rune'Val(P.Left_Square_Bracket);
+		Backslash           : constant Rune := Rune'Val(P.Backslash);
+		Right_Square_Bracket: constant Rune := Rune'Val(P.Right_Square_Bracket);
+		Circumflex          : constant Rune := Rune'Val(P.Circumflex);
+		Underline           : constant Rune := Rune'Val(P.Underline);
+		Grave               : constant Rune := Rune'Val(P.Grave);
+		LC_A                : constant Rune := Rune'Val(P.LC_A);
+		LC_B                : constant Rune := Rune'Val(P.LC_B);
+		LC_C                : constant Rune := Rune'Val(P.LC_C);
+		LC_D                : constant Rune := Rune'Val(P.LC_D);
+		LC_E                : constant Rune := Rune'Val(P.LC_E);
+		LC_F                : constant Rune := Rune'Val(P.LC_F);
+		LC_G                : constant Rune := Rune'Val(P.LC_G);
+		LC_H                : constant Rune := Rune'Val(P.LC_H);
+		LC_I                : constant Rune := Rune'Val(P.LC_I);
+		LC_J                : constant Rune := Rune'Val(P.LC_J);
+		LC_K                : constant Rune := Rune'Val(P.LC_K);
+		LC_L                : constant Rune := Rune'Val(P.LC_L);
+		LC_M                : constant Rune := Rune'Val(P.LC_M);
+		LC_N                : constant Rune := Rune'Val(P.LC_N);
+		LC_O                : constant Rune := Rune'Val(P.LC_O);
+		LC_P                : constant Rune := Rune'Val(P.LC_P);
+		LC_Q                : constant Rune := Rune'Val(P.LC_Q);
+		LC_R                : constant Rune := Rune'Val(P.LC_R);
+		LC_S                : constant Rune := Rune'Val(P.LC_S);
+		LC_T                : constant Rune := Rune'Val(P.LC_T);
+		LC_U                : constant Rune := Rune'Val(P.LC_U);
+		LC_V                : constant Rune := Rune'Val(P.LC_V);
+		LC_W                : constant Rune := Rune'Val(P.LC_W);
+		LC_X                : constant Rune := Rune'Val(P.LC_X);
+		LC_Y                : constant Rune := Rune'Val(P.LC_Y);
+		LC_Z                : constant Rune := Rune'Val(P.LC_Z);
+		Left_Curly_Bracket  : constant Rune := Rune'Val(P.Left_Curly_Bracket);
+		Vertical_Line       : constant Rune := Rune'Val(P.Vertical_Line);
+		Right_Curly_Bracket : constant Rune := Rune'Val(P.Right_Curly_Bracket);
+		Tilde               : constant Rune := Rune'Val(P.Tilde);
+		DEL                 : constant Rune := Rune'Val(P.DEL);
+	end V;
 
-	Colon: constant Rune := Rune'Val(C_Colon);
-	Semicolon: constant Rune := Rune'Val(C_Semicolon);
-	Tilde: constant Rune := Rune'Val(C_Tilde);
-	Underline: constant Rune := Rune'Val(C_Underline);
-	Equal: constant Rune := Rune'Val(C_Equal);
-	Left_Arrow: constant Rune := Rune'Val(C_Left_Arrow);
-	Right_Arrow: constant Rune := Rune'Val(C_Right_Arrow);
-
-	UC_A: constant Rune := Rune'Val(C_A);
-	UC_B: constant Rune := Rune'Val(C_B);
-	UC_C: constant Rune := Rune'Val(C_C);
-	UC_D: constant Rune := Rune'Val(C_D);
-	UC_E: constant Rune := Rune'Val(C_E);
-	UC_O: constant Rune := Rune'Val(System_Rune'Pos('O'));
-	UC_F: constant Rune := Rune'Val(System_Rune'Pos('F'));
 
 	type Item_Class is (
 		ALPHA,
diff --git a/lib2/rune.hawk b/lib2/rune.hawk
new file mode 100644
index 0000000..5fb50c9
--- /dev/null
+++ b/lib2/rune.hawk
@@ -0,0 +1,34 @@
+# This script requires HAWK.
+# Input: rune.txt, one name per line; the name on input line N gets the value N-1.
+BEGIN {
+	printf ("-- Generated with rune.txt and rune.hawk\n");
+	printf ("-- Run hawk -f rune.hawk rune.txt > h2-runes.ads for regeneration\n\n");
+	printf ("generic\n");
+	printf ("\ttype Rune_Type is (<>);\n"); # NOTE(review): the formal is Rune_Type but package V below is emitted with Rune - confirm
+	printf ("package H2.Runes is\n\n"); # NOTE(review): this commit places the generated P/V in H3.Runes (h3-runes.ads) - confirm the H2 naming
+	#printf ("\tpragma Preelaborate (Runes);\n\n");
+	printf ("\tpackage P is\n");
+}
+# Per input line: emit the named number for package P and remember the name for package V.
+{
+	code = NR - 1; # value assigned to the name on this line
+	t = sprintf ("%c", code);
+	t = str::isprint(t)? (" -- " t): ""; # append the character as an Ada comment when it is printable
+	printf ("\t\t%-20s: constant := %d;%s\n", $1, code, t);
+	X[code] = $1;
+}
+# After the last line: close P, then emit one typed constant per remembered name in V.
+END {
+	printf ("\tend P;\n\n");
+	n = length(X); # number of names collected; hoisted out of the loop condition
+	printf ("\tpackage V is\n");
+	for (i = 0; i < n; i++)
+	{
+		printf ("\t\t%-20s: constant Rune := Rune'Val(P.%s);\n", X[i], X[i]);
+	}
+	printf ("\tend V;\n");
+
+	printf ("\n");
+
+	printf ("\nend H2.Runes;\n");
+}
diff --git a/lib2/rune.txt b/lib2/rune.txt
new file mode 100644
index 0000000..12e4be4
--- /dev/null
+++ b/lib2/rune.txt
@@ -0,0 +1,128 @@
+NUL
+SOH
+STX
+ETX
+EOT
+ENQ
+ACK
+BEL
+BS
+HT
+LF
+VT
+FF
+CR
+SO
+SI
+DLE
+DC1
+DC2
+DC3
+DC4
+NAK
+SYN
+ETB
+CAN
+EM
+SUB
+ESC
+FS
+GS
+RS
+US
+Space
+Exclamation
+Quotation
+Number_Sign
+Dollar_Sign
+Percent_Sign
+Ampersand
+Apostrophe
+Left_Parenthesis
+Right_Parenthesis
+Asterisk
+Plus_Sign
+Comma
+Minus_Sign
+Period
+Slash
+Zero
+One
+Two
+Three
+Four
+Five
+Six
+Seven
+Eight
+Nine
+Colon
+Semicolon
+Left_Arrow
+Equal_Sign
+Right_Arrow
+Question
+Commercial_At
+UC_A
+UC_B
+UC_C
+UC_D
+UC_E
+UC_F
+UC_G
+UC_H
+UC_I
+UC_J
+UC_K
+UC_L
+UC_M
+UC_N
+UC_O
+UC_P
+UC_Q
+UC_R
+UC_S
+UC_T
+UC_U
+UC_V
+UC_W
+UC_X
+UC_Y
+UC_Z
+Left_Square_Bracket
+Backslash
+Right_Square_Bracket
+Circumflex
+Underline
+Grave
+LC_A
+LC_B
+LC_C
+LC_D
+LC_E
+LC_F
+LC_G
+LC_H
+LC_I
+LC_J
+LC_K
+LC_L
+LC_M
+LC_N
+LC_O
+LC_P
+LC_Q
+LC_R
+LC_S
+LC_T
+LC_U
+LC_V
+LC_W
+LC_X
+LC_Y
+LC_Z
+Left_Curly_Bracket
+Vertical_Line
+Right_Curly_Bracket
+Tilde
+DEL