touched up experimental character classification functions

This commit is contained in:
hyung-hwan 2021-10-16 11:42:42 +00:00
parent 5a9b1fcac4
commit 3337936c8c
3 changed files with 83 additions and 25 deletions

View File

@ -6,7 +6,7 @@ package body H3.CC is
use type System.UTF_32.Category;
SP: constant Item_Type := Item_Type'Val(32);
VT: constant Item_Type := Item_Type'Val(9);
HT: constant Item_Type := Item_Type'Val(9);
function Is_Alpha (V: in Item_Type) return Standard.Boolean is
begin
@ -21,7 +21,7 @@ package body H3.CC is
function Is_Blank (V: in Item_Type) return Standard.Boolean is
begin
return V = SP or else V = VT;
return V = SP or else V = HT;
end Is_Blank;
function Is_Cntrl (V: in Item_Type) return Standard.Boolean is
@ -36,8 +36,7 @@ package body H3.CC is
function Is_Graph (V: in Item_Type) return Standard.Boolean is
begin
--return UC.Is_UTF_32_Graphic(Item_Type'Pos(V));
return True;
return Is_Print(V) and then V /= SP;
end Is_Graph;
function Is_Lower (V: in Item_Type) return Standard.Boolean is
@ -47,19 +46,20 @@ package body H3.CC is
function Is_Print (V: in Item_Type) return Standard.Boolean is
begin
--TODO:
return True;
return not UC.IS_UTF_32_Non_Graphic(Item_Type'Pos(V));
end Is_Print;
function Is_Punct (V: in Item_Type) return Standard.Boolean is
begin
return UC.Is_UTF_32_Punctuation(Item_Type'Pos(V));
--return UC.Is_UTF_32_Punctuation(Item_Type'Pos(V));
return Is_Print(V) and then not Is_Space(V) and then not Is_Alnum(V);
end Is_Punct;
function Is_Space (V: in Item_Type) return Standard.Boolean is
begin
return UC.Is_UTF_32_Space(Item_Type'Pos(V)) or else
UC.Is_UTF_32_Line_Terminator(Item_Type'Pos(V));
UC.Is_UTF_32_Line_Terminator(Item_Type'Pos(V)) or else
V = HT;
end Is_Space;
function Is_Upper (V: in Item_Type) return Standard.Boolean is
@ -84,9 +84,9 @@ package body H3.CC is
return Item_Type'Val(UC.UTF_32_To_Upper_Case(Item_Type'Pos(V)));
end To_Upper;
function Is_Class (V: in Item_Type; C: in Class) return Standard.Boolean is
function Is_Class (V: in Item_Type; Cls: in Class) return Standard.Boolean is
begin
case C is
case Cls is
when ALPHA => return Is_Alpha(V);
when ALNUM => return Is_Alnum(V);
when BLANK => return Is_Blank(V);
@ -97,8 +97,8 @@ package body H3.CC is
when PRINT => return Is_Print(V);
when PUNCT => return Is_Punct(V);
when SPACE => return Is_Space(V);
when XDIGIT => return Is_Xdigit(V);
when UPPER => return Is_Upper(V);
when XDIGIT => return Is_Xdigit(V);
end case;
end Is_Class;
end H3.CC;

View File

@ -4,8 +4,8 @@ package H3.CC is
-- <ctype.h>-like character classification package
-- unicode-based. no system locale honored.
type Class is (ALPHA, ALNUM, BLANK, CNTRL, DIGIT, GRAPH, LOWER, PRINT, PUNCT, SPACE, XDIGIT, UPPER);
function Is_Class (V: in Item_Type; C: in Class) return Standard.Boolean;
type Class is (ALPHA, ALNUM, BLANK, CNTRL, DIGIT, GRAPH, LOWER, PRINT, PUNCT, SPACE, UPPER, XDIGIT);
function Is_Class (V: in Item_Type; Cls: in Class) return Standard.Boolean;
function Is_Alpha (V: in Item_Type) return Standard.Boolean;
function Is_Alnum (V: in Item_Type) return Standard.Boolean;

View File

@ -5,12 +5,16 @@ with Ada.Text_IO;
with Ada.Wide_Text_IO;
with Ada.Assertions;
with Interfaces.C;
--with Interfaces.C.Strings;
with System;
use type H3.System_Size;
procedure hello2 is
package A is new H3.Arrays(Standard.Wide_Character, 1, Wide_Character'Val(0));
package S is new H3.Strings(Standard.Wide_Character, Wide_Character'Val(0));
package CC is new H3.CC(Standard.Wide_Character);
package C renames Interfaces.C;
--package S_I is new H3.Arrays(Integer, 1, 16#FF#);
Arr: A.Elastic_Array;
@ -20,23 +24,77 @@ procedure hello2 is
Str2: S.Elastic_String;
use type S.Elastic_String;
--procedure setlocale(a: C.int; b: Interfaces.C.Strings.chars_ptr);
procedure setlocale(a: C.int; b: System.Address);
pragma Import (C, setlocale, "setlocale");
function is_class (V: Standard.Wide_Character; Cls: CC.Class) return Standard.Boolean is
function iswalpha(a: C.int) return C.int;
pragma Import (C, iswalpha, "iswalpha");
function iswalnum(a: C.int) return C.int;
pragma Import (C, iswalnum, "iswalnum");
function iswblank(a: C.int) return C.int;
pragma Import (C, iswblank, "iswblank");
function iswcntrl(a: C.int) return C.int;
pragma Import (C, iswcntrl, "iswcntrl");
function iswdigit(a: C.int) return C.int;
pragma Import (C, iswdigit, "iswdigit");
function iswgraph(a: C.int) return C.int;
pragma Import (C, iswgraph, "iswgraph");
function iswlower(a: C.int) return C.int;
pragma Import (C, iswlower, "iswlower");
function iswprint(a: C.int) return C.int;
pragma Import (C, iswprint, "iswprint");
function iswpunct(a: C.int) return C.int;
pragma Import (C, iswpunct, "iswpunct");
function iswspace(a: C.int) return C.int;
pragma Import (C, iswspace, "iswspace");
function iswupper(a: C.int) return C.int;
pragma Import (C, iswupper, "iswupper");
function iswxdigit(a: C.int) return C.int;
pragma Import (C, iswxdigit, "iswxdigit");
use type C.int;
X: C.int := Standard.Wide_Character'Pos(V);
begin
case Cls is
when CC.ALPHA => return IswAlpha(X) /= 0;
when CC.ALNUM => return IswAlnum(X) /= 0;
when CC.BLANK => return IswBlank(X) /= 0;
when CC.CNTRL => return IswCntrl(X) /= 0;
when CC.DIGIT => return IswDigit(X) /= 0;
when CC.GRAPH => return IswGraph(X) /= 0;
when CC.LOWER => return IswLower(X) /= 0;
when CC.PRINT => return IswPrint(X) /= 0;
when CC.PUNCT => return IswPunct(X) /= 0;
when CC.SPACE => return IswSpace(X) /= 0;
when CC.UPPER => return IswUpper(X) /= 0;
when CC.XDIGIT => return IswXdigit(X) /= 0;
end case;
end is_class;
Empty_String: aliased Standard.String := "";
begin
--setlocale (6, Interfaces.C.Strings.To_Chars_Ptr(Empty_String'access));
setlocale (6, Empty_String'Address);
A.Append (Arr, "hello");
A.Append (Arr, "world");
Arr.Append ("fantastic");
Arr2 := Arr;
A.Delete (Arr2, 1, 5);
Str.Append ("wonderful");
Str.Delete (2, 3);
Str2 := Str;
Str2.Clear;
Str2.Append ("savage");
Str2.Append ("saxage");
Str := Str2;
Str.Clear;
@ -55,27 +113,27 @@ begin
Ada.Wide_Text_IO.Put_Line (Standard.Wide_String(Str2.To_Item_Array));
declare
package C renames Interfaces.C;
package CC is new H3.CC(Standard.Wide_Character);
--function isspace(a: Standard.Character) return C.int
-- with Import => True, Convention => C, External_Name => "isspace";
function isspace(a: Standard.Character) return C.int;
pragma Import (C, isspace, "isspace");
ch: Standard.Wide_Character;
begin
for i in 0 .. 255 loop
for i in 0 .. 10000 loop
ch := Standard.Wide_Character'Val(i);
Ada.Text_IO.Put (I'img & "[" & ch'Img & "]");
for j in CC.Class'Range loop
Ada.Text_IO.Put (" " & J'Img & ":" & CC.Is_Class(ch, j)'Img);
if CC.Is_Class(ch, j) /= Is_Class(ch, j) then
Ada.Text_IO.Put ("[X]");
--else
-- Ada.Text_IO.Put ("[O]");
end if;
end loop;
Ada.Text_IO.Put_Line ("");
end loop;
Ada.Text_IO.Put_line (CC.Is_Alpha('σ')'Img);
end;
end;