------------------------------------------------------------------------------ -- -- -- GNAT RUN-TIME COMPONENTS -- -- -- -- A D A . C H A R A C T E R S . H A N D L I N G -- -- -- -- B o d y -- -- -- -- Copyright (C) 1992-2005 Free Software Foundation, Inc. -- -- -- -- GNAT is free software; you can redistribute it and/or modify it under -- -- terms of the GNU General Public License as published by the Free Soft- -- -- ware Foundation; either version 2, or (at your option) any later ver- -- -- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- -- for more details. You should have received a copy of the GNU General -- -- Public License distributed with GNAT; see file COPYING. If not, write -- -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, -- -- MA 02111-1307, USA. -- -- -- -- As a special exception, if other files instantiate generics from this -- -- unit, or you link this unit with other files to produce an executable, -- -- this unit does not by itself cause the resulting executable to be -- -- covered by the GNU General Public License. This exception does not -- -- however invalidate any other reasons why the executable file might be -- -- covered by the GNU Public License. -- -- -- -- GNAT was originally developed by the GNAT team at New York University. -- -- Extensive contributions were provided by Ada Core Technologies Inc. -- -- -- ------------------------------------------------------------------------------ with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; with Ada.Strings.Maps; use Ada.Strings.Maps; with Ada.Strings.Maps.Constants; use Ada.Strings.Maps.Constants; package body Ada.Characters.Handling is ------------------------------------ -- Character Classification Table -- ------------------------------------ type Character_Flags is mod 256; for Character_Flags'Size use 8; Control : constant Character_Flags := 1; Lower : constant Character_Flags := 2; Upper : constant Character_Flags := 4; Basic : constant Character_Flags := 8; Hex_Digit : constant Character_Flags := 16; Digit : constant Character_Flags := 32; Special : constant Character_Flags := 64; Letter : constant Character_Flags := Lower or Upper; Alphanum : constant Character_Flags := Letter or Digit; Graphic : constant Character_Flags := Alphanum or Special; Char_Map : constant array (Character) of Character_Flags := ( NUL => Control, SOH => Control, STX => Control, ETX => Control, EOT => Control, ENQ => Control, ACK => Control, BEL => Control, BS => Control, HT => Control, LF => Control, VT => Control, FF => Control, CR => Control, SO => Control, SI => Control, DLE => Control, DC1 => Control, DC2 => Control, DC3 => Control, DC4 => Control, NAK => Control, SYN => Control, ETB => Control, CAN => Control, EM => Control, SUB => Control, ESC => Control, FS => Control, GS => Control, RS => Control, US => Control, Space => Special, Exclamation => Special, Quotation => Special, Number_Sign => Special, Dollar_Sign => Special, Percent_Sign => Special, Ampersand => Special, Apostrophe => Special, Left_Parenthesis => Special, Right_Parenthesis => Special, Asterisk => Special, Plus_Sign => Special, Comma => Special, Hyphen => Special, Full_Stop => Special, Solidus => Special, '0' .. '9' => Digit + Hex_Digit, Colon => Special, Semicolon => Special, Less_Than_Sign => Special, Equals_Sign => Special, Greater_Than_Sign => Special, Question => Special, Commercial_At => Special, 'A' .. 'F' => Upper + Basic + Hex_Digit, 'G' .. 'Z' => Upper + Basic, Left_Square_Bracket => Special, Reverse_Solidus => Special, Right_Square_Bracket => Special, Circumflex => Special, Low_Line => Special, Grave => Special, 'a' .. 'f' => Lower + Basic + Hex_Digit, 'g' .. 'z' => Lower + Basic, Left_Curly_Bracket => Special, Vertical_Line => Special, Right_Curly_Bracket => Special, Tilde => Special, DEL => Control, Reserved_128 => Control, Reserved_129 => Control, BPH => Control, NBH => Control, Reserved_132 => Control, NEL => Control, SSA => Control, ESA => Control, HTS => Control, HTJ => Control, VTS => Control, PLD => Control, PLU => Control, RI => Control, SS2 => Control, SS3 => Control, DCS => Control, PU1 => Control, PU2 => Control, STS => Control, CCH => Control, MW => Control, SPA => Control, EPA => Control, SOS => Control, Reserved_153 => Control, SCI => Control, CSI => Control, ST => Control, OSC => Control, PM => Control, APC => Control, No_Break_Space => Special, Inverted_Exclamation => Special, Cent_Sign => Special, Pound_Sign => Special, Currency_Sign => Special, Yen_Sign => Special, Broken_Bar => Special, Section_Sign => Special, Diaeresis => Special, Copyright_Sign => Special, Feminine_Ordinal_Indicator => Special, Left_Angle_Quotation => Special, Not_Sign => Special, Soft_Hyphen => Special, Registered_Trade_Mark_Sign => Special, Macron => Special, Degree_Sign => Special, Plus_Minus_Sign => Special, Superscript_Two => Special, Superscript_Three => Special, Acute => Special, Micro_Sign => Special, Pilcrow_Sign => Special, Middle_Dot => Special, Cedilla => Special, Superscript_One => Special, Masculine_Ordinal_Indicator => Special, Right_Angle_Quotation => Special, Fraction_One_Quarter => Special, Fraction_One_Half => Special, Fraction_Three_Quarters => Special, Inverted_Question => Special, UC_A_Grave => Upper, UC_A_Acute => Upper, UC_A_Circumflex => Upper, UC_A_Tilde => Upper, UC_A_Diaeresis => Upper, UC_A_Ring => Upper, UC_AE_Diphthong => Upper + Basic, UC_C_Cedilla => Upper, UC_E_Grave => Upper, UC_E_Acute => Upper, UC_E_Circumflex => Upper, UC_E_Diaeresis => Upper, UC_I_Grave => Upper, UC_I_Acute => Upper, UC_I_Circumflex => Upper, UC_I_Diaeresis => Upper, UC_Icelandic_Eth => Upper + Basic, UC_N_Tilde => Upper, UC_O_Grave => Upper, UC_O_Acute => Upper, UC_O_Circumflex => Upper, UC_O_Tilde => Upper, UC_O_Diaeresis => Upper, Multiplication_Sign => Special, UC_O_Oblique_Stroke => Upper, UC_U_Grave => Upper, UC_U_Acute => Upper, UC_U_Circumflex => Upper, UC_U_Diaeresis => Upper, UC_Y_Acute => Upper, UC_Icelandic_Thorn => Upper + Basic, LC_German_Sharp_S => Lower + Basic, LC_A_Grave => Lower, LC_A_Acute => Lower, LC_A_Circumflex => Lower, LC_A_Tilde => Lower, LC_A_Diaeresis => Lower, LC_A_Ring => Lower, LC_AE_Diphthong => Lower + Basic, LC_C_Cedilla => Lower, LC_E_Grave => Lower, LC_E_Acute => Lower, LC_E_Circumflex => Lower, LC_E_Diaeresis => Lower, LC_I_Grave => Lower, LC_I_Acute => Lower, LC_I_Circumflex => Lower, LC_I_Diaeresis => Lower, LC_Icelandic_Eth => Lower + Basic, LC_N_Tilde => Lower, LC_O_Grave => Lower, LC_O_Acute => Lower, LC_O_Circumflex => Lower, LC_O_Tilde => Lower, LC_O_Diaeresis => Lower, Division_Sign => Special, LC_O_Oblique_Stroke => Lower, LC_U_Grave => Lower, LC_U_Acute => Lower, LC_U_Circumflex => Lower, LC_U_Diaeresis => Lower, LC_Y_Acute => Lower, LC_Icelandic_Thorn => Lower + Basic, LC_Y_Diaeresis => Lower ); --------------------- -- Is_Alphanumeric -- --------------------- function Is_Alphanumeric (Item : Character) return Boolean is begin return (Char_Map (Item) and Alphanum) /= 0; end Is_Alphanumeric; -------------- -- Is_Basic -- -------------- function Is_Basic (Item : Character) return Boolean is begin return (Char_Map (Item) and Basic) /= 0; end Is_Basic; ------------------ -- Is_Character -- ------------------ function Is_Character (Item : Wide_Character) return Boolean is begin return Wide_Character'Pos (Item) < 256; end Is_Character; function Is_Character (Item : Wide_Wide_Character) return Boolean is begin return Wide_Wide_Character'Pos (Item) < 256; end Is_Character; ---------------- -- Is_Control -- ---------------- function Is_Control (Item : Character) return Boolean is begin return (Char_Map (Item) and Control) /= 0; end Is_Control; -------------- -- Is_Digit -- -------------- function Is_Digit (Item : Character) return Boolean is begin return Item in '0' .. '9'; end Is_Digit; ---------------- -- Is_Graphic -- ---------------- function Is_Graphic (Item : Character) return Boolean is begin return (Char_Map (Item) and Graphic) /= 0; end Is_Graphic; -------------------------- -- Is_Hexadecimal_Digit -- -------------------------- function Is_Hexadecimal_Digit (Item : Character) return Boolean is begin return (Char_Map (Item) and Hex_Digit) /= 0; end Is_Hexadecimal_Digit; ---------------- -- Is_ISO_646 -- ---------------- function Is_ISO_646 (Item : Character) return Boolean is begin return Item in ISO_646; end Is_ISO_646; -- Note: much more efficient coding of the following function is possible -- by testing several 16#80# bits in a complete word in a single operation function Is_ISO_646 (Item : String) return Boolean is begin for J in Item'Range loop if Item (J) not in ISO_646 then return False; end if; end loop; return True; end Is_ISO_646; --------------- -- Is_Letter -- --------------- function Is_Letter (Item : Character) return Boolean is begin return (Char_Map (Item) and Letter) /= 0; end Is_Letter; -------------- -- Is_Lower -- -------------- function Is_Lower (Item : Character) return Boolean is begin return (Char_Map (Item) and Lower) /= 0; end Is_Lower; ---------------- -- Is_Special -- ---------------- function Is_Special (Item : Character) return Boolean is begin return (Char_Map (Item) and Special) /= 0; end Is_Special; --------------- -- Is_String -- --------------- function Is_String (Item : Wide_String) return Boolean is begin for J in Item'Range loop if Wide_Character'Pos (Item (J)) >= 256 then return False; end if; end loop; return True; end Is_String; function Is_String (Item : Wide_Wide_String) return Boolean is begin for J in Item'Range loop if Wide_Wide_Character'Pos (Item (J)) >= 256 then return False; end if; end loop; return True; end Is_String; -------------- -- Is_Upper -- -------------- function Is_Upper (Item : Character) return Boolean is begin return (Char_Map (Item) and Upper) /= 0; end Is_Upper; ----------------------- -- Is_Wide_Character -- ----------------------- function Is_Wide_Character (Item : Wide_Wide_Character) return Boolean is begin return Wide_Wide_Character'Pos (Item) < 2**16; end Is_Wide_Character; -------------------- -- Is_Wide_String -- -------------------- function Is_Wide_String (Item : Wide_Wide_String) return Boolean is begin for J in Item'Range loop if Wide_Wide_Character'Pos (Item (J)) >= 2**16 then return False; end if; end loop; return True; end Is_Wide_String; -------------- -- To_Basic -- -------------- function To_Basic (Item : Character) return Character is begin return Value (Basic_Map, Item); end To_Basic; function To_Basic (Item : String) return String is Result : String (1 .. Item'Length); begin for J in Item'Range loop Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J)); end loop; return Result; end To_Basic; ------------------ -- To_Character -- ------------------ function To_Character (Item : Wide_Character; Substitute : Character := ' ') return Character is begin if Is_Character (Item) then return Character'Val (Wide_Character'Pos (Item)); else return Substitute; end if; end To_Character; function To_Character (Item : Wide_Wide_Character; Substitute : Character := ' ') return Character is begin if Is_Character (Item) then return Character'Val (Wide_Wide_Character'Pos (Item)); else return Substitute; end if; end To_Character; ---------------- -- To_ISO_646 -- ---------------- function To_ISO_646 (Item : Character; Substitute : ISO_646 := ' ') return ISO_646 is begin if Item in ISO_646 then return Item; else return Substitute; end if; end To_ISO_646; function To_ISO_646 (Item : String; Substitute : ISO_646 := ' ') return String is Result : String (1 .. Item'Length); begin for J in Item'Range loop if Item (J) in ISO_646 then Result (J - (Item'First - 1)) := Item (J); else Result (J - (Item'First - 1)) := Substitute; end if; end loop; return Result; end To_ISO_646; -------------- -- To_Lower -- -------------- function To_Lower (Item : Character) return Character is begin return Value (Lower_Case_Map, Item); end To_Lower; function To_Lower (Item : String) return String is Result : String (1 .. Item'Length); begin for J in Item'Range loop Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J)); end loop; return Result; end To_Lower; --------------- -- To_String -- --------------- function To_String (Item : Wide_String; Substitute : Character := ' ') return String is Result : String (1 .. Item'Length); begin for J in Item'Range loop Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute); end loop; return Result; end To_String; function To_String (Item : Wide_Wide_String; Substitute : Character := ' ') return String is Result : String (1 .. Item'Length); begin for J in Item'Range loop Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute); end loop; return Result; end To_String; -------------- -- To_Upper -- -------------- function To_Upper (Item : Character) return Character is begin return Value (Upper_Case_Map, Item); end To_Upper; function To_Upper (Item : String) return String is Result : String (1 .. Item'Length); begin for J in Item'Range loop Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J)); end loop; return Result; end To_Upper; ----------------------- -- To_Wide_Character -- ----------------------- function To_Wide_Character (Item : Character) return Wide_Character is begin return Wide_Character'Val (Character'Pos (Item)); end To_Wide_Character; function To_Wide_Character (Item : Wide_Wide_Character; Substitute : Wide_Character := ' ') return Wide_Character is begin if Wide_Wide_Character'Pos (Item) < 2**16 then return Wide_Character'Val (Wide_Wide_Character'Pos (Item)); else return Substitute; end if; end To_Wide_Character; -------------------- -- To_Wide_String -- -------------------- function To_Wide_String (Item : String) return Wide_String is Result : Wide_String (1 .. Item'Length); begin for J in Item'Range loop Result (J - (Item'First - 1)) := To_Wide_Character (Item (J)); end loop; return Result; end To_Wide_String; function To_Wide_String (Item : Wide_Wide_String; Substitute : Wide_Character := ' ') return Wide_String is Result : Wide_String (1 .. Item'Length); begin for J in Item'Range loop Result (J - (Item'First - 1)) := To_Wide_Character (Item (J), Substitute); end loop; return Result; end To_Wide_String; ---------------------------- -- To_Wide_Wide_Character -- ---------------------------- function To_Wide_Wide_Character (Item : Character) return Wide_Wide_Character is begin return Wide_Wide_Character'Val (Character'Pos (Item)); end To_Wide_Wide_Character; function To_Wide_Wide_Character (Item : Wide_Character) return Wide_Wide_Character is begin return Wide_Wide_Character'Val (Wide_Character'Pos (Item)); end To_Wide_Wide_Character; ------------------------- -- To_Wide_Wide_String -- ------------------------- function To_Wide_Wide_String (Item : String) return Wide_Wide_String is Result : Wide_Wide_String (1 .. Item'Length); begin for J in Item'Range loop Result (J - (Item'First - 1)) := To_Wide_Wide_Character (Item (J)); end loop; return Result; end To_Wide_Wide_String; function To_Wide_Wide_String (Item : Wide_String) return Wide_Wide_String is Result : Wide_Wide_String (1 .. Item'Length); begin for J in Item'Range loop Result (J - (Item'First - 1)) := To_Wide_Wide_Character (Item (J)); end loop; return Result; end To_Wide_Wide_String; end Ada.Characters.Handling;