# Test Unicode entities BEGIN { if ($] < 5.006) { print "1..0 # skipped: This perl does not support Unicode\n"; exit; } } use HTML::Entities; unless (&HTML::Entities::UNICODE_SUPPORT) { print "1..0 # skipped: Unicode entities not selected\n"; exit; } print "1..10\n"; print "not " unless decode_entities("&euro") eq "\x{20AC}"; print "ok 1\n"; print "not " unless decode_entities("å") eq "م"; print "ok 2\n"; print "not " unless decode_entities("񺄠") eq chr(500000); print "ok 3\n"; { no warnings 'utf8'; # These are illegal unicode chars print "not " unless decode_entities("￿") eq "\x{FFFF}"; print "ok 4\n"; print "not " unless decode_entities("􏿿") eq chr(0x10FFFF); print "ok 5\n"; print "not " unless decode_entities("�") eq chr(0xFFFF_FFFF); print "ok 6\n"; } print "not " unless decode_entities("�") eq "\0" && decode_entities("�") eq "\0" && decode_entities("�") eq "\0" && decode_entities("�") eq "\0"; print "ok 7\n"; print "not " unless decode_entities("&#ååå࿿") eq "&#ممم\x{FFF}"; print "ok 8\n"; # This might fail when we get more than 64 bit UVs print "not " unless decode_entities("�") eq "�" and decode_entities("�") eq "�"; print "ok 9\n"; my $err; for ([32, 48], [120, 169], [240, 250], [250, 260], [965, 975], [3000, 3005]) { my $a = join("", map chr, $_->[0] .. $_->[1]); #print join(", ", unpack("U*", $a)), "\n"; my $e = encode_entities($a); my $d = decode_entities($e); print "\n$_->[0] .. $_->[1]\n"; #print "$a\n"; print "$e\n"; unless ($d eq $a) { print "Wrong decoding in range $_->[0] .. $_->[1]\n"; # use Devel::Peek; Dump($a); Dump($d); $err++; } } print "not " if $err; print "ok 10\n";