package Mail::SpamAssassin::Locales;
use strict;
use bytes;
use vars qw{
%charsets_for_locale
};
# Charsets accepted for each locale, in addition to the charsets that
# is_charset_ok_for_locales() accepts for every locale.
#
# Keys are locale names.  Each value is one string holding charset names
# separated by single spaces.  Names are written the way the lookup
# normalises its input: upper-case with punctuation removed.  The one
# exception is 'SHIFT_JIS', which keeps its underscore and therefore cannot
# equal a normalised name; 'SHIFTJIS' sits next to it in the same list.

# One list is shared, unchanged, by six locales.  'CP1251' appears twice in
# it; the repeat is redundant for matching and is kept only so the stored
# string stays exactly as it was.
my $shared_koi8_list = join(' ', qw(
  KOI8R KOI8U KOI8T ISOIR111 CP1251 GEORGIANPS CP1251 PT154 CP866
));

%charsets_for_locale = (
  'ja' => join(' ', qw(
    EUCJP
    JISX020119760 JISX020819830 JISX020819900 JISX020819970
    JISX021219900 JISX021320001 JISX021320002
    SHIFT_JIS SHIFTJIS
    ISO2022JP SJIS JIS7 JISX0201 JISX0208 JISX0212
  )),

  'ko' => join(' ', qw( EUCKR KSC56011987 )),

  (map { $_ => $shared_koi8_list } qw( ru ka tg be uk bg )),

  'th' => 'TIS620',

  'zh' => join(' ', qw(
    GB1988 GB2312 GB231219800 GB18030 GBK
    BIG5HKSCS BIG5 EUCTW
    ISO2022CN
  )),

  # NOTE(review): is_charset_ok_for_locales() in this file cuts every locale
  # down to its first two lower-case letters before the lookup, so these two
  # dotted keys are not reached through that function -- confirm whether
  # some other caller reads them.
  'zh.big5'   => join(' ', qw( BIG5HKSCS BIG5 EUCTW )),
  'zh.gb2312' => join(' ', qw( GB1988 GB2312 GB231219800 GB18030 GBK ISO2022CN )),
);
# is_charset_ok_for_locales($cs, @locales)
#
# Decide whether the charset name $cs is acceptable for at least one of the
# given locales.
#
#   $cs       charset name as found in a message; case and punctuation are
#             ignored ("iso-8859-1" and "ISO8859_1" are treated alike).  If
#             several names are given separated by ':', only the first one
#             is considered.  An undefined or empty name is never acceptable.
#   @locales  locale names such as "ja", "zh_TW" or "C".  Only the first two
#             lower-case letters are used for the lookup; undef and "C" are
#             treated as "en".
#
# Returns 1 if the charset is one of the always-accepted ones or is listed
# in %charsets_for_locale for any of the locales, otherwise 0.  The caller's
# arguments are not modified.
sub is_charset_ok_for_locales {
  my ($cs, @locales) = @_;

  return 0 unless defined $cs;
  $cs = uc $cs;

  # Keep only the first of several ':'-separated charsets.  This has to run
  # before punctuation is stripped: afterwards no ':' is left to split on
  # and the names would be glued together ("EUC-KR:UTF-8" => "EUCKRUTF8").
  $cs =~ s/:.*$//s;

  $cs =~ s/[^A-Z0-9]//g;    # normalise: "ISO-8859-1" => "ISO88591"
  $cs =~ s/^3D//;           # leading "=3D" left over from quoted-printable

  return 0 if $cs eq '';

  # Charsets accepted regardless of locale: first the exact names, then the
  # name prefixes.
  return 1 if $cs eq 'USASCII' || $cs eq 'IBM852'
           || $cs eq 'XUNKNOWN' || $cs eq 'ISO';
  return 1 if $cs =~ /^(?:ISO8859|ISO10646|UTF|UCS|CP125|WINDOWS|UNICODE11UTF[78])/;

  foreach my $locale (@locales) {
    # Work on a copy so the loop variable itself is never rewritten.
    my $lang = (!defined($locale) || $locale eq 'C') ? 'en' : $locale;

    # "zh_TW" => "zh".
    # NOTE(review): because of this truncation the dotted keys in
    # %charsets_for_locale ('zh.big5', 'zh.gb2312') are never looked up
    # here -- confirm whether that is intended.
    $lang =~ s/^([a-z][a-z]).*$/$1/;

    my $ok_for_loc = $charsets_for_locale{$lang};
    next unless defined $ok_for_loc;

    # The table value is a space-separated list; require a whole-name match.
    return 1 if grep { $_ eq $cs } split ' ', $ok_for_loc;
  }

  return 0;
}
1;