#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <alloca.h>
#include "po-charset.h"
#include <stdlib.h>
#include <string.h>
#include "xerror.h"
#include "basename.h"
#include "progname.h"
#include "strstr.h"
#include "strcase.h"
#include "gettext.h"
/* Shorthand: _(s) expands to gettext (s).  */
#define _(str) gettext (str)

/* Number of elements of the array A.  A must be an actual array, not a
   pointer.  */
#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))

/* Canonical names for ASCII and UTF-8.  po_charset_canonicalize returns
   these very pointers for the corresponding charsets.  */
static const char ascii[] = "ASCII";
const char *po_charset_ascii = ascii;

static const char utf8[] = "UTF-8";
const char *po_charset_utf8 = utf8;

/* Return the canonical spelling of the encoding name CHARSET, or NULL if
   CHARSET is not one of the names in the table below.  The lookup ignores
   case.  For a given charset the returned pointer is always the same table
   entry, so every accepted spelling of one charset yields one pointer.  */
const char *
po_charset_canonicalize (const char *charset)
{
  /* Table layout:
       - the first three entries are all spellings of ASCII;
       - then come (canonical, alias) pairs, the canonical name first;
       - every remaining entry is its own canonical name.  */
  static const char *standard_charsets[] =
  {
    ascii, "ANSI_X3.4-1968", "US-ASCII",
    "ISO-8859-1", "ISO_8859-1",
    "ISO-8859-2", "ISO_8859-2",
    "ISO-8859-3", "ISO_8859-3",
    "ISO-8859-4", "ISO_8859-4",
    "ISO-8859-5", "ISO_8859-5",
    "ISO-8859-6", "ISO_8859-6",
    "ISO-8859-7", "ISO_8859-7",
    "ISO-8859-8", "ISO_8859-8",
    "ISO-8859-9", "ISO_8859-9",
    "ISO-8859-13", "ISO_8859-13",
    "ISO-8859-14", "ISO_8859-14",
    "ISO-8859-15", "ISO_8859-15",
    "KOI8-R",
    "KOI8-U",
    "KOI8-T",
    "CP850",
    "CP866",
    "CP874",
    "CP932",
    "CP949",
    "CP950",
    "CP1250",
    "CP1251",
    "CP1252",
    "CP1253",
    "CP1254",
    "CP1255",
    "CP1256",
    "CP1257",
    "GB2312",
    "EUC-JP",
    "EUC-KR",
    "EUC-TW",
    "BIG5",
    "BIG5-HKSCS",
    "GBK",
    "GB18030",
    "SHIFT_JIS",
    "JOHAB",
    "TIS-620",
    "VISCII",
    "GEORGIAN-PS",
    utf8
  };
  /* Index of the first (canonical, alias) pair.  */
  const size_t first_pair = 3;
  /* Index just past the last (canonical, alias) pair.  */
  const size_t pairs_end = 27;
  size_t pos = 0;

  /* Find the first entry matching CHARSET, ignoring case.  */
  while (pos < SIZEOF (standard_charsets)
         && strcasecmp (charset, standard_charsets[pos]) != 0)
    pos++;

  if (pos == SIZEOF (standard_charsets))
    return NULL;

  /* Any ASCII spelling maps to the first entry.  */
  if (pos < first_pair)
    return standard_charsets[0];

  /* Inside the paired region, step back from an alias (odd offset within
     the region) to the canonical name preceding it.  */
  if (pos < pairs_end)
    return standard_charsets[pos - (pos - first_pair) % 2];

  return standard_charsets[pos];
}
/* Return false if CANON_CHARSET is exactly "SHIFT_JIS", "JOHAB" or
   "VISCII", and true for any other string.  The comparison is
   case-sensitive, so the argument is expected to be a canonical name.  */
bool
po_charset_ascii_compatible (const char *canon_charset)
{
  /* ASCII compatible means: none of the three known exceptions.  */
  return strcmp (canon_charset, "SHIFT_JIS") != 0
         && strcmp (canon_charset, "JOHAB") != 0
         && strcmp (canon_charset, "VISCII") != 0;
}
/* Return true if CANON_CHARSET is exactly one of the names in the table
   below, false otherwise.  The comparison is case-sensitive, so the
   argument is expected to be a canonical name.
   NOTE(review): presumably these are the multibyte encodings in which a
   trailing byte can be 0x5C (backslash) -- confirm.  */
bool po_is_charset_weird (const char *canon_charset)
{
  static const char *weird_charsets[] =
  {
    "BIG5",
    "BIG5-HKSCS",
    "GBK",
    "GB18030",
    "SHIFT_JIS",
    "JOHAB"
  };
  size_t remaining = SIZEOF (weird_charsets);

  /* Membership test; the order in which entries are visited does not
     affect the result.  */
  while (remaining > 0)
    {
      remaining--;
      if (strcmp (canon_charset, weird_charsets[remaining]) == 0)
        return true;
    }

  return false;
}
/* Return true if CANON_CHARSET is exactly one of the names in the table
   below, false otherwise.  The comparison is case-sensitive, so the
   argument is expected to be a canonical name.
   NOTE(review): presumably these are the encodings in which 0x5C can occur
   only as the second byte of a double-byte character -- confirm.  */
bool po_is_charset_weird_cjk (const char *canon_charset)
{
  static const char *weird_cjk_charsets[] =
  {
    "BIG5",
    "BIG5-HKSCS",
    "GBK",
    "GB18030",
    "SHIFT_JIS",
    "JOHAB"
  };
  const char **entry = weird_cjk_charsets;
  const char **const stop = weird_cjk_charsets + SIZEOF (weird_cjk_charsets);

  /* Walk the table from its first entry to one past its last.  */
  for (; entry != stop; entry++)
    {
      if (strcmp (canon_charset, *entry) == 0)
        return true;
    }

  return false;
}
/* Canonical name of the charset selected by po_lex_charset_set, or NULL
   when none has been selected.  */
const char *po_lex_charset;

#if HAVE_ICONV
/* Conversion descriptor opened by po_lex_charset_set for converting from
   po_lex_charset to UTF-8, or (iconv_t)(-1) when no descriptor is open.  */
iconv_t po_lex_iconv;
#endif

/* Result of po_is_charset_weird_cjk for the current charset, as recorded
   by po_lex_charset_set when no conversion descriptor is available;
   false otherwise.  */
bool po_lex_weird_cjk;

/* Put the lexer charset state into its "nothing selected" condition:
   no charset, no open conversion descriptor, weird-CJK handling off.
   This does not close a descriptor that may already be open; use
   po_lex_charset_close for that.  */
void
po_lex_charset_init (void)
{
  po_lex_charset = NULL;
#if HAVE_ICONV
  po_lex_iconv = (iconv_t)(-1);
#endif
  po_lex_weird_cjk = false;
}
/* Extract the "charset=" value from HEADER_ENTRY and set up the lexer's
   charset state (po_lex_charset, po_lex_iconv, po_lex_weird_cjk) from it.
   FILENAME is used for the warning prefix and to recognize file names
   ending in ".pot", for which some warnings are suppressed.
   If the charset name is missing or not recognized, only a warning may be
   emitted and the lexer state is left as it was.
   NOTE(review): every xasprintf result is handed straight to
   multiline_warning; presumably that function takes ownership of both
   strings and frees them -- confirm.  */
void
po_lex_charset_set (const char *header_entry, const char *filename)
{
const char *charsetstr = strstr (header_entry, "charset=");
if (charsetstr != NULL)
{
size_t len;
char *charset;
const char *canon_charset;
/* Skip the "charset=" key.  The value extends up to the next space, tab
   or newline, or to the end of the string.  */
charsetstr += strlen ("charset=");
len = strcspn (charsetstr, " \t\n");
/* Make a NUL-terminated copy of the value on the stack.
   NOTE(review): LEN is taken from the header text and is not bounded
   before the alloca call; a very long value could exhaust the stack.
   Consider a heap allocation here.  */
charset = (char *) alloca (len + 1);
memcpy (charset, charsetstr, len);
charset[len] = '\0';
canon_charset = po_charset_canonicalize (charset);
if (canon_charset == NULL)
{
/* Unrecognized charset name.  Warn, except when the name is the literal
   placeholder "CHARSET" and FILENAME ends in ".pot".  */
size_t filenamelen = strlen (filename);
if (!(filenamelen >= 4
&& memcmp (filename + filenamelen - 4, ".pot", 4) == 0
&& strcmp (charset, "CHARSET") == 0))
multiline_warning (xasprintf (_("%s: warning: "), filename),
xasprintf (_("\
Charset \"%s\" is not a portable encoding name.\n\
Message conversion to user's charset might not work.\n"),
charset));
}
else
{
const char *envval;
/* Recognized charset: record its canonical name and release any
   conversion descriptor left open by an earlier call.  */
po_lex_charset = canon_charset;
#if HAVE_ICONV
if (po_lex_iconv != (iconv_t)(-1))
iconv_close (po_lex_iconv);
#endif
/* A non-empty OLD_PO_FILE_INPUT environment variable turns off both the
   iconv-based handling and the weird-CJK handling.
   NOTE(review): presumably a compatibility switch for PO files written in
   an older format -- confirm.  */
envval = getenv ("OLD_PO_FILE_INPUT");
if (envval != NULL && *envval != '\0')
{
#if HAVE_ICONV
po_lex_iconv = (iconv_t)(-1);
#endif
po_lex_weird_cjk = false;
}
else
{
#if HAVE_ICONV
/* With glibc 2.0 or 2.1 and without GNU libiconv, never open a converter
   for EUC-KR.
   NOTE(review): presumably works around a converter bug in those glibc
   versions -- confirm.  */
# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
if (strcmp (po_lex_charset, "EUC-KR") == 0)
po_lex_iconv = (iconv_t)(-1);
else
# endif
/* On __sun systems without GNU libiconv, never open a converter for the
   charsets listed here.
   NOTE(review): presumably works around bugs in the system iconv --
   confirm.  */
# if defined __sun && !defined _LIBICONV_VERSION
if ( strcmp (po_lex_charset, "GB2312") == 0
|| strcmp (po_lex_charset, "EUC-TW") == 0
|| strcmp (po_lex_charset, "BIG5") == 0
|| strcmp (po_lex_charset, "BIG5-HKSCS") == 0
|| strcmp (po_lex_charset, "GBK") == 0
|| strcmp (po_lex_charset, "GB18030") == 0)
po_lex_iconv = (iconv_t)(-1);
else
# endif
/* This statement is the else-branch of whichever platform check above was
   compiled in; when none was, it runs unconditionally.  */
po_lex_iconv = iconv_open ("UTF-8", po_lex_charset);
if (po_lex_iconv == (iconv_t)(-1))
{
/* No converter: either it was deliberately skipped above or iconv_open
   failed.  Record the weird-CJK flag and warn.  */
const char *note;
po_lex_weird_cjk = po_is_charset_weird_cjk (po_lex_charset);
/* As long as po_is_charset_weird and po_is_charset_weird_cjk accept the
   same set of names, this condition cannot be true and the second note is
   always chosen.  */
if (po_is_charset_weird (po_lex_charset)
&& !po_lex_weird_cjk)
note = _("Continuing anyway, expect parse errors.");
else
note = _("Continuing anyway.");
multiline_warning (xasprintf (_("%s: warning: "), filename),
xasprintf (_("\
Charset \"%s\" is not supported. %s relies on iconv(),\n\
and iconv() does not support \"%s\".\n"),
po_lex_charset,
basename (program_name),
po_lex_charset));
/* The installation hint is only printed when the iconv in use is not GNU
   libiconv.  */
# if !defined _LIBICONV_VERSION
multiline_warning (NULL,
xasprintf (_("\
Installing GNU libiconv and then reinstalling GNU gettext\n\
would fix this problem.\n")));
# endif
multiline_warning (NULL, xasprintf (_("%s\n"), note));
}
#else
/* Built without iconv: always record the weird-CJK flag, and warn only
   for charsets that are weird but not weird-CJK.  As long as the two
   predicates accept the same set of names, that warning cannot trigger.  */
po_lex_weird_cjk = po_is_charset_weird_cjk (po_lex_charset);
if (po_is_charset_weird (po_lex_charset) && !po_lex_weird_cjk)
{
const char *note =
_("Continuing anyway, expect parse errors.");
multiline_warning (xasprintf (_("%s: warning: "), filename),
xasprintf (_("\
Charset \"%s\" is not supported. %s relies on iconv().\n\
This version was built without iconv().\n"),
po_lex_charset,
basename (program_name)));
multiline_warning (NULL,
xasprintf (_("\
Installing GNU libiconv and then reinstalling GNU gettext\n\
would fix this problem.\n")));
multiline_warning (NULL, xasprintf (_("%s\n"), note));
}
#endif
}
}
}
else
{
/* The header has no "charset=" at all.  Warn, unless FILENAME ends in
   ".pot".  */
size_t filenamelen = strlen (filename);
if (!(filenamelen >= 4
&& memcmp (filename + filenamelen - 4, ".pot", 4) == 0))
multiline_warning (xasprintf (_("%s: warning: "), filename),
xasprintf (_("\
Charset missing in header.\n\
Message conversion to user's charset will not work.\n")));
}
}
/* Tear down the lexer charset state: forget the selected charset, close
   the conversion descriptor if one is open, and switch weird-CJK handling
   off.  Calling this when no descriptor is open is harmless, because the
   descriptor is reset to (iconv_t)(-1) after being closed.  */
void
po_lex_charset_close (void)
{
  po_lex_charset = NULL;
#if HAVE_ICONV
  if (po_lex_iconv != (iconv_t)(-1))
    {
      iconv_close (po_lex_iconv);
      /* Mark the descriptor as closed so it is never closed twice.  */
      po_lex_iconv = (iconv_t)(-1);
    }
#endif
  po_lex_weird_cjk = false;
}