#include "system.h"
#include <quotearg.h>
#include "common.h"
#ifdef HAVE_ICONV_H
# include <iconv.h>
#endif
#ifndef ICONV_CONST
# define ICONV_CONST
#endif
#ifdef HAVE_LIBICONV
/* One row of the locale-to-charset table: the default character set
   used for language LANG, optionally narrowed to territory TERR.  */
struct langtab
{
  char const *lang;		/* Language code, e.g. "en".  */
  char const *terr;		/* Territory code, or NULL for any territory.  */
  char const *charset;		/* Charset name, or NULL if none is known.  */
};

/* Default charsets per language.  The table is scanned in order and the
   first matching row wins, so a territory-specific row must precede the
   generic (TERR == NULL) row for the same language.  The list ends with
   a row whose LANG is NULL.  */
static struct langtab const langtab[] = {
  { "C", NULL, "ASCII" },
  { "POSIX", NULL, "ASCII" },
  { "aa", NULL, NULL },
  { "ab", NULL, NULL },
  { "ae", NULL, NULL },
  { "af", NULL, "iso-8859-1" },
  { "am", NULL, "UTF-8" },
  { "ar", NULL, "iso-8859-6" },
  { "as", NULL, NULL },
  { "ay", NULL, "iso-8859-1" },
  { "az", NULL, NULL },
  { "ba", NULL, NULL },
  { "be", NULL, "UTF-8" },
  { "bg", NULL, "iso-8859-5" },
  { "bh", NULL, NULL },
  { "bi", NULL, NULL },
  { "bn", NULL, NULL },
  { "bo", NULL, NULL },
  { "br", NULL, "iso-8859-1" },
  { "bs", NULL, NULL },
  { "ca", NULL, "iso-8859-1" },
  { "ce", NULL, NULL },
  { "ch", NULL, NULL },
  { "co", NULL, "iso-8859-1" },
  { "cs", NULL, "iso-8859-2" },
  { "cu", NULL, NULL },
  { "cv", NULL, NULL },
  { "cy", NULL, "iso-8859-1" },
  { "da", NULL, "iso-8859-1" },
  { "de", NULL, "iso-8859-1" },
  { "dz", NULL, NULL },
  { "el", NULL, "iso-8859-7" },
  { "en", NULL, "iso-8859-1" },
  { "eo", NULL, "iso-8859-3" },
  { "es", NULL, "iso-8859-1" },
  { "et", NULL, "iso-8859-15" },
  { "eu", NULL, "iso-8859-1" },
  { "fa", NULL, "UTF-8" },
  { "fi", NULL, "iso-8859-15" },
  { "fj", NULL, NULL },
  { "fo", NULL, "iso-8859-1" },
  { "fr", NULL, "iso-8859-1" },
  { "fy", NULL, "iso-8859-1" },
  { "ga", NULL, "iso-8859-14" },
  { "gd", NULL, "iso-8859-14" },
  { "gl", NULL, NULL },
  { "gn", NULL, NULL },
  { "gu", NULL, NULL },
  { "gv", NULL, "iso-8859-14" },
  { "ha", NULL, NULL },
  { "he", NULL, "iso-8859-8" },
  { "hi", NULL, NULL },
  { "ho", NULL, NULL },
  { "hr", NULL, "iso-8859-2" },
  { "hu", NULL, "iso-8859-2" },
  { "hy", NULL, NULL },
  { "hz", NULL, NULL },
  { "id", NULL, "iso-8859-1" },
  { "ia", NULL, NULL },
  { "ie", NULL, NULL },
  { "ik", NULL, NULL },
  { "io", NULL, NULL },
  { "is", NULL, "iso-8859-1" },
  { "it", NULL, "iso-8859-1" },
  { "iu", NULL, NULL },
  { "ja", NULL, "EUC-JP" },
  { "jv", NULL, NULL },
  { "ka", NULL, NULL },
  { "ki", NULL, NULL },
  { "kj", NULL, NULL },
  { "kk", NULL, NULL },
  { "kl", NULL, "iso-8859-1" },
  { "km", NULL, NULL },
  { "kn", NULL, NULL },
  { "ko", NULL, "EUC-KR" },
  { "ks", NULL, NULL },
  { "ku", NULL, NULL },
  { "kv", NULL, NULL },
  { "kw", NULL, "iso-8859-14" },
  { "ky", NULL, NULL },
  { "la", NULL, "iso-8859-1" },
  { "lb", NULL, "iso-8859-1" },
  { "ln", NULL, NULL },
  { "lo", NULL, NULL },
  { "lt", NULL, "iso-8859-4" },
  { "lv", NULL, "iso-8859-4" },
  { "mg", NULL, NULL },
  { "mh", NULL, NULL },
  { "mi", NULL, NULL },
  { "mk", NULL, NULL },
  { "ml", NULL, NULL },
  { "mn", NULL, NULL },
  { "mo", NULL, "iso-8859-2" },
  { "mr", NULL, NULL },
  { "ms", NULL, NULL },
  { "mt", NULL, "iso-8859-3" },
  { "my", NULL, NULL },
  { "na", NULL, NULL },
  { "nb", NULL, "iso-8859-1" },
  { "nd", NULL, NULL },
  { "ne", NULL, NULL },
  { "ng", NULL, NULL },
  { "nl", NULL, "iso-8859-1" },
  { "nn", NULL, "iso-8859-1" },
  { "no", NULL, "iso-8859-1" },
  { "nr", NULL, NULL },
  { "nv", NULL, NULL },
  { "ny", NULL, NULL },
  { "oc", NULL, NULL },
  { "om", NULL, NULL },
  { "or", NULL, NULL },
  { "os", NULL, NULL },
  { "pa", NULL, NULL },
  { "pi", NULL, NULL },
  { "pl", NULL, "iso-8859-2" },
  { "ps", NULL, NULL },
  { "pt", NULL, "iso-8859-1" },
  { "qu", NULL, "iso-8859-1" },
  { "rm", NULL, "iso-8859-1" },
  { "rn", NULL, NULL },
  { "ro", NULL, "iso-8859-2" },
  { "ru", NULL, "koi8-r" },
  { "rw", NULL, NULL },
  { "sa", NULL, NULL },
  { "sc", NULL, "iso-8859-1" },
  { "sd", NULL, NULL },
  { "se", NULL, "iso-8859-10" },
  { "sg", NULL, NULL },
  { "si", NULL, NULL },
  { "sk", NULL, "iso-8859-2" },
  { "sl", NULL, "iso-8859-1" },
  { "sm", NULL, NULL },
  { "sn", NULL, NULL },
  { "so", NULL, NULL },
  { "sq", NULL, "iso-8859-1" },
  { "sr", NULL, "iso-8859-2" },
  { "ss", NULL, NULL },
  { "st", NULL, NULL },
  { "su", NULL, NULL },
  { "sv", NULL, "iso-8859-1" },
  { "sw", NULL, NULL },
  { "ta", NULL, NULL },
  { "te", NULL, NULL },
  { "tg", NULL, NULL },
  { "th", NULL, "iso-8859-11" },
  { "ti", NULL, NULL },
  { "tk", NULL, NULL },
  { "tl", NULL, "iso-8859-1" },
  { "tn", NULL, NULL },
  { "to", NULL, NULL },
  { "tr", NULL, "iso-8859-9" },
  { "ts", NULL, NULL },
  { "tt", NULL, NULL },
  { "tw", NULL, NULL },
  { "ty", NULL, NULL },
  { "ug", NULL, NULL },
  { "uk", NULL, "koi8-u" },
  { "ur", NULL, NULL },
  { "uz", NULL, NULL },
  { "vi", NULL, NULL },
  { "vo", NULL, NULL },
  { "wa", NULL, "iso-8859-1" },
  { "wo", NULL, NULL },
  { "xh", NULL, NULL },
  { "yi", NULL, "iso-8859-8" },
  { "yo", NULL, NULL },
  { "za", NULL, NULL },
  { "zh", "TW", "big5" },
  { "zh", NULL, "gb2312" },
  { "zu", NULL, NULL },
  { NULL, NULL, NULL }
};

/* Return the default charset for language LANG and territory TERR, both
   compared case-insensitively, or NULL if LANG is NULL, is not in the
   table, or has no charset recorded.

   A row matches when its language equals LANG and either the caller
   gave no territory (TERR == NULL), the row applies to any territory
   (its TERR is NULL), or the two territories are equal.  The first
   matching row in table order is used.  */
static char const *
charset_lookup (char const *lang, char const *terr)
{
  struct langtab const *p;

  if (!lang)
    return NULL;

  for (p = langtab; p->lang; p++)
    {
      if (strcasecmp (p->lang, lang) != 0)
	continue;

      /* The territories must be EQUAL for a territory-specific row to
	 apply; a mismatch falls through to the generic row that
	 follows it.  */
      if (terr == NULL
	  || p->terr == NULL
	  || strcasecmp (p->terr, terr) == 0)
	return p->charset;
    }

  return NULL;
}
/* Return the name of the character set implied by the environment.

   The locale name is taken from LC_ALL or, if that is unset or empty,
   from LANG, and is expected to look like
   language[_territory][.codeset][@modifier].  An explicit codeset is
   returned as is; otherwise the language and territory are looked up
   with charset_lookup.  If nothing can be determined the result is
   "iso-8859-1".

   The environment string is never modified: parsing is done on a
   private copy.  (Tokenizing the getenv result in place would truncate
   the variable, so that a later call would see only the language part
   and could return a different charset.)  The answer is computed once
   and cached, so every call returns the same pointer, which stays
   valid for the life of the process.  */
static const char *
get_input_charset (void)
{
  static const char *cached;
  const char *charset = NULL;
  const char *locale;

  if (cached)
    return cached;

  locale = getenv ("LC_ALL");
  if (!locale || !*locale)
    locale = getenv ("LANG");

  if (locale && *locale)
    {
      size_t size = strlen (locale) + 1;
      char *copy = malloc (size);
      char *terr = NULL;
      char *sep;

      /* Out of memory: use the default, but do not cache it so that a
	 later call can try again.  */
      if (!copy)
	return "iso-8859-1";

      memcpy (copy, locale, size);

      /* Peel the optional parts off from the right, so that each
	 separator is only searched for in the part where it is
	 meaningful.  */
      sep = strchr (copy, '@');
      if (sep)
	*sep = '\0';		/* The modifier is not used.  */

      sep = strchr (copy, '.');
      if (sep)
	{
	  *sep = '\0';
	  if (sep[1])
	    charset = sep + 1;
	}

      sep = strchr (copy, '_');
      if (sep)
	{
	  *sep = '\0';
	  if (sep[1])
	    terr = sep + 1;
	}

      if (!charset)
	{
	  /* The table entry does not point into COPY, so the copy can
	     be released.  When the codeset came from the locale name,
	     COPY is kept on purpose: the cached result points into
	     it.  */
	  charset = charset_lookup (copy, terr);
	  free (copy);
	}
    }

  if (!charset)
    charset = "iso-8859-1";

  cached = charset;
  return cached;
}
#else
/* HAVE_LIBICONV is not defined: replace the iconv entry points with
   stubs so that the code below still compiles.  None of the stubs
   evaluates its arguments.  */
# undef iconv_open
/* Always yields (iconv_t) -1, the value the callers below test for to
   detect an unusable descriptor.  */
# define iconv_open(tocode, fromcode) ((iconv_t) -1)
# undef iconv
/* Expands to (size_t) 0 and leaves all buffers and counters alone.  */
# define iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft) ((size_t) 0)
# undef iconv_close
/* Expands to 0.  */
# define iconv_close(cd) 0
#endif
/* Conversion descriptors opened so far, indexed by the TO_UTF argument
   of utf8_init: slot 0 converts from UTF-8 to the input charset, slot 1
   from the input charset to UTF-8.  (iconv_t) -1 marks a slot that has
   not been opened successfully.  */
static iconv_t conv_desc[2] = { (iconv_t) -1, (iconv_t) -1 };

/* Return the conversion descriptor for the direction selected by
   TO_UTF, opening it first if its slot still holds (iconv_t) -1.  The
   value returned is whatever iconv_open produced, so it is
   (iconv_t) -1 again if opening failed; in that case the next call
   retries.  */
static iconv_t
utf8_init (bool to_utf)
{
  iconv_t *slot = &conv_desc[(int) to_utf];

  if (*slot != (iconv_t) -1)
    return *slot;

  /* Keep get_input_charset () inside the iconv_open argument list:
     iconv_open may be a macro that discards its arguments.  */
  *slot = (to_utf
	   ? iconv_open ("UTF-8", get_input_charset ())
	   : iconv_open (get_input_charset (), "UTF-8"));
  return *slot;
}
bool
utf8_convert (bool to_utf, char const *input, char **output)
{
char ICONV_CONST *ib;
char *ob;
size_t inlen;
size_t outlen;
size_t rc;
iconv_t cd = utf8_init (to_utf);
if (cd == 0)
{
*output = xstrdup (input);
return true;
}
else if (cd == (iconv_t)-1)
return false;
inlen = strlen (input) + 1;
outlen = inlen * MB_LEN_MAX + 1;
ob = *output = xmalloc (outlen);
ib = (char ICONV_CONST *) input;
rc = iconv (cd, &ib, &inlen, &ob, &outlen);
*ob = 0;
return rc != -1;
}
/* Return true if no byte of the NUL-terminated string STR, taken as an
   unsigned char, is greater than 127; return false as soon as such a
   byte is found.  The empty string yields true.  */
bool
string_ascii_p (const char *str)
{
  const unsigned char *cursor = (const unsigned char *) str;

  while (*cursor != '\0')
    {
      if (*cursor >= 128)
	return false;
      cursor++;
    }

  return true;
}