xsltlocale.c   [plain text]


/*
 * xsltlocale.c: locale handling
 *
 * Reference:
 * RFC 3066: Tags for the Identification of Languages
 * http://www.ietf.org/rfc/rfc3066.txt
 * ISO 639-1, ISO 3166-1
 *
 * Author: Nick Wellnhofer
 * winapi port: Roumen Petrov
 */

#define IN_LIBXSLT
#include "libxslt.h"

#include <string.h>
#include <libxml/xmlmemory.h>

#include "xsltlocale.h"
#include "xsltutils.h"

#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2
#define newlocale __newlocale
#define freelocale __freelocale
#define strxfrm_l __strxfrm_l
#define LC_COLLATE_MASK (1 << LC_COLLATE)
#endif

#define TOUPPER(c) (c & ~0x20)
#define TOLOWER(c) (c | 0x20)
#define ISALPHA(c) ((unsigned)(TOUPPER(c) - 'A') < 26)

/*without terminating null character*/
#define XSLTMAX_ISO639LANGLEN		8
#define XSLTMAX_ISO3166CNTRYLEN		8
					/* <lang>-<cntry> */
#define XSLTMAX_LANGTAGLEN		(XSLTMAX_ISO639LANGLEN+1+XSLTMAX_ISO3166CNTRYLEN)

static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);

#ifdef XSLT_LOCALE_WINAPI
xmlRMutexPtr xsltLocaleMutex = NULL;

struct xsltRFC1766Info_s {
      /*note typedef unsigned char xmlChar !*/
    xmlChar    tag[XSLTMAX_LANGTAGLEN+1];
      /*note typedef LCID xsltLocale !*/
    xsltLocale lcid;
};
typedef struct xsltRFC1766Info_s xsltRFC1766Info;

static int xsltLocaleListSize = 0;
static xsltRFC1766Info *xsltLocaleList = NULL;


static xsltLocale
xslt_locale_WINAPI(const xmlChar *languageTag) {
    int k;
    xsltRFC1766Info *p = xsltLocaleList;

    for (k=0; k<xsltLocaleListSize; k++, p++)
	if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
    return((xsltLocale)0);
}

static void xsltEnumSupportedLocales(void);
#endif

/**
 * xsltFreeLocales:
 *
 * Cleanup function for the locale support on shutdown
 */
void
xsltFreeLocales(void) {
#ifdef XSLT_LOCALE_WINAPI
    xmlRMutexLock(xsltLocaleMutex);
    xmlFree(xsltLocaleList);
    xsltLocaleList = NULL;
    xmlRMutexUnlock(xsltLocaleMutex);
#endif
}

/**
 * xsltNewLocale:
 * @languageTag: RFC 3066 language tag
 *
 * Creates a new locale of an opaque system dependent type based on the
 * language tag.
 *
 * Returns the locale or NULL on error or if no matching locale was found
 */
xsltLocale
xsltNewLocale(const xmlChar *languageTag) {
#ifdef XSLT_LOCALE_XLOCALE
    xsltLocale locale;
    char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
    const xmlChar *p = languageTag;
    const char *region = NULL;
    char *q = localeName;
    int i, llen;
    
    /* Convert something like "pt-br" to "pt_BR.utf8" */
    
    if (languageTag == NULL)
    	return(NULL);
    
    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
	*q++ = TOLOWER(*p++);
    
    if (i == 0)
    	return(NULL);
    
    llen = i;
    
    if (*p) {
    	if (*p++ != '-')
    	    return(NULL);
        *q++ = '_';
	
	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
	    *q++ = TOUPPER(*p++);
    
    	if (i == 0 || *p)
    	    return(NULL);
    	
        memcpy(q, ".utf8", 6);
        locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
        if (locale != NULL)
            return(locale);
        
        /* Continue without using country code */
        
        q = localeName + llen;
    }
    
    /* Try locale without territory, e.g. for Esperanto (eo) */

    memcpy(q, ".utf8", 6);
    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
    if (locale != NULL)
        return(locale);

    /* Try to find most common country for language */
    
    if (llen != 2)
        return(NULL);

    region = (char *)xsltDefaultRegion((xmlChar *)localeName);
    if (region == NULL)
        return(NULL);
     
    q = localeName + llen;
    *q++ = '_';
    *q++ = region[0];
    *q++ = region[1];
    memcpy(q, ".utf8", 6);
    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
    
    return(locale);
#endif

#ifdef XSLT_LOCALE_WINAPI
{
    xsltLocale    locale = (xsltLocale)0;
    xmlChar       localeName[XSLTMAX_LANGTAGLEN+1];
    xmlChar       *q = localeName;
    const xmlChar *p = languageTag;
    int           i, llen;
    const xmlChar *region = NULL;

    if (languageTag == NULL) goto end;

    xsltEnumSupportedLocales();

    for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
	*q++ = TOLOWER(*p++);
    if (i == 0) goto end;

    llen = i;
    *q++ = '-';
    if (*p) { /*if country tag is given*/
	if (*p++ != '-') goto end;
	
	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
	    *q++ = TOUPPER(*p++);
	if (i == 0 || *p) goto end;

	*q = '\0';
	locale = xslt_locale_WINAPI(localeName);
	if (locale != (xsltLocale)0) goto end;
    }
    /* Try to find most common country for language */
    region = xsltDefaultRegion(localeName);
    if (region == NULL) goto end;

    strcpy(localeName + llen + 1, region);
    locale = xslt_locale_WINAPI(localeName);
end:
    return(locale);
}
#endif

#ifdef XSLT_LOCALE_NONE
    return(NULL);
#endif
}

static const xmlChar*
xsltDefaultRegion(const xmlChar *localeName) {
    xmlChar c;
    /* region should be xmlChar, but gcc warns on all string assignments */
    const char *region = NULL;
    
    c = localeName[1];
    /* This is based on the locales from glibc 2.3.3 */
    
    switch (localeName[0]) {
        case 'a':
            if (c == 'a' || c == 'm') region = "ET";
            else if (c == 'f') region = "ZA";
            else if (c == 'n') region = "ES";
            else if (c == 'r') region = "AE";
            else if (c == 'z') region = "AZ";
            break;
        case 'b':
            if (c == 'e') region = "BY";
            else if (c == 'g') region = "BG";
            else if (c == 'n') region = "BD";
            else if (c == 'r') region = "FR";
            else if (c == 's') region = "BA";
            break;
        case 'c':
            if (c == 'a') region = "ES";
            else if (c == 's') region = "CZ";
            else if (c == 'y') region = "GB";
            break;
        case 'd':
            if (c == 'a') region = "DK";
            else if (c == 'e') region = "DE";
            break;
        case 'e':
            if (c == 'l') region = "GR";
            else if (c == 'n' || c == 'o') region = "US";
            else if (c == 's' || c == 'u') region = "ES";
            else if (c == 't') region = "EE";
            break;
        case 'f':
            if (c == 'a') region = "IR";
            else if (c == 'i') region = "FI";
            else if (c == 'o') region = "FO";
            else if (c == 'r') region = "FR";
            break;
        case 'g':
            if (c == 'a') region = "IE";
            else if (c == 'l') region = "ES";
            else if (c == 'v') region = "GB";
            break;
        case 'h':
            if (c == 'e') region = "IL";
            else if (c == 'i') region = "IN";
            else if (c == 'r') region = "HT";
            else if (c == 'u') region = "HU";
            break;
        case 'i':
            if (c == 'd') region = "ID";
            else if (c == 's') region = "IS";
            else if (c == 't') region = "IT";
            else if (c == 'w') region = "IL";
            break;
        case 'j':
            if (c == 'a') region = "JP";
            break;
        case 'k':
            if (c == 'l') region = "GL";
            else if (c == 'o') region = "KR";
            else if (c == 'w') region = "GB";
            break;
        case 'l':
            if (c == 't') region = "LT";
            else if (c == 'v') region = "LV";
            break;
        case 'm':
            if (c == 'k') region = "MK";
            else if (c == 'l' || c == 'r') region = "IN";
            else if (c == 'n') region = "MN";
            else if (c == 's') region = "MY";
            else if (c == 't') region = "MT";
            break;
        case 'n':
            if (c == 'b' || c == 'n' || c == 'o') region = "NO";
            else if (c == 'e') region = "NP";
            else if (c == 'l') region = "NL";
            break;
        case 'o':
            if (c == 'm') region = "ET";
            break;
        case 'p':
            if (c == 'a') region = "IN";
            else if (c == 'l') region = "PL";
            else if (c == 't') region = "PT";
            break;
        case 'r':
            if (c == 'o') region = "RO";
            else if (c == 'u') region = "RU";
            break;
        case 's':
            switch (c) {
                case 'e': region = "NO"; break;
                case 'h': region = "YU"; break;
                case 'k': region = "SK"; break;
                case 'l': region = "SI"; break;
                case 'o': region = "ET"; break;
                case 'q': region = "AL"; break;
                case 't': region = "ZA"; break;
                case 'v': region = "SE"; break;
            }
            break;
        case 't':
            if (c == 'a' || c == 'e') region = "IN";
            else if (c == 'h') region = "TH";
            else if (c == 'i') region = "ER";
            else if (c == 'r') region = "TR";
            else if (c == 't') region = "RU";
            break;
        case 'u':
            if (c == 'k') region = "UA";
            else if (c == 'r') region = "PK";
            break;
        case 'v':
            if (c == 'i') region = "VN";
            break;
        case 'w':
            if (c == 'a') region = "BE";
            break;
        case 'x':
            if (c == 'h') region = "ZA";
            break;
        case 'z':
            if (c == 'h') region = "CN";
            else if (c == 'u') region = "ZA";
            break;
    }
    return((xmlChar *)region);
}

/**
 * xsltFreeLocale:
 * @locale: the locale to free
 *
 * Frees a locale created with xsltNewLocale
 */
void
xsltFreeLocale(xsltLocale locale) {
#ifdef XSLT_LOCALE_XLOCALE
    freelocale(locale);
#endif
}

/**
 * xsltStrxfrm:
 * @locale: locale created with xsltNewLocale
 * @string: UTF-8 string to transform
 *
 * Transforms a string according to locale. The transformed string must then be
 * compared with xsltLocaleStrcmp and freed with xmlFree.
 *
 * Returns the transformed string or NULL on error
 */
xsltLocaleChar *
xsltStrxfrm(xsltLocale locale, const xmlChar *string)
{
#ifdef XSLT_LOCALE_NONE
    return(NULL);
#else
    size_t xstrlen, r;
    xsltLocaleChar *xstr;
    
#ifdef XSLT_LOCALE_XLOCALE
    xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
    xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
    if (xstr == NULL) {
	xsltTransformError(NULL, NULL, NULL,
	    "xsltStrxfrm : out of memory error\n");
	return(NULL);
    }

    r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
#endif

#ifdef XSLT_LOCALE_WINAPI
    xstrlen = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0);
    if (xstrlen == 0) {
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
        return(NULL);
    }
    xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
    if (xstr == NULL) {
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
        return(NULL);
    }
    r = MultiByteToWideChar(CP_UTF8, 0, string, -1, xstr, xstrlen);
    if (r == 0) {
        xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
        xmlFree(xstr);
        return(NULL);
    }
    return(xstr);
#endif /* XSLT_LOCALE_WINAPI */

    if (r >= xstrlen) {
	xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
        xmlFree(xstr);
        return(NULL);
    }

    return(xstr);
#endif /* XSLT_LOCALE_NONE */
}

/**
 * xsltLocaleStrcmp:
 * @locale: a locale identifier
 * @str1: a string transformed with xsltStrxfrm
 * @str2: a string transformed with xsltStrxfrm
 *
 * Compares two strings transformed with xsltStrxfrm
 *
 * Returns a value < 0 if str1 sorts before str2,
 *         a value > 0 if str1 sorts after str2,
 *         0 if str1 and str2 are equal wrt sorting
 */
int
xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
    (void)locale;
#ifdef XSLT_LOCALE_WINAPI
{
    int ret;
    if (str1 == str2) return(0);
    if (str1 == NULL) return(-1);
    if (str2 == NULL) return(1);
    ret = CompareStringW(locale, 0, str1, -1, str2, -1);
    if (ret == 0) {
        xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
        return(0);
    }
    return(ret - 2);
}
#else
    return(xmlStrcmp(str1, str2));
#endif
}

#ifdef XSLT_LOCALE_WINAPI
/**
 * xsltCountSupportedLocales:
 * @lcid: not used
 *
 * callback used to count locales
 *
 * Returns TRUE
 */
BOOL CALLBACK
xsltCountSupportedLocales(LPSTR lcid) {
    (void) lcid;
    ++xsltLocaleListSize;
    return(TRUE);
}

/**
 * xsltIterateSupportedLocales:
 * @lcid: not used
 *
 * callback used to track locales
 *
 * Returns TRUE if not at the end of the array
 */
BOOL CALLBACK
xsltIterateSupportedLocales(LPSTR lcid) {
    static int count = 0;
    xmlChar    iso639lang [XSLTMAX_ISO639LANGLEN  +1];
    xmlChar    iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
    int        k, l;
    xsltRFC1766Info *p = xsltLocaleList + count;

    k = sscanf(lcid, "%lx", (long*)&p->lcid);
    if (k < 1) goto end;
    /*don't count terminating null character*/
    k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME , iso639lang , sizeof(iso639lang ));
    if (--k < 1) goto end;
    l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, iso3136ctry, sizeof(iso3136ctry));
    if (--l < 1) goto end;

    {  /*fill results*/
	xmlChar    *q = p->tag;
	memcpy(q, iso639lang, k);
	q += k;
	*q++ = '-';
	memcpy(q, iso3136ctry, l);
	q += l;
	*q = '\0';
    }
    ++count;
end:
    return((count < xsltLocaleListSize) ? TRUE : FALSE);
}


static void
xsltEnumSupportedLocales(void) {
    xmlRMutexLock(xsltLocaleMutex);
    if (xsltLocaleListSize <= 0) {
	size_t len;

	EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);

	len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
	xsltLocaleList = xmlMalloc(len);
	memset(xsltLocaleList, 0, len);
	EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
    }
    xmlRMutexUnlock(xsltLocaleMutex);
}

#endif /*def XSLT_LOCALE_WINAPI*/