/* iconv.c */


/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1985-2007 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                  Common Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*            http://www.opensource.org/licenses/cpl1.0.txt             *
*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                 Glenn Fowler <gsf@research.att.com>                  *
*                  David Korn <dgk@research.att.com>                   *
*                   Phong Vo <kpv@research.att.com>                    *
*                                                                      *
***********************************************************************/
#pragma prototyped

/*
 * Glenn Fowler
 * AT&T Research
 *
 * iconv intercept
 * minimally provides { utf*<=>bin ascii<=>ebcdic* }
 */

#include <ast.h>
#include <dirent.h>

#define DEBUG_TRACE		0
#define _ICONV_LIST_PRIVATE_

#include <ccode.h>
#include <ctype.h>
#include <iconv.h>

#include "lclib.h"

#if !_lib_iconv_open

#define _ast_iconv_t		iconv_t
#define _ast_iconv_f		iconv_f
#define _ast_iconv_list_t	iconv_list_t
#define _ast_iconv_open		iconv_open
#define _ast_iconv		iconv
#define _ast_iconv_close	iconv_close
#define _ast_iconv_list		iconv_list
#define _ast_iconv_move		iconv_move
#define _ast_iconv_name		iconv_name
#define _ast_iconv_write	iconv_write

#endif

#ifndef E2BIG
#define E2BIG			ENOMEM
#endif
#ifndef EILSEQ
#define EILSEQ			EIO
#endif

/*
 * common converter epilogue
 * e is the pending errno value (0 if none), n the value returned on
 * success and fn points to the count of unconsumed input bytes
 * leftover input with no other error is reported as E2BIG
 * on error errno is set to e and (size_t)(-1) is returned
 */

#define RETURN(e,n,fn) \
	do \
	{ \
		if (!(e) && *(fn)) \
			(e) = E2BIG; \
		if (e) \
		{ \
			errno = (e); \
			return (size_t)(-1); \
		} \
		return (n); \
	} while (0)

/*
 * one side (source or target) of a conversion
 */

typedef struct Map_s
{
	char*			name;	/* canonical codeset name (set in _ast_iconv_open) */
	const unsigned char*	map;	/* byte translation table from ccmap(), 0 if none */
	_ast_iconv_f		fun;	/* conversion function, 0 if none */
	int			index;	/* codeset index assigned by _win_iconv_open() */
} Map_t;

/*
 * conversion descriptor handed back to the caller as an iconv_t
 */

typedef struct Conv_s
{
	iconv_t			cvt;	/* handle passed to from.fun/to.fun, (iconv_t)(-1) if unset */
	char*			buf;	/* intermediate buffer, allocated on first use by _ast_iconv() */
	size_t			size;	/* size of buf in bytes */
	Map_t			from;	/* source side */
	Map_t			to;	/* target side */
} Conv_t;

/*
 * closed descriptors are parked here by _ast_iconv_close() and
 * reused by _ast_iconv_open() when both canonical names match
 * freeindex is the slot most recently stored by _ast_iconv_close()
 */

static Conv_t*			freelist[4];
static int			freeindex;

/*
 * _ast_iconv_open() points its arguments at name_native when they
 * denote the native codeset so that pointer comparison can be used
 */

static const char		name_local[] = "local";
static const char		name_native[] = "native";

/*
 * codesets handled here in addition to the ccmaplist() entries
 * the list is terminated by an entry with a 0 name
 * _ast_iconv_name() and _ast_iconv_list() rely on these entries
 * having ccode < 0 to tell them apart from ccmaplist() entries
 * NOTE(review): initializer order is presumably
 * { name, match, desc, canon, index, ccode } -- confirm against
 * the iconv_list_t declaration in <iconv.h>
 */

static const _ast_iconv_list_t	codes[] =
{
	{
	"utf",
	"un|unicode|utf",
	"multibyte 8-bit unicode",
	"UTF-%s",
	"8",
	CC_UTF,
	},

	{
	"ume",
	"um|ume|utf?(-)7",
	"multibyte 7-bit unicode",
	"UTF-7",
	0,
	CC_UME,
	},

	{
	"euc",
	"(big|euc)*",
	"euc family",
	0,
	0,
	CC_ICONV,
	},

	{
	"dos",
	"dos?(-)?(855)",
	"dos code page",
	"DOS855",
	0,
	CC_ICONV,
	},

	{
	"ucs",
	"ucs?(-)?(2)?(be)|utf-16?(be)",
	"unicode runes",
	"UCS-%s",
	"2",
	CC_UCS,
	},

	{
	"ucs-le",
	"ucs?(-)?(2)le|utf-16le",
	"little endian unicode runes",
	"UCS-%sLE",
	"2",
	CC_SCU,
	},

	{ 0 },
};

#if _UWIN

#include <ast_windows.h>

#ifndef CP_UCS2
#define CP_UCS2	0x0000
#endif

static char	_win_maps[] = "/reg/local_machine/SOFTWARE/Classes/MIME/Database/Charset";

/*
 * return the windows codeset index for a codeset name or alias
 * a few well known names and 0x... literals are handled directly
 * everything else is looked up in the registry under _win_maps,
 * following AliasForCharSet= entries until a CodePage= entry is found
 * -1 is returned if the name cannot be resolved
 */

static int
_win_codeset(const char* name)
{
	register char*	line;
	char*		end;
	int		value;
	int		aliased;
	Sfio_t*		reg;
	char		alias[128];
	char		path[128];

#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d _win_codeset name=%s", __LINE__, name);
#endif
	if (name == name_native)
		return CP_ACP;
	if (!strcasecmp(name, "utf") || !strcasecmp(name, "utf8") || !strcasecmp(name, "utf-8"))
		return CP_UTF8;
	if (!strcasecmp(name, "ucs") || !strcasecmp(name, "ucs2") || !strcasecmp(name, "ucs-2"))
		return CP_UCS2;
	if (name[0] == '0' && name[1] == 'x')
	{
		/* explicit code page number */
		value = strtol(name, &end, 0);
		if (value > 0 && !*end)
			return value;
	}
	for (;;)
	{
		sfsprintf(path, sizeof(path), "%s/%s", _win_maps, name);
		reg = sfopen(0, path, "r");
		if (!reg)
		{
			/*
			 * retry [cp]<digits> as windows-<digits>
			 */

			line = (char*)name;
			if ((line[0] == 'c' || line[0] == 'C') && (line[1] == 'p' || line[1] == 'P'))
				line += 2;
			if (!isdigit(line[0]))
				return -1;
			sfsprintf(path, sizeof(path), "%s/windows-%s", _win_maps, line);
			reg = sfopen(0, path, "r");
			if (!reg)
				return -1;
		}
		aliased = 0;
		while (!aliased && (line = sfgetr(reg, '\n', 0)))
		{
			if (!strncasecmp(line, "AliasForCharSet=", 16))
			{
				/*
				 * copy the alias value, less the key and
				 * line terminator, and look that up next
				 */

				value = sfvalue(reg) - 17;
				if (value >= sizeof(alias))
					value = sizeof(alias) - 1;
				memcpy(alias, line + 16, value);
				alias[value] = 0;
				name = (const char*)alias;
				aliased = 1;
			}
			else if (!strncasecmp(line, "CodePage=", 9))
			{
				value = strtol(line + 9, 0, 0);
				sfclose(reg);
				return value;
			}
		}
		sfclose(reg);
		if (!aliased)
			return -1;
	}
}

/*
 * resolve the from (f) and to (t) codeset names to windows indices
 * the indices are stored in cc->from.index and cc->to.index
 * cc is returned on success, (_ast_iconv_t)(-1) if either name fails
 */

static _ast_iconv_t
_win_iconv_open(register Conv_t* cc, const char* t, const char* f)
{
	int	index;

#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d _win_iconv_open f=%s t=%s\n", __LINE__, f, t);
#endif
	index = _win_codeset(f);
	cc->from.index = index;
	if (index < 0)
		return (_ast_iconv_t)(-1);
	index = _win_codeset(t);
	cc->to.index = index;
	if (index < 0)
		return (_ast_iconv_t)(-1);
#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d _win_iconv_open f=0x%04x t=0x%04x\n", __LINE__, cc->from.index, cc->to.index);
#endif
	return (_ast_iconv_t)cc;
}

/*
 * convert *fb size *fn to *tb size *tn using the windows
 * MultiByteToWideChar() / WideCharToMultiByte() pair with ucs-2
 * as the pivot between cc->from.index and cc->to.index
 * even though the indices already check out
 * they could still be rejected
 * on success fb,fn,tb,tn are advanced and the number of input
 * bytes consumed is returned
 * on failure errno is set to EINVAL and (size_t)(-1) is returned
 */

static size_t
_win_iconv(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
{
	Conv_t*	cc = (Conv_t*)cd;
	size_t	un;
	size_t	tz;
	size_t	fz;
	size_t	bz;
	size_t	pz;
	size_t	oz;
	LPWSTR	ub;

#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d _win_iconv from=0x%04x to=0x%04x\n", __LINE__, cc->from.index, cc->to.index);
#endif
	if (cc->from.index == cc->to.index)
	{
		/*
		 * easy
		 * same codeset on both sides: plain copy of what fits
		 */

		fz = tz = (*fn < *tn) ? *fn : *tn;
		memcpy(*tb, *fb, fz);
	}
	else
	{
		/* ub is the ucs-2 pivot buffer, un its length in WCHARs */
		ub = 0;
		un = *fn;

		/*
		 * from => ucs-2
		 */

		if (cc->to.index == CP_UCS2)
		{
			/*
			 * NOTE(review): *tn is a byte count but is passed as the
			 * output size in WCHARs here -- confirm this cannot overrun *tb
			 */
			if ((tz = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)*fn, (LPWSTR)*tb, *tn)) && tz <= *tn)
			{
				fz = *fn;
				tz *= sizeof(WCHAR);
			}
			else
			{
				/*
				 * target too small
				 * binary search on input size to make it fit
				 * fz is the trial input size, pz the step and
				 * oz the last trial size that fit
				 */

				oz = 0;
				pz = *fn / 2;
				fz = *fn - pz;
				for (;;)
				{
					/* a 0 output size only asks for the required length */
					while (!(tz = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)fz, (LPWSTR)*tb, 0)))
						if (++fz >= *fn)
							goto nope;
					tz *= sizeof(WCHAR);
					if (tz == *tn)
						break;
					if (!(pz /= 2))
					{
						if (!(fz = oz))
							goto nope;
						break;
					}
					if (tz > *tn)
						fz -= pz;
					else
					{
						oz = fz;
						fz += pz;
					}
				}
				/*
				 * NOTE(review): no call with a non-zero output size follows
				 * the search in this branch -- confirm *tb is actually filled
				 */
			}
		}
		else
		{
			if (cc->from.index == CP_UCS2)
			{
				/* input is already ucs-2: use it in place as the pivot */
				un = *fn / sizeof(WCHAR);
				ub = (LPWSTR)*fb;
			}
			else if (!(un = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)*fn, (LPWSTR)*tb, 0)))
				goto nope;
			else if (!(ub = (LPWSTR)malloc(un * sizeof(WCHAR))))
				goto nope;
			else if (!(un = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)*fn, (LPWSTR)ub, un)))
				goto nope;

			/*
			 * ucs-2 => to
			 */

			if (tz = WideCharToMultiByte(cc->to.index, 0, (LPCWSTR)ub, un, *tb, *tn, 0, 0))
				fz = *fn;
			else
			{
				/*
				 * target too small
				 * binary search on input size to make it fit
				 * bz is the trial input size in bytes, fz the
				 * resulting pivot length and oz the last bz that fit
				 */

				oz = 0;
				pz = *fn / 2;
				bz = *fn - pz;
				for (;;)
				{
					while (!(fz = MultiByteToWideChar(cc->from.index, 0, (LPCSTR)*fb, (int)bz, (LPWSTR)ub, un)))
						if (++bz > *fn)
							goto nope;
					if (!(tz = WideCharToMultiByte(cc->to.index, 0, (LPCWSTR)ub, fz, *tb, 0, 0, 0)))
						goto nope;
					if (tz == *tn)
						break;
					if (!(pz /= 2))
					{
						if (!(fz = oz))
							goto nope;
						break;
					}
					if (tz > *tn)
						bz -= pz;
					else
					{
						oz = bz;
						bz += pz;
					}
				}
				/* now do the real conversion of the prefix that fits */
				if (!(tz = WideCharToMultiByte(cc->to.index, 0, (LPCWSTR)ub, fz, *tb, tz, 0, 0)))
					goto nope;
#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d _win_iconv *fn=%u fz=%u[%u] *tn=%u tz=%u\n", __LINE__, *fn, fz, fz * sizeof(WCHAR), *tn, tz);
#endif
#if 0
				fz *= sizeof(WCHAR);
#endif
			}
			/* only free the pivot if it was allocated above */
			if (ub != (LPWSTR)*fb)
				free(ub);
		}
	}
	*fb += fz;
	*fn -= fz;
	*tb += tz;
	*tn -= tz;
	return fz;
 nope:
	if (ub && ub != (LPWSTR)*fb)
		free(ub);
	errno = EINVAL;
	return (size_t)(-1);
}

#endif

/*
 * return canonical character code set name for m
 * if b!=0 then canonical name placed in b of size n
 * if b==0 a local scratch buffer is used and only the index matters
 * <ccode.h> index returned
 * CC_ICONV is returned, with m copied to b in upper case, when m
 * matches none of the ccmaplist() or codes[] patterns
 */

int
_ast_iconv_name(register const char* m, register char* b, size_t n)
{
	register const _ast_iconv_list_t*	cp;
	const _ast_iconv_list_t*		bp;
	register int				c;
	register char*				e;
	int					sub[2];
	char					buf[16];
#if DEBUG_TRACE
	char*					o;
#endif

	if (!b)
	{
		b = buf;
		n = sizeof(buf);
	}
#if DEBUG_TRACE
	o = b;
#endif
	/* e marks the last byte of b, reserved for the terminating 0 */
	e = b + n - 1;
	bp = 0;
	/* from here on n is the length of the best partial match so far */
	n = 0;
	cp = ccmaplist(NiL);
#if DEBUG_TRACE
if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name m=\"%s\"\n", error_info.id, error_info.trace, __LINE__, m);
#endif
	for (;;)
	{
#if DEBUG_TRACE
if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name n=%d bp=%p cp=%p ccode=%d name=\"%s\"\n", error_info.id, error_info.trace, __LINE__, n, bp, cp, cp->ccode, cp->name);
#endif
		if (strgrpmatch(m, cp->match, sub, elementsof(sub) / 2, STR_MAXIMAL|STR_LEFT|STR_ICASE))
		{
			/* a match that consumes all of m wins immediately */
			if (!(c = m[sub[1]]))
			{
				bp = cp;
				break;
			}
			/* otherwise keep the longest prefix match not followed by a letter */
			if (sub[1] > n && !isalpha(c))
			{
				bp = cp;
				n = sub[1];
			}
		}
		/*
		 * entries with ccode<0 are in codes[] and are stepped directly
		 * the others are stepped by ccmaplist(), then codes[] follows
		 */
		if (cp->ccode < 0)
		{
			if (!(++cp)->name)
				break;
		}
		else if (!(cp = (const _ast_iconv_list_t*)ccmaplist((_ast_iconv_list_t*)cp)))
			cp = codes;
	}
	if (cp = bp)
	{
		if (cp->canon)
		{
			if (cp->index)
			{
				/*
				 * use the digits following the matched prefix in m,
				 * falling back to the entry's default index
				 * NOTE(review): sub[1] is from the most recent successful
				 * strgrpmatch(), which may be a later entry than bp when
				 * bp was chosen by prefix match -- confirm
				 */
				for (m += sub[1]; *m && !isalnum(*m); m++);
				if (!isdigit(*m))
					m = cp->index;
			}
			else
				m = "1";
			b += sfsprintf(b, e - b, cp->canon, m);
		}
		else if (cp->ccode == CC_NATIVE)
		{
			/* name the native codeset after the locale charset when there is a usable one */
			if ((locales[AST_LC_CTYPE]->flags & LC_default) || !locales[AST_LC_CTYPE]->charset || !(m = locales[AST_LC_CTYPE]->charset->code) || streq(m, "iso8859-1"))
				switch (CC_NATIVE)
				{
				case CC_EBCDIC:
					m = (const char*)"EBCDIC";
					break;
				case CC_EBCDIC_I:
					m = (const char*)"EBCDIC-I";
					break;
				case CC_EBCDIC_O:
					m = (const char*)"EBCDIC-O";
					break;
				default:
					m = (const char*)"ISO-8859-1";
					break;
				}
			b += sfsprintf(b, e - b, "%s", m);
		}
		*b = 0;
#if DEBUG_TRACE
if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name ccode=%d canon=\"%s\"\n", error_info.id, error_info.trace, __LINE__, cp->ccode, o);
#endif
		return cp->ccode;
	}
	/* no match: hand back the upper cased name, truncated to fit */
	while (b < e && (c = *m++))
	{
		if (islower(c))
			c = toupper(c);
		*b++ = c;
	}
	*b = 0;
#if DEBUG_TRACE
if (error_info.trace < DEBUG_TRACE) sfprintf(sfstderr, "%s: debug-%d: AHA%d _ast_iconv_name ccode=%d canon=\"%s\"\n", error_info.id, error_info.trace, __LINE__, CC_ICONV, o);
#endif
	return CC_ICONV;
}

/*
 * convert utf-8 to bin
 * one, two and three byte sequences are decoded and each decoded
 * value is stored as a single output byte
 * a stray continuation byte or a bad middle byte is EILSEQ
 * a sequence cut short by the end of input is EINVAL
 * on error *fb is left at the start of the offending sequence
 * the number of bytes stored is returned
 */

static size_t
utf2bin(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
{
	unsigned char*	src = (unsigned char*)(*fb);
	unsigned char*	srcend = src + (*fn);
	unsigned char*	dst = (unsigned char*)(*tb);
	unsigned char*	dstend = dst + (*tn);
	unsigned char*	mark = src;
	int		byte;
	int		wide;
	int		err = 0;
	size_t		stored;

	while (dst < dstend && src < srcend)
	{
		mark = src;
		byte = *src++;
		if (!(byte & 0x80))
		{
			/* 7 bit values pass straight through */
			*dst++ = byte;
			continue;
		}
		if (!(byte & 0x40))
		{
			/* continuation byte where a lead byte was expected */
			err = EILSEQ;
			break;
		}
		if (!(byte & 0x20))
			wide = (byte & 0x1F) << 6;
		else
		{
			/* three byte form: take the middle byte here */
			wide = (byte & 0x0F) << 12;
			if (src >= srcend)
			{
				err = EINVAL;
				break;
			}
			byte = *src++;
			if (byte & 0x40)
			{
				err = EILSEQ;
				break;
			}
			wide |= (byte & 0x3F) << 6;
		}
		if (src >= srcend)
		{
			err = EINVAL;
			break;
		}
		byte = *src++;
		wide |= (byte & 0x3F);
		*dst++ = wide;
	}

	/* every error backs up to the start of the failed sequence */
	if (err)
		src = mark;
	*fn -= (char*)src - (*fb);
	*fb = (char*)src;
	stored = (char*)dst - (*tb);
	*tn -= stored;
	*tb = (char*)dst;
	RETURN(err, stored, fn);
}

/*
 * convert bin to utf-8
 * each input character (one byte, or one multibyte character when
 * mbwide() is true) is encoded as a 1, 2 or 3 byte utf-8 sequence
 * values above 0xffff are EILSEQ, undecodable input is EINVAL and
 * too little output room is E2BIG
 * the number of input bytes consumed is returned
 */

static size_t
bin2utf(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
{
	unsigned char*	src = (unsigned char*)(*fb);
	unsigned char*	srcend = src + (*fn);
	unsigned char*	dst = (unsigned char*)(*tb);
	unsigned char*	dstend = dst + (*tn);
	int		len;
	wchar_t		wc;
	size_t		used;
	int		err = 0;

	while (src < srcend && dst < dstend)
	{
		if (mbwide())
		{
			len = (*_ast_info.mb_towc)(&wc, (char*)src, srcend - src);
			if (len < 0)
			{
				err = EINVAL;
				break;
			}
			if (!len)
				len = 1;
		}
		else
		{
			len = 1;
			wc = *src;
		}
		if (!(wc & ~0x7F))
			*dst++ = wc;
		else if (!(wc & ~0x7FF))
		{
			if (dst >= (dstend - 2))
			{
				err = E2BIG;
				break;
			}
			*dst++ = 0xC0 + (wc >> 6);
			*dst++ = 0x80 + (wc & 0x3F);
		}
		else if (!(wc & ~0xffff))
		{
			if (dst >= (dstend - 3))
			{
				err = E2BIG;
				break;
			}
			*dst++ = 0xE0 + (wc >> 12);
			*dst++ = 0x80 + ((wc >> 6 ) & 0x3F);
			*dst++ = 0x80 + (wc & 0x3F);
		}
		else
		{
			err = EILSEQ;
			break;
		}
		src += len;
	}
	used = (char*)src - (*fb);
	*fn -= used;
	*fb = (char*)src;
	*tn -= (char*)dst - (*tb);
	*tb = (char*)dst;
	RETURN(err, used, fn);
}

/*
 * ume_D lists the characters bin2ume() copies through unencoded
 * ume_M is the 64 character alphabet used for encoded values
 */

static const unsigned char	ume_D[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?!\"#$%&*;<=>@[]^_`{|} \t\n";

static const unsigned char	ume_M[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * ume_d[c] is 1 if c is in ume_D
 * ume_m[c] is the position of c in ume_M or NOE if c is not in ume_M
 * both are filled in by umeinit()
 */

static unsigned char		ume_d[UCHAR_MAX+1];

static unsigned char		ume_m[UCHAR_MAX+1];

#define NOE			0xFF
#define UMEINIT()		(ume_d[ume_D[0]] ? 0 : umeinit())

/*
 * initialize the ume tables
 * does nothing if they are already initialized
 * always returns 0
 */

static int
umeinit(void)
{
	register const unsigned char*	p;
	register int			k;

	/* the entry for the first ume_D character doubles as the init flag */
	if (ume_d[ume_D[0]])
		return 0;
	for (p = ume_D; *p; p++)
		ume_d[*p] = 1;
	memset(ume_m, NOE, sizeof(ume_m));
	for (k = 0; ume_M[k]; k++)
		ume_m[ume_M[k]] = k;
	return 0;
}

/*
 * convert utf-7 to bin
 * '+' enters the encoded state, in which each group of three ume_M
 * characters yields one value, stored as one byte if it fits in
 * 8 bits and as two bytes (high byte first) otherwise
 * '-' after at least one group, or any non ume_M character, leaves
 * the encoded state
 * a group cut short by the end of input is EINVAL and no room for a
 * two byte value is E2BIG, both leaving *fb at the start of the group
 * the number of bytes stored is returned
 */

static size_t
ume2bin(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
{
	unsigned char*	src;
	unsigned char*	srcend;
	unsigned char*	dst;
	unsigned char*	dstend;
	unsigned char*	mark;
	int		shift = 0;
	int		byte;
	int		wide;
	int		err = 0;
	size_t		stored;

	UMEINIT();
	src = (unsigned char*)(*fb);
	srcend = src + (*fn);
	dst = (unsigned char*)(*tb);
	dstend = dst + (*tn);
	mark = src;
	while (src < srcend && dst < dstend)
	{
		mark = src;
		byte = *src++;
		if (!shift)
		{
			if (byte == '+')
				shift = 1;
			else
				*dst++ = byte;
			continue;
		}
		if (byte == '-' && shift > 1)
		{
			shift = 0;
			continue;
		}
		wide = ume_m[byte];
		if (wide == NOE)
		{
			/* not part of the encoded alphabet: copy and unshift */
			shift = 0;
			*dst++ = byte;
			continue;
		}
		if (src >= (srcend - 2))
		{
			err = EINVAL;
			break;
		}
		shift = 2;
		wide = (wide << 6) | ume_m[*src++];
		wide = (wide << 6) | ume_m[*src++];
		if (!(wide & ~0xFF))
			*dst++ = wide;
		else if (dst >= (dstend - 1))
		{
			err = E2BIG;
			break;
		}
		else
		{
			*dst++ = (wide >> 8) & 0xFF;
			*dst++ = wide & 0xFF;
		}
	}

	/* every error backs up to the start of the failed group */
	if (err)
		src = mark;
	*fn -= (char*)src - (*fb);
	*fb = (char*)src;
	stored = (char*)dst - (*tb);
	*tn -= stored;
	*tb = (char*)dst;
	RETURN(err, stored, fn);
}

/*
 * convert bin to utf-7
 * 7 bit characters listed in ume_D are copied as is
 * all others are written as three ume_M characters inside a
 * '+' ... '-' bracket that is closed before the next plain
 * character and at the end of the call
 * undecodable input is EINVAL, no room for an encoded group is E2BIG
 * the number of input bytes consumed is returned
 */

static size_t
bin2ume(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
{
	unsigned char*	src;
	unsigned char*	srcend;
	unsigned char*	dst;
	unsigned char*	dstend;
	int		len;
	int		shift = 0;
	wchar_t		wc;
	size_t		used;
	int		err = 0;

	UMEINIT();
	src = (unsigned char*)(*fb);
	srcend = src + (*fn);
	dst = (unsigned char*)(*tb);
	dstend = dst + (*tn);

	/* while shifted one output byte is held back for the closing '-' */
	while (src < srcend && dst < (dstend - shift))
	{
		if (mbwide())
		{
			len = (*_ast_info.mb_towc)(&wc, (char*)src, srcend - src);
			if (len < 0)
			{
				err = EINVAL;
				break;
			}
			if (!len)
				len = 1;
		}
		else
		{
			len = 1;
			wc = *src;
		}
		if (!(wc & ~0x7F) && ume_d[wc])
		{
			if (shift)
			{
				shift = 0;
				*dst++ = '-';
			}
			*dst++ = wc;
			src += len;
			continue;
		}
		if (dst >= (dstend - (4 + shift)))
		{
			err = E2BIG;
			break;
		}
		if (!shift)
		{
			shift = 1;
			*dst++ = '+';
		}
		*dst++ = ume_M[(wc >> 12) & 0x3F];
		*dst++ = ume_M[(wc >> 6) & 0x3F];
		*dst++ = ume_M[wc & 0x3F];
		src += len;
	}
	if (shift)
		*dst++ = '-';
	used = (char*)src - (*fb);
	*fn -= used;
	*fb = (char*)src;
	*tn -= (char*)dst - (*tb);
	*tb = (char*)dst;
	RETURN(err, used, fn);
}

/*
 * convert ucs-2 to bin with no byte swap
 * each input pair is read high byte first
 * values that fit in 8 bits are stored as one byte, the rest as
 * two bytes (high byte first)
 * no room for a two byte value is E2BIG with the pair left unconsumed
 * the number of bytes stored is returned
 */

static size_t
ucs2bin(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
{
	unsigned char*	src = (unsigned char*)(*fb);
	unsigned char*	srcend = src + (*fn);
	unsigned char*	dst = (unsigned char*)(*tb);
	unsigned char*	dstend = dst + (*tn);
	int		wide;
	int		err = 0;
	size_t		stored;

	while (src < (srcend - 1) && dst < dstend)
	{
		wide = (src[0] << 8) | src[1];
		if (wide & ~0xFF)
		{
			if (dst >= (dstend - 1))
			{
				err = E2BIG;
				break;
			}
			*dst++ = (wide >> 8) & 0xFF;
			*dst++ = wide & 0xFF;
		}
		else
			*dst++ = wide;
		src += 2;
	}
	*fn -= (char*)src - (*fb);
	*fb = (char*)src;
	stored = (char*)dst - (*tb);
	*tn -= stored;
	*tb = (char*)dst;
	RETURN(err, stored, fn);
}

/*
 * convert bin to ucs-2 with no byte swap
 * each input character (one byte, or one multibyte character when
 * mbwide() is true) is stored as two bytes, high byte first
 * undecodable multibyte input is EINVAL
 * the number of input bytes consumed is returned
 */

static size_t
bin2ucs(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
{
	register unsigned char*		f;
	register unsigned char*		fe;
	register unsigned char*		t;
	register unsigned char*		te;
	register int			c;
	wchar_t				w;
	size_t				n;
	int				e;

	e = 0;
	f = (unsigned char*)(*fb);
	fe = f + (*fn);
	t = (unsigned char*)(*tb);
	te = t + (*tn);

	/* each character needs two output bytes */
	while (f < fe && t < (te - 1))
	{
		/*
		 * the multibyte decoder is only consulted when mbwide() is true,
		 * as in bin2scu(), bin2utf() and bin2ume(); without the else the
		 * single byte result was unconditionally overwritten
		 */

		if (!mbwide())
		{
			c = 1;
			w = *f;
		}
		else if ((c = (*_ast_info.mb_towc)(&w, (char*)f, fe - f)) < 0)
		{
			e = EINVAL;
			break;
		}
		else if (!c)
			c = 1;
		*t++ = (w >> 8) & 0xFF;
		*t++ = w & 0xFF;
		f += c;
	}
	*fn -= (n = (char*)f - (*fb));
	*fb = (char*)f;
	*tn -= (char*)t - (*tb);
	*tb = (char*)t;
	RETURN(e, n, fn);
}

/*
 * convert ucs-2 to bin with byte swap
 * each input pair is read low byte first
 * values that fit in 8 bits are stored as one byte, the rest as
 * two bytes (high byte first)
 * no room for a two byte value is E2BIG with the pair left unconsumed
 * the number of bytes stored is returned
 */

static size_t
scu2bin(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
{
	unsigned char*	src = (unsigned char*)(*fb);
	unsigned char*	srcend = src + (*fn);
	unsigned char*	dst = (unsigned char*)(*tb);
	unsigned char*	dstend = dst + (*tn);
	int		wide;
	int		err = 0;
	size_t		stored;

	while (src < (srcend - 1) && dst < dstend)
	{
		wide = src[0] | (src[1] << 8);
		if (wide & ~0xFF)
		{
			if (dst >= (dstend - 1))
			{
				err = E2BIG;
				break;
			}
			*dst++ = (wide >> 8) & 0xFF;
			*dst++ = wide & 0xFF;
		}
		else
			*dst++ = wide;
		src += 2;
	}
	*fn -= (char*)src - (*fb);
	*fb = (char*)src;
	stored = (char*)dst - (*tb);
	*tn -= stored;
	*tb = (char*)dst;
	RETURN(err, stored, fn);
}

/*
 * convert bin to ucs-2 with byte swap
 * each input character (one byte, or one multibyte character when
 * mbwide() is true) is stored as two bytes, low byte first
 * undecodable multibyte input is EINVAL
 * the number of input bytes consumed is returned
 */

static size_t
bin2scu(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
{
	unsigned char*	src = (unsigned char*)(*fb);
	unsigned char*	srcend = src + (*fn);
	unsigned char*	dst = (unsigned char*)(*tb);
	unsigned char*	dstend = dst + (*tn);
	int		len;
	wchar_t		wc;
	size_t		used;
	int		err = 0;

	/* each character needs two output bytes */
	while (src < srcend && dst < (dstend - 1))
	{
		if (mbwide())
		{
			len = (*_ast_info.mb_towc)(&wc, (char*)src, srcend - src);
			if (len < 0)
			{
				err = EINVAL;
				break;
			}
			if (!len)
				len = 1;
		}
		else
		{
			len = 1;
			wc = *src;
		}
		*dst++ = wc & 0xFF;
		*dst++ = (wc >> 8) & 0xFF;
		src += len;
	}
	used = (char*)src - (*fb);
	*fn -= used;
	*fb = (char*)src;
	*tn -= (char*)dst - (*tb);
	*tb = (char*)dst;
	RETURN(err, used, fn);
}

/*
 * open a character code conversion map from f to t
 * a null or empty name, "-", "local" and "native" all denote
 * the native codeset
 * (iconv_t)(0) is returned for the identity conversion
 * (iconv_t)(-1) is returned if the descriptor cannot be allocated
 * or no conversion between the two codesets is available
 * otherwise a descriptor to be released by _ast_iconv_close()
 */

_ast_iconv_t
_ast_iconv_open(const char* t, const char* f)
{
	register Conv_t*	cc;
	int			fc;
	int			tc;
	int			i;

	char			fr[64];
	char			to[64];

#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d _ast_iconv_open f=%s t=%s\n", __LINE__, f, t);
#endif

	/*
	 * fold every spelling of the native codeset to name_native so
	 * the identity test below can compare pointers
	 * the "local" test on f previously checked t by mistake
	 */

	if (!t || !*t || *t == '-' && !*(t + 1) || !strcasecmp(t, name_local) || !strcasecmp(t, name_native))
		t = name_native;
	if (!f || !*f || *f == '-' && !*(f + 1) || !strcasecmp(f, name_local) || !strcasecmp(f, name_native))
		f = name_native;

	/*
	 * the ast identify is always (iconv_t)(0)
	 */

	if (t == f)
		return (iconv_t)(0);
	fc = _ast_iconv_name(f, fr, sizeof(fr));
	tc = _ast_iconv_name(t, to, sizeof(to));
#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d _ast_iconv_open f=%s:%s:%d t=%s:%s:%d\n", __LINE__, f, fr, fc, t, to, tc);
#endif
	if (fc != CC_ICONV && fc == tc || streq(fr, to))
		return (iconv_t)(0);

	/*
	 * first check the free list
	 */

	for (i = 0; i < elementsof(freelist); i++)
		if ((cc = freelist[i]) && streq(to, cc->to.name) && streq(fr, cc->from.name))
		{
			freelist[i] = 0;
#if _lib_iconv_open
			/*
			 * reset the shift state if any
			 */

			if (cc->cvt != (iconv_t)(-1))
				iconv(cc->cvt, NiL, NiL, NiL, NiL);
#endif
			return cc;
		}

	/*
	 * allocate a new one
	 * the two canonical names are stored right after the descriptor
	 */

	if (!(cc = newof(0, Conv_t, 1, strlen(to) + strlen(fr) + 2)))
		return (iconv_t)(-1);
	cc->to.name = (char*)(cc + 1);
	cc->from.name = strcopy(cc->to.name, to) + 1;
	strcpy(cc->from.name, fr);
	cc->cvt = (iconv_t)(-1);

	/*
	 * 8 bit maps are the easiest
	 */

	if (fc >= 0 && tc >= 0)
		cc->from.map = ccmap(fc, tc);
#if _lib_iconv_open
	else if ((cc->cvt = iconv_open(to, fr)) != (iconv_t)(-1))
		cc->from.fun = (_ast_iconv_f)iconv;
#endif
#if _UWIN
	else if ((cc->cvt = _win_iconv_open(cc, to, fr)) != (_ast_iconv_t)(-1))
		cc->from.fun = (_ast_iconv_f)_win_iconv;
#endif
	else
	{
		/*
		 * fall back to the local converters, pivoting through
		 * bin/ascii: from.* gets the source to the pivot and
		 * to.* gets the pivot to the target
		 */

		switch (fc)
		{
		case CC_UTF:
			cc->from.fun = utf2bin;
			break;
		case CC_UME:
			cc->from.fun = ume2bin;
			break;
		case CC_UCS:
			cc->from.fun = ucs2bin;
			break;
		case CC_SCU:
			cc->from.fun = scu2bin;
			break;
		case CC_ASCII:
			break;
		default:
			if (fc < 0)
				goto nope;
			cc->from.map = ccmap(fc, CC_ASCII);
			break;
		}
		switch (tc)
		{
		case CC_UTF:
			cc->to.fun = bin2utf;
			break;
		case CC_UME:
			cc->to.fun = bin2ume;
			break;
		case CC_UCS:
			cc->to.fun = bin2ucs;
			break;
		case CC_SCU:
			cc->to.fun = bin2scu;
			break;
		case CC_ASCII:
			break;
		default:
			if (tc < 0)
				goto nope;
			cc->to.map = ccmap(CC_ASCII, tc);
			break;
		}
	}
	return (iconv_t)cc;
 nope:

	/*
	 * no conversion available
	 * cc was never handed out and holds no open cvt at this
	 * point so release it instead of leaking it
	 */

	free(cc);
	return (iconv_t)(-1);
}

/*
 * close a character code conversion map
 * the descriptor is parked on the free list for reuse by
 * _ast_iconv_open() rather than released
 * if the free list is full the entry following the current
 * position is released to make room
 * -1 is returned for the invalid descriptor, otherwise 0 or the
 * iconv_close() result for an entry released here
 */

int
_ast_iconv_close(_ast_iconv_t cd)
{
	Conv_t*	cc;
	Conv_t*	old;
	int	slot;
	int	found;
	int	r = 0;

	if (cd == (_ast_iconv_t)(-1))
		return -1;
	cc = (Conv_t*)cd;
	if (!cc)
		return 0;

	/*
	 * look for an empty slot, starting just after the one used last
	 */

	slot = freeindex;
	found = 0;
	do
	{
		if (++slot >= elementsof(freelist))
			slot = 0;
		if (!freelist[slot])
		{
			found = 1;
			break;
		}
	} while (slot != freeindex);
	if (!found)
	{
		/*
		 * no room: close the oldest
		 */

		if (++slot >= elementsof(freelist))
			slot = 0;
		if (old = freelist[slot])
		{
#if _lib_iconv_open
			if (old->cvt != (iconv_t)(-1))
				r = iconv_close(old->cvt);
#endif
			if (old->buf)
				free(old->buf);
			free(old);
		}
	}
	freeindex = slot;
	freelist[slot] = cc;
	return r;
}

/*
 * copy *fb size *fn to *tb size *tn
 * fb,fn tb,tn updated on return
 * cd==0 is the identity conversion: a plain copy of what fits
 * otherwise the from/to functions and maps in the descriptor are
 * applied, with cc->buf as the intermediate buffer when two
 * stages are needed
 * a null fb or *fb is the iconv reset request and returns 0
 * (size_t)(-1) is returned on error
 */

size_t
_ast_iconv(_ast_iconv_t cd, char** fb, size_t* fn, char** tb, size_t* tn)
{
	Conv_t*				cc = (Conv_t*)cd;
	register unsigned char*		f;
	register unsigned char*		t;
	register unsigned char*		e;
	register const unsigned char*	m;
	register size_t			n;
	char*				b;
	char*				tfb;
	size_t				tfn;
	size_t				i;

	if (!fb || !*fb)
	{
		/* TODO: reset to the initial state */
		if (!tb || !*tb)
			return 0;
		/* TODO: write the initial state shift sequence */
		return 0;
	}
	n = *tn;
	if (cc)
	{
		if (cc->from.fun)
		{
			if (cc->to.fun)
			{
				/*
				 * two function stages: source => cc->buf => target
				 * the intermediate buffer is allocated on first use
				 */
				if (!cc->buf && !(cc->buf = oldof(0, char, cc->size = SF_BUFSIZE, 0)))
				{
					errno = ENOMEM;
					return -1;
				}
				b = cc->buf;
				i = cc->size;
				tfb = *fb;
				tfn = *fn;
				if ((*cc->from.fun)(cc->cvt, &tfb, &tfn, &b, &i) == (size_t)(-1))
					return -1;
				tfn = b - cc->buf;
				tfb = cc->buf;
				n = (*cc->to.fun)(cc->cvt, &tfb, &tfn, tb, tn);
				/*
				 * NOTE(review): i counts intermediate bytes consumed by
				 * to.fun yet is used to advance the source -- this looks
				 * right only if source and intermediate lengths agree
				 */
				i = tfb - cc->buf;
				*fb += i;
				*fn -= i;
				return n;
			}
			if ((*cc->from.fun)(cc->cvt, fb, fn, tb, tn) == (size_t)(-1))
				return -1;
			/* n is now the number of bytes just stored at the target */
			n -= *tn;
			if (m = cc->to.map)
			{
				/* translate the freshly stored bytes in place */
				e = (unsigned char*)(*tb);
				for (t = e - n; t < e; t++)
					*t = m[*t];
			}
			return n;
		}
		else if (cc->to.fun)
		{
			if (!(m = cc->from.map))
				return (*cc->to.fun)(cc->cvt, fb, fn, tb, tn);
			if (!cc->buf && !(cc->buf = oldof(0, char, cc->size = SF_BUFSIZE, 0)))
			{
				errno = ENOMEM;
				return -1;
			}
			/* map at most one buffer full of source bytes into cc->buf */
			if ((n = *fn) > cc->size)
				n = cc->size;
			f = (unsigned char*)(*fb);
			e = f + n;
			t = (unsigned char*)(b = cc->buf);
			while (f < e)
				*t++ = m[*f++];
			/*
			 * NOTE(review): to.fun is given the full *fn rather than the
			 * n bytes mapped above -- confirm *fn never exceeds cc->size
			 */
			n = (*cc->to.fun)(cc->cvt, &b, fn, tb, tn);
			*fb += b - cc->buf;
			return n;
		}
	}
	/* single byte conversions: copy or map min(*tn,*fn) bytes */
	if (n > *fn)
		n = *fn;
	if (cc && (m = cc->from.map))
	{
		f = (unsigned char*)(*fb);
		e = f + n;
		t = (unsigned char*)(*tb);
		while (f < e)
			*t++ = m[*f++];
	}
	else
		memcpy(*tb, *fb, n);
	*fb += n;
	*fn -= n;
	*tb += n;
	*tn -= n;
	return n;
}

/*
 * write *fb size *fn to op
 * fb,fn updated on return
 * total bytes written to op returned
 * -1 is returned if op cannot be reserved before anything is written
 * if e!=0 then *e is incremented for each input byte that fails to
 * convert; such bytes are copied to op unchanged
 */

ssize_t
_ast_iconv_write(_ast_iconv_t cd, Sfio_t* op, char** fb, size_t* fn, size_t* e)
{
	char*		tb;
	char*		ts;
	size_t		tn;
	size_t		r;

	r = 0;
	tn = 0;
	while (*fn > 0)
	{
		/* reserve output space directly in the op buffer */
		if (!(tb = (char*)sfreserve(op, -(tn + 1), SF_WRITE|SF_LOCKR)))
			return r ? r : -1;
		ts = tb;
		tn = sfvalue(op);
		/*
		 * the loop header depends on DEBUG_TRACE: the trace build
		 * does the conversion call inside the body instead
		 */
#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d iconv_write ts=%p tn=%d", __LINE__, ts, tn);
		for (;;)
#else
		while (_ast_iconv(cd, fb, fn, &ts, &tn) == (size_t)(-1))
#endif
		{
#if DEBUG_TRACE
			ssize_t	_r;
error(DEBUG_TRACE, "AHA#%d iconv_write %d => %d `%-.*s'", __LINE__, *fn, tn, *fn, *fb);
			_r = _ast_iconv(cd, fb, fn, &ts, &tn);
error(DEBUG_TRACE, "AHA#%d iconv_write %d => %d [%d]", __LINE__, *fn, tn, _r);
			if (_r != (size_t)(-1))
				break;
#endif
			/* output full: flush what we have and reserve again */
			if (errno == E2BIG)
				break;
			if (e)
				(*e)++;
			if (!tn)
				break;
			/* pass the offending byte through and retry after it */
			*ts++ = *(*fb)++;
			tn--;
			(*fn)--;
		}
#if DEBUG_TRACE
error(DEBUG_TRACE, "AHA#%d iconv_write %d", __LINE__, ts - tb);
#endif

		sfwrite(op, tb, ts - tb);
		r += ts - tb;
	}
	return r;
}

/*
 * move n bytes from ip to op
 * n==SF_UNBOUND moves until ip can no longer be reserved
 * if e!=0 then *e is incremented for each input byte skipped by
 * the substitution loop below
 * total bytes written to op returned, -1 if op cannot be reserved
 * before anything is written
 */

ssize_t
_ast_iconv_move(_ast_iconv_t cd, Sfio_t* ip, Sfio_t* op, size_t n, size_t* e)
{
	char*		fb;
	char*		fs;
	char*		tb;
	char*		ts;
	size_t		fn;
	size_t		fo;
	size_t		tn;
	size_t		i;
	ssize_t		r = 0;
	int		locked;

	fn = n;
	for (;;)
	{
		/* a negated size is handed to sfreserve() for bounded requests */
		if (fn != SF_UNBOUND)
			fn = -((ssize_t)(fn & (((size_t)(~0))>>1)));
		/* try a locked reserve first, then fall back to an unlocked one */
		if (!(fb = (char*)sfreserve(ip, fn, locked = SF_LOCKR)) &&
		    !(fb = (char*)sfreserve(ip, fn, locked = 0)))
			break;
		fs = fb;
		fn = fo = sfvalue(ip);
		if (!(tb = (char*)sfreserve(op, SF_UNBOUND, SF_WRITE|SF_LOCKR)))
		{
			/* release the input reserve without consuming anything */
			sfread(ip, fb, 0);
			return r ? r : -1;
		}
		ts = tb;
		tn = sfvalue(op);
		/*
		 * NOTE(review): the body runs when the conversion succeeds with
		 * input left over and stops on the first (size_t)(-1) return --
		 * confirm this is the intended sense of the test
		 */
		while (_ast_iconv(cd, &fs, &fn, &ts, &tn) != (size_t)(-1) && fn > 0)
		{
			/* substitute '_' for the skipped input byte if there is room */
			if (tn > 0)
			{
				*ts++ = '_';
				tn--;
			}
			if (e)
				(*e)++;
			fs++;
			fn--;
		}
		sfwrite(op, tb, ts - tb);
		r += ts - tb;
		if (locked)
			sfread(ip, fb, fs - fb);
		else
			/*
			 * NOTE(review): i is unsigned so --i >= 0 never fails
			 * when fs == fb -- confirm that case cannot happen here
			 */
			for (i = fn; --i >= (fs - fb);)
				sfungetc(ip, fb[i]);
		if (n != SF_UNBOUND)
		{
			if (n <= (fs - fb))
				break;
			n -= fs - fb;
		}
		/* nothing consumed this round: ask for one more byte next time */
		if (fn == fo)
			fn++;
	}
	return r;
}

/*
 * iconv_list_t iterator
 * call with arg 0 to start
 * prev return value is current arg
 * the ccmaplist() entries come first followed by the codes[] entries
 * 0 is returned after the last entry
 * under _UWIN the names in the _win_maps directory are listed
 * ahead of those, one readdir() entry per call
 */

_ast_iconv_list_t*
_ast_iconv_list(_ast_iconv_list_t* cp)
{
	_ast_iconv_list_t*	next;
#if _UWIN
	struct dirent*		ent;

	if (!cp)
	{
		/*
		 * first call: the directory stream rides in a private entry
		 */

		cp = newof(0, _ast_iconv_list_t, 1, 0);
		if (!cp)
			return ccmaplist(NiL);
		cp->data = opendir(_win_maps);
		if (!cp->data)
		{
			free(cp);
			return ccmaplist(NiL);
		}
	}
	if (cp->data)
	{
		ent = readdir((DIR*)cp->data);
		if (!ent)
		{
			/*
			 * directory done: continue with the ccmaplist() entries
			 */

			closedir((DIR*)cp->data);
			free(cp);
			return ccmaplist(NiL);
		}
		cp->name = cp->match = cp->desc = (const char*)ent->d_name;
		return cp;
	}
#else
	if (!cp)
		return ccmaplist(NiL);
#endif
	if (cp->ccode < 0)
	{
		/*
		 * codes[] entry: step to the next until the 0 name terminator
		 */

		next = cp + 1;
		return next->name ? next : (_ast_iconv_list_t*)0;
	}
	next = ccmaplist(cp);
	return next ? next : (_ast_iconv_list_t*)codes;
}