#include <stdio.h>
#include "cstring.h"
#include "unicode/uloc.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "cintltst.h"
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/ucol.h"
#include "cmemory.h"
/*
 * Forward declarations for the helpers and test entry points of this file.
 * Most of the Test* functions below are registered with the test framework
 * in addTestNewConvert().
 */

/* Helpers that exercise ucnv_getNextUChar() on a caller-supplied byte range. */
static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
/* Only declared (and registered) when collation support is compiled in. */
#if !UCONFIG_NO_COLLATION
static void TestJitterbug981(void);
#endif
static void TestJitterbug1293(void);
/* Runs the conversion table tests with the given output/input chunk sizes. */
static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
static void TestConverterTypesAndStarters(void);
static void TestAmbiguous(void);
static void TestSignatureDetection(void);
/* Per-charset test entry points. */
static void TestUTF7(void);
static void TestIMAP(void);
static void TestUTF8(void);
static void TestCESU8(void);
static void TestUTF16(void);
static void TestUTF16BE(void);
static void TestUTF16LE(void);
static void TestUTF32(void);
static void TestUTF32BE(void);
static void TestUTF32LE(void);
static void TestLATIN1(void);
static void TestSBCS(void);
static void TestDBCS(void);
static void TestMBCS(void);
/* The generic ISO-2022 test exists only when U_ENABLE_GENERIC_ISO_2022 is defined. */
#ifdef U_ENABLE_GENERIC_ISO_2022
static void TestISO_2022(void);
#endif
static void TestISO_2022_JP(void);
static void TestISO_2022_JP_1(void);
static void TestISO_2022_JP_2(void);
static void TestISO_2022_KR(void);
static void TestISO_2022_KR_1(void);
static void TestISO_2022_CN(void);
static void TestISO_2022_CN_EXT(void);
static void TestJIS(void);
static void TestHZ(void);
static void TestSCSU(void);
static void TestEBCDIC_STATEFUL(void);
static void TestGB18030(void);
static void TestLMBCS(void);
static void TestJitterbug255(void);
static void TestEBCDICUS4XML(void);
static void TestJitterbug915(void);
static void TestISCII(void);
/* Helper taking UTF-16 input, a converter name, a language and an output byte buffer. */
static void TestConv(const uint16_t in[],
int len,
const char* conv,
const char* lang,
char byteArr[],
int byteArrLen);
static void TestRoundTrippingAllUTF(void);
static void TestCoverageMBCS(void);
static void TestJitterbug2346(void);
static void TestJitterbug2411(void);
/* The only non-static symbol declared here: registers the tests of this file under `root`. */
void addTestNewConvert(TestNode** root);
/* Converter-open wrapper; names starting with '@' are opened from the test data package. */
static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
/* Capacity (in elements) of the scratch output/offset buffers used by the conversion helpers. */
#define NEW_MAX_BUFFER 999
/* Chunk sizes currently used when feeding input to / draining output from a converter. */
static int32_t gInBufferSize = NEW_MAX_BUFFER;
static int32_t gOutBufferSize = NEW_MAX_BUFFER;
/* Human-readable description of the running conversion test, filled by setNuConvTestName(). */
static char gNuConvTestName[1024];
/*
 * Minimum of two values (works for any type comparable with '<', including
 * pointers). Every argument use is parenthesized so that operands containing
 * low-precedence operators (e.g. `a|b`) are compared as a whole.
 * Note: each argument may be evaluated twice; do not pass expressions with
 * side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
/*
 * Opens a converter by name.
 *
 * A name beginning with '@' is opened with ucnv_openPackage() from the
 * package returned by loadTestData(), using the text after the '@' as the
 * converter name. Any other value (including NULL) is handed unchanged to
 * ucnv_open(). Errors are reported through `err`.
 */
static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
{
    if (!cnv || cnv[0] != '@') {
        return ucnv_open(cnv, err);
    }

    /* step over the '@' marker to get the real converter name */
    return ucnv_openPackage(loadTestData(err), cnv + 1, err);
}
/*
 * Writes `len` bytes of `a` to the verbose log as "{0xNN 0xNN ... }".
 * Nothing is printed between the braces when len <= 0.
 */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
/*
 * Writes `len` UTF-16 code units of `a` to the verbose log, each as a
 * four-digit hex value, wrapped in "{U+" ... "}".
 */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{U+");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%04x ", a[idx]);
    }
    log_verbose("}\n");
}
/*
 * Same output format as printSeq(), but written directly to stderr instead
 * of the verbose log.
 */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
/*
 * Same output format as printUSeq(), but written directly to stderr instead
 * of the verbose log.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{U+");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
/*
 * Repeatedly calls ucnv_getNextUChar() on [source, limit) and checks each
 * result against `results`.
 *
 * `results` is read as consecutive pairs: results[2k] is the number of bytes
 * the k-th call is expected to consume (a negative value disables the
 * byte-count check) and results[2k+1] is the expected code point.
 * The caller must supply one pair per character in the input; the array is
 * not bounds-checked here.
 *
 * The loop stops silently on U_INDEX_OUTOFBOUNDS_ERROR, and stops with a
 * logged error on any other failure or on the first mismatch.
 * `message` is prefixed to every error that is logged.
 */
static void
TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
{
    const char *s = source;
    const int32_t *r = results;
    UErrorCode errorCode = U_ZERO_ERROR;

    while (s < limit) {
        const char *s0 = s;
        UChar32 c = ucnv_getNextUChar(cnv, &s, limit, &errorCode);

        if (errorCode == U_INDEX_OUTOFBOUNDS_ERROR) {
            break; /* no more input */
        }
        if (U_FAILURE(errorCode)) {
            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
            break;
        }
        if ((*r >= 0 && (int32_t)(s - s0) != *r) || c != *(r + 1)) {
            /*
             * The variadic arguments are cast so that they really have the
             * types named by the format (%lx -> unsigned long, %d -> int);
             * passing a 32-bit code point or a ptrdiff_t directly is a
             * mismatch on platforms where long/ptrdiff_t are not 32 bits.
             */
            log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
                    message, (unsigned long)c, (int)(s - s0), (unsigned long)*(r + 1), (int)*r);
            break;
        }
        r += 2;
    }
}
/*
 * Calls ucnv_getNextUChar() once on [source, limit) and verifies that it
 * fails as expected: the resulting error code must equal `expected` and the
 * returned value must be 0xFFFD or 0xFFFF. Each violated expectation is
 * logged separately; `message` describes the scenario in the log output.
 */
static void
TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
{
    const char *s = source;
    UErrorCode errorCode = U_ZERO_ERROR;
    uint32_t c;

    c = ucnv_getNextUChar(cnv, &s, limit, &errorCode);
    if (errorCode != expected) {
        log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
    }
    if (c != 0xFFFD && c != 0xffff) {
        /* %lx needs an unsigned long argument; c is only 32 bits wide */
        log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, (unsigned long)c);
    }
}
/*
 * Runs the buffer-size conversion tests with small input chunk sizes
 * (and a few small output/input combinations).
 */
static void TestInBufSizes(void)
{
    /* { output buffer size, input buffer size }, executed in this order */
    static const int32_t sizePairs[][2] = {
        { NEW_MAX_BUFFER, 1 },
        { NEW_MAX_BUFFER, 2 },
        { NEW_MAX_BUFFER, 3 },
        { NEW_MAX_BUFFER, 4 },
        { NEW_MAX_BUFFER, 5 },
        { NEW_MAX_BUFFER, 6 },
        { 1, 1 },
        { 2, 3 },
        { 3, 2 }
    };
    const int32_t pairCount = (int32_t)(sizeof(sizePairs) / sizeof(sizePairs[0]));
    int32_t k;

    for (k = 0; k < pairCount; ++k) {
        TestNewConvertWithBufferSizes(sizePairs[k][0], sizePairs[k][1]);
    }
}
/*
 * Runs the buffer-size conversion tests with the full-size input chunk and
 * a range of output chunk sizes (full size first, then 1 through 5).
 */
static void TestOutBufSizes(void)
{
    /* output buffer sizes, executed in this order; input size is always NEW_MAX_BUFFER */
    static const int32_t outSizes[] = { NEW_MAX_BUFFER, 1, 2, 3, 4, 5 };
    const int32_t sizeCount = (int32_t)(sizeof(outSizes) / sizeof(outSizes[0]));
    int32_t k;

    for (k = 0; k < sizeCount; ++k) {
        TestNewConvertWithBufferSizes(outSizes[k], NEW_MAX_BUFFER);
    }
}
/*
 * Registers every test of this file with the test tree rooted at `root`.
 * Each test is added under the path "tsconv/nucnvtst/<function name>", in
 * the order listed below.
 */
void addTestNewConvert(TestNode** root)
{
/* Registers test function `fn` under "tsconv/nucnvtst/" followed by its own name. */
#define NUCNVTST_ADD(fn) addTest(root, &fn, "tsconv/nucnvtst/" #fn)

    NUCNVTST_ADD(TestInBufSizes);
    NUCNVTST_ADD(TestOutBufSizes);
    NUCNVTST_ADD(TestConverterTypesAndStarters);
    NUCNVTST_ADD(TestAmbiguous);
    NUCNVTST_ADD(TestSignatureDetection);
    NUCNVTST_ADD(TestUTF7);
    NUCNVTST_ADD(TestIMAP);
    NUCNVTST_ADD(TestUTF8);
    NUCNVTST_ADD(TestCESU8);
    NUCNVTST_ADD(TestUTF16);
    NUCNVTST_ADD(TestUTF16BE);
    NUCNVTST_ADD(TestUTF16LE);
    NUCNVTST_ADD(TestUTF32);
    NUCNVTST_ADD(TestUTF32BE);
    NUCNVTST_ADD(TestUTF32LE);
    NUCNVTST_ADD(TestLMBCS);
    NUCNVTST_ADD(TestLATIN1);
    NUCNVTST_ADD(TestSBCS);
    NUCNVTST_ADD(TestDBCS);
    NUCNVTST_ADD(TestMBCS);
#ifdef U_ENABLE_GENERIC_ISO_2022
    NUCNVTST_ADD(TestISO_2022);
#endif
    NUCNVTST_ADD(TestISO_2022_JP);
    NUCNVTST_ADD(TestJIS);
    NUCNVTST_ADD(TestISO_2022_JP_1);
    NUCNVTST_ADD(TestISO_2022_JP_2);
    NUCNVTST_ADD(TestISO_2022_KR);
    NUCNVTST_ADD(TestISO_2022_KR_1);
    NUCNVTST_ADD(TestISO_2022_CN);
    NUCNVTST_ADD(TestISO_2022_CN_EXT);
    NUCNVTST_ADD(TestJitterbug915);
    NUCNVTST_ADD(TestHZ);
    NUCNVTST_ADD(TestSCSU);
    NUCNVTST_ADD(TestEBCDIC_STATEFUL);
    NUCNVTST_ADD(TestGB18030);
    NUCNVTST_ADD(TestJitterbug255);
    NUCNVTST_ADD(TestEBCDICUS4XML);
    NUCNVTST_ADD(TestISCII);
#if !UCONFIG_NO_COLLATION
    NUCNVTST_ADD(TestJitterbug981);
#endif
    NUCNVTST_ADD(TestJitterbug1293);
    NUCNVTST_ADD(TestCoverageMBCS);
    NUCNVTST_ADD(TestRoundTrippingAllUTF);
    NUCNVTST_ADD(TestJitterbug2346);
    NUCNVTST_ADD(TestJitterbug2411);

#undef NUCNVTST_ADD
}
/*
 * Formats a description of the current conversion test into the global
 * gNuConvTestName, e.g.
 *   "[Testing <codepage> <direction> Unicode, InputBufSiz=.., OutputBufSiz=..]"
 * using the current gInBufferSize / gOutBufferSize values.
 *
 * The write is bounded by the size of gNuConvTestName, so an unusually long
 * codepage or direction string is truncated instead of overrunning the
 * buffer.
 */
static void setNuConvTestName(const char *codepage, const char *direction)
{
    snprintf(gNuConvTestName, sizeof(gNuConvTestName),
             "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
             codepage,
             direction,
             (int)gInBufferSize,
             (int)gOutBufferSize);
}
/* Outcome of one testConvertFromU() / testConvertToU() run. */
typedef enum
{
    TC_OK,        /* 0: conversion succeeded and the output matched */
    TC_MISMATCH,  /* 1: conversion succeeded but the output differed from the expectation */
    TC_FAIL       /* 2: the converter could not be opened or the conversion itself failed */
} ETestConvertResult;
/*
 * Converts `source` (sourceLen UTF-16 code units) with ucnv_fromUnicode()
 * using the converter named `codepage`, and compares the produced bytes with
 * `expect` (expectLen bytes).
 *
 * The input is fed in chunks of at most gInBufferSize units and the output
 * is drained in chunks of at most gOutBufferSize bytes. Offsets are only
 * requested and compared with `expectOffsets` (if non-NULL) when both chunk
 * sizes are NEW_MAX_BUFFER. An offset mismatch is logged as an error but
 * does not by itself change the return value.
 *
 * If `useFallback` is true, fallback mappings are enabled on the converter.
 *
 * Returns TC_OK when the output matches, TC_MISMATCH when its length or
 * content differs, and TC_FAIL when the converter cannot be opened, the
 * conversion fails, or the scratch output buffer fills up.
 * The converter is closed on every path once it has been opened.
 */
static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
                const char *codepage, const int32_t *expectOffsets , UBool useFallback)
{
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv = 0;
    uint8_t junkout[NEW_MAX_BUFFER];   /* conversion output */
    int32_t junokout[NEW_MAX_BUFFER];  /* offsets reported by the converter */
    const UChar *src;
    uint8_t *end;
    uint8_t *targ;
    int32_t *offs;
    int32_t outLen;
    int i;
    int32_t realBufferSize;
    uint8_t *realBufferEnd;
    const UChar *realSourceEnd;
    const UChar *sourceLimit;
    UBool checkOffsets = TRUE;
    UBool doFlush;

    /* pre-fill both buffers with recognizable junk */
    for(i=0;i<NEW_MAX_BUFFER;i++)
        junkout[i] = 0xF0;
    for(i=0;i<NEW_MAX_BUFFER;i++)
        junokout[i] = 0xFF;

    setNuConvTestName(codepage, "FROM");
    log_verbose("\n=========  %s\n", gNuConvTestName);

    conv = my_ucnv_open(codepage, &status);
    if(U_FAILURE(status))
    {
        log_data_err("Couldn't open converter %s\n",codepage);
        return TC_FAIL;
    }
    if(useFallback){
        ucnv_setFallback(conv,useFallback);
    }
    log_verbose("Converter opened..\n");

    src = source;
    targ = junkout;
    offs = junokout;
    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
    realBufferEnd = junkout + realBufferSize;
    realSourceEnd = source + sourceLen;

    /* offsets are only meaningful when the conversion is not chunked */
    if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
        checkOffsets = FALSE;

    do
    {
        /*
         * Clamp the chunk sizes before forming the limit pointers so that
         * no pointer beyond the end of the real buffers is ever computed.
         */
        int32_t outRoom = (int32_t)(realBufferEnd - targ);
        int32_t inLeft = (int32_t)(realSourceEnd - src);

        end = targ + (gOutBufferSize < outRoom ? gOutBufferSize : outRoom);
        sourceLimit = src + (gInBufferSize < inLeft ? gInBufferSize : inLeft);
        doFlush = (UBool)(sourceLimit == realSourceEnd);

        if(targ == realBufferEnd) {
            log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%p %s", (void *)targ, gNuConvTestName);
            ucnv_close(conv); /* do not leak the converter on failure */
            return TC_FAIL;
        }
        log_verbose("calling fromUnicode @ SOURCE:%p to %p  TARGET: %p to %p, flush=%s\n",
                    (const void *)src, (const void *)sourceLimit, (void *)targ, (void *)end,
                    doFlush?"TRUE":"FALSE");

        status = U_ZERO_ERROR;
        ucnv_fromUnicode (conv,
                  (char **)&targ,
                  (const char*)end,
                  &src,
                  sourceLimit,
                  checkOffsets ? offs : NULL,
                  doFlush,
                  &status);
    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );

    if(U_FAILURE(status)) {
        log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
        ucnv_close(conv); /* do not leak the converter on failure */
        return TC_FAIL;
    }

    outLen = (int32_t)(targ - junkout);
    log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
                sourceLen, (int)outLen);

    if(VERBOSITY)
    {
        /* 6 characters per output byte, at most NEW_MAX_BUFFER bytes: fits in 9999 */
        char junk[9999];
        char offset_str[9999];
        char *junkPos = junk;
        char *offsetPos = offset_str;

        junk[0] = 0;
        offset_str[0] = 0;
        for(i = 0; i < outLen; i++) {
            junkPos += sprintf(junkPos, "0x%02x, ", (int)(0xFF & junkout[i]));
            offsetPos += sprintf(offsetPos, "0x%02x, ", (int)(0xFF & junokout[i]));
        }

        /* never pass generated text as the format string itself */
        log_verbose("%s", junk);
        printSeq((const uint8_t *)expect, expectLen);
        if ( checkOffsets ) {
            log_verbose("\nOffsets:");
            log_verbose("%s", offset_str);
        }
        log_verbose("\n");
    }
    ucnv_close(conv);

    if(expectLen != outLen) {
        log_err("Expected %d chars out, got %d %s\n", expectLen, (int)outLen, gNuConvTestName);
        log_verbose("Expected %d chars out, got %d %s\n", expectLen, (int)outLen, gNuConvTestName);
        printf("\nGot:");
        printSeqErr((const unsigned char*)junkout, (int)outLen);
        printf("\nExpected:");
        printSeqErr((const unsigned char*)expect, expectLen);
        return TC_MISMATCH;
    }

    if (checkOffsets && (expectOffsets != 0) ) {
        log_verbose("comparing %d offsets..\n", (int)outLen);
        if(memcmp(junokout,expectOffsets,(size_t)outLen * sizeof(int32_t) )){
            log_err("did not get the expected offsets. %s\n", gNuConvTestName);
            printSeqErr((const unsigned char*)junkout, (int)outLen);
            log_err("\n");
            log_err("Got  :     ");
            for(i=0; i<outLen; i++) {
                log_err("%d,", (int)junokout[i]);
            }
            log_err("\n");
            log_err("Expected:  ");
            for(i=0; i<outLen; i++) {
                log_err("%d,", (int)expectOffsets[i]);
            }
            log_err("\n");
        }
    }

    log_verbose("comparing..\n");
    if(!memcmp(junkout, expect, expectLen)) {
        log_verbose("Matches!\n");
        return TC_OK;
    } else {
        log_err("String does not match u->%s\n", gNuConvTestName);
        printUSeqErr(source, sourceLen);
        printf("\nGot:");
        printSeqErr((const unsigned char *)junkout, expectLen);
        printf("\nExpected:");
        printSeqErr((const unsigned char *)expect, expectLen);
        return TC_MISMATCH;
    }
}
/*
 * Converts `source` (sourcelen bytes) with ucnv_toUnicode() using the
 * converter named `codepage`, and compares the produced UTF-16 with `expect`
 * (expectlen code units).
 *
 * The input is fed in chunks of at most gInBufferSize bytes and the output
 * is drained in chunks of at most gOutBufferSize units. Offsets are only
 * requested and compared with `expectOffsets` (if non-NULL) when both chunk
 * sizes are NEW_MAX_BUFFER. An offset mismatch is logged as an error but
 * does not by itself change the return value.
 *
 * If `useFallback` is true, fallback mappings are enabled on the converter.
 *
 * Returns TC_OK when the first expectlen output units match `expect`,
 * TC_MISMATCH when they differ, and TC_FAIL when the converter cannot be
 * opened, the conversion fails, or the scratch output buffer fills up.
 * The converter is closed on every path once it has been opened.
 *
 * NOTE(review): unlike testConvertFromU(), the number of units produced is
 * not compared with expectlen; only the first expectlen units are checked.
 */
static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
               const char *codepage, const int32_t *expectOffsets, UBool useFallback)
{
    UErrorCode status = U_ZERO_ERROR;
    UConverter *conv = 0;
    UChar junkout[NEW_MAX_BUFFER];     /* conversion output */
    int32_t junokout[NEW_MAX_BUFFER];  /* offsets reported by the converter */
    const uint8_t *src;
    const uint8_t *realSourceEnd;
    const uint8_t *srcLimit;
    UChar *targ;
    UChar *end;
    int32_t *offs;
    int32_t outLen;
    int i;
    UBool checkOffsets = TRUE;
    int32_t realBufferSize;
    UChar *realBufferEnd;

    /* pre-fill both buffers with recognizable junk */
    for(i=0;i<NEW_MAX_BUFFER;i++)
        junkout[i] = 0xFFFE;
    for(i=0;i<NEW_MAX_BUFFER;i++)
        junokout[i] = -1;

    setNuConvTestName(codepage, "TO");
    log_verbose("\n=========  %s\n", gNuConvTestName);

    conv = my_ucnv_open(codepage, &status);
    if(U_FAILURE(status))
    {
        log_data_err("Couldn't open converter %s\n",gNuConvTestName);
        return TC_FAIL;
    }
    if(useFallback){
        ucnv_setFallback(conv,useFallback);
    }
    log_verbose("Converter opened..\n");

    src = source;
    targ = junkout;
    offs = junokout;
    realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
    realBufferEnd = junkout + realBufferSize;
    realSourceEnd = src + sourcelen;

    /* offsets are only meaningful when the conversion is not chunked */
    if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
        checkOffsets = FALSE;

    do
    {
        /*
         * Clamp the chunk sizes before forming the limit pointers so that
         * no pointer beyond the end of the real buffers is ever computed.
         */
        int32_t outRoom = (int32_t)(realBufferEnd - targ);
        int32_t inLeft = (int32_t)(realSourceEnd - src);

        end = targ + (gOutBufferSize < outRoom ? gOutBufferSize : outRoom);
        srcLimit = src + (gInBufferSize < inLeft ? gInBufferSize : inLeft);

        if(targ == realBufferEnd)
        {
            log_err("Error, the end would overflow the real output buffer while about to call toUnicode! target=%p %s",(void *)targ,gNuConvTestName);
            ucnv_close(conv); /* do not leak the converter on failure */
            return TC_FAIL;
        }
        log_verbose("calling toUnicode @ %p to %p\n", (void *)targ, (void *)end);

        status = U_ZERO_ERROR;
        ucnv_toUnicode (conv,
                &targ,
                end,
                (const char **)&src,
                (const char *)srcLimit,
                checkOffsets ? offs : NULL,
                (UBool)(srcLimit == realSourceEnd), /* flush on the last chunk */
                &status);
    } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) );

    if(U_FAILURE(status))
    {
        log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
        ucnv_close(conv); /* do not leak the converter on failure */
        return TC_FAIL;
    }

    outLen = (int32_t)(targ - junkout);
    log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
                sourcelen, (int)outLen);

    if(VERBOSITY)
    {
        /* 8 characters per output unit, at most NEW_MAX_BUFFER units: fits in 9999 */
        char junk[9999];
        char offset_str[9999];
        char *junkPos = junk;
        char *offsetPos = offset_str;

        junk[0] = 0;
        offset_str[0] = 0;
        for(i = 0; i < outLen; i++)
        {
            junkPos += sprintf(junkPos, "0x%04x, ", (0xFFFF) & (unsigned int)junkout[i]);
            offsetPos += sprintf(offsetPos, "0x%04x, ", (0xFFFF) & (unsigned int)junokout[i]);
        }

        /* never pass generated text as the format string itself */
        log_verbose("%s", junk);
        printUSeq(expect, expectlen);
        if ( checkOffsets )
        {
            log_verbose("\nOffsets:");
            log_verbose("%s", offset_str);
        }
        log_verbose("\n");
    }
    ucnv_close(conv);

    log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);

    if (checkOffsets && (expectOffsets != 0))
    {
        if(memcmp(junokout,expectOffsets,(size_t)outLen * sizeof(int32_t))){
            log_err("did not get the expected offsets. %s\n",gNuConvTestName);
            log_err("Got:      ");
            for(i=0; i<outLen; i++) {
                log_err("%d,", (int)junokout[i]);
            }
            log_err("\n");
            log_err("Expected: ");
            for(i=0; i<outLen; i++) {
                log_err("%d,", (int)expectOffsets[i]);
            }
            log_err("\n");
            log_err("output:   ");
            for(i=0; i<outLen; i++) {
                log_err("%X,", (unsigned int)junkout[i]);
            }
            log_err("\n");
            log_err("input:    ");
            for(i=0; i<(int)(src-source); i++) {
                log_err("%X,", (unsigned int)(unsigned char)source[i]);
            }
            log_err("\n");
        }
    }

    if(!memcmp(junkout, expect, expectlen*2))
    {
        log_verbose("Matches!\n");
        return TC_OK;
    }
    else
    {
        log_err("String does not match. %s\n", gNuConvTestName);
        log_verbose("String does not match. %s\n", gNuConvTestName);
        printf("\nGot:");
        printUSeqErr(junkout, expectlen);
        printf("\nExpected:");
        printUSeqErr(expect, expectlen);
        return TC_MISMATCH;
    }
}
/*
 * Runs the table-driven conversion checks of this file with the given chunk
 * sizes: `outsize` is stored in gOutBufferSize and `insize` in gInBufferSize
 * before any conversion helper is called.
 *
 * For each charset there is an expected byte sequence for `sampleText`, a
 * "to" offsets table passed to testConvertFromU() and, for most charsets, a
 * "fm" offsets table passed to testConvertToU(). The return values of the
 * helpers are not inspected here; the helpers log their own failures.
 */
static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
{
/* UTF-16 sample used by most of the conversions below */
UChar sampleText[] =
{ 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E };
/* UTF-8 */
const uint8_t expectedUTF8[] =
{ 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
int32_t toUTF8Offs[] =
{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
int32_t fmUTF8Offs[] =
{ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };
/* generic ISO-2022 tables, only compiled when that converter is enabled */
#ifdef U_ENABLE_GENERIC_ISO_2022
const uint8_t expectedISO2022[] =
{ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
int32_t toISO2022Offs[] =
{ -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 };
int32_t fmISO2022Offs[] =
{ 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 };
#endif
/* ibm-930 */
const uint8_t expectedIBM930[] =
{ 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B };
int32_t toIBM930Offs[] =
{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, };
int32_t fmIBM930Offs[] =
{ 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c};
/* ibm-943 */
const uint8_t expectedIBM943[] =
{ 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e };
int32_t toIBM943Offs [] =
{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 };
int32_t fmIBM943Offs[] =
{ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a};
/* "@ibm9027" (opened from the test data package); from-Unicode only */
const uint8_t expectedIBM9027[] =
{ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe};
int32_t toIBM9027Offs [] =
{ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
/* ibm-920; from-Unicode only */
const uint8_t expectedIBM920[] =
{ 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e };
int32_t toIBM920Offs [] =
{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
/* iso-8859-3; from-Unicode only */
const uint8_t expectedISO88593[] =
{ 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
int32_t toISO88593Offs[] =
{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
/* LATIN_1; from-Unicode only */
const uint8_t expectedLATIN1[] =
{ 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E };
int32_t toLATIN1Offs[] =
{ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07};
/* UTF-16BE */
const uint8_t expectedUTF16BE[] =
{ 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e };
int32_t toUTF16BEOffs[]=
{ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
int32_t fmUTF16BEOffs[] =
{ 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
/* UTF-16LE */
const uint8_t expectedUTF16LE[] =
{ 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 };
int32_t toUTF16LEOffs[]=
{ 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07};
int32_t fmUTF16LEOffs[] =
{ 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e };
/* UTF-32BE */
const uint8_t expectedUTF32BE[] =
{ 0x00, 0x00, 0x00, 0x31,
0x00, 0x00, 0x00, 0x32,
0x00, 0x00, 0x00, 0x33,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x4e, 0x00,
0x00, 0x00, 0x4e, 0x8c,
0x00, 0x00, 0x4e, 0x09,
0x00, 0x00, 0x00, 0x2e };
/* note: this table has one more row (0x08) than expectedUTF32BE has characters */
int32_t toUTF32BEOffs[]=
{ 0x00, 0x00, 0x00, 0x00,
0x01, 0x01, 0x01, 0x01,
0x02, 0x02, 0x02, 0x02,
0x03, 0x03, 0x03, 0x03,
0x04, 0x04, 0x04, 0x04,
0x05, 0x05, 0x05, 0x05,
0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07,
0x08, 0x08, 0x08, 0x08 };
int32_t fmUTF32BEOffs[] =
{ 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
/* UTF-32LE */
const uint8_t expectedUTF32LE[] =
{ 0x31, 0x00, 0x00, 0x00,
0x32, 0x00, 0x00, 0x00,
0x33, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x4e, 0x00, 0x00,
0x8c, 0x4e, 0x00, 0x00,
0x09, 0x4e, 0x00, 0x00,
0x2e, 0x00, 0x00, 0x00 };
/* note: this table has one more row (0x08) than expectedUTF32LE has characters */
int32_t toUTF32LEOffs[]=
{ 0x00, 0x00, 0x00, 0x00,
0x01, 0x01, 0x01, 0x01,
0x02, 0x02, 0x02, 0x02,
0x03, 0x03, 0x03, 0x03,
0x04, 0x04, 0x04, 0x04,
0x05, 0x05, 0x05, 0x05,
0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07,
0x08, 0x08, 0x08, 0x08 };
int32_t fmUTF32LEOffs[] =
{ 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c };
/* Maltese sample, converted with "latin3" / "iso-8859-3" below (no offsets checked) */
const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
/* LMBCS-1 sample with its own input text */
const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
/* publish the requested chunk sizes to the conversion helpers */
gInBufferSize = insize;
gOutBufferSize = outsize;
log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
/* ---- Unicode -> codepage ---- */
#if 1
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
log_verbose("Test surrogate behaviour for UTF8\n");
{
/* a supplementary pair followed by a second, unpaired trail surrogate */
const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
0xf0, 0x90, 0x90, 0x81,
0xef, 0xbf, 0xbd
};
int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
}
#ifdef U_ENABLE_GENERIC_ISO_2022
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
#endif
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
/* iso-8859-3 is exercised a second time here with the same tables */
testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
#endif
/* ---- codepage -> Unicode (plus a few round trips) ---- */
#if 1
testConvertToU(expectedUTF8, sizeof(expectedUTF8),
sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
#ifdef U_ENABLE_GENERIC_ISO_2022
testConvertToU(expectedISO2022, sizeof(expectedISO2022),
sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
#endif
testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
testConvertToU(expectedIBM930, sizeof(expectedIBM930),
sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE);
testConvertToU(expectedIBM943, sizeof(expectedIBM943),
sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE);
/* utf-16le is exercised a second time here with the same tables */
testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
/* Maltese text in both directions; NULL means no offsets are compared */
testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
/* LMBCS-1 in both directions */
testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
/* UTF-7: the same Unicode text against "UTF-7" and "UTF-7,version=1" byte forms */
{
static const uint8_t utf7[] = {
0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
0x2b, 0x2d,
0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
};
static const UChar unicode[] = {
0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
0x41, 0x2262, 0x0391, 0x2e,
0x2b,
0x65e5, 0x672c, 0x8a9e
};
static const int32_t toUnicodeOffsets[] = {
0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
15, 17, 19, 23,
24,
27, 29, 32
};
static const int32_t fromUnicodeOffsets[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
11, 12, 12, 12, 13, 13, 13, 13, 14,
15, 15,
16, 16, 16, 17, 17, 17, 18, 18, 18
};
/* byte form used with the "UTF-7,version=1" converter */
static const uint8_t utf7Restricted[] = {
0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
0x2b, 0x2d,
0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
};
static const int32_t toUnicodeOffsetsR[] = {
0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
19, 21, 23, 27,
28,
31, 33, 36
};
static const int32_t fromUnicodeOffsetsR[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
11, 12, 12, 12, 13, 13, 13, 13, 14,
15, 15,
16, 16, 16, 17, 17, 17, 18, 18, 18
};
testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
}
/* IMAP-mailbox-name in both directions */
{
static const uint8_t imap[] = {
0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
0x26, 0x2d,
0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
0x5c,
0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
0x2f, 0x6d, 0x61, 0x69, 0x6c,
0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
};
static const UChar unicode[] = {
0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
0x41, 0x2262, 0x0391, 0x2e,
0x26,
0x65e5, 0x672c, 0x8a9e,
0x5c,
0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
0x2f, 0x6d, 0x61, 0x69, 0x6c,
0x2f, 0x65e5, 0x672c, 0x8a9e,
0x2f, 0x53f0, 0x5317
};
static const int32_t toUnicodeOffsets[] = {
0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
15, 17, 19, 24,
25,
28, 30, 33,
37,
38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48,
49, 51, 53, 56,
60, 62, 64
};
static const int32_t fromUnicodeOffsets[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
15, 15,
16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
19,
20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30,
31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
35, 36, 36, 36, 37, 37, 37, 37, 37
};
testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
}
/* UTF-8 -> Unicode with byte sequences for which 0xfffd is expected in the output */
{
static const uint8_t utf8[]={
0x61,
0xf7, 0xbf, 0xbf, 0xbf,
0x00,
0x62,
0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
0xf4, 0x8f, 0xbf, 0xbf,
0xdf, 0xbf,
0xbf,
0xf4, 0x90, 0x80, 0x80,
0x02
};
static const uint16_t utf8Expected[]={
0x0061,
0xfffd,
0x0000,
0x0062,
0xfffd,
0xfffd,
0xdbff, 0xdfff,
0x07ff,
0xfffd,
0xfffd,
0x0002
};
static const int32_t utf8Offsets[]={
0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
};
testConvertToU(utf8, sizeof(utf8),
utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
}
/* UTF-32BE -> Unicode, including values for which 0xfffd is expected */
{
static const uint8_t utf32[]={
0x00, 0x00, 0x00, 0x61,
0x00, 0x11, 0x00, 0x00,
0x00, 0x10, 0xff, 0xff,
0x00, 0x00, 0x00, 0x62,
0xff, 0xff, 0xff, 0xff,
0x7f, 0xff, 0xff, 0xff,
0x00, 0x00, 0x01, 0x62,
0x00, 0x00, 0x02, 0x62
};
static const uint16_t utf32Expected[]={
0x0061,
0xfffd,
0xDBFF,
0xDFFF,
0x0062,
0xfffd,
0xfffd,
0x0162,
0x0262
};
static const int32_t utf32Offsets[]={
0, 4, 8, 8, 12, 16, 20, 24, 28
};
testConvertToU(utf32, sizeof(utf32),
utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
}
/* UTF-32LE -> Unicode, same values as above in little-endian byte order */
{
static const uint8_t utf32[]={
0x61, 0x00, 0x00, 0x00,
0x00, 0x00, 0x11, 0x00,
0xff, 0xff, 0x10, 0x00,
0x62, 0x00, 0x00, 0x00,
0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0x7f,
0x62, 0x01, 0x00, 0x00,
0x62, 0x02, 0x00, 0x00,
};
static const uint16_t utf32Expected[]={
0x0061,
0xfffd,
0xDBFF,
0xDFFF,
0x0062,
0xfffd,
0xfffd,
0x0162,
0x0262
};
static const int32_t utf32Offsets[]={
0, 4, 8, 8, 12, 16, 20, 24, 28
};
testConvertToU(utf32, sizeof(utf32),
utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
}
}
/*
 * Exercises the "@test1", "@test3" and "@test4" converters from the test
 * data package through testConvertFromU()/testConvertToU(), with surrogate
 * pairs in the Unicode text.
 * The disabled (#if 0) sections switch the ICU data directory around the
 * test and are kept as they were.
 */
static void TestCoverageMBCS(void){
#if 0
    UErrorCode status = U_ZERO_ERROR;
    const char *directory = loadTestData(&status);
    char* tdpath = NULL;
    char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
    int len = strlen(directory);
    char* index=NULL;
    tdpath = (char*) malloc(sizeof(char) * (len * 2));
    uprv_strcpy(saveDirectory,u_getDataDirectory());
    log_verbose("Retrieved data directory %s \n",saveDirectory);
    uprv_strcpy(tdpath,directory);
    index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
    if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
            *(index+1)=0;
    }
    u_setDataDirectory(tdpath);
    log_verbose("ICU data directory is set to: %s \n" ,tdpath);
#endif

    /* @test1: Unicode -> bytes only */
    {
        static const UChar text[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
        static const uint8_t bytes[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
        static const int32_t byteOffsets[] = { 0, 1, 2, 3, 5, };

        testConvertFromU(text, sizeof(text)/sizeof(text[0]),
                         bytes, sizeof(bytes), "@test1", byteOffsets, FALSE);
    }

    /* @test3: both directions */
    {
        static const UChar text[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
        static const uint8_t bytes[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
        static const int32_t byteOffsets[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
        static const uint8_t input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
        static const UChar decoded[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
        static const int32_t unitOffsets[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };

        testConvertFromU(text, sizeof(text)/sizeof(text[0]),
                         bytes, sizeof(bytes), "@test3", byteOffsets, FALSE);
        testConvertToU(input, sizeof(input),
                       decoded, sizeof(decoded)/sizeof(decoded[0]), "@test3", unitOffsets, FALSE);
    }

    /* @test4: both directions */
    {
        static const UChar text[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
        static const uint8_t bytes[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
        static const int32_t byteOffsets[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
        static const uint8_t input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
        static const UChar decoded[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
        static const int32_t unitOffsets[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};

        testConvertFromU(text, sizeof(text)/sizeof(text[0]),
                         bytes, sizeof(bytes), "@test4", byteOffsets, FALSE);
        testConvertToU(input, sizeof(input),
                       decoded, sizeof(decoded)/sizeof(decoded[0]), "@test4", unitOffsets, FALSE);
    }
#if 0
    free(tdpath);
    log_verbose("Setting the data directory to %s \n", saveDirectory);
    u_setDataDirectory(saveDirectory);
    free(saveDirectory);
#endif
}
/*
 * Opens the converter named convName via my_ucnv_open() and checks that
 * ucnv_getType() reports convType for it.
 *
 * convName  converter name handed to my_ucnv_open()
 * convType  converter type the caller expects
 *
 * A failure to open is reported with log_data_err(); a type mismatch is
 * reported with log_err().
 */
static void TestConverterType(const char *convName, UConverterType convType) {
    UConverter* myConverter;
    UConverterType actualType;
    UErrorCode err = U_ZERO_ERROR;

    myConverter = my_ucnv_open(convName, &err);
    if (U_FAILURE(err)) {
        log_data_err("Failed to create an %s converter\n", convName);
        return;
    }

    actualType = ucnv_getType(myConverter);
    if (actualType != convType) {
        /* Print the value that was actually returned, not the expected one. */
        log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
                convName, (unsigned int)actualType);
    } else {
        log_verbose("ucnv_getType %s ok\n", convName);
    }
    ucnv_close(myConverter);
}
static void TestConverterTypesAndStarters()
{
UConverter* myConverter;
UErrorCode err = U_ZERO_ERROR;
UBool mystarters[256];
log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
myConverter = ucnv_open("ksc", &err);
if (U_FAILURE(err)) {
log_data_err("Failed to create an ibm-ksc converter\n");
return;
}
else
{
if (ucnv_getType(myConverter)!=UCNV_MBCS)
log_err("ucnv_getType Failed for ibm-949\n");
else
log_verbose("ucnv_getType ibm-949 ok\n");
if(myConverter!=NULL)
ucnv_getStarters(myConverter, mystarters, &err);
}
ucnv_close(myConverter);
TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
TestConverterType("ibm-878", UCNV_SBCS);
TestConverterType("iso-8859-1", UCNV_LATIN_1);
TestConverterType("ibm-1208", UCNV_UTF8);
TestConverterType("utf-8", UCNV_UTF8);
TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
#ifdef U_ENABLE_GENERIC_ISO_2022
TestConverterType("iso-2022", UCNV_ISO_2022);
#endif
TestConverterType("hz", UCNV_HZ);
TestConverterType("scsu", UCNV_SCSU);
TestConverterType("x-iscii-de", UCNV_ISCII);
TestConverterType("ascii", UCNV_US_ASCII);
TestConverterType("utf-7", UCNV_UTF7);
TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
TestConverterType("bocu-1", UCNV_BOCU1);
}
/*
 * Decodes the two bytes 0x61 0x5c with cnv and cross-checks the result
 * against ucnv_isAmbiguous(): the converter must report itself ambiguous
 * exactly when byte 0x5c does not decode to U+005C. For such converters,
 * ucnv_fixFileSeparator() must turn the second code unit into U+005C.
 *
 * Converters that fail to convert, do not decode 0x61 to U+0061, or produce
 * U+FFFD for the second unit are silently skipped.
 */
static void
TestAmbiguousConverter(UConverter *cnv) {
    static const char probeBytes[2]={ 0x61, 0x5c };
    UChar decoded[20]={ 0, 0, 0, 0 };
    const char *src=probeBytes;
    UChar *dst=decoded;
    UErrorCode status=U_ZERO_ERROR;
    UBool reportedAmbiguous;
    int backslashChanged;

    ucnv_toUnicode(cnv, &dst, dst+20, &src, src+2, NULL, TRUE, &status);
    if(U_FAILURE(status)) {
        /* conversion did not succeed: nothing to check for this converter */
        return;
    }
    if(decoded[0]!=0x61 || decoded[1]==0xfffd) {
        /* first byte is not decoded as U+0061, or second byte is unmapped */
        return;
    }

    backslashChanged=(decoded[1]!=0x5c);
    reportedAmbiguous=ucnv_isAmbiguous(cnv);
    if(backslashChanged!=reportedAmbiguous) {
        log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
                ucnv_getName(cnv, &status), backslashChanged, reportedAmbiguous);
        return;
    }

    if(!backslashChanged) {
        return;
    }

    /* the converter is ambiguous: the fix must restore U+005C */
    ucnv_fixFileSeparator(cnv, decoded, (int32_t)(dst-decoded));
    if(decoded[1]!=0x5c) {
        log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &status));
    }
}
static void TestAmbiguous()
{
UErrorCode status = U_ZERO_ERROR;
UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
const char target[] = {
0x5c, 0x75, 0x73, 0x72,
0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
0x5c, 0x64, 0x61, 0x74, 0x61,
0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
0
};
UChar asciiResult[200], sjisResult[200];
int32_t asciiLength = 0, sjisLength = 0, i;
const char *name;
status=U_ZERO_ERROR;
for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
cnv=ucnv_open(name, &status);
if(U_SUCCESS(status)) {
TestAmbiguousConverter(cnv);
ucnv_close(cnv);
} else {
log_err("error: unable to open available converter \"%s\"\n", name);
status=U_ZERO_ERROR;
}
}
sjis_cnv = ucnv_open("ibm-943", &status);
if (U_FAILURE(status))
{
log_data_err("Failed to create a SJIS converter\n");
return;
}
ascii_cnv = ucnv_open("LATIN-1", &status);
if (U_FAILURE(status))
{
log_data_err("Failed to create a LATIN-1 converter\n");
ucnv_close(sjis_cnv);
return;
}
sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, strlen(target), &status);
if (U_FAILURE(status))
{
log_err("Failed to convert the SJIS string.\n");
ucnv_close(sjis_cnv);
ucnv_close(ascii_cnv);
return;
}
asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, strlen(target), &status);
if (U_FAILURE(status))
{
log_err("Failed to convert the Latin-1 string.\n");
free(sjisResult);
ucnv_close(sjis_cnv);
ucnv_close(ascii_cnv);
return;
}
if (!ucnv_isAmbiguous(sjis_cnv))
{
log_err("SJIS converter should contain ambiguous character mappings.\n");
free(sjisResult);
free(asciiResult);
ucnv_close(sjis_cnv);
ucnv_close(ascii_cnv);
return;
}
if (u_strcmp(sjisResult, asciiResult) == 0)
{
log_err("File separators for SJIS don't need to be fixed.\n");
}
ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
if (u_strcmp(sjisResult, asciiResult) != 0)
{
log_err("Fixing file separator for SJIS failed.\n");
}
ucnv_close(sjis_cnv);
ucnv_close(ascii_cnv);
}
/*
 * Exercises ucnv_detectUnicodeSignature() in two passes.
 *
 * Pass 1 calls it with a source length of -1; pass 2 passes an explicit
 * length for each input. In both passes the returned encoding name and the
 * reported signature length are compared with the expected tables; in pass 2
 * an expected name of NULL means that no signature should be detected.
 */
static void
TestSignatureDetection(){
    /* with null terminated strings */
    {
        static const char* data[] = {
            "\xFE\xFF\x00\x00",
            "\xFF\xFE\x00\x00",
            "\xEF\xBB\xBF\x00",
            "\x0E\xFE\xFF\x00",
            "\xFE\xFF",
            "\xFF\xFE",
            "\xEF\xBB\xBF",
            "\x0E\xFE\xFF",
            "\xFE\xFF\x41\x42",
            "\xFF\xFE\x41\x41",
            "\xEF\xBB\xBF\x41",
            "\x0E\xFE\xFF\x41",
            "\x2B\x2F\x76\x38\x2D",
            "\x2B\x2F\x76\x38\x41",
            "\x2B\x2F\x76\x39\x41",
            "\x2B\x2F\x76\x2B\x41",
            "\x2B\x2F\x76\x2F\x41",
            "\xDD\x73\x66\x73"
        };
        static const char* expected[] = {
            "UTF-16BE",
            "UTF-16LE",
            "UTF-8",
            "SCSU",
            "UTF-16BE",
            "UTF-16LE",
            "UTF-8",
            "SCSU",
            "UTF-16BE",
            "UTF-16LE",
            "UTF-8",
            "SCSU",
            "UTF-7",
            "UTF-7",
            "UTF-7",
            "UTF-7",
            "UTF-7",
            "UTF-EBCDIC"
        };
        static const int32_t expectedLength[] ={
            2,
            2,
            3,
            3,
            2,
            2,
            3,
            3,
            2,
            2,
            3,
            3,
            5,
            4,
            4,
            4,
            4,
            4
        };
        const int count = (int)(sizeof(data)/sizeof(data[0]));
        int i=0;
        UErrorCode err;
        int32_t signatureLength = -1;
        const char* source = NULL;
        const char* enc = NULL;
        for( ; i<count; i++){
            err = U_ZERO_ERROR;
            source = data[i];
            enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
            if(U_FAILURE(err)){
                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
                continue;
            }
            if(enc == NULL || strcmp(enc,expected[i]) !=0){
                /* never hand a NULL pointer to %s */
                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc!=NULL ? enc : "(null)");
                continue;
            }
            if(signatureLength != expectedLength[i]){
                /* arguments follow the order of the message: expected first, then actual */
                log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,expectedLength[i],signatureLength);
            }
        }
    }
    /* with explicit source lengths */
    {
        static const char* data[] = {
            "\xFE\xFF\x00",
            "\xFF\xFE\x00",
            "\xEF\xBB\xBF\x00",
            "\x0E\xFE\xFF\x00",
            "\x00\x00\xFE\xFF",
            "\xFF\xFE\x00\x00",
            "\xFE\xFF",
            "\xFF\xFE",
            "\xEF\xBB\xBF",
            "\x0E\xFE\xFF",
            "\x00\x00\xFE\xFF",
            "\xFF\xFE\x00\x00",
            "\xFE\xFF\x41\x42",
            "\xFF\xFE\x41\x41",
            "\xEF\xBB\xBF\x41",
            "\x0E\xFE\xFF\x41",
            "\x00\x00\xFE\xFF\x41",
            "\xFF\xFE\x00\x00\x42",
            "\xFB\xEE\x28",
            "\xFF\x41\x42"
        };
        static const int len[] = {
            3,
            3,
            4,
            4,
            4,
            4,
            2,
            2,
            3,
            3,
            4,
            4,
            4,
            4,
            4,
            4,
            5,
            5,
            3,
            3
        };
        /* a NULL entry means that no signature is expected for that input */
        static const char* expected[] = {
            "UTF-16BE",
            "UTF-16LE",
            "UTF-8",
            "SCSU",
            "UTF-32BE",
            "UTF-32LE",
            "UTF-16BE",
            "UTF-16LE",
            "UTF-8",
            "SCSU",
            "UTF-32BE",
            "UTF-32LE",
            "UTF-16BE",
            "UTF-16LE",
            "UTF-8",
            "SCSU",
            "UTF-32BE",
            "UTF-32LE",
            "BOCU-1",
            NULL
        };
        static const int32_t expectedLength[] ={
            2,
            2,
            3,
            3,
            4,
            4,
            2,
            2,
            3,
            3,
            4,
            4,
            2,
            2,
            3,
            3,
            4,
            4,
            3,
            0
        };
        const int count = (int)(sizeof(data)/sizeof(data[0]));
        int i=0;
        UErrorCode err;
        int32_t signatureLength = -1;
        int32_t sourceLength=-1;
        const char* source = NULL;
        const char* enc = NULL;
        for( ; i<count; i++){
            err = U_ZERO_ERROR;
            source = data[i];
            sourceLength = len[i];
            enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
            if(U_FAILURE(err)){
                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
                continue;
            }
            if(expected[i] == NULL){
                /* no signature expected: do not call strcmp() with a NULL argument */
                if(enc != NULL){
                    log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected no signature. Got: %s\n",source,i,enc);
                    continue;
                }
            } else if(enc == NULL || strcmp(enc,expected[i]) !=0){
                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc!=NULL ? enc : "(null)");
                continue;
            }
            if(signatureLength != expectedLength[i]){
                /* arguments follow the order of the message: expected first, then actual */
                log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,expectedLength[i],signatureLength);
            }
        }
    }
}
void
static TestUTF7() {
static const uint8_t in[]={
0x48,
0x2d,
0x2b, 0x4a, 0x6a, 0x6f,
0x2d, 0x2d,
0x21,
0x2b, 0x2d,
0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
};
static const int32_t results[]={
1, 0x48,
1, 0x2d,
4, 0x263a,
2, 0x2d,
1, 0x21,
2, 0x2b,
7, 0x10401
};
const char *cnvName;
const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
UErrorCode errorCode=U_ZERO_ERROR;
UConverter *cnv=ucnv_open("UTF-7", &errorCode);
if(U_FAILURE(errorCode)) {
log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
return;
}
TestNextUChar(cnv, source, limit, results, "UTF-7");
TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
cnvName = ucnv_getName(cnv, &errorCode);
if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
}
ucnv_close(cnv);
}
void
static TestIMAP() {
static const uint8_t in[]={
0x48,
0x2d,
0x26, 0x4a, 0x6a, 0x6f,
0x2d, 0x2d,
0x21,
0x26, 0x2d,
0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
};
static const int32_t results[]={
1, 0x48,
1, 0x2d,
4, 0x263a,
2, 0x2d,
1, 0x21,
2, 0x26,
7, 0x10401
};
const char *cnvName;
const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
UErrorCode errorCode=U_ZERO_ERROR;
UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
if(U_FAILURE(errorCode)) {
log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
return;
}
TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
cnvName = ucnv_getName(cnv, &errorCode);
if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
}
ucnv_close(cnv);
}
/*
 * Runs the TestNextUChar()/TestNextUCharError() helpers on a "UTF-8"
 * converter: first on well-formed input with the converter's initial
 * callback, then, after installing UCNV_TO_U_CALLBACK_SKIP, on a second
 * input (in2) that is expected to yield only its first and last bytes as
 * characters.
 *
 * NOTE(review): the result tables look like (bytes consumed, expected code
 * point) pairs; the exact contract is defined by TestNextUChar().
 */
static void
TestUTF8() {
    /* test input */
    static const uint8_t in[]={
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80,
        0xf4, 0x84, 0x8c, 0xa1,
        0xf0, 0x90, 0x90, 0x81
    };
    /* expected test results */
    static const int32_t results[]={
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000,
        4, 0x104321,
        4, 0x10401
    };
    /* input for the SKIP-callback pass */
    static const uint8_t in2[]={
        0x61,
        0xc0, 0x80,
        0xe0, 0x80, 0x80,
        0xf0, 0x80, 0x80, 0x80,
        0xc0, 0xc0,
        0xf4, 0x90, 0x80, 0x80,
        0xf8, 0x80, 0x80, 0x80, 0x80,
        0xfe,
        0x62
    };
    /* expected results for in2: everything between 0x61 and 0x62 is skipped */
    static const int32_t results2[]={
        1, 0x61,
        22, 0x62
    };
    UConverterToUCallback cb;   /* receives the previously installed callback */
    const void *p;              /* receives the previous callback context */
    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv=ucnv_open("UTF-8", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "UTF-8");
    /* an empty input range (limit == source) must yield an index-out-of-bounds error */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    /* second pass with the skip callback installed */
    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
    source=(const char *)in2;
    limit=(const char *)(in2+sizeof(in2));
    TestNextUChar(cnv, source, limit, results2, "UTF-8");
    ucnv_close(cnv);
}
/*
 * Runs the TestNextUChar()/TestNextUCharError() helpers on a "CESU-8"
 * converter: first on the input in[] with the converter's initial callback,
 * then, after installing UCNV_TO_U_CALLBACK_SKIP, on a second input (in2)
 * that is expected to yield only its first and last bytes as characters.
 *
 * NOTE(review): the result tables look like (bytes consumed, expected code
 * point) pairs, and the -1 entries presumably tell TestNextUChar() not to
 * check the byte count - confirm against TestNextUChar().
 */
static void
TestCESU8() {
    /* test input */
    static const uint8_t in[]={
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
        0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
        0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
        0xef, 0xbf, 0xbc
    };
    /* expected test results */
    static const int32_t results[]={
        1, 0x61,
        2, 0x80,
        3, 0x800,
        6, 0x10000,
        3, 0xdc01,
        -1,0xd802,
        -1,0x10ffff,
        3, 0xfffc
    };
    /* input for the SKIP-callback pass */
    static const uint8_t in2[]={
        0x61,
        0xc0, 0x80,
        0xe0, 0x80, 0x80,
        0xf0, 0x80, 0x80, 0x80,
        0xc0, 0xc0,
        0xf0, 0x90, 0x80, 0x80,
        0xf4, 0x84, 0x8c, 0xa1,
        0xf0, 0x90, 0x90, 0x81,
        0xf4, 0x90, 0x80, 0x80,
        0xf8, 0x80, 0x80, 0x80, 0x80,
        0xfe,
        0x62
    };
    /* expected results for in2: everything between 0x61 and 0x62 is skipped */
    static const int32_t results2[]={
        1, 0x61,
        34, 0x62
    };
    UConverterToUCallback cb;   /* receives the previously installed callback */
    const void *p;              /* receives the previous callback context */
    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv=ucnv_open("CESU-8", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "CESU-8");
    /* an empty input range (limit == source) must yield an index-out-of-bounds error */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    /* second pass with the skip callback installed */
    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
    source=(const char *)in2;
    limit=(const char *)(in2+sizeof(in2));
    TestNextUChar(cnv, source, limit, results2, "CESU-8");
    ucnv_close(cnv);
}
/*
 * Runs TestNextUChar() on a "UTF-16" converter with three inputs (in1, in2,
 * in3), resetting the to-Unicode state between inputs, and finally checks
 * the empty-input error through TestNextUCharError().
 *
 * NOTE(review): the result tables look like (bytes consumed, expected code
 * point) pairs; the exact contract is defined by TestNextUChar().
 */
static void
TestUTF16() {
    /* test input */
    static const uint8_t in1[]={
        0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
    };
    static const uint8_t in2[]={
        0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
    };
    static const uint8_t in3[]={
        0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
    };
    /* expected test results */
    static const int32_t results1[]={
        4, 0x4e00,
        2, 0xfeff
    };
    static const int32_t results2[]={
        4, 0x004e,
        2, 0xfffe
    };
    static const int32_t results3[]={
        2, 0xfefe,
        2, 0x4e00,
        2, 0xfeff,
        4, 0x20001
    };
    const char *source, *limit;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv=ucnv_open("UTF-16", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
        return;
    }

    source=(const char *)in1;
    limit=(const char *)in1+sizeof(in1);
    TestNextUChar(cnv, source, limit, results1, "UTF-16");

    source=(const char *)in2;
    limit=(const char *)in2+sizeof(in2);
    ucnv_resetToUnicode(cnv);
    TestNextUChar(cnv, source, limit, results2, "UTF-16");

    source=(const char *)in3;
    limit=(const char *)in3+sizeof(in3);
    ucnv_resetToUnicode(cnv);
    TestNextUChar(cnv, source, limit, results3, "UTF-16");

    /* an empty input range (limit == source) must yield an index-out-of-bounds error */
    ucnv_resetToUnicode(cnv);
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    ucnv_close(cnv);
}
/*
 * Runs the TestNextUChar()/TestNextUCharError() helpers on a "utf-16be"
 * converter, then installs UCNV_TO_U_CALLBACK_STOP and checks that a single
 * byte of input is reported as U_TRUNCATED_CHAR_FOUND.
 *
 * NOTE(review): results[] looks like (bytes consumed, expected code point)
 * pairs; the exact contract is defined by TestNextUChar().
 */
static void
TestUTF16BE() {
    /* test input */
    static const uint8_t in[]={
        0x00, 0x61,
        0x00, 0xc0,
        0x00, 0x31,
        0x00, 0xf4,
        0xce, 0xfe,
        0xd8, 0x01, 0xdc, 0x01
    };
    /* expected test results */
    static const int32_t results[]={
        2, 0x61,
        2, 0xc0,
        2, 0x31,
        2, 0xf4,
        2, 0xcefe,
        4, 0x10401
    };
    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv=ucnv_open("utf-16be", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "UTF-16BE");
    /* an empty input range (limit == source) must yield an index-out-of-bounds error */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    /* half of a 16-bit unit: expected to be reported as a truncated character */
    {
        static const uint8_t source2[]={0x61};
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
    }
#if 0
{
const uint8_t source2[]={0xd8, 0x01};
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
}
#endif
    ucnv_close(cnv);
}
/*
 * Little-endian counterpart of the UTF-16BE test: feeds a byte sequence to
 * a "utf-16le" converter through TestNextUChar(), checks the empty-input
 * error, and finally checks that a lone byte is reported as
 * U_TRUNCATED_CHAR_FOUND once UCNV_TO_U_CALLBACK_STOP is installed.
 *
 * NOTE(review): expectedChars[] looks like (bytes consumed, expected code
 * point) pairs; the exact contract is defined by TestNextUChar().
 */
static void
TestUTF16LE() {
    /* input bytes */
    static const uint8_t leBytes[]={
        0x61, 0x00,
        0x31, 0x00,
        0x4e, 0x2e,
        0x4e, 0x00,
        0x01, 0xd8, 0x01, 0xdc
    };
    /* expected results for leBytes */
    static const int32_t expectedChars[]={
        2, 0x61,
        2, 0x31,
        2, 0x2e4e,
        2, 0x4e,
        4, 0x10401
    };
    /* half of a 16-bit unit */
    static const uint8_t loneByte[]={0x61};

    const char *start=(const char *)leBytes;
    const char *end=start+sizeof(leBytes);
    UErrorCode status=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("utf-16le", &status);
    if(U_FAILURE(status)) {
        log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(status));
        return;
    }

    TestNextUChar(cnv, start, end, expectedChars, "UTF-16LE");
    /* empty input range */
    TestNextUCharError(cnv, start, start, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    /* truncated input with the STOP callback */
    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
    TestNextUCharError(cnv, (const char*)loneByte, (const char*)loneByte+sizeof(loneByte), U_TRUNCATED_CHAR_FOUND, "an invalid character");
#if 0
{
static const uint8_t source2[]={0x01, 0xd8};
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
}
#endif
    ucnv_close(cnv);
}
/*
 * Runs TestNextUChar() on a "UTF-32" converter with three inputs (in1, in2,
 * in3), resetting the to-Unicode state between inputs, and finally checks
 * the empty-input error through TestNextUCharError().
 *
 * NOTE(review): the result tables look like (bytes consumed, expected code
 * point) pairs; the exact contract is defined by TestNextUChar().
 */
static void
TestUTF32() {
    /* test input */
    static const uint8_t in1[]={
        0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
    };
    static const uint8_t in2[]={
        0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
    };
    static const uint8_t in3[]={
        0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
    };
    /* expected test results */
    static const int32_t results1[]={
        8, 0x100f00,
        4, 0xfeff
    };
    static const int32_t results2[]={
        8, 0x0f1000,
        4, 0xfffe
    };
    static const int32_t results3[]={
        4, 0xfefe,
        4, 0x100f00,
        4, 0xfffd,
        4, 0xfffd
    };
    const char *source, *limit;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv=ucnv_open("UTF-32", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
        return;
    }

    source=(const char *)in1;
    limit=(const char *)in1+sizeof(in1);
    TestNextUChar(cnv, source, limit, results1, "UTF-32");

    source=(const char *)in2;
    limit=(const char *)in2+sizeof(in2);
    ucnv_resetToUnicode(cnv);
    TestNextUChar(cnv, source, limit, results2, "UTF-32");

    source=(const char *)in3;
    limit=(const char *)in3+sizeof(in3);
    ucnv_resetToUnicode(cnv);
    TestNextUChar(cnv, source, limit, results3, "UTF-32");

    /* an empty input range (limit == source) must yield an index-out-of-bounds error */
    ucnv_resetToUnicode(cnv);
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    ucnv_close(cnv);
}
/*
 * Feeds two byte sequences to a "UTF-32BE" converter through
 * TestNextUChar(): the first with the converter's initial callback, the
 * second after UCNV_TO_U_CALLBACK_SKIP has been installed. In between, the
 * empty-input error is checked through TestNextUCharError().
 *
 * NOTE(review): the expectation tables look like (bytes consumed, expected
 * code point) pairs; the exact contract is defined by TestNextUChar().
 */
static void
TestUTF32BE() {
    /* first input */
    static const uint8_t beBytes[]={
        0x00, 0x00, 0x00, 0x61,
        0x00, 0x00, 0x30, 0x61,
        0x00, 0x00, 0xdc, 0x00,
        0x00, 0x00, 0xd8, 0x00,
        0x00, 0x00, 0xdf, 0xff,
        0x00, 0x00, 0xff, 0xfe,
        0x00, 0x10, 0xab, 0xcd,
        0x00, 0x10, 0xff, 0xff
    };
    /* expectations for the first input */
    static const int32_t expectedChars[]={
        4, 0x61,
        4, 0x3061,
        4, 0xfffd,
        4, 0xfffd,
        4, 0xfffd,
        4, 0xfffe,
        4, 0x10abcd,
        4, 0x10ffff
    };
    /* second input, used with the SKIP callback */
    static const uint8_t skipBytes[]={
        0x00, 0x00, 0x00, 0x61,
        0x00, 0x11, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x62,
        0xff, 0xff, 0xff, 0xff,
        0x7f, 0xff, 0xff, 0xff,
        0x00, 0x00, 0x01, 0x62,
        0x00, 0x00, 0x02, 0x62
    };
    /* expectations for the second input */
    static const int32_t expectedAfterSkip[]={
        4, 0x61,
        8, 0x62,
        12, 0x162,
        4, 0x262
    };
    UConverterToUCallback previousAction;
    const void *previousContext;
    const char *start=(const char *)beBytes;
    const char *end=start+sizeof(beBytes);
    UErrorCode status=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("UTF-32BE", &status);
    if(U_FAILURE(status)) {
        log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(status));
        return;
    }

    TestNextUChar(cnv, start, end, expectedChars, "UTF-32BE");
    /* empty input range */
    TestNextUCharError(cnv, start, start, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    /* second pass with the skip callback installed */
    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &previousAction, &previousContext, &status);
    start=(const char *)skipBytes;
    end=start+sizeof(skipBytes);
    TestNextUChar(cnv, start, end, expectedAfterSkip, "UTF-32BE");

    ucnv_close(cnv);
}
/*
 * Feeds two byte sequences to a "UTF-32LE" converter through
 * TestNextUChar(): the first with the converter's initial callback, the
 * second after UCNV_TO_U_CALLBACK_SKIP has been installed. In between, the
 * empty-input error is checked through TestNextUCharError().
 *
 * NOTE(review): the expectation tables look like (bytes consumed, expected
 * code point) pairs; the exact contract is defined by TestNextUChar().
 */
static void
TestUTF32LE() {
    /* first input */
    static const uint8_t leBytes[]={
        0x61, 0x00, 0x00, 0x00,
        0x61, 0x30, 0x00, 0x00,
        0x00, 0xdc, 0x00, 0x00,
        0x00, 0xd8, 0x00, 0x00,
        0xff, 0xdf, 0x00, 0x00,
        0xfe, 0xff, 0x00, 0x00,
        0xcd, 0xab, 0x10, 0x00,
        0xff, 0xff, 0x10, 0x00
    };
    /* expectations for the first input */
    static const int32_t expectedChars[]={
        4, 0x61,
        4, 0x3061,
        4, 0xfffd,
        4, 0xfffd,
        4, 0xfffd,
        4, 0xfffe,
        4, 0x10abcd,
        4, 0x10ffff
    };
    /* second input, used with the SKIP callback */
    static const uint8_t skipBytes[]={
        0x61, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x11, 0x00,
        0x62, 0x00, 0x00, 0x00,
        0xff, 0xff, 0xff, 0xff,
        0xff, 0xff, 0xff, 0x7f,
        0x62, 0x01, 0x00, 0x00,
        0x62, 0x02, 0x00, 0x00,
    };
    /* expectations for the second input */
    static const int32_t expectedAfterSkip[]={
        4, 0x61,
        8, 0x62,
        12, 0x162,
        4, 0x262,
    };
    UConverterToUCallback previousAction;
    const void *previousContext;
    const char *start=(const char *)leBytes;
    const char *end=start+sizeof(leBytes);
    UErrorCode status=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("UTF-32LE", &status);
    if(U_FAILURE(status)) {
        log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(status));
        return;
    }

    TestNextUChar(cnv, start, end, expectedChars, "UTF-32LE");
    /* empty input range */
    TestNextUCharError(cnv, start, start, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    /* second pass with the skip callback installed */
    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &previousAction, &previousContext, &status);
    start=(const char *)skipBytes;
    end=start+sizeof(skipBytes);
    TestNextUChar(cnv, start, end, expectedAfterSkip, "UTF-32LE");

    ucnv_close(cnv);
}
/*
 * Checks a "LATIN_1" converter with TestNextUChar()/TestNextUCharError() and
 * then hands two larger UTF-16 inputs to TestConv(): one for
 * "LATIN_1"/"LATIN-1" and one for "ASCII"/"ASCII". Each TestConv() input is
 * paired with a byte array listing the same values.
 *
 * NOTE(review): singleByteExpect[] looks like (bytes consumed, expected code
 * point) pairs; the exact contract is defined by TestNextUChar().
 */
static void
TestLATIN1() {
    /* input bytes for the getNextUChar test */
    static const uint8_t singleBytes[]={
        0x61,
        0x31,
        0x32,
        0xc0,
        0xf0,
        0xf4,
    };
    /* expectations for singleBytes */
    static const int32_t singleByteExpect[]={
        1, 0x61,
        1, 0x31,
        1, 0x32,
        1, 0xc0,
        1, 0xf0,
        1, 0xf4,
    };
    /* UTF-16 input for the LATIN_1 TestConv() call */
    static const uint16_t latinUnits[] = {
        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
        0xcb, 0x82
    };
    /* byte array passed to TestConv() together with latinUnits */
    static const uint8_t latinBytes[] = {
        0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
        0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
        0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
        0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
        0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
        0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
        0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
        0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
        0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
        0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
        0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
        0xcb, 0x82
    };
    /* UTF-16 input for the ASCII TestConv() call */
    static const uint16_t asciiUnits[]={
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
        0x37, 0x20, 0x2A, 0x2F,
    };
    /* byte array passed to TestConv() together with asciiUnits */
    static const unsigned char asciiBytes[]={
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
        0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
        0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
        0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
        0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
        0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
        0x37, 0x20, 0x2A, 0x2F,
    };
    const char *start=(const char *)singleBytes;
    const char *end=start+sizeof(singleBytes);
    UErrorCode status=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("LATIN_1", &status);
    if(U_FAILURE(status)) {
        log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(status));
        return;
    }

    TestNextUChar(cnv, start, end, singleByteExpect, "LATIN_1");
    /* empty input range */
    TestNextUCharError(cnv, start, start, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    /* the length argument of TestConv() is a count of 16-bit units */
    TestConv(latinUnits,sizeof(latinUnits)/sizeof(latinUnits[0]),"LATIN_1","LATIN-1",(char*)latinBytes,sizeof(latinBytes));
    TestConv(asciiUnits,sizeof(asciiUnits)/sizeof(asciiUnits[0]),"ASCII","ASCII",(char*)asciiBytes,sizeof(asciiBytes));

    ucnv_close(cnv);
}
/*
 * Feeds six single bytes to an "x-mac-turkish" converter through
 * TestNextUChar() and checks the empty-input error through
 * TestNextUCharError(). A failure to open the converter is reported with
 * log_data_err().
 *
 * NOTE(review): expectedChars[] looks like (bytes consumed, expected code
 * point) pairs; the exact contract is defined by TestNextUChar().
 */
static void
TestSBCS() {
    /* input bytes */
    static const uint8_t sbcsBytes[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
    /* expectations for sbcsBytes */
    static const int32_t expectedChars[]={
        1, 0x61,
        1, 0xbf,
        1, 0xc4,
        1, 0x2021,
        1, 0xf8ff,
        1, 0x00d9
    };
    const char *start=(const char *)sbcsBytes;
    const char *end=start+sizeof(sbcsBytes);
    UErrorCode status=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("x-mac-turkish", &status);
    if(U_FAILURE(status)) {
        log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(status));
        return;
    }

    TestNextUChar(cnv, start, end, expectedChars, "SBCS(x-mac-turkish)");
    /* empty input range */
    TestNextUCharError(cnv, start, start, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    ucnv_close(cnv);
}
/*
 * Opens the "@ibm9027" converter through my_ucnv_open() and runs the
 * TestNextUChar()/TestNextUCharError() helpers on it:
 *   - five two-byte sequences with their expected code points,
 *   - an empty input range (U_INDEX_OUTOFBOUNDS_ERROR),
 *   - the byte pair 0x1a 0x1b, for which U_ZERO_ERROR is expected,
 *   - a lone byte 0xc4 after UCNV_TO_U_CALLBACK_STOP has been installed
 *     (U_TRUNCATED_CHAR_FOUND).
 *
 * NOTE(review): expectedChars[] looks like (bytes consumed, expected code
 * point) pairs; the exact contract is defined by TestNextUChar().
 */
static void
TestDBCS() {
    /* input bytes */
    static const uint8_t dbcsBytes[]={
        0x44, 0x6a,
        0xc4, 0x9c,
        0x7a, 0x74,
        0x46, 0xab,
        0x42, 0x5b,
    };
    /* expectations for dbcsBytes */
    static const int32_t expectedChars[]={
        2, 0x00a7,
        2, 0xe1d2,
        2, 0x6962,
        2, 0xf842,
        2, 0xffe5,
    };
    /* labelled "an invalid character" in the check below */
    static const uint8_t invalidPair[]={0x1a, 0x1b};
    /* labelled "a character is truncated" in the check below */
    static const uint8_t truncatedLead[]={0xc4};

    const char *start=(const char *)dbcsBytes;
    const char *end=start+sizeof(dbcsBytes);
    UErrorCode status=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=my_ucnv_open("@ibm9027", &status);
    if(U_FAILURE(status)) {
        log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(status));
        return;
    }

    TestNextUChar(cnv, start, end, expectedChars, "DBCS(@ibm9027)");
    /* empty input range */
    TestNextUCharError(cnv, start, start, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    /* checked with the converter's callback as it was after opening */
    TestNextUCharError(cnv, (const char*)invalidPair, (const char*)invalidPair+sizeof(invalidPair), U_ZERO_ERROR, "an invalid character");

    /* truncated input with the STOP callback */
    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
    TestNextUCharError(cnv, (const char*)truncatedLead, (const char*)truncatedLead+sizeof(truncatedLead), U_TRUNCATED_CHAR_FOUND, "a character is truncated");

    ucnv_close(cnv);
}
/*
 * Opens the "ibm-1363" converter and runs the
 * TestNextUChar()/TestNextUCharError() helpers on it:
 *   - a mix of one- and two-byte sequences with their expected code points,
 *   - an empty input range (U_INDEX_OUTOFBOUNDS_ERROR),
 *   - the byte pair 0xa1 0x01, for which U_ZERO_ERROR is expected,
 *   - a lone byte 0xc4 after UCNV_TO_U_CALLBACK_STOP has been installed
 *     (U_TRUNCATED_CHAR_FOUND).
 *
 * NOTE(review): expectedChars[] looks like (bytes consumed, expected code
 * point) pairs; the exact contract is defined by TestNextUChar().
 */
static void
TestMBCS() {
    /* input bytes */
    static const uint8_t mbcsBytes[]={
        0x01,
        0xa6, 0xa3,
        0x00,
        0xa6, 0xa1,
        0x08,
        0xc2, 0x76,
        0xc2, 0x78,
    };
    /* expectations for mbcsBytes */
    static const int32_t expectedChars[]={
        1, 0x0001,
        2, 0x250c,
        1, 0x0000,
        2, 0x2500,
        1, 0x0008,
        2, 0xd60c,
        2, 0xd60e,
    };
    /* labelled "an invalid character" in the check below */
    static const uint8_t invalidPair[]={0xa1, 0x01};
    /* labelled "a character is truncated" in the check below */
    static const uint8_t truncatedLead[]={0xc4};

    const char *start=(const char *)mbcsBytes;
    const char *end=start+sizeof(mbcsBytes);
    UErrorCode status=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ibm-1363", &status);
    if(U_FAILURE(status)) {
        log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(status));
        return;
    }

    TestNextUChar(cnv, start, end, expectedChars, "MBCS(ibm-1363)");
    /* empty input range */
    TestNextUCharError(cnv, start, start, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    /* checked with the converter's callback as it was after opening */
    TestNextUCharError(cnv, (const char*)invalidPair, (const char*)invalidPair+sizeof(invalidPair), U_ZERO_ERROR, "an invalid character");

    /* truncated input with the STOP callback */
    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
    TestNextUCharError(cnv, (const char*)truncatedLead, (const char*)truncatedLead+sizeof(truncatedLead), U_TRUNCATED_CHAR_FOUND, "a character is truncated");

    ucnv_close(cnv);
}
#ifdef U_ENABLE_GENERIC_ISO_2022
/*
 * Only compiled when U_ENABLE_GENERIC_ISO_2022 is defined. Opens the
 * "ISO_2022" converter and runs the TestNextUChar()/TestNextUCharError()
 * helpers on it:
 *   - a three-byte sequence 0x1b 0x25 0x42 followed by UTF-8-style bytes,
 *   - a limit that lies before the source (U_ILLEGAL_ARGUMENT_ERROR),
 *   - an empty input range (U_INDEX_OUTOFBOUNDS_ERROR),
 *   - a lone byte 0xc4 after UCNV_TO_U_CALLBACK_STOP has been installed
 *     (U_TRUNCATED_CHAR_FOUND),
 *   - the byte pair 0xa1 0x01 (U_ILLEGAL_CHAR_FOUND).
 *
 * NOTE(review): expectedChars[] looks like (bytes consumed, expected code
 * point) pairs; the exact contract is defined by TestNextUChar().
 */
static void
TestISO_2022() {
    /* input bytes */
    static const uint8_t isoBytes[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };
    /* expectations for isoBytes */
    static const int32_t expectedChars[]={
        4, 0x0031,
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };
    /* labelled "a character is truncated" in the check below */
    static const uint8_t truncatedLead[]={0xc4};
    /* labelled "an invalid character" in the check below */
    static const uint8_t invalidPair[]={0xa1, 0x01};

    const char *start=(const char *)isoBytes;
    const char *end=start+sizeof(isoBytes);
    UErrorCode status=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022", &status);
    if(U_FAILURE(status)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(status));
        return;
    }

    TestNextUChar(cnv, start, end, expectedChars, "ISO_2022");

    /* limit before source, then limit equal to source */
    TestNextUCharError(cnv, start, start-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    TestNextUCharError(cnv, start, start, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    /* truncated input with the STOP callback */
    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
    TestNextUCharError(cnv, (const char*)truncatedLead, (const char*)truncatedLead+sizeof(truncatedLead), U_TRUNCATED_CHAR_FOUND, "a character is truncated");

    /* still with the STOP callback installed */
    TestNextUCharError(cnv, (const char*)invalidPair, (const char*)invalidPair+sizeof(invalidPair), U_ILLEGAL_CHAR_FOUND, "an invalid character");

    ucnv_close(cnv);
}
#endif
/*
 * Round-trips [source, sourceLimit) through cnv while growing the target
 * buffer limit by one unit per conversion call, so that the converter keeps
 * hitting U_BUFFER_OVERFLOW_ERROR, and then compares the UTF-16 output with
 * the original input.
 *
 * source       first UTF-16 code unit of the input
 * sourceLimit  one past the last code unit of the input
 * cnv          converter to exercise; it is reset at the start
 *
 * Conversion failures other than U_BUFFER_OVERFLOW_ERROR and mismatching
 * code units are reported with log_err(). Both work buffers are released on
 * every exit path.
 */
static void
TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf;
    int32_t uBufSize = 120;
    int len=0;
    int expectedLen;
    int producedLen;
    int compareLen;
    int i=2;
    UErrorCode errorCode=U_ZERO_ERROR;

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    if(uBuf==NULL || cBuf==NULL){
        log_err("TestSmallTargetBuffer: unable to allocate the conversion buffers\n");
        goto cleanup;
    }

    ucnv_reset(cnv);
    /* i starts at 2 and is pre-decremented, so the body runs once with i==1 */
    for(;--i>0; ){
        uSource = (const UChar*) source;
        uSourceLimit=(const UChar*)sourceLimit;
        cTarget = cBuf;
        uTarget = uBuf;
        cSource = cBuf;
        cTargetLimit = cBuf;
        uTargetLimit = uBuf;

        /* Unicode -> bytes, opening the target by i more bytes per call */
        do{
            cTargetLimit = cTargetLimit+ i;
            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
                continue;
            }
            if(U_FAILURE(errorCode)){
                log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
        }while (uSource<uSourceLimit);

        cSourceLimit =cTarget;

        /* bytes -> Unicode, opening the target by i more units per call */
        do{
            uTargetLimit=uTargetLimit+i;
            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
                continue;
            }
            if(U_FAILURE(errorCode)){
                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
        }while(cSource<cSourceLimit);

        /*
         * Compare the round-tripped text with the input. The bound must be
         * (limit - start); the reversed subtraction is negative and would
         * skip the comparison entirely. Only units that were actually written
         * to uBuf are compared: both loops above call the converter with
         * flush==FALSE and may stop right after an overflow, so the output
         * can be shorter than the input and the rest of uBuf is
         * uninitialized.
         */
        uSource = (const UChar*) source;
        expectedLen = (int)(uSourceLimit - uSource);
        producedLen = (int)(uTarget - uBuf);
        compareLen = expectedLen<producedLen ? expectedLen : producedLen;
        for(len=0;len<compareLen;len++){
            if(uBuf[len]!=uSource[len]){
                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
            }
        }
    }

cleanup:
    free(uBuf);
    free(cBuf);
}
/*
 * Converts [source, sourceLimit) to bytes with ucnv_fromUChars(), converts
 * those bytes back with ucnv_toUChars(), and logs every code unit of the
 * result that differs from the input.
 *
 * source      - first UTF-16 code unit of the input
 * sourceLimit - one past the last code unit of the input
 * cnv         - open converter; it is reset before use
 *
 * Both scratch buffers are released on every exit path.
 */
static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf,*test;
    int32_t uBufSize = 120;
    int numCharsInTarget=0;
    UErrorCode errorCode=U_ZERO_ERROR;

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
    uSource = source;
    uSourceLimit=sourceLimit;
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;
    ucnv_reset(cnv);

    numCharsInTarget=ucnv_fromUChars( cnv , cTarget, (int32_t)(cTargetLimit-cTarget),uSource,(int32_t)(uSourceLimit-uSource), &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUChars conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    cSource = cBuf;
    test =uBuf;
    ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* compare the round-tripped text with the input, unit by unit */
    uSource = source;
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
        }
        uSource++;
        test++;
    }

cleanup:
    free(uBuf);
    free(cBuf);
}
/*
 * Converts [source, sourceLimit) to bytes and back through cnv while the
 * source limit is advanced by only one unit per call; the targets are the
 * full scratch buffers.
 *
 * source      - first UTF-16 code unit of the input
 * sourceLimit - one past the last code unit of the input
 * cnv         - open converter; it is reset before use
 *
 * Any error other than U_BUFFER_OVERFLOW_ERROR is logged and ends the test.
 * Both scratch buffers are released on every exit path.
 */
static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf;
    int32_t uBufSize = 120;
    int len=0;
    int i=2;
    const UChar *temp = sourceLimit;
    UErrorCode errorCode=U_ZERO_ERROR;

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    ucnv_reset(cnv);

    /* i is pre-decremented from 2, so the body runs exactly once */
    for(;--i>0;){
        uSource = (UChar*) source;
        cTarget = cBuf;
        uTarget = uBuf;
        cSource = cBuf;
        cTargetLimit = cBuf;
        uTargetLimit = uBuf+uBufSize*5;
        cTargetLimit = cTargetLimit+uBufSize*10;
        uSourceLimit=uSource;

        /* Unicode -> bytes, exposing one more source unit per call */
        do{
            if (uSourceLimit < sourceLimit) {
                uSourceLimit = uSourceLimit+1;
            }
            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
                continue;
            }
            if(U_FAILURE(errorCode)){
                log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
        }while (uSource<temp);

        cSourceLimit =cBuf;

        /* bytes -> Unicode, exposing one more source byte per call */
        do{
            if (cSourceLimit < cTarget) {
                cSourceLimit = cSourceLimit+1;
            }
            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
                continue;
            }
            if(U_FAILURE(errorCode)){
                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
        }while(cSource<cTarget);

        uSource = source;
        /*
         * NOTE(review): (source - sourceLimit) is negative, so this comparison
         * never executes. Simply swapping the operands is not safe as written:
         * both conversions above pass flush=FALSE, so a converter that holds
         * back output until flush may leave the tail of uBuf unwritten.
         * Confirm the intended flush handling before enabling the check.
         */
        for(;len<(int)(source - sourceLimit);len++){
            if(uBuf[len]!=uSource[len]){
                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
            }
        }
    }

cleanup:
    free(uBuf);
    free(cBuf);
}
/*
 * Decodes [source, limit) one code point at a time with ucnv_getNextUChar()
 * and compares each result with the next expected value from results[].
 *
 * cnv     - open converter; it is reset before use
 * source  - first byte of the encoded input
 * limit   - one past the last byte of the encoded input
 * results - expected text as UTF-16; a lead surrogate and the following unit
 *           are combined into a single expected code point
 * message - prefix for log output
 *
 * The loop ends at U_INDEX_OUTOFBOUNDS_ERROR, at the first other failure
 * (which is logged), or when the input is exhausted. results[] carries no
 * length, so the caller must supply at least as many code points as the
 * input decodes to.
 */
static void
TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
                     const uint16_t results[], const char* message){
    const char* s=source;
    const uint16_t *r=results;
    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t c,exC;

    ucnv_reset(cnv);
    while(s<limit) {
        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
            break;
        } else if(U_FAILURE(errorCode)) {
            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
            break;
        } else {
            if(UTF_IS_FIRST_SURROGATE(*r)){
                int i =0, len = 2;
                UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
                /* skip the lead here; the trailing r++ below skips the trail */
                r++;
            }else{
                exC = *r;
            }
            if(c!=(uint32_t)(exC)) {
                /* report the assembled code point, not the UTF-16 unit r points at */
                log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) exC,c);
            }
        }
        r++;
    }
}
static int TestJitterbug930(const char* enc){
UErrorCode err = U_ZERO_ERROR;
UConverter*converter;
char out[80];
char*target = out;
UChar in[4];
const UChar*source = in;
int32_t off[80];
int32_t* offsets = off;
int numOffWritten=0;
UBool flush = 0;
converter = my_ucnv_open(enc, &err);
in[0] = 0x41;
in[1] = 0x4E01;
in[2] = 0x4E02;
in[3] = 0x4E03;
memset(off, '*', sizeof(off));
ucnv_fromUnicode (converter,
&target,
target+2,
&source,
source+3,
offsets,
flush,
&err);
while(*offsets< off[10]){
numOffWritten++;
offsets++;
}
log_verbose("Testing Jitterbug 930 for encoding %s",enc);
if(numOffWritten!= (int)(target-out)){
log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
}
err = U_ZERO_ERROR;
memset(off,'*' , sizeof(off));
flush = 1;
offsets=off;
ucnv_fromUnicode (converter,
&target,
target+4,
&source,
source,
offsets,
flush,
&err);
numOffWritten=0;
while(*offsets< off[10]){
numOffWritten++;
if(*offsets!= -1){
log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
}
offsets++;
}
ucnv_close(converter);
return 0;
}
/*
 * Round-trips a fixed UTF-16 sample through the "HZ" converter with
 * ucnv_fromUnicode()/ucnv_toUnicode(), logs every code unit that differs,
 * then re-runs the same sample through the shared helper tests.
 *
 * The converter and all heap buffers are released on every exit path.
 */
static void
TestHZ() {
    static const uint16_t in[]={
        0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
        0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
        0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
        0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
        0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
        0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
        0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
        0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
        0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
        0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
        0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
        0x005A, 0x005B, 0x005C, 0x000A
    };
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL,*test;
    int32_t uBufSize = 120;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;
    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    int32_t* myOff= offsets;

    cnv=ucnv_open("HZ", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
        free(offsets);
        return;
    }

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
    uSource = (const UChar*)&in[0];
    uSourceLimit=(const UChar*)&in[sizeof(in)/2];
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;

    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    myOff=offsets;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* compare the round-tripped text with the input, unit by unit */
    uSource = (const UChar*)&in[0];
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
        }
        uSource++;
        test++;
    }

    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
    TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestJitterbug930("csISO2022JP");

cleanup:
    ucnv_close(cnv);
    free(offsets);
    free(uBuf);
    free(cBuf);
}
/*
 * ISCII conversion test. Holds one UTF-16 table and one byte table and
 * hands both to testConvertToU() (converter "x-iscii-de") and to TestConv()
 * (converter "ISCII,version=0").
 */
static void
TestISCII(){
/* Unicode side: UTF-16 code units passed to both helpers */
static const uint16_t in[]={
0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
0x096D,0x096E,0x096F,
0x0915,0x094d, 0x200D,
0x0915,0x094d, 0x200c,
0x965,
/* the next three rows are identical, value for value, to three rows of byteArr */
0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
0x0930,0x094D,0x200D,
0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
0x0915,0x0921,0x002B,0x095F,
0x0B86, 0xB87, 0xB88,
0x0C05, 0x0C02, 0x0C03,0x0c31,
0x0C85, 0xC82, 0x0C83,
0x0970, 0x952,
0x0960 ,
0x0944 ,
0x090C ,
0x0962,
0x0961 ,
0x0963 ,
0x0950 ,
0x093D ,
0x0958,
0x0959,
0x095A,
0x095B,
0x095C,
0x095D,
0x095E,
0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
};
/* Byte side: passed as the encoded form of `in` to both helpers */
static const unsigned char byteArr[]={
0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
0xf8,0xf9,0xfa,
0xb3, 0xE8, 0xE9,
0xb3, 0xE8, 0xE8,
0xea, 0xea,
0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
0xEF,0x42,0xCF,0xE8,0xD9,
0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
0xEF, 0x44, 0xa5, 0xa6, 0xa7,
0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
0xEF, 0x48,0xa4, 0xa2, 0xa3,
0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
0xAA, 0xE9,
0xDF, 0xE9,
0xa6, 0xE9,
0xdb, 0xE9,
0xa7, 0xE9,
0xdc, 0xE9,
0xa1, 0xE9,
0xEA, 0xE9,
0xB3, 0xE9,
0xB4, 0xE9,
0xB5, 0xE9,
0xBA, 0xE9,
0xBF, 0xE9,
0xC0, 0xE9,
0xC9, 0xE9,
0xD9, 0xE8, 0xCF,
0x00, 0x00A0,
0xEF, 0x30,
};
/* testConvertToU() is defined outside this chunk; presumably it checks that byteArr decodes to in -- TODO confirm */
testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
/* TestConv() round-trips `in` through the converter and also decodes byteArr against `in` */
TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
}
/*
 * Round-trips a fixed UTF-16 sample through an ISO-2022-JP family converter
 * with ucnv_fromUnicode()/ucnv_toUnicode(), logs every code unit that
 * differs, then re-runs the same sample through the shared helper tests.
 *
 * The converter and all heap buffers are released on every exit path.
 */
static void
TestISO_2022_JP() {
    static const uint16_t in[]={
        0x0041,0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
        0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
        0x201D, 0x3014, 0x000D, 0x000A,
        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
    };
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL,*test;
    int32_t uBufSize = 120;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;
    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    int32_t* myOff= offsets;

    /*
     * NOTE(review): this opens "ISO_2022_JP_1" although the function name and
     * the label passed to TestGetNextUChar2022() say ISO-2022-JP -- confirm
     * which converter is intended before changing it.
     */
    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
        free(offsets);
        return;
    }

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
    uSource = (const UChar*)&in[0];
    uSourceLimit=(const UChar*)&in[sizeof(in)/2];
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;

    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    myOff=offsets;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* compare the round-tripped text with the input, unit by unit */
    uSource = (const UChar*)&in[0];
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
        }
        uSource++;
        test++;
    }

    TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
    TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestJitterbug930("csISO2022JP");

cleanup:
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
/*
 * Generic round-trip test for one converter.
 *
 * in         - UTF-16 input
 * len        - number of code units in `in`
 * conv       - converter name passed to my_ucnv_open()
 * lang       - label used in log output for the byteArr checks
 * byteArr    - optional reference encoding of `in`; may be NULL
 * byteArrLen - number of bytes in byteArr; 0 skips the byteArr checks
 *
 * Steps: convert `in` to bytes and back and compare; run the small-buffer
 * and getNextUChar helpers; if byteArr is given, also decode it and compare
 * the result with `in`.
 *
 * The converter and all heap buffers are released on every exit path.
 */
static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL,*test;
    int32_t uBufSize = 120*10;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;
    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
    int32_t* myOff= offsets;

    cnv=my_ucnv_open(conv, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
        free(offsets);
        return;
    }

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
    cBuf =(char*)malloc(uBufSize * sizeof(char));
    uSource = (const UChar*)&in[0];
    uSourceLimit=uSource+len;
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize;

    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    myOff=offsets;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* compare the round-tripped text with the input, unit by unit */
    uSource = (const UChar*)&in[0];
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
        }
        uSource++;
        test++;
    }

    TestSmallTargetBuffer(&in[0],(const UChar*)&in[len],cnv);
    TestSmallSourceBuffer(&in[0],(const UChar*)&in[len],cnv);
    TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);

    if(byteArr && byteArrLen!=0){
        TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
        TestToAndFromUChars(&in[0],(const UChar*)&in[len],cnv);
        {
            cSource = byteArr;
            cSourceLimit = cSource+byteArrLen;
            /*
             * Rewind the write position: without this the decode lands after
             * the earlier round-trip output and the comparison below would
             * only re-read that earlier output from the start of uBuf.
             */
            uTarget = uBuf;
            test=uBuf;
            myOff = offsets;
            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
            if(U_FAILURE(errorCode)){
                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
            uSource = (const UChar*)&in[0];
            while(uSource<uSourceLimit){
                if(*test!=*uSource){
                    log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
                }
                uSource++;
                test++;
            }
        }
    }

cleanup:
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
/*
 * Character-access callback handed to u_unescapeAt(): treats context as a
 * char array and returns the element at offset, converted to UChar.
 */
static UChar U_CALLCONV
_charAt(int32_t offset, void *context) {
    const char *text = (const char *)context;
    return text[offset];
}
/*
 * Copies src to dst as UTF-16, expanding backslash escapes through
 * u_unescapeAt().
 *
 * dst    - output buffer; may be NULL only when dstLen is 0 or -1
 * dstLen - capacity of dst in UChars
 * src    - input bytes; must not be NULL
 * srcLen - length of src, or -1 to use uprv_strlen(src)
 * status - in/out error code; nothing is done if it already indicates failure
 *
 * Returns the number of UChars the processed input needs. When that exceeds
 * dstLen, *status is set to U_BUFFER_OVERFLOW_ERROR and only the units that
 * fit are written. A code point above U+FFFF always counts as two units,
 * whether or not there is room for it.
 *
 * If u_unescapeAt() reports an error, *status is set to U_INVALID_CHAR_FOUND,
 * processing stops, and the count reached so far is returned.
 */
static int32_t
unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
    int32_t srcIndex=0;
    int32_t dstIndex=0;

    if(U_FAILURE(*status)){
        return 0;
    }
    if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(srcLen==-1){
        srcLen = uprv_strlen(src);
    }

    while (srcIndex<srcLen) {
        UChar32 c = src[srcIndex++];
        if (c == 0x005C ) {
            /* srcIndex is advanced past the escape by u_unescapeAt() */
            c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src);
            if (c == (UChar32)0xFFFFFFFF) {
                *status=U_INVALID_CHAR_FOUND;
                break;
            }
        }

        if(c>0xFFFF){
            /* supplementary code point: lead unit */
            if(dstIndex<dstLen){
                dst[dstIndex]=UTF16_LEAD(c);
            }else{
                *status=U_BUFFER_OVERFLOW_ERROR;
            }
            dstIndex++;
            /* trail unit; counted even when it does not fit */
            if(dstIndex<dstLen){
                dst[dstIndex]=UTF16_TRAIL(c);
            }else{
                *status=U_BUFFER_OVERFLOW_ERROR;
            }
            dstIndex++;
        }else{
            if(dstIndex<dstLen){
                dst[dstIndex]=(UChar)c;
            }else{
                *status = U_BUFFER_OVERFLOW_ERROR;
            }
            dstIndex++;
        }
    }
    return dstIndex;
}
static void
TestFullRoundtrip(const char* cp){
UChar usource[10] ={0};
UChar nsrc[10] = {0};
uint32_t i=1;
int len=0, ulen;
nsrc[0]=0x0061;
TestConv(usource,1,cp,"",NULL,0);
TestConv(usource,2,cp,"",NULL,0);
nsrc[2]=0x5555;
TestConv(nsrc,3,cp,"",NULL,0);
for(;i<=0x10FFFF;i++){
if(i==0xD800){
i=0xDFFF;
continue;
}
if(i<=0xFFFF){
usource[0] =(UChar) i;
len=1;
}else{
usource[0]=UTF16_LEAD(i);
usource[1]=UTF16_TRAIL(i);
len=2;
}
ulen=len;
if(i==0x80) {
usource[2]=0;
}
TestConv(usource,ulen,cp,"",NULL,0);
usource[ulen]=usource[0];
usource[ulen+1]=usource[1];
ulen+=len;
TestConv(usource,ulen,cp,"",NULL,0);
usource[ulen]=usource[0];
usource[ulen+1]=usource[1];
ulen+=len;
TestConv(usource,ulen,cp,"",NULL,0);
nsrc[1]=usource[0];
nsrc[2]=usource[1];
nsrc[len+1]=0x5555;
TestConv(nsrc,len+2,cp,"",NULL,0);
uprv_memset(usource,0,sizeof(UChar)*10);
}
}
/*
 * Runs TestFullRoundtrip() for each converter in the list below, in order.
 * Does nothing when QUICK is set.
 *
 * The log line for every run names the exact converter string that is
 * opened, so the "UTF-7,version=1" run is distinguishable from plain "UTF-7".
 */
static void
TestRoundTrippingAllUTF(void){
    static const char *const converterNames[]={
        "BOCU-1",
        "SCSU",
        "UTF-8",
        "CESU-8",
        "UTF-16BE",
        "UTF-16LE",
        "UTF-16",
        "UTF-32BE",
        "UTF-32LE",
        "UTF-32",
        "UTF-7",
        "UTF-7,version=1",
        "IMAP-mailbox-name",
        "GB18030"
    };
    int32_t idx;

    if(QUICK){
        return;
    }
    for(idx=0; idx<(int32_t)(sizeof(converterNames)/sizeof(converterNames[0])); idx++){
        log_verbose("Running exhaustive round trip test for %s\n", converterNames[idx]);
        TestFullRoundtrip(converterNames[idx]);
    }
}
/*
 * SCSU converter test.
 *
 * Part 1 unescapes each string in fTestCases and round-trips the result
 * through TestConv() with the "SCSU" converter.
 * Part 2 passes fixed UTF-16 tables to TestConv(), each together with its
 * matching *SCSU byte table as the reference encoding (monkeyIn has none).
 */
static void
TestSCSU() {
/* Paired tables: xxxUTF16 is the text, xxxSCSU the bytes passed as its reference encoding */
static const uint16_t germanUTF16[]={
0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
};
static const uint8_t germanSCSU[]={
0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
};
static const uint16_t russianUTF16[]={
0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
};
static const uint8_t russianSCSU[]={
0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
};
static const uint16_t japaneseUTF16[]={
0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
0x307e, 0x3067, 0x3042, 0x308b, 0x3002
};
static const uint8_t japaneseSCSU[]={
0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
0xcb, 0x82
};
static const uint16_t allFeaturesUTF16[]={
0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
0x01df, 0xf000, 0xdbff, 0xdfff
};
static const uint8_t allFeaturesSCSU[]={
0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
0xdf, 0x14, 0x80, 0x15, 0xff
};
/* UTF-16 only: passed to TestConv() with no reference bytes (NULL, 0) */
static const uint16_t monkeyIn[]={
0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
};
/*
 * Escaped strings expanded by unescape() before conversion. The backslashes
 * are doubled here so that the escapes reach unescape() as text.
 */
static const char *fTestCases [] = {
"\\ud800\\udc00",
"\\ud8ff\\udcff",
"\\udBff\\udFff",
"\\ud834\\udc00",
"\\U0010FFFF",
"Hello \\u9292 \\u9192 World!",
"Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
"Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
"\\u0648\\u06c8",
"\\u0648\\u06c8",
"\\u4444\\uE001",
"\\u4444\\uf2FF",
"\\u4444\\uf188\\u4444",
"\\u4444\\uf188\\uf288",
"\\u4444\\uf188abc\\u0429\\uf288",
"\\u9292\\u2222",
"Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
"Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
"Hello World!123456",
"Hello W\\u0081\\u011f\\u0082!",
"abc\\u0301\\u0302",
"abc\\u4411d",
"abc\\u4411\\u4412d",
"abc\\u0401\\u0402\\u047f\\u00a5\\u0405",
"\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
"\\u9292\\u2222",
"\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
"\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
"\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
"",
"\\u0000",
"\\uFFFF",
"\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
"\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
"\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
"\\u0041\\u00df\\u0401\\u015f",
"\\u9066\\u2123abc",
/* NOTE(review): the next entry contains a bare "\\u" with no hex digits after its second escape; presumably unescape() stops there -- confirm this is intended */
"\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
"\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
};
int i=0;
/* Part 1: unescape each test string and round-trip it through SCSU */
for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
const char* cSrc = fTestCases[i];
UErrorCode status = U_ZERO_ERROR;
int32_t cSrcLen,srcLen;
UChar* src;
cSrcLen= srcLen = uprv_strlen(fTestCases[i]);
/* buffer sized from the escaped length plus one extra UChar */
src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
/* status is not inspected afterwards; whatever count unescape() returns is used as the length */
srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
free(src);
}
/* Part 2: fixed samples with reference encodings. The allFeatures call appears twice with identical arguments. */
TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
}
/*
 * Regression check for Jitterbug 2346: decodes a fixed ISO-2022-JP byte
 * sequence, compares the result with the expected UTF-16 text, then converts
 * the decoded text back to bytes and reports any conversion error.
 *
 * The converter is closed on every exit path after a successful open.
 */
static void TestJitterbug2346(){
    char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
                      0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
    uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
    UChar uTarget[500]={'\0'};
    UChar* utarget=uTarget;
    UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
    char cTarget[500]={'\0'};
    char* ctarget=cTarget;
    char* ctargetLimit=cTarget+sizeof(cTarget);
    const char* csource=source;
    const UChar* usource;
    UChar* temp = expected;
    UChar* expectedLimit = temp + sizeof(expected)/sizeof(expected[0]);
    UErrorCode err=U_ZERO_ERROR;
    UConverter* conv =ucnv_open("ISO_2022_JP",&err);

    if(U_FAILURE(err)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
        return;
    }

    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
    if(U_FAILURE(err)) {
        log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
        ucnv_close(conv);
        return;
    }

    /* from here on [uTarget, utargetLimit) is the decoded text */
    utargetLimit=utarget;
    utarget = uTarget;

    if((utargetLimit-utarget)!=(expectedLimit-temp)){
        log_err("Expected %d UChars \t Got: %d\n",(int)(expectedLimit-temp),(int)(utargetLimit-utarget));
    }
    /* bounded by both lengths so a longer result cannot read past expected[] */
    while(utarget<utargetLimit && temp<expectedLimit){
        if(*temp!=*utarget){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*temp,(int)*utarget) ;
        }
        utarget++;
        temp++;
    }

    /* convert the decoded text back, starting from its first unit */
    usource = uTarget;
    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,&usource,utargetLimit,NULL,TRUE,&err);
    if(U_FAILURE(err)) {
        log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
    }

    ucnv_close(conv);
}
/*
 * Round-trips a fixed UTF-16 sample through the "ISO_2022_JP_1" converter
 * with ucnv_fromUnicode()/ucnv_toUnicode(), logs every code unit that
 * differs, runs one ucnv_getNextUChar() case through TestNextUCharError(),
 * then re-runs the sample through the small-buffer helper tests.
 *
 * The converter and both heap buffers are released on every exit path.
 */
static void
TestISO_2022_JP_1() {
    static const uint16_t in[]={
        0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
        0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
        0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
        0x301C, 0x2016, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
        0x201D, 0x000D, 0x000A,
        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
        0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
        0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
        0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
        0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
    };
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf,*test;
    int32_t uBufSize = 120;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
    uSource = (const UChar*)&in[0];
    uSourceLimit=(const UChar*)&in[sizeof(in)/2];
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;

    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* compare the round-tripped text with the input, unit by unit */
    uSource = (const UChar*)&in[0];
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
        }
        uSource++;
        test++;
    }

    ucnv_reset(cnv);
    {
        /* the expected status passed for this input is U_ZERO_ERROR */
        static const uint8_t source2[]={0x0e,0x24,0x053};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
    }
    TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);

cleanup:
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
}
/*
 * Round-trip test for the "ISO_2022_JP_2" converter.
 *
 * Converts the UTF-16 sample in `in` to bytes with ucnv_fromUnicode(),
 * converts those bytes back with ucnv_toUnicode(), and reports every code
 * unit that differs from the original. The same sample is then handed to the
 * small-target-buffer, small-source-buffer and to/from-UChars helpers, and
 * finally the byte sequence 0x0e 0x24 0x53 is passed to TestNextUCharError()
 * with U_ZERO_ERROR as the expected status.
 *
 * All resources (converter, both conversion buffers and the offsets array)
 * are released on every exit path, including the early error exits.
 */
static void
TestISO_2022_JP_2() {
    static const uint16_t in[]={
        0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
        0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
        0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
        0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
        0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
        0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
        0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
        0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
        0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
        0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
        0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
        0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
        0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
        0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
        0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
        0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
        0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
        0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
        0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
    };
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL,*test;
    int32_t uBufSize = 120;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;
    int32_t* offsets = NULL;
    int32_t* myOff;

    /* Open the converter before allocating anything so a missing-data
     * failure cannot leak memory. */
    cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }

    offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
    if(offsets==NULL || uBuf==NULL || cBuf==NULL) {
        log_err("malloc failed in TestISO_2022_JP_2\n");
        goto cleanup;
    }

    /* Unicode -> ISO-2022-JP-2 */
    myOff = offsets;
    uSource = (const UChar*)&in[0];
    uSourceLimit=(const UChar*)&in[sizeof(in)/2];
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;
    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* ISO-2022-JP-2 -> Unicode; the offsets array is reused for this direction. */
    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    myOff=offsets;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* Compare the round-tripped text with the original, unit by unit. */
    uSource = (const UChar*)&in[0];
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
        }
        uSource++;
        test++;
    }

    TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);

    ucnv_reset(cnv);
    {
        static const uint8_t source2[]={0x0e,0x24,0x053};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
    }

cleanup:
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
/*
 * Round-trip test for the converter opened as "ISO_2022,locale=kr".
 *
 * Converts the UTF-16 sample in `in` to bytes and back, reports every code
 * unit that differs, then runs the getNextUChar, small-buffer and
 * to/from-UChars helpers as well as TestJitterbug930("csISO2022KR").
 * Finally, with the STOP to-Unicode callback installed, the byte sequence
 * 0x1b 0x24 0x53 is expected to yield U_ILLEGAL_ESCAPE_SEQUENCE.
 *
 * All resources are released on every exit path, including the early
 * error exits.
 */
static void
TestISO_2022_KR() {
    static const uint16_t in[]={
        0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
        ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
        ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
        ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
        ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
        ,0x53E3,0x53E4,0x000A,0x000D};
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL,*test;
    int32_t uBufSize = 120;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;
    int32_t* offsets = NULL;
    int32_t* myOff;

    /* Open the converter before allocating anything so a missing-data
     * failure cannot leak memory. */
    cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }

    offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
    if(offsets==NULL || uBuf==NULL || cBuf==NULL) {
        log_err("malloc failed in TestISO_2022_KR\n");
        goto cleanup;
    }

    /* Unicode -> ISO-2022-KR */
    myOff = offsets;
    uSource = (const UChar*)&in[0];
    uSourceLimit=(const UChar*)&in[sizeof(in)/2];
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;
    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* ISO-2022-KR -> Unicode; the offsets array is reused for this direction. */
    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    myOff=offsets;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* Compare the round-tripped text with the original, unit by unit. */
    uSource = (const UChar*)&in[0];
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
        }
        uSource++;
        test++;
    }

    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
    TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestJitterbug930("csISO2022KR");

    ucnv_reset(cnv);
    {
        static const uint8_t source2[]={0x1b,0x24,0x053};
        /* Install the STOP callback so the bad escape sequence is reported
         * as an error instead of being substituted. */
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
    }

cleanup:
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
/*
 * Round-trip test for the converter opened as "ibm-25546".
 *
 * Converts the UTF-16 sample in `in` to bytes and back, reports every code
 * unit that differs, then (resetting the converter in between) runs the
 * getNextUChar, small-buffer and to/from-UChars helpers. Finally, with the
 * STOP to-Unicode callback installed, the byte sequence 0x1b 0x24 0x53 is
 * expected to yield U_ILLEGAL_ESCAPE_SEQUENCE.
 *
 * All resources are released on every exit path, including the early
 * error exits.
 */
static void
TestISO_2022_KR_1() {
    static const uint16_t in[]={
        0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
        ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
        ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
        ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
        ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
        ,0x53E3,0x53E4,0x000A,0x000D};
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL,*test;
    int32_t uBufSize = 120;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;
    int32_t* offsets = NULL;
    int32_t* myOff;

    /* Open the converter before allocating anything so a missing-data
     * failure cannot leak memory. */
    cnv=ucnv_open("ibm-25546", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }

    offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
    if(offsets==NULL || uBuf==NULL || cBuf==NULL) {
        log_err("malloc failed in TestISO_2022_KR_1\n");
        goto cleanup;
    }

    /* Unicode -> bytes */
    myOff = offsets;
    uSource = (const UChar*)&in[0];
    uSourceLimit=(const UChar*)&in[sizeof(in)/2];
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;
    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* bytes -> Unicode; the offsets array is reused for this direction. */
    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    myOff=offsets;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* Compare the round-tripped text with the original, unit by unit. */
    uSource = (const UChar*)&in[0];
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
        }
        uSource++;
        test++;
    }

    ucnv_reset(cnv);
    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
    TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    ucnv_reset(cnv);
    TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);

    ucnv_reset(cnv);
    {
        static const uint8_t source2[]={0x1b,0x24,0x053};
        /* Install the STOP callback so the bad escape sequence is reported
         * as an error instead of being substituted. */
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
    }

cleanup:
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
/*
 * Regression test for Jitterbug 2411: a byte stream that contains the
 * ESC $ ) C designator more than once (at the start, after a line feed and
 * at the very end) must convert to Unicode without error.
 *
 * The same input is run through two converters: "iso-2022-kr" and
 * "ibm-25546". The input pointer is rewound before the second pass because
 * ucnv_toUnicode() advances it, and the second pass uses the second
 * converter (kr1) rather than re-using the first one.
 *
 * Both converters are closed on every exit path.
 */
static void TestJitterbug2411(){
    const char* sourceStart = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
        "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
    const char* source;
    const char* sourceLimit;
    UConverter* kr=NULL, *kr1=NULL;
    UErrorCode errorCode = U_ZERO_ERROR;
    UChar tgt[100]={'\0'};
    UChar* target = tgt;
    UChar* targetLimit = target+100;

    /* Computed once: the per-call pointer below moves, the limit must not. */
    sourceLimit = sourceStart+uprv_strlen(sourceStart);

    kr=ucnv_open("iso-2022-kr", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
        goto cleanup;
    }
    source = sourceStart;
    ucnv_toUnicode(kr,&target,targetLimit,&source,sourceLimit,NULL,TRUE,&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    kr1 = ucnv_open("ibm-25546", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
        goto cleanup;
    }
    /* Rewind both the output and the input before the second pass. */
    target = tgt;
    targetLimit = target+100;
    source = sourceStart;
    ucnv_toUnicode(kr1,&target,targetLimit,&source,sourceLimit,NULL,TRUE,&errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
        goto cleanup;
    }

cleanup:
    if(kr!=NULL) {
        ucnv_close(kr);
    }
    if(kr1!=NULL) {
        ucnv_close(kr1);
    }
}
/*
 * To-Unicode checks for the converter names "JIS", "JIS7" and "JIS8".
 *
 * For each name a byte sequence, the UTF-16 code units it should produce
 * and a table of offsets are handed to testConvertToU(). Each offsets table
 * has one entry per expected UTF-16 unit.
 */
static void
TestJIS(){
    /* --- "JIS" --- */
    const uint8_t jisBytes[] = {
        0x1b,0x28,0x48,0x41,0x42,
        0x1b,0x28,0x49,0x41,0x42,
        0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21
    };
    const uint16_t jisUnicode[] = {
        0x0041, 0x0042,
        0xFF81, 0xFF82,
        0x3000
    };
    int32_t jisOffsets[]={
        3,4,
        8,9,
        16
    };

    /* --- "JIS7" (includes an SO ... SI shifted run) --- */
    const uint8_t jis7Bytes[] = {
        0x1b,0x28,0x48,0x41,0x42,
        0x1b,0x28,0x49,0x41,0x42,
        0x1b,0x24,0x42,0x21,0x21,
        0x0e,0x41,0x42,0x0f,
        0x21,0x22,
        0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21
    };
    const uint16_t jis7Unicode[] = {
        0x0041, 0x0042,
        0xFF81, 0xFF82,
        0x3000,
        0xFF81, 0xFF82,
        0x3001,
        0x3000
    };
    int32_t jis7Offsets[]={
        3,4,
        8,9,
        13,16,
        17,
        19,27
    };

    /* --- "JIS8" (includes bytes with the high bit set) --- */
    const uint8_t jis8Bytes[] = {
        0x1b,0x28,0x48,0x41,0x42,
        0xa1,0xc8,0xd9,
        0x1b,0x28,0x42,
        0x41,0x42,
        0xb1,0xc3,
        0x1b,0x24,0x42,0x21,0x21
    };
    const uint16_t jis8Unicode[] = {
        0x0041, 0x0042,
        0xff61, 0xff88, 0xff99,
        0x0041, 0x0042,
        0xff71, 0xff83,
        0x3000
    };
    int32_t jis8Offsets[]={
        3, 4, 5, 6,
        7, 11, 12, 13,
        14, 18
    };

    testConvertToU(jisBytes, sizeof(jisBytes),
                   jisUnicode, sizeof(jisUnicode)/sizeof(jisUnicode[0]),
                   "JIS", jisOffsets, TRUE);

    testConvertToU(jis7Bytes, sizeof(jis7Bytes),
                   jis7Unicode, sizeof(jis7Unicode)/sizeof(jis7Unicode[0]),
                   "JIS7", jis7Offsets, TRUE);

    testConvertToU(jis8Bytes, sizeof(jis8Bytes),
                   jis8Unicode, sizeof(jis8Unicode)/sizeof(jis8Unicode[0]),
                   "JIS8", jis8Offsets, TRUE);
}
/*
 * Regression test for Jitterbug 915: round-trips a fixed ISO-2022-CN-EXT
 * byte stream (cSource) through Unicode and back, and reports every byte of
 * the regenerated stream that differs from the original.
 *
 * Fixes relative to the earlier version:
 *  - the mismatch message now prints the original byte as "Expected" and
 *    the regenerated byte as "Got" (they were swapped);
 *  - the comparison never reads past the end of cSource; regenerated output
 *    that is longer than the source is reported instead;
 *  - the converter is closed on every exit path.
 */
static void TestJitterbug915(){
    static char cSource[]={
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
        0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
        0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
        0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
        0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
        0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
        0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
        0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
        0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
        0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
        0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
        0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
        0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
        0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
        0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
        0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
        0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
        0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
        0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
        0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
        0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
        0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
        0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
        0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
        0x37, 0x20, 0x2A, 0x2F
    };
    UChar uTarget[500]={'\0'};
    UChar* utarget=uTarget;
    UChar* utargetLimit=uTarget+sizeof(uTarget)/sizeof(uTarget[0]);
    char cTarget[500]={'\0'};
    char* ctarget=cTarget;
    char* ctargetLimit=cTarget+sizeof(cTarget);
    const char* csource=cSource;
    const char* tempSrc = cSource;
    const char* tempSrcLimit = cSource+sizeof(cSource);
    UErrorCode err=U_ZERO_ERROR;
    UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
    if(U_FAILURE(err)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
        return;
    }

    /* bytes -> Unicode */
    ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
    if(U_FAILURE(err)) {
        log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
        goto cleanup;
    }

    /* Unicode -> bytes: convert exactly what the first pass produced. */
    utargetLimit=utarget;
    utarget = uTarget;
    ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
    if(U_FAILURE(err)) {
        log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
        goto cleanup;
    }

    /* Compare the regenerated bytes with the original, staying inside both arrays. */
    ctargetLimit=ctarget;
    ctarget =cTarget;
    while(ctarget<ctargetLimit && tempSrc<tempSrcLimit){
        if(*ctarget != *tempSrc){
            log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), (int)*tempSrc,(int)*ctarget) ;
        }
        ++ctarget;
        ++tempSrc;
    }
    if(ctarget<ctargetLimit){
        log_err("j915: round-trip output is %d byte(s) longer than the source\n", (int)(ctargetLimit-ctarget));
    }

cleanup:
    ucnv_close(conv);
}
/*
 * Round-trip test for the converter opened as "ISO_2022,locale=cn,version=1".
 *
 * The sample in `in` starts with a run of surrogate pairs followed by BMP
 * text. It is converted to bytes and back; every code unit that differs from
 * the original is reported, matching units are logged verbosely. The
 * small-buffer helpers are then run, and finally the byte sequence
 * 0x0e 0x24 0x53 is passed to TestNextUCharError() with U_ZERO_ERROR as the
 * expected status.
 *
 * All resources are released on every exit path, including the early
 * error exits.
 */
static void
TestISO_2022_CN_EXT() {
    static const uint16_t in[]={
        0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
        0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
        0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
        0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
        0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
        0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
        0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
        0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
        0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
        0xD869, 0xDED5,
        0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
        0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
        0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
        0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
        0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
        0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
        0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
        0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
        0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
        0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
        0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
        0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
        0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
        0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
        0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
        0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
    };
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL,*test;
    int32_t uBufSize = 180;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;
    int32_t* offsets = NULL;
    int32_t* myOff;

    /* Open the converter before allocating anything so a missing-data
     * failure cannot leak memory. */
    cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }

    offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    /* The byte buffer is allocated at 10x but only the first 5x is offered
     * to the converter (see cTargetLimit below). */
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    if(offsets==NULL || uBuf==NULL || cBuf==NULL) {
        log_err("malloc failed in TestISO_2022_CN_EXT\n");
        goto cleanup;
    }

    /* Unicode -> ISO-2022-CN-EXT */
    myOff = offsets;
    uSource = (const UChar*)&in[0];
    uSourceLimit=(const UChar*)&in[sizeof(in)/2];
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;
    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* ISO-2022-CN-EXT -> Unicode; the offsets array is reused for this direction. */
    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    myOff=offsets;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* Compare the round-tripped text with the original, unit by unit. */
    uSource = (const UChar*)&in[0];
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
        }
        else{
            log_verbose(" Got: \\u%04X\n",(int)*test) ;
        }
        uSource++;
        test++;
    }

    TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);

    ucnv_reset(cnv);
    {
        static const uint8_t source2[]={0x0e,0x24,0x053};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
    }

cleanup:
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
/*
 * Round-trip test for the converter opened as "ISO_2022,locale=cn,version=0".
 *
 * Converts the UTF-16 sample in `in` to bytes and back; every code unit that
 * differs from the original is reported, matching units are logged
 * verbosely. The getNextUChar, small-buffer and to/from-UChars helpers and
 * TestJitterbug930("csISO2022CN") are then run, and finally the byte
 * sequence 0x0e 0x24 0x53 is passed to TestNextUCharError() with
 * U_ZERO_ERROR as the expected status.
 *
 * All resources are released on every exit path, including the early
 * error exits.
 */
static void
TestISO_2022_CN() {
    static const uint16_t in[]={
        0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
        0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
        0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
        0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
        0x0020, 0x0045, 0x004e, 0x0044,
        0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
        0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
        0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
        0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
        0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
        0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
        0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
        0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
        0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
        0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
        0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
        0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
        0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
        0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
    };
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL,*test;
    int32_t uBufSize = 180;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;
    int32_t* offsets = NULL;
    int32_t* myOff;

    /* Open the converter before allocating anything so a missing-data
     * failure cannot leak memory. */
    cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }

    offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    /* The byte buffer is allocated at 10x but only the first 5x is offered
     * to the converter (see cTargetLimit below). */
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    if(offsets==NULL || uBuf==NULL || cBuf==NULL) {
        log_err("malloc failed in TestISO_2022_CN\n");
        goto cleanup;
    }

    /* Unicode -> ISO-2022-CN */
    myOff = offsets;
    uSource = (const UChar*)&in[0];
    uSourceLimit=(const UChar*)&in[sizeof(in)/2];
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;
    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* ISO-2022-CN -> Unicode; the offsets array is reused for this direction. */
    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    myOff=offsets;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* Compare the round-tripped text with the original, unit by unit. */
    uSource = (const UChar*)&in[0];
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
        }
        else{
            log_verbose(" Got: \\u%04X\n",(int)*test) ;
        }
        uSource++;
        test++;
    }

    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
    TestSmallTargetBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestSmallSourceBuffer(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestToAndFromUChars(&in[0],(const UChar*)&in[sizeof(in)/2],cnv);
    TestJitterbug930("csISO2022CN");

    ucnv_reset(cnv);
    {
        static const uint8_t source2[]={0x0e,0x24,0x053};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
    }

cleanup:
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
/*
 * ucnv_getNextUChar()-style checks for the "ibm-930" converter, driven
 * through the TestNextUChar() / TestNextUCharError() helpers:
 *   1. a mixed byte sequence with its expected results table,
 *   2. an empty source range (expects U_INDEX_OUTOFBOUNDS_ERROR),
 *   3. a lone 0x0f byte (expects U_INDEX_OUTOFBOUNDS_ERROR),
 *   4. the bytes 0x0e 0x7F 0xFF (expects U_ZERO_ERROR),
 *   5. a second byte sequence that begins with 0x0f.
 * The converter is reset between the individual checks.
 */
static void
TestEBCDIC_STATEFUL() {
    /* First sequence and its results table. */
    static const uint8_t firstBytes[]={
        0x61,
        0x1a,
        0x0f, 0x4b,
        0x42,
        0x40,
        0x36,
    };
    /* NOTE(review): entries look like (bytes consumed, code point) pairs —
     * confirm against TestNextUChar(). */
    static const int32_t firstExpected[]={
        1, 0x002f,
        1, 0x0092,
        2, 0x002e,
        1, 0xff62,
        1, 0x0020,
        1, 0x0096,
    };

    /* Second sequence and its results table. */
    static const uint8_t secondBytes[]={
        0x0f,
        0xa1,
        0x01
    };
    static const int32_t secondExpected[]={
        2, 0x203E,
        1, 0x0001,
    };

    /* Inputs for the two error-path checks. */
    static const uint8_t truncatedBytes[]={0x0f};
    static const uint8_t invalidBytes[]={0x0e, 0x7F, 0xFF};

    const char *bytes;
    const char *bytesEnd;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ibm-930", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
        return;
    }

    bytes=(const char *)firstBytes;
    bytesEnd=bytes+sizeof(firstBytes);
    TestNextUChar(cnv, bytes, bytesEnd, firstExpected, "EBCDIC_STATEFUL(ibm-930)");

    /* Empty range: limit equals start. */
    ucnv_reset(cnv);
    TestNextUCharError(cnv, bytes, bytes, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");

    ucnv_reset(cnv);
    TestNextUCharError(cnv,
                       (const char*)truncatedBytes,
                       (const char*)truncatedBytes+sizeof(truncatedBytes),
                       U_INDEX_OUTOFBOUNDS_ERROR,
                       "a character is truncated");

    ucnv_reset(cnv);
    TestNextUCharError(cnv,
                       (const char*)invalidBytes,
                       (const char*)invalidBytes+sizeof(invalidBytes),
                       U_ZERO_ERROR,
                       "an invalid character [EBCDIC STATEFUL]");

    ucnv_reset(cnv);
    bytes=(const char*)secondBytes;
    bytesEnd=bytes+sizeof(secondBytes);
    TestNextUChar(cnv,bytes,bytesEnd,secondExpected,"EBCDIC_STATEFUL(ibm-930),seq#2");

    ucnv_close(cnv);
}
/*
 * Feeds a sequence of one-, two- and four-byte GB 18030 byte groups to
 * TestNextUChar() on a "gb18030" converter, together with the table of
 * expected results. The "#if 0" entries in both tables are compiled out.
 */
static void
TestGB18030() {
    static const uint8_t gbBytes[]={
        0x24,
        0x7f,
        0x81, 0x30, 0x81, 0x30,
        0xa8, 0xbf,
        0xa2, 0xe3,
        0xd2, 0xbb,
        0x82, 0x35, 0x8f, 0x33,
        0x84, 0x31, 0xa4, 0x39,
        0x90, 0x30, 0x81, 0x30,
        0xe3, 0x32, 0x9a, 0x35
#if 0
        0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34
#endif
    };
    /* NOTE(review): entries look like (bytes consumed, code point) pairs —
     * confirm against TestNextUChar(). */
    static const int32_t gbExpected[]={
        1, 0x24,
        1, 0x7f,
        4, 0x80,
        2, 0x1f9,
        2, 0x20ac,
        2, 0x4e00,
        4, 0x9fa6,
        4, 0xffff,
        4, 0x10000,
        4, 0x10ffff
#if 0
        8, 0x10000
#endif
    };
    const char *bytes=(const char *)gbBytes;
    const char *bytesEnd=(const char *)gbBytes+sizeof(gbBytes);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("gb18030", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
        return;
    }

    TestNextUChar(cnv, bytes, bytesEnd, gbExpected, "gb18030");
    ucnv_close(cnv);
}
static void
TestLMBCS() {
static const uint8_t pszLMBCS[]={
0x61,
0x01, 0x29,
0x81,
0xA0,
0x0F, 0x27,
0x0F, 0x91,
0x14, 0x0a, 0x74,
0x14, 0xF6, 0x02,
0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56,
0x10, 0x88, 0xA0,
};
static const UChar32 pszUnicode32[]={
0x00000061,
0x00002013,
0x000000FC,
0x000000E1,
0x00000007,
0x00000091,
0x00000a74,
0x00000200,
0x00023456,
0x00005516
};
static const UChar pszUnicode[]={
0x0061,
0x2013,
0x00FC,
0x00E1,
0x0007,
0x0091,
0x0a74,
0x0200,
0xD84D,
0xDC56,
0x5516
};
static const int offsets32[]={
0,
1,
3,
4,
5,
7,
9,
12,
15,
21,
24
};
static const int offsets[]={
0,
1,
3,
4,
5,
7,
9,
12,
15,
18,
21,
24
};
UConverter *cnv;
#define NAME_LMBCS_1 "LMBCS-1"
#define NAME_LMBCS_2 "LMBCS-2"
{
char expected_subchars[] = {0x3F};
char new_subchars [] = {0x7F};
char get_subchars [1];
const char * get_name;
UConverter *cnv1;
UConverter *cnv2;
int8_t len = sizeof(get_subchars);
UErrorCode errorCode=U_ZERO_ERROR;
cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
if(U_FAILURE(errorCode)) {
log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
return;
}
cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
if(U_FAILURE(errorCode)) {
log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
return;
}
get_name = ucnv_getName (cnv1, &errorCode);
if (strcmp(NAME_LMBCS_1,get_name)){
log_err("Unexpected converter name: %s\n", get_name);
}
get_name = ucnv_getName (cnv2, &errorCode);
if (strcmp(NAME_LMBCS_2,get_name)){
log_err("Unexpected converter name: %s\n", get_name);
}
ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
if(U_FAILURE(errorCode)) {
log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
}
if (len!=1){
log_err("Unexpected length of sub chars\n");
}
if (get_subchars[0] != expected_subchars[0]){
log_err("Unexpected value of sub chars\n");
}
ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
if(U_FAILURE(errorCode)) {
log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
}
ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
if(U_FAILURE(errorCode)) {
log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
}
if (len!=1){
log_err("Unexpected length of sub chars\n");
}
if (get_subchars[0] != new_subchars[0]){
log_err("Unexpected value of sub chars\n");
}
ucnv_close(cnv1);
ucnv_close(cnv2);
}
{
UErrorCode errorCode=U_ZERO_ERROR;
const uint8_t * pSource = pszLMBCS;
const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS);
UChar Out [sizeof(pszUnicode) + 1];
UChar * pOut = Out;
UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
int32_t off [sizeof(offsets)];
off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
cnv=ucnv_open("lmbcs", &errorCode);
if(U_FAILURE(errorCode)) {
log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
return;
}
ucnv_toUnicode (cnv,
&pOut,
OutLimit,
(const char **)&pSource,
(const char *)sourceLimit,
off,
TRUE,
&errorCode);
if (memcmp(off,offsets,sizeof(offsets)))
{
log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
}
if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
{
log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
}
ucnv_close(cnv);
}
{
const char * sourceStart;
const char *source=(const char *)pszLMBCS;
const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
const UChar32 *results= pszUnicode32;
const int *off = offsets32;
UErrorCode errorCode=U_ZERO_ERROR;
UChar32 uniChar;
cnv=ucnv_open("LMBCS-1", &errorCode);
if(U_FAILURE(errorCode)) {
log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
return;
}
else
{
while(source<limit) {
sourceStart=source;
uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
if(U_FAILURE(errorCode)) {
log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
break;
} else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
uniChar, (source-sourceStart), *results, *off);
break;
}
results++;
off++;
}
}
ucnv_close(cnv);
}
{
UErrorCode errorCode=U_ZERO_ERROR;
UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
UChar uniString [] = {0x0192};
const UChar * pUniOut = uniString;
UChar * pUniIn = uniString;
uint8_t lmbcsString [4];
const uint8_t * pLMBCSOut = lmbcsString;
uint8_t * pLMBCSIn = lmbcsString;
ucnv_fromUnicode (cnv16he,
(char **)&pLMBCSIn, (const char *)(pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
&pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
NULL, 1, &errorCode);
if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
{
log_err("LMBCS-16,locale=he gives unexpected translation\n");
}
pLMBCSIn=lmbcsString;
pUniOut = uniString;
ucnv_fromUnicode (cnv01us,
(char **)&pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
&pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
NULL, 1, &errorCode);
if (lmbcsString[0] != 0x9F)
{
log_err("LMBCS-1,locale=US gives unexpected translation\n");
}
lmbcsString[0] = 0xAE;
pLMBCSOut = lmbcsString;
pUniIn = uniString;
ucnv_toUnicode (cnv16jp,
&pUniIn, pUniIn + 1,
(const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 1),
NULL, 1, &errorCode);
if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
{
log_err("Unexpected results from LMBCS-16 single byte char\n");
}
pLMBCSIn = lmbcsString;
pUniOut = uniString;
ucnv_fromUnicode (cnv01us,
(char **)&pLMBCSIn, (const char *)(pLMBCSIn + 3),
&pUniOut, pUniOut + 1,
NULL, 1, &errorCode);
if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+3 || pUniOut != uniString+1
|| lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
{
log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
}
pLMBCSOut = lmbcsString;
pUniIn = uniString;
ucnv_toUnicode (cnv01us,
&pUniIn, pUniIn + 1,
(const char **)&pLMBCSOut, (const char *)(pLMBCSOut + 3),
NULL, 1, &errorCode);
if (U_FAILURE(errorCode) || pLMBCSOut != lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
{
log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
}
pLMBCSIn = lmbcsString;
pUniOut = uniString;
ucnv_fromUnicode (cnv16jp,
(char **)&pLMBCSIn, (const char *)(pLMBCSIn + 1),
&pUniOut, pUniOut + 1,
NULL, 1, &errorCode);
if (U_FAILURE(errorCode) || pLMBCSIn != lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
{
log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
}
ucnv_close(cnv16he);
ucnv_close(cnv16jp);
ucnv_close(cnv01us);
}
{
UErrorCode errorCode=U_ZERO_ERROR;
const uint8_t * pSource = pszLMBCS;
const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS);
int codepointCount = 0;
UChar Out [sizeof(pszUnicode) + 1];
UChar * pOut = Out;
UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
if(U_FAILURE(errorCode)) {
log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
return;
}
while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
{
ucnv_toUnicode (cnv,
&pOut,
OutLimit,
(const char **)&pSource,
(const char *)(pSource+1),
NULL,
FALSE,
&errorCode);
if (U_SUCCESS (errorCode))
{
if ((pSource - (const uint8_t *)pszLMBCS) == offsets [codepointCount+1])
{
if (Out[0] != pszUnicode[codepointCount]){
log_err("LMBCS->Uni result %lx should have been %lx \n",
Out[0], pszUnicode[codepointCount]);
}
pOut = Out;
codepointCount++;
}
}
else
{
log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
}
}
{
uint8_t LIn [sizeof(pszLMBCS)];
const uint8_t * pLIn = LIn;
char LOut [sizeof(pszLMBCS)];
char * pLOut = LOut;
UChar UOut [sizeof(pszUnicode)];
UChar * pUOut = UOut;
UChar UIn [sizeof(pszUnicode)];
const UChar * pUIn = UIn;
int32_t off [sizeof(offsets)];
UChar32 uniChar;
errorCode=U_ZERO_ERROR;
ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode);
if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
{
log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
{
log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
{
log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
}
errorCode=U_ZERO_ERROR;
ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
if(U_FAILURE(errorCode)) {
log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
}
if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
{
log_err("Unexpected pointer move in 0 byte source request \n");
}
uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
{
log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
}
if (((uint32_t)uniChar - 0xfffe) > 1)
{
log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
}
errorCode = U_ZERO_ERROR;
pUIn = pszUnicode;
ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
{
log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
}
errorCode = U_ZERO_ERROR;
pLIn = pszLMBCS;
ucnv_toUnicode(cnv, &pUOut,pUOut+4,(const char **)&pLIn,(const char *)(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const uint8_t *)pszLMBCS+offsets[4])
{
log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
}
LIn [0] = 0x14;
LIn [1] = 0xD8;
LIn [2] = 0x01;
LIn [3] = 0x14;
LIn [4] = 0xDC;
pLIn = LIn;
errorCode = U_ZERO_ERROR;
pUOut = UOut;
ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
{
log_err("Unexpected results on chopped low surrogate\n");
}
LIn [0] = 0x14;
LIn [1] = 0xD8;
LIn [2] = 0x01;
pLIn = LIn;
errorCode = U_ZERO_ERROR;
pUOut = UOut;
ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
{
log_err("Unexpected results on chopped at surrogate boundary \n");
}
LIn [0] = 0x14;
LIn [1] = 0xD8;
LIn [2] = 0x01;
LIn [3] = 0x14;
LIn [4] = 0xC9;
LIn [5] = 0xD0;
pLIn = LIn;
errorCode = U_ZERO_ERROR;
pUOut = UOut;
ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
{
log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
}
LIn [0] = 0x14;
LIn [1] = 0xD8;
LIn [2] = 0x01;
LIn [3] = 0x14;
LIn [4] = 0xC9;
pLIn = LIn;
errorCode = U_ZERO_ERROR;
pUOut = UOut;
ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
{
log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
}
LIn [0] = 0x14;
LIn [1] = 0xD8;
LIn [2] = 0x01;
LIn [3] = 0x0F;
LIn [4] = 0x3B;
pLIn = LIn;
errorCode = U_ZERO_ERROR;
pUOut = UOut;
ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
{
log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
}
LIn [0] = 0x14;
LIn [1] = 0xD8;
LIn [2] = 0x01;
LIn [3] = 0x0F;
pLIn = LIn;
errorCode = U_ZERO_ERROR;
pUOut = UOut;
ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
{
log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
}
}
}
ucnv_close(cnv);
}
/*
 * Jitterbug 255 regression test.
 *
 * Opens a "shift-jis" converter and walks a fixed 7-byte input with
 * ucnv_getNextUChar() until the read position reaches the end of the input.
 * Logs an error and stops at the first call that reports a failure.
 * If the converter cannot be opened, a data error is logged and the test
 * returns without doing anything else.
 */
static void TestJitterbug255()
{
    const uint8_t sjisBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
    const uint8_t *cursor = sjisBytes;
    const uint8_t *limit = sjisBytes + sizeof(sjisBytes);
    UErrorCode status = U_ZERO_ERROR;
    UChar32 codePoint;
    UConverter *sjis = ucnv_open("shift-jis", &status);

    if (U_FAILURE(status) || sjis == NULL) {
        log_data_err("Failed to open the converter for SJIS.\n");
        return;
    }

    /* Consume one code point per iteration; stop at the end of the input or
     * as soon as the converter reports a failure. */
    while (cursor != limit && !U_FAILURE(status)) {
        codePoint = ucnv_getNextUChar(sjis,
                                      (const char **)&cursor,
                                      (const char *)limit,
                                      &status);
    }

    if (U_FAILURE(status)) {
        log_err("Failed to convert the next UChar for SJIS.\n");
    }

    ucnv_close(sjis);
}
/*
 * Round-trip test for the "ebcdic-xml-us" converter.
 *
 * Step 1: converts the bytes 0x25, 0x15, 0x0D to Unicode and expects
 *         U+000A, U+000A, U+000D.
 * Step 2: converts U+000A, U+000A, U+000D back to bytes and expects
 *         0x25, 0x25, 0x0D.
 *
 * On a mismatch (or a failing status) the actual and expected sequences are
 * printed after the error message. If the converter cannot be opened, a data
 * error is logged and the test returns.
 */
static void TestEBCDICUS4XML()
{
    enum { kUnitCount = 3 };   /* number of code units converted and compared */

    static const char ebcdicInput[] = {0x25, 0x15, 0x0D, 0x00};
    static const UChar expectedUnicode[] = {0x000A, 0x000A, 0x000D, 0x0000};
    static const char expectedEbcdic[] = {0x25, 0x25, 0x0D, 0x00};

    UChar unicodeOut[] = {0x0000, 0x0000, 0x0000, 0x0000};
    char ebcdicOut[] = {0x00, 0x00, 0x00, 0x00};

    /* Cursors that the conversion functions advance. */
    const char *byteSrc = ebcdicInput;
    UChar *uniDst = unicodeOut;
    const UChar *uniSrc = expectedUnicode;
    char *byteDst = ebcdicOut;

    UErrorCode status = U_ZERO_ERROR;
    UConverter *cnv = ucnv_open("ebcdic-xml-us", &status);

    if (U_FAILURE(status) || cnv == 0) {
        log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
        return;
    }

    /* EBCDIC bytes -> Unicode */
    ucnv_toUnicode(cnv,
                   &uniDst, unicodeOut + kUnitCount,
                   &byteSrc, ebcdicInput + kUnitCount,
                   NULL, TRUE, &status);
    if (U_FAILURE(status) ||
        memcmp(unicodeOut, expectedUnicode, kUnitCount * sizeof(UChar)) != 0) {
        log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
                u_errorName(status));
        printUSeqErr(unicodeOut, kUnitCount);
        printUSeqErr(expectedUnicode, kUnitCount);
    }

    /* Unicode -> EBCDIC bytes, starting from a clean status */
    status = U_ZERO_ERROR;
    ucnv_fromUnicode(cnv,
                     &byteDst, ebcdicOut + kUnitCount,
                     &uniSrc, expectedUnicode + kUnitCount,
                     NULL, TRUE, &status);
    if (U_FAILURE(status) ||
        memcmp(ebcdicOut, expectedEbcdic, kUnitCount * sizeof(char)) != 0) {
        log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
                u_errorName(status));
        printSeqErr((const unsigned char*)ebcdicOut, kUnitCount);
        printSeqErr((const unsigned char*)expectedEbcdic, kUnitCount);
    }

    ucnv_close(cnv);
}
#if !UCONFIG_NO_COLLATION
/*
 * Jitterbug 981 regression test.
 *
 * Converts the rule string of the "zh" collator to UTF-8 with
 * ucnv_fromUChars(), first with a target capacity of 0 and then with the
 * capacity the previous call asked for, repeating while the call reports
 * U_BUFFER_OVERFLOW_ERROR. Logs an error if two consecutive calls report a
 * different required length.
 *
 * The converter, the collator and the conversion buffer are released on
 * every exit path, including the error paths.
 */
static void TestJitterbug981(){
    const UChar* rules;
    int32_t rules_length, target_cap, bytes_needed, buff_size;
    UErrorCode status = U_ZERO_ERROR;
    UConverter *utf8cnv;
    UCollator* myCollator;
    char *buff;
    int numNeeded=0;

    utf8cnv = ucnv_open ("utf8", &status);
    if(U_FAILURE(status)){
        log_err("Could not open UTF-8 converter. Error: %s", u_errorName(status));
        return;
    }
    myCollator = ucol_open("zh", &status);
    if(U_FAILURE(status)){
        log_err("Could not open collator for zh locale. Error: %s", u_errorName(status));
        /* the converter is already open at this point and must not leak */
        goto close_converter;
    }
    rules = ucol_getRules(myCollator, &rules_length);
    buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
    buff = malloc(buff_size);
    /* malloc(0) may legitimately return NULL; the capacity check inside the
     * loop reports that case, so only a failed non-empty request is treated
     * as an allocation error here. */
    if(buff == NULL && buff_size > 0) {
        log_err("Could not allocate %d bytes for the conversion buffer\n", (int)buff_size);
        goto close_collator;
    }
    target_cap = 0;
    do {
        ucnv_reset(utf8cnv);
        status = U_ZERO_ERROR;
        if(target_cap >= buff_size) {
            log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
            /* leave the loop instead of returning so the cleanup below runs */
            break;
        }
        bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
                                       rules, rules_length, &status);
        /* grow to the reported size, or by one byte if nothing larger was reported */
        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
        if(numNeeded!=0 && numNeeded!= bytes_needed){
            log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
        }
        numNeeded = bytes_needed;
    } while (status == U_BUFFER_OVERFLOW_ERROR);

    free(buff);
close_collator:
    ucol_close(myCollator);
close_converter:
    ucnv_close(utf8cnv);
}
#endif
/*
 * Jitterbug 1293 regression test.
 *
 * Converts a short UTF-16 string to Shift-JIS with ucnv_fromUChars(), first
 * with a target capacity of 0 (pre-flight) and then with the capacity the
 * previous call asked for, repeating while the call reports
 * U_BUFFER_OVERFLOW_ERROR. Logs an error if two consecutive calls report a
 * different required length, or if the final call fails.
 *
 * The capacity passed to ucnv_fromUChars() is target_cap, never more than
 * sizeof(target), and the status is cleared before every call so that an
 * overflow from the previous iteration cannot make the loop spin forever.
 * The converter is closed on every path after it has been opened.
 */
static void TestJitterbug1293(){
    static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
    char target[256];
    UErrorCode status = U_ZERO_ERROR;
    UConverter* conv=NULL;
    int32_t target_cap = 0, bytes_needed, numNeeded = 0;

    conv = ucnv_open("shift-jis",&status);
    if(U_FAILURE(status)){
        log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
        return;
    }
    do{
        /* start each attempt from a clean status; a sticky overflow error
         * would otherwise make every later call a no-op */
        status = U_ZERO_ERROR;
        if(target_cap > (int32_t)sizeof(target)) {
            log_err("wanted %d bytes, only %d available\n", (int)target_cap, (int)sizeof(target));
            break;
        }
        bytes_needed = ucnv_fromUChars(conv,target,target_cap,src,u_strlen(src),&status);
        /* grow to the reported size, or by one byte if nothing larger was reported */
        target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
        if(numNeeded!=0 && numNeeded!= bytes_needed){
            log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
        }
        numNeeded = bytes_needed;
    } while (status == U_BUFFER_OVERFLOW_ERROR);
    if(U_FAILURE(status)){
        log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
    }
    /* reached on success and on failure alike, so the converter never leaks */
    ucnv_close(conv);
}
#endif