#include "unicode/utypes.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "unicode/uset.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
/*
 * Compile-time switches for the hand-unrolled fast paths in this file.
 * When a switch is non-zero, the corresponding conversion function compiles
 * an additional loop that handles 16 code units per iteration in front of
 * its one-unit-at-a-time loop.
 */
/* gates the unrolled loop in _Latin1ToUnicodeWithOffsets */
#define LATIN1_UNROLL_TO_UNICODE 1
/* gates the unrolled loop in _Latin1FromUnicodeWithOffsets */
#define LATIN1_UNROLL_FROM_UNICODE 1
/* gates the unrolled loop in _ASCIIToUnicodeWithOffsets */
#define ASCII_UNROLL_TO_UNICODE 1
/*
 * ISO-8859-1 to Unicode: every source byte becomes one UChar with the same
 * numeric value.
 *
 * Copies min(source length, target capacity) units, then stores the advanced
 * source and target pointers back into pArgs. If pArgs->offsets is not NULL,
 * one offset is written per output unit (the consecutive source indexes
 * 0, 1, 2, ...) and the advanced offsets pointer is stored back as well.
 *
 * *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR when the source holds more
 * bytes than the target has room for; it is not touched otherwise.
 */
static void
_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                            UErrorCode *pErrorCode) {
    const uint8_t *in=(const uint8_t *)pArgs->source;
    UChar *out=pArgs->target;
    int32_t *offs=pArgs->offsets;
    int32_t nextIndex=0;
    int32_t capacity=pArgs->targetLimit-pArgs->target;
    int32_t unitsLeft=(const uint8_t *)pArgs->sourceLimit-in;
    int32_t offsetsLeft;

    /*
     * The mapping is 1:1, so a single count - the smaller of source length
     * and target capacity - drives both the copy and the offsets.
     */
    if(unitsLeft>capacity) {
        /* the target will be filled completely with input left over */
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        unitsLeft=capacity;
    }
    offsetsLeft=unitsLeft;

#if LATIN1_UNROLL_TO_UNICODE
    if(unitsLeft>=16) {
        int32_t blocks=unitsLeft>>4;
        int32_t b;

        /* only the remainder (0..15 units) is left for the simple loops */
        unitsLeft&=0xf;
        offsetsLeft=unitsLeft;

        /* copy 16 units per iteration */
        for(b=0; b<blocks; ++b) {
            out[0]=in[0];
            out[1]=in[1];
            out[2]=in[2];
            out[3]=in[3];
            out[4]=in[4];
            out[5]=in[5];
            out[6]=in[6];
            out[7]=in[7];
            out[8]=in[8];
            out[9]=in[9];
            out[10]=in[10];
            out[11]=in[11];
            out[12]=in[12];
            out[13]=in[13];
            out[14]=in[14];
            out[15]=in[15];
            out+=16;
            in+=16;
        }

        /* matching offsets for the units copied above */
        if(offs!=NULL) {
            for(b=0; b<blocks; ++b) {
                offs[0]=nextIndex;
                offs[1]=nextIndex+1;
                offs[2]=nextIndex+2;
                offs[3]=nextIndex+3;
                offs[4]=nextIndex+4;
                offs[5]=nextIndex+5;
                offs[6]=nextIndex+6;
                offs[7]=nextIndex+7;
                offs[8]=nextIndex+8;
                offs[9]=nextIndex+9;
                offs[10]=nextIndex+10;
                offs[11]=nextIndex+11;
                offs[12]=nextIndex+12;
                offs[13]=nextIndex+13;
                offs[14]=nextIndex+14;
                offs[15]=nextIndex+15;
                offs+=16;
                nextIndex+=16;
            }
        }
    }
#endif

    /* copy whatever is left one unit at a time */
    for(; unitsLeft>0; --unitsLeft) {
        *out++=*in++;
    }

    /* hand the advanced pointers back to the caller */
    pArgs->source=(const char *)in;
    pArgs->target=out;

    /* offsets for the units copied by the simple loop */
    if(offs!=NULL) {
        for(; offsetsLeft>0; --offsetsLeft) {
            *offs++=nextIndex++;
        }
        pArgs->offsets=offs;
    }
}
/*
 * Returns the next ISO-8859-1 byte as a code point and advances
 * pArgs->source by one.
 *
 * If no input is available (source is not below sourceLimit), sets
 * *pErrorCode to U_INDEX_OUTOFBOUNDS_ERROR and returns 0xffff without
 * modifying pArgs.
 */
static UChar32
_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
                    UErrorCode *pErrorCode) {
    const uint8_t *next=(const uint8_t *)pArgs->source;

    if(next>=(const uint8_t *)pArgs->sourceLimit) {
        /* empty input: nothing to return */
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0xffff;
    }

    pArgs->source=(const char *)(next+1);
    return *next;
}
/*
 * Converts UTF-16 input to single-byte output. This one function serves two
 * charsets: the largest convertible code unit ("max") is 0xff when the
 * converter's sharedData is _Latin1Data and 0x7f otherwise.
 *
 * Code units <= max are written as one byte each. Anything else is reported
 * through the converter's fromUCharErrorBehaviour callback:
 *   - non-surrogate above max, or a complete surrogate pair:
 *     UCNV_UNASSIGNED / U_INVALID_CHAR_FOUND
 *   - unpaired lead or trail surrogate:
 *     UCNV_ILLEGAL / U_ILLEGAL_CHAR_FOUND
 * A lead surrogate at the very end of the input is kept in
 * cnv->fromUSurrogateLead for the next call, unless pArgs->flush is set, in
 * which case U_TRUNCATED_CHAR_FOUND is reported.
 *
 * On return pArgs->source, pArgs->target and pArgs->offsets are advanced.
 * If pArgs->offsets is not NULL, one source index is written per output byte.
 * *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR when input remains but the
 * target is full.
 */
static void
_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
UErrorCode *pErrorCode) {
UConverter *cnv;
const UChar *source, *sourceLimit, *lastSource;
uint8_t *target;
int32_t targetCapacity, length;
int32_t *offsets;
UChar32 c, max;
int32_t sourceIndex;
UConverterCallbackReason reason;
int32_t i;
/* cache the arguments in locals */
cnv=pArgs->converter;
source=pArgs->source;
sourceLimit=pArgs->sourceLimit;
target=(uint8_t *)pArgs->target;
targetCapacity=pArgs->targetLimit-pArgs->target;
offsets=pArgs->offsets;
/* choose the highest code unit that converts directly */
if(cnv->sharedData==&_Latin1Data) {
max=0xff;
} else {
max=0x7f;
}
/* a non-zero value is a lead surrogate left pending by an earlier call */
c=cnv->fromUSurrogateLead;
/* with a pending lead surrogate, the current character starts before this buffer: index -1 */
sourceIndex= c==0 ? 0 : -1;
/* lastSource marks the first source unit whose offset has not been written yet */
lastSource=source;
/*
 * One input unit yields one output byte, so a single counter - the smaller
 * of source length and target capacity - bounds the loops.
 */
length=sourceLimit-source;
if(length<targetCapacity) {
targetCapacity=length;
}
/* resume a surrogate pair that was split across buffers */
if(c!=0 && targetCapacity>0) {
goto getTrail;
}
#if LATIN1_UNROLL_FROM_UNICODE
/* fast path: convert 16 units at a time; re-entered after each callback */
unrolled:
if(targetCapacity>=16) {
int32_t count, loops;
UChar u, oredChars;
loops=count=targetCapacity>>4;
do {
/* write each unit unconditionally and OR all 16 together for one range check */
oredChars=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
oredChars|=u=*source++;
*target++=(uint8_t)u;
/*
 * max is 0x7f or 0xff, so any unit above max sets a bit above max in the OR.
 * If that happened, rewind this group of 16 and let the one-at-a-time loop
 * below find and handle the offending unit.
 */
if(oredChars>max) {
source-=16;
target-=16;
break;
}
} while(--count>0);
/* count becomes the number of groups of 16 that were converted completely */
count=loops-count;
targetCapacity-=16*count;
if(offsets!=NULL) {
/* these units get their offsets right here */
lastSource+=16*count;
while(count>0) {
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
--count;
}
}
/* no character is pending after the fast path */
c=0;
}
#endif
/* general loop: one code unit per iteration, with full error handling */
while(targetCapacity>0) {
c=*source++;
if(c<=max) {
/* directly convertible */
*target++=(uint8_t)c;
--targetCapacity;
/* character complete: nothing pending */
c=0;
} else {
if(!UTF_IS_SURROGATE(c)) {
/* a non-surrogate code unit that this charset cannot represent */
reason=UCNV_UNASSIGNED;
*pErrorCode=U_INVALID_CHAR_FOUND;
} else if(UTF_IS_SURROGATE_FIRST(c)) {
/* lead surrogate: look at the following unit (also the resume point for a pending lead) */
getTrail:
if(source<sourceLimit) {
UChar trail=*source;
if(UTF_IS_SECOND_SURROGATE(trail)) {
/* matched pair: combine into a supplementary code point, which is never convertible here */
++source;
c=UTF16_GET_PAIR_VALUE(c, trail);
reason=UCNV_UNASSIGNED;
*pErrorCode=U_INVALID_CHAR_FOUND;
} else {
/* lead surrogate without a trail surrogate */
reason=UCNV_ILLEGAL;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
}
} else {
/* input ends after the lead surrogate: leave it in c and stop */
break;
}
} else {
/* trail surrogate without a preceding lead surrogate */
reason=UCNV_ILLEGAL;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
}
/* number of UTF-16 units of c, needed to advance sourceIndex after the callback */
length=UTF_CHAR_LENGTH(c);
if(offsets!=NULL) {
/* write offsets for the units converted since lastSource, excluding the offending character */
int32_t count=(int32_t)(source-lastSource);
count-=length;
while(count>0) {
*offsets++=sourceIndex++;
--count;
}
}
/* publish the current position so the callback sees consistent arguments */
pArgs->source=source;
pArgs->target=(char *)target;
pArgs->offsets=offsets;
/* clear the pending state before the callback runs */
cnv->fromUSurrogateLead=0;
/* store c as UTF-16 in invalidUCharBuffer; i ends up as its length in units */
i=0;
UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
cnv->invalidUCharLength=(int8_t)i;
cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, reason, pErrorCode);
/* re-read the state: the callback may have changed the converter and pArgs */
c=cnv->fromUSurrogateLead;
/* NOTE(review): presumably assigns sourceIndex as the offset of each byte the callback wrote - confirm in ucnv_updateCallbackOffsets */
offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
target=(uint8_t *)pArgs->target;
/* skip the offending character plus whatever the callback itself consumed */
sourceIndex+=length+(pArgs->source-source);
source=lastSource=pArgs->source;
/* recompute the loop bound from the new positions */
targetCapacity=(uint8_t *)pArgs->targetLimit-target;
length=sourceLimit-source;
if(length<targetCapacity) {
targetCapacity=length;
}
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
/* the callback overflowed the target: stop, keeping c as set by the callback */
break;
} else if(U_FAILURE(*pErrorCode)) {
/* any other error stops the conversion with nothing pending */
c=0;
break;
} else if(cnv->charErrorBufferLength>0) {
/* the callback left output in the converter's overflow buffer: the target is full */
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
break;
}
#if LATIN1_UNROLL_FROM_UNICODE
/* the callback succeeded: try the fast path again */
goto unrolled;
#endif
}
}
/* input remains but the target is full */
if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
/* write offsets for the units consumed since the fast path or the last callback */
if(offsets!=NULL) {
size_t count=source-lastSource;
while(count>0) {
*offsets++=sourceIndex++;
--count;
}
}
if(pArgs->flush && source>=sourceLimit) {
/* end of input while flushing: a pending lead surrogate is a truncated character */
if(c!=0 && U_SUCCESS(*pErrorCode)) {
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
}
cnv->fromUSurrogateLead=0;
} else {
/* remember a pending lead surrogate (or 0) for the next call */
cnv->fromUSurrogateLead=(UChar)c;
}
/* hand the advanced pointers back to the caller */
pArgs->source=source;
pArgs->target=(char *)target;
pArgs->offsets=offsets;
}
/*
 * Adds the range U+0000..U+00FF to set.
 * cnv, which and pErrorCode are accepted for the callback signature but are
 * not used.
 */
static void
_Latin1GetUnicodeSet(const UConverter *cnv,
                     USet *set,
                     UConverterUnicodeSet which,
                     UErrorCode *pErrorCode) {
    (void)cnv;
    (void)which;
    (void)pErrorCode;

    uset_addRange(set, 0x00, 0xff);
}
/*
 * Function table for the ISO-8859-1 converter.
 * This is a positional initializer: what each slot means is fixed by the
 * UConverterImpl declaration, which is not in this file. NULL leaves a slot
 * without a function.
 * NOTE(review): the slot labels below are inferred from the names of the
 * functions placed in them - confirm against the UConverterImpl declaration.
 */
static const UConverterImpl _Latin1Impl={
UCNV_LATIN_1, /* converter type constant */
NULL,
NULL,
NULL,
NULL,
NULL,
/* presumably to-Unicode without and with offsets; the same function serves both (it checks offsets for NULL) */
_Latin1ToUnicodeWithOffsets,
_Latin1ToUnicodeWithOffsets,
/* presumably from-Unicode without and with offsets; again one function for both */
_Latin1FromUnicodeWithOffsets,
_Latin1FromUnicodeWithOffsets,
_Latin1GetNextUChar, /* presumably the get-next-code-point slot */
NULL,
NULL,
NULL,
NULL,
_Latin1GetUnicodeSet /* presumably the get-Unicode-set slot */
};
/*
 * Constant descriptive data for the ISO-8859-1 converter.
 * Positional initializer; field meanings come from the UConverterStaticData
 * declaration, which is not in this file.
 * NOTE(review): the field notes below are assumptions - confirm against that
 * declaration.
 */
static const UConverterStaticData _Latin1StaticData={
sizeof(UConverterStaticData), /* size of this structure */
"ISO-8859-1", /* converter name */
/* presumably: codepage number, platform, converter type, min and max bytes per character */
819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
/* presumably: substitution character bytes (0x1a) and their length, then two boolean flags */
{ 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
0,
0,
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* presumably reserved/padding bytes */
};
/*
 * Shared data object for the ISO-8859-1 converter; it ties together
 * _Latin1StaticData and _Latin1Impl. _Latin1FromUnicodeWithOffsets compares a
 * converter's sharedData pointer against the address of this object to decide
 * whether code units up to 0xff (instead of 0x7f) are convertible.
 * NOTE(review): the all-ones second field is presumably a reference counter
 * preset so that this static object is never released - confirm against the
 * UConverterSharedData declaration.
 */
const UConverterSharedData _Latin1Data={
sizeof(UConverterSharedData), ~((uint32_t) 0),
NULL, NULL, &_Latin1StaticData, FALSE, &_Latin1Impl,
0
};
/*
 * Converts US-ASCII bytes to UTF-16 code units.
 *
 * Bytes <= 0x7f become one UChar each with the same value. Any byte above
 * 0x7f is reported through the converter's fromCharErrorBehaviour callback
 * with UCNV_ILLEGAL / U_ILLEGAL_CHAR_FOUND; conversion continues after the
 * callback if it leaves *pErrorCode in a success state.
 *
 * On return pArgs->source, pArgs->target and pArgs->offsets are advanced.
 * If pArgs->offsets is not NULL, one source index is written per output unit.
 * *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR when input remains but the
 * target is full.
 */
static void
_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
UErrorCode *pErrorCode) {
const uint8_t *source, *sourceLimit, *lastSource;
UChar *target;
int32_t targetCapacity, length;
int32_t *offsets;
int32_t sourceIndex;
/* cache the arguments in locals */
source=(const uint8_t *)pArgs->source;
sourceLimit=(const uint8_t *)pArgs->sourceLimit;
target=pArgs->target;
targetCapacity=pArgs->targetLimit-pArgs->target;
offsets=pArgs->offsets;
sourceIndex=0;
/* lastSource marks the first source byte whose offset has not been written yet */
lastSource=source;
/*
 * One input byte yields one output unit, so a single counter - the smaller
 * of source length and target capacity - bounds the loops.
 */
length=sourceLimit-source;
if(length<targetCapacity) {
targetCapacity=length;
}
#if ASCII_UNROLL_TO_UNICODE
/* fast path: convert 16 bytes at a time; re-entered after each callback */
unrolled:
if(targetCapacity>=16) {
int32_t count, loops;
UChar oredChars;
loops=count=targetCapacity>>4;
do {
/* copy each byte unconditionally and OR all 16 together for one range check */
oredChars=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
oredChars|=*target++=*source++;
/*
 * A byte above 0x7f has bit 7 set, which survives the OR. If that happened,
 * rewind this group of 16 and let the one-at-a-time loop below find and
 * handle the offending byte.
 */
if(oredChars>0x7f) {
source-=16;
target-=16;
break;
}
} while(--count>0);
/* count becomes the number of groups of 16 that were converted completely */
count=loops-count;
targetCapacity-=16*count;
if(offsets!=NULL) {
/* these bytes get their offsets right here */
lastSource+=16*count;
while(count>0) {
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
*offsets++=sourceIndex++;
--count;
}
}
}
#endif
/* general loop: one byte per iteration, with full error handling */
while(targetCapacity>0) {
/* copy first, then test: the common case needs no extra work */
if((*target++=*source++)<=0x7f) {
--targetCapacity;
} else {
UConverter *cnv;
/* take back the illegal byte that was just written to the target */
--target;
cnv=pArgs->converter;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
if(offsets!=NULL) {
int32_t count=(int32_t)(source-lastSource);
/* predecrement: the offending byte itself gets no offset here */
while(--count>0) {
*offsets++=sourceIndex++;
}
}
/* publish the current position so the callback sees consistent arguments */
pArgs->source=(const char *)source;
pArgs->target=target;
pArgs->offsets=offsets;
/* hand the offending byte to the callback via invalidCharBuffer */
cnv->invalidCharBuffer[0]=*(source-1);
cnv->invalidCharLength=1;
cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, 1, UCNV_ILLEGAL, pErrorCode);
/* NOTE(review): presumably assigns sourceIndex as the offset of each unit the callback wrote - confirm in ucnv_updateCallbackOffsets */
offsets=ucnv_updateCallbackOffsets(offsets, pArgs->target-target, sourceIndex);
target=pArgs->target;
/* skip the offending byte plus whatever the callback itself consumed */
sourceIndex+=1+((const uint8_t *)pArgs->source-source);
source=lastSource=(const uint8_t *)pArgs->source;
/* recompute the loop bound from the new positions */
targetCapacity=pArgs->targetLimit-target;
length=sourceLimit-source;
if(length<targetCapacity) {
targetCapacity=length;
}
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
/* the callback overflowed the target */
break;
} else if(U_FAILURE(*pErrorCode)) {
/* any other error stops the conversion */
break;
} else if(cnv->UCharErrorBufferLength>0) {
/* the callback left output in the converter's overflow buffer: the target is full */
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
break;
}
#if ASCII_UNROLL_TO_UNICODE
/* the callback succeeded: try the fast path again */
goto unrolled;
#endif
}
}
/* input remains but the target is full */
if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
/* write offsets for the bytes consumed since the fast path or the last callback */
if(offsets!=NULL) {
size_t count=source-lastSource;
while(count>0) {
*offsets++=sourceIndex++;
--count;
}
}
/* hand the advanced pointers back to the caller */
pArgs->source=(const char *)source;
pArgs->target=target;
pArgs->offsets=offsets;
}
/*
 * Returns the next code point from US-ASCII input.
 *
 * A byte <= 0x7f is returned directly. A byte above 0x7f is passed to the
 * converter's fromCharErrorBehaviour callback (UCNV_ILLEGAL,
 * U_ILLEGAL_CHAR_FOUND) with a local buffer installed as pArgs->target and
 * pArgs->targetLimit; those two fields are left pointing at that buffer.
 *   - callback succeeded and wrote output: the result of
 *     ucnv_getUChar32KeepOverflow on that output is returned
 *   - callback succeeded and wrote nothing: continue with the next byte
 *   - callback reported U_BUFFER_OVERFLOW_ERROR: the error is reset to
 *     U_ZERO_ERROR and the full buffer is passed to
 *     ucnv_getUChar32KeepOverflow
 *   - any other error: 0xffff is returned with the error left in place
 *
 * When the input is exhausted, sets *pErrorCode to U_INDEX_OUTOFBOUNDS_ERROR
 * and returns 0xffff.
 */
static UChar32
_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
                   UErrorCode *pErrorCode) {
    UChar callbackOutput[UTF_MAX_CHAR_LENGTH];
    const uint8_t *cursor=(const uint8_t *)pArgs->source;

    while(cursor<(const uint8_t *)pArgs->sourceLimit) {
        UConverter *cnv;
        uint8_t byte=*cursor++;

        /* the byte counts as consumed whether or not it is legal */
        pArgs->source=(const char *)cursor;
        if(byte<=0x7f) {
            return byte;
        }

        /* illegal byte: let the callback decide, writing into the local buffer */
        cnv=pArgs->converter;
        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
        pArgs->target=callbackOutput;
        pArgs->targetLimit=callbackOutput+UTF_MAX_CHAR_LENGTH;
        cnv->invalidCharBuffer[0]=(char)byte;
        cnv->invalidCharLength=1;
        cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, 1, UCNV_ILLEGAL, pErrorCode);

        /* the callback may have moved the source pointer */
        cursor=(const uint8_t *)pArgs->source;

        if(U_SUCCESS(*pErrorCode)) {
            int32_t produced=pArgs->target-callbackOutput;
            if(produced>0) {
                return ucnv_getUChar32KeepOverflow(cnv, callbackOutput, produced);
            }
            /* the callback skipped the byte without output: keep reading */
            continue;
        }

        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
            /* the local buffer was filled completely; the overflow itself is not an error here */
            *pErrorCode=U_ZERO_ERROR;
            return ucnv_getUChar32KeepOverflow(cnv, callbackOutput, UTF_MAX_CHAR_LENGTH);
        }

        /* the callback failed: report the error it set */
        return 0xffff;
    }

    /* no (more) input */
    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    return 0xffff;
}
/*
 * Adds the range U+0000..U+007F to set.
 * cnv, which and pErrorCode are accepted for the callback signature but are
 * not used.
 */
static void
_ASCIIGetUnicodeSet(const UConverter *cnv,
                    USet *set,
                    UConverterUnicodeSet which,
                    UErrorCode *pErrorCode) {
    (void)cnv;
    (void)which;
    (void)pErrorCode;

    uset_addRange(set, 0x00, 0x7f);
}
/*
 * Function table for the US-ASCII converter.
 * This is a positional initializer: what each slot means is fixed by the
 * UConverterImpl declaration, which is not in this file. NULL leaves a slot
 * without a function.
 * NOTE(review): the slot labels below are inferred from the names of the
 * functions placed in them - confirm against the UConverterImpl declaration.
 */
static const UConverterImpl _ASCIIImpl={
UCNV_US_ASCII, /* converter type constant */
NULL,
NULL,
NULL,
NULL,
NULL,
/* presumably to-Unicode without and with offsets; the same function serves both (it checks offsets for NULL) */
_ASCIIToUnicodeWithOffsets,
_ASCIIToUnicodeWithOffsets,
/*
 * The from-Unicode direction reuses the Latin-1 function, which limits
 * convertible code units to 0x7f when the converter's sharedData is not
 * _Latin1Data.
 */
_Latin1FromUnicodeWithOffsets,
_Latin1FromUnicodeWithOffsets,
_ASCIIGetNextUChar, /* presumably the get-next-code-point slot */
NULL,
NULL,
NULL,
NULL,
_ASCIIGetUnicodeSet /* presumably the get-Unicode-set slot */
};
/*
 * Constant descriptive data for the US-ASCII converter.
 * Positional initializer; field meanings come from the UConverterStaticData
 * declaration, which is not in this file.
 * NOTE(review): the field notes below are assumptions - confirm against that
 * declaration.
 */
static const UConverterStaticData _ASCIIStaticData={
sizeof(UConverterStaticData), /* size of this structure */
"US-ASCII", /* converter name */
/* presumably: codepage number, platform, converter type, min and max bytes per character */
367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
/* presumably: substitution character bytes (0x1a) and their length, then two boolean flags */
{ 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
0,
0,
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* presumably reserved/padding bytes */
};
/*
 * Shared data object for the US-ASCII converter; it ties together
 * _ASCIIStaticData and _ASCIIImpl.
 * NOTE(review): the all-ones second field is presumably a reference counter
 * preset so that this static object is never released - confirm against the
 * UConverterSharedData declaration.
 */
const UConverterSharedData _ASCIIData={
sizeof(UConverterSharedData), ~((uint32_t) 0),
NULL, NULL, &_ASCIIStaticData, FALSE, &_ASCIIImpl,
0
};