#include <stdio.h>
#include <stdlib.h>
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "unicode/ustring.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "utrie.h"
#include "unicode/udata.h"
#include "unewdata.h"
#include "propsvec.h"
#include "gencase.h"
/*
 * UDataInfo header for the generated data file; it is handed to
 * udata_create() in generateData().
 * The per-field notes below follow the field order of UDataInfo as declared
 * in unicode/udata.h -- confirm there if the struct ever changes.
 */
static UDataInfo dataInfo={
sizeof(UDataInfo), /* size of this structure */
0, /* reserved */
U_IS_BIG_ENDIAN, /* byte order of the build platform */
U_CHARSET_FAMILY, /* charset family of the build platform */
U_SIZEOF_UCHAR, /* size of a UChar on the build platform */
0, /* reserved */
{ UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, /* data format signature */
{ 1, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* format version; also records the trie shift parameters */
{ 4, 0, 1, 0 } /* data version; its 4 bytes are overwritten by setUnicodeVersion() */
};
/*
 * Capacity of excProps[]. setProps() aborts the program as soon as
 * exceptionsCount reaches this value.
 */
enum {
MAX_EXC_COUNT=1000
};
/*
 * Output array of 16-bit exception words, filled by makeException() and
 * written to the data file by generateData().
 * makeException() only checks that a new exception *starts* below
 * UCASE_MAX_EXCEPTIONS; the extra 100 words are headroom for the words
 * belonging to that last exception.
 */
static uint16_t exceptions[UCASE_MAX_EXCEPTIONS+100];
/* Number of words used so far in exceptions[]. */
static uint16_t exceptionsTop=0;
/*
 * Copies of the Props of every code point that setProps() flagged with
 * UCASE_EXCEPTION; the index into this array is stored temporarily in the
 * properties vector value (shifted by UGENCASE_EXC_SHIFT) until
 * makeExceptions() replaces it with the final index into exceptions[].
 */
static Props excProps[MAX_EXC_COUNT];
/* Number of entries used so far in excProps[]. */
static uint16_t exceptionsCount=0;
/*
 * Longest full case mapping/folding string seen by makeException(), never
 * less than U16_MAX_LENGTH; stored in indexes[UCASE_IX_MAX_FULL_LENGTH].
 */
static int32_t maxFullLength=U16_MAX_LENGTH;
/*
 * Parse the Unicode version string v and store its first four version
 * fields as the data version in the file header (dataInfo.dataVersion).
 */
extern void
setUnicodeVersion(const char *v) {
    UVersionInfo parsedVersion;

    u_versionFromString(parsedVersion, v);

    /* the header carries exactly 4 version bytes */
    uprv_memcpy(dataInfo.dataVersion, parsedVersion, 4);
}
/*
 * Merge the properties of one code point into column 0 of the properties
 * vector pv.
 *
 * Starting from the value already stored for p->code, this
 *  - marks titlecase letters with UCASE_TITLE,
 *  - encodes a simple upper/lower mapping as a delta when the existing case
 *    type allows it, and otherwise flags the code point with UCASE_EXCEPTION,
 *  - records the accent class derived from the combining class p->cc,
 *  - sets delta=1 for uncased "case-ignorable" characters,
 *  - and, for exceptions, saves a copy of *p in excProps[] and stores its
 *    index in the value for later processing by makeExceptions().
 *
 * Any inconsistency or overflow is fatal: a message is printed to stderr and
 * the program exits.
 */
extern void
setProps(Props *p) {
    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t oldValue=upvec_getValue(pv, p->code, 0);
    uint32_t value=oldValue;
    int32_t delta=0;

    if(p->gc==U_TITLECASE_LETTER) {
        value|=UCASE_TITLE;
    }

    /* a simple uppercase mapping fits a delta only for lowercase characters */
    if(p->upperCase!=0) {
        if((value&UCASE_TYPE_MASK)!=UCASE_LOWER) {
            value|=UCASE_EXCEPTION;
        } else {
            delta=p->upperCase-p->code;
        }
    }

    /* a simple lowercase mapping fits a delta only for upper/titlecase characters */
    if(p->lowerCase!=0) {
        if((value&UCASE_TYPE_MASK)<UCASE_UPPER) {
            value|=UCASE_EXCEPTION;
        } else {
            delta=p->lowerCase-p->code;
        }
    }

    /*
     * Everything else that cannot be expressed in the plain value word:
     * a distinct titlecase mapping, special casing, case folding data,
     * or a delta outside the representable range.
     */
    if( p->upperCase!=p->titleCase ||
        p->specialCasing!=NULL ||
        p->caseFolding!=NULL ||
        delta<UCASE_MIN_DELTA ||
        UCASE_MAX_DELTA<delta
    ) {
        value|=UCASE_EXCEPTION;
    }

    /* accent class from the combining class; conflicts with the soft-dotted bits */
    if(p->cc!=0) {
        if(value&UCASE_DOT_MASK) {
            fprintf(stderr, "gencase: a soft-dotted character has cc!=0\n");
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
        value|= p->cc==230 ? UCASE_ABOVE : UCASE_OTHER_ACCENT;
    }

    /*
     * Uncased characters that are case-ignorable (selected general
     * categories plus U+0027, U+00AD, U+2019; U+0307 is excluded) get
     * delta=1.
     * NOTE(review): presumably the runtime reads a nonzero delta on an
     * uncased character as the case-ignorable marker -- confirm in ucase.h.
     */
    if( (value&UCASE_TYPE_MASK)==UCASE_NONE &&
        p->code!=0x307 &&
        ( (U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 ||
          p->code==0x27 ||
          p->code==0xad ||
          p->code==0x2019 )
    ) {
        if(value&UCASE_EXCEPTION) {
            fprintf(stderr, "gencase error: unable to encode case-ignorable for U+%04lx with exceptions\n",
                    (unsigned long)p->code);
            exit(U_INTERNAL_PROGRAM_ERROR);
        }
        delta=1;
    }

    if((value&UCASE_EXCEPTION)==0) {
        /* no exception: the delta goes directly into the value word */
        value|=((uint32_t)delta<<UCASE_DELTA_SHIFT)&UCASE_DELTA_MASK;
    } else {
        /* remember the properties; store the temporary excProps[] index */
        value|=(uint32_t)exceptionsCount<<UGENCASE_EXC_SHIFT;
        uprv_memcpy(&excProps[exceptionsCount], p, sizeof(*p));
        ++exceptionsCount;
        if(exceptionsCount==MAX_EXC_COUNT) {
            fprintf(stderr, "gencase: too many exceptions\n");
            exit(U_INDEX_OUTOFBOUNDS_ERROR);
        }
    }

    /* nothing to store if the value did not change */
    if(value==oldValue) {
        return;
    }
    if(!upvec_setValue(pv, p->code, p->code+1, 0, value, 0xffffffff, &errorCode)) {
        fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
                u_errorName(errorCode));
        exit(errorCode);
    }
}
/*
 * Set the UCASE_SENSITIVE bit in column 0 of the properties vector for
 * every code point in the inclusive range first..last.
 * Exits the program with the ICU error code if the vector cannot be updated.
 */
extern void
addCaseSensitive(UChar32 first, UChar32 last) {
    UErrorCode status=U_ZERO_ERROR;

    /* upvec_setValue() takes an exclusive limit, hence last+1 */
    if(upvec_setValue(pv, first, last+1, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &status)) {
        return;
    }

    fprintf(stderr, "gencase error: unable to set UCASE_SENSITIVE, code: %s\n",
            u_errorName(status));
    exit(status);
}
/*
 * Placeholder entry point: the body is empty, so calling this function
 * currently has no effect.
 */
extern void
makeCaseClosure() {
}
/*
 * Check whether a full mapping string is just the simple mapping.
 *
 * s      length-prefixed string: s[0] is the number of UChars that follow
 * simple the simple mapping result, or 0 if there is none
 * c      the original code point, used in place of simple when simple==0
 *
 * Returns TRUE only if the string encodes exactly one code point and that
 * code point equals the simple mapping (or c when there is no simple
 * mapping). Empty strings and strings longer than one code point can be
 * (U16_MAX_LENGTH) yield FALSE.
 */
static UBool
fullMappingEqualsSimple(const UChar *s, UChar32 simple, UChar32 c) {
    const UChar *text=s+1;
    int32_t textLength=s[0];
    int32_t offset=0;
    UChar32 firstCodePoint;
    UChar32 expected;

    if(textLength==0 || textLength>U16_MAX_LENGTH) {
        return FALSE;
    }

    /* decode the first (and, if equal, only) code point */
    U16_NEXT(text, offset, textLength, firstCodePoint);

    expected= simple!=0 ? simple : c;
    return (UBool)(offset==textLength && firstCodePoint==expected);
}
static uint16_t
makeException(uint32_t value, Props *p) {
uint32_t slots[8];
uint32_t slotBits;
uint16_t excWord, excIndex, excTop, i, count, length, fullLengths;
UBool doubleSlots;
excIndex=exceptionsTop;
if(excIndex>=UCASE_MAX_EXCEPTIONS) {
fprintf(stderr, "gencase error: too many exceptions words\n");
exit(U_BUFFER_OVERFLOW_ERROR);
}
excTop=excIndex+1;
excWord=((uint16_t)value&UCASE_DOT_MASK)<<UCASE_EXC_DOT_SHIFT;
if(p->specialCasing!=NULL) {
length=p->specialCasing->lowerCase[0];
if(length>maxFullLength) {
maxFullLength=length;
}
length=p->specialCasing->upperCase[0];
if(length>maxFullLength) {
maxFullLength=length;
}
length=p->specialCasing->titleCase[0];
if(length>maxFullLength) {
maxFullLength=length;
}
}
if(p->caseFolding!=NULL) {
length=p->caseFolding->full[0];
if(length>maxFullLength) {
maxFullLength=length;
}
}
if(p->specialCasing!=NULL && p->specialCasing->isComplex) {
excWord|=UCASE_EXC_CONDITIONAL_SPECIAL;
p->specialCasing=NULL;
}
if(p->caseFolding!=NULL && p->caseFolding->simple==0 && p->caseFolding->full[0]==0) {
excWord|=UCASE_EXC_CONDITIONAL_FOLD;
p->caseFolding=NULL;
}
if(p->specialCasing!=NULL) {
if(fullMappingEqualsSimple(p->specialCasing->lowerCase, p->lowerCase, p->code)) {
p->specialCasing->lowerCase[0]=0;
}
if(fullMappingEqualsSimple(p->specialCasing->upperCase, p->upperCase, p->code)) {
p->specialCasing->upperCase[0]=0;
}
if(fullMappingEqualsSimple(p->specialCasing->titleCase, p->titleCase, p->code)) {
p->specialCasing->titleCase[0]=0;
}
}
if( p->caseFolding!=NULL &&
fullMappingEqualsSimple(p->caseFolding->full, p->caseFolding->simple, p->code)
) {
p->caseFolding->full[0]=0;
}
slotBits=0;
count=0;
if(p->lowerCase!=0) {
slots[count]=(uint32_t)p->lowerCase;
slotBits|=slots[count];
++count;
excWord|=U_MASK(UCASE_EXC_LOWER);
}
if( p->caseFolding!=NULL &&
p->caseFolding->simple!=0 &&
(p->lowerCase!=0 ?
p->caseFolding->simple!=p->lowerCase :
p->caseFolding->simple!=p->code)
) {
slots[count]=(uint32_t)p->caseFolding->simple;
slotBits|=slots[count];
++count;
excWord|=U_MASK(UCASE_EXC_FOLD);
}
if(p->upperCase!=0) {
slots[count]=(uint32_t)p->upperCase;
slotBits|=slots[count];
++count;
excWord|=U_MASK(UCASE_EXC_UPPER);
}
if(p->upperCase!=p->titleCase) {
if(p->titleCase!=0) {
slots[count]=(uint32_t)p->titleCase;
} else {
slots[count]=(uint32_t)p->code;
}
slotBits|=slots[count];
++count;
excWord|=U_MASK(UCASE_EXC_TITLE);
}
fullLengths=0;
if(p->specialCasing!=NULL) {
fullLengths=p->specialCasing->lowerCase[0];
fullLengths|=p->specialCasing->upperCase[0]<<8;
fullLengths|=p->specialCasing->titleCase[0]<<12;
}
if(p->caseFolding!=NULL) {
fullLengths|=p->caseFolding->full[0]<<4;
}
if(fullLengths!=0) {
slots[count]=fullLengths;
slotBits|=slots[count];
++count;
excWord|=U_MASK(UCASE_EXC_FULL_MAPPINGS);
}
doubleSlots=(UBool)(slotBits>0xffff);
if(!doubleSlots) {
for(i=0; i<count; ++i) {
exceptions[excTop++]=(uint16_t)slots[i];
}
} else {
excWord|=UCASE_EXC_DOUBLE_SLOTS;
for(i=0; i<count; ++i) {
exceptions[excTop++]=(uint16_t)(slots[i]>>16);
exceptions[excTop++]=(uint16_t)slots[i];
}
}
if(p->specialCasing!=NULL) {
length=(uint16_t)p->specialCasing->lowerCase[0];
u_memcpy((UChar *)exceptions+excTop, p->specialCasing->lowerCase+1, length);
excTop+=length;
}
if(p->caseFolding!=NULL) {
length=(uint16_t)p->caseFolding->full[0];
u_memcpy((UChar *)exceptions+excTop, p->caseFolding->full+1, length);
excTop+=length;
}
if(p->specialCasing!=NULL) {
length=(uint16_t)p->specialCasing->upperCase[0];
u_memcpy((UChar *)exceptions+excTop, p->specialCasing->upperCase+1, length);
excTop+=length;
length=(uint16_t)p->specialCasing->titleCase[0];
u_memcpy((UChar *)exceptions+excTop, p->specialCasing->titleCase+1, length);
excTop+=length;
}
exceptionsTop=excTop;
exceptions[excIndex]=excWord;
return excIndex;
}
/*
 * Second pass over the properties vector: for every row whose value has
 * UCASE_EXCEPTION set, build the exception data with makeException() and
 * replace the temporary excProps[] index stored in the value with the final
 * index into exceptions[].
 */
extern void
makeExceptions() {
    uint32_t *pvRow;
    int32_t rowIndex;

    for(rowIndex=0; (pvRow=upvec_getRow(pv, rowIndex, NULL, NULL))!=NULL; ++rowIndex) {
        uint32_t word=*pvRow;
        uint16_t excIndex;

        if((word&UCASE_EXCEPTION)==0) {
            continue; /* no exception data for this row */
        }

        /* the bits above UGENCASE_EXC_SHIFT hold the excProps[] index set by setProps() */
        excIndex=makeException(word, excProps+(word>>UGENCASE_EXC_SHIFT));

        /* clear both index fields, then store the final exceptions[] index */
        *pvRow=(word&~(UGENCASE_EXC_MASK|UCASE_EXC_MASK))|(excIndex<<UCASE_EXC_SHIFT);
    }
}
/*
 * Build the trie from the properties vector and write the binary data file
 * into dataDir.
 *
 * The file body consists of, in this order: the indexes[] array, the
 * serialized trie, and the 16-bit exceptions[] words. The total size stored
 * in indexes[UCASE_IX_LENGTH] is verified against the number of bytes that
 * udata_finish() reports. Every failure prints a message to stderr and
 * exits the program.
 */
extern void
generateData(const char *dataDir) {
    /* indexes[0] holds the number of indexes; the rest starts out as 0 */
    static int32_t indexes[UCASE_IX_TOP]={
        UCASE_IX_TOP
    };
    static uint8_t trieBlock[40000];

    UErrorCode errorCode=U_ZERO_ERROR;
    UNewDataMemory *pData;
    UNewTrie *pTrie;
    const uint32_t *pvRow;
    UChar32 rangeStart, rangeLimit;
    int32_t rowIndex, trieSize;
    long writtenLength;

    /* --- build the trie from the rows of the properties vector --- */
    pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
    if(pTrie==NULL) {
        fprintf(stderr, "gencase error: unable to create a UNewTrie\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }

    rowIndex=0;
    while((pvRow=upvec_getRow(pv, rowIndex, &rangeStart, &rangeLimit))!=NULL) {
        if(!utrie_setRange32(pTrie, rangeStart, rangeLimit, *pvRow, TRUE)) {
            fprintf(stderr, "gencase error: unable to set trie value (overflow)\n");
            exit(U_BUFFER_OVERFLOW_ERROR);
        }
        ++rowIndex;
    }

    trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
        exit(errorCode);
    }

    /* --- fill in the indexes; exceptions are counted in 16-bit words, sizes in bytes --- */
    indexes[UCASE_IX_EXC_LENGTH]=exceptionsTop;
    indexes[UCASE_IX_TRIE_SIZE]=trieSize;
    indexes[UCASE_IX_LENGTH]=(int32_t)sizeof(indexes)+trieSize+2*exceptionsTop;
    indexes[UCASE_IX_MAX_FULL_LENGTH]=maxFullLength;

    if(beVerbose) {
        printf("trie size in bytes: %5d\n", (int)trieSize);
        printf("number of code points with exceptions: %5d\n", exceptionsCount);
        printf("size in bytes of exceptions: %5d\n", 2*exceptionsTop);
        printf("data size: %5d\n", (int)indexes[UCASE_IX_LENGTH]);
    }

    /* --- write the data file --- */
    pData=udata_create(dataDir, UCASE_DATA_TYPE, UCASE_DATA_NAME, &dataInfo,
                       haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gencase: unable to create data memory, %s\n", u_errorName(errorCode));
        exit(errorCode);
    }

    udata_writeBlock(pData, indexes, sizeof(indexes));
    udata_writeBlock(pData, trieBlock, trieSize);
    udata_writeBlock(pData, exceptions, 2*exceptionsTop);

    writtenLength=udata_finish(pData, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "gencase: error %d writing the output file\n", errorCode);
        exit(errorCode);
    }

    /* sanity check: the bytes written must match the size recorded in the indexes */
    if(writtenLength!=indexes[UCASE_IX_LENGTH]) {
        fprintf(stderr, "gencase: data length %ld != calculated size %d\n",
                writtenLength, (int)indexes[UCASE_IX_LENGTH]);
        exit(U_INTERNAL_PROGRAM_ERROR);
    }

    utrie_close(pTrie);
}