/* CFUnicodePrecomposition.c */
#if !defined(KERNEL)
#define KERNEL 0
#endif
#include <string.h>
#if KERNEL
#include "CFUnicodePrecomposition.h"
#include "CFUniCharPrecompData.h"
#else KERNEL
#include <CoreFoundation/CFBase.h>
#include <CoreFoundation/CFCharacterSet.h>
#include "CFUniChar.h"
#include "CFUnicodePrecomposition.h"
#include "CFInternal.h"
#include "CFUniCharPriv.h"
#endif KERNEL
#if KERNEL
/* Entry count of __CFUniCharPrecompSourceTable: its byte size divided by the size of two uint32_t values (one entry). */
static const uint32_t __CFUniCharPrecompositionTableLength = (sizeof(__CFUniCharPrecompSourceTable) / (sizeof(uint32_t) * 2));
/* Combining-priority lookup data read by __CFUniCharGetCombiningPriority(): */
/* per-plane first-level table, indexed by plane and then by bits 8..15 of the character */
CF_EXPORT uint8_t **CFUniCharCombiningPriorityTable;
/* per-plane second-level data, organised as 256-byte pages indexed by the low 8 bits of the character */
CF_EXPORT uint8_t **CFUniCharCombiningPriorityExtraTable;
/* number of planes covered; characters at or above (this << 16) get priority 0 */
CF_EXPORT uint8_t CFUniCharNumberOfPlanesForCombiningPriority;
/*
 * Looks up the combining priority of `character` in the two-level
 * per-plane tables.
 *
 * Returns 0 when the character lies beyond the covered planes, when its
 * plane has no first-level table, or when the first-level slot for its
 * 256-character page is 0. Otherwise returns the byte stored for the
 * character in the second-level page selected by that slot.
 */
CF_EXPORT uint8_t __CFUniCharGetCombiningPriority(UTF32Char character) {
    uint32_t planeIndex;
    const uint8_t *pageIndexTable;
    const uint8_t *page;
    uint8_t pageSlot;

    /* Outside the planes for which data exists. */
    if (character >= (CFUniCharNumberOfPlanesForCombiningPriority << 16)) return 0;

    planeIndex = character >> 16;
    pageIndexTable = CFUniCharCombiningPriorityTable[planeIndex];
    if (!pageIndexTable) return 0;

    /* The slot value is a 1-based page number; 0 means "no page". */
    pageSlot = pageIndexTable[(character >> 8) & 0xFF];
    if (!pageSlot) return 0;

    page = CFUniCharCombiningPriorityExtraTable[planeIndex] + ((pageSlot - 1) * 256);
    return page[character % 256];
}
/* Per-plane non-base character data read by __CFUniCharIsNonBaseCharacter(), indexed by plane (character >> 16). */
CF_EXPORT uint8_t **CFUniCharNonBaseBitmap;
/* Number of planes covered; characters at or above (this << 16) are reported as base characters. */
CF_EXPORT uint8_t CFUniCharNumberOfPlanesForNonBaseBitmap;
/*
 * Tests whether `character` is marked in the per-plane non-base data.
 *
 * Each plane's data starts with a 256-byte index (one slot per
 * 256-character page) followed by 32-byte bitmaps:
 *   slot 0xFF -> every character of the page is non-base
 *   slot 0    -> no character of the page is non-base
 *   otherwise -> 1-based number of the 32-byte bitmap to consult
 * Characters beyond the covered planes are reported as base characters.
 */
CF_INLINE bool __CFUniCharIsNonBaseCharacter(UTF32Char character) {
    const uint8_t *planeData;
    uint8_t pageSlot;

    if (character >= (CFUniCharNumberOfPlanesForNonBaseBitmap << 16)) return false;

    planeData = CFUniCharNonBaseBitmap[character >> 16];
    pageSlot = planeData[(character >> 8) & 0xFF];

    if (pageSlot == 0xFF) return true;
    if (pageSlot == 0) return false;

    /* Skip the 256-byte index, then step to the selected 32-byte bitmap. */
    planeData = planeData + ((pageSlot - 1) * 32) + 256;
    return ((planeData[(character & 0xFF) / 8] & (1 << (character % 8))) != 0);
}
#else KERNEL
/* Lazily loaded precomposition data; everything below is filled in by __CFUniCharLoadPrecompositionTable(). */
/* Source table, searched by combining character in CFUniCharPrecomposeCharacter(). NULL until loaded. */
static UTF32Char *__CFUniCharPrecompSourceTable = NULL;
/* Entry count of the source table, read from the first uint32_t of the mapping data. */
static uint32_t __CFUniCharPrecompositionTableLength = 0;
/* Destination table used for the BMP lookup; starts immediately after the source table. */
static uint16_t *__CFUniCharBMPPrecompDestinationTable = NULL;
/* Destination table used for the non-BMP lookup; starts bmpMappingLength bytes past the BMP destination table. */
static uint32_t *__CFUniCharNonBMPPrecompDestinationTable = NULL;
/* Plane-0 non-base character bitmap and plane-0 combining-property data, cached for the common BMP case. */
static const uint8_t *__CFUniCharNonBaseBitmapForBMP_P = NULL; static const uint8_t *__CFUniCharCombiningClassForBMP = NULL;
/* Spin lock held while the tables above are being initialised. */
static CFSpinLock_t __CFUniCharPrecompositionTableLock = 0;
/*
 * Loads the canonical precomposition mapping data and sets up the
 * file-level table pointers. Does nothing if the source table is already
 * set or if the mapping data is unavailable (the pointers then stay NULL).
 *
 * Layout consumed from the mapping data, as read below:
 *   uint32_t  number of source-table entries
 *   uint32_t  byte length of the BMP destination table
 *   source table (entries of two UTF32Char each)
 *   BMP destination table
 *   non-BMP destination table
 *
 * The whole check-and-initialise sequence runs under
 * __CFUniCharPrecompositionTableLock.
 */
static void __CFUniCharLoadPrecompositionTable(void) {
    __CFSpinLock(&__CFUniCharPrecompositionTableLock);

    if (NULL == __CFUniCharPrecompSourceTable) {
        /*
         * Walk the header with a typed pointer. Incrementing the result of a
         * cast ("*(((uint32_t *)bytes)++)") is not valid C: a cast does not
         * yield an lvalue.
         */
        const uint32_t *cursor = (const uint32_t *)CFUniCharGetMappingData(kCFUniCharCanonicalPrecompMapping);

        if (NULL != cursor) {
            uint32_t bmpMappingLength;

            __CFUniCharPrecompositionTableLength = *(cursor++);
            bmpMappingLength = *(cursor++);

            /* cursor now points at the first source-table entry. */
            __CFUniCharPrecompSourceTable = (UTF32Char *)cursor;
            __CFUniCharBMPPrecompDestinationTable = (uint16_t *)((intptr_t)cursor + (__CFUniCharPrecompositionTableLength * sizeof(UTF32Char) * 2));
            __CFUniCharNonBMPPrecompDestinationTable = (uint32_t *)(((intptr_t)__CFUniCharBMPPrecompDestinationTable) + bmpMappingLength);

            __CFUniCharNonBaseBitmapForBMP_P = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0);
            __CFUniCharCombiningClassForBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0);
        }
    }

    __CFSpinUnlock(&__CFUniCharPrecompositionTableLock);
}
/* The function below is compiled under the name __CFUniCharIsNonBaseCharacter_P (presumably to avoid clashing with a same-named symbol elsewhere — confirm). */
#define __CFUniCharIsNonBaseCharacter __CFUniCharIsNonBaseCharacter_P
/*
 * Tests `character` against the non-base character set: BMP characters use
 * the cached plane-0 bitmap, others fetch the bitmap for their plane.
 */
CF_INLINE bool __CFUniCharIsNonBaseCharacter(UTF32Char character) {
    const uint8_t *planeBitmap;

    if (character < 0x10000) {
        planeBitmap = __CFUniCharNonBaseBitmapForBMP_P;
    } else {
        planeBitmap = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, ((character >> 16) & 0xFF));
    }
    return CFUniCharIsMemberOfBitmap(character, planeBitmap);
}
#endif KERNEL
/*
 * One entry of a BMP destination table, as searched by
 * __CFUniCharGetMappedBMPValue(). The table memory is reinterpreted through
 * this struct, so the field order and sizes must match the stored data.
 */
typedef struct {
/* search key; CFUniCharPrecomposeCharacter() looks up the base character here */
UTF16Char _key;
/* value returned on a match (the precomposed character) */
UTF16Char _value;
} __CFUniCharPrecomposeBMPMappings;
/*
 * Binary-searches `theTable` (numElem entries, ascending by _key) for
 * `character`.
 *
 * Returns the matching entry's _value, or 0 when there is no match. A NULL
 * or empty table yields 0 as well; without that guard the range check
 * below would read theTable[numElem-1] with numElem-1 wrapped around.
 */
static UTF16Char __CFUniCharGetMappedBMPValue(const __CFUniCharPrecomposeBMPMappings *theTable, uint32_t numElem, UTF16Char character) {
    const __CFUniCharPrecomposeBMPMappings *p, *q, *divider;

    if ((NULL == theTable) || (0 == numElem)) {
        return 0;
    }

    /* Quick reject: outside the key range covered by the table. */
    if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) {
        return 0;
    }

    p = theTable;
    q = p + (numElem-1);
    while (p <= q) {
        divider = p + ((q - p) >> 1); /* midpoint of [p, q] */
        if (character < divider->_key) { q = divider - 1; }
        else if (character > divider->_key) { p = divider + 1; }
        else { return divider->_value; }
    }
    return 0;
}
/*
 * One 32-bit key/value entry, as searched by __CFUniCharGetMappedValue_P().
 * CFUniCharPrecomposeCharacter() views both the source table (keyed by
 * combining character) and the non-BMP destination table (keyed by base
 * character) through this struct, so the layout must match the stored data.
 */
typedef struct {
/* search key */
UTF32Char _key;
/* value returned on a match */
uint32_t _value;
} __CFUniCharPrecomposeMappings;
/*
 * Binary-searches `theTable` (numElem entries, ascending by _key) for
 * `character`.
 *
 * Returns the matching entry's _value, or 0 when there is no match. A NULL
 * or empty table yields 0 as well: the source table stays NULL with length
 * 0 when the mapping data cannot be loaded, and the range check below would
 * otherwise dereference it.
 */
static uint32_t __CFUniCharGetMappedValue_P(const __CFUniCharPrecomposeMappings *theTable, uint32_t numElem, UTF32Char character) {
    const __CFUniCharPrecomposeMappings *p, *q, *divider;

    if ((NULL == theTable) || (0 == numElem)) {
        return 0;
    }

    /* Quick reject: outside the key range covered by the table. */
    if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) {
        return 0;
    }

    p = theTable;
    q = p + (numElem-1);
    while (p <= q) {
        divider = p + ((q - p) >> 1); /* midpoint of [p, q] */
        if (character < divider->_key) { q = divider - 1; }
        else if (character > divider->_key) { p = divider + 1; }
        else { return divider->_value; }
    }
    return 0;
}
#if !KERNEL
__private_extern__
#endif /* !KERNEL */
/*
 * Composes `base` with `combining`.
 *
 * Returns the precomposed character, or 0xFFFD when the pair has no
 * composition (or, outside the kernel, when the tables cannot be loaded).
 *
 * The source table maps the combining character to a packed value: the low
 * 16 bits are an offset (in uint32_t units) into a destination table and
 * the bits above hold that sub-table's entry count. Outside the kernel,
 * kCFUniCharNonBmpFlag selects the non-BMP destination table.
 */
UTF32Char CFUniCharPrecomposeCharacter(UTF32Char base, UTF32Char combining) {
    uint32_t value;

#if !KERNEL
    if (NULL == __CFUniCharPrecompSourceTable) __CFUniCharLoadPrecompositionTable();
    /* Still NULL: the mapping data is unavailable, so nothing can compose. */
    if (NULL == __CFUniCharPrecompSourceTable) return 0xFFFD;
#endif /* !KERNEL */

    value = __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings *)__CFUniCharPrecompSourceTable, __CFUniCharPrecompositionTableLength, combining);
    if (!value) return 0xFFFD;

#if !KERNEL
    if (value & kCFUniCharNonBmpFlag) {
        value = __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings *)((uint32_t *)__CFUniCharNonBMPPrecompDestinationTable + (value & 0xFFFF)), (value >> 16) & 0x7FFF, base);
    } else
#endif /* !KERNEL */
    {
        /*
         * The BMP destination table has 16-bit keys. A non-BMP base cannot
         * appear in it, and truncating it to 16 bits could match an
         * unrelated BMP character.
         */
        if (base > 0xFFFF) return 0xFFFD;
        value = __CFUniCharGetMappedBMPValue((const __CFUniCharPrecomposeBMPMappings *)((uint32_t *)__CFUniCharBMPPrecompDestinationTable + (value & 0xFFFF)), (value >> 16), (UTF16Char)base);
    }
    return (value ? value : 0xFFFD);
}
#define HANGUL_SBASE 0xAC00
#define HANGUL_LBASE 0x1100
#define HANGUL_VBASE 0x1161
#define HANGUL_TBASE 0x11A7
#define HANGUL_SCOUNT 11172
#define HANGUL_LCOUNT 19
#define HANGUL_VCOUNT 21
#define HANGUL_TCOUNT 28
#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
/*
 * Shifts the `length` UTF-16 units starting at `convertedChars` towards
 * higher addresses by `delta` units. Copies from the last unit backwards so
 * that overlapping source and destination ranges are handled correctly.
 */
CF_INLINE void __CFUniCharMoveBufferFromEnd(UTF16Char *convertedChars, uint32_t length, uint32_t delta) {
    UTF16Char *src = convertedChars + length; /* one past the last unit to move */
    UTF16Char *dst = src + delta;

    while (src > convertedChars) {
        --dst;
        --src;
        *dst = *src;
    }
}
/*
 * Precomposes a UTF-16 string into `precomposed`.
 *
 * characters      input UTF-16 units
 * length          number of input units
 * consumedLength  optional out: number of input units read
 * precomposed     output buffer
 * maxLength       capacity of the output buffer, in UTF-16 units
 * filledLength    optional out: number of output units accounted for
 *
 * Each base character is written to the output immediately and remembered
 * as the current base. Following non-base characters are either folded into
 * a pending precomposed character (kept in precomposedChar and written over
 * the base slot when the next base character or the end of input is
 * reached) or copied to the output unchanged. A leading Hangul jamo
 * followed by a vowel jamo, and optionally a trailing jamo, is composed
 * arithmetically into one syllable.
 *
 * Returns false only when the final rewrite of the base slot needs one more
 * unit than the buffer holds; returns true otherwise.
 *
 * NOTE(review): when the buffer fills up inside the loop, the loop stops
 * and the function still returns true. usedLength is incremented before the
 * capacity check, so *filledLength is then larger than maxLength, and
 * *consumedLength includes the character that did not fit. Confirm that
 * callers detect overflow this way before changing it.
 */
bool CFUniCharPrecompose(const UTF16Char *characters, uint32_t length, uint32_t *consumedLength, UTF16Char *precomposed, uint32_t maxLength, uint32_t *filledLength) {
    UTF32Char currentChar = 0, lastChar = 0, precomposedChar = 0xFFFD; /* 0xFFFD == nothing pending */
    uint32_t originalLength = length, usedLength = 0;
    UTF16Char *currentBase = precomposed; /* output slot of the current base character */
    uint8_t currentClass, lastClass = 0;
    bool currentBaseIsBMP = true; /* whether the current base slot holds one unit (true) or a surrogate pair */
    bool isPrecomposed;

#if !KERNEL
    if (NULL == __CFUniCharPrecompSourceTable) __CFUniCharLoadPrecompositionTable();
#endif /* !KERNEL */

    while (length > 0) {
        currentChar = *(characters++);
        --length;

        /* Fold a high/low surrogate pair into a single UTF-32 character. */
        if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*characters)) {
            currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(characters++));
            --length;
        }

        if (lastChar && __CFUniCharIsNonBaseCharacter(currentChar)) {
            /* Non-base character following a base: try to compose it. */
            isPrecomposed = (precomposedChar == 0xFFFD ? false : true);
            if (isPrecomposed) lastChar = precomposedChar; /* compose on top of the pending result */

#if KERNEL
            currentClass = __CFUniCharGetCombiningPriority(currentChar);
#else /* KERNEL */
            currentClass = (currentChar > 0xFFFF ? CFUniCharGetUnicodeProperty(currentChar, kCFUniCharCombiningProperty) : CFUniCharGetCombiningPropertyForCharacter(currentChar, __CFUniCharCombiningClassForBMP));
#endif /* KERNEL */

            /* Composition is skipped when the previous uncomposed mark has the same class. */
            if ((lastClass == 0) || (currentClass != lastClass)) {
                if ((precomposedChar = CFUniCharPrecomposeCharacter(lastChar, currentChar)) == 0xFFFD) {
                    /* No composition: restore the pending result, if any. */
                    if (isPrecomposed) precomposedChar = lastChar;
                    lastClass = currentClass;
                } else {
                    /* Composed: the mark is absorbed and not written out. */
                    lastClass = 0;
                    continue;
                }
            }

            /* Copy the uncomposed mark to the output. */
            if (currentChar > 0xFFFF) { /* non-BMP: surrogate pair */
                usedLength += 2;
                if (usedLength > maxLength) break;
                currentChar -= 0x10000;
                *(precomposed++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
                *(precomposed++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
            } else {
                ++usedLength;
                if (usedLength > maxLength) break;
                *(precomposed++) = (UTF16Char)currentChar;
            }
        } else {
            /* Base character (or the very first character). */
            if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { /* Hangul jamo block */
                int8_t lIndex = currentChar - HANGUL_LBASE;

                /*
                 * Index bounds follow the Unicode Hangul composition
                 * algorithm: 0 <= L < LCOUNT, 0 <= V < VCOUNT and
                 * 0 < T < TCOUNT. Accepting L == LCOUNT, V == VCOUNT or
                 * T == TCOUNT produces a wrong syllable, and accepting
                 * T == 0 (U+11A7) silently drops that character.
                 */
                if ((length > 0) && (0 <= lIndex) && (lIndex < HANGUL_LCOUNT)) {
                    int16_t vIndex = *characters - HANGUL_VBASE;

                    if ((vIndex >= 0) && (vIndex < HANGUL_VCOUNT)) {
                        int16_t tIndex = 0;

                        ++characters; --length; /* consume the vowel */

                        if (length > 0) {
                            tIndex = *characters - HANGUL_TBASE;
                            if ((tIndex <= 0) || (tIndex >= HANGUL_TCOUNT)) {
                                tIndex = 0; /* not a trailing consonant: leave it in the input */
                            } else {
                                ++characters; --length; /* consume the trailing consonant */
                            }
                        }
                        currentChar = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE;
                    }
                }
            }

            /* Write the pending composition over the previous base slot. */
            if (precomposedChar != 0xFFFD) {
                if (currentBaseIsBMP) {
                    if (lastChar > 0xFFFF) { /* shrink a two-unit slot to one */
                        --usedLength;
                        memmove(currentBase + 1, currentBase + 2, (precomposed - (currentBase + 2)) * sizeof(UTF16Char));
                    }
                    *(currentBase) = (UTF16Char)precomposedChar;
                } else {
                    if (lastChar < 0x10000) { /* grow a one-unit slot to two */
                        ++usedLength;
                        if (usedLength > maxLength) break;
                        __CFUniCharMoveBufferFromEnd(currentBase + 1, precomposed - (currentBase + 1), 1);
                    }
                    precomposedChar -= 0x10000;
                    *currentBase = (UTF16Char)((precomposedChar >> 10) + 0xD800UL);
                    *(currentBase + 1) = (UTF16Char)((precomposedChar & 0x3FF) + 0xDC00UL);
                }
                precomposedChar = 0xFFFD;
            }

            /* Start a new base at the current write position. */
            currentBase = precomposed;
            lastChar = currentChar;
            lastClass = 0;

            if (currentChar > 0xFFFF) { /* non-BMP: surrogate pair */
                usedLength += 2;
                if (usedLength > maxLength) break;
                currentChar -= 0x10000;
                *(precomposed++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
                *(precomposed++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
                currentBaseIsBMP = false;
            } else {
                ++usedLength;
                if (usedLength > maxLength) break;
                *(precomposed++) = (UTF16Char)currentChar;
                currentBaseIsBMP = true;
            }
        }
    }

    /*
     * Flush a composition still pending at the end. The output width is
     * chosen from currentBaseIsBMP, exactly as in the in-loop flush;
     * currentChar is only the last unit read (and may already have been
     * reduced by 0x10000), so it says nothing about the base slot.
     */
    if (precomposedChar != 0xFFFD) {
        if (!currentBaseIsBMP) {
            if (lastChar < 0x10000) { /* grow a one-unit slot to two */
                ++usedLength;
                if (usedLength > maxLength) {
                    if (consumedLength) *consumedLength = originalLength - length;
                    if (filledLength) *filledLength = usedLength;
                    return false;
                }
                __CFUniCharMoveBufferFromEnd(currentBase + 1, precomposed - (currentBase + 1), 1);
            }
            precomposedChar -= 0x10000;
            *currentBase = (UTF16Char)((precomposedChar >> 10) + 0xD800UL);
            *(currentBase + 1) = (UTF16Char)((precomposedChar & 0x3FF) + 0xDC00UL);
        } else {
            if (lastChar > 0xFFFF) { /* shrink a two-unit slot to one */
                --usedLength;
                memmove(currentBase + 1, currentBase + 2, (precomposed - (currentBase + 2)) * sizeof(UTF16Char));
            }
            *(currentBase) = (UTF16Char)precomposedChar;
        }
    }

    if (consumedLength) *consumedLength = originalLength - length;
    if (filledLength) *filledLength = usedLength;
    return true;
}