/* * Copyright (c) 2012 Apple Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ /* CFBurstTrie.c Copyright (c) 2008-2012, Apple Inc. All rights reserved. Responsibility: Jennifer Moore */ #include "CFInternal.h" #include "CFBurstTrie.h" #include "CFByteOrder.h" #include "CFNumber.h" #include #include #include #include #include #include #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI #include #include #include #endif #include #include #if DEPLOYMENT_TARGET_WINDOWS #define open _NS_open #define statinfo _stat #define stat(x,y) _NS_stat(x,y) #define __builtin_memcmp(x, y, z) memcmp(x, y, z) #define __builtin_popcountll(x) popcountll(x) #define bzero(dst, size) ZeroMemory(dst, size) #define S_IWUSR 0 #define S_IRUSR 0 static int pwrite(int fd, const void *buf, size_t nbyte, off_t offset) { // Get where we are long pos = _tell(fd); // Move to new offset _lseek(fd, offset, SEEK_SET); // Write data int res = _write(fd, buf, nbyte); // Return to previous offset _lseek(fd, pos, SEEK_SET); return res; } #else #define statinfo stat #endif #if 0 #pragma mark Types and Utilities #endif #define MAX_STRING_ALLOCATION_SIZE 342 #define MAX_STRING_SIZE 1024 #define MAX_KEY_LENGTH MAX_STRING_SIZE * 4 #define CHARACTER_SET_SIZE 256 #define MAX_LIST_SIZE 256 // 64 #define MAX_BITMAP_SIZE 200 #define MAX_BUFFER_SIZE (4096<<2) #define NextTrie_GetPtr(p) (p & ((~(uintptr_t)0)-3)) #define NextTrie_GetKind(p) (p & 3) #define NextTrie_SetKind(p, kind) (p |= (3&kind)) #define DiskNextTrie_GetPtr(map,offset) (((uintptr_t)map) + (uintptr_t)(offset & ((~(uintptr_t)0)-3))) #define DiskNextTrie_GetKind(p) (p & 3) #define DiskNextTrie_SetKind(p, kind) (p |= (3&kind)) // Use this macro to avoid forgetting to check the pointer before assigning value to it. #define SetPayload(pointer, value) do { if (pointer) *pointer = value; } while (0) enum { Nothing = 0, TrieKind = 1, ListKind = 2, CompactTrieKind = 3 }; typedef enum { FailedInsert = 0, NewTerm = 1, ExistingTerm = 2 } CFBTInsertCode; #pragma pack (1) typedef uintptr_t NextTrie; typedef struct _TrieLevel { NextTrie slots[CHARACTER_SET_SIZE]; uint32_t weight; uint32_t payload; } TrieLevel; typedef TrieLevel *TrieLevelRef; typedef struct _MapTrieLevel { uint32_t slots[CHARACTER_SET_SIZE]; uint32_t payload; } MapTrieLevel; typedef MapTrieLevel *MapTrieLevelRef; typedef struct _CompactMapTrieLevel { uint64_t bitmap[CHARACTER_SET_SIZE / 64]; uint32_t payload; uint32_t slots[]; } CompactMapTrieLevel; typedef CompactMapTrieLevel *CompactMapTrieLevelRef; typedef struct _ListNode { struct _ListNode *next; uint32_t weight; uint32_t payload; uint16_t length; UInt8 string[]; }* ListNodeRef; typedef struct _Page { uint32_t length; char data[]; } Page; typedef struct _PageEntryPacked { uint8_t pfxLen; uint16_t strlen; uint32_t payload; UInt8 string[]; } PageEntryPacked; typedef struct _PageEntry { uint16_t strlen; uint32_t payload; UInt8 string[]; } PageEntry; typedef struct _TrieHeader { uint32_t signature; uint32_t rootOffset; uint32_t count; uint32_t size; uint32_t flags; uint64_t reserved[16]; } TrieHeader; typedef struct _TrieCursor { uint64_t signature; uint64_t counter; NextTrie next; uint32_t keylen; uint32_t prefixlen; const uint8_t *prefix; uint8_t key[MAX_KEY_LENGTH]; } TrieCursor; typedef struct _MapCursor { uint64_t signature; TrieHeader *header; uint32_t next; uint32_t prefixlen; uint32_t keylen; const uint8_t *prefix; uint8_t key[MAX_STRING_SIZE*4]; } MapCursor; typedef struct _CompactMapCursor { uint32_t next; uint32_t entryOffsetInPage; uint32_t offsetInEntry; uint32_t payload; // On a page, the first entry could has 0 strlen. So we need this variable to tell us whether // the cursor is merely pointing at the beginning of the page, or the first entry. // For example, if the trie contains "ab" and "abc", where "a" is stored on an array level, // while "b" and "bc" are stored on a page level. If we creat a cursor for string "a", this cursor // will point at the beginning of the page, but not at any particular key. The both entryOffsetInPage and // offsetInEntry fields of the cursor are set to 0 in this case. Now if we add "a" to the // trie. the page level will actually contains three entries. The first entry corresponds to string "a". // That entry has 0 strlen value. If we creat a cursor for string "a" again, this cursor will // point at the first entry on the page. But the entryOffsetInPage and offsetInEntry fields are still // set to 0s. So we need an additional variable to make distinction between these two situations. BOOL isOnPage; } CompactMapCursor; typedef struct _CompactMapCursor *MapCursorRef; enum { _kCFBurstTrieCursorTrieType = 0, _kCFBurstTrieCursorMapType }; typedef struct _CFBurstTrieCursor { CompactMapCursor mapCursor; CFIndex cursorType; CFBurstTrieRef trie; } _CFBurstTrieCursor; // ** Legacy typedef struct _DiskTrieLevel { uint32_t slots[CHARACTER_SET_SIZE]; uint32_t weight; uint32_t payload; } DiskTrieLevel; typedef DiskTrieLevel *DiskTrieLevelRef; typedef struct _CompactDiskTrieLevel { uint64_t bitmap[CHARACTER_SET_SIZE / 64]; // CHARACTER_SET_SIZE / 64bits per word uint32_t weight; uint32_t payload; uint32_t slots[]; } CompactDiskTrieLevel; typedef CompactDiskTrieLevel *CompactDiskTrieLevelRef; typedef struct _StringPage { uint32_t length; char data[]; } StringPage; typedef struct _StringPageEntryPacked { uint8_t pfxLen; uint16_t strlen; // make uint8_t if possible uint32_t payload; char string[]; } StringPageEntryPacked; typedef struct _StringPageEntry { uint16_t strlen; // make uint8_t if possible uint32_t payload; char string[]; } StringPageEntry; typedef struct _fileHeader { uint32_t signature; uint32_t rootOffset; uint32_t count; uint32_t size; uint32_t flags; } fileHeader; // ** #pragma pack() struct _CFBurstTrie { union { TrieLevel root; DiskTrieLevel diskRoot; MapTrieLevel maproot; }; char *mapBase; uint32_t mapSize; uint32_t mapOffset; uint32_t cflags; uint32_t count; uint32_t containerSize; int retain; #if DEPLOYMENT_TARGET_WINDOWS HANDLE mapHandle; HANDLE mappedFileHandle; #endif }; #if 0 #pragma mark - #pragma mark Forward declarations #endif typedef struct _TraverseContext { void *context; void (*callback)(void*, const UInt8*, uint32_t, uint32_t); } TraverseContext; bool foundKey(void *context, const uint8_t *key, uint32_t payload, bool exact) { if (context != NULL) { TraverseContext *ctx = (TraverseContext *)context; if (ctx->context && ctx->callback) { ctx->callback(ctx->context, key, 1, payload); } } return false; } void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); static CFBTInsertCode addCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload); static void findCFBurstTrieLevel(CFBurstTrieRef trie, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)); static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)); static void traverseCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, CompactMapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); static size_t serializeCFBurstTrie(CFBurstTrieRef trie, size_t start_offset, int fd); static Boolean burstTrieMappedFind(DiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix); static Boolean burstTrieMappedPageFind(StringPage *page, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix); static Boolean burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix); static void destroyCFBurstTrie(CFBurstTrieRef trie); static void finalizeCFBurstTrie(TrieLevelRef trie); static void finalizeCFBurstTrieList(ListNodeRef node); static int nodeWeightCompare(const void *a, const void *b); static int nodeStringCompare(const void *a, const void *b); bool foundKey(void *context, const uint8_t *key, uint32_t payload, bool exact); bool containsKey(void *context, const uint8_t *key, uint32_t payload, bool exact); static CFIndex burstTrieConvertCharactersToUTF8(UniChar *chars, CFIndex numChars, UInt8 *buffer); static Boolean advanceMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length); static Boolean getMapCursorPayload(CFBurstTrieRef trie, const CompactMapCursor *cursor, uint32_t *payload); static void copyMapCursor(const CompactMapCursor *source, CompactMapCursor* destination); static Boolean areMapCursorsEqual(const CompactMapCursor *lhs, const CompactMapCursor *rhs); static void traverseFromMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback); static Boolean getMapCursorPayloadFromPackedPageEntry(PageEntryPacked *entry, const CompactMapCursor *cursor, uint32_t *payload); static Boolean getMapCursorPayloadFromPageEntry(PageEntry *entry, const CompactMapCursor *cursor, uint32_t *payload); #if 0 #pragma mark - #pragma mark Core Foundation boilerplate #endif static const void *_CFBurstTrieRetainCallback(CFAllocatorRef allocator, const void *value) { CFBurstTrieRetain((CFBurstTrieRef)value); return value; } static void _CFBurstTrieReleaseCallback(CFAllocatorRef allocator, const void *value) { CFBurstTrieRelease((CFBurstTrieRef)value); } const CFDictionaryValueCallBacks kCFBurstTrieValueCallbacks = {0, _CFBurstTrieRetainCallback, _CFBurstTrieReleaseCallback, NULL, NULL}; #if 0 #pragma mark - #pragma mark Public Interface #endif CFBurstTrieRef CFBurstTrieCreateWithOptions(CFDictionaryRef options) { CFBurstTrieRef trie = NULL; trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie)); trie->containerSize = MAX_LIST_SIZE; CFNumberRef valueAsCFNumber; if (CFDictionaryGetValueIfPresent(options, kCFBurstTrieCreationOptionNameContainerSize, (const void **)&valueAsCFNumber)) { int value; CFNumberGetValue(valueAsCFNumber, kCFNumberIntType, &value); trie->containerSize = value > 2 && value < 4096 ? value : MAX_LIST_SIZE; } trie->retain = 1; return trie; } CFBurstTrieRef CFBurstTrieCreate() { CFBurstTrieRef trie = NULL; int listSize = MAX_LIST_SIZE; CFNumberRef value = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &listSize); CFMutableDictionaryRef options = CFDictionaryCreateMutable(kCFAllocatorDefault, 1, NULL, NULL); CFDictionarySetValue(options, kCFBurstTrieCreationOptionNameContainerSize, value); trie = CFBurstTrieCreateWithOptions(options); CFRelease(value); CFRelease(options); return trie; } CFBurstTrieRef CFBurstTrieCreateFromFile(CFStringRef path) { struct statinfo sb; char filename[PATH_MAX]; int fd; /* Check valid path name */ if (!CFStringGetCString(path, filename, PATH_MAX, kCFStringEncodingUTF8)) return NULL; /* Check if file exists */ if (stat(filename, &sb) != 0) return NULL; /* Check if file can be opened */ if ((fd=open(filename, CF_OPENFLGS|O_RDONLY)) < 0) return NULL; #if DEPLOYMENT_TARGET_WINDOWS HANDLE mappedFileHandle = (HANDLE)_get_osfhandle(fd); if (!mappedFileHandle) return NULL; HANDLE mapHandle = CreateFileMapping(mappedFileHandle, NULL, PAGE_READONLY, 0, 0, NULL); if (!mapHandle) return NULL; char *map = (char *)MapViewOfFile(mapHandle, FILE_MAP_READ, 0, 0, sb.st_size); if (!map) return NULL; #else char *map = mmap(0, sb.st_size, PROT_READ, MAP_FILE|MAP_SHARED, fd, 0); #endif CFBurstTrieRef trie = NULL; TrieHeader *header = (TrieHeader *)map; if (((uint32_t*)map)[0] == 0xbabeface) { trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie)); trie->mapBase = map; trie->mapSize = CFSwapInt32LittleToHost(sb.st_size); trie->mapOffset = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->rootOffset); trie->cflags = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->flags); trie->count = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->count); trie->retain = 1; #if DEPLOYMENT_TARGET_WINDOWS trie->mappedFileHandle = mappedFileHandle; trie->mapHandle = mapHandle; #else // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here. close(fd); #endif } else if (header->signature == 0xcafebabe || header->signature == 0x0ddba11) { trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie)); trie->mapBase = map; trie->mapSize = CFSwapInt32LittleToHost(sb.st_size); trie->cflags = CFSwapInt32LittleToHost(header->flags); trie->count = CFSwapInt32LittleToHost(header->count); trie->retain = 1; #if DEPLOYMENT_TARGET_WINDOWS trie->mappedFileHandle = mappedFileHandle; trie->mapHandle = mapHandle; #else // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here. close(fd); #endif } return trie; } CFBurstTrieRef CFBurstTrieCreateFromMapBytes(char *mapBase) { CFBurstTrieRef trie = NULL; TrieHeader *header = (TrieHeader *)mapBase; if (mapBase && ((uint32_t*)mapBase)[0] == 0xbabeface) { trie = (CFBurstTrieRef) malloc(sizeof(struct _CFBurstTrie)); trie->mapBase = mapBase; trie->mapSize = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->size); trie->mapOffset = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->rootOffset); trie->cflags = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->flags); trie->count = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->count); trie->retain = 1; } else if (mapBase && (header->signature == 0xcafebabe || header->signature == 0x0ddba11)) { trie = (CFBurstTrieRef) malloc(sizeof(struct _CFBurstTrie)); trie->mapBase = mapBase; trie->mapSize = CFSwapInt32LittleToHost(header->size); trie->cflags = CFSwapInt32LittleToHost(header->flags); trie->count = CFSwapInt32LittleToHost(header->count); trie->retain = 1; } return trie; } Boolean CFBurstTrieInsert(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex payload) { return CFBurstTrieAddWithWeight(trie, term, termRange, 1, (uint32_t)payload); } Boolean CFBurstTrieAdd(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t payload) { return CFBurstTrieAddWithWeight(trie, term, termRange, 1, payload); } Boolean CFBurstTrieInsertCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex payload) { return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, 1, (uint32_t)payload); } Boolean CFBurstTrieAddCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t payload) { return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, 1, payload); } Boolean CFBurstTrieInsertUTF8String(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, CFIndex payload) { return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, 1, (uint32_t)payload); } Boolean CFBurstTrieAddUTF8String(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, uint32_t payload) { return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, 1, payload); } Boolean CFBurstTrieInsertWithWeight(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex weight, CFIndex payload) { return CFBurstTrieAddWithWeight(trie, term, termRange, weight, (uint32_t)payload); } Boolean CFBurstTrieAddWithWeight(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t weight, uint32_t payload) { Boolean success = false; CFIndex size = MAX_STRING_ALLOCATION_SIZE; CFIndex bytesize = termRange.length * 4; //** 4-byte max character size if (!trie->mapBase && termRange.length < MAX_STRING_SIZE && payload > 0) { CFIndex length; UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1]; UInt8 *key = buffer; if (bytesize >= size) { size = bytesize; key = (UInt8 *) malloc(sizeof(UInt8) * size + 1); } CFStringGetBytes(term, termRange, kCFStringEncodingUTF8, (UInt8)'-', (Boolean)0, key, size, &length); key[length] = 0; success = CFBurstTrieAddUTF8StringWithWeight(trie, key, length, weight, payload); if (buffer != key) free(key); } return success; } Boolean CFBurstTrieInsertCharactersWithWeight(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex weight, CFIndex payload) { return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, weight, (uint32_t)payload); } Boolean CFBurstTrieAddCharactersWithWeight(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t weight, uint32_t payload) { Boolean success = false; CFIndex size = MAX_STRING_ALLOCATION_SIZE; CFIndex bytesize = numChars * 4; //** 4-byte max character size if (!trie->mapBase && numChars < MAX_STRING_SIZE && payload > 0) { CFIndex length; UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1]; UInt8 *key = buffer; if (bytesize >= size) { size = bytesize; key = (UInt8 *) malloc(sizeof(UInt8) * size + 1); } length = burstTrieConvertCharactersToUTF8(chars, numChars, key); key[length] = 0; success = CFBurstTrieAddUTF8StringWithWeight(trie, key, length, weight, payload); if (buffer != key) free(key); } return success; } Boolean CFBurstTrieInsertUTF8StringWithWeight(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, CFIndex weight, CFIndex payload) { return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, weight, (uint32_t)payload); } Boolean CFBurstTrieAddUTF8StringWithWeight(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, uint32_t weight, uint32_t payload) { CFBTInsertCode code = FailedInsert; if (!trie->mapBase && numChars < MAX_STRING_SIZE*4 && payload > 0) { code = addCFBurstTrieLevel(trie, &trie->root, chars, numChars, weight, payload); if (code == NewTerm) trie->count++; } return code > FailedInsert; } Boolean CFBurstTrieFind(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex *payload) { uint32_t p; if (CFBurstTrieContains(trie, term, termRange, &p)) { SetPayload(payload, p); return true; } return false; } Boolean CFBurstTrieContains(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t *payload) { Boolean success = false; CFIndex size = MAX_STRING_ALLOCATION_SIZE; CFIndex bytesize = termRange.length * 4; //** 4-byte max character size if (termRange.length < MAX_STRING_SIZE) { CFIndex length; UInt8 buffer[MAX_STRING_ALLOCATION_SIZE+1]; UInt8 *key = buffer; if (bytesize >= size) { size = bytesize; key = (UInt8 *) malloc(sizeof(UInt8) * size + 1); } CFStringGetBytes(term, termRange, kCFStringEncodingUTF8, (UInt8)'-', (Boolean)0, key, size, &length); key[length] = 0; success = CFBurstTrieContainsUTF8String(trie, key, length, payload); if (buffer != key) free(key); } return success; } Boolean CFBurstTrieFindCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex *payload) { uint32_t p; if (CFBurstTrieContainsCharacters(trie, chars, numChars, &p)) { SetPayload(payload, p); return true; } return false; } Boolean CFBurstTrieContainsCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t *payload) { Boolean success = false; CFIndex size = MAX_STRING_ALLOCATION_SIZE; CFIndex bytesize = numChars * 4; //** 4-byte max character size if (numChars < MAX_STRING_SIZE) { CFIndex length; UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1]; UInt8 *key = buffer; if (bytesize >= size) { size = bytesize; key = (UInt8 *) malloc(sizeof(UInt8) * size + 1); } length = burstTrieConvertCharactersToUTF8(chars, numChars, key); key[length] = 0; success = CFBurstTrieContainsUTF8String(trie, key, length, payload); if (buffer != key) free(key); } return success; } Boolean CFBurstTrieFindUTF8String(CFBurstTrieRef trie, UInt8 *key, CFIndex length, CFIndex *payload) { uint32_t p; if (CFBurstTrieContainsUTF8String(trie, key, length, &p)) { SetPayload(payload, p); return true; } return false; } Boolean CFBurstTrieContainsUTF8String(CFBurstTrieRef trie, UInt8 *key, CFIndex length, uint32_t *payload) { Boolean success = false; if (length < MAX_STRING_SIZE) { if (trie->mapBase && ((fileHeader *)trie->mapBase)->signature == 0xbabeface) { bool prefix = (trie->cflags & kCFBurstTriePrefixCompression); success = burstTrieMappedFind((DiskTrieLevelRef)(trie->mapBase+CFSwapInt32LittleToHost((((uint32_t*)trie->mapBase)[1]))), trie->mapBase, key, length, payload, prefix); } else if (trie->mapBase && trie->cflags & (kCFBurstTriePrefixCompression | kCFBurstTrieSortByKey)) { _CFBurstTrieCursor cursor; if (!CFBurstTrieSetCursorForBytes(trie, &cursor, key, length)) return FALSE; return CFBurstTrieCursorGetPayload(&cursor, payload); } else { uint32_t found = 0; void *cursor = 0; traverseCFBurstTrieWithCursor(trie, key, length, &cursor, true, &found, containsKey); if (found) SetPayload(payload, found); success = found > 0; } } return success; } Boolean CFBurstTrieSerialize(CFBurstTrieRef trie, CFStringRef path, CFBurstTrieOpts opts) { Boolean success = false; if (trie->mapBase) { return success; } else { int fd; char filename[PATH_MAX]; /* Check valid path name */ if (!CFStringGetCString(path, filename, PATH_MAX, kCFStringEncodingUTF8)) return success; /* Check if file can be opened */ if ((fd=open(filename, CF_OPENFLGS|O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) < 0) return success; if (CFBurstTrieSerializeWithFileDescriptor(trie, fd, opts)) success = true; close(fd); } return success; } Boolean CFBurstTrieSerializeWithFileDescriptor(CFBurstTrieRef trie, int fd, CFBurstTrieOpts opts) { Boolean success = false; if (!trie->mapBase && fd >= 0) { off_t start_offset = lseek(fd, 0, SEEK_END); trie->cflags = opts; trie->mapSize = serializeCFBurstTrie(trie, start_offset, fd); #if DEPLOYMENT_TARGET_WINDOWS HANDLE mappedFileHandle = (HANDLE)_get_osfhandle(fd); // We need to make sure we have our own handle to keep this file open as long as the mmap lasts DuplicateHandle(GetCurrentProcess(), mappedFileHandle, GetCurrentProcess(), &mappedFileHandle, 0, 0, DUPLICATE_SAME_ACCESS); HANDLE mapHandle = CreateFileMapping(mappedFileHandle, NULL, PAGE_READONLY, 0, 0, NULL); if (!mapHandle) return NULL; char *map = (char *)MapViewOfFile(mapHandle, FILE_MAP_READ, 0, start_offset, trie->mapSize); if (!map) return NULL; trie->mapBase = map; trie->mapHandle = mapHandle; trie->mappedFileHandle = mappedFileHandle; #else trie->mapBase = mmap(0, trie->mapSize, PROT_READ, MAP_FILE|MAP_SHARED, fd, start_offset); #endif success = true; } return success; } void CFBurstTrieTraverse(CFBurstTrieRef trie, void *ctx, void (*callback)(void*, const UInt8*, uint32_t, uint32_t)) { TrieHeader *header = (TrieHeader *)trie->mapBase; if (!trie->mapBase || (header->signature == 0xcafebabe || header->signature == 0x0ddba11)) { void *cursor = 0; TraverseContext context; context.context = ctx; context.callback = callback; traverseCFBurstTrieWithCursor(trie, (const uint8_t *)"", 0, &cursor, false, &context, foundKey); } } void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) { traverseCFBurstTrieWithCursor(trie, prefix, prefixLen, cursor, false, ctx, callback); } void CFBurstTriePrint(CFBurstTrieRef trie) { } CFIndex CFBurstTrieGetCount(CFBurstTrieRef trie) { return trie->count; } CFBurstTrieRef CFBurstTrieRetain(CFBurstTrieRef trie) { trie->retain++; return trie; } void CFBurstTrieRelease(CFBurstTrieRef trie) { trie->retain--; if (trie->retain == 0) destroyCFBurstTrie(trie); return; } Boolean CFBurstTrieSetCursorForBytes(CFBurstTrieRef trie, CFBurstTrieCursorRef cursor, const UInt8* bytes, CFIndex length) { if (!trie->mapBase || !(trie->cflags & (kCFBurstTriePrefixCompression | kCFBurstTrieSortByKey))) { //fprintf(stderr, "CFBurstTrieCreateCursorForBytes() only support file based trie in prefix compression format.\n"); return FALSE; } TrieHeader *header = (TrieHeader*)trie->mapBase; if (length < 0 || !trie) return FALSE; cursor->trie = trie; if (trie->mapBase) { cursor->cursorType = _kCFBurstTrieCursorMapType; cursor->mapCursor.next = header->rootOffset; cursor->mapCursor.isOnPage = FALSE; cursor->mapCursor.entryOffsetInPage = 0; cursor->mapCursor.offsetInEntry = 0; cursor->mapCursor.payload = 0; } else assert(false); if (!bytes || length == 0) return TRUE; return CFBurstTrieCursorAdvanceForBytes(cursor, bytes, length); } CFBurstTrieCursorRef CFBurstTrieCreateCursorForBytes(CFBurstTrieRef trie, const UInt8* bytes, CFIndex length) { CFBurstTrieCursorRef cursor = (CFBurstTrieCursorRef)calloc(sizeof(_CFBurstTrieCursor), 1); if (!CFBurstTrieSetCursorForBytes(trie, cursor, bytes, length)) { CFBurstTrieCursorRelease(cursor); return NULL; } return cursor; } CFBurstTrieCursorRef CFBurstTrieCursorCreateByCopy(CFBurstTrieCursorRef cursor) { if (!cursor) return NULL; CFBurstTrieCursorRef newCursor = (CFBurstTrieCursorRef)calloc(sizeof(_CFBurstTrieCursor), 1); switch (cursor->cursorType) { case _kCFBurstTrieCursorMapType: copyMapCursor(&cursor->mapCursor, &newCursor->mapCursor); break; case _kCFBurstTrieCursorTrieType: assert(false); break; } newCursor->cursorType = cursor->cursorType; newCursor->trie = cursor->trie; return newCursor; } Boolean CFBurstTrieCursorIsEqual(CFBurstTrieCursorRef lhs, CFBurstTrieCursorRef rhs) { if (lhs->trie != rhs->trie || lhs->cursorType != rhs->cursorType) return FALSE; if (lhs->cursorType == _kCFBurstTrieCursorMapType) return areMapCursorsEqual(&lhs->mapCursor, &rhs->mapCursor); else return FALSE; } Boolean CFBurstTrieCursorAdvanceForBytes(CFBurstTrieCursorRef cursor, const UInt8* bytes, CFIndex length) { switch (cursor->cursorType) { case _kCFBurstTrieCursorMapType: { CompactMapCursor tempCursor; copyMapCursor(&cursor->mapCursor, &tempCursor); if (advanceMapCursor(cursor->trie, (CompactMapCursor*)&cursor->mapCursor, bytes, length)) return TRUE; else { copyMapCursor(&tempCursor, &cursor->mapCursor); return FALSE; } } case _kCFBurstTrieCursorTrieType: return FALSE; } return FALSE; } Boolean CFBurstTrieCursorGetPayload(CFBurstTrieCursorRef cursor, uint32_t *payload) { switch (cursor->cursorType) { case _kCFBurstTrieCursorMapType: return getMapCursorPayload(cursor->trie, (CompactMapCursor*)&cursor->mapCursor, payload); case _kCFBurstTrieCursorTrieType: return FALSE; } return FALSE; } void CFBurstTrieCursorRelease(CFBurstTrieCursorRef cursor) { if (!cursor) return; free(cursor); } void CFBurstTrieTraverseFromCursor(CFBurstTrieCursorRef cursor, void *ctx, CFBurstTrieTraversalCallback callback) { if (!cursor) return; UInt8 *bytes = (UInt8*)calloc(1, MAX_KEY_LENGTH); uint32_t capacity = MAX_KEY_LENGTH; uint32_t length = 0; Boolean stop = FALSE; switch (cursor->cursorType) { case _kCFBurstTrieCursorMapType: { CompactMapCursor tempCursor; copyMapCursor(&cursor->mapCursor, &tempCursor); traverseFromMapCursor(cursor->trie, &tempCursor, bytes, capacity,length, &stop, ctx, callback); break; } case _kCFBurstTrieCursorTrieType: break; } free(bytes); } #if 0 #pragma mark - #pragma mark Insertion #endif static ListNodeRef makeCFBurstTrieListNode(const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) { ListNodeRef node = (ListNodeRef) calloc(1, sizeof(*node) + keylen + 1); memcpy(node->string, key, keylen); node->string[keylen] = 0; node->next = 0; node->length = keylen; node->weight = weight; node->payload = payload; return node; } static void addCFBurstTrieBurstLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) { if (keylen) { NextTrie next = root->slots[*key]; ListNodeRef newNode = makeCFBurstTrieListNode(key+1, keylen-1, weight, payload); newNode->weight = weight; newNode->next = (ListNodeRef) NextTrie_GetPtr(next); next = (uintptr_t) newNode; NextTrie_SetKind(next, ListKind); root->slots[*key] = next; } else { // ** Handle payload. root->weight = weight; root->payload = payload; } } static TrieLevelRef burstCFBurstTrieLevel(CFBurstTrieRef trie, ListNodeRef list, uint32_t listCount) { TrieLevelRef newLevel = (TrieLevelRef) calloc(1, sizeof(struct _TrieLevel)); while (list) { addCFBurstTrieBurstLevel(trie, newLevel, list->string, list->length, list->weight, list->payload); ListNodeRef temp = list; list = list->next; free(temp); } return newLevel; } static CFBTInsertCode addCFBurstTrieListNode(CFBurstTrieRef trie, ListNodeRef list, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload, uint32_t *listCount) { CFBTInsertCode code = FailedInsert; uint32_t count = 1; ListNodeRef last = list; while (list) { if (list->length == keylen && memcmp(key, list->string, keylen) == 0) { list->weight += weight; list->payload = payload; code = ExistingTerm; break; } else { count++; last = list; list = list->next; } } if (!list) { last->next = makeCFBurstTrieListNode(key, keylen, weight, payload); code = NewTerm; } *listCount = count; return code; } static CFBTInsertCode addCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) { CFBTInsertCode code = FailedInsert; if (keylen) { NextTrie next = root->slots[*key]; if (NextTrie_GetKind(next) == TrieKind) { TrieLevelRef nextLevel = (TrieLevelRef) NextTrie_GetPtr(next); code = addCFBurstTrieLevel(trie, nextLevel, key+1, keylen-1, weight, payload); } else { if (NextTrie_GetKind(next) == ListKind) { uint32_t listCount; ListNodeRef listNode = (ListNodeRef) NextTrie_GetPtr(next); code = addCFBurstTrieListNode(trie, listNode, key+1, keylen-1, weight, payload, &listCount); if (listCount > trie->containerSize) { next = (uintptr_t) burstCFBurstTrieLevel(trie, listNode, listCount); NextTrie_SetKind(next, TrieKind); } } else { // ** Make a new list node next = (uintptr_t) makeCFBurstTrieListNode(key+1, keylen-1, weight, payload); NextTrie_SetKind(next, ListKind); code = NewTerm; } root->slots[*key] = next; } } else { // ** Handle payload if (!root->weight) code = NewTerm; else code = ExistingTerm; root->weight += weight; root->payload = payload; } return code; } #if 0 #pragma mark - #pragma mark Searching #endif static void findCFBurstTrieList(CFBurstTrieRef trie, TrieCursor *cursor, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) { ListNodeRef list = (ListNodeRef)NextTrie_GetPtr(cursor->next); int len = cursor->prefixlen-cursor->keylen; len = len <= 0 ? 0 : len; while (list) { int lencompare = list->length-len; if (list->length >= len && (len == 0 || memcmp(list->string, cursor->prefix+cursor->keylen, len) == 0)) { memcpy(cursor->key+cursor->keylen, list->string, list->length); cursor->key[cursor->keylen+list->length] = 0; cursor->next = (NextTrie)list; if (list->payload && callback(ctx, cursor->key, list->payload, lencompare==0)) return; } list = list->next; } } static void findCFBurstTrieMappedPage(CFBurstTrieRef trie, MapCursor *cursor, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) { Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); uint32_t end = page->length; uint32_t cur = 0; int len = cursor->prefixlen-cursor->keylen; len = len <= 0 ? 0 : len; if (trie->cflags & kCFBurstTriePrefixCompression) { uint8_t pfx[CHARACTER_SET_SIZE]; PageEntryPacked *lastEntry = 0; while (cur < end) { PageEntryPacked *entry = (PageEntryPacked *)&page->data[cur]; int lencompare = (entry->strlen+entry->pfxLen)-len; if (lastEntry && entry->pfxLen>lastEntry->pfxLen) memcpy(pfx+lastEntry->pfxLen, lastEntry->string, entry->pfxLen-lastEntry->pfxLen); if (lencompare >= 0 && (len == 0 || (__builtin_memcmp(pfx, cursor->prefix+cursor->keylen, entry->pfxLen) == 0 && __builtin_memcmp(entry->string, cursor->prefix+cursor->keylen+entry->pfxLen, cursor->prefixlen-cursor->keylen-entry->pfxLen) == 0))) { memcpy(cursor->key+cursor->keylen, pfx, entry->pfxLen); memcpy(cursor->key+cursor->keylen+entry->pfxLen, entry->string, entry->strlen); cursor->key[cursor->keylen+entry->pfxLen+entry->strlen] = 0; if (entry->payload && callback(ctx, (const uint8_t *)cursor->key, entry->payload, lencompare==0)) return; } lastEntry = entry; cur += sizeof(*entry) + entry->strlen; } } else { while (cur < end) { PageEntry *entry = (PageEntry *)&page->data[cur]; int lencompare = entry->strlen-len; if (lencompare >= 0 && __builtin_memcmp(entry->string, cursor->prefix+cursor->keylen, len) == 0) { memcpy(cursor->key+cursor->keylen, entry->string, entry->strlen); cursor->key[cursor->keylen+entry->strlen] = 0; if (entry->payload && callback(ctx, (const uint8_t *)cursor->key, entry->payload, lencompare==0)) return; } cur += sizeof(*entry) + entry->strlen; } } } static void findCFBurstTrieLevel(CFBurstTrieRef trie, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) { if (cursor->keylen < cursor->prefixlen) { cursor->next = ((TrieLevelRef)NextTrie_GetPtr(cursor->next))->slots[cursor->prefix[cursor->keylen]]; cursor->key[cursor->keylen++] = cursor->prefix[cursor->keylen]; if (NextTrie_GetKind(cursor->next) == TrieKind) { findCFBurstTrieLevel(trie, cursor, exactmatch, ctx, callback); } else if (NextTrie_GetKind(cursor->next) == ListKind) { findCFBurstTrieList(trie, cursor, ctx, callback); } } else { TrieLevelRef root = (TrieLevelRef)NextTrie_GetPtr(cursor->next); if (root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return; if (cursor->keylen == cursor->prefixlen && exactmatch) return; traverseCFBurstTrieLevel(trie, root, cursor, exactmatch, ctx, callback); } } static void findCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) { CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); if (cursor->keylen < cursor->prefixlen) { uint8_t mykey = *(cursor->prefix+cursor->keylen); cursor->key[cursor->keylen++] = *(cursor->prefix+cursor->keylen); uint8_t slot = mykey / 64; uint8_t bit = mykey % 64; uint32_t item = 0; uint64_t bword = root->bitmap[slot]; if (bword & (1ull << bit)) { // ** Count all the set bits up to this bit for (int i=0; i < slot; i++) item += __builtin_popcountll(root->bitmap[i]); item += __builtin_popcountll(bword & ((1ull << bit)-1)); uint32_t offset = root->slots[item]; cursor->next = offset; if (DiskNextTrie_GetKind(offset) == TrieKind) { findCFBurstTrieMappedLevel(trie, cursor, exactmatch, ctx, callback); } else if (DiskNextTrie_GetKind(offset) == CompactTrieKind) { findCFBurstTrieCompactMappedLevel(trie, cursor, exactmatch, ctx, callback); } else if (DiskNextTrie_GetKind(offset) == ListKind) { findCFBurstTrieMappedPage(trie, cursor, ctx, callback); } } } else { if(root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return; if (cursor->keylen == cursor->prefixlen && exactmatch) return; traverseCFBurstTrieCompactMappedLevel(trie, root, cursor, exactmatch, ctx, callback); } } static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) { MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); if (cursor->keylen < cursor->prefixlen) { uint8_t slot = *(cursor->prefix+cursor->keylen); cursor->next = root->slots[slot]; cursor->key[cursor->keylen++] = slot; if (DiskNextTrie_GetKind(cursor->next) == TrieKind) { findCFBurstTrieMappedLevel(trie, cursor, exactmatch, ctx, callback); } else if (DiskNextTrie_GetKind(cursor->next) == CompactTrieKind) { findCFBurstTrieCompactMappedLevel(trie, cursor, exactmatch, ctx, callback); } else if (DiskNextTrie_GetKind(cursor->next) == ListKind) { findCFBurstTrieMappedPage(trie, cursor, ctx, callback); } } else { if (root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return; if (cursor->keylen == cursor->prefixlen && exactmatch) return; traverseCFBurstTrieMappedLevel(trie, root, cursor, exactmatch, ctx, callback); } } static void traverseCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) { cursor->key[cursor->keylen] = 0; uint32_t len = cursor->keylen; for (int i=0; i < CHARACTER_SET_SIZE; i++) { NextTrie next = root->slots[i]; cursor->keylen = len; cursor->key[cursor->keylen++] = i; if (NextTrie_GetKind(next) == TrieKind) { TrieLevelRef level = (TrieLevelRef)NextTrie_GetPtr(next); if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return; if (cursor->keylen == cursor->prefixlen && exactmatch) return; traverseCFBurstTrieLevel(trie, level, cursor, exactmatch, ctx, callback); } else if (NextTrie_GetKind(next) == ListKind) { cursor->next = next; cursor->key[cursor->keylen] = 0; findCFBurstTrieList(trie, cursor, ctx, callback); } } } static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) { cursor->key[cursor->keylen] = 0; uint32_t len = cursor->keylen; for (int i=0; i < CHARACTER_SET_SIZE; i++) { uint32_t offset = (uint32_t)root->slots[i]; cursor->keylen = len; cursor->key[cursor->keylen++] = i; if (DiskNextTrie_GetKind(offset) == TrieKind) { MapTrieLevelRef level = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset); if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return; if (cursor->keylen == cursor->prefixlen && exactmatch) return; traverseCFBurstTrieMappedLevel(trie, level, cursor, exactmatch, ctx, callback); } else if (DiskNextTrie_GetKind(offset) == CompactTrieKind) { CompactMapTrieLevelRef level = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset); if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return; if (cursor->keylen == cursor->prefixlen && exactmatch) return; traverseCFBurstTrieCompactMappedLevel(trie, level, cursor, exactmatch, ctx, callback); } else if (DiskNextTrie_GetKind(offset) == ListKind) { cursor->next = offset; cursor->key[cursor->keylen] = 0; findCFBurstTrieMappedPage(trie, cursor, ctx, callback); } } } static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, CompactMapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) { cursor->key[cursor->keylen] = 0; uint32_t len = cursor->keylen; for (uint32_t c=0; c < CHARACTER_SET_SIZE; c++) { //** This could be optimized to remember what the last slot/item was and not count bits over and over. uint8_t mykey = c; uint32_t slot = mykey / 64; uint32_t bit = mykey % 64; uint32_t item = 0; uint64_t bword = root->bitmap[slot]; cursor->keylen = len; if (bword & (1ull << bit)) { // ** Count all the set bits up to this bit for (int i=0; i < slot; i++) item += __builtin_popcountll(root->bitmap[i]); item += __builtin_popcountll(bword & ((1ull << bit)-1)); uint32_t offset = root->slots[item]; cursor->key[cursor->keylen++] = mykey; if(DiskNextTrie_GetKind(offset) == CompactTrieKind) { CompactMapTrieLevelRef level = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset); if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return; if (cursor->keylen == cursor->prefixlen && exactmatch) return; traverseCFBurstTrieCompactMappedLevel(trie, level, cursor, exactmatch, ctx, callback); } else if(DiskNextTrie_GetKind(offset) == TrieKind) { traverseCFBurstTrieMappedLevel(trie, (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset), cursor, exactmatch, ctx, callback); } else if (DiskNextTrie_GetKind(offset) == ListKind) { cursor->next = offset; cursor->key[cursor->keylen] = 0; findCFBurstTrieMappedPage(trie, cursor, ctx, callback); } } } } static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) { if (trie->mapBase) { if (trie->cflags & kCFBurstTriePrefixCompression) { fprintf(stderr, "Please use CFBurstTrieCursorRef API for file based trie.\n"); return; } else { TrieHeader *header = (TrieHeader *)trie->mapBase; MapCursor csr; csr.next = header->rootOffset; csr.prefix = prefix; csr.prefixlen = prefixLen; csr.key[0] = 0; csr.keylen = 0; findCFBurstTrieMappedLevel(trie, &csr, exactmatch, ctx, callback); } } else { TrieCursor csr; csr.next = ((unsigned long)&trie->root)|TrieKind; csr.prefix = prefix; csr.prefixlen = prefixLen; csr.key[0] = 0; csr.keylen = 0; findCFBurstTrieLevel(trie, &csr, exactmatch, ctx, callback); } } CF_INLINE uint32_t getPackedPageEntrySize(PageEntryPacked *entry) { return sizeof(PageEntryPacked) + entry->strlen; } CF_INLINE uint32_t getPageEntrySize(PageEntry *entry) { return sizeof(PageEntry) + entry->strlen; } /* static void _printPageEntry(PageEntryPacked *entry) { printf("entry 0x%p:\n", entry); printf("pfxLen = %d, strLen = %d\n", entry->pfxLen, entry->strlen); if (entry->strlen > 0) { printf("string = "); for (int i = 0; i < entry->strlen; ++i) printf("%d ", entry->string[i]); printf("\n"); } printf("\n"); } */ static Boolean advanceCursorOnMappedPageForByte(Page *page, CompactMapCursor *cursor, UInt8 byte) { PageEntryPacked *entry; Boolean found = FALSE; uint32_t minPrefixLength = 0; if (cursor->isOnPage) { entry = (PageEntryPacked *)&page->data[cursor->entryOffsetInPage]; //_printPageEntry(entry); BOOL shouldContinue = TRUE; if (!(cursor->entryOffsetInPage == 0 && entry->strlen == 0)) { if (cursor->entryOffsetInPage == entry->strlen - 1) { minPrefixLength = entry->pfxLen + entry->strlen; cursor->entryOffsetInPage += getPackedPageEntrySize(entry); } else { cursor->offsetInEntry++; if (entry->string[cursor->offsetInEntry] == byte) found = TRUE; else if (entry->string[cursor->offsetInEntry] > byte) shouldContinue = FALSE; else { minPrefixLength = entry->pfxLen + cursor->offsetInEntry; cursor->entryOffsetInPage += getPackedPageEntrySize(entry); } } } if (found) { cursor->isOnPage = TRUE; return TRUE; } else if (!shouldContinue) return FALSE; } else { cursor->entryOffsetInPage = 0; } uint32_t pageSize = page->length - sizeof(Page); while (cursor->entryOffsetInPage < pageSize) { entry = (PageEntryPacked *)&page->data[cursor->entryOffsetInPage]; //_printPageEntry(entry); if (minPrefixLength > entry->pfxLen) break; else if (minPrefixLength < entry->pfxLen) cursor->entryOffsetInPage += getPackedPageEntrySize(entry); else { if (entry->strlen == 0) cursor->entryOffsetInPage += getPackedPageEntrySize(entry); else { if (entry->string[0] > byte) // Entries are sorted alphabetically break; else if (entry->string[0] < byte) cursor->entryOffsetInPage += getPackedPageEntrySize(entry); else { cursor->offsetInEntry = 0; found = TRUE; break; } } } } if (found) cursor->isOnPage = TRUE; return found; } static Boolean advanceCursorMappedPageWithPerfixCompression(Page *page, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) { if (length == 0) { PageEntryPacked *entry = (PageEntryPacked*)&page->data[0]; if (!cursor->isOnPage) { cursor->entryOffsetInPage = 0; cursor->offsetInEntry = 0; cursor->isOnPage = entry->pfxLen == 0 && entry->strlen == 0; } getMapCursorPayloadFromPackedPageEntry(entry, cursor, &cursor->payload); return TRUE; } for (CFIndex i = 0; i < length; ++i) { if (!advanceCursorOnMappedPageForByte(page, cursor, bytes[i])) return FALSE; } PageEntryPacked *entry = (PageEntryPacked*)&page->data[cursor->entryOffsetInPage]; getMapCursorPayloadFromPackedPageEntry(entry, cursor, &cursor->payload); return TRUE; } static Boolean advanceCursorMappedPageSortedByKey(Page *page, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) { if (length == 0) { PageEntry*entry = (PageEntry*)&page->data[0]; if (!cursor->isOnPage) { cursor->entryOffsetInPage = 0; cursor->offsetInEntry = 0; cursor->isOnPage = entry->strlen == 0; } getMapCursorPayloadFromPageEntry(entry, cursor, &cursor->payload); return TRUE; } PageEntry *entry; uint32_t pageSize = page->length - sizeof(Page); const UInt8 * prefix = NULL; uint32_t prefixLength = 0; if (cursor->isOnPage) { entry = (PageEntry*)&page->data[cursor->entryOffsetInPage]; prefix = entry->string; prefixLength = cursor->offsetInEntry + 1; } while (cursor->entryOffsetInPage < pageSize) { PageEntry *entry = (PageEntry*)&page->data[cursor->entryOffsetInPage]; if (entry->strlen == 0) { cursor->entryOffsetInPage += getPageEntrySize(entry); continue; } if (entry->strlen <= prefixLength) return FALSE; else { int cmpResult = 0; if (prefixLength > 0) cmpResult = __builtin_memcmp(entry->string, prefix, prefixLength); if (cmpResult > 0) return FALSE; else if (cmpResult == 0) { if (entry->strlen < prefixLength + length) { int cmpResult2 = __builtin_memcmp(entry->string + prefixLength, bytes, entry->strlen - prefixLength); if (cmpResult2 > 0) return FALSE; else cursor->entryOffsetInPage += getPageEntrySize(entry); } else { int cmpResult2 = __builtin_memcmp(entry->string + prefixLength, bytes, length); if (cmpResult2 > 0) return FALSE; else if (cmpResult2 == 0) { cursor->isOnPage = TRUE; cursor->offsetInEntry = prefixLength + length - 1; getMapCursorPayloadFromPageEntry(entry, cursor, &cursor->payload); return TRUE; } else cursor->entryOffsetInPage += getPageEntrySize(entry); } } else cursor->entryOffsetInPage += getPageEntrySize(entry); } } return FALSE; } static Boolean advanceCursorMappedPage(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) { if (!bytes || length < 0) return FALSE; Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); uint32_t pageSize = page->length - sizeof(Page); if (pageSize == 0) return FALSE; if (trie->cflags & kCFBurstTrieSortByKey) return advanceCursorMappedPageSortedByKey(page, cursor, bytes, length); else if (trie->cflags & kCFBurstTriePrefixCompression) return advanceCursorMappedPageWithPerfixCompression(page, cursor, bytes, length); else return FALSE; } static Boolean advanceCursorCompactMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) { if (!bytes || length < 0) return FALSE; CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); if (length == 0) { cursor->payload = root->payload; return TRUE; } uint8_t slot = bytes[0] / 64; uint8_t bit = bytes[0] % 64; uint32_t item = 0; uint64_t bword = root->bitmap[slot]; if (bword & (1ull << bit)) { // Count all the set bits up to this bit for (int i = 0; i < slot; ++i) item += __builtin_popcountll(root->bitmap[i]); item += __builtin_popcountll(bword & ((1ull << bit)-1)); cursor->next = root->slots[item]; return advanceMapCursor(trie, cursor, bytes + 1, length - 1); } else { return FALSE; } } static Boolean advanceCursorMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) { if (!bytes || length < 0) return FALSE; MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); if (length == 0) { cursor->payload = root->payload; return TRUE; } cursor->next = root->slots[bytes[0]]; return advanceMapCursor(trie, cursor, bytes + 1, length - 1); } static Boolean advanceMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) { bool result = FALSE; switch (DiskNextTrie_GetKind(cursor->next)) { case TrieKind: result = advanceCursorMappedLevel(trie, cursor, bytes, length); break; case CompactTrieKind: result = advanceCursorCompactMappedLevel(trie, cursor, bytes, length); break; case ListKind: result = advanceCursorMappedPage(trie, cursor, bytes, length); break; case Nothing: { TrieHeader *header = (TrieHeader*)trie->mapBase; if (cursor->next == header->rootOffset) result = advanceCursorMappedLevel(trie, cursor, bytes, length); } } return result; } static void traverseFromMapCursorMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) { MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); if (root->payload) { callback(ctx, bytes, length, root->payload, stop); if (*stop) return; } if (length >= capacity) return; for (int i = 0; i < CHARACTER_SET_SIZE; ++i) {i = bytes[length] = i; cursor->next = (uint32_t)root->slots[i];; cursor->isOnPage = FALSE; cursor->entryOffsetInPage = 0; cursor->offsetInEntry = 0; cursor->payload = 0; traverseFromMapCursor(trie, cursor, bytes, capacity - 1, length + 1, stop, ctx, callback); if (*stop) return; } } static void traverseFromMapCursorCompactMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) { CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); if (root->payload) { callback(ctx, bytes, length, root->payload, stop); if (*stop) return; } if (length >= capacity) return; for (int c = 0; c < CHARACTER_SET_SIZE; ++c) { bytes[length] = c; //This could be optimized to remember what the last slot/item was and not count bits over and over. uint8_t slot = c / 64; uint8_t bit = c % 64; uint32_t item = 0; uint64_t bword = root->bitmap[slot]; if (bword & (1ull << bit)) { // Count all the set bits up to this bit for (int i = 0; i < slot; ++i) item += __builtin_popcountll(root->bitmap[i]); item += __builtin_popcountll(bword & ((1ull << bit)-1)); cursor->next = root->slots[item]; cursor->isOnPage = FALSE; cursor->entryOffsetInPage = 0; cursor->offsetInEntry = 0; cursor->payload = 0; traverseFromMapCursor(trie, cursor, bytes, capacity - 1, length + 1, stop, ctx, callback); if (*stop) return; } } } static void traverseFromMapCursorMappedPageWithPrefixCompression(Page *page, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) { uint32_t pageSize = page->length - sizeof(Page); uint32_t offset = cursor->entryOffsetInPage; uint32_t minPrefixLength = 0; if (cursor->isOnPage) { PageEntryPacked *entry = (PageEntryPacked*)&page->data[offset]; int32_t remainingLength = entry->strlen - cursor->offsetInEntry - 1; if (remainingLength >= 0 && remainingLength <= capacity) { memcpy(bytes + length, entry->string + cursor->offsetInEntry + 1, remainingLength); callback(ctx, bytes, length + remainingLength, entry->payload, stop); if (*stop) return; } minPrefixLength = entry->pfxLen + cursor->offsetInEntry; offset += getPackedPageEntrySize(entry); } PageEntryPacked *previousEntry = NULL; while (offset < pageSize) { PageEntryPacked *entry = (PageEntryPacked*)&page->data[offset]; if (minPrefixLength > entry->pfxLen) break; else if (entry->payload && entry->strlen <= capacity) { if (previousEntry) length -= previousEntry->strlen + previousEntry->pfxLen - entry->pfxLen; memcpy(bytes + length, entry->string, entry->strlen); callback(ctx, bytes, length + entry->strlen, entry->payload, stop); length += entry->strlen; if (*stop) return; } previousEntry = entry; offset += getPackedPageEntrySize(entry); } } static void traverseFromMapCursorMappedPageSortedByKey(Page *page, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) { uint32_t pageSize = page->length - sizeof(Page); uint32_t offset = cursor->entryOffsetInPage; uint32_t prefixLength = 0; const UInt8 *prefix = NULL; if (cursor->isOnPage) { PageEntry *entry = (PageEntry*)&page->data[offset]; int32_t remainingLength = entry->strlen - cursor->offsetInEntry - 1; if (remainingLength >= 0 && remainingLength <= capacity) { memcpy(bytes + length, entry->string + cursor->offsetInEntry, remainingLength); callback(ctx, bytes, length + remainingLength, entry->payload, stop); if (*stop) return; } prefixLength = cursor->offsetInEntry + 1; prefix = entry->string; offset += getPageEntrySize(entry); } while (offset < pageSize) { PageEntry *entry = (PageEntry*)&page->data[offset]; if (entry->strlen < prefixLength) break; else { int cmpResult = __builtin_memcmp(entry->string, prefix, prefixLength); if (cmpResult > 0) break; else if (entry->payload && entry->strlen <= capacity) { if (entry->strlen > 0) memcpy(bytes + length, entry->string + prefixLength, entry->strlen - prefixLength); callback(ctx, bytes, length + entry->strlen - prefixLength, entry->payload, stop); if (*stop) return; } offset += getPageEntrySize(entry); } } } static void traverseFromMapCursorMappedPage(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) { Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); if (trie->cflags & kCFBurstTrieSortByKey) traverseFromMapCursorMappedPageSortedByKey(page, cursor, bytes, capacity, length, stop, ctx, callback); else if (trie->cflags & kCFBurstTriePrefixCompression) traverseFromMapCursorMappedPageWithPrefixCompression(page, cursor, bytes, capacity, length, stop, ctx, callback); } void traverseFromMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) { switch (DiskNextTrie_GetKind(cursor->next)) { case TrieKind: traverseFromMapCursorMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback); break; case CompactTrieKind: traverseFromMapCursorCompactMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback); break; case ListKind: traverseFromMapCursorMappedPage(trie, cursor, bytes, capacity, length, stop, ctx, callback); break; case Nothing: { TrieHeader *header = (TrieHeader*)trie->mapBase; if (cursor->next == header->rootOffset) { traverseFromMapCursorMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback); } break; } } } void copyMapCursor(const CompactMapCursor *source, CompactMapCursor* destination) { destination->next = source->next; destination->entryOffsetInPage = source->entryOffsetInPage; destination->offsetInEntry = source->offsetInEntry; destination->isOnPage = source->isOnPage; destination->payload = source->payload; } Boolean areMapCursorsEqual(const CompactMapCursor *lhs, const CompactMapCursor *rhs) { return lhs->entryOffsetInPage == rhs->entryOffsetInPage && lhs->isOnPage == rhs->isOnPage && lhs->next == rhs->next && lhs->offsetInEntry == rhs->offsetInEntry; } static Boolean getMapCursorPayloadFromPackedPageEntry(PageEntryPacked *entry, const CompactMapCursor *cursor, uint32_t *payload) { if (payload) *payload = 0; if (cursor->entryOffsetInPage == 0 && cursor->offsetInEntry == 0 && entry->strlen == 0) { if (payload) *payload = entry->payload; return TRUE; } else if (cursor->offsetInEntry != entry->strlen - 1) return FALSE; else { if (payload) *payload = entry->payload; return TRUE; } } static Boolean getMapCursorPayloadFromPageEntry(PageEntry *entry, const CompactMapCursor *cursor, uint32_t *payload) { if (payload) *payload = 0; if (cursor->entryOffsetInPage == 0 && cursor->offsetInEntry == 0 && entry->strlen == 0) { if (payload) *payload = entry->payload; return TRUE; } else if (cursor->offsetInEntry != entry->strlen - 1) return FALSE; else { if (payload) *payload = entry->payload; return TRUE; } } Boolean getMapCursorPayload(CFBurstTrieRef trie, const CompactMapCursor *cursor, uint32_t *payload) { if (!cursor) return FALSE; if (cursor->payload) { if (payload) *payload = cursor->payload; return TRUE; } return FALSE; } // Legacy static Boolean burstTrieMappedFind(DiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) { Boolean success = false; if (length) { uint32_t offset = CFSwapInt32LittleToHost((uint32_t)trie->slots[*key]); if(DiskNextTrie_GetKind(offset) == TrieKind) { return burstTrieMappedFind((DiskTrieLevelRef)DiskNextTrie_GetPtr(map,offset), map, key+1, length-1, payload, prefix); } else if(DiskNextTrie_GetKind(offset) == CompactTrieKind) { return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix); } else { if(DiskNextTrie_GetKind(offset) == ListKind) { return burstTrieMappedPageFind((StringPage *)DiskNextTrie_GetPtr(map, offset), key+1, length-1, payload, prefix); } else { return success; } } } else { if (trie->weight) { SetPayload(payload, CFSwapInt32LittleToHost(trie->payload)); success = true; } } return success; } static Boolean burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) { Boolean success = false; if (length) { uint32_t mykey = *key; uint32_t slot = mykey / 64; uint32_t bit = mykey % 64; uint32_t item = 0; uint64_t bword = CFSwapInt64LittleToHost(trie->bitmap[slot]); if (bword & (1ull << bit)) { /* Count all the set bits up to this bit */ for (int i=0; i < slot; i++) { item += __builtin_popcountll(trie->bitmap[i]); } item += __builtin_popcountll(bword & ((1ull << bit)-1)); uint32_t offset = CFSwapInt32LittleToHost((uint32_t)trie->slots[item]); if(DiskNextTrie_GetKind(offset) == TrieKind) { return burstTrieMappedFind((DiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix); } else if(DiskNextTrie_GetKind(offset) == CompactTrieKind) { return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix); } else { if(DiskNextTrie_GetKind(offset) == ListKind) { return burstTrieMappedPageFind((StringPage *)DiskNextTrie_GetPtr(map, offset), key+1, length-1, payload, prefix); } else { return success; } } } } else { if (trie->weight) { SetPayload(payload, CFSwapInt32LittleToHost(trie->payload)); success = true; } } return success; } static Boolean burstTrieMappedPageFind(StringPage *page, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) { Boolean success = false; uint32_t end = CFSwapInt32LittleToHost(page->length); uint32_t cur = 0; if (prefix) { uint8_t pfx[256]; while (cur < end) { StringPageEntryPacked *entry = (StringPageEntryPacked *)&page->data[cur]; uint16_t strlen = entry->pfxLen+CFSwapInt16LittleToHost(entry->strlen); if (strlen == length && __builtin_memcmp(pfx, key, entry->pfxLen) == 0 && __builtin_memcmp(entry->string, key+entry->pfxLen, length-entry->pfxLen) == 0) { SetPayload(payload, CFSwapInt32LittleToHost(entry->payload)); success = true; return success; } else { memcpy(pfx+entry->pfxLen, entry->string, MIN(255-entry->pfxLen, length-entry->pfxLen)); cur += sizeof(*entry) + strlen - entry->pfxLen; } } } else { while (cur < end) { StringPageEntry *entry = (StringPageEntry *)&page->data[cur]; uint16_t strlen = CFSwapInt16LittleToHost(entry->strlen); if (strlen == length && __builtin_memcmp(entry->string, key, length) == 0) { SetPayload(payload, CFSwapInt32LittleToHost(entry->payload)); success = true; return success; } else { cur += sizeof(*entry) + strlen; } } } return success; } #if 0 #pragma mark - #pragma mark Serialization #endif static bool serializeCFBurstTrieLevels(CFBurstTrieRef trie, TrieLevelRef root, uint32_t *offset, off_t start_offset, bool dispose, bool isroot, int fd) { bool dense = true; int count = 0; for (int i=0; i < CHARACTER_SET_SIZE; i++) if (root->slots[i]) count++; uint32_t this_offset = *offset; if ((trie->cflags & kCFBurstTrieBitmapCompression) && count < MAX_BITMAP_SIZE && !isroot) { size_t size = sizeof(CompactMapTrieLevel) + sizeof(uint32_t) * count; int offsetSlot = 0; CompactMapTrieLevel *maptrie = (CompactMapTrieLevel *)alloca(size); bzero(maptrie, size); *offset += size; for (int i=0; i < CHARACTER_SET_SIZE; i++) { NextTrie next = root->slots[i]; if (next) { uint32_t slot = i / 64; uint32_t bit = i % 64; maptrie->bitmap[slot] |= 1ull<slots[offsetSlot] = (TrieKind|childOffset); } else { maptrie->slots[offsetSlot] = (CompactTrieKind|childOffset); } } else { maptrie->slots[offsetSlot] = next; } offsetSlot++; } } maptrie->payload = root->payload; int bitcount = 0; for (int i=0; i < 4; i++) bitcount += __builtin_popcountll(maptrie->bitmap[i]); assert(bitcount == count); pwrite(fd, maptrie, size, this_offset+start_offset); dense = false; } else { MapTrieLevel maptrie; *offset += sizeof(maptrie); for (int i=0; i < CHARACTER_SET_SIZE; i++) { NextTrie next = root->slots[i]; if (NextTrie_GetKind(next) == TrieKind) { TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next); uint32_t childOffset = *offset; if (serializeCFBurstTrieLevels(trie, nextLevel, offset, start_offset, true, false, fd)) { maptrie.slots[i] = (TrieKind|childOffset); } else { maptrie.slots[i] = (CompactTrieKind|childOffset); } } else { maptrie.slots[i] = next; } } maptrie.payload = root->payload; pwrite(fd, &maptrie, sizeof(maptrie), this_offset+start_offset); } if (dispose) free(root); return dense; } static void serializeCFBurstTrieList(CFBurstTrieRef trie, ListNodeRef listNode, int fd) { uint32_t listCount; size_t size = trie->containerSize; // ** Temp list of nodes to sort ListNodeRef *nodes = (ListNodeRef *)malloc(sizeof(ListNodeRef) * size); for (listCount = 0; listNode; listCount++) { if (listCount >= size) { size *= 2; nodes = (ListNodeRef *)realloc(nodes, sizeof(ListNodeRef) * size); } nodes[listCount] = listNode; listNode = listNode->next; } char _buffer[MAX_BUFFER_SIZE]; char bufferSize = (sizeof(Page) + size * (sizeof(PageEntryPacked) + MAX_STRING_SIZE)); char *buffer = bufferSize < MAX_BUFFER_SIZE ? _buffer : (char *) malloc(bufferSize); Page *page = (Page *)buffer; uint32_t current = 0; if (trie->cflags & kCFBurstTriePrefixCompression) { qsort(nodes, listCount, sizeof(ListNodeRef), nodeStringCompare); ListNodeRef last = 0; for (int i=0; i < listCount; i++) { listNode = nodes[i]; uint8_t pfxLen = 0; if (last) { for ( ; pfxLen < CHARACTER_SET_SIZE-1 && pfxLen < listNode->length && pfxLen < last->length && listNode->string[pfxLen] == last->string[pfxLen]; pfxLen++); } PageEntryPacked *entry = (PageEntryPacked *)(&page->data[current]); entry->strlen = listNode->length - pfxLen; entry->payload = listNode->payload; entry->pfxLen = pfxLen; memcpy(entry->string, listNode->string+pfxLen, listNode->length-pfxLen); current += listNode->length - pfxLen + sizeof(PageEntryPacked); last = listNode; } } else { if (trie->cflags & kCFBurstTrieSortByKey) qsort(nodes, listCount, sizeof(ListNodeRef), nodeStringCompare); else qsort(nodes, listCount, sizeof(ListNodeRef), nodeWeightCompare); for (int i=0; i < listCount; i++) { listNode = nodes[i]; PageEntry *entry = (PageEntry *)(&page->data[current]); entry->strlen = listNode->length; entry->payload = listNode->payload; memcpy(entry->string, listNode->string, listNode->length); current += listNode->length + sizeof(PageEntry); } } size_t len = (sizeof(Page) + current + 3) & ~3; page->length = current; write(fd, page, len); free(nodes); if (buffer != _buffer) free(buffer); } static void serializeCFBurstTrieLists(CFBurstTrieRef trie, TrieLevelRef root, off_t start_offset, int fd) { for (int i=0; i < CHARACTER_SET_SIZE; i++) { NextTrie next = root->slots[i]; uint32_t offset; if (NextTrie_GetKind(next) == TrieKind) { TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next); serializeCFBurstTrieLists(trie, nextLevel, start_offset, fd); } else { if (NextTrie_GetKind(next) == ListKind) { ListNodeRef listNode = (ListNodeRef)NextTrie_GetPtr(next); offset = lseek(fd, 0, SEEK_CUR) - start_offset; serializeCFBurstTrieList(trie, listNode, fd); finalizeCFBurstTrieList(listNode); //assert((offset & 3)==0); root->slots[i] = (offset|ListKind); } } } } static size_t serializeCFBurstTrie(CFBurstTrieRef trie, size_t start_offset, int fd) { TrieHeader header; header.signature = 0x0ddba11; header.rootOffset = 0; header.count = trie->count; header.size = 0; header.flags = trie->cflags; header.reserved[0] = 0; uint32_t offset; lseek(fd, start_offset, SEEK_SET); size_t header_size = sizeof(header); write(fd, &header, header_size); serializeCFBurstTrieLists(trie, &trie->root, start_offset, fd); offset = lseek(fd, 0, SEEK_CUR) - start_offset; size_t off = offsetof(TrieHeader, rootOffset); size_t offsize = sizeof(offset); pwrite(fd, &offset, offsize, off+start_offset); serializeCFBurstTrieLevels(trie, &trie->root, &offset, start_offset, false, true, fd); size_t off2 = offsetof(TrieHeader, size); offsize = sizeof(offset); pwrite(fd, &offset, offsize, off2+start_offset); offset = lseek(fd, 0, SEEK_END); return (size_t)(offset-start_offset); } #if 0 #pragma mark - #pragma mark Release #endif static void destroyCFBurstTrie(CFBurstTrieRef trie) { if (trie->mapBase) { #if DEPLOYMENT_TARGET_WINDOWS UnmapViewOfFile(trie->mapBase); CloseHandle(trie->mapHandle); CloseHandle(trie->mappedFileHandle); #else munmap(trie->mapBase, trie->mapSize); #endif } else { finalizeCFBurstTrie(&trie->root); } free(trie); return; } static void finalizeCFBurstTrie(TrieLevelRef trie) { for (int i=0; i < CHARACTER_SET_SIZE; i++) { if (NextTrie_GetKind(trie->slots[i]) == TrieKind) { finalizeCFBurstTrie((TrieLevelRef)NextTrie_GetPtr(trie->slots[i])); free((void *)NextTrie_GetPtr(trie->slots[i])); } else if (NextTrie_GetKind(trie->slots[i]) == ListKind) { finalizeCFBurstTrieList((ListNodeRef)NextTrie_GetPtr(trie->slots[i])); } } } static void finalizeCFBurstTrieList(ListNodeRef node) { do { ListNodeRef next = node->next; free(node); node = next; } while(node); } #if 0 #pragma mark - #pragma mark Helpers #endif static int nodeWeightCompare(const void *a, const void *b) { ListNodeRef nodeA = *(ListNodeRef *)a; ListNodeRef nodeB = *(ListNodeRef *)b; return (nodeB->weight - nodeA->weight); } static int nodeStringCompare(const void *a, const void *b) { ListNodeRef nodeA = *(ListNodeRef *)a; ListNodeRef nodeB = *(ListNodeRef *)b; int result = memcmp((char *)nodeA->string, (char *)nodeB->string, MIN(nodeA->length, nodeB->length)); if (result == 0) result = nodeA->length-nodeB->length; return result; } bool containsKey(void *context, const uint8_t *key, uint32_t payload, bool exact) { uint32_t *ctx = (uint32_t *)context; if (exact) *ctx = payload; return exact; } static CFIndex burstTrieConvertCharactersToUTF8(UniChar *chars, CFIndex numChars, UInt8 *buffer) { uint32_t i, j; for (i = j = 0; i < numChars; i++) { UniChar c = chars[i]; if (CFStringIsSurrogateHighCharacter(c) && i + 1 < numChars && CFStringIsSurrogateLowCharacter(chars[i + 1])) { UTF32Char lc = CFStringGetLongCharacterForSurrogatePair(c, chars[i + 1]); buffer[j++] = 0xf0 + (lc >> 18); buffer[j++] = 0x80 + ((lc & 0x3ffff) >> 12); buffer[j++] = 0x80 + ((lc & 0xfff) >> 6); buffer[j++] = 0x80 + (lc & 0x3f); i++; } else { if (c < 0x80) { buffer[j++] = c; } else if (c < 0x800) { buffer[j++] = 0xc0 + (c >> 6); buffer[j++] = 0x80 + (c & 0x3f); } else { buffer[j++] = 0xe0 + (c >> 12); buffer[j++] = 0x80 + ((c & 0xfff) >> 6); buffer[j++] = 0x80 + (c & 0x3f); } } } buffer[j] = 0; return j; }