// genpname.cpp   [plain text]


/*
**********************************************************************
*   Copyright (C) 2002-2004, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   10/11/02    aliu        Creation.
**********************************************************************
*/

#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "unicode/uclean.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "uarrsort.h"
#include "unewdata.h"
#include "uoptions.h"
#include "uprops.h"
#include "propname.h"
#include "uassert.h"

#include <stdio.h>

// TODO: Clean up and comment this code.

//----------------------------------------------------------------------
// BEGIN DATA
// 
// This is the raw data to be output.  We define the data structure,
// then include a machine-generated header that contains the actual
// data.

#include "unicode/uchar.h"
#include "unicode/uscript.h"
#include "unicode/unorm.h"

/**
 * A name string paired with an integer index.  Ordering and equality
 * are delegated to uprv_comparePropertyNames().
 */
class AliasName {
public:
    const char* str;
    int32_t     index;

    AliasName(const char* str, int32_t index);

    // Three-way comparison of this name against other's name; the
    // result is whatever uprv_comparePropertyNames() reports.
    int compare(const AliasName& other) const;

    // Names are equal exactly when compare() yields zero.
    UBool operator==(const AliasName& other) const {
        return 0 == compare(other);
    }

    UBool operator!=(const AliasName& other) const {
        return 0 != compare(other);
    }
};

AliasName::AliasName(const char* aStr, int32_t anIndex)
    : str(aStr),
      index(anIndex)
{
}

int AliasName::compare(const AliasName& other) const {
    const char* const mine = str;
    const char* const theirs = other.str;
    return uprv_comparePropertyNames(mine, theirs);
}

/**
 * Pairs an enum value with the index of its name group, i.e. the
 * position in NAME_GROUP at which the names for that enum begin.
 */
class Alias {
public:
    int32_t     enumValue;
    int32_t     nameGroupIndex;

    Alias(int32_t enumValue,
             int32_t nameGroupIndex);

    /**
     * Collects the distinct, non-empty names of this alias.
     * @param stringIndices output array; receives indices into
     * STRING_TABLE (not name group indices)
     * @return number of indices written
     */
    int32_t getUniqueNames(int32_t* stringIndices) const;
};

Alias::Alias(int32_t anEnumValue,
                   int32_t aNameGroupIndex) :
    enumValue(anEnumValue),
    nameGroupIndex(aNameGroupIndex)
{
}

/**
 * An Alias that additionally carries a pointer to a list of value
 * aliases together with the length recorded for that list.
 */
class Property : public Alias {
public:
    int32_t         valueCount;
    const Alias* valueList;

    Property(int32_t enumValue,
             int32_t nameGroupIndex,
             int32_t valueCount,
             const Alias* valueList);
};

Property::Property(int32_t propEnum,
                   int32_t groupIndex,
                   int32_t numValues,
                   const Alias* values)
    : Alias(propEnum, groupIndex),
      valueCount(numValues),
      valueList(values)
{
}

// *** Include the data header ***
#include "data.h"

/* return a list of unique names, not including "", for this property
 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
 * elements, will be filled with indices into STRING_TABLE
 * @return number of indices, >= 1
 */
int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
    int32_t count = 0;
    int32_t i = nameGroupIndex;
    UBool done = FALSE;
    while (!done) {
        int32_t j = NAME_GROUP[i++];
        if (j < 0) {
            done = TRUE;
            j = -j;
        }
        if (j == 0) continue; // omit "" entries
        UBool dupe = FALSE;
        for (int32_t k=0; k<count; ++k) {
            if (stringIndices[k] == j) {
                dupe = TRUE;
                break;
            }
            // also do a string check for things like "age|Age"
            if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
                //printf("Found dupe %s|%s\n",
                //       STRING_TABLE[stringIndices[k]].str,
                //       STRING_TABLE[j].str);
                dupe = TRUE;
                break;
            }
        }
        if (dupe) continue; // omit duplicates
        stringIndices[count++] = j;
    }
    return count;
}

// END DATA
//----------------------------------------------------------------------

// Allocate an uninitialized array of `count` objects of `type` with
// uprv_malloc().  The count argument and the whole expansion are
// parenthesized so that expression arguments (e.g. n + 1) and uses
// inside larger expressions keep their intended meaning.
#define MALLOC(type, count) \
  ((type*) uprv_malloc(sizeof(type) * (count)))

// Report a fatal error: print "Error: <msg>" on stderr, then terminate
// the process with exit status 1.  Does not return.
void die(const char* msg) {
    fprintf(stderr, "Error: %s\n", msg);
    exit(1);
}

//----------------------------------------------------------------------

/**
 * A list of Alias objects.  Abstract interface giving indexed,
 * read-only access to the elements plus the element count.
 */
class AliasList {
public:
    // Virtual so that a concrete list is destroyed correctly even when
    // it is handled through an AliasList pointer or reference.
    virtual ~AliasList() {}

    // Element i of the list.
    virtual const Alias& operator[](int32_t i) const = 0;

    // Number of elements in the list.
    virtual int32_t count() const = 0;
};

/**
 * A single array.
 */
class AliasArrayList : public AliasList {
    const Alias* a;
    int32_t n;
public:
    AliasArrayList(const Alias* _a, int32_t _n) {
        a = _a;
        n = _n;
    }
    virtual const Alias& operator[](int32_t i) const {
        return a[i];
    }
    virtual int32_t count() const {
        return n;
    }
};

/**
 * A single array.
 */
class PropertyArrayList : public AliasList {
    const Property* a;
    int32_t n;
public:
    PropertyArrayList(const Property* _a, int32_t _n) {
        a = _a;
        n = _n;
    }
    virtual const Alias& operator[](int32_t i) const {
        return a[i];
    }
    virtual int32_t count() const {
        return n;
    }
};

//----------------------------------------------------------------------

/**
 * One record of a name index: associates a name, identified by its
 * index, with an enum value.
 */
class NameToEnumEntry {
public:
    int32_t nameIndex;
    int32_t enumValue;

    // a is the name index, b the enum value it maps to.
    NameToEnumEntry(int32_t a, int32_t b)
        : nameIndex(a),
          enumValue(b) {
    }
};

// Sort comparator for NameToEnumEntry: orders two entries by the names
// that their nameIndex fields select in STRING_TABLE.  The context
// argument is unused.
U_CFUNC int32_t
compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) {
    const NameToEnumEntry* left = (const NameToEnumEntry*) e1;
    const NameToEnumEntry* right = (const NameToEnumEntry*) e2;
    return STRING_TABLE[left->nameIndex].compare(STRING_TABLE[right->nameIndex]);
}

//----------------------------------------------------------------------

/**
 * One record of an enum index: associates an enum value with a name
 * group entry, identified by its index.
 */
class EnumToNameGroupEntry {
public:
    int32_t enumValue;
    int32_t nameGroupIndex;

    // a is the enum value, b the name group index it maps to.
    EnumToNameGroupEntry(int32_t a, int32_t b)
        : enumValue(a),
          nameGroupIndex(b) {
    }

    // Do the count entries beginning with this one carry consecutive
    // enumValues (this->enumValue, +1, +2, ...)?
    // ***!!!*** This reads the objects that follow this one in memory,
    // so it must only be called on an element of an array that has at
    // least count entries from here on ***!!!***
    UBool isContiguous(int32_t count) const {
        int32_t step = 1;
        while (step < count) {
            const EnumToNameGroupEntry& neighbor = this[step];
            if (neighbor.enumValue != (enumValue + step)) {
                return FALSE;
            }
            ++step;
        }
        return TRUE;
    }
};

// Sort function for EnumToNameGroupEntry (sort by enum value).
// Returns a negative, zero or positive value.  The two values are
// compared rather than subtracted, so the result cannot overflow when
// the enum values are far apart.  The context argument is unused.
U_CFUNC int32_t
compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) {
    const int32_t left = ((const EnumToNameGroupEntry*)e1)->enumValue;
    const int32_t right = ((const EnumToNameGroupEntry*)e2)->enumValue;
    if (left < right) {
        return -1;
    }
    return (left > right) ? 1 : 0;
}

//----------------------------------------------------------------------

/**
 * One record of the map from enumerated property enums to value maps.
 * Holds the property's enum value plus two arrays describing its
 * values: enum-to-name-group entries and name-to-enum entries, each
 * with its own count.
 */
class EnumToValueEntry {
public:
    int32_t enumValue;
    EnumToNameGroupEntry* enumToName;
    int32_t enumToName_count;
    NameToEnumEntry* nameToEnum;
    int32_t nameToEnum_count;

    // Do the count entries beginning with this one carry consecutive
    // enumValues (this->enumValue, +1, +2, ...)?
    // ***!!!*** This reads the objects that follow this one in memory,
    // so it must only be called on an element of an array that has at
    // least count entries from here on ***!!!***
    UBool isContiguous(int32_t count) const {
        int32_t step = 1;
        while (step < count) {
            const EnumToValueEntry& neighbor = this[step];
            if (neighbor.enumValue != (enumValue + step)) {
                return FALSE;
            }
            ++step;
        }
        return TRUE;
    }
};

// Sort function for EnumToValueEntry (sort by enum).
// Returns a negative, zero or positive value.  The two values are
// compared rather than subtracted, so the result cannot overflow when
// the enum values are far apart.  The context argument is unused.
U_CFUNC int32_t
compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) {
    const int32_t left = ((const EnumToValueEntry*)e1)->enumValue;
    const int32_t right = ((const EnumToValueEntry*)e2)->enumValue;
    if (left < right) {
        return -1;
    }
    return (left > right) ? 1 : 0;
}

//----------------------------------------------------------------------
// BEGIN Builder

// Nonzero when x lies in the closed range [0, MAX_OFFSET].
#define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET))

/**
 * Assembles the binary image of the property name data.  The build*()
 * methods create the individual tables holding index values, fixup()
 * assigns each table its offset and converts the stored indices into
 * offsets, and createData() concatenates the header and all tables
 * into one newly allocated buffer.  The numbered comments below
 * (0:, 2:, ... 98:, 99:) label the tables in the order in which
 * createData() emits them.
 */
class Builder {
    // header:
    PropertyAliases header;

    // 0:
    NonContiguousEnumToOffset* enumToName;
    int32_t enumToName_size;
    Offset enumToName_offset;

    // 1: (deleted)

    // 2:
    NameToEnum* nameToEnum;
    int32_t nameToEnum_size;
    Offset nameToEnum_offset;

    // 3:
    NonContiguousEnumToOffset* enumToValue;
    int32_t enumToValue_size;
    Offset enumToValue_offset;

    // 4:
    ValueMap* valueMap;
    int32_t valueMap_size;
    int32_t valueMap_count;
    Offset valueMap_offset;

    // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
    // NULL and one is not.  valueEnumToName_size[i] is the size of
    // the non-NULL one.  i=0..valueMapCount-1
    // 5a:
    EnumToOffset** valueEnumToName;
    // 5b:
    NonContiguousEnumToOffset** valueNCEnumToName;
    int32_t* valueEnumToName_size;
    Offset* valueEnumToName_offset;
    // 6:
    // arrays of valueMap_count pointers, sizes, & offsets
    NameToEnum** valueNameToEnum;
    int32_t* valueNameToEnum_size;
    Offset* valueNameToEnum_offset;

    // 98:
    Offset* nameGroupPool;
    int32_t nameGroupPool_count;
    int32_t nameGroupPool_size;
    Offset nameGroupPool_offset;

    // 99:
    char* stringPool;
    int32_t stringPool_count;
    int32_t stringPool_size;
    Offset stringPool_offset;
    Offset* stringPool_offsetArray; // relative to stringPool

    int32_t total_size; // size of everything

    int32_t debug; // debug level; values > 0 make computeOffsets() print the layout

public:

    Builder(int32_t debugLevel);
    ~Builder();

    // Builds tables 0 (enumToName) and 2 (nameToEnum) for the properties.
    void buildTopLevelProperties(const NameToEnumEntry* propName,
                                 int32_t propNameCount,
                                 const EnumToNameGroupEntry* propEnum,
                                 int32_t propEnumCount);

    // Builds tables 3 (enumToValue), 4 (valueMap), 5 and 6 for the
    // count entries of e2v.
    void buildValues(const EnumToValueEntry* e2v,
                     int32_t count);

    // Builds tables 98 (nameGroupPool) and 99 (stringPool).
    void buildStringPool(const AliasName* propertyNames,
                         int32_t propertyNameCount,
                         const int32_t* nameGroupIndices,
                         int32_t nameGroupIndicesCount);

    // Computes all offsets and replaces the index values stored by the
    // build*() methods with offsets.
    void fixup();

    // Returns a newly allocated buffer holding the complete image and
    // stores its size in length.
    int8_t* createData(int32_t& length) const;

private:

    // Table factories.  Each returns a newly allocated, zero-filled
    // table and stores its (aligned) byte size in size.
    static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
                                           int32_t count,
                                           int32_t& size);
    static NonContiguousEnumToOffset*
        buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
                               int32_t count,
                               int32_t& size);

    static NonContiguousEnumToOffset*
        buildNCEnumToValue(const EnumToValueEntry* e2v,
                           int32_t count,
                           int32_t& size);

    static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
                                       int32_t count,
                                       int32_t& size);

    Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
    void fixupNameToEnum(NameToEnum* n);
    void fixupEnumToNameGroup(EnumToOffset* e2ng);
    void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);

    // The four steps of fixup(), in the order in which it runs them.
    void computeOffsets();
    void fixupStringPoolOffsets();
    void fixupNameGroupPoolOffsets();
    void fixupMiscellaneousOffsets();

    // Rounds a up to a multiple of sizeof(int32_t).
    static int32_t align(int32_t a);
    // Zero-fills size bytes starting at p.
    static void erase(void* p, int32_t size);
};

/**
 * Creates an empty builder.  Every pointer, size, count and offset
 * member starts out as zero, so that the destructor and the fixup
 * steps never read an indeterminate value, even if one of the build*()
 * methods has not been called.  The header member is not touched
 * here; fixupMiscellaneousOffsets() erases and fills it.
 * @param debugLevel stored as the debug level
 */
Builder::Builder(int32_t debugLevel) :
    // initializers are listed in member declaration order
    enumToName(0),
    enumToName_size(0),
    enumToName_offset(0),
    nameToEnum(0),
    nameToEnum_size(0),
    nameToEnum_offset(0),
    enumToValue(0),
    enumToValue_size(0),
    enumToValue_offset(0),
    valueMap(0),
    valueMap_size(0),
    valueMap_count(0),
    valueMap_offset(0),
    valueEnumToName(0),
    valueNCEnumToName(0),
    valueEnumToName_size(0),
    valueEnumToName_offset(0),
    valueNameToEnum(0),
    valueNameToEnum_size(0),
    valueNameToEnum_offset(0),
    nameGroupPool(0),
    nameGroupPool_count(0),
    nameGroupPool_size(0),
    nameGroupPool_offset(0),
    stringPool(0),
    stringPool_count(0),
    stringPool_size(0),
    stringPool_offset(0),
    stringPool_offsetArray(0),
    total_size(0),
    debug(debugLevel)
{
}

/**
 * Releases every table owned by the builder.
 */
Builder::~Builder() {
    // Per-value tables go first, while the arrays that hold the
    // pointers to them still exist.
    int32_t m = 0;
    while (m < valueMap_count) {
        uprv_free(valueEnumToName[m]);
        uprv_free(valueNCEnumToName[m]);
        uprv_free(valueNameToEnum[m]);
        ++m;
    }

    // top-level tables
    uprv_free(enumToName);
    uprv_free(nameToEnum);
    uprv_free(enumToValue);
    uprv_free(valueMap);

    // bookkeeping arrays for the per-value tables
    uprv_free(valueEnumToName);
    uprv_free(valueNCEnumToName);
    uprv_free(valueEnumToName_size);
    uprv_free(valueEnumToName_offset);
    uprv_free(valueNameToEnum);
    uprv_free(valueNameToEnum_size);
    uprv_free(valueNameToEnum_offset);

    // pools
    uprv_free(nameGroupPool);
    uprv_free(stringPool);
    uprv_free(stringPool_offsetArray);
}

/**
 * Rounds a up to the next multiple of sizeof(int32_t); a value that is
 * already a multiple is returned unchanged.  a must not be negative.
 */
int32_t Builder::align(int32_t a) {
    U_ASSERT(a >= 0);
    int32_t excess = a % sizeof(int32_t);
    if (excess != 0) {
        // pad by whatever is missing to reach the next boundary
        a += sizeof(int32_t) - excess;
    }
    return a;
}

/**
 * Sets size bytes, starting at p, to zero.  size must not be negative.
 */
void Builder::erase(void* p, int32_t size) {
    U_ASSERT(size >= 0);
    int8_t* cursor = (int8_t*) p;
    for (int32_t remaining = size; remaining != 0; --remaining) {
        *cursor = 0;
        ++cursor;
    }
}

/**
 * Builds an EnumToOffset table for count entries whose enum values
 * are contiguous.  The offset slots temporarily hold name group
 * indices; fixupEnumToNameGroup() turns them into offsets later.
 * Terminates the program via die() if the table cannot be allocated.
 * @param e2ng first of count entries with contiguous enum values
 * @param count number of entries
 * @param size receives the aligned byte size of the table
 * @return the newly allocated table
 */
EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
                                         int32_t count,
                                         int32_t& size) {
    U_ASSERT(e2ng->isContiguous(count));
    size = align(EnumToOffset::getSize(count));
    EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
    if (result == 0) {
        // erase() below would otherwise write through a null pointer
        die("Out of memory building an EnumToOffset table");
    }
    erase(result, size);
    result->enumStart = e2ng->enumValue;
    result->enumLimit = e2ng->enumValue + count;
    Offset* p = result->getOffsetArray();
    for (int32_t i=0; i<count; ++i) {
        // set these to NGI index values
        // fix them up to NGI offset values
        U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
        p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
    }
    return result;
}

/**
 * Builds a NonContiguousEnumToOffset table for count entries whose
 * enum values are not contiguous.  The offset slots temporarily hold
 * name group indices; fixupNCEnumToNameGroup() turns them into
 * offsets later.  Terminates the program via die() if the table
 * cannot be allocated.
 * @param e2ng first of count entries
 * @param count number of entries
 * @param size receives the aligned byte size of the table
 * @return the newly allocated table
 */
NonContiguousEnumToOffset*
Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
                                int32_t count,
                                int32_t& size) {
    U_ASSERT(!e2ng->isContiguous(count));
    size = align(NonContiguousEnumToOffset::getSize(count));
    NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
    if (nc == 0) {
        // erase() below would otherwise write through a null pointer
        die("Out of memory building a NonContiguousEnumToOffset table");
    }
    erase(nc, size);
    nc->count = count;
    EnumValue* e = nc->getEnumArray();
    Offset* p = nc->getOffsetArray();
    for (int32_t i=0; i<count; ++i) {
        // set these to NGI index values
        // fix them up to NGI offset values
        e[i] = e2ng[i].enumValue;
        U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
        p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
    }
    return nc;
}

/**
 * Builds the NonContiguousEnumToOffset table that maps property enums
 * to their value maps.  Only the enum array is filled in here; the
 * offset array stays zero until fixupMiscellaneousOffsets() sets it.
 * Terminates the program via die() if the table cannot be allocated.
 * @param e2v first of count entries
 * @param count number of entries
 * @param size receives the aligned byte size of the table
 * @return the newly allocated table
 */
NonContiguousEnumToOffset*
Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
                            int32_t count,
                            int32_t& size) {
    U_ASSERT(!e2v->isContiguous(count));
    size = align(NonContiguousEnumToOffset::getSize(count));
    NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
    if (result == 0) {
        // erase() below would otherwise write through a null pointer
        die("Out of memory building the enum-to-value table");
    }
    erase(result, size);
    result->count = count;
    EnumValue* e = result->getEnumArray();
    for (int32_t i=0; i<count; ++i) {
        e[i] = e2v[i].enumValue;
        // offset must be set later
    }
    return result;
}

/**
 * Converts an index into the string pool to an offset.
 * computeOffsets() must have been called already.  Index 0 (the ""
 * entry) always yields offset 0.  A negative index is accepted only
 * when allowNegative is true; the result is then the negated offset
 * of the corresponding positive index.  Any other negative index, or
 * an index beyond the pool, is a fatal error.
 */
Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
    // Index 0 is ""; we turn this into an Offset of zero
    if (index == 0) {
        return 0;
    }

    if (index > 0) {
        if (index >= stringPool_count) {
            die("String pool index too large");
        }
        const Offset result = stringPool_offset + stringPool_offsetArray[index];
        U_ASSERT(result >= 0 && result < total_size);
        return result;
    }

    // index < 0 from here on
    if (!allowNegative) {
        die("Negative string pool index");
        return 0; // never executed; make compiler happy
    }
    // preserve the sign of the index in the offset
    return -Builder::stringIndexToOffset(-index);
}

/**
 * Builds a NameToEnum table from count name index entries.  The name
 * slots temporarily hold string pool indices; fixupNameToEnum() turns
 * them into offsets later.  Terminates the program via die() if the
 * table cannot be allocated.
 * @param nameToEnum first of count entries
 * @param count number of entries
 * @param size receives the aligned byte size of the table
 * @return the newly allocated table
 */
NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
                                     int32_t count,
                                     int32_t& size) {
    size = align(NameToEnum::getSize(count));
    NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
    if (n2e == 0) {
        // erase() below would otherwise write through a null pointer
        die("Out of memory building a NameToEnum table");
    }
    erase(n2e, size);
    n2e->count = count;
    Offset* p = n2e->getNameArray();
    EnumValue* e = n2e->getEnumArray();
    for (int32_t i=0; i<count; ++i) {
        // set these to SP index values
        // fix them up to SP offset values
        U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
        p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
        e[i] = nameToEnum[i].enumValue;
    }
    return n2e;
}


/**
 * Builds the two top-level property tables: enumToName (always in the
 * non-contiguous form) from propEnum, and nameToEnum from propName.
 * The byte size of each table is recorded in the matching _size member.
 */
void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
                                      int32_t propNameCount,
                                      const EnumToNameGroupEntry* propEnum,
                                      int32_t propEnumCount) {
    // 0:
    enumToName = buildNCEnumToNameGroup(propEnum, propEnumCount, enumToName_size);

    // 2:
    nameToEnum = buildNameToEnum(propName, propNameCount, nameToEnum_size);
}

void Builder::buildValues(const EnumToValueEntry* e2v,
                          int32_t count) {
    int32_t i;
    
    U_ASSERT(!e2v->isContiguous(count));

    valueMap_count = count;

    enumToValue = buildNCEnumToValue(e2v, count,
                                     enumToValue_size);

    valueMap_size = align(count * sizeof(ValueMap));
    valueMap = (ValueMap*) uprv_malloc(valueMap_size);
    erase(valueMap, valueMap_size);

    valueEnumToName = MALLOC(EnumToOffset*, count);
    valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
    valueEnumToName_size = MALLOC(int32_t, count);
    valueEnumToName_offset = MALLOC(Offset, count);
    valueNameToEnum = MALLOC(NameToEnum*, count);
    valueNameToEnum_size = MALLOC(int32_t, count);
    valueNameToEnum_offset = MALLOC(Offset, count);

    for (i=0; i<count; ++i) {
        UBool isContiguous =
            e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
        valueEnumToName[i] = 0;
        valueNCEnumToName[i] = 0;
        if (isContiguous) {
            valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
                                                   e2v[i].enumToName_count,
                                                   valueEnumToName_size[i]);
        } else {
            valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
                                                          e2v[i].enumToName_count,
                                                          valueEnumToName_size[i]);
        }
        valueNameToEnum[i] =
            buildNameToEnum(e2v[i].nameToEnum,
                            e2v[i].nameToEnum_count,
                            valueNameToEnum_size[i]);
    }
}

void Builder::buildStringPool(const AliasName* propertyNames,
                              int32_t propertyNameCount,
                              const int32_t* nameGroupIndices,
                              int32_t nameGroupIndicesCount) {
    int32_t i;

    nameGroupPool_count = nameGroupIndicesCount;
    nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
    nameGroupPool = MALLOC(Offset, nameGroupPool_count);

    for (i=0; i<nameGroupPool_count; ++i) {
        // Some indices are negative.
        int32_t a = nameGroupIndices[i];
        if (a < 0) a = -a;
        U_ASSERT(IS_VALID_OFFSET(a));
        nameGroupPool[i] = (Offset) nameGroupIndices[i];
    }

    stringPool_count = propertyNameCount;
    stringPool_size = 0;
    // first string must be "" -- we skip it
    U_ASSERT(*propertyNames[0].str == 0);
    for (i=1 /*sic*/; i<propertyNameCount; ++i) {
        stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1);
    }
    stringPool = MALLOC(char, stringPool_size);
    stringPool_offsetArray = MALLOC(Offset, stringPool_count);
    Offset soFar = 0;
    char* p = stringPool;
    stringPool_offsetArray[0] = -1; // we don't use this entry
    for (i=1 /*sic*/; i<propertyNameCount; ++i) {
        const char* str = propertyNames[i].str;
        int32_t len = (int32_t)uprv_strlen(str);
        uprv_strcpy(p, str);
        p += len;
        *p++ = 0;
        stringPool_offsetArray[i] = soFar;
        soFar += (Offset)(len+1);
    }
    U_ASSERT(soFar == stringPool_size);
    U_ASSERT(p == (stringPool + stringPool_size));
}

// Confirm that PropertyAliases is a POD (plain old data; see C++
// std).  The following union will _fail to compile_ if
// PropertyAliases is _not_ a POD.  (Note: We used to use the offsetof
// macro to check this, but that's not quite right, so that test is
// commented out -- see below.)
// The typedef exists only for this compile-time check.
// NOTE(review): the check relies on the rule that a union member may
// not have non-trivial special member functions; compilers that accept
// such members (C++11 and later) make it weaker -- confirm if the
// language level is ever raised.
typedef union {
    int32_t i;
    PropertyAliases p;
} PropertyAliasesPODTest;

/**
 * Lays out the output image: walks the tables in output order,
 * starting right after the header, and records the running offset in
 * each table's _offset member.  The final offset becomes total_size.
 * With debug > 0 the offset and size of every table are printed.
 * Note that the COMPUTE_OFFSET and COMPUTE_OFFSET2 macros defined
 * here are not #undef'd and remain defined for the rest of the file.
 */
void Builder::computeOffsets() {
    int32_t i;
    Offset off = sizeof(header);

    if (debug>0) {
        printf("header   \t offset=%4d  size=%5d\n", 0, off);
    }

    // PropertyAliases must have no v-table and must be
    // padded (if necessary) to the next 32-bit boundary.
    //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
    U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);

    // COMPUTE_OFFSET(foo): place table foo at the current offset, which
    // must be int32_t-aligned, then advance the offset by foo_size.
    #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)

    // Same, but the start offset only has to be a multiple of sizeof(type).
    #define COMPUTE_OFFSET2(foo,type) \
      if (debug>0)\
        printf(#foo "\t offset=%4d  size=%5d\n", off, (int)foo##_size);\
      foo##_offset = off;\
      U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
      U_ASSERT(foo##_offset % sizeof(type) == 0);\
      off = (Offset) (off + foo##_size);

    COMPUTE_OFFSET(enumToName);     // 0:
    COMPUTE_OFFSET(nameToEnum);     // 2:
    COMPUTE_OFFSET(enumToValue);    // 3:
    COMPUTE_OFFSET(valueMap);       // 4:
        
    // For each value map: its enum-to-name table (5) immediately
    // followed by its name-to-enum table (6).
    for (i=0; i<valueMap_count; ++i) {
        if (debug>0) {
            printf(" enumToName[%d]\t offset=%4d  size=%5d\n",
                   (int)i, off, (int)valueEnumToName_size[i]);
        }

        valueEnumToName_offset[i] = off;   // 5:
        U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
        off = (Offset) (off + valueEnumToName_size[i]);

        if (debug>0) {
            printf(" nameToEnum[%d]\t offset=%4d  size=%5d\n",
                   (int)i, off, (int)valueNameToEnum_size[i]);
        }

        valueNameToEnum_offset[i] = off;   // 6:
        U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
        off = (Offset) (off + valueNameToEnum_size[i]);
    }
    
    // These last two chunks have weaker alignment needs
    COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
    COMPUTE_OFFSET2(stringPool,char);      // 99:

    total_size = off;
    if (debug>0) printf("total                         size=%5d\n\n", (int)total_size);
    U_ASSERT(total_size <= (MAX_OFFSET+1));
}

/**
 * Replaces every string pool index stored in n's name array with the
 * corresponding offset.
 */
void Builder::fixupNameToEnum(NameToEnum* n) {
    Offset* names = n->getNameArray();
    int32_t k = 0;
    while (k < n->count) {
        names[k] = stringIndexToOffset(names[k]);
        ++k;
    }
}

void Builder::fixupStringPoolOffsets() {
    int32_t i;
    
    // 2:
    fixupNameToEnum(nameToEnum);

    // 6:
    for (i=0; i<valueMap_count; ++i) {
        fixupNameToEnum(valueNameToEnum[i]);
    }

    // 98:
    for (i=0; i<nameGroupPool_count; ++i) {
        nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
    }
}

/**
 * Converts the name group indices stored in a contiguous enum table
 * into offsets of the matching name group pool entries.
 */
void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
    Offset* slot = e2ng->getOffsetArray();
    // one slot per enum value in [enumStart, enumLimit)
    for (EnumValue e = e2ng->enumStart; e < e2ng->enumLimit; ++e, ++slot) {
        *slot = nameGroupPool_offset + sizeof(Offset) * (*slot);
    }
}

/**
 * Converts the name group indices stored in a non-contiguous enum
 * table into offsets of the matching name group pool entries.  The
 * enum array of the table is left alone.
 */
void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
    Offset* slots = e2ng->getOffsetArray();
    int32_t k = 0;
    while (k < e2ng->count) {
        slots[k] = nameGroupPool_offset + sizeof(Offset) * slots[k];
        ++k;
    }
}

void Builder::fixupNameGroupPoolOffsets() {
    int32_t i;

    // 0:
    fixupNCEnumToNameGroup(enumToName);

    // 1: (deleted)

    // 5:
    for (i=0; i<valueMap_count; ++i) {
        // 5a:
        if (valueEnumToName[i] != 0) {
            fixupEnumToNameGroup(valueEnumToName[i]);
        }
        // 5b:
        if (valueNCEnumToName[i] != 0) {
            fixupNCEnumToNameGroup(valueNCEnumToName[i]);
        }
    }
}

void Builder::fixupMiscellaneousOffsets() {
    int32_t i;

    // header:
    erase(&header, sizeof(header));
    header.enumToName_offset = enumToName_offset;
    header.nameToEnum_offset = nameToEnum_offset;
    header.enumToValue_offset = enumToValue_offset;
    // header meta-info used by Java:
    U_ASSERT(total_size > 0 && total_size < 0x7FFF);
    header.total_size = (int16_t) total_size;
    header.valueMap_offset = valueMap_offset;
    header.valueMap_count = (int16_t) valueMap_count;
    header.nameGroupPool_offset = nameGroupPool_offset;
    header.nameGroupPool_count = (int16_t) nameGroupPool_count;
    header.stringPool_offset = stringPool_offset;
    header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry

    U_ASSERT(valueMap_count <= 0x7FFF);
    U_ASSERT(nameGroupPool_count <= 0x7FFF);
    U_ASSERT(stringPool_count <= 0x7FFF);

    // 3:
    Offset* p = enumToValue->getOffsetArray();
    /*EnumValue* e = enumToValue->getEnumArray();*/
    U_ASSERT(valueMap_count == enumToValue->count);
    for (i=0; i<valueMap_count; ++i) {
        p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
    }

    // 4:
    for (i=0; i<valueMap_count; ++i) {
        ValueMap& v = valueMap[i];
        v.enumToName_offset = v.ncEnumToName_offset = 0;
        if (valueEnumToName[i] != 0) {
            v.enumToName_offset = valueEnumToName_offset[i];
        }
        if (valueNCEnumToName[i] != 0) {
            v.ncEnumToName_offset = valueEnumToName_offset[i];
        }
        v.nameToEnum_offset = valueNameToEnum_offset[i];
    }
}

/**
 * Turns the index values stored by the build*() methods into offsets.
 * The order matters: computeOffsets() must run first because the
 * other steps use the pool and table offsets it assigns.
 */
void Builder::fixup() {
    computeOffsets();
    fixupStringPoolOffsets();
    fixupNameGroupPoolOffsets();
    fixupMiscellaneousOffsets();
}

int8_t* Builder::createData(int32_t& length) const {
    length = total_size;
    int8_t* result = MALLOC(int8_t, length);
    
    int8_t* p = result;
    int8_t* limit = result + length;
    
    #define APPEND2(x, size)   \
      U_ASSERT((p+size)<=limit); \
      uprv_memcpy(p, x, size); \
      p += size

    #define APPEND(x) APPEND2(x, x##_size)

    APPEND2(&header, sizeof(header));
    APPEND(enumToName);
    APPEND(nameToEnum);
    APPEND(enumToValue);
    APPEND(valueMap);
 
    for (int32_t i=0; i<valueMap_count; ++i) {
        U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
               (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
        if (valueEnumToName[i] != 0) {
            APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
        }
        if (valueNCEnumToName[i] != 0) {
            APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
        }
        APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
    }

    APPEND(nameGroupPool);
    APPEND(stringPool);

    if (p != limit) {
        fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit);
        exit(1);
    }
    return result;
}

// END Builder
//----------------------------------------------------------------------

/* UDataInfo cf. udata.h */
/* Descriptive header record for the generated data.  The initializers
 * are positional, so their order must match the field order of
 * UDataInfo in udata.h.
 * NOTE(review): presumably handed to the data file writer by
 * writeDataFile() (not visible here) -- confirm. */
static UDataInfo dataInfo = {
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3},
    {PNAME_FORMAT_VERSION, 0, 0, 0},                 /* formatVersion */
    {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
};

/**
 * The genpname application object: holds the parsed command-line
 * settings and the helpers that build the name and enum indexes.
 */
class genpname {

    // command-line options
    UBool useCopyright; // set from the copyright option
    UBool verbose;      // set from the verbose option
    int32_t debug;      // debug level 0..9 from the debug option; 0 if absent

public:
    // Runs the tool with the given command line; the return value is
    // used as the exit code of main().
    int      MMain(int argc, char *argv[]);

private:
    // Builds the sorted name => enum index for list.  Returns a newly
    // allocated array and stores its length in nameIndexCount.
    NameToEnumEntry* createNameIndex(const AliasList& list,
                                     int32_t& nameIndexCount);

    // Builds the enum => name group index for list, sorted by enum
    // value.  Returns a newly allocated array of list.count() entries.
    EnumToNameGroupEntry* createEnumIndex(const AliasList& list);

    int32_t  writeDataFile(const char *destdir, const Builder&);
};

int main(int argc, char *argv[]) {
    UErrorCode status = U_ZERO_ERROR;
    u_init(&status);
    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
        // Note: u_init() will try to open ICU property data.
        //       failures here are expected when building ICU from scratch.
        //       ignore them.
        fprintf(stderr, "genpname: can not initialize ICU.  Status = %s\n",
            u_errorName(status));
        exit(1);
    }

    genpname app;
    U_MAIN_INIT_ARGS(argc, argv);
    int retVal = app.MMain(argc, argv);
    u_cleanup();
    return retVal;
}

// Command-line option table.  genpname::MMain() addresses the entries
// by numeric index, so the order here must not change without
// updating those indexes.
static UOption options[]={
    UOPTION_HELP_H,                               // [0] help
    UOPTION_HELP_QUESTION_MARK,                   // [1] help
    UOPTION_COPYRIGHT,                            // [2] read into useCopyright
    UOPTION_DESTDIR,                              // [3] value preset to u_getDataDirectory()
    UOPTION_VERBOSE,                              // [4] read into verbose
    UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG), // [5] debug level, a single digit
};

/**
 * Builds the name => enum index for a list of aliases.
 *
 * This is an n->1 map.  There are typically multiple names mapping to
 * one enum.  The index is sorted by name, in the order defined by
 * AliasName::compare(), i.e. by uprv_comparePropertyNames().
 *
 * Terminates the program via die() if the index cannot be allocated,
 * if sorting fails, or if two different aliases share a name.
 *
 * @param list the aliases to index
 * @param nameIndexCount receives the number of entries in the index
 * @return the newly allocated index, owned by the caller
 */
NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
                                           int32_t& nameIndexCount) {
    int32_t i, j;
    int32_t count = list.count();

    // compute upper limit on number of names in the index
    int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
    NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
    if (nameIndex == 0 && nameIndexCapacity > 0) {
        die("Out of memory creating the name index");
    }

    nameIndexCount = 0;
    int32_t names[MAX_NAMES_PER_GROUP];
    for (i=0; i<count; ++i) {
        const Alias& p = list[i];
        int32_t n = p.getUniqueNames(names);
        for (j=0; j<n; ++j) {
            U_ASSERT(nameIndexCount < nameIndexCapacity);
            nameIndex[nameIndexCount++] =
                NameToEnumEntry(names[j], p.enumValue);
        }
    }

    /*
     * use a stable sort to ensure consistent results between
     * genpname.cpp and the propname.cpp swapping code
     */
    UErrorCode errorCode = U_ZERO_ERROR;
    uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
                   compareNameToEnumEntry, NULL, TRUE, &errorCode);
    if (U_FAILURE(errorCode)) {
        // an unsorted index would silently produce broken data
        fprintf(stderr, "genpname: uprv_sortArray failed: %s\n",
                u_errorName(errorCode));
        die("Unable to sort the name index");
    }
    if (debug>1) {
        printf("Alias names: %d\n", (int)nameIndexCount);
        for (i=0; i<nameIndexCount; ++i) {
            printf("%s => %d\n",
                   STRING_TABLE[nameIndex[i].nameIndex].str,
                   (int)nameIndex[i].enumValue);
        }
        printf("\n");
    }
    // make sure there are no duplicates.  for a sorted list we need
    // only compare adjacent items.  Alias.getUniqueNames() has
    // already eliminated duplicate names for a single property, which
    // does occur, so we're checking for duplicate names between two
    // properties, which should never occur.
    UBool ok = TRUE;
    for (i=1; i<nameIndexCount; ++i) {
        if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
            STRING_TABLE[nameIndex[i].nameIndex]) {
            printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
                   STRING_TABLE[nameIndex[i-1].nameIndex].str,
                   STRING_TABLE[nameIndex[i].nameIndex].str);
            ok = FALSE;
        }
    }
    if (!ok) {
        die("Two or more duplicate names in property list");
    }

    return nameIndex;
}

/**
 * Builds the enum => name map for a list of aliases.
 *
 * This is a 1->n map.  Each enum maps to 1 or more names.  To
 * accomplish this the index entry points to an element of the
 * NAME_GROUP array.  This is the short name (which may be empty).
 * From there, subsequent elements of NAME_GROUP are alternate
 * names for this enum, up to and including the first one that is
 * negative (negate for actual index).
 *
 * Terminates the program via die() if the index cannot be allocated
 * or if sorting fails.
 *
 * @param list the aliases to index
 * @return the newly allocated index of list.count() entries, sorted
 * by enum value and owned by the caller
 */
EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
    int32_t i, j, k;
    int32_t count = list.count();

    EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
    if (enumIndex == 0 && count > 0) {
        die("Out of memory creating the enum index");
    }
    for (i=0; i<count; ++i) {
        const Alias& p = list[i];
        enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
    }

    UErrorCode errorCode = U_ZERO_ERROR;
    uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
                   compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
    if (U_FAILURE(errorCode)) {
        // an unsorted index would silently produce broken data
        fprintf(stderr, "genpname: uprv_sortArray failed: %s\n",
                u_errorName(errorCode));
        die("Unable to sort the enum index");
    }
    if (debug>1) {
        printf("Property enums: %d\n", (int)count);
        for (i=0; i<count; ++i) {
            printf("%d => %d: ",
                   (int)enumIndex[i].enumValue,
                   (int)enumIndex[i].nameGroupIndex);
            // list every name of the group; a negative entry is the last one
            UBool done = FALSE;
            for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
                k = NAME_GROUP[j];
                if (k < 0) {
                    k = -k;
                    done = TRUE;
                }
                printf("\"%s\"", STRING_TABLE[k].str);
                if (!done) printf(", ");
            }
            printf("\n");
        }
        printf("\n");
    }
    return enumIndex;
}

int genpname::MMain(int argc, char* argv[])
{
    int32_t i, j;
    UErrorCode status = U_ZERO_ERROR;

    u_init(&status);
    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
        fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
        status = U_ZERO_ERROR;
    }


    /* preset then read command line options */
    options[3].value=u_getDataDirectory();
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if (argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    }

    debug = options[5].doesOccur ? (*options[5].value - '0') : 0;

    if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
       debug < 0 || debug > 9) {
        fprintf(stderr,
            "usage: %s [-options]\n"
            "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
            "options:\n"
            "\t-h or -? or --help  this usage text\n"
            "\t-v or --verbose     turn on verbose output\n"
            "\t-c or --copyright   include a copyright notice\n"
            "\t-d or --destdir     destination directory, followed by the path\n"
            "\t-D or --debug 0..9  emit debugging messages (if > 0)\n",
            argv[0]);
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    /* get the options values */
    useCopyright=options[2].doesOccur;
    verbose = options[4].doesOccur;

    // ------------------------------------------------------------
    // Do not sort the string table, instead keep it in data.h order.
    // This simplifies data swapping and testing thereof because the string
    // table itself need not be sorted during swapping.
    // The NameToEnum sorter sorts each such map's string offsets instead.

    if (debug>1) {
        printf("String pool: %d\n", (int)STRING_COUNT);
        for (i=0; i<STRING_COUNT; ++i) {
            if (i != 0) {
                printf(", ");
            }
            printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
        }
        printf("\n\n");
    }

    // ------------------------------------------------------------
    // Create top-level property indices

    PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
    int32_t propNameCount;
    NameToEnumEntry* propName = createNameIndex(props, propNameCount);
    EnumToNameGroupEntry* propEnum = createEnumIndex(props);

    // ------------------------------------------------------------
    // Create indices for the value list for each enumerated property

    // This will have more entries than we need...
    EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
    int32_t enumToValue_count = 0;
    for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
        if (PROPERTY[i].valueCount == 0) continue;
        AliasArrayList values(PROPERTY[i].valueList,
                              PROPERTY[i].valueCount);
        enumToValue[j].enumValue = PROPERTY[i].enumValue;
        enumToValue[j].enumToName = createEnumIndex(values);
        enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
        enumToValue[j].nameToEnum = createNameIndex(values,
                                                    enumToValue[j].nameToEnum_count);
        ++j;
    }
    enumToValue_count = j;

    uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
                   compareEnumToValueEntry, NULL, FALSE, &status);

    // ------------------------------------------------------------
    // Build PropertyAliases layout in memory

    Builder builder(debug);

    builder.buildTopLevelProperties(propName,
                                    propNameCount,
                                    propEnum,
                                    PROPERTY_COUNT);
    
    builder.buildValues(enumToValue,
                        enumToValue_count);

    builder.buildStringPool(STRING_TABLE,
                            STRING_COUNT,
                            NAME_GROUP,
                            NAME_GROUP_COUNT);

    builder.fixup();

    ////////////////////////////////////////////////////////////
    // Write the output file
    ////////////////////////////////////////////////////////////
    int32_t wlen = writeDataFile(options[3].value, builder);
    if (verbose) {
        fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
            U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
    }

    return 0; // success
}

/**
 * Write the data produced by `builder` to the tool's output data file.
 *
 * @param destdir  destination directory handed to udata_create
 * @param builder  source of the bytes to write (via createData)
 * @return the length reported by udata_finish
 *
 * Calls die() if the output cannot be created, if finishing the file
 * reports an error, or if the reported length differs from the length
 * that createData produced.
 */
int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
    // Obtain the bytes to write; createData reports their count by reference.
    int32_t expectedLength;
    int8_t* bytes = builder.createData(expectedLength);

    // The copyright string is attached only when it was requested.
    const char* comment = useCopyright ? U_COPYRIGHT_STRING : 0;

    UErrorCode errorCode = U_ZERO_ERROR;
    UNewDataMemory* out = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME,
                                       &dataInfo, comment, &errorCode);
    if (U_FAILURE(errorCode)) {
        die("Unable to create data memory");
    }

    udata_writeBlock(out, bytes, expectedLength);

    int32_t writtenLength = (int32_t) udata_finish(out, &errorCode);
    if (U_FAILURE(errorCode)) {
        die("Error writing output file");
    }

    // Sanity check: the size on disk must equal what the builder produced.
    if (writtenLength != expectedLength) {
        die("Written file doesn't match expected size");
    }

    return writtenLength;
}

//eof