rbbidata.h   [plain text]


//  file:  rbbidata.h
//
//**********************************************************************
//   Copyright (C) 1999 IBM Corp. All rights reserved.
//**********************************************************************
//
//   RBBI data formats  Includes
//
//                          Structs that describes the format of the Binary RBBI data,
//                          as it is stored in ICU's data file.
//
//      RBBIDataWrapper  -  Instances of this class sit between the
//                          raw data structs and the RulesBasedBreakIterator objects
//                          that are created by applications.  The wrapper class
//                          provides reference counting for the underlying data,
//                          and direct pointers to data that would not otherwise
//                          be accessible without ugly pointer arithmetic.  The
//                          wrapper does not attempt to provide any higher level
//                          abstractions for the data itself.
//
//                          There will be only one instance of RBBIDataWrapper for any
//                          set of RBBI run time data being shared by instances
//                          (clones) of RulesBasedBreakIterator.
//

#ifndef __RBBIDATA_H__
#define __RBBIDATA_H__

#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/unistr.h"
#include "unicode/udata.h"
#include "utrie.h"


U_NAMESPACE_BEGIN

//
//  The following structs map exactly onto the raw data from ICU common data file.
//
struct RBBIDataHeader {
    uint32_t         fMagic;       // == 0xbla0
    uint32_t         fVersion;     // == 1
    uint32_t         fLength;      // Total length in bytes of this RBBI Data,
                                   //     including all sections, not just the header.
    uint32_t         fCatCount;    // Number of character categories.

    //
    // Offsets and sizes of each of the subsections within the RBBI data.
    // All offsets are bytes from the start of the RBBIDataHeader.
    // All sizes are in bytes.
    //
    uint32_t         fFTable;      // forward state transition table.
    uint32_t         fFTableLen;
    uint32_t         fRTable;      // Offset to the reverse state transition table.
    uint32_t         fRTableLen;
    uint32_t         fTrie;        // Offset to Trie data for character categories
    uint32_t         fTrieLen;
    uint32_t         fRuleSource;  // Offset to the source for for the break
    uint32_t         fRuleSourceLen;  //   rules.  Stored UChar *.

    uint32_t         fReserved[8]; // Reserved for expansion

};



struct  RBBIStateTableRow {
    int16_t          fAccepting;    // Non-zero if this row is for an accepting state.
                                    // Value is the {nnn} value to return to calling
                                    //    application.
    int16_t          fLookAhead;    // Non-zero if this row is for a state that
                                    //   corresponds to a '/' in the rule source.
                                    //   Value is the same as the fAccepting
                                    //     value for the rule (which will appear
                                    //     in a different state.
    int16_t          fTag;          // Non-zero if this row covers a {tagged} position
                                    //    from a rule.  value is the tag number.
    int16_t          fReserved;
    uint16_t         fNextState[2]; // Next State, indexed by char category.
                                    //   Array Size is fNumCols from the
                                    //   state table header.
                                    //   CAUTION:  see RBBITableBuilder::getTableSize()
                                    //             before changing anything here.
};


struct RBBIStateTable {
    uint32_t         fNumStates;    // Number of states.
    uint32_t         fRowLen;       // Length of a state table row, in bytes.
    char             fTableData[4]; // First RBBIStateTableRow begins here.
                                    //   (making it char[] simplifies ugly address
                                    //    arithmetic for indexing variable length rows.)
};


//
//  The reference counting wrapper class
//
class RBBIDataWrapper : public UMemory {
public:
    RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
    RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
    ~RBBIDataWrapper();

    void                  init(const RBBIDataHeader *data, UErrorCode &status);
    RBBIDataWrapper      *addReference();
    void                  removeReference();
    UBool                 operator ==(const RBBIDataWrapper &other) const;
    int32_t               hashCode();
    const UnicodeString  &getRuleSourceString();
    void                  printData();

    //
    //  Pointers to items within the data
    //
    const RBBIDataHeader     *fHeader;
    const RBBIStateTable     *fForwardTable;
    const RBBIStateTable     *fReverseTable;
    const UChar              *fRuleSource;

    UTrie               fTrie;

private:
    int32_t             fRefCount;
    UDataMemory        *fUDataMem;
    UnicodeString       fRuleString;

    RBBIDataWrapper(const RBBIDataWrapper &other); // forbid copying of this class
    RBBIDataWrapper &operator=(const RBBIDataWrapper &other); // forbid copying of this class
};

U_NAMESPACE_END

#endif