/**
 * Build options for BytesTrieBuilder and CharsTrieBuilder.
 * @stable ICU 4.8
 */
enum UStringTrieBuildOption {
    /**
     * Builds a trie quickly.
     * @stable ICU 4.8
     */
    USTRINGTRIE_BUILD_FAST,

    /**
     * Builds a trie more slowly, attempting to generate
     * a shorter but equivalent serialization.
     * This build option also uses more memory.
     *
     * This option can be effective when many integer values are the same
     * and string/byte sequence suffixes can be shared.
     * Runtime speed is not expected to improve.
     * @stable ICU 4.8
     */
    USTRINGTRIE_BUILD_SMALL
};
U_NAMESPACE_BEGIN
/**
 * Base class for string trie builder classes.
 *
 * This class is not intended for public subclassing.
 * @stable ICU 4.8
 */
class U_COMMON_API StringTrieBuilder : public UObject {
public:
    // Preprocessor directives must start their own line; the two static
    // helpers below are only visible when U_HIDE_INTERNAL_API is not defined.
#ifndef U_HIDE_INTERNAL_API
    /** @internal */
    static int32_t hashNode(const void *node);
    /** @internal */
    static UBool equalNodes(const void *left, const void *right);
#endif  /* U_HIDE_INTERNAL_API */
protected: // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API // or else the compiler will create a public default constructor. /** @internal */
StringTrieBuilder(); /** @internal */ virtual ~StringTrieBuilder();
// Finds the first unit index after this one where // the first and last element have different units again. /** @internal */ virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
// Number of different units at unitIndex. /** @internal */ virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const= 0; /** @internal */ virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0; /** @internal */ virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0;
// Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units.
// log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
/** @internal */
static const int32_t kMaxSplitBranchLevels=14;
// Internal-only declarations: hidden when U_HIDE_INTERNAL_API is defined.
#ifndef U_HIDE_INTERNAL_API
/**
 * Makes sure that there is only one unique node registered that is
 * equivalent to newNode.
 * @param newNode Input node. The builder takes ownership.
 * @param errorCode ICU in/out UErrorCode.
 *                  Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==nullptr.
 * @return newNode if it is the first of its kind, or
 *         an equivalent node if newNode is a duplicate.
 * @internal
 */
Node *registerNode(Node *newNode, UErrorCode &errorCode);
/**
 * Makes sure that there is only one unique FinalValueNode registered
 * with this value.
 * Avoids creating a node if the value is a duplicate.
 * @param value A final value.
 * @param errorCode ICU in/out UErrorCode.
 *                  Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==nullptr.
 * @return A FinalValueNode with the given value.
 * @internal
 */
Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
#endif  /* U_HIDE_INTERNAL_API */
/* * C++ note: * registerNode() and registerFinalValue() take ownership of their input nodes, * and only return owned nodes. * If they see a failure UErrorCode, they will delete the input node. * If they get a nullptr pointer, they will record a U_MEMORY_ALLOCATION_ERROR. * If there is a failure, they return nullptr. * * nullptr Node pointers can be safely passed into other Nodes because * they call the static Node::hashCode() which checks for a nullptr pointer first. * * Therefore, as long as builder functions register a new node, * they need to check for failures only before explicitly dereferencing * a Node pointer, or before setting a new UErrorCode.
*/
// Hash set of nodes, maps from nodes to integer 1.
/** @internal */
UHashtable *nodes;
// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, // it is needed for layout of other objects. /** * @internal * \cond
*/ class Node : public UObject { public:
Node(int32_t initialHash) : hash(initialHash), offset(0) {} inline int32_t hashCode() const { return hash; } // Handles node==nullptr. staticinline int32_t hashCode(const Node *node) { return node==nullptr ? 0 : node->hashCode(); } // Base class operator==() compares the actual class types. virtualbooloperator==(const Node &other) const; inlinebooloperator!=(const Node &other) const { return !operator==(other); } /** * Traverses the Node graph and numbers branch edges, with rightmost edges first. * This is to avoid writing a duplicate node twice. * * Branch nodes in this trie data structure are not symmetric. * Most branch edges "jump" to other nodes but the rightmost branch edges * just continue without a jump. * Therefore, write() must write the rightmost branch edge last * (trie units are written backwards), and must write it at that point even if * it is a duplicate of a node previously written elsewhere. * * This function visits and marks right branch edges first. * Edges are numbered with increasingly negative values because we share the * offset field which gets positive values when nodes are written. * A branch edge also remembers the first number for any of its edges. * * When a further-left branch edge has a number in the range of the rightmost * edge's numbers, then it will be written as part of the required right edge * and we can avoid writing it first. * * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative * edge numbers. * * @param edgeNumber The first edge number for this node and its sub-nodes. * @return An edge number that is at least the maximum-negative * of the input edge number and the numbers of this node and all of its sub-nodes.
*/ virtual int32_t markRightEdgesFirst(int32_t edgeNumber); // write() must set the offset to a positive value. virtualvoid write(StringTrieBuilder &builder) = 0; // See markRightEdgesFirst. inlinevoid writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
StringTrieBuilder &builder) { // Note: Edge numbers are negative, lastRight<=firstRight. // If offset>0 then this node and its sub-nodes have been written already // and we need not write them again. // If this node is part of the unwritten right branch edge, // then we wait until that is written. if(offset<0 && (offset<lastRight || firstRight<offset)) {
write(builder);
}
} inline int32_t getOffset() const { return offset; } protected:
int32_t hash;
int32_t offset;
};
#ifndef U_HIDE_INTERNAL_API // This class should not be overridden because // registerFinalValue() compares a stack-allocated FinalValueNode // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes) // with the input node, and the // !Node::operator==(other) used inside FinalValueNode::operator==(other) // will be false if the typeid's are different. /** @internal */ class FinalValueNode : public Node { public:
FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {} virtualbooloperator==(const Node &other) const override; virtualvoid write(StringTrieBuilder &builder) override; protected:
int32_t value;
}; #endif/* U_HIDE_INTERNAL_API */
// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, // it is needed for layout of other objects. /** * @internal
*/ class ValueNode : public Node { public:
ValueNode(int32_t initialHash) : Node(initialHash), hasValue(false), value(0) {} virtualbooloperator==(const Node &other) const override; void setValue(int32_t v) {
hasValue=true;
value=v;
hash=hash*37u+v;
} protected:
UBool hasValue;
int32_t value;
};
// Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, // it is needed for layout of other objects. /** * @internal
*/ class LinearMatchNode : public ValueNode { public:
LinearMatchNode(int32_t len, Node *nextNode)
: ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
length(len), next(nextNode) {} virtualbooloperator==(const Node &other) const override; virtual int32_t markRightEdgesFirst(int32_t edgeNumber) override; protected:
int32_t length;
Node *next;
};
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.