// Requires start<limit, // and all strings of the [start..limit[ elements must be sorted and // have a common prefix of length unitIndex.
int32_t
StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
UBool hasValue=false;
int32_t value=0;
int32_t type; if(unitIndex==getElementStringLength(start)) { // An intermediate or final value.
value=getElementValue(start++); if(start==limit) { return writeValueAndFinal(value, true); // final-value node
}
hasValue=true;
} // Now all [start..limit[ strings are longer than unitIndex.
int32_t minUnit=getElementUnit(start, unitIndex);
int32_t maxUnit=getElementUnit(limit-1, unitIndex); if(minUnit==maxUnit) { // Linear-match node: All strings have the same character at unitIndex.
int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
writeNode(start, limit, lastUnitIndex); // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
int32_t length=lastUnitIndex-unitIndex;
int32_t maxLinearMatchLength=getMaxLinearMatchLength(); while(length>maxLinearMatchLength) {
lastUnitIndex-=maxLinearMatchLength;
length-=maxLinearMatchLength;
writeElementUnits(start, lastUnitIndex, maxLinearMatchLength);
write(getMinLinearMatch()+maxLinearMatchLength-1);
}
writeElementUnits(start, unitIndex, length);
type=getMinLinearMatch()+length-1;
} else { // Branch node.
int32_t length=countElementUnits(start, limit, unitIndex); // length>=2 because minUnit!=maxUnit.
writeBranchSubNode(start, limit, unitIndex, length); if(--length<getMinLinearMatch()) {
type=length;
} else {
write(length);
type=0;
}
} return writeValueAndType(hasValue, value, type);
}
// start<limit && all strings longer than unitIndex && // length different units at unitIndex
int32_t
StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
char16_t middleUnits[kMaxSplitBranchLevels];
int32_t lessThan[kMaxSplitBranchLevels];
int32_t ltLength=0; while(length>getMaxBranchLinearSubNodeLength()) { // Branch on the middle unit. // First, find the middle unit.
int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); // Encode the less-than branch first.
middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2);
++ltLength; // Continue for the greater-or-equal branch.
start=i;
length=length-length/2;
} // For each unit, find its elements array start and whether it has a final value.
int32_t starts[kMaxBranchLinearSubNodeLength];
UBool isFinal[kMaxBranchLinearSubNodeLength-1];
int32_t unitNumber=0; do {
int32_t i=starts[unitNumber]=start;
char16_t unit=getElementUnit(i++, unitIndex);
i=indexOfElementWithNextUnit(i, unitIndex, unit);
isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start);
start=i;
} while(++unitNumber<length-1); // unitNumber==length-1, and the maxUnit elements range is [start..limit[
starts[unitNumber]=start;
// Write the sub-nodes in reverse order: The jump lengths are deltas from // after their own positions, so if we wrote the minUnit sub-node first, // then its jump delta would be larger. // Instead we write the minUnit sub-node last, for a shorter delta.
int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1]; do {
--unitNumber; if(!isFinal[unitNumber]) {
jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1);
}
} while(unitNumber>0); // The maxUnit sub-node is written as the very last one because we do // not jump for it at all.
unitNumber=length-1;
writeNode(start, limit, unitIndex+1);
int32_t offset=write(getElementUnit(start, unitIndex)); // Write the rest of this node's unit-value pairs. while(--unitNumber>=0) {
start=starts[unitNumber];
int32_t value; if(isFinal[unitNumber]) { // Write the final value for the one string ending with this unit.
value=getElementValue(start);
} else { // Write the delta to the start position of the sub-node.
value=offset-jumpTargets[unitNumber];
}
writeValueAndFinal(value, isFinal[unitNumber]);
offset=write(getElementUnit(start, unitIndex));
} // Write the split-branch nodes. while(ltLength>0) {
--ltLength;
writeDeltaTo(lessThan[ltLength]);
offset=write(middleUnits[ltLength]);
} return offset;
}
// Requires start<limit, // and all strings of the [start..limit[ elements must be sorted and // have a common prefix of length unitIndex.
StringTrieBuilder::Node *
StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return nullptr;
}
UBool hasValue=false;
int32_t value=0; if(unitIndex==getElementStringLength(start)) { // An intermediate or final value.
value=getElementValue(start++); if(start==limit) { return registerFinalValue(value, errorCode);
}
hasValue=true;
}
Node *node; // Now all [start..limit[ strings are longer than unitIndex.
int32_t minUnit=getElementUnit(start, unitIndex);
int32_t maxUnit=getElementUnit(limit-1, unitIndex); if(minUnit==maxUnit) { // Linear-match node: All strings have the same character at unitIndex.
int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex);
Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode); // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
int32_t length=lastUnitIndex-unitIndex;
int32_t maxLinearMatchLength=getMaxLinearMatchLength(); while(length>maxLinearMatchLength) {
lastUnitIndex-=maxLinearMatchLength;
length-=maxLinearMatchLength;
node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode);
nextNode=registerNode(node, errorCode);
}
node=createLinearMatchNode(start, unitIndex, length, nextNode);
} else { // Branch node.
int32_t length=countElementUnits(start, limit, unitIndex); // length>=2 because minUnit!=maxUnit.
Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode);
node=new BranchHeadNode(length, subNode);
} if(hasValue && node!=nullptr) { if(matchNodesCanHaveValues()) {
((ValueNode *)node)->setValue(value);
} else {
node=new IntermediateValueNode(value, registerNode(node, errorCode));
}
} return registerNode(node, errorCode);
}
// start<limit && all strings longer than unitIndex && // length different units at unitIndex
StringTrieBuilder::Node *
StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
int32_t length, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return nullptr;
}
char16_t middleUnits[kMaxSplitBranchLevels];
Node *lessThan[kMaxSplitBranchLevels];
int32_t ltLength=0; while(length>getMaxBranchLinearSubNodeLength()) { // Branch on the middle unit. // First, find the middle unit.
int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); // Create the less-than branch.
middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit
lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode);
++ltLength; // Continue for the greater-or-equal branch.
start=i;
length=length-length/2;
} if(U_FAILURE(errorCode)) { return nullptr;
}
ListBranchNode *listNode=new ListBranchNode(); if(listNode==nullptr) {
errorCode=U_MEMORY_ALLOCATION_ERROR; return nullptr;
} // For each unit, find its elements array start and whether it has a final value.
int32_t unitNumber=0; do {
int32_t i=start;
char16_t unit=getElementUnit(i++, unitIndex);
i=indexOfElementWithNextUnit(i, unitIndex, unit); if(start==i-1 && unitIndex+1==getElementStringLength(start)) {
listNode->add(unit, getElementValue(start));
} else {
listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode));
}
start=i;
} while(++unitNumber<length-1); // unitNumber==length-1, and the maxUnit elements range is [start..limit[
char16_t unit=getElementUnit(start, unitIndex); if(start==limit-1 && unitIndex+1==getElementStringLength(start)) {
listNode->add(unit, getElementValue(start));
} else {
listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode));
}
Node *node=registerNode(listNode, errorCode); // Create the split-branch nodes. while(ltLength>0) {
--ltLength;
node=registerNode( new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode);
} return node;
}
StringTrieBuilder::Node *
StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { delete newNode; return nullptr;
} if(newNode==nullptr) {
errorCode=U_MEMORY_ALLOCATION_ERROR; return nullptr;
} const UHashElement *old=uhash_find(nodes, newNode); if(old!=nullptr) { delete newNode; returnstatic_cast<Node*>(old->key.pointer);
} // If uhash_puti() returns a non-zero value from an equivalent, previously // registered node, then uhash_find() failed to find that and we will leak newNode. #if U_DEBUG
int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. #endif
uhash_puti(nodes, newNode, 1, &errorCode);
U_ASSERT(oldValue==0); if(U_FAILURE(errorCode)) { delete newNode; return nullptr;
} return newNode;
}
StringTrieBuilder::Node *
StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return nullptr;
}
FinalValueNode key(value); const UHashElement *old=uhash_find(nodes, &key); if(old!=nullptr) { returnstatic_cast<Node*>(old->key.pointer);
}
Node *newNode=new FinalValueNode(value); if(newNode==nullptr) {
errorCode=U_MEMORY_ALLOCATION_ERROR; return nullptr;
} // If uhash_puti() returns a non-zero value from an equivalent, previously // registered node, then uhash_find() failed to find that and we will leak newNode. #if U_DEBUG
int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. #endif
uhash_puti(nodes, newNode, 1, &errorCode);
U_ASSERT(oldValue==0); if(U_FAILURE(errorCode)) { delete newNode; return nullptr;
} return newNode;
}
int32_t
StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) { if(offset==0) {
firstEdgeNumber=edgeNumber;
int32_t step=0;
int32_t i=length; do {
Node *edge=equal[--i]; if(edge!=nullptr) {
edgeNumber=edge->markRightEdgesFirst(edgeNumber-step);
} // For all but the rightmost edge, decrement the edge number.
step=1;
} while(i>0);
offset=edgeNumber;
} return edgeNumber;
}
void
StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) { // Write the sub-nodes in reverse order: The jump lengths are deltas from // after their own positions, so if we wrote the minUnit sub-node first, // then its jump delta would be larger. // Instead we write the minUnit sub-node last, for a shorter delta.
int32_t unitNumber=length-1;
Node *rightEdge=equal[unitNumber];
int32_t rightEdgeNumber= rightEdge==nullptr ? firstEdgeNumber : rightEdge->getOffset(); do {
--unitNumber; if(equal[unitNumber]!=nullptr) {
equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
}
} while(unitNumber>0); // The maxUnit sub-node is written as the very last one because we do // not jump for it at all.
unitNumber=length-1; if(rightEdge==nullptr) {
builder.writeValueAndFinal(values[unitNumber], true);
} else {
rightEdge->write(builder);
}
offset=builder.write(units[unitNumber]); // Write the rest of this node's unit-value pairs. while(--unitNumber>=0) {
int32_t value;
UBool isFinal; if(equal[unitNumber]==nullptr) { // Write the final value for the one string ending with this unit.
value=values[unitNumber];
isFinal=true;
} else { // Write the delta to the start position of the sub-node.
U_ASSERT(equal[unitNumber]->getOffset()>0);
value=offset-equal[unitNumber]->getOffset();
isFinal=false;
}
builder.writeValueAndFinal(value, isFinal);
offset=builder.write(units[unitNumber]);
}
}
void
StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) { // Encode the less-than branch first.
lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder); // Encode the greater-or-equal branch last because we do not jump for it at all.
greaterOrEqual->write(builder); // Write this node.
U_ASSERT(lessThan->getOffset()>0);
builder.writeDeltaTo(lessThan->getOffset()); // less-than
offset=builder.write(unit);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.