/**
 * Searches for a block in p[pStart..length) identical to the blockLength
 * units starting at q[qStart]. Returns the start index, or -1 if none.
 */
int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
                      const uint16_t *q, int32_t qStart, int32_t blockLength) {
    // Ensure that we do not even partially get past length.
    int32_t lastStart = length - blockLength;
    const uint16_t *block = q + qStart;
    for (int32_t i = pStart; i <= lastStart; ++i) {
        if (equalBlocks(p + i, block, blockLength)) {
            return i;
        }
    }
    return -1;
}
/**
 * Searches for a run of blockLength consecutive array elements all equal to
 * value, starting no earlier than start and ending no later than limit.
 * Returns the start index of such a run, or -1 if there is none.
 */
int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit,
                         uint32_t value, int32_t blockLength) {
    // Ensure that we do not even partially get past limit.
    int32_t lastStart = limit - blockLength;
    int32_t block = start;
    while (block <= lastStart) {
        if (p[block] != value) {
            ++block;
            continue;
        }
        // Count how far the run of equal values extends from block.
        int32_t i = 1;
        while (i < blockLength && p[block + i] == value) {
            ++i;
        }
        if (i == blockLength) {
            return block;
        }
        // p[block + i] != value: no matching run can start at or before block + i.
        block += i + 1;
    }
    return -1;
}
/**
 * Look for maximum overlap of the beginning of the other block
 * with the previous, adjacent block.
 * Returns the largest overlap length in [0, blockLength), i.e. how many of
 * the first units of q[qStart..] already appear at the end of p[0..length).
 */
template<typename UIntA, typename UIntB>
int32_t getOverlap(const UIntA *p, int32_t length, const UIntB *q, int32_t qStart, int32_t blockLength) {
    int32_t maxOverlap = blockLength - 1;
    U_ASSERT(maxOverlap <= length);
    const UIntB *block = q + qStart;
    for (int32_t overlap = maxOverlap; overlap > 0; --overlap) {
        if (equalBlocks(p + (length - overlap), block, overlap)) {
            return overlap;
        }
    }
    return 0;
}
/**
 * Returns true if dataOffset equals the index entry at the start of any
 * fast-range data block (index entries at multiples of
 * SMALL_DATA_BLOCKS_PER_BMP_BLOCK below fastILimit).
 */
bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) {
    for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
        if (index[i] == dataOffset) {
            return true;  // FIX(review): source had the fused token "returntrue;"
        }
    }
    return false;  // FIX(review): source had the fused token "returnfalse;"
}
/**
 * Finds the start of the last range in the trie by enumerating backward.
 * Indexes for code points higher than this will be omitted.
 */
UChar32 MutableCodePointTrie::findHighStart() const {
    // Walk index blocks from the top down; stop at the first one that is not
    // entirely equal to highValue.
    for (int32_t i = (highStart >> UCPTRIE_SHIFT_3) - 1; i >= 0; --i) {
        bool match;
        if (flags[i] == ALL_SAME) {
            match = (index[i] == highValue);
        } else /* MIXED */ {
            const uint32_t *p = data + index[i];
            match = true;
            for (int32_t j = 0; j < UCPTRIE_SMALL_DATA_BLOCK_LENGTH; ++j) {
                if (p[j] != highValue) {
                    match = false;
                    break;
                }
            }
        }
        if (!match) {
            return (i + 1) << UCPTRIE_SHIFT_3;
        }
    }
    return 0;
}
// Custom hash table for mixed-value blocks to be found anywhere in the
// compacted data or index so far.
// NOTE(review): This class appears truncated in this excerpt — its method
// definitions (init/findBlock/extend/...) and the closing brace are missing.
// Confirm against the full source file.
class MixedBlocks {
public:
    MixedBlocks() {}
    ~MixedBlocks() {
        // table is the only owned resource.
        uprv_free(table);
    }

    // Hash table.
    // The length is a prime number, larger than the maximum data length.
    // The "shift" lower bits store a data index + 1.
    // The remaining upper bits store a partial hashCode of the block data values.
    uint32_t *table = nullptr;
    int32_t capacity = 0;
    int32_t length = 0;
    int32_t shift = 0;
    uint32_t mask = 0;
// NOTE(review): The enclosing function header is missing from this excerpt.
// This reads like the body of MutableCodePointTrie::compactWholeDataBlocks(),
// which takes fastILimit and allSameBlocks (both used but not declared here);
// the `overflow` flag is presumably declared under #ifdef UCPTRIE_DEBUG in the
// missing part. Confirm against the full source file.
//
// Computes an upper bound for the compacted data array length, classifies
// each data block as ALL_SAME / MIXED / SAME_AS, and deduplicates same-value
// blocks via allSameBlocks. Returns the capacity bound, or -1 on failure.

    // ASCII data will be stored as a linear table, even if the following code
    // does not yet count it that way.
    int32_t newDataCapacity = ASCII_LIMIT;

    // Add room for a small data null block in case it would match the start of
    // a fast data block where dataNullOffset must not be set in that case.
    newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;

    // Add room for special values (errorValue, highValue) and padding.
    newDataCapacity += 4;

    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
    int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
    int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
    for (int32_t i = 0; i < iLimit; i += inc) {
        if (i == fastILimit) {
            // Past the fast range: switch to small-block granularity.
            blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
            inc = 1;
        }
        uint32_t value = index[i];
        if (flags[i] == MIXED) {
            // Really mixed?
            const uint32_t *p = data + value;
            value = *p;
            if (allValuesSameAs(p + 1, blockLength - 1, value)) {
                flags[i] = ALL_SAME;
                index[i] = value;
                // Fall through to ALL_SAME handling.
            } else {
                newDataCapacity += blockLength;
                continue;
            }
        } else {
            U_ASSERT(flags[i] == ALL_SAME);
            if (inc > 1) {
                // Do all of the fast-range data block's ALL_SAME parts have the same value?
                bool allSame = true;
                int32_t next_i = i + inc;
                for (int32_t j = i + 1; j < next_i; ++j) {
                    U_ASSERT(flags[j] == ALL_SAME);
                    if (index[j] != value) {
                        allSame = false;
                        break;
                    }
                }
                if (!allSame) {
                    // Turn it into a MIXED block.
                    if (getDataBlock(i) < 0) {
                        return -1;
                    }
                    newDataCapacity += blockLength;
                    continue;
                }
            }
        }
        // Is there another ALL_SAME block with the same value?
        int32_t other = allSameBlocks.findOrAdd(i, inc, value);
        if (other == AllSameBlocks::OVERFLOW) {
            // The fixed-size array overflowed. Slow check for a duplicate block.
#ifdef UCPTRIE_DEBUG
            if (!overflow) {
                puts("UCPTrie AllSameBlocks overflow");
                overflow = true;
            }
#endif
            int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
            for (int32_t j = 0;; j += jInc) {
                if (j == i) {
                    // No duplicate found before i: record i itself.
                    allSameBlocks.add(i, inc, value);
                    break;
                }
                if (j == fastILimit) {
                    jInc = 1;
                }
                if (flags[j] == ALL_SAME && index[j] == value) {
                    allSameBlocks.add(j, jInc + inc, value);
                    other = j;
                    break;
                    // We could keep counting blocks with the same value
                    // before we add the first one, which may improve compaction in rare cases,
                    // but it would make it slower.
                }
            }
        }
        if (other >= 0) {
            flags[i] = SAME_AS;
            index[i] = other;
        } else {
            // New unique same-value block.
            newDataCapacity += blockLength;
        }
    }
    return newDataCapacity;
}
/** * Compacts a build-time trie. * * The compaction * - removes blocks that are identical with earlier ones * - overlaps each new non-duplicate block as much as possible with the previously-written one * - works with fast-range data blocks whose length is a multiple of that of * higher-code-point data blocks * * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
*/
int32_t MutableCodePointTrie::compactData(
int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) { #ifdef UCPTRIE_DEBUG
int32_t countSame=0, sumOverlaps=0; bool printData = dataLength == 29088 /* line.brk */ || // dataLength == 30048 /* CanonIterData */ ||
dataLength == 50400 /* zh.txt~stroke */; #endif
// The linear ASCII data has been copied into newData already.
int32_t newDataLength = 0; for (int32_t i = 0; newDataLength < ASCII_LIMIT;
newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
index[i] = newDataLength; #ifdef UCPTRIE_DEBUG if (printData) {
printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue);
} #endif
}
int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
int32_t fastLength = 0; for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) { if (i == fastILimit) {
blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
inc = 1;
fastLength = newDataLength; if (!mixedBlocks.init(newDataCapacity, blockLength)) {
errorCode = U_MEMORY_ALLOCATION_ERROR; return 0;
}
mixedBlocks.extend(newData, 0, 0, newDataLength);
} if (flags[i] == ALL_SAME) {
uint32_t value = index[i]; // Find an earlier part of the data array of length blockLength // that is filled with this value.
int32_t n = mixedBlocks.findAllSameBlock(newData, value); // If we find a match, and the current block is the data null block, // and it is not a fast block but matches the start of a fast block, // then we need to continue looking. // This is because this small block is shorter than the fast block, // and not all of the rest of the fast block is filled with this value. // Otherwise trie.getRange() would detect that the fast block starts at // dataNullOffset and assume incorrectly that it is filled with the null value. while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength &&
isStartOfSomeFastBlock(n, index, fastILimit)) {
n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength);
} if (n >= 0) {
DEBUG_DO(++countSame);
index[i] = n;
} else {
n = getAllSameOverlap(newData, newDataLength, value, blockLength);
DEBUG_DO(sumOverlaps += n); #ifdef UCPTRIE_DEBUG if (printData) {
printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue);
} #endif
index[i] = newDataLength - n;
int32_t prevDataLength = newDataLength; while (n < blockLength) {
newData[newDataLength++] = value;
++n;
}
mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
}
} elseif (flags[i] == MIXED) { const uint32_t *block = data + index[i];
int32_t n = mixedBlocks.findBlock(newData, block, 0); if (n >= 0) {
DEBUG_DO(++countSame);
index[i] = n;
} else {
n = getOverlap(newData, newDataLength, block, 0, blockLength);
DEBUG_DO(sumOverlaps += n); #ifdef UCPTRIE_DEBUG if (printData) {
printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue);
} #endif
index[i] = newDataLength - n;
int32_t prevDataLength = newDataLength; while (n < blockLength) {
newData[newDataLength++] = block[n++];
}
mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
}
} else/* SAME_AS */ {
uint32_t j = index[i];
index[i] = index[j];
}
}
#ifdef UCPTRIE_DEBUG /* we saved some space */
printf("compacting UCPTrie: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n",
(long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps); #endif return newDataLength;
}
int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks,
UErrorCode &errorCode) {
int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3); if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) { // Only the linear fast index, no multi-stage index tables.
index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; return fastIndexLength;
}
// Condense the fast index table. // Also, does it contain an index-3 block with all dataNullOffset?
uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH]; // fastIndexLength
int32_t i3FirstNull = -1; for (int32_t i = 0, j = 0; i < fastILimit; ++j) {
uint32_t i3 = index[i];
fastIndex[j] = static_cast<uint16_t>(i3); if (i3 == static_cast<uint32_t>(dataNullOffset)) { if (i3FirstNull < 0) {
i3FirstNull = j;
} elseif (index3NullOffset < 0 &&
(j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) {
index3NullOffset = i3FirstNull;
}
} else {
i3FirstNull = -1;
} // Set the index entries that compactData() skipped. // Needed when the multi-stage index covers the fast index range as well.
int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; while (++i < iNext) {
i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
index[i] = i3;
}
}
// Examine index-3 blocks. For each determine one of: // - same as the index-3 null block // - same as a fast-index block // - 16-bit indexes // - 18-bit indexes // We store this in the first flags entry for the index-3 block. // // Also determine an upper limit for the index-3 table length.
int32_t index3Capacity = 0;
i3FirstNull = index3NullOffset; bool hasLongI3Blocks = false; // If the fast index covers the whole BMP, then // the multi-stage index is only for supplementary code points. // Otherwise, the multi-stage index covers all of Unicode.
int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT;
int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; for (int32_t i = iStart; i < iLimit;) {
int32_t j = i;
int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
uint32_t oredI3 = 0; bool isNull = true; do {
uint32_t i3 = index[j];
oredI3 |= i3; if (i3 != static_cast<uint32_t>(dataNullOffset)) {
isNull = false;
}
} while (++j < jLimit); if (isNull) {
flags[i] = I3_NULL; if (i3FirstNull < 0) { if (oredI3 <= 0xffff) {
index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
} else {
index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
hasLongI3Blocks = true;
}
i3FirstNull = 0;
}
} else { if (oredI3 <= 0xffff) {
int32_t n = mixedBlocks.findBlock(fastIndex, index, i); if (n >= 0) {
flags[i] = I3_BMP;
index[i] = n;
} else {
flags[i] = I3_16;
index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
}
} else {
flags[i] = I3_18;
index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
hasLongI3Blocks = true;
}
}
i = j;
}
// Length of the index-1 table, rounded up.
int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2;
// Index table: Fast index, index-1, index-3, index-2. // +1 for possible index table padding.
int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1;
index16 = static_cast<uint16_t*>(uprv_malloc(index16Capacity * 2)); if (index16 == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR; return 0;
}
uprv_memcpy(index16, fastIndex, fastIndexLength * 2);
if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
errorCode = U_MEMORY_ALLOCATION_ERROR; return 0;
}
MixedBlocks longI3Blocks; if (hasLongI3Blocks) { if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) {
errorCode = U_MEMORY_ALLOCATION_ERROR; return 0;
}
}
// Compact the index-3 table and write an uncompacted version of the index-2 table.
uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity
int32_t i2Length = 0;
i3FirstNull = index3NullOffset;
int32_t index3Start = fastIndexLength + index1Length;
int32_t indexLength = index3Start; for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) {
int32_t i3;
uint8_t f = flags[i]; if (f == I3_NULL && i3FirstNull < 0) { // First index-3 null block. Write & overlap it like a normal block, then remember it.
f = dataNullOffset <= 0xffff ? I3_16 : I3_18;
i3FirstNull = 0;
} if (f == I3_NULL) {
i3 = index3NullOffset;
} elseif (f == I3_BMP) {
i3 = index[i];
} elseif (f == I3_16) {
int32_t n = mixedBlocks.findBlock(index16, index, i); if (n >= 0) {
i3 = n;
} else { if (indexLength == index3Start) { // No overlap at the boundary between the index-1 and index-3 tables.
n = 0;
} else {
n = getOverlap(index16, indexLength,
index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
}
i3 = indexLength - n;
int32_t prevIndexLength = indexLength; while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) {
index16[indexLength++] = index[i + n++];
}
mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength); if (hasLongI3Blocks) {
longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
}
}
} else {
U_ASSERT(f == I3_18);
U_ASSERT(hasLongI3Blocks); // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
int32_t j = i;
int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
int32_t k = indexLength; do {
++k;
uint32_t v = index[j++];
uint32_t upperBits = (v & 0x30000) >> 2;
index16[k++] = v;
v = index[j++];
upperBits |= (v & 0x30000) >> 4;
index16[k++] = v;
v = index[j++];
upperBits |= (v & 0x30000) >> 6;
index16[k++] = v;
v = index[j++];
upperBits |= (v & 0x30000) >> 8;
index16[k++] = v;
v = index[j++];
upperBits |= (v & 0x30000) >> 10;
index16[k++] = v;
v = index[j++];
upperBits |= (v & 0x30000) >> 12;
index16[k++] = v;
v = index[j++];
upperBits |= (v & 0x30000) >> 14;
index16[k++] = v;
v = index[j++];
upperBits |= (v & 0x30000) >> 16;
index16[k++] = v;
index16[k - 9] = upperBits;
} while (j < jLimit);
int32_t n = longI3Blocks.findBlock(index16, index16, indexLength); if (n >= 0) {
i3 = n | 0x8000;
} else { if (indexLength == index3Start) { // No overlap at the boundary between the index-1 and index-3 tables.
n = 0;
} else {
n = getOverlap(index16, indexLength,
index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
}
i3 = (indexLength - n) | 0x8000;
int32_t prevIndexLength = indexLength; if (n > 0) {
int32_t start = indexLength; while (n < INDEX_3_18BIT_BLOCK_LENGTH) {
index16[indexLength++] = index16[start + n++];
}
} else {
indexLength += INDEX_3_18BIT_BLOCK_LENGTH;
}
mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength); if (hasLongI3Blocks) {
longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
}
}
} if (index3NullOffset < 0 && i3FirstNull >= 0) {
index3NullOffset = i3;
} // Set the index-2 table entry.
index2[i2Length++] = i3;
}
U_ASSERT(i2Length == index2Capacity);
U_ASSERT(indexLength <= index3Start + index3Capacity);
if (index3NullOffset < 0) {
index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
} if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) { // The index-3 offsets exceed 15 bits, or // the last one cannot be distinguished from the no-null-block value.
errorCode = U_INDEX_OUTOFBOUNDS_ERROR; return 0;
}
// Compact the index-2 table and write the index-1 table.
static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH, "must re-init mixedBlocks");
int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH;
int32_t i1 = fastIndexLength; for (int32_t i = 0; i < i2Length; i += blockLength) {
int32_t n; if ((i2Length - i) >= blockLength) { // normal block
U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH);
n = mixedBlocks.findBlock(index16, index2, i);
} else { // highStart is inside the last index-2 block. Shorten it.
blockLength = i2Length - i;
n = findSameBlock(index16, index3Start, indexLength,
index2, i, blockLength);
}
int32_t i2; if (n >= 0) {
i2 = n;
} else { if (indexLength == index3Start) { // No overlap at the boundary between the index-1 and index-3/2 tables.
n = 0;
} else {
n = getOverlap(index16, indexLength, index2, i, blockLength);
}
i2 = indexLength - n;
int32_t prevIndexLength = indexLength; while (n < blockLength) {
index16[indexLength++] = index2[i + n++];
}
mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
} // Set the index-1 table entry.
index16[i1++] = i2;
}
U_ASSERT(i1 == index3Start);
U_ASSERT(indexLength <= index16Capacity);
#ifdef UCPTRIE_DEBUG /* we saved some space */
printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n",
(long)iLimit, (long)indexLength); #endif
return indexLength;
}
// NOTE(review): compactTrie()'s tail (presumably the compactIndex() call and
// the return) appears to be missing from this excerpt — the code following
// this block belongs to a different function. Confirm against the full file.
int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) {
    // Find the real highStart and round it up.
    U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
    highValue = get(MAX_UNICODE);
    int32_t realHighStart = findHighStart();
    realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) &
        ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
    if (realHighStart == UNICODE_LIMIT) {
        highValue = initialValue;
    }

    // We always store indexes and data values for the fast range.
    // Pin highStart to the top of that range while building.
    UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3;
    if (realHighStart < fastLimit) {
        // Fill the remainder of the fast range with the high value.
        for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) {
            flags[i] = ALL_SAME;
            index[i] = highValue;
        }
        highStart = fastLimit;
    } else {
        highStart = realHighStart;
    }

    // Snapshot the ASCII values before compaction reorganizes the data array.
    uint32_t asciiData[ASCII_LIMIT];
    for (int32_t i = 0; i < ASCII_LIMIT; ++i) {
        asciiData[i] = get(i);
    }

    // First we look for which data blocks have the same value repeated over the whole block,
    // deduplicate such blocks, find a good null data block (for faster enumeration),
    // and get an upper bound for the necessary data array length.
    AllSameBlocks allSameBlocks;
    int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);
    if (newDataCapacity < 0) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }
    uint32_t* newData = static_cast<uint32_t*>(uprv_malloc(newDataCapacity * 4));
    if (newData == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }
    // The compacted data starts with the linear ASCII table.
    uprv_memcpy(newData, asciiData, sizeof(asciiData));

    MixedBlocks mixedBlocks;
    int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity,
                                        dataNullIndex, mixedBlocks, errorCode);
    if (U_FAILURE(errorCode)) { return 0; }
    U_ASSERT(newDataLength <= newDataCapacity);
    // Replace the build-time data array with the compacted one.
    uprv_free(data);
    data = newData;
    dataCapacity = newDataCapacity;
    dataLength = newDataLength;
    if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) {
        // The offset of the last data block is too high to be stored in the index table.
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    if (dataNullIndex >= 0) {
        dataNullOffset = index[dataNullIndex];
#ifdef UCPTRIE_DEBUG
        if (data[dataNullOffset] != initialValue) {
            printf("UCPTrie initialValue %lx -> more common nullValue %lx\n",
                   (long)initialValue, (long)data[dataNullOffset]);
        }
#endif
        // Adopt the null block's value as the initial value.
        initialValue = data[dataNullOffset];
    } else {
        dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET;
    }
// The mutable trie always stores 32-bit values. // When we build a UCPTrie for a smaller value width, we first mask off unused bits // before compacting the data. switch (valueWidth) { case UCPTRIE_VALUE_BITS_32: break; case UCPTRIE_VALUE_BITS_16:
maskValues(0xffff); break; case UCPTRIE_VALUE_BITS_8:
maskValues(0xff); break; default: break;
}
// Ensure data table alignment: The index length must be even for uint32_t data. if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) {
index16[indexLength++] = 0xffee; // arbitrary value
}
// Make the total trie structure length a multiple of 4 bytes by padding the data table, // and store special values as the last two data values.
int32_t length = indexLength * 2; if (valueWidth == UCPTRIE_VALUE_BITS_16) { if (((indexLength ^ dataLength) & 1) != 0) { // padding
data[dataLength++] = errorValue;
} if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
data[dataLength++] = highValue;
data[dataLength++] = errorValue;
}
length += dataLength * 2;
} elseif (valueWidth == UCPTRIE_VALUE_BITS_32) { // 32-bit data words never need padding to a multiple of 4 bytes. if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { if (data[dataLength - 1] != highValue) {
data[dataLength++] = highValue;
}
data[dataLength++] = errorValue;
}
length += dataLength * 4;
} else {
int32_t and3 = (length + dataLength) & 3; if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) { // all set
} elseif(and3 == 3 && data[dataLength - 1] == highValue) {
data[dataLength++] = errorValue;
} else { while (and3 != 2) {
data[dataLength++] = highValue;
and3 = (and3 + 1) & 3;
}
data[dataLength++] = highValue;
data[dataLength++] = errorValue;
}
length += dataLength;
}
// Calculate the total length of the UCPTrie as a single memory block.
length += sizeof(UCPTrie);
U_ASSERT((length & 3) == 0);
trie->highStart = highStart; // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes. // Runtime code needs to then test for the real highStart as well.
trie->shifted12HighStart = (highStart + 0xfff) >> 12;
trie->type = type;
trie->valueWidth = valueWidth;
/* NOTE(review): The following trailing text is a German website disclaimer that
 * is not part of this translation unit (extraction artifact). Preserved here,
 * translated, inside a comment so the file remains compilable:
 * "The information on this website was carefully compiled to the best of our
 * knowledge. However, neither completeness, nor correctness, nor quality of
 * the provided information is guaranteed.
 * Note: The colored syntax display and the measurement are still experimental."
 */