// Assume that the tailoring data is in initial state, // with nullptr pointers and 0 lengths.
// Set pointers to non-empty data parts. // Do this in order of their byte offsets. (Should help porting to Java.)
int32_t index; // one of the indexes[] slots
int32_t offset; // byte offset for the index part
int32_t length; // number of bytes in the index part
if(indexesLength > IX_TOTAL_SIZE) {
length = inIndexes[IX_TOTAL_SIZE];
} elseif(indexesLength > IX_REORDER_CODES_OFFSET) {
length = inIndexes[indexesLength - 1];
} else {
length = 0; // only indexes, and inLength was already checked for them
} if(0 <= inLength && inLength < length) {
errorCode = U_INVALID_FORMAT_ERROR; return;
}
const CollationData *baseData = base == nullptr ? nullptr : base->data; const int32_t *reorderCodes = nullptr;
int32_t reorderCodesLength = 0; const uint32_t *reorderRanges = nullptr;
int32_t reorderRangesLength = 0;
index = IX_REORDER_CODES_OFFSET;
offset = getIndex(inIndexes, indexesLength, index);
length = getIndex(inIndexes, indexesLength, index + 1) - offset; if(length >= 4) { if(baseData == nullptr) { // We assume for collation settings that // the base data does not have a reordering.
errorCode = U_INVALID_FORMAT_ERROR; return;
}
reorderCodes = reinterpret_cast<const int32_t *>(inBytes + offset);
reorderCodesLength = length / 4;
// The reorderRanges (if any) are the trailing reorderCodes entries. // Split the array at the boundary. // Script or reorder codes do not exceed 16-bit values. // Range limits are stored in the upper 16 bits, and are never 0. while(reorderRangesLength < reorderCodesLength &&
(reorderCodes[reorderCodesLength - reorderRangesLength - 1] & 0xffff0000) != 0) {
++reorderRangesLength;
}
U_ASSERT(reorderRangesLength < reorderCodesLength); if(reorderRangesLength != 0) {
reorderCodesLength -= reorderRangesLength;
reorderRanges = reinterpret_cast<const uint32_t *>(reorderCodes + reorderCodesLength);
}
}
// There should be a reorder table only if there are reorder codes. // However, when there are reorder codes the reorder table may be omitted to reduce // the data size. const uint8_t *reorderTable = nullptr;
index = IX_REORDER_TABLE_OFFSET;
offset = getIndex(inIndexes, indexesLength, index);
length = getIndex(inIndexes, indexesLength, index + 1) - offset; if(length >= 256) { if(reorderCodesLength == 0) {
errorCode = U_INVALID_FORMAT_ERROR; // Reordering table without reordering codes. return;
}
reorderTable = inBytes + offset;
} else { // If we have reorder codes, then build the reorderTable at the end, // when the CollationData is otherwise complete.
}
if(baseData != nullptr && baseData->numericPrimary != (inIndexes[IX_OPTIONS] & 0xff000000)) {
errorCode = U_INVALID_FORMAT_ERROR; return;
}
CollationData *data = nullptr; // Remains nullptr if there are no mappings.
index = IX_TRIE_OFFSET;
offset = getIndex(inIndexes, indexesLength, index);
length = getIndex(inIndexes, indexesLength, index + 1) - offset; if(length >= 8) { if(!tailoring.ensureOwnedData(errorCode)) { return; }
data = tailoring.ownedData;
data->base = baseData;
data->numericPrimary = inIndexes[IX_OPTIONS] & 0xff000000;
data->trie = tailoring.trie = utrie2_openFromSerialized(
UTRIE2_32_VALUE_BITS, inBytes + offset, length, nullptr,
&errorCode); if(U_FAILURE(errorCode)) { return; }
} elseif(baseData != nullptr) { // Use the base data. Only the settings are tailored.
tailoring.data = baseData;
} else {
errorCode = U_INVALID_FORMAT_ERROR; // No mappings. return;
}
index = IX_UNSAFE_BWD_OFFSET;
offset = getIndex(inIndexes, indexesLength, index);
length = getIndex(inIndexes, indexesLength, index + 1) - offset; if(length >= 2) { if(data == nullptr) {
errorCode = U_INVALID_FORMAT_ERROR; return;
} if(baseData == nullptr) { #ifdefined(COLLUNSAFE_COLL_VERSION) && defined (COLLUNSAFE_SERIALIZE)
tailoring.unsafeBackwardSet = new UnicodeSet(unsafe_serializedData, unsafe_serializedCount, UnicodeSet::kSerialized, errorCode); if(tailoring.unsafeBackwardSet == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR; return;
} elseif (U_FAILURE(errorCode)) { return;
} #else // Create the unsafe-backward set for the root collator. // Include all non-zero combining marks and trail surrogates. // We do this at load time, rather than at build time, // to simplify Unicode version bootstrapping: // The root data builder only needs the new FractionalUCA.txt data, // but it need not be built with a version of ICU already updated to // the corresponding new Unicode Character Database. // // The following is an optimized version of // new UnicodeSet("[[:^lccc=0:][\\udc00-\\udfff]]"). // It is faster and requires fewer code dependencies.
tailoring.unsafeBackwardSet = new UnicodeSet(0xdc00, 0xdfff); // trail surrogates if(tailoring.unsafeBackwardSet == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR; return;
}
data->nfcImpl.addLcccChars(*tailoring.unsafeBackwardSet); #endif// !COLLUNSAFE_SERIALIZE || !COLLUNSAFE_COLL_VERSION
} else { // Clone the root collator's set contents.
tailoring.unsafeBackwardSet = static_cast<UnicodeSet *>(
baseData->unsafeBackwardSet->cloneAsThawed()); if(tailoring.unsafeBackwardSet == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR; return;
}
} // Add the ranges from the data file to the unsafe-backward set.
USerializedSet sset; const uint16_t *unsafeData = reinterpret_cast<const uint16_t *>(inBytes + offset); if(!uset_getSerializedSet(&sset, unsafeData, length / 2)) {
errorCode = U_INVALID_FORMAT_ERROR; return;
}
int32_t count = uset_getSerializedRangeCount(&sset); for(int32_t i = 0; i < count; ++i) {
UChar32 start, end;
uset_getSerializedRange(&sset, i, &start, &end);
tailoring.unsafeBackwardSet->add(start, end);
} // Mark each lead surrogate as "unsafe" // if any of its 1024 associated supplementary code points is "unsafe".
UChar32 c = 0x10000; for(char16_t lead = 0xd800; lead < 0xdc00; ++lead, c += 0x400) { if(!tailoring.unsafeBackwardSet->containsNone(c, c + 0x3ff)) {
tailoring.unsafeBackwardSet->add(lead);
}
}
tailoring.unsafeBackwardSet->freeze();
data->unsafeBackwardSet = tailoring.unsafeBackwardSet;
} elseif(data == nullptr) { // Nothing to do.
} elseif(baseData != nullptr) { // No tailoring-specific data: Alias the root collator's set.
data->unsafeBackwardSet = baseData->unsafeBackwardSet;
} else {
errorCode = U_INVALID_FORMAT_ERROR; // No unsafeBackwardSet. return;
}
// If the fast Latin format version is different, // or the version is set to 0 for "no fast Latin table", // then just always use the normal string comparison path. if(data != nullptr) {
data->fastLatinTable = nullptr;
data->fastLatinTableLength = 0; if(((inIndexes[IX_OPTIONS] >> 16) & 0xff) == CollationFastLatin::VERSION) {
index = IX_FAST_LATIN_TABLE_OFFSET;
offset = getIndex(inIndexes, indexesLength, index);
length = getIndex(inIndexes, indexesLength, index + 1) - offset; if(length >= 2) {
data->fastLatinTable = reinterpret_cast<const uint16_t *>(inBytes + offset);
data->fastLatinTableLength = length / 2; if((*data->fastLatinTable >> 8) != CollationFastLatin::VERSION) {
errorCode = U_INVALID_FORMAT_ERROR; // header vs. table version mismatch return;
}
} elseif(baseData != nullptr) {
data->fastLatinTable = baseData->fastLatinTable;
data->fastLatinTableLength = baseData->fastLatinTableLength;
}
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.