/**
 * Handles one range [start, end] of code points that all map to the same
 * tailoring ce32, comparing each code point against the base data.
 *
 * A code point is added to the tailored set directly when both CE32 values
 * are self-contained and differ; otherwise the decision is delegated to
 * compare().
 *
 * @param start first code point of the range
 * @param end   last code point of the range (inclusive)
 * @param ce32  the tailoring CE32 for the range; must not be FALLBACK_CE32
 * @return U_SUCCESS(errorCode), i.e. false once an error has been recorded
 */
UBool
TailoredSet::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
    U_ASSERT(ce32 != Collation::FALLBACK_CE32);
    if(Collation::isSpecialCE32(ce32)) {
        // Resolve indirection first; the result may turn out to be a fallback.
        ce32 = data->getIndirectCE32(ce32);
        if(ce32 == Collation::FALLBACK_CE32) {
            return U_SUCCESS(errorCode);
        }
    }
    do {
        uint32_t baseCE32 = baseData->getFinalCE32(baseData->getCE32(start));
        // Do not just continue if ce32 == baseCE32 because
        // contractions and expansions in different data objects
        // normally differ even if they have the same data offsets.
        if(Collation::isSelfContainedCE32(ce32) && Collation::isSelfContainedCE32(baseCE32)) {
            // fastpath
            if(ce32 != baseCE32) {
                tailored->add(start);
            }
        } else {
            compare(start, ce32, baseCE32);
        }
    } while(++start <= end);
    return U_SUCCESS(errorCode);
}
int32_t tag; if(Collation::isSpecialCE32(ce32)) {
tag = Collation::tagFromCE32(ce32);
U_ASSERT(tag != Collation::PREFIX_TAG);
U_ASSERT(tag != Collation::CONTRACTION_TAG); // Currently, the tailoring data builder does not write offset tags. // They might be useful for saving space, // but they would complicate the builder, // and in tailorings we assume that performance of tailored characters is more important.
U_ASSERT(tag != Collation::OFFSET_TAG);
} else {
tag = -1;
}
int32_t baseTag; if(Collation::isSpecialCE32(baseCE32)) {
baseTag = Collation::tagFromCE32(baseCE32);
U_ASSERT(baseTag != Collation::PREFIX_TAG);
U_ASSERT(baseTag != Collation::CONTRACTION_TAG);
} else {
baseTag = -1;
}
// Non-contextual mappings, expansions, etc. if(baseTag == Collation::OFFSET_TAG) { // We might be comparing a tailoring CE which is a copy of // a base offset-tag CE, via the [optimize [set]] syntax // or when a single-character mapping was copied for tailored contractions. // Offset tags always result in long-primary CEs, // with common secondary/tertiary weights. if(!Collation::isLongPrimaryCE32(ce32)) {
add(c); return;
}
int64_t dataCE = baseData->ces[Collation::indexFromCE32(baseCE32)];
uint32_t p = Collation::getThreeBytePrimaryForOffsetData(c, dataCE); if(Collation::primaryFromLongPrimaryCE32(ce32) != p) {
add(c); return;
}
}
/**
 * Range callback passed to utrie2_enum() by ContractionsAndExpansions::forData().
 *
 * Behavior depends on cne->checkTailored:
 *  - 0:  no tailoring; the range is handled directly.
 *  - <0: enumerating tailoring data; fallback ranges are ignored, all other
 *        ranges are recorded in cne->tailored and then handled.
 *  - >0: enumerating base data; code points already in cne->tailored are
 *        excluded before the remaining sub-ranges are handled.
 *
 * @param context the ContractionsAndExpansions object (passed as const void *)
 * @param start   first code point of the range
 * @param end     last code point of the range (inclusive)
 * @param ce32    the CE32 value shared by the range
 * @return true to continue the enumeration, false once cne->errorCode is a failure
 */
static UBool U_CALLCONV
enumCnERange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
    // The enumeration API hands the context back as const void *,
    // but the callback needs to mutate the collector.
    ContractionsAndExpansions *cne =
        static_cast<ContractionsAndExpansions *>(const_cast<void *>(context));
    if(cne->checkTailored == 0) {
        // There is no tailoring.
        // No need to collect nor check the tailored set.
    } else if(cne->checkTailored < 0) {
        // Collect the set of code points with mappings in the tailoring data.
        if(ce32 == Collation::FALLBACK_CE32) {
            return true;  // fallback to base, not tailored
        } else {
            cne->tailored.add(start, end);
        }
        // checkTailored > 0: Exclude tailored ranges from the base data enumeration.
    } else if(start == end) {
        if(cne->tailored.contains(start)) {
            return true;
        }
    } else if(cne->tailored.containsSome(start, end)) {
        // Handle only the untailored sub-ranges of [start, end].
        cne->ranges.set(start, end).removeAll(cne->tailored);
        int32_t count = cne->ranges.getRangeCount();
        for(int32_t i = 0; i < count; ++i) {
            cne->handleCE32(cne->ranges.getRangeStart(i), cne->ranges.getRangeEnd(i), ce32);
        }
        return U_SUCCESS(cne->errorCode);
    }
    cne->handleCE32(start, end, ce32);
    return U_SUCCESS(cne->errorCode);
}
U_CDECL_END
void
ContractionsAndExpansions::forData(const CollationData *d, UErrorCode &ec) { if(U_FAILURE(ec)) { return; }
errorCode = ec; // Preserve info & warning codes. // Add all from the data, can be tailoring or base. if(d->base != nullptr) {
checkTailored = -1;
}
data = d;
utrie2_enum(data->trie, nullptr, enumCnERange, this); if(d->base == nullptr || U_FAILURE(errorCode)) {
ec = errorCode; return;
} // Add all from the base data but only for un-tailored code points.
tailored.freeze();
checkTailored = 1;
data = d->base;
utrie2_enum(data->trie, nullptr, enumCnERange, this);
ec = errorCode;
}
/**
 * Processes the CE32 shared by the code point range [start, end].
 *
 * Simple and long-primary/long-secondary CE32s are reported to the sink (if
 * any) as single CEs. Expansion-type CE32s are reported as expansions and,
 * unless a prefix is currently being processed, the range is passed to
 * addExpansions(). Prefix and contraction CE32s are delegated to
 * handlePrefixes() / handleContractions(). Digit and U+0000 tags are resolved
 * to their underlying CE32, and the loop continues with that value.
 * Tags that must not occur here set errorCode to U_INTERNAL_PROGRAM_ERROR
 * (unless an error is already recorded).
 *
 * @param start first code point of the range
 * @param end   last code point of the range (inclusive)
 * @param ce32  the CE32 value for the range
 */
void
ContractionsAndExpansions::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
    for(;;) {
        if((ce32 & 0xff) < Collation::SPECIAL_CE32_LOW_BYTE) {
            // !isSpecialCE32()
            if(sink != nullptr) {
                sink->handleCE(Collation::ceFromSimpleCE32(ce32));
            }
            return;
        }
        switch(Collation::tagFromCE32(ce32)) {
        case Collation::FALLBACK_TAG:
            return;
        case Collation::RESERVED_TAG_3:
        case Collation::BUILDER_DATA_TAG:
        case Collation::LEAD_SURROGATE_TAG:
            if(U_SUCCESS(errorCode)) { errorCode = U_INTERNAL_PROGRAM_ERROR; }
            return;
        case Collation::LONG_PRIMARY_TAG:
            if(sink != nullptr) {
                sink->handleCE(Collation::ceFromLongPrimaryCE32(ce32));
            }
            return;
        case Collation::LONG_SECONDARY_TAG:
            if(sink != nullptr) {
                sink->handleCE(Collation::ceFromLongSecondaryCE32(ce32));
            }
            return;
        case Collation::LATIN_EXPANSION_TAG:
            if(sink != nullptr) {
                ces[0] = Collation::latinCE0FromCE32(ce32);
                ces[1] = Collation::latinCE1FromCE32(ce32);
                sink->handleExpansion(ces, 2);
            }
            // Optimization: If we have a prefix,
            // then the relevant strings have been added already.
            if(unreversedPrefix.isEmpty()) {
                addExpansions(start, end);
            }
            return;
        case Collation::EXPANSION32_TAG:
            if(sink != nullptr) {
                // Convert each stored CE32 of the expansion into a full CE.
                const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
                int32_t length = Collation::lengthFromCE32(ce32);
                for(int32_t i = 0; i < length; ++i) {
                    ces[i] = Collation::ceFromCE32(*ce32s++);
                }
                sink->handleExpansion(ces, length);
            }
            // Optimization: If we have a prefix,
            // then the relevant strings have been added already.
            if(unreversedPrefix.isEmpty()) {
                addExpansions(start, end);
            }
            return;
        case Collation::EXPANSION_TAG:
            if(sink != nullptr) {
                int32_t length = Collation::lengthFromCE32(ce32);
                sink->handleExpansion(data->ces + Collation::indexFromCE32(ce32), length);
            }
            // Optimization: If we have a prefix,
            // then the relevant strings have been added already.
            if(unreversedPrefix.isEmpty()) {
                addExpansions(start, end);
            }
            return;
        case Collation::PREFIX_TAG:
            handlePrefixes(start, end, ce32);
            return;
        case Collation::CONTRACTION_TAG:
            handleContractions(start, end, ce32);
            return;
        case Collation::DIGIT_TAG:
            // Fetch the non-numeric-collation CE32 and continue.
            ce32 = data->ce32s[Collation::indexFromCE32(ce32)];
            break;
        case Collation::U0000_TAG:
            U_ASSERT(start == 0 && end == 0);
            // Fetch the normal ce32 for U+0000 and continue.
            ce32 = data->ce32s[0];
            break;
        case Collation::HANGUL_TAG:
            if(sink != nullptr) {
                // TODO: This should be optimized,
                // especially if [start..end] is the complete Hangul range. (assert that)
                UTF16CollationIterator iter(data, false, nullptr, nullptr, nullptr);
                char16_t hangul[1] = { 0 };
                for(UChar32 c = start; c <= end; ++c) {
                    hangul[0] = static_cast<char16_t>(c);
                    iter.setText(hangul, hangul + 1);
                    int32_t length = iter.fetchCEs(errorCode);
                    if(U_FAILURE(errorCode)) { return; }
                    // Ignore the terminating non-CE.
                    U_ASSERT(length >= 2 && iter.getCE(length - 1) == Collation::NO_CE);
                    sink->handleExpansion(iter.getCEs(), length - 1);
                }
            }
            // Optimization: If we have a prefix,
            // then the relevant strings have been added already.
            if(unreversedPrefix.isEmpty()) {
                addExpansions(start, end);
            }
            return;
        case Collation::OFFSET_TAG:
            // Currently no need to send offset CEs to the sink.
            return;
        case Collation::IMPLICIT_TAG:
            // Currently no need to send implicit CEs to the sink.
            return;
        }
    }
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.