// Constructs the extra-data writer as an enumerator over the given Norms and
// remembers the `fast` flag in optimizeFast.
// Three of the data strings are created with reserved leading units whose
// meaning is given in the per-member notes below; two of those reserved units
// are then overwritten with fixed Hangul decomposition lengths.
ExtraData::ExtraData(Norms &n, UBool fast) :
        Norms::Enumerator(n),
        yesYesCompositions(1000, static_cast<UChar32>(0xffff), 2), // 0=inert, 1=Jamo L, 2=start of compositions
        yesNoMappingsAndCompositions(1000, static_cast<UChar32>(0), 1), // 0=Hangul LV, 1=start of normal data
        yesNoMappingsOnly(1000, static_cast<UChar32>(0), 1), // 0=Hangul LVT, 1=start of normal data
        optimizeFast(fast) {
    // Hangul LV algorithmically decomposes to two Jamo.
    // Some code may harmlessly read this firstUnit.
    yesNoMappingsAndCompositions.setCharAt(0, 2);
    // Hangul LVT algorithmically decomposes to three Jamo.
    // Some code may harmlessly read this firstUnit.
    yesNoMappingsOnly.setCharAt(0, 3);
}
/**
 * Appends the extra-data record for norm's mapping to dataString.
 *
 * Units are appended in this order:
 *   1. (only if norm has a raw mapping) either the raw mapping's first unit
 *      alone (compressed form), or the whole raw mapping followed by its length;
 *   2. (only if non-zero) one unit holding cc | (leadCC << 8);
 *   3. the first unit: mapping length | (trailCC << 8) | flag bits;
 *   4. the mapping itself.
 *
 * Prints an error and exits with U_INVALID_FORMAT_ERROR if the raw mapping is
 * longer than Normalizer2Impl::MAPPING_LENGTH_MASK.
 *
 * @param c          code point being written; used only in the error message
 * @param norm       normalization data supplying mapping, rawMapping and the cc values
 * @param dataString destination string that receives the record
 * @return the number of units appended ahead of the first unit (items 1 and 2)
 */
int32_t ExtraData::writeMapping(UChar32 c, const Norm &norm, UnicodeString &dataString) {
    UnicodeString &mapping=*norm.mapping;
    const int32_t mappingLength=mapping.length();

    // Header unit for the record; flag bits are OR-ed in below as needed.
    int32_t header=mappingLength|(norm.trailCC<<8);
    int32_t unitsBeforeHeader=0;

    if(norm.rawMapping!=nullptr) {
        UnicodeString &raw=*norm.rawMapping;
        const int32_t rawLength=raw.length();
        if(rawLength>Normalizer2Impl::MAPPING_LENGTH_MASK) {
            fprintf(stderr,
                    "gennorm2 error: "
                    "raw mapping for U+%04lX longer than maximum of %d\n",
                    static_cast<long>(c), Normalizer2Impl::MAPPING_LENGTH_MASK);
            exit(U_INVALID_FORMAT_ERROR);
        }

        const char16_t rawFirst=raw.charAt(0);
        // Compressible when the raw mapping equals the final mapping with the
        // final mapping's first two units replaced by the single unit rawFirst.
        // The 99s are deliberately overlong substring lengths; they get pinned
        // to the remaining length of each string.
        const bool storeFirstUnitOnly=
                rawLength==mappingLength-1 &&
                0==raw.compare(1, 99, mapping, 2, 99) &&
                rawFirst>Normalizer2Impl::MAPPING_LENGTH_MASK;

        if(storeFirstUnitOnly) {
            // rawMapping=rawFirst+mapping.substring(2) -> keep just rawFirst.
            // This helps with a few hundred mappings.
            dataString.append(rawFirst);
            unitsBeforeHeader=1;
        } else {
            // General case: the raw mapping followed by its length.
            dataString.append(raw);
            dataString.append(static_cast<char16_t>(rawLength));
            unitsBeforeHeader=rawLength+1;
        }
        header|=Normalizer2Impl::MAPPING_HAS_RAW_MAPPING;
    }

    // Optional word combining ccc (low byte) and lccc (high byte).
    const int32_t cccLccc=norm.cc|(norm.leadCC<<8);
    if(cccLccc!=0) {
        dataString.append(static_cast<char16_t>(cccLccc));
        ++unitsBeforeHeader;
        header|=Normalizer2Impl::MAPPING_HAS_CCC_LCCC_WORD;
    }

    dataString.append(static_cast<char16_t>(header));
    dataString.append(mapping);
    return unitsBeforeHeader;
}
/**
 * Writes norm's mapping record into dataString unless an identical record was
 * stored before, and returns the offset to use for it.
 *
 * The record is first built in a scratch string. If previousMappings already
 * holds that exact string, the offset stored with it is returned and nothing
 * is appended. Otherwise the record is appended to dataString and its offset
 * is registered in previousMappings for later duplicates.
 *
 * @param c                code point being written (forwarded to writeMapping)
 * @param norm             normalization data for c
 * @param dataString       destination for newly seen records
 * @param previousMappings record text -> offset of records already stored
 * @return offset of the record's first unit: position in dataString plus the
 *         pre-mapping length reported by writeMapping
 */
int32_t ExtraData::writeNoNoMapping(UChar32 c, const Norm &norm,
                                    UnicodeString &dataString,
                                    Hashtable &previousMappings) {
    UnicodeString record;
    const int32_t preMappingLength=writeMapping(c, norm, record);

    // The separate flag reports presence, so a stored offset of 0 is usable.
    UBool alreadyStored=false;
    const int32_t storedOffset=previousMappings.getiAndFound(record, alreadyStored);
    if(alreadyStored) {
        // Duplicate: reuse the identical record that is already in dataString.
        return storedOffset;
    }

    // First occurrence: append it and remember where its first unit lives.
    const int32_t newOffset=dataString.length()+preMappingLength;
    dataString.append(record);
    IcuToolErrorCode errorCode("gennorm2/writeExtraData()/Hashtable.putiAllowZero()");
    previousMappings.putiAllowZero(record, newOffset, errorCode);
    return newOffset;
}
/**
 * Tries to replace norm's mapping with a compact, algorithmic delta encoding.
 *
 * The delta form is used only when all of the following hold:
 *   - norm maps to a single code point (mappingCP >= 0);
 *   - the mapping does not go from ASCII (c <= 0x7f) to non-ASCII;
 *   - the target's type sorts below Norm::NO_NO_COMP_YES;
 *   - the difference mappingCP - c lies within +/- Normalizer2Impl::MAX_DELTA.
 *
 * On success norm.type becomes Norm::NO_NO_DELTA and norm.offset holds the
 * delta; otherwise norm is left unmodified.
 *
 * @param c    the code point whose mapping is being encoded
 * @param norm normalization data for c; updated in place on success
 * @return true if the delta encoding was applied, false otherwise
 */
UBool ExtraData::setNoNoDelta(UChar32 c, Norm &norm) const {
    // Try a compact, algorithmic encoding to a single compYesAndZeroCC code point.
    // Do not map from ASCII to non-ASCII.
    if(norm.mappingCP>=0 &&
            !(c<=0x7f && norm.mappingCP>0x7f) &&
            norms.getNormRef(norm.mappingCP).type<Norm::NO_NO_COMP_YES) {
        int32_t delta=norm.mappingCP-c;
        if(-Normalizer2Impl::MAX_DELTA<=delta && delta<=Normalizer2Impl::MAX_DELTA) {
            norm.type=Norm::NO_NO_DELTA;
            norm.offset=delta;
            return true;
        }
    }
    return false;
}
/**
 * Appends the composition list of norm to dataString.
 *
 * Each composition pair (trail, composite) is written as two or three
 * 16-bit units. The composite is shifted left by one and its low bit is set
 * when the composite itself combines forward. Only the final pair of the list
 * has Normalizer2Impl::COMP_1_LAST_TUPLE set in its first unit.
 *
 * Prints an error and exits with U_INVALID_FORMAT_ERROR if norm.cc != 0.
 *
 * @param c          code point being written; used only in the error message
 * @param norm       normalization data supplying the composition pairs
 * @param dataString destination string that receives the encoded pairs
 */
void ExtraData::writeCompositions(UChar32 c, const Norm &norm, UnicodeString &dataString) {
    if(norm.cc!=0) {
        fprintf(stderr,
                "gennorm2 error: "
                "U+%04lX combines-forward and has ccc!=0, not possible in Unicode normalization\n",
                static_cast<long>(c));
        exit(U_INVALID_FORMAT_ERROR);
    }
    int32_t length=0;  // filled in by getCompositionPairs()
    const CompositionPair *pairs=norm.getCompositionPairs(length);
    for(int32_t i=0; i<length; ++i) {
        const CompositionPair &pair=pairs[i];
        // 22 bits for the composite character and whether it combines forward.
        UChar32 compositeAndFwd=pair.composite<<1;
        if(norms.getNormRef(pair.composite).combinesFwd()) {
            compositeAndFwd|=1;  // The composite character also combines-forward.
        }
        // Encode most pairs in two units and some in three.
        // thirdUnit stays -1 when only two units are needed.
        int32_t firstUnit, secondUnit, thirdUnit;
        if(pair.trail<Normalizer2Impl::COMP_1_TRAIL_LIMIT) {
            if(compositeAndFwd<=0xffff) {
                firstUnit=pair.trail<<1;
                secondUnit=compositeAndFwd;
                thirdUnit=-1;
            } else {
                firstUnit=(pair.trail<<1)|Normalizer2Impl::COMP_1_TRIPLE;
                secondUnit=compositeAndFwd>>16;
                thirdUnit=compositeAndFwd;
            }
        } else {
            // Large trail values are split across the first and second units.
            firstUnit=(Normalizer2Impl::COMP_1_TRAIL_LIMIT+
                       (pair.trail>>Normalizer2Impl::COMP_1_TRAIL_SHIFT))|
                      Normalizer2Impl::COMP_1_TRIPLE;
            secondUnit=(pair.trail<<Normalizer2Impl::COMP_2_TRAIL_SHIFT)|
                       (compositeAndFwd>>16);
            thirdUnit=compositeAndFwd;
        }
        // Set the high bit of the first unit if this is the last composition pair.
        if(i==(length-1)) {
            firstUnit|=Normalizer2Impl::COMP_1_LAST_TUPLE;
        }
        // The casts keep only the low 16 bits of each unit.
        dataString.append(static_cast<char16_t>(firstUnit)).append(static_cast<char16_t>(secondUnit));
        if(thirdUnit>=0) {
            dataString.append(static_cast<char16_t>(thirdUnit));
        }
    }
}
// Ticket #13342 - Disable optimizations on MSVC for this function as a workaround.
#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
#pragma optimize( "", off )
#endif

/**
 * Writes the extra data for code point c according to norm.type and records
 * in norm.offset where that data starts.
 *
 * Depending on the type, the data goes into one of the per-type strings and
 * consists of a mapping record, a composition list, both, or nothing.
 * For records produced by writeMapping(), norm.offset is the string length
 * before the write plus the pre-mapping length that writeMapping() returns.
 * For NO_NO_COMP_YES and NO_NO_COMP_BOUNDARY_BEFORE, the compact delta
 * encoding (setNoNoDelta) is tried first unless optimizeFast is set.
 *
 * Exits with U_INTERNAL_PROGRAM_ERROR on an unexpected norm.type.
 *
 * @param c    the code point whose data is written
 * @param norm normalization data for c; its offset (and possibly type) is updated
 */
void ExtraData::writeExtraData(UChar32 c, Norm &norm) {
    switch(norm.type) {
    case Norm::INERT:
        break;  // no extra data
    case Norm::YES_YES_COMBINES_FWD:
        norm.offset=yesYesCompositions.length();
        writeCompositions(c, norm, yesYesCompositions);
        break;
    case Norm::YES_NO_COMBINES_FWD:
        norm.offset=yesNoMappingsAndCompositions.length()+
                writeMapping(c, norm, yesNoMappingsAndCompositions);
        writeCompositions(c, norm, yesNoMappingsAndCompositions);
        break;
    case Norm::YES_NO_MAPPING_ONLY:
        norm.offset=yesNoMappingsOnly.length()+
                writeMapping(c, norm, yesNoMappingsOnly);
        break;
    case Norm::NO_NO_COMP_YES:
        if(!optimizeFast && setNoNoDelta(c, norm)) {
            break;
        }
        norm.offset=writeNoNoMapping(c, norm, noNoMappingsCompYes, previousNoNoMappingsCompYes);
        break;
    case Norm::NO_NO_COMP_BOUNDARY_BEFORE:
        if(!optimizeFast && setNoNoDelta(c, norm)) {
            break;
        }
        norm.offset=writeNoNoMapping(
            c, norm, noNoMappingsCompBoundaryBefore, previousNoNoMappingsCompBoundaryBefore);
        break;
    case Norm::NO_NO_COMP_NO_MAYBE_CC:
        norm.offset=writeNoNoMapping(
            c, norm, noNoMappingsCompNoMaybeCC, previousNoNoMappingsCompNoMaybeCC);
        break;
    case Norm::NO_NO_EMPTY:
        // There can be multiple extra data entries for mappings to the empty string
        // if they have different raw mappings.
        norm.offset=writeNoNoMapping(c, norm, noNoMappingsEmpty, previousNoNoMappingsEmpty);
        break;
    case Norm::MAYBE_NO_MAPPING_ONLY:
        norm.offset=maybeNoMappingsOnly.length()+
                writeMapping(c, norm, maybeNoMappingsOnly);
        break;
    case Norm::MAYBE_NO_COMBINES_FWD:
        norm.offset=maybeNoMappingsAndCompositions.length()+
                writeMapping(c, norm, maybeNoMappingsAndCompositions);
        writeCompositions(c, norm, maybeNoMappingsAndCompositions);
        break;
    case Norm::MAYBE_YES_COMBINES_FWD:
        norm.offset=maybeYesCompositions.length();
        writeCompositions(c, norm, maybeYesCompositions);
        break;
    case Norm::MAYBE_YES_SIMPLE:
        break;  // no extra data
    case Norm::YES_YES_WITH_CC:
        break;  // no extra data
    default:  // Should not occur.
        exit(U_INTERNAL_PROGRAM_ERROR);
    }
}

// Ticket #13342 - Turn optimization back on.
#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
#pragma optimize( "", on )
#endif
U_NAMESPACE_END
#endif// #if !UCONFIG_NO_NORMALIZATION
Messung V0.5
¤ Dauer der Verarbeitung: 0.13 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.