// If given a nullptr string for the locale id, grab the default // name from the system. // (Different from most other locale APIs, where a null name means use // the current ICU default locale.) if (id == nullptr) {
id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify.
canonicalize = true; // always canonicalize host ID
}
/*
 * Internal constructor to allow construction of a locale object with
 * NO side effects. (The default constructor tries to get the default
 * locale.)
 *
 * The ELocaleType argument is unnamed and unused; it only selects this
 * overload. The object is put into the bogus state via setToBogus().
 */
Locale::Locale(Locale::ELocaleType)
: UObject(), fullName(fullNameBuffer), baseName(nullptr)
{
setToBogus();
}
if ( ksize != 0)
{ if (uprv_strchr(newKeywords, '=')) {
togo.append('@', status); /* keyword parsing */
} else {
togo.append('_', status); /* Variant parsing with a script */ if ( vsize == 0) {
togo.append('_', status); /* No country found */
}
}
togo.append(newKeywords, status);
}
if (U_FAILURE(status)) { // Something went wrong with appending, etc.
setToBogus(); return;
} // Parse it, because for example 'language' might really be a complete // string.
init(togo.data(), false);
}
}
// Read the languageAlias data from alias to // strings+types+replacementIndexes // The number of record will be stored into length. // Allocate length items for types, to store the type field. // Allocate length items for replacementIndexes, // to store the index in the strings for the replacement script. void readLanguageAlias(UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<constchar*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
UErrorCode &status);
// Read the scriptAlias data from alias to // strings+types+replacementIndexes // Allocate length items for types, to store the type field. // Allocate length items for replacementIndexes, // to store the index in the strings for the replacement script. void readScriptAlias(UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<constchar*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);
// Read the territoryAlias data from alias to // strings+types+replacementIndexes // Allocate length items for types, to store the type field. // Allocate length items for replacementIndexes, // to store the index in the strings for the replacement script. void readTerritoryAlias(UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<constchar*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);
// Read the variantAlias data from alias to // strings+types+replacementIndexes // Allocate length items for types, to store the type field. // Allocate length items for replacementIndexes, // to store the index in the strings for the replacement variant. void readVariantAlias(UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<constchar*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);
// Read the subdivisionAlias data from alias to // strings+types+replacementIndexes // Allocate length items for types, to store the type field. // Allocate length items for replacementIndexes, // to store the index in the strings for the replacement variant. void readSubdivisionAlias(UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<constchar*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);
};
/** * A class to hold the Alias Data.
*/ class AliasData : public UMemory { public: staticconst AliasData* singleton(UErrorCode& status) { if (U_FAILURE(status)) { // Do not get into loadData if the status already has error. return nullptr;
}
umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status); return gSingleton;
}
/** * Read the languageAlias data from alias to strings+types+replacementIndexes. * Allocate length items for types, to store the type field. Allocate length * items for replacementIndexes, to store the index in the strings for the * replacement language.
*/ void
AliasDataBuilder::readLanguageAlias(
UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<constchar*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
UErrorCode &status)
{ return readAlias(
alias, strings, types, replacementIndexes, length, #if U_DEBUG
[](constchar* type) { // Assert the aliasFrom only contains the following possibilities // language_REGION_variant // language_REGION // language_variant // language // und_variant
Locale test(type); // Assert no script in aliasFrom
U_ASSERT(test.getScript()[0] == '\0'); // Assert when language is und, no REGION in aliasFrom.
U_ASSERT(test.getLanguage()[0] != '\0' || test.getCountry()[0] == '\0');
}, #else
[](constchar*) {}, #endif
[](const UChar*) {}, status);
}
/** * Read the scriptAlias data from alias to strings+types+replacementIndexes. * Allocate length items for types, to store the type field. Allocate length * items for replacementIndexes, to store the index in the strings for the * replacement script.
*/ void
AliasDataBuilder::readScriptAlias(
UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<constchar*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
UErrorCode &status)
{ return readAlias(
alias, strings, types, replacementIndexes, length, #if U_DEBUG
[](constchar* type) {
U_ASSERT(uprv_strlen(type) == 4);
},
[](const UChar* replacement) {
U_ASSERT(u_strlen(replacement) == 4);
}, #else
[](constchar*) {},
[](const UChar*) { }, #endif
status);
}
/** * Read the territoryAlias data from alias to strings+types+replacementIndexes. * Allocate length items for types, to store the type field. Allocate length * items for replacementIndexes, to store the index in the strings for the * replacement regions.
*/ void
AliasDataBuilder::readTerritoryAlias(
UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<constchar*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
UErrorCode &status)
{ return readAlias(
alias, strings, types, replacementIndexes, length, #if U_DEBUG
[](constchar* type) {
U_ASSERT(uprv_strlen(type) == 2 || uprv_strlen(type) == 3);
}, #else
[](constchar*) {}, #endif
[](const UChar*) { },
status);
}
/** * Read the variantAlias data from alias to strings+types+replacementIndexes. * Allocate length items for types, to store the type field. Allocate length * items for replacementIndexes, to store the index in the strings for the * replacement variant.
*/ void
AliasDataBuilder::readVariantAlias(
UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<constchar*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
UErrorCode &status)
{ return readAlias(
alias, strings, types, replacementIndexes, length, #if U_DEBUG
[](constchar* type) {
U_ASSERT(uprv_strlen(type) >= 4 && uprv_strlen(type) <= 8);
U_ASSERT(uprv_strlen(type) != 4 ||
(type[0] >= '0' && type[0] <= '9'));
},
[](const UChar* replacement) {
int32_t len = u_strlen(replacement);
U_ASSERT(len >= 4 && len <= 8);
U_ASSERT(len != 4 ||
(*replacement >= u'0' &&
*replacement <= u'9'));
}, #else
[](constchar*) {},
[](const UChar*) { }, #endif
status);
}
/** * Read the subdivisionAlias data from alias to strings+types+replacementIndexes. * Allocate length items for types, to store the type field. Allocate length * items for replacementIndexes, to store the index in the strings for the * replacement regions.
*/ void
AliasDataBuilder::readSubdivisionAlias(
UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<constchar*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
UErrorCode &status)
{ return readAlias(
alias, strings, types, replacementIndexes, length, #if U_DEBUG
[](constchar* type) {
U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
}, #else
[](constchar*) {}, #endif
[](const UChar*) { },
status);
}
/**
 * Initializes the alias data from the ICU resource bundles. The alias data
 * contains alias of language, country, script and variants.
 *
 * This function is run through umtx_initOnce() by AliasData::singleton().
 * It registers the cleanup hook, then stores the result of
 * AliasDataBuilder::build() in gSingleton. When
 * LOCALE_CANONICALIZATION_DEBUG is defined the elapsed time is printed.
 */
void U_CALLCONV
AliasData::loadData(UErrorCode &status)
{
#ifdef LOCALE_CANONICALIZATION_DEBUG
    UDate start = uprv_getRawUTCtime();
#endif  // LOCALE_CANONICALIZATION_DEBUG
    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_ALIAS, cleanup);
    AliasDataBuilder builder;
    gSingleton = builder.build(status);
#ifdef LOCALE_CANONICALIZATION_DEBUG
    UDate end = uprv_getRawUTCtime();
    printf("AliasData::loadData took total %f ms\n", end - start);
#endif  // LOCALE_CANONICALIZATION_DEBUG
}
/**
 * Build the alias data from resources.
 *
 * Reads the five alias tables into parallel (type, replacement index)
 * arrays backed by one UniqueCharStrings pool, freezes the pool, builds one
 * CharStringMap per table and hands everything to a new AliasData.
 *
 * @param status ICU error code; nothing is done if it already indicates
 *               failure. Set to U_MEMORY_ALLOCATION_ERROR if the AliasData
 *               object cannot be allocated.
 * @return the newly allocated AliasData, or nullptr on failure.
 *
 * NOTE(review): languageAlias, scriptAlias, territoryAlias, variantAlias,
 * subdivisionAlias and the *Length counters are used below but are not
 * declared anywhere in the visible text of this function - confirm their
 * declarations against the upstream file.
 */
AliasData*
AliasDataBuilder::build(UErrorCode &status) {
    if (U_FAILURE(status)) { return nullptr; }

    // Read the languageAlias into languageTypes, languageReplacementIndexes
    // and strings
    UniqueCharStrings strings(status);
    LocalMemory<const char*> languageTypes;
    LocalMemory<int32_t> languageReplacementIndexes;
    readLanguageAlias(languageAlias.getAlias(),
                      &strings,
                      languageTypes,
                      languageReplacementIndexes,
                      languagesLength,
                      status);

    // Read the scriptAlias into scriptTypes, scriptReplacementIndexes
    // and strings
    LocalMemory<const char*> scriptTypes;
    LocalMemory<int32_t> scriptReplacementIndexes;
    readScriptAlias(scriptAlias.getAlias(),
                    &strings,
                    scriptTypes,
                    scriptReplacementIndexes,
                    scriptLength,
                    status);

    // Read the territoryAlias into territoryTypes, territoryReplacementIndexes
    // and strings
    LocalMemory<const char*> territoryTypes;
    LocalMemory<int32_t> territoryReplacementIndexes;
    readTerritoryAlias(territoryAlias.getAlias(),
                       &strings,
                       territoryTypes,
                       territoryReplacementIndexes,
                       territoryLength, status);

    // Read the variantAlias into variantTypes, variantReplacementIndexes
    // and strings
    LocalMemory<const char*> variantTypes;
    LocalMemory<int32_t> variantReplacementIndexes;
    readVariantAlias(variantAlias.getAlias(),
                     &strings,
                     variantTypes,
                     variantReplacementIndexes,
                     variantLength, status);

    // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
    // and strings
    LocalMemory<const char*> subdivisionTypes;
    LocalMemory<int32_t> subdivisionReplacementIndexes;
    readSubdivisionAlias(subdivisionAlias.getAlias(),
                         &strings,
                         subdivisionTypes,
                         subdivisionReplacementIndexes,
                         subdivisionLength, status);

    if (U_FAILURE(status)) {
        return nullptr;
    }

    // We can only use strings after freeze it.
    strings.freeze();

    // Build the languageMap from languageTypes & languageReplacementIndexes
    CharStringMap languageMap(490, status);
    for (int32_t i = 0; U_SUCCESS(status) && i < languagesLength; i++) {
        languageMap.put(languageTypes[i],
                        strings.get(languageReplacementIndexes[i]),
                        status);
    }

    // Build the scriptMap from scriptTypes & scriptReplacementIndexes
    CharStringMap scriptMap(1, status);
    for (int32_t i = 0; U_SUCCESS(status) && i < scriptLength; i++) {
        scriptMap.put(scriptTypes[i],
                      strings.get(scriptReplacementIndexes[i]),
                      status);
    }

    // Build the territoryMap from territoryTypes & territoryReplacementIndexes
    CharStringMap territoryMap(650, status);
    for (int32_t i = 0; U_SUCCESS(status) && i < territoryLength; i++) {
        territoryMap.put(territoryTypes[i],
                         strings.get(territoryReplacementIndexes[i]),
                         status);
    }

    // Build the variantMap from variantTypes & variantReplacementIndexes.
    CharStringMap variantMap(2, status);
    for (int32_t i = 0; U_SUCCESS(status) && i < variantLength; i++) {
        variantMap.put(variantTypes[i],
                       strings.get(variantReplacementIndexes[i]),
                       status);
    }

    // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
    CharStringMap subdivisionMap(2, status);
    for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
        subdivisionMap.put(subdivisionTypes[i],
                           strings.get(subdivisionReplacementIndexes[i]),
                           status);
    }

    if (U_FAILURE(status)) {
        return nullptr;
    }

    // copy hashtables
    auto *data = new AliasData(
        std::move(languageMap),
        std::move(scriptMap),
        std::move(territoryMap),
        std::move(variantMap),
        std::move(subdivisionMap),
        strings.orphanCharStrings());

    if (data == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
    }
    return data;
}
/** * A class that find the replacement values of locale fields by using AliasData.
*/ class AliasReplacer { public:
// Construct a replacer with all locale fields and the alias data unset.
// The variants vector is created without an element deleter and with a
// string-equality comparer, so contains() compares variant text, which lets
// callers store each variant value only once.
AliasReplacer(UErrorCode& status) :
        language(nullptr), script(nullptr), region(nullptr),
        extensions(nullptr),
        // store value in variants only once
        variants(nullptr,
                 ([](UElement e1, UElement e2) -> UBool {
                     return 0 == uprv_strcmp(static_cast<const char*>(e1.pointer),
                                             static_cast<const char*>(e2.pointer));
                 }),
                 status),
        data(nullptr) {
}
// Empty destructor body: no explicit cleanup is performed here.
~AliasReplacer() {
}
// Check the fields inside locale, if need to replace fields, // place the the replaced locale ID in out and return true. // Otherwise return false for no replacement or error. bool replace( const Locale& locale, CharString& out, UErrorCode& status);
/** * If replacement is neither null nor empty and input is either null or empty, * return replacement. * If replacement is neither null nor empty but input is not empty, return input. * If replacement is either null or empty and type is either null or empty, * return input. * Otherwise return null. * replacement input type return * AAA nullptr * AAA * AAA BBB * BBB * nullptr || "" CCC nullptr CCC * nullptr || "" * DDD nullptr
*/ inlineconstchar* deleteOrReplace( constchar* input, constchar* type, constchar* replacement) { return notEmpty(replacement) ?
((input == nullptr) ? replacement : input) :
((type == nullptr) ? input : nullptr);
}
inlinebool same(constchar* a, constchar* b) { if (a == nullptr && b == nullptr) { returntrue;
} if ((a == nullptr && b != nullptr) ||
(a != nullptr && b == nullptr)) { returnfalse;
} return uprv_strcmp(a, b) == 0;
}
// Gather the language, script, region, variants and extensions fields,
// append the resulting locale ID to out, and return out.
CharString& outputToString(CharString& out, UErrorCode& status);
if ( same(language, replacedLanguage) &&
same(script, replacedScript) &&
same(region, replacedRegion) &&
same(searchVariant, replacedVariant) &&
replacedExtensions == nullptr) { // Replacement produce no changes. continue;
}
language = replacedLanguage;
region = replacedRegion;
script = replacedScript; if (searchVariant != nullptr) { if (notEmpty(replacedVariant)) {
variants.setElementAt((void*)replacedVariant, variant_index);
} else {
variants.removeElementAt(variant_index);
}
} if (replacedExtensions != nullptr) { // DO NOTHING // UTS35 does not specify what should we do if we have extensions in the // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have // extensions in them languageAlias: // i_default => en_x_i_default // i_enochian => und_x_i_enochian // i_mingo => see_x_i_mingo // zh_min => nan_x_zh_min // But all of them are already changed by code inside ultag_parse() before // hitting this code.
}
// Something changed by language alias data. returntrue;
} // Nothing changed by language alias data. returnfalse;
}
/**
 * Replace the region field using the territory alias data.
 *
 * When the replacement lists several space-separated regions, the one that
 * matches the likely region for the current language/script is chosen;
 * if none matches, the first listed region is used. The CharString created
 * to hold that single region is adopted by toBeFreed, which keeps the
 * pointer stored in `region` alive.
 *
 * @param toBeFreed vector that takes ownership of any CharString allocated
 *                  here.
 * @param status    ICU error code.
 * @return true if the region was changed; false if there is no region, no
 *         alias for it, or an error occurred.
 */
bool
AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return false;
    }
    if (region == nullptr) {
        // No region to search.
        return false;
    }
    const char *replacement = data->territoryMap().get(region);
    if (replacement == nullptr) {
        // Found no replacement data for this region.
        return false;
    }
    const char* replacedRegion = replacement;
    const char* firstSpace = uprv_strchr(replacement, ' ');
    if (firstSpace != nullptr) {
        // If there are more than one region in the replacement,
        // we need to check which one matches based on the language.
        // Cannot use nullptr for language because that will construct
        // the default locale, in that case, use "und" to get the correct
        // locale.
        Locale l = LocaleBuilder()
            .setLanguage(language == nullptr ? "und" : language)
            .setScript(script)
            .build(status);
        l.addLikelySubtags(status);
        const char* likelyRegion = l.getCountry();
        LocalPointer<CharString> item;
        if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > 0) {
            size_t len = uprv_strlen(likelyRegion);
            const char* foundInReplacement = uprv_strstr(replacement,
                                                         likelyRegion);
            if (foundInReplacement != nullptr) {
                // Assuming the case there are no three letter region code in
                // the replacement of territoryAlias
                U_ASSERT(foundInReplacement == replacement ||
                         *(foundInReplacement-1) == ' ');
                U_ASSERT(foundInReplacement[len] == ' ' ||
                         foundInReplacement[len] == '\0');
                item.adoptInsteadAndCheckErrorCode(
                    new CharString(foundInReplacement,
                                   static_cast<int32_t>(len), status),
                    status);
            }
        }
        if (item.isNull() && U_SUCCESS(status)) {
            // No likely region matched: fall back to the first one listed.
            item.adoptInsteadAndCheckErrorCode(
                new CharString(replacement,
                               static_cast<int32_t>(firstSpace - replacement),
                               status),
                status);
        }
        if (U_FAILURE(status)) { return false; }
        replacedRegion = item->data();
        toBeFreed.adoptElement(item.orphan(), status);
        if (U_FAILURE(status)) { return false; }
    }
    U_ASSERT(!same(region, replacedRegion));
    region = replacedRegion;
    // The region is changed by data in territory alias.
    return true;
}
/**
 * Replace the script field using the script alias data.
 *
 * @param status ICU error code.
 * @return true if the script was changed; false if there is no script, no
 *         alias for it, or status already indicates failure.
 */
bool
AliasReplacer::replaceScript(UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return false;
    }
    if (script == nullptr) {
        // No script to search.
        return false;
    }
    const char *replacement = data->scriptMap().get(script);
    if (replacement == nullptr) {
        // Found no replacement data for this script.
        return false;
    }
    U_ASSERT(!same(script, replacement));
    script = replacement;
    // The script is changed by data in script alias.
    return true;
}
/**
 * Replace the first variant that has a differing alias in the variant data.
 *
 * At most one variant is replaced per call. When the replaced variant is
 * "heploc", any "hepburn" entry is also removed from the list.
 *
 * @param status ICU error code.
 * @return true if a variant was changed, false otherwise.
 */
bool
AliasReplacer::replaceVariant(UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return false;
    }
    // Since we may have more than one variant, we need to loop through them.
    for (int32_t i = 0; i < variants.size(); i++) {
        const char* variant = static_cast<const char*>(variants.elementAt(i));
        const char *replacement = data->variantMap().get(variant);
        if (replacement == nullptr) {
            // Found no replacement data for this variant.
            continue;
        }
        U_ASSERT((uprv_strlen(replacement) >= 5 &&
                  uprv_strlen(replacement) <= 8) ||
                 (uprv_strlen(replacement) == 4 &&
                  replacement[0] >= '0' &&
                  replacement[0] <= '9'));
        if (!same(variant, replacement)) {
            variants.setElementAt(const_cast<char*>(replacement), i);
            // Special hack to handle hepburn-heploc => alalc97
            if (uprv_strcmp(variant, "heploc") == 0) {
                // Walk backwards so that removing an element never causes
                // the following element to be skipped.
                for (int32_t j = variants.size() - 1; j >= 0; j--) {
                    if (uprv_strcmp(static_cast<const char*>(variants.elementAt(j)),
                                    "hepburn") == 0) {
                        variants.removeElementAt(j);
                    }
                }
            }
            return true;
        }
    }
    return false;
}
/**
 * Look up the replacement for a subdivision value and append it to output.
 *
 * Only the first space-separated token of the replacement is used. It is
 * appended when its length is 2..8; a 2-character token gets "zzzz"
 * appended after it.
 *
 * @param subdivision value to look up.
 *        NOTE(review): subdivision.data() is passed to the map lookup as a
 *        C string, which presumably requires NUL termination - confirm
 *        against callers.
 * @param output      receives the replacement text.
 * @param status      ICU error code.
 * @return true if alias data exists for subdivision (even when nothing was
 *         appended because of the length check), false otherwise.
 */
bool
AliasReplacer::replaceSubdivision(
    StringPiece subdivision, CharString& output, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return false;
    }
    const char *replacement = data->subdivisionMap().get(subdivision.data());
    if (replacement != nullptr) {
        const char* firstSpace = uprv_strchr(replacement, ' ');
        // Found replacement data for this subdivision.
        size_t len = (firstSpace != nullptr) ?
            (firstSpace - replacement) : uprv_strlen(replacement);
        if (2 <= len && len <= 8) {
            output.append(replacement, static_cast<int32_t>(len), status);
            if (2 == len) {
                // Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
                output.append("zzzz", 4, status);
            }
        }
        return true;
    }
    return false;
}
/**
 * Canonicalize the value of a "t" (transformed content) extension.
 *
 * The leading tlang part, if any, is canonicalized as a language tag and
 * lower-cased. The following tkey-tvalue fields are sorted by string
 * comparison and each tvalue is mapped through ulocimp_toBcpType() when a
 * mapping exists. The result is written to output.
 *
 * @param transformedExtensions the extension value; its buffer is modified
 *        in place (separators are overwritten with NUL).
 * @param output  receives the canonicalized value.
 * @param status  ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR when a
 *                field has no '-' separating tkey and tvalue.
 * @return true on success, false on any failure.
 */
bool
AliasReplacer::replaceTransformedExtensions(
    CharString& transformedExtensions, CharString& output, UErrorCode& status)
{
    // The content of the transformedExtensions will be modified in this
    // function to NUL-terminating (tkey-tvalue) pairs.
    if (U_FAILURE(status)) {
        return false;
    }
    int32_t len = transformedExtensions.length();
    const char* str = transformedExtensions.data();
    const char* tkey = ultag_getTKeyStart(str);
    int32_t tlangLen = (tkey == str) ? 0 :
        ((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
    if (tlangLen > 0) {
        Locale tlang = LocaleBuilder()
            .setLanguageTag(StringPiece(str, tlangLen))
            .build(status);
        tlang.canonicalize(status);
        output = tlang.toLanguageTag<CharString>(status);
        if (U_FAILURE(status)) {
            return false;
        }
        T_CString_toLowerCase(output.data());
    }
    if (tkey != nullptr) {
        // We need to sort the tfields by tkey
        UVector tfields(status);
        if (U_FAILURE(status)) {
            return false;
        }
        do {
            const char* tvalue = uprv_strchr(tkey, '-');
            if (tvalue == nullptr) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
                return false;
            }
            const char* nextTKey = ultag_getTKeyStart(tvalue);
            if (nextTKey != nullptr) {
                *const_cast<char*>(nextTKey - 1) = '\0';  // NUL terminate tvalue
            }
            tfields.insertElementAt(const_cast<char*>(tkey), tfields.size(), status);
            if (U_FAILURE(status)) {
                return false;
            }
            tkey = nextTKey;
        } while (tkey != nullptr);
        tfields.sort([](UElement e1, UElement e2) -> int32_t {
            return uprv_strcmp(static_cast<const char*>(e1.pointer),
                               static_cast<const char*>(e2.pointer));
        }, status);
        for (int32_t i = 0; i < tfields.size(); i++) {
            if (output.length() > 0) {
                output.append('-', status);
            }
            const char* tfield = static_cast<const char*>(tfields.elementAt(i));
            const char* tvalue = uprv_strchr(tfield, '-');
            if (tvalue == nullptr) {
                status = U_ILLEGAL_ARGUMENT_ERROR;
                return false;
            }
            // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
            *const_cast<char*>(tvalue++) = '\0';  // NUL terminate tkey
            output.append(tfield, status).append('-', status);
            std::optional<std::string_view> bcpTValue = ulocimp_toBcpType(tfield, tvalue);
            output.append(bcpTValue.has_value() ? *bcpTValue : tvalue, status);
        }
    }
    if (U_FAILURE(status)) {
        return false;
    }
    return true;
}
/**
 * Append the locale ID assembled from the current fields to out.
 *
 * Order: language, then script and region (each preceded by SEP_CHAR when
 * non-empty), then the variants sorted by string comparison and
 * upper-cased, then the extensions. When there are variants but neither
 * script nor region, one extra SEP_CHAR is emitted before the variants.
 * Note that this sorts the variants member in place.
 *
 * @param out    buffer to append to.
 * @param status ICU error code; out is returned untouched if it already
 *               indicates failure.
 * @return out.
 */
CharString&
AliasReplacer::outputToString(
    CharString& out, UErrorCode& status)
{
    if (U_FAILURE(status)) { return out; }
    out.append(language, status);
    if (notEmpty(script)) {
        out.append(SEP_CHAR, status)
            .append(script, status);
    }
    if (notEmpty(region)) {
        out.append(SEP_CHAR, status)
            .append(region, status);
    }
    if (variants.size() > 0) {
        if (!notEmpty(script) && !notEmpty(region)) {
            out.append(SEP_CHAR, status);
        }
        variants.sort([](UElement e1, UElement e2) -> int32_t {
            return uprv_strcmp(static_cast<const char*>(e1.pointer),
                               static_cast<const char*>(e2.pointer));
        }, status);
        int32_t variantsStart = out.length();
        for (int32_t i = 0; i < variants.size(); i++) {
            out.append(SEP_CHAR, status)
                .append(static_cast<const char*>(variants.elementAt(i)),
                        status);
        }
        // Upper-case everything appended for the variants.
        T_CString_toUpperCase(out.data() + variantsStart);
    }
    if (notEmpty(extensions)) {
        CharString tmp("und_", status);
        tmp.append(extensions, status);
        Locale tmpLocale(tmp.data());
        // only support x extension inside CLDR for now.
        U_ASSERT(extensions[0] == 'x');
        out.append(tmpLocale.getName() + 1, status);
    }
    return out;
}
bool
AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status)
{
data = AliasData::singleton(status); if (U_FAILURE(status)) { returnfalse;
}
U_ASSERT(data != nullptr);
out.clear();
language = locale.getLanguage(); if (!notEmpty(language)) {
language = nullptr;
}
script = locale.getScript(); if (!notEmpty(script)) {
script = nullptr;
}
region = locale.getCountry(); if (!notEmpty(region)) {
region = nullptr;
} constchar* variantsStr = locale.getVariant();
CharString variantsBuff(variantsStr, -1, status); if (!variantsBuff.isEmpty()) { if (U_FAILURE(status)) { returnfalse; } char* start = variantsBuff.data();
T_CString_toLowerCase(start); char* end; while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
U_SUCCESS(status)) {
*end = NULL_CHAR; // null terminate inside variantsBuff // do not add "" or duplicate data to variants if (*start && !variants.contains(start)) {
variants.addElement(start, status);
}
start = end + 1;
} // do not add "" or duplicate data to variants if (*start && !variants.contains(start)) {
variants.addElement(start, status);
}
} if (U_FAILURE(status)) { returnfalse; }
// A changed count to assert when loop too many times. int changed = 0; // A UVector to to hold CharString allocated by the replace* method // and freed when out of scope from his function.
UVector stringsToBeFreed([](void *obj) { deletestatic_cast<CharString*>(obj); },
nullptr, 10, status); while (U_SUCCESS(status)) { // Something wrong with the data cause looping here more than 10 times // already.
U_ASSERT(changed < 5); // From observation of key in data/misc/metadata.txt // we know currently we only need to search in the following combination // of fields for type in languageAlias: // * lang_region_variant // * lang_region // * lang_variant // * lang // * und_variant // This assumption is ensured by the U_ASSERT in readLanguageAlias // // lang REGION variant if ( replaceLanguage(true, true, true, stringsToBeFreed, status) ||
replaceLanguage(true, true, false, stringsToBeFreed, status) ||
replaceLanguage(true, false, true, stringsToBeFreed, status) ||
replaceLanguage(true, false, false, stringsToBeFreed, status) ||
replaceLanguage(false,false, true, stringsToBeFreed, status) ||
replaceTerritory(stringsToBeFreed, status) ||
replaceScript(status) ||
replaceVariant(status)) { // Some values in data is changed, try to match from the beginning // again.
changed++; continue;
} // Nothing changed. Break out. break;
} // while(1)
if (U_FAILURE(status)) { returnfalse; } // Nothing changed and we know the order of the variants are not change // because we have no variant or only one. constchar* extensionsStr = locale_getKeywordsStart(locale.getName()); if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) { returnfalse;
}
outputToString(out, status); if (U_FAILURE(status)) { returnfalse;
} if (extensionsStr != nullptr) {
changed = 0;
Locale temp(locale);
LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status)); if (U_SUCCESS(status) && !iter.isNull()) { constchar* key; while ((key = iter->next(nullptr, status)) != nullptr) { if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 ||
uprv_strcmp("t", key) == 0) { auto value = locale.getKeywordValue<CharString>(key, status); if (U_FAILURE(status)) {
status = U_ZERO_ERROR; continue;
}
CharString replacement; if (uprv_strlen(key) == 2) { if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
changed++;
temp.setKeywordValue(key, replacement.data(), status);
}
} else {
U_ASSERT(uprv_strcmp(key, "t") == 0); if (replaceTransformedExtensions(value, replacement, status)) {
changed++;
temp.setKeywordValue(key, replacement.data(), status);
}
} if (U_FAILURE(status)) { returnfalse;
}
}
}
} if (changed != 0) {
extensionsStr = locale_getKeywordsStart(temp.getName());
}
out.append(extensionsStr, status);
} if (U_FAILURE(status)) { returnfalse;
} // If the tag is not changed, return. if (uprv_strcmp(out.data(), locale.getName()) == 0) {
out.clear(); returnfalse;
} returntrue;
}
// Return true if the locale is changed during canonicalization.
// The replaced value then will be put into out.
// Returns false without touching out if status already indicates failure.
bool
canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
{
    if (U_FAILURE(status)) { return false; }
    AliasReplacer replacer(status);
    return replacer.replace(locale, out, status);
}
// Function to optimize for known cases without so we can skip the loading // of resources in the startup time until we really need it. bool
isKnownCanonicalizedLocale(constchar* locale, UErrorCode& status)
{ if (U_FAILURE(status)) { returnfalse; }
// Function for testing.
// Returns the KNOWN_CANONICALIZED table and stores its entry count in length.
U_EXPORT const char* const*
ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length)
{
    U_NAMESPACE_USE
    length = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
    return KNOWN_CANONICALIZED;
}
// Function for testing.
// Returns true when canonicalizeLocale() reports no change for localeName
// and finishes without error.
U_EXPORT bool
ulocimp_isCanonicalizedLocaleForTest(const char* localeName)
{
    U_NAMESPACE_USE
    Locale l(localeName);
    UErrorCode status = U_ZERO_ERROR;
    CharString temp;
    return !canonicalizeLocale(l, temp, status) && U_SUCCESS(status);
}
U_NAMESPACE_BEGIN
/*This function initializes a Locale from a C locale ID*/
Locale& Locale::init(constchar* localeID, UBool canonicalize)
{
fIsBogus = false; /* Free our current storage */ if ((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = nullptr; if(fullName != fullNameBuffer) {
uprv_free(fullName);
fullName = fullNameBuffer;
}
// not a loop: // just an easy way to have a common error-exit // without goto and without another function do { char *separator; char *field[5] = {nullptr};
int32_t fieldLen[5] = {0};
int32_t fieldIdx;
int32_t variantField;
int32_t length;
UErrorCode err;
if(localeID == nullptr) { // not an error, just set the default locale return *this = getDefault();
}
/* preset all fields to empty */
language[0] = script[0] = country[0] = 0;
// "canonicalize" the locale ID to ICU/Java format
err = U_ZERO_ERROR;
length = canonicalize ?
uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast<int32_t>(sizeof(fullNameBuffer))) {
U_ASSERT(baseName == nullptr); /*Go to heap for the fullName if necessary*/ char* newFullName = static_cast<char*>(uprv_malloc(sizeof(char) * (length + 1))); if (newFullName == nullptr) { break; // error: out of memory
}
fullName = newFullName;
err = U_ZERO_ERROR;
length = canonicalize ?
uloc_canonicalize(localeID, fullName, length+1, &err) :
uloc_getName(localeID, fullName, length+1, &err);
} if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) { /* should never occur */ break;
}
variantBegin = length;
/* after uloc_getName/canonicalize() we know that only '_' are separators */ /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */
separator = field[0] = fullName;
fieldIdx = 1; char* at = uprv_strchr(fullName, '@'); while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != nullptr &&
fieldIdx < UPRV_LENGTHOF(field)-1 &&
(at == nullptr || separator < at)) {
field[fieldIdx] = separator + 1;
fieldLen[fieldIdx - 1] = static_cast<int32_t>(separator - field[fieldIdx - 1]);
fieldIdx++;
} // variant may contain @foo or .foo POSIX cruft; remove it
separator = uprv_strchr(field[fieldIdx-1], '@'); char* sep2 = uprv_strchr(field[fieldIdx-1], '.'); if (separator!=nullptr || sep2!=nullptr) { if (separator==nullptr || (sep2!=nullptr && separator > sep2)) {
separator = sep2;
}
fieldLen[fieldIdx - 1] = static_cast<int32_t>(separator - field[fieldIdx - 1]);
} else {
fieldLen[fieldIdx - 1] = length - static_cast<int32_t>(field[fieldIdx - 1] - fullName);
}
if (fieldLen[0] >= static_cast<int32_t>(sizeof(language)))
{ break; // error: the language field is too long
}
variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */ if (fieldLen[0] > 0) { /* We have a language */
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.62 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.