// Initial size for an Any-XXXX transform's cache of script-XXXX transforms.
// The cache will grow as necessary, but source text is not expected to
// contain more than 7 scripts.
#define ANY_TRANS_CACHE_INIT_SIZE 7
/**
 * Returns a series of ranges corresponding to scripts. They will be
 * of the form:
 *
 * ccccSScSSccccTTcTcccc   - c = common, S = first script, T = second
 * |            |          - first run (start, limit)
 *          |           |  - second run (start, limit)
 *
 * That is, the runs will overlap. The reason for this is so that a
 * transliterator can consider common characters both before and after
 * the scripts.
 */
class ScriptRunIterator : public UMemory {
private:
    // The text being iterated over, and the bounds of the iteration
    // within it.
    const Replaceable& text;
    int32_t textStart;
    int32_t textLimit;

public:
    /**
     * The code of the current run, valid after next() returns. May
     * be USCRIPT_INVALID_CODE if and only if the entire text is
     * COMMON/INHERITED.
     */
    UScriptCode scriptCode;

    /**
     * The start of the run, inclusive, valid after next() returns.
     */
    int32_t start;

    /**
     * The end of the run, exclusive, valid after next() returns.
     */
    int32_t limit;

    /**
     * Constructs a run iterator over the given text from start
     * (inclusive) to limit (exclusive).
     */
    ScriptRunIterator(const Replaceable& text, int32_t start, int32_t limit);

    /**
     * Returns true if there are any more runs. true is always
     * returned at least once. Upon return, the caller should
     * examine scriptCode, start, and limit.
     */
    UBool next();

    /**
     * Adjusts internal indices for a change in the limit index of the
     * given delta. A positive delta means the limit has increased.
     */
    void adjustLimit(int32_t delta);

private:
    // Copying of this class is forbidden. The operations are explicitly
    // deleted so that any attempted copy, including one from inside the
    // class itself, is rejected at compile time.
    ScriptRunIterator(const ScriptRunIterator &other) = delete;
    ScriptRunIterator &operator=(const ScriptRunIterator &other) = delete;
};
// Are we done? if (start == textLimit) { returnfalse;
}
// Move start back to include adjacent COMMON or INHERITED // characters while (start > textStart) {
ch = text.char32At(start - 1); // look back
s = uscript_getScript(ch, &ec); if (s == USCRIPT_COMMON || s == USCRIPT_INHERITED) {
--start;
} else { break;
}
}
// Move limit ahead to include COMMON, INHERITED, and characters // of the current script. while (limit < textLimit) {
ch = text.char32At(limit); // look ahead
s = uscript_getScript(ch, &ec); if (s != USCRIPT_COMMON && s != USCRIPT_INHERITED) { if (scriptCode == USCRIPT_INVALID_CODE) {
scriptCode = s;
} elseif (s != scriptCode) { break;
}
}
++limit;
}
// Return true even if the entire text is COMMON / INHERITED, in // which case scriptCode will be USCRIPT_INVALID_CODE. returntrue;
}
while (it.next()) { // Ignore runs in the ante context if (it.limit <= allStart) continue;
// Try to instantiate transliterator from it.scriptCode to // our target or target/variant
Transliterator* t = getTransliterator(it.scriptCode);
if (t == nullptr) { // We have no transliterator. Do nothing, but keep // pos.start up to date.
pos.start = it.limit; continue;
}
// If the run end is before the transliteration limit, do // a non-incremental transliteration. Otherwise do an // incremental one.
UBool incremental = isIncremental && (it.limit >= allLimit);
Transliterator* t = nullptr;
{
Mutex m(nullptr);
t = static_cast<Transliterator*>(uhash_iget(cache, static_cast<int32_t>(source)));
} if (t == nullptr) {
UErrorCode ec = U_ZERO_ERROR;
UnicodeString sourceName(uscript_getShortName(source), -1, US_INV);
UnicodeString id(sourceName);
id.append(TARGET_SEP).append(target);
t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); if (U_FAILURE(ec) || t == nullptr) { delete t;
// Try to pivot around Latin, our most common script
id = sourceName;
id.append(LATIN_PIVOT, -1).append(target);
t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); if (U_FAILURE(ec) || t == nullptr) { delete t;
t = nullptr;
}
}
if (t != nullptr) {
Transliterator *rt = nullptr;
{
Mutex m(nullptr);
rt = static_cast<Transliterator*>(uhash_iget(cache, static_cast<int32_t>(source))); if (rt == nullptr) { // Common case, no race to cache this new transliterator.
uhash_iput(cache, static_cast<int32_t>(source), t, &ec);
} else { // Race case, some other thread beat us to caching this transliterator.
Transliterator *temp = rt;
rt = t; // Our newly created transliterator that lost the race & now needs deleting.
t = temp; // The transliterator from the cache that we will return.
}
} delete rt; // will be non-null only in case of races.
}
} return t;
}
/**
 * Looks up the script code for a script name.
 *
 * @param name the script name to resolve
 * @return the matching script code, or USCRIPT_INVALID_CODE when the
 *         name contains non-invariant characters, when
 *         uscript_getCode() does not report exactly one code, or when
 *         it reports an error
 */
static UScriptCode scriptNameToCode(const UnicodeString& name) {
    const int32_t nameLen = name.length();

    // Only invariant-character names can be converted to a char buffer.
    if (!uprv_isInvariantUString(name.getBuffer(), nameLen)) {
        return USCRIPT_INVALID_CODE;
    }

    char buf[128];
    name.extract(0, nameLen, buf, static_cast<int32_t>(sizeof(buf)), US_INV);
    // Force NUL termination in the last slot of the buffer.
    buf[sizeof(buf) - 1] = 0;

    UScriptCode code;
    UErrorCode ec = U_ZERO_ERROR;
    const int32_t matches = uscript_getCode(buf, &code, 1, &ec);
    if (matches != 1 || U_FAILURE(ec)) {
        return USCRIPT_INVALID_CODE;
    }
    return code;
}
/** * Registers standard transliterators with the system. Called by * Transliterator during initialization. Scan all current targets and * register those that are scripts T as Any-T/V.
*/ void AnyTransliterator::registerIDs() {
// Only process each target once if (seen.geti(target) != 0) continue;
ec = U_ZERO_ERROR;
seen.puti(target, 1, ec);
// Get the script code for the target. If not a script, ignore.
UScriptCode targetScript = scriptNameToCode(target); if (targetScript == USCRIPT_INVALID_CODE) continue;
Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt.
Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.