// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
// NOTE: the directive must sit on its own line; when fused onto the end of the comment above it
// becomes part of the comment and the macro is never defined.
#define UNISTR_FROM_STRING_EXPLICIT
/**
 * Reports whether an AffixPatternMatcher corresponds to a pattern string.
 *
 * A null matcher corresponds only to a bogus pattern string. A non-null matcher corresponds to
 * the pattern string that compares equal to the matcher's own getPattern().
 *
 * @param affix The matcher to test; may be null.
 * @param patternString The pattern string to compare against; may be bogus.
 * @return true if the two arguments correspond as described above.
 */
bool matched(const AffixPatternMatcher* affix, const UnicodeString& patternString) {
    if (affix == nullptr) {
        // No matcher: the only acceptable counterpart is a bogus string.
        return patternString.isBogus();
    }
    return affix->getPattern() == patternString;
}
/**
 * Returns the length of the pattern held by the given AffixPatternMatcher.
 *
 * @param matcher The matcher to measure; may be null.
 * @return The length of matcher->getPattern(), or 0 when matcher is null.
 */
int32_t length(const AffixPatternMatcher* matcher) {
    if (matcher == nullptr) {
        return 0;
    }
    return matcher->getPattern().length();
}
/**
 * Null-tolerant equality for AffixPatternMatcher pointers, similar to Java Objects.equals().
 *
 * @param lhs First matcher; may be null.
 * @param rhs Second matcher; may be null.
 * @return true if both pointers are null, or if both are non-null and the pointees compare
 *         equal according to operator==; false if exactly one of them is null.
 */
bool equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs) {
    if (lhs == nullptr && rhs == nullptr) {
        return true;
    }
    if (lhs == nullptr || rhs == nullptr) {
        // Exactly one side is null, so they cannot be equal.
        return false;
    }
    return *lhs == *rhs;
}
/**
 * Consumes one affix-pattern token and appends the corresponding matcher(s) to this builder.
 *
 * This is called by AffixUtils.iterateWithConsumer() for each token.
 *
 * @param type The token type; TYPE_CODEPOINT for a literal, otherwise one of the symbol types.
 * @param cp The code point of the token; consulted only when type is TYPE_CODEPOINT.
 * @param status Error code passed through to the warehouse calls that accept it.
 */
void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, UErrorCode& status) {
    // Add an ignorables matcher between tokens except between two literals, and don't put two
    // ignorables matchers in a row.
    if (fIgnorables != nullptr && fMatchersLen > 0 &&
        (fLastTypeOrCp < 0 || !fIgnorables->getSet()->contains(fLastTypeOrCp))) {
        addMatcher(*fIgnorables);
    }

    if (type != TYPE_CODEPOINT) {
        // Case 1: the token is a symbol.
        switch (type) {
            case TYPE_MINUS_SIGN:
                addMatcher(fWarehouse.minusSign());
                break;
            case TYPE_PLUS_SIGN:
                addMatcher(fWarehouse.plusSign());
                break;
            case TYPE_PERCENT:
                addMatcher(fWarehouse.percent());
                break;
            case TYPE_PERMILLE:
                addMatcher(fWarehouse.permille());
                break;
            case TYPE_CURRENCY_SINGLE:
            case TYPE_CURRENCY_DOUBLE:
            case TYPE_CURRENCY_TRIPLE:
            case TYPE_CURRENCY_QUAD:
            case TYPE_CURRENCY_QUINT:
                // All currency symbols use the same matcher
                addMatcher(fWarehouse.currency(status));
                break;
            default:
                UPRV_UNREACHABLE_EXIT;
        }
    } else if (fIgnorables != nullptr && fIgnorables->getSet()->contains(cp)) {
        // Case 2: the token is an ignorable literal.
        // No action necessary: the ignorables matcher has already been added.
    } else {
        // Case 3: the token is a non-ignorable literal.
        if (auto* ptr = fWarehouse.nextCodePointMatcher(cp, status)) {
            addMatcher(*ptr);
        } else {
            // OOM; unwind the stack
            // (fLastTypeOrCp is intentionally left untouched on this path.)
            return;
        }
    }

    // Remember what was just consumed so the next call can decide whether an ignorables
    // matcher is needed in between.
    fLastTypeOrCp = type != TYPE_CODEPOINT ? type : cp;
}
if (0 == (parseFlags & PARSE_FLAG_USE_FULL_AFFIXES) &&
AffixUtils::containsOnlySymbolsAndIgnorables(posPrefixString, *ignorables.getSet(), status) &&
AffixUtils::containsOnlySymbolsAndIgnorables(posSuffixString, *ignorables.getSet(), status) &&
AffixUtils::containsOnlySymbolsAndIgnorables(negPrefixString, *ignorables.getSet(), status) &&
AffixUtils::containsOnlySymbolsAndIgnorables(negSuffixString, *ignorables.getSet(), status) // HACK: Plus and minus sign are a special case: we accept them trailing only if they are // trailing in the pattern string.
&& !AffixUtils::containsType(posSuffixString, TYPE_PLUS_SIGN, status) &&
!AffixUtils::containsType(posSuffixString, TYPE_MINUS_SIGN, status) &&
!AffixUtils::containsType(negSuffixString, TYPE_PLUS_SIGN, status) &&
!AffixUtils::containsType(negSuffixString, TYPE_MINUS_SIGN, status)) { // The affixes contain only symbols and ignorables. // No need to generate affix matchers. returnfalse;
} returntrue;
}
// The affixes have interesting characters, or we are in strict mode. // Use initial capacity of 6, the highest possible number of AffixMatchers.
UnicodeString sb; bool includeUnpaired = 0 != (parseFlags & PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES);
if (type == PATTERN_SIGN_TYPE_POS) {
posPrefix = prefix;
posSuffix = suffix;
} elseif (equals(prefix, posPrefix) && equals(suffix, posSuffix)) { // Skip adding these matchers (we already have equivalents) continue;
}
// Flags for setting in the ParsedNumber; the token matchers may add more. int flags = (type == PATTERN_SIGN_TYPE_NEG) ? FLAG_NEGATIVE : 0;
// Note: it is indeed possible for posPrefix and posSuffix to both be null. // We still need to add that matcher for strict mode to work.
fAffixMatchers[numAffixMatchers++] = {prefix, suffix, flags}; if (includeUnpaired && prefix != nullptr && suffix != nullptr) { // The following if statements are designed to prevent adding two identical matchers. if (type == PATTERN_SIGN_TYPE_POS || !equals(prefix, posPrefix)) {
fAffixMatchers[numAffixMatchers++] = {prefix, nullptr, flags};
} if (type == PATTERN_SIGN_TYPE_POS || !equals(suffix, posSuffix)) {
fAffixMatchers[numAffixMatchers++] = {nullptr, suffix, flags};
}
}
}
// Put the AffixMatchers in order, and then add them to the output. // Since there are at most 9 elements, do a simple-to-implement bubble sort. bool madeChanges; do {
madeChanges = false; for (int32_t i = 1; i < numAffixMatchers; i++) { if (fAffixMatchers[i - 1].compareTo(fAffixMatchers[i]) > 0) {
madeChanges = true;
AffixMatcher temp = std::move(fAffixMatchers[i - 1]);
fAffixMatchers[i - 1] = std::move(fAffixMatchers[i]);
fAffixMatchers[i] = std::move(temp);
}
}
} while (madeChanges);
for (int32_t i = 0; i < numAffixMatchers; i++) { // Enable the following line to debug affixes //std::cout << "Adding affix matcher: " << CStr(fAffixMatchers[i].toString())() << std::endl;
output.addMatcher(fAffixMatchers[i]);
}
}
/**
 * Attempts to match this affix pair's prefix or suffix against the segment.
 *
 * The prefix is tried while no number has been seen yet (result.seenNumber() is false);
 * otherwise the suffix is tried. When the attempt advances the segment offset, the matched
 * pattern string is recorded in result.prefix or result.suffix respectively.
 *
 * @param segment The string segment to match against.
 * @param result The parse result; read for its current state and updated on a match.
 * @param status Error code forwarded to the underlying pattern matcher.
 * @return false if the preconditions for attempting a match are not met; otherwise the value
 *         returned by the underlying prefix/suffix pattern matcher's match().
 */
bool AffixMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    if (!result.seenNumber()) {
        // Prefix
        // Do not match if:
        // 1. We have already seen a prefix (result.prefix != null)
        // 2. The prefix in this AffixMatcher is empty (prefix == null)
        if (!result.prefix.isBogus() || fPrefix == nullptr) {
            return false;
        }

        // Attempt to match the prefix.
        int initialOffset = segment.getOffset();
        bool maybeMore = fPrefix->match(segment, result, status);
        if (initialOffset != segment.getOffset()) {
            // The offset moved, so the prefix consumed input: record it.
            result.prefix = fPrefix->getPattern();
        }
        return maybeMore;
    } else {
        // Suffix
        // Do not match if:
        // 1. We have already seen a suffix (result.suffix != null)
        // 2. The suffix in this AffixMatcher is empty (suffix == null)
        // 3. The matched prefix does not equal this AffixMatcher's prefix
        if (!result.suffix.isBogus() || fSuffix == nullptr || !matched(fPrefix, result.prefix)) {
            return false;
        }

        // Attempt to match the suffix.
        int initialOffset = segment.getOffset();
        bool maybeMore = fSuffix->match(segment, result, status);
        if (initialOffset != segment.getOffset()) {
            // The offset moved, so the suffix consumed input: record it.
            result.suffix = fSuffix->getPattern();
        }
        return maybeMore;
    }
}
/**
 * Applies this affix pair's flags to the result if this pair is the one that was matched.
 *
 * The pair counts as matched when both its prefix and its suffix correspond to result.prefix
 * and result.suffix according to matched(). In that case bogus prefix/suffix strings in the
 * result are replaced by empty strings, fFlags is OR-ed into result.flags, and postProcess()
 * is forwarded to each non-null affix pattern matcher.
 *
 * @param result The parse result to update.
 */
void AffixMatcher::postProcess(ParsedNumber& result) const {
    // Check to see if our affix is the one that was matched. If so, set the flags in the result.
    if (matched(fPrefix, result.prefix) && matched(fSuffix, result.suffix)) {
        // Fill in the result prefix and suffix with non-null values (empty string).
        // Used by strict mode to determine whether an entire affix pair was matched.
        if (result.prefix.isBogus()) {
            result.prefix = UnicodeString();
        }
        if (result.suffix.isBogus()) {
            result.suffix = UnicodeString();
        }

        result.flags |= fFlags;

        if (fPrefix != nullptr) {
            fPrefix->postProcess(result);
        }
        if (fSuffix != nullptr) {
            fSuffix->postProcess(result);
        }
    }
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.