// Allow implicit conversion from char16_t* to UnicodeString for this file: // Helpful in toString methods and elsewhere. #define UNISTR_FROM_STRING_EXPLICIT
// Fraction grouping parsing is disabled for now but could be enabled later. // See https://unicode-org.atlassian.net/browse/ICU-10794 // fractionGrouping = 0 != (parseFlags & PARSE_FLAG_FRACTION_GROUPING_ENABLED);
}
// Tries to consume a decimal number -- digits together with optional grouping and decimal
// separators -- from the front of `segment`.
//
// When exponentSign is 0, the digits that were read become result.quantity. When exponentSign
// is nonzero and characters were consumed, the digits are instead treated as an exponent:
// the magnitude of the existing result.quantity is shifted by (exponentSign * digits), and an
// out-of-range exponent turns the quantity into zero (exponentSign == -1) or infinity.
//
// If nothing acceptable is found, the segment offset is put back where it started.
//
// Returns whether the caller should ask for more characters.
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign,
                           UErrorCode&) const {
    // Codes describing what introduced a digit group.
    constexpr int32_t kSepNone = -1;     // no such group, or group exempt from validation
    constexpr int32_t kSepStart = 0;     // group begins at the start of the string
    constexpr int32_t kSepGrouping = 1;  // group is preceded by a grouping separator
    constexpr int32_t kSepDecimal = 2;   // group is preceded by a decimal separator

    if (result.seenNumber() && exponentSign == 0) {
        // A number was already parsed and this is not the exponent pass.
        return false;
    }
    if (exponentSign != 0) {
        // An exponent can only follow a number that has already been stored.
        U_ASSERT(!result.quantity.bogus);
    }

    // Where the segment stood before we touched it; used to rewind on failure.
    const int32_t startOffset = segment.getOffset();

    // The eventual return value: should the caller feed us more characters?
    bool wantMore = false;

    // Accumulates every digit accepted so far; stays bogus until the first digit is seen.
    number::impl::DecimalQuantity digits;
    digits.bogus = true;

    // Count of digits read after the decimal separator; applied as a scale at the end.
    int32_t fractionDigits = 0;

    // The separators actually encountered in the input. Bogus means "not seen yet".
    UnicodeString seenGrouping;
    UnicodeString seenDecimal;
    seenGrouping.setToBogus();
    seenDecimal.setToBogus();

    // Book-keeping for the current digit group and the one before it:
    //   offset  - segment position where the group starts (including its leading separator),
    //             kept so the parse can be rewound to that point;
    //   sepType - one of the kSep* codes above;
    //   count   - number of digits in the group (-1 for the not-yet-existing previous group).
    int32_t curOffset = 0;
    int32_t curSepType = kSepStart;
    int32_t curCount = 0;
    int32_t prvOffset = -1;
    int32_t prvSepType = kSepNone;
    int32_t prvCount = -1;

    while (segment.length() > 0) {
        wantMore = false;

        // ---- Digit matching ----
        const UChar32 cp = segment.getCodePoint();
        int8_t digit = -1;

        // First choice: the code point's own digit value.
        if (u_isdigit(cp)) {
            segment.adjustOffset(U16_LENGTH(cp));
            digit = static_cast<int8_t>(u_digit(cp, 10));
        }

        // Second choice: the locale-specific digit strings, if any are configured.
        if (digit == -1 && !fLocalDigitStrings.isNull()) {
            for (int32_t d = 0; d < 10; ++d) {
                const UnicodeString& digitStr = fLocalDigitStrings[d];
                if (digitStr.isEmpty()) {
                    continue;
                }
                const int32_t overlap = segment.getCommonPrefixLength(digitStr);
                if (overlap == digitStr.length()) {
                    segment.adjustOffset(overlap);
                    digit = static_cast<int8_t>(d);
                    break;
                }
                // The whole remaining segment is a prefix of this digit string.
                wantMore = wantMore || (overlap == segment.length());
            }
        }

        if (digit >= 0) {
            // Got a digit: record it and move on to the next character.
            if (digits.bogus) {
                digits.bogus = false;
                digits.clear();
            }
            digits.appendDigit(digit, 0, true);
            ++curCount;
            if (!seenDecimal.isBogus()) {
                ++fractionDigits;
            }
            continue;
        }

        // ---- Separator matching ----
        bool isDecimal = false;
        bool isGrouping = false;

        // 1) The configured decimal separator string (only if no decimal separator seen yet).
        if (seenDecimal.isBogus() && !decimalSeparator.isEmpty()) {
            const int32_t overlap = segment.getCommonPrefixLength(decimalSeparator);
            wantMore = wantMore || (overlap == segment.length());
            if (overlap == decimalSeparator.length()) {
                isDecimal = true;
                seenDecimal = decimalSeparator;
            }
        }

        // 2) The grouping separator that this input has already used.
        if (!seenGrouping.isBogus()) {
            const int32_t overlap = segment.getCommonPrefixLength(seenGrouping);
            wantMore = wantMore || (overlap == segment.length());
            if (overlap == seenGrouping.length()) {
                isGrouping = true;
            }
        }

        // 2.5) The configured grouping separator string (only if no separator of either kind
        //      has been seen yet).
        if (!groupingDisabled && seenGrouping.isBogus() && seenDecimal.isBogus() &&
            !groupingSeparator.isEmpty()) {
            const int32_t overlap = segment.getCommonPrefixLength(groupingSeparator);
            wantMore = wantMore || (overlap == segment.length());
            if (overlap == groupingSeparator.length()) {
                isGrouping = true;
                seenGrouping = groupingSeparator;
            }
        }

        // 3) Any decimal separator from the equivalence set (only if no decimal separator
        //    seen yet). The !isGrouping guard ensures the current character is still unclaimed.
        if (!isGrouping && seenDecimal.isBogus() && decimalUniSet->contains(cp)) {
            isDecimal = true;
            seenDecimal = UnicodeString(cp);
        }

        // 4) Any grouping separator from the equivalence set (only if no separator of either
        //    kind has been seen yet).
        if (!groupingDisabled && seenGrouping.isBogus() && seenDecimal.isBogus() &&
            groupingUniSet->contains(cp)) {
            isGrouping = true;
            seenGrouping = UnicodeString(cp);
        }

        // Not a separator either: the number ends here.
        if (!isDecimal && !isGrouping) {
            break;
        }

        // Separators we refuse: a decimal separator in integer-only mode, and a grouping
        // separator inside the fraction.
        if ((isDecimal && integerOnly) || (curSepType == kSepDecimal && isGrouping)) {
            break;
        }

        // ---- Intermediate grouping-size validation ----
        const bool prvOkAsSecondary = validateGroup(prvSepType, prvCount, false);
        const bool curOkAsPrimary = validateGroup(curSepType, curCount, true);
        if (!prvOkAsSecondary || (isDecimal && !curOkAsPrimary)) {
            // Bad grouping sizes.
            if (isGrouping && curCount == 0) {
                // Trailing grouping separators are dealt with after the loop.
                U_ASSERT(curSepType == kSepGrouping);
            } else if (requireGroupingMatch) {
                // Strict mode rejects the whole parse.
                digits.clear();
                digits.bogus = true;
            }
            break;
        }
        if (requireGroupingMatch && curCount == 0 && curSepType == kSepGrouping) {
            break;
        }

        // Sizes are fine so far: the current group becomes the previous one. A group that is
        // closed by a decimal separator is exempted from further validation.
        prvOffset = curOffset;
        prvCount = curCount;
        prvSepType = isDecimal ? kSepNone : curSepType;

        // ---- Accept the separator ----
        // An empty current group keeps its offset, which is what lets lenient mode cope with
        // two grouping separators in a row.
        if (curCount != 0) {
            curOffset = segment.getOffset();
        }
        curCount = 0;
        if (isGrouping) {
            curSepType = kSepGrouping;
            segment.adjustOffset(seenGrouping.length());
        } else {
            curSepType = kSepDecimal;
            segment.adjustOffset(seenDecimal.length());
        }
    }

    // The loop is done. If it ended on an empty group that was not opened by a decimal
    // separator, rewind to the start of that group and promote the previous group to
    // "current" so it can be validated as the final group.
    if (curSepType != kSepDecimal && curCount == 0) {
        wantMore = true;
        segment.setOffset(curOffset);
        curOffset = prvOffset;
        curSepType = prvSepType;
        curCount = prvCount;
        prvOffset = -1;
        prvSepType = kSepStart;
        prvCount = 1;
    }

    // ---- Final grouping-size validation ----
    const bool prvOkAsSecondary = validateGroup(prvSepType, prvCount, false);
    const bool curOkAsPrimary = validateGroup(curSepType, curCount, true);
    if (!requireGroupingMatch) {
        // Lenient mode never fails here; it only drops lone digits.
        // Examples: "1,1" "1,1," "1,1,1" "1,1,1," ",1" (all parse as 1).
        // More examples live in numberformattestspecification.txt.
        int32_t digitsToDrop = 0;
        if (!prvOkAsSecondary) {
            segment.setOffset(prvOffset);
            digitsToDrop = prvCount + curCount;
        } else if (!curOkAsPrimary && (prvSepType != kSepStart || prvCount != 0)) {
            wantMore = true;
            segment.setOffset(curOffset);
            digitsToDrop = curCount;
        }
        if (digitsToDrop != 0) {
            digits.adjustMagnitude(-digitsToDrop);
            digits.truncate();
        }
    } else if (curSepType != kSepDecimal && !(prvOkAsSecondary && curOkAsPrimary)) {
        // Strict mode: grouping failure.
        digits.bogus = true;
    }

    // Nothing usable: either a separator with no digits, or a grouping-size failure.
    if (digits.bogus) {
        const bool askForMore = wantMore || (segment.length() == 0);
        segment.setOffset(startOffset);
        return askForMore;
    }

    // ---- Post-processing ----

    // Scale down by the number of fraction digits.
    digits.adjustMagnitude(-fractionDigits);

    if (exponentSign != 0 && segment.getOffset() != startOffset) {
        // The digits are an exponent: shift the magnitude of the number already in `result`.
        // Overflow is assumed until the shift is known to have succeeded.
        bool overflow = true;
        if (digits.fitsInLong()) {
            const int64_t exponentLong = digits.toLong(false);
            U_ASSERT(exponentLong >= 0);
            if (exponentLong <= INT32_MAX) {
                const int32_t exponentInt = static_cast<int32_t>(exponentLong);
                if (!result.quantity.adjustMagnitude(exponentSign * exponentInt)) {
                    overflow = false;
                }
            }
        }
        if (overflow) {
            if (exponentSign == -1) {
                // Hugely negative exponent: collapse to zero.
                result.quantity.clear();
            } else {
                // Hugely positive exponent: infinity.
                result.quantity.bogus = true;
                result.flags |= FLAG_INFINITY;
            }
        }
    } else {
        // Ordinary number: hand the digits over as the quantity.
        result.quantity = digits;
    }

    // Record the remaining facts about the parse and report back.
    if (!seenDecimal.isBogus()) {
        result.flags |= FLAG_HAS_DECIMAL_SEPARATOR;
    }
    result.setCharsConsumed(segment);
    return segment.length() == 0 || wantMore;
}
bool DecimalMatcher::validateGroup(int32_t sepType, int32_t count, bool isPrimary) const { if (requireGroupingMatch) { if (sepType == -1) { // No such group (prevGroup before first shift). returntrue;
} elseif (sepType == 0) { // First group. if (isPrimary) { // No grouping separators is OK. returntrue;
} else { return count != 0 && count <= grouping2;
}
} elseif (sepType == 1) { // Middle group. if (isPrimary) { return count == grouping1;
} else { return count == grouping2;
}
} else {
U_ASSERT(sepType == 2); // After the decimal separator. returntrue;
}
} else { if (sepType == 1) { // #11230: don't accept middle groups with only 1 digit. return count != 1;
} else { returntrue;
}
}
}
bool DecimalMatcher::smokeTest(const StringSegment& segment) const { // The common case uses a static leadSet for efficiency. if (fLocalDigitStrings.isNull() && leadSet != nullptr) { return segment.startsWith(*leadSet);
} if (segment.startsWith(*separatorSet) || u_isdigit(segment.getCodePoint())) { returntrue;
} if (fLocalDigitStrings.isNull()) { returnfalse;
} for (int32_t i = 0; i < 10; i++) { if (segment.startsWith(fLocalDigitStrings[i])) { returntrue;
}
} returnfalse;
}