Anforderungen  |   Konzepte  |   Entwurf  |   Entwicklung  |   Qualitätssicherung  |   Lebenszyklus  |   Steuerung
 
 
 
 


Quelle  number_affixutils.cpp   Sprache: C

 
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

#include "number_affixutils.h"
#include "unicode/utf16.h"
#include "unicode/uniset.h"

using namespace icu;
using namespace icu::number;
using namespace icu::number::impl;

TokenConsumer::~TokenConsumer() = default;
SymbolProvider::~SymbolProvider() = default;

int32_t AffixUtils::estimateLength(const UnicodeString &patternString, UErrorCode &status) {
    AffixPatternState state = STATE_BASE;
    int32_t offset = 0;
    int32_t length = 0;
    for (; offset < patternString.length();) {
        UChar32 cp = patternString.char32At(offset);

        switch (state) {
            case STATE_BASE:
                if (cp == u'\'') {
                    // First quote
                    state = STATE_FIRST_QUOTE;
                } else {
                    // Unquoted symbol
                    length++;
                }
                break;
            case STATE_FIRST_QUOTE:
                if (cp == u'\'') {
                    // Repeated quote
                    length++;
                    state = STATE_BASE;
                } else {
                    // Quoted code point
                    length++;
                    state = STATE_INSIDE_QUOTE;
                }
                break;
            case STATE_INSIDE_QUOTE:
                if (cp == u'\'') {
                    // End of quoted sequence
                    state = STATE_AFTER_QUOTE;
                } else {
                    // Quoted code point
                    length++;
                }
                break;
            case STATE_AFTER_QUOTE:
                if (cp == u'\'') {
                    // Double quote inside of quoted sequence
                    length++;
                    state = STATE_INSIDE_QUOTE;
                } else {
                    // Unquoted symbol
                    length++;
                }
                break;
            default:
                UPRV_UNREACHABLE_EXIT;
        }

        offset += U16_LENGTH(cp);
    }

    switch (state) {
        case STATE_FIRST_QUOTE:
        case STATE_INSIDE_QUOTE:
            status = U_ILLEGAL_ARGUMENT_ERROR;
            break;
        default:
            break;
    }

    return length;
}

UnicodeString AffixUtils::escape(const UnicodeString &input) {
    AffixPatternState state = STATE_BASE;
    int32_t offset = 0;
    UnicodeString output;
    for (; offset < input.length();) {
        UChar32 cp = input.char32At(offset);

        switch (cp) {
            case u'\'':
                output.append(u"''", -1);
                break;

            case u'-':
            case u'+':
            case u'%':
            case u'‰':
            case u'¤':
                if (state == STATE_BASE) {
                    output.append(u'\'');
                    output.append(cp);
                    state = STATE_INSIDE_QUOTE;
                } else {
                    output.append(cp);
                }
                break;

            default:
                if (state == STATE_INSIDE_QUOTE) {
                    output.append(u'\'');
                    output.append(cp);
                    state = STATE_BASE;
                } else {
                    output.append(cp);
                }
                break;
        }
        offset += U16_LENGTH(cp);
    }

    if (state == STATE_INSIDE_QUOTE) {
        output.append(u'\'');
    }

    return output;
}

Field AffixUtils::getFieldForType(AffixPatternType type) {
    switch (type) {
        case TYPE_MINUS_SIGN:
            return {UFIELD_CATEGORY_NUMBER, UNUM_SIGN_FIELD};
        case TYPE_PLUS_SIGN:
            return {UFIELD_CATEGORY_NUMBER, UNUM_SIGN_FIELD};
        case TYPE_APPROXIMATELY_SIGN:
            return {UFIELD_CATEGORY_NUMBER, UNUM_APPROXIMATELY_SIGN_FIELD};
        case TYPE_PERCENT:
            return {UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD};
        case TYPE_PERMILLE:
            return {UFIELD_CATEGORY_NUMBER, UNUM_PERMILL_FIELD};
        case TYPE_CURRENCY_SINGLE:
            return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD};
        case TYPE_CURRENCY_DOUBLE:
            return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD};
        case TYPE_CURRENCY_TRIPLE:
            return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD};
        case TYPE_CURRENCY_QUAD:
            return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD};
        case TYPE_CURRENCY_QUINT:
            return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD};
        case TYPE_CURRENCY_OVERFLOW:
            return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD};
        default:
            UPRV_UNREACHABLE_EXIT;
    }
}

int32_t
AffixUtils::unescape(const UnicodeString &affixPattern, FormattedStringBuilder &output, int32_t position,
                     const SymbolProvider &provider, Field field, UErrorCode &status) {
    int32_t length = 0;
    AffixTag tag;
    while (hasNext(tag, affixPattern)) {
        tag = nextToken(tag, affixPattern, status);
        if (U_FAILURE(status)) { return length; }
        if (tag.type == TYPE_CURRENCY_OVERFLOW) {
            // Don't go to the provider for this special case
            length += output.insertCodePoint(
                position + length,
                0xFFFD,
                {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD},
                status);
        } else if (tag.type < 0) {
            length += output.insert(
                    position + length, provider.getSymbol(tag.type), getFieldForType(tag.type), status);
        } else {
            length += output.insertCodePoint(position + length, tag.codePoint, field, status);
        }
    }
    return length;
}

int32_t AffixUtils::unescapedCodePointCount(const UnicodeString &affixPattern,
                                            const SymbolProvider &provider, UErrorCode &status) {
    int32_t length = 0;
    AffixTag tag;
    while (hasNext(tag, affixPattern)) {
        tag = nextToken(tag, affixPattern, status);
        if (U_FAILURE(status)) { return length; }
        if (tag.type == TYPE_CURRENCY_OVERFLOW) {
            length += 1;
        } else if (tag.type < 0) {
            length += provider.getSymbol(tag.type).length();
        } else {
            length += U16_LENGTH(tag.codePoint);
        }
    }
    return length;
}

bool
AffixUtils::containsType(const UnicodeString &affixPattern, AffixPatternType type, UErrorCode &status)&nbsp;{
    if (affixPattern.length() == 0) {
        return false;
    }
    AffixTag tag;
    while (hasNext(tag, affixPattern)) {
        tag = nextToken(tag, affixPattern, status);
        if (U_FAILURE(status)) { return false; }
        if (tag.type == type) {
            return true;
        }
    }
    return false;
}

bool AffixUtils::hasCurrencySymbols(const UnicodeString &affixPattern, UErrorCode &status) {
    if (affixPattern.length() == 0) {
        return false;
    }
    AffixTag tag;
    while (hasNext(tag, affixPattern)) {
        tag = nextToken(tag, affixPattern, status);
        if (U_FAILURE(status)) { return false; }
        if (tag.type < 0 && getFieldForType(tag.type) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
            return true;
        }
    }
    return false;
}

UnicodeString AffixUtils::replaceType(const UnicodeString &affixPattern, AffixPatternType type,
                                      char16_t replacementChar, UErrorCode &status) {
    UnicodeString output(affixPattern); // copy
    if (affixPattern.length() == 0) {
        return output;
    }
    AffixTag tag;
    while (hasNext(tag, affixPattern)) {
        tag = nextToken(tag, affixPattern, status);
        if (U_FAILURE(status)) { return output; }
        if (tag.type == type) {
            output.replace(tag.offset - 1, 1, replacementChar);
        }
    }
    return output;
}

bool AffixUtils::containsOnlySymbolsAndIgnorables(const UnicodeString& affixPattern,
                                                  const UnicodeSet& ignorables, UErrorCode& status) {
    if (affixPattern.length() == 0) {
        return true;
    }
    AffixTag tag;
    while (hasNext(tag, affixPattern)) {
        tag = nextToken(tag, affixPattern, status);
        if (U_FAILURE(status)) { return false; }
        if (tag.type == TYPE_CODEPOINT && !ignorables.contains(tag.codePoint)) {
            return false;
        }
    }
    return true;
}

void AffixUtils::iterateWithConsumer(const UnicodeString& affixPattern, TokenConsumer& consumer,
                                     UErrorCode& status) {
    if (affixPattern.length() == 0) {
        return;
    }
    AffixTag tag;
    while (hasNext(tag, affixPattern)) {
        tag = nextToken(tag, affixPattern, status);
        if (U_FAILURE(status)) { return; }
        consumer.consumeToken(tag.type, tag.codePoint, status);
        if (U_FAILURE(status)) { return; }
    }
}

AffixTag AffixUtils::nextToken(AffixTag tag, const UnicodeString &patternString, UErrorCode &status) {
    int32_t offset = tag.offset;
    int32_t state = tag.state;
    for (; offset < patternString.length();) {
        UChar32 cp = patternString.char32At(offset);
        int32_t count = U16_LENGTH(cp);

        switch (state) {
            case STATE_BASE:
                switch (cp) {
                    case u'\'':
                        state = STATE_FIRST_QUOTE;
                        offset += count;
                        // continue to the next code point
                        break;
                    case u'-':
                        return makeTag(offset + count, TYPE_MINUS_SIGN, STATE_BASE, 0);
                    case u'+':
                        return makeTag(offset + count, TYPE_PLUS_SIGN, STATE_BASE, 0);
                    case u'~':
                        return makeTag(offset + count, TYPE_APPROXIMATELY_SIGN, STATE_BASE, 0);
                    case u'%':
                        return makeTag(offset + count, TYPE_PERCENT, STATE_BASE, 0);
                    case u'‰':
                        return makeTag(offset + count, TYPE_PERMILLE, STATE_BASE, 0);
                    case u'¤':
                        state = STATE_FIRST_CURR;
                        offset += count;
                        // continue to the next code point
                        break;
                    default:
                        return makeTag(offset + count, TYPE_CODEPOINT, STATE_BASE, cp);
                }
                break;
            case STATE_FIRST_QUOTE:
                if (cp == u'\'') {
                    return makeTag(offset + count, TYPE_CODEPOINT, STATE_BASE, cp);
                } else {
                    return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, cp);
                }
            case STATE_INSIDE_QUOTE:
                if (cp == u'\'') {
                    state = STATE_AFTER_QUOTE;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, cp);
                }
            case STATE_AFTER_QUOTE:
                if (cp == u'\'') {
                    return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, cp);
                } else {
                    state = STATE_BASE;
                    // re-evaluate this code point
                    break;
                }
            case STATE_FIRST_CURR:
                if (cp == u'¤') {
                    state = STATE_SECOND_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_SINGLE, STATE_BASE, 0);
                }
            case STATE_SECOND_CURR:
                if (cp == u'¤') {
                    state = STATE_THIRD_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_DOUBLE, STATE_BASE, 0);
                }
            case STATE_THIRD_CURR:
                if (cp == u'¤') {
                    state = STATE_FOURTH_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_TRIPLE, STATE_BASE, 0);
                }
            case STATE_FOURTH_CURR:
                if (cp == u'¤') {
                    state = STATE_FIFTH_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_QUAD, STATE_BASE, 0);
                }
            case STATE_FIFTH_CURR:
                if (cp == u'¤') {
                    state = STATE_OVERFLOW_CURR;
                    offset += count;
                    // continue to the next code point
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_QUINT, STATE_BASE, 0);
                }
            case STATE_OVERFLOW_CURR:
                if (cp == u'¤') {
                    offset += count;
                    // continue to the next code point and loop back to this state
                    break;
                } else {
                    return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0);
                }
            default:
                UPRV_UNREACHABLE_EXIT;
        }
    }
    // End of string
    switch (state) {
        case STATE_BASE:
            // No more tokens in string.
            return {-1};
        case STATE_FIRST_QUOTE:
        case STATE_INSIDE_QUOTE:
            // For consistent behavior with the JDK and ICU 58, set an error here.
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return {-1};
        case STATE_AFTER_QUOTE:
            // No more tokens in string.
            return {-1};
        case STATE_FIRST_CURR:
            return makeTag(offset, TYPE_CURRENCY_SINGLE, STATE_BASE, 0);
        case STATE_SECOND_CURR:
            return makeTag(offset, TYPE_CURRENCY_DOUBLE, STATE_BASE, 0);
        case STATE_THIRD_CURR:
            return makeTag(offset, TYPE_CURRENCY_TRIPLE, STATE_BASE, 0);
        case STATE_FOURTH_CURR:
            return makeTag(offset, TYPE_CURRENCY_QUAD, STATE_BASE, 0);
        case STATE_FIFTH_CURR:
            return makeTag(offset, TYPE_CURRENCY_QUINT, STATE_BASE, 0);
        case STATE_OVERFLOW_CURR:
            return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0);
        default:
            UPRV_UNREACHABLE_EXIT;
    }
}

bool AffixUtils::hasNext(const AffixTag &tag, const UnicodeString &string) {
    // First check for the {-1} and default initializer syntax.
    if (tag.offset < 0) {
        return false;
    } else if (tag.offset == 0) {
        return string.length() > 0;
    }
    // The rest of the fields are safe to use now.
    // Special case: the last character in string is an end quote.
    if (tag.state == STATE_INSIDE_QUOTE && tag.offset == string.length() - 1 &&
        string.charAt(tag.offset) == u'\'') {
        return false;
    } else if (tag.state != STATE_BASE) {
        return true;
    } else {
        return tag.offset < string.length();
    }
}

#endif /* #if !UCONFIG_NO_FORMATTING */

Messung V0.5
C=94 H=93 G=93

¤ Dauer der Verarbeitung: 0.13 Sekunden  (vorverarbeitet)  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.






                                                                                                                                                                                                                                                                                                                                                                                                     


Neuigkeiten

     Aktuelles
     Motto des Tages

Software

     Produkte
     Quellcodebibliothek

Aktivitäten

     Artikel über Sicherheit
     Anleitung zur Aktivierung von SSL

Muße

     Gedichte
     Musik
     Bilder

Jenseits des Üblichen ....
    

Besucherstatistik

Besucherstatistik

Monitoring

Montastic status badge