Quellcodebibliothek Statistik Leitseite products/sources/formale Sprachen/C/Firefox/intl/icu/source/i18n/   (Browser von der Mozilla Stiftung Version 136.0.1©)  Datei vom 10.2.2025 mit Größe 6 kB image not shown  

Quelle  brktrans.cpp   Sprache: C

 
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2008-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   05/11/2008  Andy Heninger  Port from Java
**********************************************************************
*/


#include <utility>

#include "unicode/utypes.h"

#if  !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION

#include "unicode/brkiter.h"
#include "unicode/localpointer.h"
#include "unicode/uchar.h"
#include "unicode/unifilt.h"
#include "unicode/uniset.h"

#include "brktrans.h"
#include "cmemory.h"
#include "mutex.h"
#include "uprops.h"
#include "uinvchar.h"
#include "util.h"
#include "uvectr32.h"

U_NAMESPACE_BEGIN

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)

static const char16_t SPACE       = 32;  // ' '


/**
 * Constructs a transliterator with the default delimiters '{' and
 * '}'.
 */

BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) :
        Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
        cachedBI(nullptr), cachedBoundaries(nullptr), fInsertion(SPACE) {
    }


/**
 * Destructor.
 */

BreakTransliterator::~BreakTransliterator() {
}

/**
 * Copy constructor.
 */

BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) :
        Transliterator(o), cachedBI(nullptr), cachedBoundaries(nullptr), fInsertion(o.fInsertion) {
}


/**
 * Transliterator API.
 */

BreakTransliterator* BreakTransliterator::clone() const {
    return new BreakTransliterator(*this);
}

/**
 * Implements {@link Transliterator#handleTransliterate}.
 */

void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition&&nbsp;offsets,
                                                    UBool isIncremental ) const {

        UErrorCode status = U_ZERO_ERROR;
        LocalPointer<BreakIterator> bi;
        LocalPointer<UVector32> boundaries;

        {
            Mutex m;
            BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
            boundaries = std::move(nonConstThis->cachedBoundaries);
            bi = std::move(nonConstThis->cachedBI);
        }
        if (bi.isNull()) {
            bi.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status));
        }
        if (boundaries.isNull()) {
            boundaries.adoptInstead(new UVector32(status));
        }

        if (bi.isNull() || boundaries.isNull() || U_FAILURE(status)) {
            return;
        }

        boundaries->removeAllElements();
        UnicodeString sText = replaceableAsString(text);
        bi->setText(sText);
        bi->preceding(offsets.start);

        // To make things much easier, we will stack the boundaries, and then insert at the end.
        // generally, we won't need too many, since we will be filtered.

        int32_t boundary;
        for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) {
            if (boundary == 0) continue;
            // HACK: Check to see that preceding item was a letter

            UChar32 cp = sText.char32At(boundary-1);
            int type = u_charType(cp);
            //System.out.println(Integer.toString(cp,16) + " (before): " + type);
            if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;

            cp = sText.char32At(boundary);
            type = u_charType(cp);
            //System.out.println(Integer.toString(cp,16) + " (after): " + type);
            if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;

            boundaries->addElement(boundary, status);
            // printf("Boundary at %d\n", boundary);
        }

        int delta = 0;
        int lastBoundary = 0;

        if (boundaries->size() != 0) { // if we found something, adjust
            delta = boundaries->size() * fInsertion.length();
            lastBoundary = boundaries->lastElementi();

            // we do this from the end backwards, so that we don't have to keep updating.

            while (boundaries->size() > 0) {
                boundary = boundaries->popi();
                text.handleReplaceBetween(boundary, boundary, fInsertion);
            }
        }

        // Now fix up the return values
        offsets.contextLimit += delta;
        offsets.limit += delta;
        offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;

        // Return break iterator & boundaries vector to the cache.
        {
            Mutex m;
            BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
            if (nonConstThis->cachedBI.isNull()) {
                nonConstThis->cachedBI = std::move(bi);
            }
            if (nonConstThis->cachedBoundaries.isNull()) {
                nonConstThis->cachedBoundaries = std::move(boundaries);
            }
        }

        // TODO:  do something with U_FAILURE(status);
        //        (need to look at transliterators overall, not just here.)
}

//
//  getInsertion()
//
const UnicodeString &BreakTransliterator::getInsertion() const {
    return fInsertion;
}

//
//  setInsertion()
//
void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
    this->fInsertion = insertion;
}

//
//   replaceableAsString   Hack to let break iterators work
//                         on the replaceable text from transliterators.
//                         In practice, the only real Replaceable type that we
//                         will be seeing is UnicodeString, so this function
//                         will normally be efficient.
//
UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
    UnicodeString s;
    UnicodeString *rs = dynamic_cast<UnicodeString *>(&r);
    if (rs != nullptr) {
        s = *rs;
    } else {
        r.extractBetween(0, r.length(), s);
    }
    return s;
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

Messung V0.5
C=83 H=91 G=86

¤ Dauer der Verarbeitung: 0.12 Sekunden  (vorverarbeitet)  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.