Ziele Untersuchung
mit Columbo Integrität von
Datenbanken Interaktion und
Portierbarkeit Ergonomie der
Schnittstellen

Angebot Produkte Projekt Beratung

Mittel Analytik Modellierung Sprachen Algebra Logik Hardware Denken Kreativität

Zusammenhänge Gesellschaft Wirtschaft Branche Firma


products/Sources/formale Sprachen/C/Firefox/intl/icu/source/i18n/ (Browser von der Mozilla Stiftung Version 136.0.1^©) Datei vom 10.2.2025 mit Größe 6 kB

Quelle brktrans.cpp

Sprache: C++

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2008-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   05/11/2008  Andy Heninger  Port from Java
**********************************************************************
*/

#include <utility>

#include "unicode/utypes.h"

#if  !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION

#include "unicode/brkiter.h"
#include "unicode/localpointer.h"
#include "unicode/uchar.h"
#include "unicode/unifilt.h"
#include "unicode/uniset.h"

#include "brktrans.h"
#include "cmemory.h"
#include "mutex.h"
#include "uprops.h"
#include "uinvchar.h"
#include "util.h"
#include "uvectr32.h"

U_NAMESPACE_BEGIN

UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)

// U+0020 SPACE: the initial value of fInsertion set by the filter-taking constructor.
static constexpr char16_t SPACE = 0x0020;

/**
* Constructs the transliterator registered under the ID "Any-BreakInternal".
*
* The insertion string starts out as a single SPACE (U+0020), and both
* cache members (break iterator and boundary vector) start out null.
*
* @param adoptedFilter  filter passed straight through to the
*                       Transliterator base-class constructor.
*/
BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter)
    : Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
      cachedBI(nullptr),
      cachedBoundaries(nullptr),
      fInsertion(SPACE) {
}

/**
* Destructor.
* Performs no explicit cleanup of its own; members are destroyed in the
* usual way.
*/
BreakTransliterator::~BreakTransliterator() = default;

/**
* Copy constructor.
* Copies the base-class state and the insertion string from `o`. The cached
* break iterator and boundary vector are deliberately NOT copied: the new
* object starts with both caches null.
*/
BreakTransliterator::BreakTransliterator(const BreakTransliterator& o)
    : Transliterator(o),
      cachedBI(nullptr),
      cachedBoundaries(nullptr),
      fInsertion(o.fInsertion) {
}

/**
* Transliterator API.
* Returns a new heap-allocated copy of this object, built with the copy
* constructor. The object comes from plain `new`; presumably the caller
* takes ownership (confirm against the Transliterator::clone() contract).
*/
BreakTransliterator* BreakTransliterator::clone() const {
    BreakTransliterator* duplicate = new BreakTransliterator(*this);
    return duplicate;
}

/**
* Implements {@link Transliterator#handleTransliterate}.
*/
void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                    UBool isIncremental ) const {

        UErrorCode status = U_ZERO_ERROR;
        LocalPointer<BreakIterator> bi;
        LocalPointer<UVector32> boundaries;

        {
            Mutex m;
            BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
            boundaries = std::move(nonConstThis->cachedBoundaries);
            bi = std::move(nonConstThis->cachedBI);
        }
        if (bi.isNull()) {
            bi.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status));
        }
        if (boundaries.isNull()) {
            boundaries.adoptInstead(new UVector32(status));
        }

        if (bi.isNull() || boundaries.isNull() || U_FAILURE(status)) {
            return;
        }

        boundaries->removeAllElements();
        UnicodeString sText = replaceableAsString(text);
        bi->setText(sText);
        bi->preceding(offsets.start);

        // To make things much easier, we will stack the boundaries, and then insert at the end.
        // generally, we won't need too many, since we will be filtered.

        int32_t boundary;
        for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) {
            if (boundary == 0) continue;
            // HACK: Check to see that preceding item was a letter

            UChar32 cp = sText.char32At(boundary-1);
            int type = u_charType(cp);
            //System.out.println(Integer.toString(cp,16) + " (before): " + type);
            if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;

            cp = sText.char32At(boundary);
            type = u_charType(cp);
            //System.out.println(Integer.toString(cp,16) + " (after): " + type);
            if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;

            boundaries->addElement(boundary, status);
            // printf("Boundary at %d\n", boundary);
        }

        int delta = 0;
        int lastBoundary = 0;

        if (boundaries->size() != 0) { // if we found something, adjust
            delta = boundaries->size() * fInsertion.length();
            lastBoundary = boundaries->lastElementi();

            // we do this from the end backwards, so that we don't have to keep updating.

            while (boundaries->size() > 0) {
                boundary = boundaries->popi();
                text.handleReplaceBetween(boundary, boundary, fInsertion);
            }
        }

        // Now fix up the return values
        offsets.contextLimit += delta;
        offsets.limit += delta;
        offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;

        // Return break iterator & boundaries vector to the cache.
        {
            Mutex m;
            BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
            if (nonConstThis->cachedBI.isNull()) {
                nonConstThis->cachedBI = std::move(bi);
            }
            if (nonConstThis->cachedBoundaries.isNull()) {
                nonConstThis->cachedBoundaries = std::move(boundaries);
            }
        }

        // TODO:  do something with U_FAILURE(status);
        //        (need to look at transliterators overall, not just here.)
}

//
//  getInsertion()   Returns a reference to the string that
//                   handleTransliterate() inserts at each boundary it selects.
//
const UnicodeString &BreakTransliterator::getInsertion() const {
    return this->fInsertion;
}

//
//  setInsertion()
//
void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
    this->fInsertion = insertion;
}

//
//   replaceableAsString   Returns the full content of a Replaceable as a
//                         UnicodeString, so that a break iterator can be
//                         run over text coming from a transliterator.
//
//                         When the Replaceable actually is a UnicodeString
//                         it is simply copied; any other implementation is
//                         read out through extractBetween() over its whole
//                         length. In practice UnicodeString is the only
//                         real Replaceable type expected here, so the copy
//                         path is the normal one.
//
UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
    if (const UnicodeString *asString = dynamic_cast<UnicodeString *>(&r)) {
        return *asString;
    }

    UnicodeString extracted;
    r.extractBetween(0, r.length(), extracted);
    return extracted;
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION */

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.13 Sekunden (vorverarbeitet am 2026-04-25) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.