Quellcodebibliothek Statistik Leitseite products/sources/formale Sprachen/C/LibreOffice/sax/source/expatwrap/   (Office von Apache Version 25.8.3.2©)  Datei vom 5.10.2025 mit Größe 45 kB image not shown  

Quelle  saxwriter.cxx   Sprache: C

 
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */


#include <string.h>

#include <cassert>
#include <set>
#include <stack>
#include <vector>

#include <com/sun/star/io/IOException.hpp>
#include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
#include <com/sun/star/lang/XServiceInfo.hpp>
#include <com/sun/star/uno/XComponentContext.hpp>
#include <com/sun/star/util/XCloneable.hpp>
#include <com/sun/star/xml/sax/SAXInvalidCharacterException.hpp>
#include <com/sun/star/xml/sax/XWriter.hpp>

#include <cppuhelper/exc_hlp.hxx>
#include <cppuhelper/weak.hxx>
#include <cppuhelper/implbase.hxx>
#include <cppuhelper/supportsservice.hxx>

#include <osl/diagnose.h>
#include <rtl/character.hxx>
#include <sal/log.hxx>

#include <memory>

using namespace ::cppu;
using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::lang;
using namespace ::com::sun::star::xml::sax;
using namespace ::com::sun::star::io;

#define LINEFEED 10
#define SEQUENCESIZE 1024
#define MAXCOLUMNCOUNT 72

/******
*
*
* Character conversion functions
*
*
*****/


namespace
{
// Result of SaxWriterHelper::startElement: tells the caller whether invalid
// characters were met while writing the start tag, and where.
enum SaxInvalidCharacterError
{
    SAX_NONE, // every name and attribute value was written without complaint
    SAX_WARNING, // an attribute value could not be written cleanly
    SAX_ERROR // the element name or an attribute name could not be written cleanly
};

// Stuff for custom entity names: one mapping from a piece of input text to the
// name that is written to the output in its place.
struct ReplacementPair
{
    OUString name; // what gets emitted (converted to UTF-8 when written)
    OUString replacement; // the text that is searched for in the string being written
};
inline bool operator<(const ReplacementPair& lhs, const ReplacementPair& rhs)
{
    return lhs.replacement.compareTo(rhs.replacement) < 0;
}

// Serialises SAX events as UTF-8 XML text. Output is collected in a buffer of
// SEQUENCESIZE bytes (m_Sequence / mp_Sequence) and handed to the output
// stream each time that buffer is full, and on endDocument / clearBuffer.
class SaxWriterHelper
{
#ifdef DBG_UTIL
public:
    // Names of the elements passed to startElement (debug builds only).
    ::std::stack<OUString> m_DebugStartedElements;
#endif

private:
    Reference<XOutputStream> m_out; // stream the buffer is written to
    Sequence<sal_Int8> m_Sequence; // the output buffer
    sal_Int8* mp_Sequence; // raw pointer to the bytes of m_Sequence

    sal_Int32 nLastLineFeedPos; // is negative after writing a sequence
    sal_uInt32 nCurrentPos; // index of the next free byte in the buffer
    // false while a start tag has been written without its closing '>'
    bool m_bStartElementFinished;

    // Custom entity names, kept sorted by the text they replace.
    std::vector<ReplacementPair> m_Replacements;

    // Writes m_Sequence to the output stream and returns the new buffer
    // position (always 0).
    /// @throws SAXException
    sal_uInt32 writeSequence();

    // Use only when the bytes do not fit into the remaining space of the
    // sequence, so that the sequence has to be written out and rPos reset to 0.
    // Writes the sequence only on overflow; the sequence may be exactly full
    // at the end (rPos == SEQUENCESIZE).
    /// @throws SAXException
    void AddBytes(sal_Int8* pTarget, sal_uInt32& rPos, const sal_Int8* pBytes,
                  sal_uInt32 nBytesCount);
    // Converts UTF-16 text to UTF-8 in pTarget, optionally escaping XML
    // special characters; returns false if invalid characters were found.
    /// @throws SAXException
    bool convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen, bool bDoNormalization,
                      bool bNormalizeWhitespace, sal_Int8* pTarget, sal_uInt32& rPos);
    // Appends the '>' of a pending start tag, if there is one.
    /// @throws SAXException
    void FinishStartElement();

    // Search for the correct replacement
    const ReplacementPair* findXMLReplacement(const sal_Unicode* pStr, sal_Int32 nStrLen);

public:
    // Allocates the SEQUENCESIZE-byte buffer and remembers the output stream.
    explicit SaxWriterHelper(Reference<XOutputStream> const& m_TempOut)
        : m_out(m_TempOut)
        , m_Sequence(SEQUENCESIZE)
        , mp_Sequence(nullptr)
        , nLastLineFeedPos(0)
        , nCurrentPos(0)
        , m_bStartElementFinished(true)
    {
        OSL_ENSURE(SEQUENCESIZE > 50, "Sequence cache size too small");
        mp_Sequence = m_Sequence.getArray();
    }
    // Only diagnoses unwritten data; nothing is flushed here.
    ~SaxWriterHelper()
    {
        OSL_ENSURE(!nCurrentPos, "cached Sequence not written");
        OSL_ENSURE(m_bStartElementFinished, "StartElement not completely written");
    }

    // Writes a line feed followed by m_nLevel space characters.
    /// @throws SAXException
    void insertIndentation(sal_uInt32 m_nLevel);

    // Returns whether the string was written correctly. If there were invalid
    // characters in the string it returns false.
    // Then the calling method has to throw the needed exception.
    /// @throws SAXException
    bool writeString(const OUString& rWriteOutString, bool bDoNormalization,
                     bool bNormalizeWhitespace);

    // Distance between the current buffer position and the last recorded line feed.
    sal_uInt32 GetLastColumnCount() const noexcept
    {
        return static_cast<sal_uInt32>(nCurrentPos - nLastLineFeedPos);
    }

    /// @throws SAXException
    void startDocument();

    // Reports whether the strings were written correctly. If one of the
    // strings contained invalid characters the result is not SAX_NONE.
    // Then the calling method has to throw the needed exception.
    /// @throws SAXException
    SaxInvalidCharacterError startElement(const OUString& rName,
                                          const Reference<XAttributeList>& xAttribs);
    // Closes a pending start tag with "/>"; returns false if none is pending.
    /// @throws SAXException
    bool FinishEmptyElement();

    // Returns whether the name was written correctly. If there were invalid
    // characters in the string it returns false.
    // Then the calling method has to throw the needed exception.
    /// @throws SAXException
    bool endElement(const OUString& rName);
    // Writes out whatever is still buffered.
    /// @throws SAXException
    void endDocument();

    // Returns whether the strings were written correctly. If there were
    // invalid characters in one of them it returns false.
    // Then the calling method has to throw the needed exception.
    /// @throws SAXException
    bool processingInstruction(const OUString& rTarget, const OUString& rData);
    /// @throws SAXException
    void startCDATA();
    /// @throws SAXException
    void endCDATA();

    // Returns whether the comment was written correctly. If there were
    // invalid characters in the string it returns false.
    // Then the calling method has to throw the needed exception.
    /// @throws SAXException
    bool comment(const OUString& rComment);

    // Finishes a pending start tag, writes out the buffered bytes and restores
    // the buffer to its full size.
    /// @throws SAXException
    void clearBuffer();

    // Use custom entity names
    void setCustomEntityNames(
        const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>&
            replacements);

    // Calculate length for convertToXML
    sal_Int32 calcXMLByteLength(const OUString& rStr, bool bDoNormalization,
                                bool bNormalizeWhitespace);
};

// Lookup table for the control characters U+0000..U+001F: true marks the
// ones that may appear in XML text, i.e. TAB (9), LF (10) and CR (13).
const bool g_bValidCharsBelow32[32] = {
    // clang-format off
//  0      1      2      3      4      5      6      7
    false, false, false, false, false, false, false, false,  //0
    false, true,  true,  false, false, true,  false, false,  //8
    false, false, false, false, false, false, false, false,  //16
    false, false, false, false, false, false, false, false
    // clang-format on
};

bool IsInvalidChar(const sal_Unicode aChar)
{
    bool bRet(false);
    // check first for the most common characters
    if (aChar < 32 || aChar >= 0xd800)
        bRet = ((aChar < 32 && !g_bValidCharsBelow32[aChar]) || aChar == 0xffff || aChar == 0xfffe);
    return bRet;
}

/********
* write through to the output stream
*
*****/

sal_uInt32 SaxWriterHelper::writeSequence()
{
    try
    {
        m_out->writeBytes(m_Sequence);
    }
    catch (const IOException&)
    {
        css::uno::Any anyEx = cppu::getCaughtException();
        throw SAXException(u"IO exception during writing"_ustr, Reference<XInterface>(), anyEx);
    }
    nLastLineFeedPos -= SEQUENCESIZE;
    return 0;
}

void SaxWriterHelper::AddBytes(sal_Int8* pTarget, sal_uInt32& rPos, const sal_Int8* pBytes,
                               sal_uInt32 nBytesCount)
{
    OSL_ENSURE((rPos + nBytesCount) > SEQUENCESIZE, "wrong use of AddBytesMethod");
    sal_uInt32 nCount(SEQUENCESIZE - rPos);
    memcpy(&(pTarget[rPos]), pBytes, nCount);

    OSL_ENSURE(rPos + nCount == SEQUENCESIZE, "the position should be the at the end");

    rPos = writeSequence();
    sal_uInt32 nRestCount(nBytesCount - nCount);
    if ((rPos + nRestCount) <= SEQUENCESIZE)
    {
        memcpy(&(pTarget[rPos]), &pBytes[nCount], nRestCount);
        rPos += nRestCount;
    }
    else
        AddBytes(pTarget, rPos, &pBytes[nCount], nRestCount);
}

void SaxWriterHelper::setCustomEntityNames(
    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
{
    m_Replacements.resize(replacements.size());
    for (size_t i = 0; i < replacements.size(); ++i)
    {
        m_Replacements[i].name = replacements[i].First;
        m_Replacements[i].replacement = replacements[i].Second;
    }
    if (replacements.size() > 1)
        std::sort(m_Replacements.begin(), m_Replacements.end());
}

/** Converts a UTF-16 string to UTF-8 and does XML normalization

    @param pTarget
           Pointer to a piece of memory, to where the output should be written. The caller
           must call calcXMLByteLength on the same string, to ensure,
           that there is enough memory for converting.
 */

bool SaxWriterHelper::convertToXML(const sal_Unicode* pStr, sal_Int32 nStrLen,
                                   bool bDoNormalization, bool bNormalizeWhitespace,
                                   sal_Int8* pTarget, sal_uInt32& rPos)
{
    bool bRet(true);
    sal_uInt32 nSurrogate = 0;

    for (sal_Int32 i = 0; i < nStrLen; i++)
    {
        sal_Unicode c = pStr[i];
        if (IsInvalidChar(c))
            bRet = false;
        else if ((c >= 0x0001) && (c <= 0x007F)) // Deal with ascii
        {
            if (bDoNormalization)
            {
                switch (c)
                {
                    case '&'// resemble to &
                    {
                        if ((rPos + 5) > SEQUENCESIZE)
                            AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("&"), 5);
                        else
                        {
                            memcpy(&(pTarget[rPos]), "&", 5);
                            rPos += 5;
                        }
                    }
                    break;
                    case '<':
                    {
                        if ((rPos + 4) > SEQUENCESIZE)
                            AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("<"), 4);
                        else
                        {
                            memcpy(&(pTarget[rPos]), "<", 4);
                            rPos += 4; // <
                        }
                    }
                    break;
                    case '>':
                    {
                        if ((rPos + 4) > SEQUENCESIZE)
                            AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>(">"), 4);
                        else
                        {
                            memcpy(&(pTarget[rPos]), ">", 4);
                            rPos += 4; // >
                        }
                    }
                    break;
                    case '\'':
                    {
                        if ((rPos + 6) > SEQUENCESIZE)
                            AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("'"), 6);
                        else
                        {
                            memcpy(&(pTarget[rPos]), "'", 6);
                            rPos += 6; // '
                        }
                    }
                    break;
                    case '"':
                    {
                        if ((rPos + 6) > SEQUENCESIZE)
                            AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>("""), 6);
                        else
                        {
                            memcpy(&(pTarget[rPos]), """, 6);
                            rPos += 6; // "
                        }
                    }
                    break;
                    case 13:
                    {
                        if ((rPos + 6) > SEQUENCESIZE)
                            AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>(" "), 6);
                        else
                        {
                            memcpy(&(pTarget[rPos]), " ", 6);
                            rPos += 6;
                        }
                    }
                    break;
                    case LINEFEED:
                    {
                        if (bNormalizeWhitespace)
                        {
                            if ((rPos + 6) > SEQUENCESIZE)
                                AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>(" "),
                                         6);
                            else
                            {
                                memcpy(&(pTarget[rPos]), " ", 6);
                                rPos += 6;
                            }
                        }
                        else
                        {
                            pTarget[rPos] = LINEFEED;
                            nLastLineFeedPos = rPos;
                            rPos++;
                        }
                    }
                    break;
                    case 9:
                    {
                        if (bNormalizeWhitespace)
                        {
                            if ((rPos + 6) > SEQUENCESIZE)
                                AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>(" "),
                                         6);
                            else
                            {
                                memcpy(&(pTarget[rPos]), " ", 6);
                                rPos += 6;
                            }
                        }
                        else
                        {
                            pTarget[rPos] = 9;
                            rPos++;
                        }
                    }
                    break;
                    default:
                    {
                        pTarget[rPos] = static_cast<sal_Int8>(c);
                        rPos++;
                    }
                    break;
                }
            }
            else
            {
                pTarget[rPos] = static_cast<sal_Int8>(c);
                if (static_cast<sal_Int8>(c) == LINEFEED)
                    nLastLineFeedPos = rPos;
                rPos++;
            }
        }
        else
        {
            // Deal with replacements
            if (bDoNormalization && !m_Replacements.empty())
            {
                // search
                const ReplacementPair* it = findXMLReplacement(&pStr[i], nStrLen - i);

                // replace
                if (it != nullptr)
                {
                    OString name = ::rtl::OUStringToOString(it->name, RTL_TEXTENCODING_UTF8);
                    if (rPos + name.getLength() > SEQUENCESIZE)
                        AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const*>(name.getStr()),
                                 name.getLength());
                    else
                    {
                        memcpy(&(pTarget[rPos]), name.getStr(), name.getLength());
                        rPos += name.getLength();
                    }
                    i += it->replacement.getLength() - 1;
                    continue;
                }
            }

            // Deal with other unicode cases
            if (rtl::isHighSurrogate(c))
            {
                // 1. surrogate: save (until 2. surrogate)
                if (nSurrogate != 0) // left-over lone 1st Unicode surrogate
                {
                    OSL_FAIL("left-over Unicode surrogate");
                    bRet = false;
                }
                nSurrogate = c;
            }
            else if (rtl::isLowSurrogate(c))
            {
                // 2. surrogate: write as UTF-8
                if (nSurrogate) // can only be 1st surrogate
                {
                    nSurrogate = rtl::combineSurrogates(nSurrogate, c);
                    sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
                                          sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
                                          sal_Int8(0x80 | ((nSurrogate >> 6) & 0x3F)),
                                          sal_Int8(0x80 | ((nSurrogate >> 0) & 0x3F)) };
                    if ((rPos + 4) > SEQUENCESIZE)
                        AddBytes(pTarget, rPos, aBytes, 4);
                    else
                    {
                        pTarget[rPos] = aBytes[0];
                        rPos++;
                        pTarget[rPos] = aBytes[1];
                        rPos++;
                        pTarget[rPos] = aBytes[2];
                        rPos++;
                        pTarget[rPos] = aBytes[3];
                        rPos++;
                    }
                }
                else // lone 2nd surrogate
                {
                    OSL_FAIL("illegal Unicode character");
                    bRet = false;
                }

                // reset surrogate
                nSurrogate = 0;
            }
            else if (c > 0x07FF)
            {
                sal_Int8 aBytes[]
                    = { sal_Int8(0xE0 | ((c >> 12) & 0x0F)), sal_Int8(0x80 | ((c >> 6) & 0x3F)),
                        sal_Int8(0x80 | ((c >> 0) & 0x3F)) };
                if ((rPos + 3) > SEQUENCESIZE)
                    AddBytes(pTarget, rPos, aBytes, 3);
                else
                {
                    pTarget[rPos] = aBytes[0];
                    rPos++;
                    pTarget[rPos] = aBytes[1];
                    rPos++;
                    pTarget[rPos] = aBytes[2];
                    rPos++;
                }
            }
            else
            {
                sal_Int8 aBytes[]
                    = { sal_Int8(0xC0 | ((c >> 6) & 0x1F)), sal_Int8(0x80 | ((c >> 0) & 0x3F)) };
                if ((rPos + 2) > SEQUENCESIZE)
                    AddBytes(pTarget, rPos, aBytes, 2);
                else
                {
                    pTarget[rPos] = aBytes[0];
                    rPos++;
                    pTarget[rPos] = aBytes[1];
                    rPos++;
                }
            }
        }

        OSL_ENSURE(rPos <= SEQUENCESIZE, "not reset current position");
        if (rPos == SEQUENCESIZE)
            rPos = writeSequence();

        // reset left-over surrogate
        if ((nSurrogate != 0) && !rtl::isHighSurrogate(c))
        {
            OSL_FAIL("left-over Unicode surrogate");
            nSurrogate = 0;
            bRet = false;
        }
    }
    if (nSurrogate != 0) // trailing lone 1st surrogate
    {
        OSL_FAIL("left-over Unicode surrogate");
        bRet = false;
    }
    return bRet;
}

void SaxWriterHelper::FinishStartElement()
{
    if (!m_bStartElementFinished)
    {
        mp_Sequence[nCurrentPos] = '>';
        nCurrentPos++;
        if (nCurrentPos == SEQUENCESIZE)
            nCurrentPos = writeSequence();
        m_bStartElementFinished = true;
    }
}

void SaxWriterHelper::insertIndentation(sal_uInt32 m_nLevel)
{
    FinishStartElement();
    if (m_nLevel > 0)
    {
        if ((nCurrentPos + m_nLevel + 1) <= SEQUENCESIZE)
        {
            mp_Sequence[nCurrentPos] = LINEFEED;
            nLastLineFeedPos = nCurrentPos;
            nCurrentPos++;
            memset(&(mp_Sequence[nCurrentPos]), 32, m_nLevel);
            nCurrentPos += m_nLevel;
            if (nCurrentPos == SEQUENCESIZE)
                nCurrentPos = writeSequence();
        }
        else
        {
            sal_uInt32 nCount(m_nLevel + 1);
            std::unique_ptr<sal_Int8[]> pBytes(new sal_Int8[nCount]);
            pBytes[0] = LINEFEED;
            memset(&(pBytes[1]), 32, m_nLevel);
            AddBytes(mp_Sequence, nCurrentPos, pBytes.get(), nCount);
            pBytes.reset();
            nLastLineFeedPos = nCurrentPos - nCount;
            if (nCurrentPos == SEQUENCESIZE)
                nCurrentPos = writeSequence();
        }
    }
    else
    {
        mp_Sequence[nCurrentPos] = LINEFEED;
        nLastLineFeedPos = nCurrentPos;
        nCurrentPos++;
        if (nCurrentPos == SEQUENCESIZE)
            nCurrentPos = writeSequence();
    }
}

bool SaxWriterHelper::writeString(const OUString& rWriteOutString, bool bDoNormalization,
                                  bool bNormalizeWhitespace)
{
    FinishStartElement();
    return convertToXML(rWriteOutString.getStr(), rWriteOutString.getLength(), bDoNormalization,
                        bNormalizeWhitespace, mp_Sequence, nCurrentPos);
}

void SaxWriterHelper::startDocument()
{
    const char pc[] = "1.0\" encoding=\"UTF-8\"?>";
    const int nLen = strlen(pc);
    if ((nCurrentPos + nLen) <= SEQUENCESIZE)
    {
        memcpy(mp_Sequence, pc, nLen);
        nCurrentPos += nLen;
    }
    else
    {
        AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const*>(pc), nLen);
    }
    OSL_ENSURE(nCurrentPos <= SEQUENCESIZE, "not reset current position");
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
    mp_Sequence[nCurrentPos] = LINEFEED;
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
}

#ifndef NDEBUG
bool inrange(sal_Unicode c, sal_Unicode start, sal_Unicode end) { return c >= start && c <= end; }
#endif

void CheckValidName(OUString const& rName)
{
#ifdef NDEBUG
    (void)rName;
#else
    assert(!rName.isEmpty());
    bool hasColon(false);
    for (sal_Int32 i = 0; i < rName.getLength(); ++i)
    {
        auto const c(rName[i]);
        if (c == ':')
        {
            // see https://www.w3.org/TR/REC-xml-names/#ns-qualnames
            SAL_WARN_IF(hasColon, "sax""only one colon allowed: " << rName);
            assert(!hasColon && "only one colon allowed");
            hasColon = true;
        }
        else if (!rtl::isAsciiAlphanumeric(c) && c != '_' && c != '-' && c != '.'
                 && !inrange(c, 0x00C0, 0x00D6) && !inrange(c, 0x00D8, 0x00F6)
                 && !inrange(c, 0x00F8, 0x02FF) && !inrange(c, 0x0370, 0x037D)
                 && !inrange(c, 0x037F, 0x1FFF) && !inrange(c, 0x200C, 0x200D)
                 && !inrange(c, 0x2070, 0x218F) && !inrange(c, 0x2C00, 0x2FEF)
                 && !inrange(c, 0x3001, 0xD7FF) && !inrange(c, 0xF900, 0xFDCF)
                 && !inrange(c, 0xFDF0, 0xFFFD) && c != 0x00B7 && !inrange(c, 0x0300, 0x036F)
                 && !inrange(c, 0x203F, 0x2040))
        {
            // https://www.w3.org/TR/xml11/#NT-NameChar
            // (currently we don't warn about invalid start chars)
            SAL_WARN("sax""unexpected character in attribute name: " << rName);
            assert(!"unexpected character in attribute name");
        }
    }
#endif
}

SaxInvalidCharacterError SaxWriterHelper::startElement(const OUString& rName,
                                                       const Reference<XAttributeList>& xAttribs)
{
    FinishStartElement();

#ifdef DBG_UTIL
    m_DebugStartedElements.push(rName);
    ::std::set<OUString> DebugAttributes;
#endif

    mp_Sequence[nCurrentPos] = '<';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();

    SaxInvalidCharacterError eRet(SAX_NONE);
    CheckValidName(rName);
    if (!writeString(rName, falsefalse))
        eRet = SAX_ERROR;

    sal_Int16 nAttribCount = xAttribs.is() ? xAttribs->getLength() : 0;
    for (sal_Int16 i = 0; i < nAttribCount; i++)
    {
        mp_Sequence[nCurrentPos] = ' ';
        nCurrentPos++;
        if (nCurrentPos == SEQUENCESIZE)
            nCurrentPos = writeSequence();

        OUString const aAttrName(xAttribs->getNameByIndex(i));
#ifdef DBG_UTIL
        // Well-formedness constraint: Unique Att Spec
        assert(DebugAttributes.find(aAttrName) == DebugAttributes.end());
        DebugAttributes.insert(aAttrName);
#endif
        CheckValidName(aAttrName);
        if (!writeString(aAttrName, falsefalse))
            eRet = SAX_ERROR;

        mp_Sequence[nCurrentPos] = '=';
        nCurrentPos++;
        if (nCurrentPos == SEQUENCESIZE)
            nCurrentPos = writeSequence();
        mp_Sequence[nCurrentPos] = '"';
        nCurrentPos++;
        if (nCurrentPos == SEQUENCESIZE)
            nCurrentPos = writeSequence();

        if (!writeString(xAttribs->getValueByIndex(i), truetrue) && eRet != SAX_ERROR)
            eRet = SAX_WARNING;

        mp_Sequence[nCurrentPos] = '"';
        nCurrentPos++;
        if (nCurrentPos == SEQUENCESIZE)
            nCurrentPos = writeSequence();
    }

    m_bStartElementFinished = false// because the '>' character is not added,
        // because it is possible, that the "/>"
        // characters have to add
    return eRet;
}

bool SaxWriterHelper::FinishEmptyElement()
{
    if (m_bStartElementFinished)
        return false;

    mp_Sequence[nCurrentPos] = '/';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
    mp_Sequence[nCurrentPos] = '>';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();

    m_bStartElementFinished = true;

    return true;
}

bool SaxWriterHelper::endElement(const OUString& rName)
{
    FinishStartElement();

    mp_Sequence[nCurrentPos] = '<';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
    mp_Sequence[nCurrentPos] = '/';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();

    CheckValidName(rName);
    bool bRet(writeString(rName, falsefalse));

    mp_Sequence[nCurrentPos] = '>';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();

    return bRet;
}

void SaxWriterHelper::endDocument()
{
    if (nCurrentPos > 0)
    {
        m_Sequence.realloc(nCurrentPos);
        nCurrentPos = writeSequence();
        //m_Sequence.realloc(SEQUENCESIZE);
    }
}

void SaxWriterHelper::clearBuffer()
{
    FinishStartElement();
    if (nCurrentPos > 0)
    {
        m_Sequence.realloc(nCurrentPos);
        nCurrentPos = writeSequence();
        m_Sequence.realloc(SEQUENCESIZE);
        // Be sure to update the array pointer after the reallocation.
        mp_Sequence = m_Sequence.getArray();
    }
}

bool SaxWriterHelper::processingInstruction(const OUString& rTarget, const OUString& rData)
{
    FinishStartElement();
    mp_Sequence[nCurrentPos] = '<';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
    mp_Sequence[nCurrentPos] = '?';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();

    bool bRet(writeString(rTarget, falsefalse));

    mp_Sequence[nCurrentPos] = ' ';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();

    if (!writeString(rData, falsefalse))
        bRet = false;

    mp_Sequence[nCurrentPos] = '?';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
    mp_Sequence[nCurrentPos] = '>';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();

    return bRet;
}

void SaxWriterHelper::startCDATA()
{
    FinishStartElement();
    if ((nCurrentPos + 9) <= SEQUENCESIZE)
    {
        memcpy(&(mp_Sequence[nCurrentPos]), ", 9);
        nCurrentPos += 9;
    }
    else
        AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const*>("), 9);
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
}

void SaxWriterHelper::endCDATA()
{
    FinishStartElement();
    if ((nCurrentPos + 3) <= SEQUENCESIZE)
    {
        memcpy(&(mp_Sequence[nCurrentPos]), "]]>", 3);
        nCurrentPos += 3;
    }
    else
        AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const*>("]]>"), 3);
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
}

bool SaxWriterHelper::comment(const OUString& rComment)
{
    FinishStartElement();
    mp_Sequence[nCurrentPos] = '<';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
    mp_Sequence[nCurrentPos] = '!';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
    mp_Sequence[nCurrentPos] = '-';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
    mp_Sequence[nCurrentPos] = '-';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();

    bool bRet(writeString(rComment, falsefalse));

    mp_Sequence[nCurrentPos] = '-';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
    mp_Sequence[nCurrentPos] = '-';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();
    mp_Sequence[nCurrentPos] = '>';
    nCurrentPos++;
    if (nCurrentPos == SEQUENCESIZE)
        nCurrentPos = writeSequence();

    return bRet;
}

sal_Int32 SaxWriterHelper::calcXMLByteLength(const OUString& rStr, bool bDoNormalization,
                                             bool bNormalizeWhitespace)
{
    sal_Int32 nOutputLength = 0;
    sal_uInt32 nSurrogate = 0;

    const sal_Unicode* pStr = rStr.getStr();
    sal_Int32 nStrLen = rStr.getLength();
    for (sal_Int32 i = 0; i < nStrLen; i++)
    {
        sal_uInt16 c = pStr[i];
        if (!IsInvalidChar(c) && (c >= 0x0001) && (c <= 0x007F))
        {
            if (bDoNormalization)
            {
                switch (c)
                {
                    case '&'// resemble to &
                        nOutputLength += 5;
                        break;
                    case '<'// <
                    case '>'// >
                        nOutputLength += 4;
                        break;
                    case '\'': // '
                    case '"'// "
                    case 13: // 
                        nOutputLength += 6;
                        break;

                    case 10: // 
                    case 9: // 
                        if (bNormalizeWhitespace)
                        {
                            nOutputLength += 6;
                        }
                        else
                        {
                            nOutputLength++;
                        }
                        break;
                    default:
                        nOutputLength++;
                }
            }
            else
            {
                nOutputLength++;
            }
        }
        else
        {
            // Deal with replacements
            if (bDoNormalization && !m_Replacements.empty())
            {
                // search
                const ReplacementPair* it = findXMLReplacement(&pStr[i], nStrLen - i);

                if (it != nullptr)
                {
                    nOutputLength
                        += ::rtl::OUStringToOString(it->name, RTL_TEXTENCODING_UTF8).getLength();
                    i += it->replacement.getLength() - 1;
                    continue;
                }
            }

            // Deal with other unicode cases
            if (rtl::isHighSurrogate(c))
            {
                // save surrogate
                nSurrogate = c;
            }
            else if (rtl::isLowSurrogate(c))
            {
                // 2. surrogate: write as UTF-8 (if range is OK
                if (nSurrogate)
                    nOutputLength += 4;
                nSurrogate = 0;
            }
            else if (c > 0x07FF)
            {
                nOutputLength += 3;
            }
            else
            {
                nOutputLength += 2;
            }
        }

        // surrogate processing
        if ((nSurrogate != 0) && !rtl::isHighSurrogate(c))
            nSurrogate = 0;
    }

    return nOutputLength;
}

const ReplacementPair* SaxWriterHelper::findXMLReplacement(const sal_Unicode* pStr,
                                                           sal_Int32 nStrLen)
{
    for (size_t iter = 0; iter < m_Replacements.size(); ++iter)
    {
        if (m_Replacements[iter].replacement.getLength() > nStrLen)
            continue;
        sal_Int32 matches = m_Replacements[iter].replacement.compareTo(
            std::u16string_view(pStr, m_Replacements[iter].replacement.getLength()));
        if (matches == 0)
            return &m_Replacements[iter];
        if (matches > 0)
            return nullptr;
    }
    return nullptr;
}

class SAXWriter : public WeakImplHelper<XWriter, XServiceInfo>
{
public:
    SAXWriter()
        : m_bDocStarted(false)
        , m_bIsCDATA(false)
        , m_bForceLineBreak(false)
        , m_bAllowLineBreak(false)
        , m_nLevel(0)
    {
    }

public// XActiveDataSource
    virtual void SAL_CALL setOutputStream(const Reference<XOutputStream>& aStream) override
    {
        try
        {
            // temporary: set same stream again to clear buffer
            if (m_out == aStream && m_pSaxWriterHelper && m_bDocStarted)
                m_pSaxWriterHelper->clearBuffer();
            else
            {
                m_out = aStream;
                m_pSaxWriterHelper.reset(new SaxWriterHelper(m_out));
                m_bDocStarted = false;
                m_nLevel = 0;
                m_bIsCDATA = false;
            }
        }
        catch (const SAXException& e)
        {
            throw css::lang::WrappedTargetRuntimeException(e.Message, getXWeak(),
                                                           e.WrappedException);
        }
    }
    virtual Reference<XOutputStream> SAL_CALL getOutputStream() override { return m_out; }

public// XDocumentHandler
    virtual void SAL_CALL startDocument() override;

    virtual void SAL_CALL endDocument() override;

    virtual void SAL_CALL startElement(const OUString& aName,
                                       const Reference<XAttributeList>& xAttribs) override;

    virtual void SAL_CALL endElement(const OUString& aName) override;

    virtual void SAL_CALL characters(const OUString& aChars) override;

    virtual void SAL_CALL ignorableWhitespace(const OUString& aWhitespaces) override;
    virtual void SAL_CALL processingInstruction(const OUString& aTarget,
                                                const OUString& aData) override;
    virtual void SAL_CALL setDocumentLocator(const Reference<XLocator>& xLocator) override;
    virtual void SAL_CALL setCustomEntityNames(
        const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>&
            replacements) override;

public// XExtendedDocumentHandler
    virtual void SAL_CALL startCDATA() override;
    virtual void SAL_CALL endCDATA() override;
    virtual void SAL_CALL comment(const OUString& sComment) override;
    virtual void SAL_CALL unknown(const OUString& sString) override;
    virtual void SAL_CALL allowLineBreak() override;

public// XServiceInfo
    OUString SAL_CALL getImplementationName() override;
    Sequence<OUString> SAL_CALL getSupportedServiceNames() override;
    sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override;

private:
    sal_Int32 getIndentPrefixLength(sal_Int32 nFirstLineBreakOccurrence) noexcept;

    Reference<XOutputStream> m_out;
    std::unique_ptr<SaxWriterHelper> m_pSaxWriterHelper;

    // Status information
    bool m_bDocStarted : 1;
    bool m_bIsCDATA : 1;
    bool m_bForceLineBreak : 1;
    bool m_bAllowLineBreak : 1;
    sal_Int32 m_nLevel;
};

sal_Int32 SAXWriter::getIndentPrefixLength(sal_Int32 nFirstLineBreakOccurrence) noexcept
{
    sal_Int32 nLength = -1;
    if (m_pSaxWriterHelper)
    {
        if (m_bForceLineBreak
            || (m_bAllowLineBreak
                && ((nFirstLineBreakOccurrence + m_pSaxWriterHelper->GetLastColumnCount())
                    > MAXCOLUMNCOUNT)))
            nLength = m_nLevel;
    }
    m_bForceLineBreak = false;
    m_bAllowLineBreak = false;
    return nLength;
}

bool isFirstCharWhitespace(const sal_Unicode* p) noexcept { return *p == ' '; }

// XServiceInfo
OUString SAXWriter::getImplementationName()
{
    return u"com.sun.star.extensions.xml.sax.Writer"_ustr;
}

// XServiceInfo
sal_Bool SAXWriter::supportsService(const OUString& ServiceName)
{
    return cppu::supportsService(this, ServiceName);
}

/// XServiceInfo: returns the single service name this component supports.
Sequence<OUString> SAXWriter::getSupportedServiceNames()
{
    Sequence<OUString> aServiceNames{ u"com.sun.star.xml.sax.Writer"_ustr };
    return aServiceNames;
}

void SAXWriter::startDocument()
{
    if (m_bDocStarted || !m_out.is() || !m_pSaxWriterHelper)
    {
        throw SAXException();
    }
    m_bDocStarted = true;
    m_pSaxWriterHelper->startDocument();
}

void SAXWriter::endDocument()
{
    if (!m_bDocStarted)
    {
        throw SAXException(u"endDocument called before startDocument"_ustr, Reference<XInterface>(),
                           Any());
    }
    if (m_nLevel)
    {
        throw SAXException(u"unexpected end of document"_ustr, Reference<XInterface>(), Any());
    }
    m_pSaxWriterHelper->endDocument();
    try
    {
        m_out->closeOutput();
    }
    catch (const IOException&)
    {
        css::uno::Any anyEx = cppu::getCaughtException();
        throw SAXException(u"IO exception during closing the IO Stream"_ustr,
                           Reference<XInterface>(), anyEx);
    }
}

void SAXWriter::startElement(const OUString& aName, const Reference<XAttributeList>&&nbsp;xAttribs)
{
    if (!m_bDocStarted)
    {
        throw SAXException(u"startElement called before startDocument"_ustr, {}, {});
    }
    if (m_bIsCDATA)
    {
        throw SAXException(u"startElement call not allowed with CDATA sections"_ustr, {}, {});
    }

    sal_Int32 nLength(0);
    if (m_bAllowLineBreak)
    {
        sal_Int32 nAttribCount = xAttribs.is() ? xAttribs->getLength() : 0;

        nLength++; // "<"
        nLength += m_pSaxWriterHelper->calcXMLByteLength(aName, falsefalse); // the tag name

        sal_Int16 n;
        for (n = 0; n < static_cast<sal_Int16>(nAttribCount); n++)
        {
            nLength++; // " "
            OUString tmp = xAttribs->getNameByIndex(n);

            nLength += m_pSaxWriterHelper->calcXMLByteLength(tmp, falsefalse);

            nLength += 2; // ="

            tmp = xAttribs->getValueByIndex(n);

            nLength += m_pSaxWriterHelper->calcXMLByteLength(tmp, truetrue);

            nLength += 1; // "
        }

        nLength++; // '>'
    }

    // Is there a new indentation necessary ?
    sal_Int32 nPrefix(getIndentPrefixLength(nLength));

    // write into sequence
    if (nPrefix >= 0)
        m_pSaxWriterHelper->insertIndentation(nPrefix);

    SaxInvalidCharacterError eRet(m_pSaxWriterHelper->startElement(aName, xAttribs));

    m_nLevel++;

    if (eRet == SAX_WARNING)
    {
        throw SAXInvalidCharacterException(
            u"Invalid character during XML-Export in an attribute value"_ustr, {}, {});
    }
    else if (eRet == SAX_ERROR)
    {
        throw SAXException(u"Invalid character during XML-Export"_ustr, {}, {});
    }
}

void SAXWriter::endElement(const OUString& aName)
{
    if (!m_bDocStarted)
    {
        throw SAXException();
    }
    m_nLevel--;

    if (m_nLevel < 0)
    {
        throw SAXException();
    }
    bool bRet(true);

    // check here because Helper's endElement is not always called
#ifdef DBG_UTIL
    assert(!m_pSaxWriterHelper->m_DebugStartedElements.empty());
    // Well-formedness constraint: Element Type Match
    assert(aName == m_pSaxWriterHelper->m_DebugStartedElements.top());
    m_pSaxWriterHelper->m_DebugStartedElements.pop();
#endif

    if (m_pSaxWriterHelper->FinishEmptyElement())
        m_bForceLineBreak = false;
    else
    {
        // only ascii chars allowed
        sal_Int32 nLength(0);
        if (m_bAllowLineBreak)
            nLength = 3 + m_pSaxWriterHelper->calcXMLByteLength(aName, falsefalse);
        sal_Int32 nPrefix = getIndentPrefixLength(nLength);

        if (nPrefix >= 0)
            m_pSaxWriterHelper->insertIndentation(nPrefix);

        bRet = m_pSaxWriterHelper->endElement(aName);
    }

    if (!bRet)
    {
        throw SAXException(u"Invalid character during XML-Export"_ustr, {}, {});
    }
}

void SAXWriter::characters(const OUString& aChars)
{
    if (!m_bDocStarted)
    {
        throw SAXException(u"characters method called before startDocument"_ustr, {}, {});
    }

    bool bThrowException(false);
    if (!aChars.isEmpty())
    {
        if (m_bIsCDATA)
            bThrowException = !m_pSaxWriterHelper->writeString(aChars, falsefalse);
        else
        {
            // Note : nFirstLineBreakOccurrence is not exact, because we don't know, how
            //        many 2 and 3 byte chars are inbetween. However this whole stuff
            //        is eitherway for pretty printing only, so it does not need to be exact.
            sal_Int32 nLength(0);
            sal_Int32 nIndentPrefix(-1);
            if (m_bAllowLineBreak)
            {
                // returns position of first ascii 10 within the string, -1 when no 10 in string.
                sal_Int32 nFirstLineBreakOccurrence = aChars.indexOf(LINEFEED);

                nLength = m_pSaxWriterHelper->calcXMLByteLength(aChars, !m_bIsCDATA, false);
                nIndentPrefix = getIndentPrefixLength(
                    nFirstLineBreakOccurrence >= 0 ? nFirstLineBreakOccurrence : nLength);
            }
            else
                nIndentPrefix = getIndentPrefixLength(nLength);

            // insert indentation
            if (nIndentPrefix >= 0)
            {
                if (isFirstCharWhitespace(aChars.getStr()))
                    m_pSaxWriterHelper->insertIndentation(nIndentPrefix - 1);
                else
                    m_pSaxWriterHelper->insertIndentation(nIndentPrefix);
            }
            bThrowException = !m_pSaxWriterHelper->writeString(aChars, truefalse);
        }
    }
    if (bThrowException)
    {
        throw SAXInvalidCharacterException(u"Invalid character during XML-Export"_ustr, {}, {});
    }
}

void SAXWriter::ignorableWhitespace(const OUString&)
{
    if (!m_bDocStarted)
    {
        throw SAXException();
    }

    m_bForceLineBreak = true;
}

void SAXWriter::processingInstruction(const OUString& aTarget, const OUString& ;aData)
{
    if (!m_bDocStarted || m_bIsCDATA)
    {
        throw SAXException();
    }

    sal_Int32 nLength(0);
    if (m_bAllowLineBreak)
    {
        nLength = 2; // "<?"
        nLength += m_pSaxWriterHelper->calcXMLByteLength(aTarget, falsefalse);

        nLength += 1; // " "

        nLength += m_pSaxWriterHelper->calcXMLByteLength(aData, falsefalse);

        nLength += 2; // "?>"
    }

    sal_Int32 nPrefix = getIndentPrefixLength(nLength);

    if (nPrefix >= 0)
        m_pSaxWriterHelper->insertIndentation(nPrefix);

    if (!m_pSaxWriterHelper->processingInstruction(aTarget, aData))
    {
        throw SAXException(u"Invalid character during XML-Export"_ustr, {}, {});
    }
}

void SAXWriter::setDocumentLocator(const Reference<XLocator>&) {}

void SAXWriter::setCustomEntityNames(
    const ::css::uno::Sequence<::css::beans::Pair<::rtl::OUString, ::rtl::OUString>>& replacements)
{
    m_pSaxWriterHelper->setCustomEntityNames(replacements);
}

void SAXWriter::startCDATA()
{
    if (!m_bDocStarted || m_bIsCDATA)
    {
        throw SAXException();
    }

    sal_Int32 nPrefix = getIndentPrefixLength(9);
    if (nPrefix >= 0)
        m_pSaxWriterHelper->insertIndentation(nPrefix);

    m_pSaxWriterHelper->startCDATA();

    m_bIsCDATA = true;
}

void SAXWriter::endCDATA()
{
    if (!m_bDocStarted || !m_bIsCDATA)
    {
        throw SAXException(u"endCDATA was called without startCDATA"_ustr, {}, {});
    }

    sal_Int32 nPrefix = getIndentPrefixLength(3);
    if (nPrefix >= 0)
        m_pSaxWriterHelper->insertIndentation(nPrefix);

    m_pSaxWriterHelper->endCDATA();

    m_bIsCDATA = false;
}

void SAXWriter::comment(const OUString& sComment)
{
    if (!m_bDocStarted || m_bIsCDATA)
    {
        throw SAXException();
    }

    sal_Int32 nLength(0);
    if (m_bAllowLineBreak)
    {
        nLength = 4; // "<!--"
        nLength += m_pSaxWriterHelper->calcXMLByteLength(sComment, falsefalse);

        nLength += 3;
    }

    sal_Int32 nPrefix = getIndentPrefixLength(nLength);
    if (nPrefix >= 0)
        m_pSaxWriterHelper->insertIndentation(nPrefix);

    if (!m_pSaxWriterHelper->comment(sComment))
    {
        throw SAXException(u"Invalid character during XML-Export"_ustr, {}, {});
    }
}

void SAXWriter::allowLineBreak()
{
    if (!m_bDocStarted || m_bAllowLineBreak)
    {
        throw SAXException();
    }

    m_bAllowLineBreak = true;
}

void SAXWriter::unknown(const OUString& sString)
{
    if (!m_bDocStarted)
    {
        throw SAXException();
    }
    if (m_bIsCDATA)
    {
        throw SAXException();
    }

    if (sString.startsWith("))
        return;

    sal_Int32 nLength(0);
    if (m_bAllowLineBreak)
        nLength = m_pSaxWriterHelper->calcXMLByteLength(sString, falsefalse);

    sal_Int32 nPrefix = getIndentPrefixLength(nLength);
    if (nPrefix >= 0)
        m_pSaxWriterHelper->insertIndentation(nPrefix);

    if (!m_pSaxWriterHelper->writeString(sString, falsefalse))
    {
        throw SAXException(u"Invalid character during XML-Export"_ustr, {}, {});
    }
}

// namespace

extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
com_sun_star_extensions_xml_sax_Writer_get_implementation(css::uno::XComponentContext*,
                                                          css::uno::Sequence<css::uno::Any> const&)
{
    return cppu::acquire(new SAXWriter);
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Messung V0.5
C=90 H=96 G=93

¤ Dauer der Verarbeitung: 0.23 Sekunden  (vorverarbeitet)  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.