Quellcodebibliothek Statistik Leitseite products/Sources/formale Sprachen/C/LibreOffice/xmlreader/source/   (Office von Apache Version 25.8.3.2©)  Datei vom 5.10.2025 mit Größe 29 kB image not shown  

Quelle  xmlreader.cxx   Sprache: C

 
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */


#include <sal/config.h>

#include <cassert>
#include <climits>

#include <com/sun/star/container/NoSuchElementException.hpp>
#include <com/sun/star/uno/RuntimeException.hpp>
#include <o3tl/numeric.hxx>
#include <osl/file.h>
#include <rtl/character.hxx>
#include <rtl/string.h>
#include <rtl/ustring.hxx>
#include <sal/log.hxx>
#include <sal/types.h>
#include <utility>
#include <xmlreader/pad.hxx>
#include <xmlreader/span.hxx>
#include <xmlreader/xmlreader.hxx>

namespace xmlreader {

namespace {

bool isSpace(char c) {
    switch (c) {
    case '\x09':
    case '\x0A':
    case '\x0D':
    case ' ':
        return true;
    default:
        return false;
    }
}

}

XmlReader::XmlReader(OUString fileUrl)
    : fileUrl_(std::move(fileUrl))
    , fileHandle_(nullptr)
{
    oslFileError e = osl_openFile(
        fileUrl_.pData, &fileHandle_, osl_File_OpenFlag_Read);
    switch (e)
    {
    case osl_File_E_None:
        break;
    case osl_File_E_NOENT:
        throw css::container::NoSuchElementException( fileUrl_ );
    default:
        throw css::uno::RuntimeException(
            "cannot open " + fileUrl_ + ": " + OUString::number(e));
    }
    e = osl_getFileSize(fileHandle_, &fileSize_);
    if (e == osl_File_E_None) {
        e = osl_mapFile(
            fileHandle_, &fileAddress_, fileSize_, 0,
            osl_File_MapFlag_WillNeed);
    }
    if (e != osl_File_E_None) {
        oslFileError e2 = osl_closeFile(fileHandle_);
        if (e2 != osl_File_E_None) {
            SAL_WARN(
                "xmlreader",
                "osl_closeFile of \"" << fileUrl_ << "\" failed with " << +e2);
        }
        throw css::uno::RuntimeException(
            "cannot mmap " + fileUrl_ + " (" + OUString::number(e) + ")" );
    }
    namespaceIris_.emplace_back("http://www.w3.org/XML/1998/namespace");
    namespaces_.emplace_back(Span("xml"), NAMESPACE_XML);
    pos_ = static_castchar * >(fileAddress_);
    end_ = pos_ + fileSize_;
    state_ = State::Content;
    firstAttribute_ = true;
}

XmlReader::~XmlReader() {
    if (!fileHandle_)
        return;
    oslFileError e = osl_unmapMappedFile(fileHandle_, fileAddress_, fileSize_);
    if (e != osl_File_E_None) {
        SAL_WARN(
            "xmlreader",
            "osl_unmapMappedFile of \"" << fileUrl_ << "\" failed with " << +e);
    }
    e = osl_closeFile(fileHandle_);
    if (e != osl_File_E_None) {
        SAL_WARN(
            "xmlreader",
            "osl_closeFile of \"" << fileUrl_ << "\" failed with " << +e);
    }
}

int XmlReader::registerNamespaceIri(Span const & iri) {
    int id = toNamespaceId(namespaceIris_.size());
    namespaceIris_.push_back(iri);
    if (iri == "http://www.w3.org/2001/XMLSchema-instance") {
        // Old user layer .xcu files used the xsi namespace prefix without
        // declaring a corresponding namespace binding, see issue 77174; reading
        // those files during migration would fail without this hack that can be
        // removed once migration is no longer relevant (see
        // configmgr::Components::parseModificationLayer):
        namespaces_.emplace_back(Span("xsi"), id);
    }
    return id;
}

XmlReader::Result XmlReader::nextItem(Text reportText, Span * data, int * nsId)
{
    switch (state_) {
    case State::Content:
        switch (reportText) {
        case Text::NONE:
            return handleSkippedText(data, nsId);
        case Text::Raw:
            return handleRawText(data);
        default// Text::Normalized
            return handleNormalizedText(data);
        }
    case State::StartTag:
        return handleStartTag(nsId, data);
    case State::EndTag:
        return handleEndTag();
    case State::EmptyElementTag:
        handleElementEnd();
        return Result::End;
    default// State::Done
        return Result::Done;
    }
}

bool XmlReader::nextAttribute(int * nsId, Span * localName) {
    assert(nsId != nullptr && localName != nullptr);
    if (firstAttribute_) {
        currentAttribute_ = attributes_.begin();
        firstAttribute_ = false;
    } else {
        ++currentAttribute_;
    }
    if (currentAttribute_ == attributes_.end()) {
        return false;
    }
    if (currentAttribute_->nameColon == nullptr) {
        *nsId = NAMESPACE_NONE;
        *localName = Span(
            currentAttribute_->nameBegin,
            currentAttribute_->nameEnd - currentAttribute_->nameBegin);
    } else {
        *nsId = getNamespaceId(
            Span(
                currentAttribute_->nameBegin,
                currentAttribute_->nameColon - currentAttribute_->nameBegin));
        *localName = Span(
            currentAttribute_->nameColon + 1,
            currentAttribute_->nameEnd - (currentAttribute_->nameColon + 1));
    }
    return true;
}

Span XmlReader::getAttributeValue(bool fullyNormalize) {
    return handleAttributeValue(
        currentAttribute_->valueBegin, currentAttribute_->valueEnd,
        fullyNormalize);
}

int XmlReader::getNamespaceId(Span const & prefix) const {
    auto i = std::find_if(namespaces_.crbegin(), namespaces_.crend(),
        [&prefix](const NamespaceData& rNamespaceData) { return prefix == rNamespaceData.prefix; });

    if (i != namespaces_.rend())
        return i->nsId;

    return NAMESPACE_UNKNOWN;
}


void XmlReader::normalizeLineEnds(Span const & text) {
    char const * p = text.begin;
    sal_Int32 n = text.length;
    for (;;) {
        sal_Int32 i = rtl_str_indexOfChar_WithLength(p, n, '\x0D');
        if (i < 0) {
            break;
        }
        pad_.add(p, i);
        p += i + 1;
        n -= i + 1;
        if (n == 0 || *p != '\x0A') {
            pad_.add("\x0A");
        }
    }
    pad_.add(p, n);
}

void XmlReader::skipSpace() {
    while (isSpace(peek())) {
        ++pos_;
    }
}

bool XmlReader::skipComment() {
    if (rtl_str_shortenedCompare_WithLength(
            pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("--"),
            RTL_CONSTASCII_LENGTH("--")) !=
        0)
    {
        return false;
    }
    pos_ += RTL_CONSTASCII_LENGTH("--");
    sal_Int32 i = rtl_str_indexOfStr_WithLength(
        pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("--"));
    if (i < 0) {
        throw css::uno::RuntimeException(
            "premature end (within comment) of " + fileUrl_ );
    }
    pos_ += i + RTL_CONSTASCII_LENGTH("--");
    if (read() != '>') {
        throw css::uno::RuntimeException(
            "illegal \"--\" within comment in " + fileUrl_ );
    }
    return true;
}

void XmlReader::skipProcessingInstruction() {
    sal_Int32 i = rtl_str_indexOfStr_WithLength(
        pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("?>"));
    if (i < 0) {
        throw css::uno::RuntimeException(
            "bad ' + fileUrl_ );
    }
    pos_ += i + RTL_CONSTASCII_LENGTH("?>");
}

void XmlReader::skipDocumentTypeDeclaration() {
    // Neither is it checked that the doctypedecl is at the correct position in
    // the document, nor that it is well-formed:
    for (;;) {
        char c = read();
        switch (c) {
        case '\0'// i.e., EOF
            throw css::uno::RuntimeException(
                "premature end (within DTD) of " + fileUrl_ );
        case '"':
        case '\'':
            {
                sal_Int32 i = rtl_str_indexOfChar_WithLength(
                    pos_, end_ - pos_, c);
                if (i < 0) {
                    throw css::uno::RuntimeException(
                        "premature end (within DTD) of " + fileUrl_ );
                }
                pos_ += i + 1;
            }
            break;
        case '>':
            return;
        case '[':
            for (;;) {
                c = read();
                switch (c) {
                case '\0'// i.e., EOF
                    throw css::uno::RuntimeException(
                        "premature end (within DTD) of " + fileUrl_ );
                case '"':
                case '\'':
                    {
                        sal_Int32 i = rtl_str_indexOfChar_WithLength(
                            pos_, end_ - pos_, c);
                        if (i < 0) {
                            throw css::uno::RuntimeException(
                                "premature end (within DTD) of " + fileUrl_ );
                        }
                        pos_ += i + 1;
                    }
                    break;
                case '<':
                    switch (read()) {
                    case '\0'// i.e., EOF
                        throw css::uno::RuntimeException(
                            "premature end (within DTD) of " + fileUrl_ );
                    case '!':
                        skipComment();
                        break;
                    case '?':
                        skipProcessingInstruction();
                        break;
                    default:
                        break;
                    }
                    break;
                case ']':
                    skipSpace();
                    if (read() != '>') {
                        throw css::uno::RuntimeException(
                            "missing \">\" of DTD in " + fileUrl_ );
                    }
                    return;
                default:
                    break;
                }
            }
        default:
            break;
        }
    }
}

Span XmlReader::scanCdataSection() {
    if (rtl_str_shortenedCompare_WithLength(
            pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("[CDATA["),
            RTL_CONSTASCII_LENGTH("[CDATA[")) !=
        0)
    {
        return Span();
    }
    pos_ += RTL_CONSTASCII_LENGTH("[CDATA[");
    char const * begin = pos_;
    sal_Int32 i = rtl_str_indexOfStr_WithLength(
        pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("]]>"));
    if (i < 0) {
        throw css::uno::RuntimeException(
            "premature end (within CDATA section) of " + fileUrl_ );
    }
    pos_ += i + RTL_CONSTASCII_LENGTH("]]>");
    return Span(begin, i);
}

bool XmlReader::scanName(char const ** nameColon) {
    assert(nameColon != nullptr && *nameColon == nullptr);
    for (char const * begin = pos_;; ++pos_) {
        switch (peek()) {
        case '\0'// i.e., EOF
        case '\x09':
        case '\x0A':
        case '\x0D':
        case ' ':
        case '/':
        case '=':
        case '>':
            return pos_ != begin;
        case ':':
            *nameColon = pos_;
            break;
        default:
            break;
        }
    }
}

int XmlReader::scanNamespaceIri(char const * begin, char const * end) {
    assert(begin != nullptr && begin <= end);
    Span iri(handleAttributeValue(begin, end, false));
    for (NamespaceIris::size_type i = 0; i < namespaceIris_.size(); ++i) {
        if (namespaceIris_[i] == iri) {
            return toNamespaceId(i);
        }
    }
    return XmlReader::NAMESPACE_UNKNOWN;
}

char const * XmlReader::handleReference(char const * position, char const * end)
{
    assert(position != nullptr && *position == '&' && position < end);
    ++position;
    if (*position == '#') {
        ++position;
        sal_uInt32 val = 0;
        char const * p;
        if (*position == 'x') {
            ++position;
            p = position;
            for (;; ++position)
            {
                val = o3tl::convertToHex<sal_uInt32>(*position);
                if (val >= 16)
                    break;

                if (!rtl::isUnicodeCodePoint(val)) { // avoid overflow
                    throw css::uno::RuntimeException(
                        "'&#x...' too large in " + fileUrl_ );
                }
            }
        } else {
            p = position;
            for (;; ++position) {
                char c = *position;
                if (c >= '0' && c <= '9') {
                    val = 10 * val + (c - '0');
                } else {
                    break;
                }
                if (!rtl::isUnicodeCodePoint(val)) { // avoid overflow
                    throw css::uno::RuntimeException(
                        "'&#...' too large in " + fileUrl_ );
                }
            }
        }
        if (position == p || *position++ != ';') {
            throw css::uno::RuntimeException(
                "'&#...' missing ';' in " + fileUrl_ );
        }
        assert(rtl::isUnicodeCodePoint(val));
        if ((val < 0x20 && val != 0x9 && val != 0xA && val != 0xD) ||
            (val >= 0xD800 && val <= 0xDFFF) || val == 0xFFFE || val == 0xFFFF)
        {
            throw css::uno::RuntimeException(
                "character reference denoting invalid character in " + fileUrl_ );
        }
        char buf[4];
        sal_Int32 len;
        if (val < 0x80) {
            buf[0] = static_castchar >(val);
            len = 1;
        } else if (val < 0x800) {
            buf[0] = static_castchar >((val >> 6) | 0xC0);
            buf[1] = static_castchar >((val & 0x3F) | 0x80);
            len = 2;
        } else if (val < 0x10000) {
            buf[0] = static_castchar >((val >> 12) | 0xE0);
            buf[1] = static_castchar >(((val >> 6) & 0x3F) | 0x80);
            buf[2] = static_castchar >((val & 0x3F) | 0x80);
            len = 3;
        } else {
            buf[0] = static_castchar >((val >> 18) | 0xF0);
            buf[1] = static_castchar >(((val >> 12) & 0x3F) | 0x80);
            buf[2] = static_castchar >(((val >> 6) & 0x3F) | 0x80);
            buf[3] = static_castchar >((val & 0x3F) | 0x80);
            len = 4;
        }
        pad_.addEphemeral(buf, len);
        return position;
    } else {
        struct EntityRef {
            char const * inBegin;
            sal_Int32 const inLength;
            char const * outBegin;
            sal_Int32 const outLength;
        };
        static EntityRef const refs[] = {
            { RTL_CONSTASCII_STRINGPARAM("amp;"),
              RTL_CONSTASCII_STRINGPARAM("&") },
            { RTL_CONSTASCII_STRINGPARAM("lt;"),
              RTL_CONSTASCII_STRINGPARAM("<") },
            { RTL_CONSTASCII_STRINGPARAM("gt;"),
              RTL_CONSTASCII_STRINGPARAM(">") },
            { RTL_CONSTASCII_STRINGPARAM("apos;"),
              RTL_CONSTASCII_STRINGPARAM("'") },
            { RTL_CONSTASCII_STRINGPARAM("quot;"),
              RTL_CONSTASCII_STRINGPARAM("\"") } };
        for (const auto & ref : refs) {
            if (rtl_str_shortenedCompare_WithLength(
                    position, end - position, ref.inBegin, ref.inLength,
                    ref.inLength) ==
                0)
            {
                position += ref.inLength;
                pad_.add(ref.outBegin, ref.outLength);
                return position;
            }
        }
        throw css::uno::RuntimeException(
            "unknown entity reference in " + fileUrl_ );
    }
}

Span XmlReader::handleAttributeValue(
    char const * begin, char const * end, bool fullyNormalize)
{
    pad_.clear();
    if (fullyNormalize) {
        while (begin != end && isSpace(*begin)) {
            ++begin;
        }
        while (end != begin && isSpace(end[-1])) {
            --end;
        }
        char const * p = begin;
        enum Space { SPACE_NONE, SPACE_SPAN, SPACE_BREAK };
            // a single true space character can go into the current span,
            // everything else breaks the span
        Space space = SPACE_NONE;
        while (p != end) {
            switch (*p) {
            case '\x09':
            case '\x0A':
            case '\x0D':
                switch (space) {
                case SPACE_NONE:
                    pad_.add(begin, p - begin);
                    pad_.add(" ");
                    space = SPACE_BREAK;
                    break;
                case SPACE_SPAN:
                    pad_.add(begin, p - begin);
                    space = SPACE_BREAK;
                    break;
                case SPACE_BREAK:
                    break;
                }
                begin = ++p;
                break;
            case ' ':
                switch (space) {
                case SPACE_NONE:
                    ++p;
                    space = SPACE_SPAN;
                    break;
                case SPACE_SPAN:
                    pad_.add(begin, p - begin);
                    begin = ++p;
                    space = SPACE_BREAK;
                    break;
                case SPACE_BREAK:
                    begin = ++p;
                    break;
                }
                break;
            case '&':
                pad_.add(begin, p - begin);
                p = handleReference(p, end);
                begin = p;
                space = SPACE_NONE;
                break;
            default:
                ++p;
                space = SPACE_NONE;
                break;
            }
        }
        pad_.add(begin, p - begin);
    } else {
        char const * p = begin;
        while (p != end) {
            switch (*p) {
            case '\x09':
            case '\x0A':
                pad_.add(begin, p - begin);
                begin = ++p;
                pad_.add(" ");
                break;
            case '\x0D':
                pad_.add(begin, p - begin);
                ++p;
                if (peek() == '\x0A') {
                    ++p;
                }
                begin = p;
                pad_.add(" ");
                break;
            case '&':
                pad_.add(begin, p - begin);
                p = handleReference(p, end);
                begin = p;
                break;
            default:
                ++p;
                break;
            }
        }
        pad_.add(begin, p - begin);
    }
    return pad_.get();
}

XmlReader::Result XmlReader::handleStartTag(int * nsId, Span * localName) {
    assert(nsId != nullptr && localName);
    char const * nameBegin = pos_;
    char const * nameColon = nullptr;
    if (!scanName(&nameColon)) {
        throw css::uno::RuntimeException(
            "bad tag name in " + fileUrl_ );
    }
    char const * nameEnd = pos_;
    NamespaceList::size_type inheritedNamespaces = namespaces_.size();
    bool hasDefaultNs = false;
    int defaultNsId = NAMESPACE_NONE;
    attributes_.clear();
    for (;;) {
        char const * p = pos_;
        skipSpace();
        if (peek() == '/' || peek() == '>') {
            break;
        }
        if (pos_ == p) {
            throw css::uno::RuntimeException(
                "missing whitespace before attribute in " + fileUrl_ );
        }
        char const * attrNameBegin = pos_;
        char const * attrNameColon = nullptr;
        if (!scanName(&attrNameColon)) {
            throw css::uno::RuntimeException(
                "bad attribute name in " + fileUrl_ );
        }
        char const * attrNameEnd = pos_;
        skipSpace();
        if (read() != '=') {
            throw css::uno::RuntimeException(
                "missing '=' in " + fileUrl_ );
        }
        skipSpace();
        char del = read();
        if (del != '\'' && del != '"') {
            throw css::uno::RuntimeException(
                "bad attribute value in " + fileUrl_ );
        }
        char const * valueBegin = pos_;
        sal_Int32 i = rtl_str_indexOfChar_WithLength(pos_, end_ - pos_, del);
        if (i < 0) {
            throw css::uno::RuntimeException(
                "unterminated attribute value in " + fileUrl_ );
        }
        char const * valueEnd = pos_ + i;
        pos_ += i + 1;
        if (attrNameColon == nullptr &&
            Span(attrNameBegin, attrNameEnd - attrNameBegin) == "xmlns")
        {
            hasDefaultNs = true;
            defaultNsId = scanNamespaceIri(valueBegin, valueEnd);
        } else if (attrNameColon != nullptr &&
                   Span(attrNameBegin, attrNameColon - attrNameBegin) ==
                       "xmlns")
        {
            namespaces_.emplace_back(
                    Span(attrNameColon + 1, attrNameEnd - (attrNameColon + 1)),
                    scanNamespaceIri(valueBegin, valueEnd));
        } else {
            attributes_.emplace_back(
                    attrNameBegin, attrNameEnd, attrNameColon, valueBegin,
                    valueEnd);
        }
    }
    if (!hasDefaultNs && !elements_.empty()) {
        defaultNsId = elements_.top().defaultNamespaceId;
    }
    firstAttribute_ = true;
    if (peek() == '/') {
        state_ = State::EmptyElementTag;
        ++pos_;
    } else {
        state_ = State::Content;
    }
    if (peek() != '>') {
        throw css::uno::RuntimeException(
            "missing '>' in " + fileUrl_ );
    }
    ++pos_;
    elements_.push(
        ElementData(
            Span(nameBegin, nameEnd - nameBegin), inheritedNamespaces,
            defaultNsId));
    if (nameColon == nullptr) {
        *nsId = defaultNsId;
        *localName = Span(nameBegin, nameEnd - nameBegin);
    } else {
        *nsId = getNamespaceId(Span(nameBegin, nameColon - nameBegin));
        *localName = Span(nameColon + 1, nameEnd - (nameColon + 1));
    }
    return Result::Begin;
}

XmlReader::Result XmlReader::handleEndTag() {
    if (elements_.empty()) {
        throw css::uno::RuntimeException(
            "spurious end tag in " + fileUrl_ );
    }
    char const * nameBegin = pos_;
    char const * nameColon = nullptr;
    if (!scanName(&nameColon) ||
        !elements_.top().name.equals(nameBegin, pos_ - nameBegin))
    {
        throw css::uno::RuntimeException(
            "tag mismatch in " + fileUrl_ );
    }
    handleElementEnd();
    skipSpace();
    if (peek() != '>') {
        throw css::uno::RuntimeException(
            "missing '>' in " + fileUrl_ );
    }
    ++pos_;
    return Result::End;
}

void XmlReader::handleElementEnd() {
    assert(!elements_.empty());
    auto end = elements_.top().inheritedNamespaces;
    namespaces_.resize(end);
    elements_.pop();
    state_ = elements_.empty() ? State::Done : State::Content;
}

XmlReader::Result XmlReader::handleSkippedText(Span * data, int * nsId) {
    for (;;) {
        auto i = static_cast<const char*>(std::memchr(pos_, '<', end_ - pos_));
        if (!i) {
            throw css::uno::RuntimeException(
                "premature end of " + fileUrl_ );
        }
        pos_ = i + 1;
        switch (peek()) {
        case '!':
            ++pos_;
            if (!skipComment() && !scanCdataSection().is()) {
                skipDocumentTypeDeclaration();
            }
            break;
        case '/':
            ++pos_;
            return handleEndTag();
        case '?':
            ++pos_;
            skipProcessingInstruction();
            break;
        default:
            return handleStartTag(nsId, data);
        }
    }
}

XmlReader::Result XmlReader::handleRawText(Span * text) {
    pad_.clear();
    for (char const * begin = pos_;;) {
        switch (peek()) {
        case '\0'// i.e., EOF
            throw css::uno::RuntimeException(
                "premature end of " + fileUrl_ );
        case '\x0D':
            pad_.add(begin, pos_ - begin);
            ++pos_;
            if (peek() != '\x0A') {
                pad_.add("\x0A");
            }
            begin = pos_;
            break;
        case '&':
            pad_.add(begin, pos_ - begin);
            pos_ = handleReference(pos_, end_);
            begin = pos_;
            break;
        case '<':
            pad_.add(begin, pos_ - begin);
            ++pos_;
            switch (peek()) {
            case '!':
                ++pos_;
                if (!skipComment()) {
                    Span cdata(scanCdataSection());
                    if (cdata.is()) {
                        normalizeLineEnds(cdata);
                    } else {
                        skipDocumentTypeDeclaration();
                    }
                }
                begin = pos_;
                break;
            case '/':
                *text = pad_.get();
                ++pos_;
                state_ = State::EndTag;
                return Result::Text;
            case '?':
                ++pos_;
                skipProcessingInstruction();
                begin = pos_;
                break;
            default:
                *text = pad_.get();
                state_ = State::StartTag;
                return Result::Text;
            }
            break;
        default:
            ++pos_;
            break;
        }
    }
}

XmlReader::Result XmlReader::handleNormalizedText(Span * text) {
    pad_.clear();
    char const * flowBegin = pos_;
    char const * flowEnd = pos_;
    enum Space { SPACE_START, SPACE_NONE, SPACE_SPAN, SPACE_BREAK };
        // a single true space character can go into the current flow,
        // everything else breaks the flow
    Space space = SPACE_START;
    for (;;) {
        switch (peek()) {
        case '\0'// i.e., EOF
            throw css::uno::RuntimeException(
                "premature end of " + fileUrl_ );
        case '\x09':
        case '\x0A':
        case '\x0D':
            switch (space) {
            case SPACE_START:
            case SPACE_BREAK:
                break;
            case SPACE_NONE:
            case SPACE_SPAN:
                space = SPACE_BREAK;
                break;
            }
            ++pos_;
            break;
        case ' ':
            switch (space) {
            case SPACE_START:
            case SPACE_BREAK:
                break;
            case SPACE_NONE:
                space = SPACE_SPAN;
                break;
            case SPACE_SPAN:
                space = SPACE_BREAK;
                break;
            }
            ++pos_;
            break;
        case '&':
            switch (space) {
            case SPACE_START:
                break;
            case SPACE_NONE:
            case SPACE_SPAN:
                pad_.add(flowBegin, pos_ - flowBegin);
                break;
            case SPACE_BREAK:
                pad_.add(flowBegin, flowEnd - flowBegin);
                pad_.add(" ");
                break;
            }
            pos_ = handleReference(pos_, end_);
            flowBegin = pos_;
            flowEnd = pos_;
            space = SPACE_NONE;
            break;
        case '<':
            ++pos_;
            switch (peek()) {
            case '!':
                ++pos_;
                if (skipComment()) {
                    space = SPACE_BREAK;
                } else {
                    Span cdata(scanCdataSection());
                    if (cdata.is()) {
                        // CDATA is not normalized (similar to character
                        // references; it keeps the code simple), but it might
                        // arguably be better to normalize it:
                        switch (space) {
                        case SPACE_START:
                            break;
                        case SPACE_NONE:
                        case SPACE_SPAN:
                            pad_.add(flowBegin, pos_ - flowBegin);
                            break;
                        case SPACE_BREAK:
                            pad_.add(flowBegin, flowEnd - flowBegin);
                            pad_.add(" ");
                            break;
                        }
                        normalizeLineEnds(cdata);
                        flowBegin = pos_;
                        flowEnd = pos_;
                        space = SPACE_NONE;
                    } else {
                        skipDocumentTypeDeclaration();
                    }
                }
                break;
            case '/':
                ++pos_;
                pad_.add(flowBegin, flowEnd - flowBegin);
                *text = pad_.get();
                state_ = State::EndTag;
                return Result::Text;
            case '?':
                ++pos_;
                skipProcessingInstruction();
                space = SPACE_BREAK;
                break;
            default:
                pad_.add(flowBegin, flowEnd - flowBegin);
                *text = pad_.get();
                state_ = State::StartTag;
                return Result::Text;
            }
            break;
        default:
            switch (space) {
            case SPACE_START:
                flowBegin = pos_;
                break;
            case SPACE_NONE:
            case SPACE_SPAN:
                break;
            case SPACE_BREAK:
                pad_.add(flowBegin, flowEnd - flowBegin);
                pad_.add(" ");
                flowBegin = pos_;
                break;
            }
            flowEnd = ++pos_;
            space = SPACE_NONE;
            break;
        }
    }
}

int XmlReader::toNamespaceId(NamespaceIris::size_type pos) {
    assert(pos <= INT_MAX);
    return static_castint >(pos);
}

}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

98%


¤ Dauer der Verarbeitung: 0.26 Sekunden  (vorverarbeitet)  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung ist noch experimentell.