Quelle impex.cxx Sprache: C

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
*   Licensed to the Apache Software Foundation (ASF) under one or more
*   contributor license agreements. See the NOTICE file distributed
*   with this work for additional information regarding copyright
*   ownership. The ASF licenses this file to you under the Apache
*   License, Version 2.0 (the "License"); you may not use this file
*   except in compliance with the License. You may obtain a copy of
*   the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/

#include <comphelper/processfactory.hxx>
#include <i18nlangtag/languagetag.hxx>
#include <i18nutil/unicode.hxx>
#include <sot/formats.hxx>
#include <sfx2/mieclip.hxx>
#include <com/sun/star/i18n/CalendarFieldIndex.hpp>
#include <sal/log.hxx>
#include <unotools/charclass.hxx>
#include <osl/module.hxx>
#include <o3tl/string_view.hxx>

#include <global.hxx>
#include <docsh.hxx>
#include <undoblk.hxx>
#include <rangenam.hxx>
#include <tabvwsh.hxx>
#include <filter.hxx>
#include <asciiopt.hxx>
#include <formulacell.hxx>
#include <cellform.hxx>
#include <progress.hxx>
#include <scitems.hxx>
#include <editable.hxx>
#include <compiler.hxx>
#include <warnbox.hxx>
#include <clipparam.hxx>
#include <impex.hxx>
#include <editutil.hxx>
#include <patattr.hxx>
#include <docpool.hxx>
#include <stringutil.hxx>
#include <cellvalue.hxx>
#include <tokenarray.hxx>
#include <documentimport.hxx>
#include <refundo.hxx>
#include <mtvelements.hxx>

#include <globstr.hrc>
#include <scresid.hxx>
#include <o3tl/safeint.hxx>
#include <tools/svlibrary.h>
#include <comphelper/configuration.hxx>
#include <vcl/svapp.hxx>
#include <vcl/weld.hxx>
#include <editeng/editobj.hxx>
#include <svl/numformat.hxx>
#include <rtl/character.hxx>
#include <rtl/math.hxx>
#include <sax/tools/converter.hxx>

#include <memory>
#include <string_view>

#include <unicode/uchar.h>

#include <osl/endian.h>
#include <osl/file.hxx>

// We don't want to end up with 2GB read in one line just because of malformed
// multiline fields, so chop it _somewhere_, which is twice supported columns
// times arbitrary maximum cell content length, 2*1024*64K=128M, and because
// it's sal_Unicode that's 256MB. If it's 2GB of data without LF we're out of
// luck anyway.
// Upper bound on the length (in sal_Unicode units) that lcl_appendLineData()
// lets a single field grow to.
constexpr sal_Int32 nArbitraryCellLengthLimit = SAL_MAX_UINT16;
// Upper bound handed to ReadUniOrByteStringLine() when reading one input line.
constexpr sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * nArbitraryCellLengthLimit;

namespace
{
    // Three character sequence (ESC, blank, colon) that lcl_UnescapeSylk()
    // replaces with a line feed when reading SYLK string content.
    const char SYLK_LF[]  = "\x1b :";
}

namespace {

// Identifies which producer/version wrote a SYLK stream, which decides how
// strings and semicolons have to be unescaped.
// The enumerator order is significant: the SYLK scanners in this file compare
// with `eVersion >= SylkVersion::OOO32` to select the "correct escaping" path.
enum class SylkVersion
{
    SCALC3,    // Wrote wrongly quoted strings and unescaped semicolons.
    OOO32,     // Correct strings, plus multiline content.
    OWN,       // Place our new versions, if any, before this value.
    OTHER      // Assume that aliens wrote correct strings.
};

}

// Whole document without Undo
ScImportExport::ScImportExport( ScDocument& r )
    : pDocSh( r.GetDocumentShell() )
    , rDoc( r )
    , nSizeLimit( 0 )
    , nMaxImportRow(!comphelper::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K)
    , cSep( '\t' )
    , cStr( '"' )
    , bFormulas( false )
    , bIncludeFiltered( true )
    , bAll( true )
    , bSingle( true )
    , bUndo( false )
    , bOverflowRow( false )
    , bOverflowCol( false )
    , bOverflowCell( false )
    , mbApi( true )
    , mbImportBroadcast(false)
    , mbOverwriting( false )
    , mbIncludeBOM(false)
{
    // Operates on the whole document (bAll) and never records undo, so
    // there is neither an undo document nor extended ASCII options yet.
    pUndoDoc.reset();
    pExtOptions.reset();
}

// Insert at current cell without range(s)
ScImportExport::ScImportExport( ScDocument& r, const ScAddress& rPt )
    : pDocSh( r.GetDocumentShell() )
    , rDoc( r )
    , aRange( rPt )
    , nSizeLimit( 0 )
    , nMaxImportRow(!comphelper::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K)
    , cSep( '\t' )
    , cStr( '"' )
    , bFormulas( false )
    , bIncludeFiltered( true )
    , bAll( false )
    , bSingle( true )
    , bUndo( pDocSh != nullptr )
    , bOverflowRow( false )
    , bOverflowCol( false )
    , bOverflowCell( false )
    , mbApi( true )
    , mbImportBroadcast(false)
    , mbOverwriting( false )
    , mbIncludeBOM(false)
{
    // Single cell target; undo is requested whenever a document shell exists.
    pUndoDoc.reset();
    pExtOptions.reset();
}

//  ctor with a range is only used for export
//! ctor with a string (and bSingle=true) is also used for DdeSetData
ScImportExport::ScImportExport( ScDocument& r, const ScRange& rRange )
    : pDocSh( r.GetDocumentShell() )
    , rDoc( r )
    , aRange( rRange )
    , nSizeLimit( 0 )
    , nMaxImportRow(!comphelper::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K)
    , cSep( '\t' )
    , cStr( '"' )
    , bFormulas( false )
    , bIncludeFiltered( true )
    , bAll( false )
    , bSingle( false )
    , bUndo( pDocSh != nullptr )
    , bOverflowRow( false )
    , bOverflowCol( false )
    , bOverflowCell( false )
    , mbApi( true )
    , mbImportBroadcast(false)
    , mbOverwriting( false )
    , mbIncludeBOM(false)
{
    pUndoDoc.reset();
    pExtOptions.reset();

    // Only a single sheet (table) is supported: collapse the range onto
    // the sheet of its start address.
    const SCTAB nOnlyTab = aRange.aStart.Tab();
    aRange.aEnd.SetTab( nOnlyTab );
}

// Evaluate input string - either range, cell or the whole document (when error)
// If a View exists, the TabNo of the view will be used.
ScImportExport::ScImportExport( ScDocument& r, const OUString& rPos )
    : pDocSh( r.GetDocumentShell() )
    , rDoc( r )
    , nSizeLimit( 0 )
    , nMaxImportRow(!comphelper::IsFuzzing() ? rDoc.MaxRow() : SCROWS32K)
    , cSep( '\t' )
    , cStr( '"' )
    , bFormulas( false )
    , bIncludeFiltered( true )
    , bAll( false )
    , bSingle( true )
    , bUndo( pDocSh != nullptr )
    , bOverflowRow( false )
    , bOverflowCol( false )
    , bOverflowCell( false )
    , mbApi( true )
    , mbImportBroadcast(false)
    , mbOverwriting( false )
    , mbIncludeBOM(false)
{
    pUndoDoc.reset();
    pExtOptions.reset();

    // Addresses without a sheet part refer to the current sheet.
    aRange.aStart.SetTab( ScDocShell::GetCurTab() );

    // The position text may be the name of a named range; if that name
    // denotes a reference, continue with the symbol it stands for.
    OUString aRefText( rPos );
    if (ScRangeName* pNames = rDoc.GetRangeName())
    {
        const ScRangeData* pNamed
            = pNames->findByUpperName(ScGlobal::getCharClass().uppercase(aRefText));
        const bool bIsReference = pNamed
            && ( pNamed->HasType( ScRangeData::Type::RefArea )
                 || pNamed->HasType( ScRangeData::Type::AbsArea )
                 || pNamed->HasType( ScRangeData::Type::AbsPos ) );
        if (bIsReference)
            aRefText = pNamed->GetSymbol();
    }

    const formula::FormulaGrammar::AddressConvention eConv = rDoc.GetAddressConvention();
    if (aRange.Parse(aRefText, rDoc, eConv) & ScRefFlags::VALID)
    {
        // A cell range.
        bSingle = false;
    }
    else if (aRange.aStart.Parse(aRefText, rDoc, eConv) & ScRefFlags::VALID)
    {
        // A single cell.
        aRange.aEnd = aRange.aStart;
    }
    else
    {
        // Neither range nor cell: fall back to the whole document.
        bAll = true;
    }
}

ScImportExport::~ScImportExport() COVERITY_NOEXCEPT_FALSE
{
    // Release the owned objects explicitly, undo document first.
    pUndoDoc = nullptr;
    pExtOptions = nullptr;
}

void ScImportExport::SetExtOptions( const ScAsciiOptions& rOpt )
{
    // Keep a private copy of the extended options, reusing the existing
    // object when there already is one.
    if ( !pExtOptions )
        pExtOptions.reset(new ScAsciiOptions( rOpt ));
    else
        *pExtOptions = rOpt;

    // Mirror the settings that also exist as "normal" options.
    cStr = rOpt.GetTextSep();
    cSep = ScAsciiOptions::GetWeightedFieldSep( rOpt.GetFieldSeps(), false);
}

// Store the filter options string in maFilterOptions for later use.
void ScImportExport::SetFilterOptions(const OUString& rFilterOptions)
{
    maFilterOptions = rFilterOptions;
}

// Tell whether nFormat is one of the clipboard formats listed below.
bool ScImportExport::IsFormatSupported( SotClipboardFormatId nFormat )
{
    switch (nFormat)
    {
        case SotClipboardFormatId::STRING:
        case SotClipboardFormatId::STRING_TSVC:
        case SotClipboardFormatId::SYLK:
        case SotClipboardFormatId::LINK:
        case SotClipboardFormatId::HTML:
        case SotClipboardFormatId::HTML_SIMPLE:
        case SotClipboardFormatId::DIF:
            return true;
        default:
            return false;
    }
}

// Prepare for Undo
bool ScImportExport::StartPaste()
{
    if ( !bAll )
    {
        ScEditableTester aTester( rDoc, aRange, sc::EditAction::Unknown );
        if ( !aTester.IsEditable() )
        {
            std::unique_ptr<weld::MessageDialog> xInfoBox(Application::CreateMessageDialog(ScDocShell::GetActiveDialogParent(),
                                                          VclMessageType::Info, VclButtonsType::Ok,
                                                          ScResId(aTester.GetMessageId())));
            xInfoBox->run();
            return false;
        }
    }
    if( bUndo && pDocSh && rDoc.IsUndoEnabled())
    {
        pUndoDoc.reset(new ScDocument( SCDOCMODE_UNDO ));
        pUndoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
        rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pUndoDoc);
    }
    return true;
}

// Create Undo/Redo actions, Invalidate/Repaint
void ScImportExport::EndPaste(bool bAutoRowHeight)
{
    bool bHeight = bAutoRowHeight && pDocSh && pDocSh->AdjustRowHeight(
                    aRange.aStart.Row(), aRange.aEnd.Row(), aRange.aStart.Tab() );

    if( pUndoDoc && rDoc.IsUndoEnabled() && pDocSh )
    {
        ScDocumentUniquePtr pRedoDoc(new ScDocument( SCDOCMODE_UNDO ));
        pRedoDoc->InitUndo( rDoc, aRange.aStart.Tab(), aRange.aEnd.Tab() );
        rDoc.CopyToDocument(aRange, InsertDeleteFlags::ALL | InsertDeleteFlags::NOCAPTIONS, false, *pRedoDoc);
        ScMarkData aDestMark(pRedoDoc->GetSheetLimits());
        aDestMark.SetMarkArea(aRange);
        pDocSh->GetUndoManager()->AddUndoAction(
            std::make_unique<ScUndoPaste>(*pDocSh, aRange, aDestMark, std::move(pUndoDoc), std::move(pRedoDoc), InsertDeleteFlags::ALL, nullptr));
    }
    pUndoDoc.reset();
    if( pDocSh )
    {
        if (!bHeight)
            pDocSh->PostPaint( aRange, PaintPartFlags::Grid );
        pDocSh->SetDocumentModified();
    }
    ScTabViewShell* pViewSh = ScTabViewShell::GetActiveViewShell();
    if ( pViewSh )
        pViewSh->UpdateInputHandler();

}

bool ScImportExport::ExportData( std::u16string_view rMimeType,
                                 css::uno::Any & rValue )
{
    SvMemoryStream aStrm;
    SotClipboardFormatId fmtId = SotExchange::GetFormatIdFromMimeType(rMimeType);
    if (fmtId == SotClipboardFormatId::STRING)
        aStrm.SetStreamCharSet(RTL_TEXTENCODING_UNICODE);
    // mba: no BaseURL for data exchange
    if (ExportStream(aStrm, OUString(), fmtId))
    {
        if (fmtId == SotClipboardFormatId::STRING)
        {
            assert(aStrm.TellEnd() % sizeof(sal_Unicode) == 0);
            rValue <<= OUString(static_cast<const sal_Unicode*>(aStrm.GetData()),
                                aStrm.TellEnd() / sizeof(sal_Unicode));
        }
        else
        {
            aStrm.WriteUChar(0);
            rValue <<= css::uno::Sequence<sal_Int8>(static_cast<sal_Int8 const*>(aStrm.GetData()),
                                                    aStrm.TellEnd());
        }
        return true;
    }
    return false;
}

bool ScImportExport::ImportString( const OUString& rText, SotClipboardFormatId nFmt )
{
    switch ( nFmt )
    {
        // formats supporting unicode
        case SotClipboardFormatId::STRING :
        case SotClipboardFormatId::STRING_TSVC :
        {
            ScImportStringStream aStrm( rText);
            return ImportStream( aStrm, OUString(), nFmt );
            // ImportStream must handle RTL_TEXTENCODING_UNICODE
        }
        default:
        {
            rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
            OString aTmp( rText.getStr(), rText.getLength(), eEnc );
            SvMemoryStream aStrm( const_cast<char *>(aTmp.getStr()), aTmp.getLength() * sizeof(char), StreamMode::READ );
            aStrm.SetStreamCharSet( eEnc );
            SetNoEndianSwap( aStrm );       //! no swapping in memory
            return ImportStream( aStrm, OUString(), nFmt );
        }
    }
}

// Export in format nFmt into rText.
// Only the plain text formats are exported as Unicode; any other format
// is exported as a byte string in the thread's text encoding and converted.
// On failure of the Unicode export rText is cleared and false is returned.
bool ScImportExport::ExportString( OUString& rText, SotClipboardFormatId nFmt )
{
    const bool bUnicodeFormat = (nFmt == SotClipboardFormatId::STRING
                                 || nFmt == SotClipboardFormatId::STRING_TSVC);
    if (bUnicodeFormat)
    {
        //  nSizeLimit not needed for OUString
        // ExportStream must handle RTL_TEXTENCODING_UNICODE
        SvMemoryStream aUnicodeStrm;
        aUnicodeStrm.SetStreamCharSet( RTL_TEXTENCODING_UNICODE );
        SetNoEndianSwap( aUnicodeStrm );       //! no swapping in memory

        // mba: no BaseURL for data exchange
        if( !ExportStream( aUnicodeStrm, OUString(), nFmt ) )
        {
            rText.clear();
            return false;
        }

        // Terminate the buffer so it can be read as a C-style string.
        aUnicodeStrm.WriteUInt16( 0 );
        rText = OUString( static_cast<const sal_Unicode*>(aUnicodeStrm.GetData()) );
        return true;
    }

    SAL_WARN("sc.ui", "ScImportExport::ExportString: Unicode not supported for other formats than SotClipboardFormatId::STRING[_TSV]");
    const rtl_TextEncoding eThreadEnc = osl_getThreadTextEncoding();
    OString aBytes;
    const bool bExported = ExportByteString( aBytes, eThreadEnc, nFmt );
    rText = OStringToOUString( aBytes, eThreadEnc );
    return bExported;
}

// Export in format nFmt as a byte string in encoding eEnc.
// A Unicode encoding is not supported and is replaced by the thread's text
// encoding. An unset nSizeLimit is set to SAL_MAX_UINT16. The export fails
// (rText cleared, false returned) if the stream could not be written or the
// terminated result is larger than nSizeLimit.
bool ScImportExport::ExportByteString( OString& rText, rtl_TextEncoding eEnc, SotClipboardFormatId nFmt )
{
    OSL_ENSURE( eEnc != RTL_TEXTENCODING_UNICODE, "ScImportExport::ExportByteString: Unicode not supported" );
    if ( eEnc == RTL_TEXTENCODING_UNICODE )
        eEnc = osl_getThreadTextEncoding();

    if (nSizeLimit == 0)
        nSizeLimit = SAL_MAX_UINT16;

    SvMemoryStream aByteStrm;
    aByteStrm.SetStreamCharSet( eEnc );
    SetNoEndianSwap( aByteStrm );       //! no swapping in memory

    bool bWithinLimit = false;
    // mba: no BaseURL for data exchange
    if( ExportStream( aByteStrm, OUString(), nFmt ) )
    {
        aByteStrm.WriteChar( 0 );
        bWithinLimit = ( aByteStrm.TellEnd() <= nSizeLimit );
    }

    if (bWithinLimit)
        rText = static_cast<const char*>(aByteStrm.GetData());
    else
        rText.clear();
    return bWithinLimit;
}

// Dispatch the import of rStrm to the reader for format nFmt.
// Returns the reader's result, true for LINK without reading anything, and
// false for any format not handled here.
bool ScImportExport::ImportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
{
    switch (nFmt)
    {
        case SotClipboardFormatId::STRING:
        case SotClipboardFormatId::STRING_TSVC:
            return ExtText2Doc( rStrm );      // evaluate pExtOptions

        case SotClipboardFormatId::SYLK:
            return Sylk2Doc( rStrm );

        case SotClipboardFormatId::DIF:
            return Dif2Doc( rStrm );

        case SotClipboardFormatId::RTF:
        case SotClipboardFormatId::RICHTEXT:
            return RTF2Doc( rStrm, rBaseURL );

        case SotClipboardFormatId::LINK:
            return true;            // Link-Import?

        case SotClipboardFormatId::HTML:
            return HTML2Doc( rStrm, rBaseURL );

        case SotClipboardFormatId::HTML_SIMPLE:
        {
            MSE40HTMLClipFormatObj aHeaderSkipper;      // needed to skip the header data
            SvStream* pHtmlBody = aHeaderSkipper.IsValid( rStrm );
            return pHtmlBody && HTML2Doc( *pHtmlBody, rBaseURL );
        }

        default:
            return false;
    }
}

// Dispatch the export into rStrm to the writer for format nFmt.
// LINK writes a DDE link to aRange and is only possible when not exporting
// the whole document and when a document name is available.
// Returns the writer's result, or false for any format not handled here.
bool ScImportExport::ExportStream( SvStream& rStrm, const OUString& rBaseURL, SotClipboardFormatId nFmt )
{
    switch (nFmt)
    {
        case SotClipboardFormatId::STRING:
        case SotClipboardFormatId::STRING_TSVC:
            return Doc2Text( rStrm );

        case SotClipboardFormatId::SYLK:
            return Doc2Sylk( rStrm );

        case SotClipboardFormatId::DIF:
            return Doc2Dif( rStrm );

        case SotClipboardFormatId::HTML:
            return Doc2HTML( rStrm, rBaseURL );

        case SotClipboardFormatId::RTF:
        case SotClipboardFormatId::RICHTEXT:
            return Doc2RTF( rStrm );

        case SotClipboardFormatId::LINK:
        {
            if (bAll)
                return false;

            OUString aDocName;
            if ( rDoc.IsClipboard() )
                aDocName = ScGlobal::GetClipDocName();
            else if (ScDocShell* pShell = rDoc.GetDocumentShell())
                aDocName = pShell->GetTitle( SFX_TITLE_FULLNAME );

            OSL_ENSURE( !aDocName.isEmpty(), "ClipBoard document has no name! :-/" );
            if( aDocName.isEmpty() )
                return false;

            // Always use Calc A1 syntax for paste link.
            ScRefFlags nFlags = ScRefFlags::VALID | ScRefFlags::TAB_3D;
            if( !bSingle && aRange.aStart.Tab() != aRange.aEnd.Tab() )
                nFlags |= ScRefFlags::TAB2_3D;

            OUString aRefName;
            if( bSingle )
                aRefName = aRange.aStart.Format(nFlags, &rDoc, formula::FormulaGrammar::CONV_OOO);
            else
                aRefName = aRange.Format(rDoc, nFlags, formula::FormulaGrammar::CONV_OOO);

            OUString aAppName = Application::GetAppName();

            // extra bits are used to tell the client to prefer external
            // reference link.
            return TransferableDataHelper::WriteDDELink(rStrm, aAppName, aDocName, aRefName,
                                                        u"calc:extref");
        }

        default:
            return false;
    }
}

// tdf#104927
// http://www.unicode.org/reports/tr11/
/** Count display columns of rStr starting at nIdx, stopping once nMaxWidth
    columns are reached or the string ends.

    East Asian full-width and wide code points count as two columns, default
    ignorable code points as none, everything else as one.

    @param nIdx
        In: start index. Out: index after the last code point consumed; one
        directly following default ignorable code point is consumed as well.

    @return the number of columns counted.
*/
sal_Int32 ScImportExport::CountVisualWidth(std::u16string_view rStr, sal_Int32& nIdx, sal_Int32 nMaxWidth)
{
    const sal_Int32 nLen = static_cast<sal_Int32>(rStr.size());
    sal_Int32 nColumns = 0;

    while (nIdx < nLen && nColumns < nMaxWidth)
    {
        const sal_uInt32 nCodePoint = o3tl::iterateCodePoints(rStr, &nIdx);

        const auto nWidthClass = u_getIntPropertyValue(nCodePoint, UCHAR_EAST_ASIAN_WIDTH);
        const bool bDoubleWidth = (nWidthClass == U_EA_FULLWIDTH || nWidthClass == U_EA_WIDE);
        if (bDoubleWidth)
            nColumns += 2;
        else if (!u_getIntPropertyValue(nCodePoint, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
            nColumns += 1;
    }

    if (nIdx >= nLen)
        return nColumns;

    // Peek at the next code point and swallow it if it takes no column.
    sal_Int32 nPeekIdx = nIdx;
    const sal_uInt32 nNextCodePoint = o3tl::iterateCodePoints(rStr, &nPeekIdx);
    if (u_getIntPropertyValue(nNextCodePoint, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
        nIdx = nPeekIdx;

    return nColumns;
}

// Count the display columns of the whole string rStr.
sal_Int32 ScImportExport::CountVisualWidth(std::u16string_view rStr)
{
    sal_Int32 nStartIdx = 0;
    const sal_Int32 nColumns = CountVisualWidth(rStr, nStartIdx, SAL_MAX_INT32);
    return nColumns;
}

// Set the stream's endianness to the one selected at compile time by
// OSL_BIGENDIAN, so that no byte swapping takes place for in-memory data.
void ScImportExport::SetNoEndianSwap( SvStream& rStrm )
{
#ifdef OSL_BIGENDIAN
    constexpr SvStreamEndian eNativeEndian = SvStreamEndian::BIG;
#else
    constexpr SvStreamEndian eNativeEndian = SvStreamEndian::LITTLE;
#endif
    rStrm.SetEndian( eNativeEndian );
}

// A field ends at the end of data (NUL) or at any character contained in
// the separator list pSeps.
static inline bool lcl_isFieldEnd( sal_Unicode c, const sal_Unicode* pSeps )
{
    if (!c)
        return true;
    return ScGlobal::UnicodeStrChr( pSeps, c) != nullptr;
}

namespace {

// Classification of a quote character met while parsing CSV-like data, as
// returned by lcl_isFieldEndQuote() and lcl_isEscapedOrFieldEndQuote().
enum QuoteType
{
    FIELDSTART_QUOTE,   // first quote in a field
    FIRST_QUOTE,        // first of a doubled (escaped) quote
    SECOND_QUOTE,       // second of a doubled (escaped) quote
    FIELDEND_QUOTE,     // quote that ends a quoted field
    DONTKNOW_QUOTE      // unescaped quote not considered as end of field
};

}

/** Determine if *p is a quote that ends a quoted field.

    Precondition: we are parsing a quoted field already and *p is a quote.

    @return
        FIELDEND_QUOTE if end of field quote
        DONTKNOW_QUOTE anything else
*/
static QuoteType lcl_isFieldEndQuote( const sal_Unicode* p, const sal_Unicode* pSeps, sal_Unicode& rcDetectSep )
{
    // Broken CSV generators may not double embedded quotes. Therefore a
    // quote only ends the field if a field separator follows it, directly
    // or after trailing blanks, or if the data ends there.
    constexpr sal_Unicode cSpace = ' ';
    const bool bBlankFollows = (p[1] == cSpace);

    // A blank that itself is a configured separator ends the field at once.
    if (bBlankFollows && ScGlobal::UnicodeStrChr( pSeps, cSpace))
        return FIELDEND_QUOTE;

    // Remember whether a blank could be the separator to detect: detection
    // is still open (rcDetectSep is 0) and exactly one blank is followed by
    // some other character.
    const bool bBlankSepCandidate
        = bBlankFollows && !rcDetectSep && p[2] && p[2] != cSpace;

    // Look at the first character after the quote and any blanks.
    const sal_Unicode* pAfter = p + 1;
    while (*pAfter == cSpace)
        ++pAfter;

    if (lcl_isFieldEnd( *pAfter, pSeps))
        return FIELDEND_QUOTE;

    // Extended separator detection after a closing quote (with or without
    // blanks). With separator detection this point is reached only if
    // lcl_isEscapedOrFieldEndQuote() did not already report FIRST_QUOTE or
    // SECOND_QUOTE for an escaped embedded quote, so the quote count need
    // not be checked here.
    if (!rcDetectSep)
    {
        switch (*pAfter)
        {
            case ',':
            case '\t':
            case ';':
                rcDetectSep = *pAfter;
                return FIELDEND_QUOTE;
            default:
                break;
        }

        // Blank separator is least significant, after others.
        if (bBlankSepCandidate)
        {
            rcDetectSep = cSpace;
            return FIELDEND_QUOTE;
        }
    }

    return DONTKNOW_QUOTE;
}

/** Determine if *p is a quote that is escaped by being doubled or ends a
    quoted field.

    Precondition: *p is a quote.

    @param nQuotes
        Quote characters encountered so far.
        Odd (after opening quote) means either no embedded quotes or only quote
        pairs so far.
        Even means either not in a quoted field or already one quote
        encountered, the first of a pair.

    @return
        FIELDSTART_QUOTE if first quote in a field, either starting content or
                            embedded so caller should check beforehand.
        FIRST_QUOTE      if first of a doubled quote
        SECOND_QUOTE     if second of a doubled quote
        FIELDEND_QUOTE   if end of field quote
        DONTKNOW_QUOTE   if an unescaped quote we don't consider as end of field,
                            do not increment nQuotes in caller then!
*/
static QuoteType lcl_isEscapedOrFieldEndQuote( sal_Int32 nQuotes, const sal_Unicode* p,
        const sal_Unicode* pSeps, sal_Unicode cStr, sal_Unicode& rcDetectSep )
{
    const bool bOddQuoteCount = (nQuotes % 2) != 0;

    if (bOddQuoteCount)
    {
        // Inside a quoted field with balanced embedded quotes so far: a
        // directly following quote makes this the first of a doubled pair,
        // otherwise it may end the field.
        if (p[1] == cStr)
            return FIRST_QUOTE;
        return lcl_isFieldEndQuote( p, pSeps, rcDetectSep);
    }

    // Even count: this completes a doubled pair if the previous character
    // was a quote as well.
    if (p[-1] == cStr)
        return SECOND_QUOTE;

    SAL_WARN( "sc", "lcl_isEscapedOrFieldEndQuote: really want a FIELDSTART_QUOTE?");
    return FIELDSTART_QUOTE;
}

/** Append characters of [p1,p2) to rField.

    @returns TRUE if ok; FALSE if data overflow, truncated
*/
static bool lcl_appendLineData( OUString& rField, const sal_Unicode* p1, const sal_Unicode* p2 )
{
    // Number of characters offered and number of characters that still fit
    // into rField before nArbitraryCellLengthLimit is reached. The latter
    // is negative if rField was already grown beyond the limit elsewhere.
    const auto nOffered = p2 - p1;
    const auto nRoom
        = static_cast<decltype(nOffered)>(nArbitraryCellLengthLimit) - rField.getLength();

    if (nOffered <= nRoom)
    {
        // An empty or inverted range appends nothing instead of building a
        // string view with a negative (i.e. huge) length.
        if (nOffered > 0)
            rField += std::u16string_view( p1, nOffered );
        return true;
    }

    SAL_WARN( "sc", "lcl_appendLineData: data overflow");
    // Truncate: take only what still fits. Without this guard a field that
    // already exceeds the limit would yield a negative count, which as a
    // string view length reads far beyond the source buffer.
    if (nRoom > 0)
        rField += std::u16string_view( p1, nRoom );
    return false;
}

namespace {

// Selects how lcl_ScanString() treats doubled quote characters and the
// enclosing quotes of a quoted string.
enum class DoubledQuoteMode
{
    KEEP_ALL,   // both are taken, additionally start and end quote are included in string
    ESCAPE,     // escaped quote, one is taken, one ignored
};

}

/** Scan for a quoted string.

    Precondition: initial current position *p is a cStr quote.

    For DoubledQuoteMode::ESCAPE, if after the closing quote there is a field
    end (with or without trailing blanks and as determined by
    lcl_isFieldEndQuote()), then the content is appended to rField with quotes
    processed and removed. Else if no field end after the quoted string was
    detected, nothing is appended and processing continues and is repeated
    until the next quote. If no closing quote at a field end was found at all,
    nothing is appended and the initial position is returned and caller has to
    decide, usually just taking all as literal data.

    For DoubledQuoteMode::KEEP_ALL, the string up to and including the closing
    quote is appended to rField and the next position returned, regardless
    whether there is a field separator following or not.

*/
static const sal_Unicode* lcl_ScanString( const sal_Unicode* p, OUString& rField,
            const sal_Unicode* pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool& rbOverflowCell )
{
    // Content is collected here first and only handed over to rField once
    // the scan is known to have succeeded.
    OUString aString;
    // In KEEP_ALL mode no closing quote at a field end is required.
    bool bClosingQuote = (eMode == DoubledQuoteMode::KEEP_ALL);
    // Returned unchanged if the data turns out not to be a quoted string.
    const sal_Unicode* const pStart = p;
    if (eMode != DoubledQuoteMode::KEEP_ALL)
        p++;    //! jump over opening quote
    // Set when a doubled quote ended the current segment and scanning has
    // to continue with the next segment.
    bool bCont;
    do
    {
        bCont = false;
        // Start of the segment that is appended after the inner loop.
        const sal_Unicode* p0 = p;
        for( ;; )
        {
            if (!*p)
            {
                // Encountering end of data after an opening quote is not a
                // quoted string, ReadCsvLine() concatenated lines with '\n'
                // for a properly quoted embedded linefeed.
                if (eMode == DoubledQuoteMode::KEEP_ALL)
                    // Caller would append that data anyway, so we can do it
                    // already here.
                    break;

                return pStart;
            }

            if( *p == cStr )
            {
                // Advance past the quote and look at what follows it.
                if ( *++p != cStr )
                {
                    // break or continue for loop
                    if (eMode == DoubledQuoteMode::ESCAPE)
                    {
                        sal_Unicode cDetectSep = 0xffff;    // No separator detection here.
                        if (lcl_isFieldEndQuote( p-1, pSeps, cDetectSep) == FIELDEND_QUOTE)
                        {
                            bClosingQuote = true;
                            break;
                        }
                        else
                            // Lone quote not at a field end: stays part of
                            // the current segment.
                            continue;
                    }
                    else
                        break;
                }
                // doubled quote char
                switch ( eMode )
                {
                    case DoubledQuoteMode::KEEP_ALL :
                        p++;            // both for us (not breaking for-loop)
                    break;
                    case DoubledQuoteMode::ESCAPE :
                        p++;            // one for us (breaking for-loop)
                        bCont = true;   // and more
                    break;
                }
                if ( eMode == DoubledQuoteMode::ESCAPE )
                    break;
            }
            else
                p++;
        }
        if ( p0 < p )
        {
            // Outside KEEP_ALL mode the segment ends one character before p
            // (when p is not at the end of data or is preceded by a quote),
            // which leaves out the quote p was advanced past.
            if (!lcl_appendLineData( aString, p0, ((eMode != DoubledQuoteMode::KEEP_ALL && (*p || *(p-1) == cStr)) ? p-1 : p)))
                rbOverflowCell = true;
        }
    } while ( bCont );

    // No closing quote at a field end was found: nothing is appended and the
    // caller gets the initial position back.
    if (!bClosingQuote)
        return pStart;

    if (!aString.isEmpty())
        rField += aString;

    return p;
}

// Undo the SYLK escaping of rString in place.
static void lcl_UnescapeSylk( OUString & rString, SylkVersion eVersion )
{
    // Versions from OOO32 on escape a semicolon by doubling it. Older
    // versions did not escape the semicolon at all; they quoted the string
    // and doubled embedded quotes instead, which was plain wrong.
    const bool bSemicolonsEscaped = (eVersion >= SylkVersion::OOO32);
    if (bSemicolonsEscaped)
        rString = rString.replaceAll(";;", ";");
    else
        rString = rString.replaceAll("\"\"", "\"");

    // Embedded line feeds are stored as the SYLK_LF sequence.
    rString = rString.replaceAll(SYLK_LF, "\n");
}

/** Scan a quoted SYLK string.

    On entry p points at the opening quote. The content between the opening
    quote and the closing quote (or the end of data if none was found) is
    appended to rString and then unescaped according to eVersion.

    @return the position where scanning stopped: the closing quote if a
            field end followed it, else the end of data.
*/
static const sal_Unicode* lcl_ScanSylkString( const sal_Unicode* p,
        OUString& rString, SylkVersion eVersion )
{
    const sal_Unicode* const pOpenQuote = p;
    const sal_Unicode* pCloseQuote = nullptr;
    const bool bSemicolonsEscaped = (eVersion >= SylkVersion::OOO32);

    for (++p; *p; ++p)
    {
        if (*p != '"')
            continue;

        // Candidate for the closing quote, unless proven escaped below.
        pCloseQuote = p;
        const sal_Unicode cNext = p[1];

        if (bSemicolonsEscaped)
        {
            if (cNext != ';')
                continue;
            if (p[2] != ';')
                break;              // end field
            p += 2;                 // escaped ';'
            pCloseQuote = nullptr;
        }
        else if (cNext == '"')
        {
            ++p;                    // escaped '"'
            pCloseQuote = nullptr;
        }
        else if (cNext == ';')
            break;                  // end field
    }

    if (!pCloseQuote)
        pCloseQuote = p;  // Take all data as string.

    const sal_Unicode* const pContent = pOpenQuote + 1;
    rString += std::u16string_view(pContent, pCloseQuote - pOpenQuote - 1 );
    lcl_UnescapeSylk( rString, eVersion);
    return p;
}

/** Scan a SYLK formula expression starting at p and append it to rString.

    @return the position of the terminating ';' or of the end of data; for
            the quoted old-version case whatever lcl_ScanSylkString() returns.
*/
static const sal_Unicode* lcl_ScanSylkFormula( const sal_Unicode* p,
        OUString& rString, SylkVersion eVersion )
{
    const sal_Unicode* const pBegin = p;

    if (eVersion >= SylkVersion::OOO32)
    {
        // A doubled semicolon is an escaped one, a single one ends the field.
        for ( ; *p; ++p)
        {
            if (*p != ';')
                continue;
            if (p[1] != ';')
                break;      // end field
            ++p;            // escaped ';'
        }
        rString += std::u16string_view( pBegin, p - pBegin);
        lcl_UnescapeSylk( rString, eVersion);
        return p;
    }

    // Nasty. If in old versions the formula contained a semicolon, it was
    // quoted and embedded quotes were doubled, but semicolons were not. If
    // there was no semicolon, it could still contain quotes and doubled
    // embedded quotes if it was something like ="a""b", which was saved as
    // E"a""b" as is and has to be preserved, even if older versions
    // couldn't even load it correctly. However, theoretically another
    // field might follow and thus the line contain a semicolon again, such
    // as ...;E"a""b";...
    bool bSemicolonInsideQuotes = false;
    if (*pBegin == '"')
    {
        // May be a quoted expression or just a string constant expression
        // with quotes. Probe ahead without moving p.
        for (const sal_Unicode* pProbe = pBegin + 1; *pProbe; ++pProbe)
        {
            if (*pProbe == ';')
            {
                bSemicolonInsideQuotes = true;  // ';' within quoted expression
                break;
            }
            if (*pProbe == '"')
            {
                if (pProbe[1] != '"')
                    break;                      // closing '"', had no ';' yet
                ++pProbe;                       // escaped '"'
            }
        }
    }

    if (bSemicolonInsideQuotes)
        return lcl_ScanSylkString( pBegin, rString, eVersion);

    // Unquoted: take everything up to the next semicolon as is.
    while (*p && *p != ';')
        ++p;
    rString += std::u16string_view( pBegin, p - pBegin);
    return p;
}

// Write rString to rStrm, optionally escaped and quoted.
// If cEsc is set, every occurrence of it is doubled; if cQuote is set, the
// result is enclosed in it. Note that rString itself is modified.
static void lcl_WriteString( SvStream& rStrm, OUString& rString, sal_Unicode cQuote, sal_Unicode cEsc )
{
    if (cEsc)
    {
        // the goal is to replace cStr by cStr+cStr
        const OUString aSingle(cEsc);
        const OUString aDoubled = aSingle + aSingle;
        rString = rString.replaceAll(aSingle, aDoubled);
    }

    if (cQuote)
        rString = OUStringChar(cQuote) + rString + OUStringChar(cQuote);

    rStrm.WriteUnicodeOrByteText(rString);
}

/** Imports simple separated text from rStrm into the document.

    Each line of the stream becomes one row starting at aRange's start
    position; fields are split at cSep, and a field starting with cStr is
    first handed to lcl_ScanString() to resolve the quoted part.

    If bSingle is set the stream is read twice: the first pass only grows the
    end column/row to cover every valid cell found, then aRange's end is
    updated and StartPaste() is called, and the second pass writes the cells.
    Otherwise StartPaste() is called up front and there is a single pass.

    Cells outside the valid sheet area set bOverflowRow / bOverflowCol,
    oversized cell content sets bOverflowCell.

    @param rStrm  stream to read; it is rewound to its position at entry
                  before every pass
    @return the result of StartPaste()
 */
bool ScImportExport::Text2Doc( SvStream& rStrm )
{
    // NUL-terminated separator list handed to lcl_ScanString().
    sal_Unicode aSepList[2];
    aSepList[0] = cSep;
    aSepList[1] = 0;

    ScSetStringParam aStringParam;
    aStringParam.mbCheckLinkFormula = true;

    const SCCOL nFirstCol = aRange.aStart.Col();
    const SCROW nFirstRow = aRange.aStart.Row();
    SCCOL nLastCol = aRange.aEnd.Col();
    SCROW nLastRow = aRange.aEnd.Row();
    const sal_uInt64 nStreamStart = rStrm.Tell();
    rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );

    // In single mode the first pass only measures; cells are written once
    // the range is known and StartPaste() succeeded.
    bool bWriteCells = !bSingle;
    bool bOk = bSingle || StartPaste();

    while (bOk)
    {
        OUString aLine;
        OUString aCell;
        rStrm.Seek( nStreamStart );
        for (SCROW nRow = nFirstRow; ; ++nRow)
        {
            rStrm.ReadUniOrByteStringLine( aLine, rStrm.GetStreamCharSet(), nArbitraryLineLengthLimit );
            if (rStrm.eof())
                break;

            // tdf#125440 When inserting tab separated string, consider quotes as field markers
            const DoubledQuoteMode eQuoteMode
                = (aLine.indexOf("\t") >= 0) ? DoubledQuoteMode::ESCAPE : DoubledQuoteMode::KEEP_ALL;

            const sal_Unicode* pCur = aLine.getStr();
            for (SCCOL nCol = nFirstCol; *pCur; ++nCol)
            {
                aCell.clear();
                const sal_Unicode* pFieldStart = pCur;
                if (*pCur == cStr)
                {
                    // Quoted field: the scanner appends the quoted part to
                    // aCell and returns where it stopped.
                    pCur = lcl_ScanString( pCur, aCell, aSepList, cStr, eQuoteMode, bOverflowCell );
                    pFieldStart = pCur;
                }

                // Everything up to the next separator belongs to this field.
                while (*pCur && *pCur != cSep)
                    ++pCur;
                if (!lcl_appendLineData( aCell, pFieldStart, pCur))
                    bOverflowCell = true;   // display warning on import

                // Step over the separator itself.
                if (*pCur)
                    ++pCur;

                const bool bColOk = rDoc.ValidCol(nCol);
                const bool bRowOk = rDoc.ValidRow(nRow);
                if (bColOk && bRowOk)
                {
                    if (bSingle)
                    {
                        nLastCol = std::max(nLastCol, nCol);
                        nLastRow = std::max(nLastRow, nRow);
                    }
                    if (bWriteCells && nCol <= nLastCol && nRow <= nLastRow)
                        rDoc.SetString( nCol, nRow, aRange.aStart.Tab(), aCell, &aStringParam );
                }
                else
                {
                    // Too many columns/rows; display warning on import.
                    if (!bRowOk)
                        bOverflowRow = true;
                    if (!bColOk)
                        bOverflowCol = true;
                }
            }
        }

        if (bWriteCells)
            break;

        // Measuring pass done: fix the range, then go round again to write.
        aRange.aEnd.SetCol( nLastCol );
        aRange.aEnd.SetRow( nLastRow );
        bOk = StartPaste();
        bWriteCells = true;
    }

    EndPaste();
    if (bOk && mbImportBroadcast)
    {
        rDoc.BroadcastCells(aRange, SfxHintId::ScDataChanged);
        pDocSh->PostDataChanged();
    }

    return bOk;
}

//  Extended Ascii-Import

/** Stores one imported text field in a cell, interpreted according to the
    column format.

    Handling by nColFormat:
    - SC_COL_SKIP, or an invalid column/row: nothing is written.
    - SC_COL_TEXT (or a string starting with '=' while bEvaluateFormulas is
      false): stored as text.
    - SC_COL_ENGLISH: tried as a number using the en-US standard format; if
      that fails the generic fallback is used.
    - SC_COL_STANDARD: generic fallback only.
    - any other value: treated as a date column (SC_COL_YMD, SC_COL_MDY,
      everything else as day-month-year); if no date can be determined the
      generic fallback is used.

    The generic fallback sets the string with number detection controlled by
    bDetectNumFormat / bDetectSciNumFormat, or creates an edit cell if the
    string contains a line feed.

    @param rDocImport               used for writing if bUseDocImport is true;
                                    its document is always used for lookups
    @param bUseDocImport            write through rDocImport instead of
                                    directly through the document
    @param nCol,nRow,nTab           destination cell
    @param rStr                     field content
    @param nColFormat               column format, see above
    @param pFormatter               passed on as ScSetStringParam::mpNumFormatter
                                    and used for the locale's AM/PM strings
    @param bDetectNumFormat         passed on to ScSetStringParam
    @param bDetectSciNumFormat      passed on to ScSetStringParam
    @param bEvaluateFormulas        if false, a leading '=' forces text
    @param bSkipEmptyCells          if true, an empty rStr leaves the
                                    destination cell untouched
    @param rTransliteration         used to compare month names and AM/PM
    @param rCalendar                calendar used for month names and to
                                    compute the date value
    @param pSecondTransliteration   optional second comparison (English per
                                    the comments below), may be null
    @param pSecondCalendar          optional second calendar, may be null

    @return true if the content was stored as a multi-line edit cell by the
            DocImport text path or by the generic fallback, false otherwise
 */
static bool lcl_PutString(
    ScDocumentImport& rDocImport, bool bUseDocImport,
    SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString& rStr, sal_uInt8 nColFormat,
    SvNumberFormatter* pFormatter, bool bDetectNumFormat, bool bDetectSciNumFormat, bool bEvaluateFormulas, bool bSkipEmptyCells,
    const ::utl::TransliterationWrapper& rTransliteration, CalendarWrapper& rCalendar,
    const ::utl::TransliterationWrapper* pSecondTransliteration, CalendarWrapper* pSecondCalendar )
{
    ScDocument& rDoc = rDocImport.getDoc();
    bool bMultiLine = false;
    // Skipped column or position outside the sheet: nothing to do.
    if ( nColFormat == SC_COL_SKIP || !rDoc.ValidCol(nCol) || !rDoc.ValidRow(nRow) )
        return bMultiLine;
    if ( rStr.isEmpty() )
    {
        if ( !bSkipEmptyCells )
        {   // delete destination cell
            if ( bUseDocImport )
                rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr );
            else
                rDoc.SetString( nCol, nRow, nTab, rStr );
        }
        return false;
    }

    // With formula evaluation disabled, anything that looks like a formula is
    // kept as literal text.
    const bool bForceFormulaText = (!bEvaluateFormulas && rStr[0] == '=');
    if (nColFormat == SC_COL_TEXT || bForceFormulaText)
    {
        if ( bUseDocImport )
        {
            double fDummy;
            sal_uInt32 nIndex = 0;
            if (bForceFormulaText || rDoc.GetFormatTable()->IsNumberFormat(rStr, nIndex, fDummy))
            {
                // Set the format of this cell to Text.
                // This is only necessary for ScDocumentImport,
                // ScDocument::SetTextCell() forces it by ScSetStringParam.
                sal_uInt32 nFormat = rDoc.GetFormatTable()->GetStandardFormat(SvNumFormatType::TEXT);
                ScPatternAttr aNewAttrs(rDoc.getCellAttributeHelper());
                SfxItemSet& rSet = aNewAttrs.GetItemSet();
                rSet.Put( SfxUInt32Item(ATTR_VALUE_FORMAT, nFormat) );
                rDoc.ApplyPattern(nCol, nRow, nTab, aNewAttrs);
            }
            if (ScStringUtil::isMultiline(rStr))
            {
                // Multi-line text needs an edit cell.
                ScFieldEditEngine& rEngine = rDoc.GetEditEngine();
                rEngine.SetTextCurrentDefaults(rStr);
                rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
                return true;
            }
            else
            {
                rDocImport.setStringCell(ScAddress(nCol, nRow, nTab), rStr);
                return false;
            }
        }
        else
        {
            // NOTE(review): unlike the DocImport branch above, this path
            // returns false even if rStr is multi-line - confirm that this
            // is intended.
            rDoc.SetTextCell(ScAddress(nCol, nRow, nTab), rStr);
            return bMultiLine;
        }
    }

    if ( nColFormat == SC_COL_ENGLISH )
    {
        //! SetString with Extra-Flag ???

        SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
        sal_uInt32 nEnglish = pDocFormatter->GetStandardIndex(LANGUAGE_ENGLISH_US);
        double fVal;
        if ( pDocFormatter->IsNumberFormat( rStr, nEnglish, fVal ) )
        {
            // Numberformat will not be set to English
            if ( bUseDocImport )
                rDocImport.setNumericCell( ScAddress( nCol, nRow, nTab ), fVal );
            else
                rDoc.SetValue( nCol, nRow, nTab, fVal );
            return bMultiLine;
        }
        // else, continue with SetString
    }
    else if ( nColFormat != SC_COL_STANDARD ) // date formats
    {
        // Start and end (inclusive) positions in rStr of each number part
        // found while scanning.
        const sal_uInt16 nMaxNumberParts = 7; // Y-M-D h:m:s.t
        const sal_Int32 nLen = rStr.getLength();
        sal_Int32 nStart[nMaxNumberParts];
        sal_Int32 nEnd[nMaxNumberParts];

        // nDP, nMP, nYP: index of the day, month and year part for the
        // selected column format. ISO detection is only attempted for YMD.
        bool bIso;
        sal_uInt16 nDP, nMP, nYP;
        switch ( nColFormat )
        {
            case SC_COL_YMD: nDP = 2; nMP = 1; nYP = 0; bIso = true; break;
            case SC_COL_MDY: nDP = 1; nMP = 0; nYP = 2; bIso = false; break;
            case SC_COL_DMY:
            default:         nDP = 0; nMP = 1; nYP = 2; bIso = false; break;
        }

        // Scan the string for number parts. Digits always belong to a part;
        // letters are accepted only when starting or continuing the month
        // part, so that month names can be picked up.
        sal_uInt16 nFound = 0;
        bool bInNum = false;
        for (sal_Int32 nPos = 0; nPos < nLen && (bInNum || nFound < nMaxNumberParts); ++nPos)
        {
            bool bLetter = false;
            if (rtl::isAsciiDigit(rStr[nPos]) ||
                    (((!bInNum && nFound==nMP) || (bInNum && nFound==nMP+1))
                     && (bLetter = ScGlobal::getCharClass().isLetterNumeric( rStr, nPos))))
            {
                if (!bInNum)
                {
                    bInNum = true;
                    nStart[nFound] = nPos;
                    ++nFound;
                }
                nEnd[nFound-1] = nPos;
                if (bIso && (bLetter || (2 <= nFound && nFound <= 6 && nPos > nStart[nFound-1] + 1)))
                    // Each M,D,h,m,s at most 2 digits.
                    bIso = false;
            }
            else
            {
                bInNum = false;
                if (bIso)
                {
                    // ([+-])YYYY-MM-DD([T ]hh:mm(:ss(.fff)))(([+-])TZ)
                    // XXX NOTE: timezone is accepted here, but number
                    // formatter parser will not, so the end result will be
                    // type Text to preserve timezone information.
                    switch (rStr[nPos])
                    {
                        case '+':
                            if (nFound >= 5 && nPos == nEnd[nFound-1] + 1)
                                // Accept timezone offset.
                                ;
                            else if (nPos > 0)
                                // Accept one leading sign.
                                bIso = false;
                        break;
                        case '-':
                            if (nFound >= 5 && nPos == nEnd[nFound-1] + 1)
                                // Accept timezone offset.
                                ;
                            else if (nFound == 0 && nPos > 0)
                                // Accept one leading sign.
                                bIso = false;
                            else if (nFound < 1 || 2 < nFound || nPos != nEnd[nFound-1] + 1)
                                // Not immediately after 1 or 1-2
                                bIso = false;
                        break;
                        case 'T':
                        case ' ':
                            if (nFound != 3 || nPos != nEnd[nFound-1] + 1)
                                // Not immediately after 1-2-3
                                bIso = false;
                        break;
                        case ':':
                            if (nFound < 4 || 5 < nFound || nPos != nEnd[nFound-1] + 1)
                                // Not at 1-2-3T4:5:
                                bIso = false;
                        break;
                        case '.':
                        case ',':
                            if (nFound != 6 || nPos != nEnd[nFound-1] + 1)
                                // Not at 1-2-3T4:5:6.
                                bIso = false;
                        break;
                        case 'Z':
                            if (nFound >= 5 && nPos == nEnd[nFound-1] + 1)
                                // Accept Zero timezone.
                                ;
                            else
                                bIso = false;
                        break;
                        default:
                            bIso = false;
                    }
                }
            }
        }

        // An ISO date needs at least year, month and day.
        if (nFound < 3)
            bIso = false;

        if (bIso)
        {
            // Leave conversion and detection of various possible number
            // formats to the number formatter. ISO is recognized in any locale
            // so we can directly use the document's formatter.
            sal_uInt32 nFormat = 0;
            double fVal = 0.0;
            SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
            if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal))
            {
                if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE)
                {
                    ScAddress aPos(nCol,nRow,nTab);
                    if (bUseDocImport)
                        rDocImport.setNumericCell(aPos, fVal);
                    else
                        rDoc.SetValue(aPos, fVal);
                    rDoc.SetNumberFormat(aPos, nFormat);

                    return bMultiLine;     // success
                }
            }
            // If we reach here it is type Text (e.g. timezone or trailing
            // characters). Handled below.
        }

        if ( nFound == 1 )
        {
            //  try to break one number (without separators) into date fields

            sal_Int32 nDateStart = nStart[0];
            sal_Int32 nDateLen = nEnd[0] + 1 - nDateStart;

            if ( nDateLen >= 5 && nDateLen <= 8 &&
                    ScGlobal::getCharClass().isNumeric( rStr.copy( nDateStart, nDateLen ) ) )
            {
                //  6 digits: 2 each for day, month, year
                //  8 digits: 4 for year, 2 each for day and month
                //  5 or 7 digits: first field is shortened by 1

                bool bLongYear = ( nDateLen >= 7 );
                bool bShortFirst = ( nDateLen == 5 || nDateLen == 7 );

                // NOTE(review): positions are narrowed to sal_uInt16 here
                // while nStart/nEnd hold sal_Int32 - presumably safe because
                // cell content length is limited; confirm.
                sal_uInt16 nFieldStart = nDateStart;
                for (sal_uInt16 nPos=0; nPos<3; nPos++)
                {
                    sal_uInt16 nFieldEnd = nFieldStart + 1;     // default: 2 digits
                    if ( bLongYear && nPos == nYP )
                        nFieldEnd += 2;                     // 2 extra digits for long year
                    if ( bShortFirst && nPos == 0 )
                        --nFieldEnd;                        // first field shortened?

                    nStart[nPos] = nFieldStart;
                    nEnd[nPos]   = nFieldEnd;
                    nFieldStart  = nFieldEnd + 1;
                }
                nFound = 3;
            }
        }

        // Manual date assembly. Not entered if the string matched the ISO
        // pattern but was rejected by the number formatter above; that case
        // falls through to the generic string handling.
        if (!bIso && nFound >= 3)
        {
            using namespace ::com::sun::star;
            bool bSecondCal = false;
            sal_uInt16 nDay  = static_cast<sal_uInt16>(o3tl::toInt32(rStr.subView( nStart[nDP], nEnd[nDP]+1-nStart[nDP] )));
            sal_uInt16 nYear = static_cast<sal_uInt16>(o3tl::toInt32(rStr.subView( nStart[nYP], nEnd[nYP]+1-nStart[nYP] )));
            OUString aMStr = rStr.copy( nStart[nMP], nEnd[nMP]+1-nStart[nMP] );
            sal_Int16 nMonth = static_cast<sal_Int16>(aMStr.toInt32());
            if (!nMonth)
            {
                // The month part did not convert to a non-zero number, try
                // to match it against month names.
                static constexpr OUString aSepShortened = u"SEP"_ustr;
                uno::Sequence< i18n::CalendarItem2 > xMonths;
                sal_Int32 i, nMonthCount;
                //  first test all month names of the primary calendar
                xMonths = rCalendar.getMonths();
                nMonthCount = xMonths.getLength();
                for (i=0; i<nMonthCount && !nMonth; i++)
                {
                    if ( rTransliteration.isEqual( aMStr, xMonths[i].FullName ) ||
                         rTransliteration.isEqual( aMStr, xMonths[i].AbbrevName ) )
                        nMonth = sal::static_int_cast<sal_Int16>( i+1 );
                    else if ( i == 8 && rTransliteration.isEqual( u"SEPT"_ustr,
                                xMonths[i].AbbrevName ) &&
                            rTransliteration.isEqual( aMStr, aSepShortened ) )
                    {   // correct English abbreviation is SEPT,
                        // but data mostly contains SEP only
                        nMonth = sal::static_int_cast<sal_Int16>( i+1 );
                    }
                }
                //  if none found, then test english month names
                if ( !nMonth && pSecondCalendar && pSecondTransliteration )
                {
                    xMonths = pSecondCalendar->getMonths();
                    nMonthCount = xMonths.getLength();
                    for (i=0; i<nMonthCount && !nMonth; i++)
                    {
                        if ( pSecondTransliteration->isEqual( aMStr, xMonths[i].FullName ) ||
                             pSecondTransliteration->isEqual( aMStr, xMonths[i].AbbrevName ) )
                        {
                            nMonth = sal::static_int_cast<sal_Int16>( i+1 );
                            bSecondCal = true;
                        }
                        else if ( i == 8 && pSecondTransliteration->isEqual(
                                    aMStr, aSepShortened ) )
                        {   // correct English abbreviation is SEPT,
                            // but data mostly contains SEP only
                            nMonth = sal::static_int_cast<sal_Int16>( i+1 );
                            bSecondCal = true;
                        }
                    }
                }
            }

            SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
            if ( nYear < 100 )
                nYear = pDocFormatter->ExpandTwoDigitYear( nYear );

            // Compute the date with the calendar whose month names matched.
            CalendarWrapper* pCalendar = (bSecondCal ? pSecondCalendar : &rCalendar);
            sal_Int16 nNumMonths = pCalendar->getNumberOfMonthsInYear();
            if ( nDay && nMonth && nDay<=31 && nMonth<=nNumMonths )
            {
                --nMonth;   // the calendar's MONTH field is set zero-based
                pCalendar->setValue( i18n::CalendarFieldIndex::DAY_OF_MONTH, nDay );
                pCalendar->setValue( i18n::CalendarFieldIndex::MONTH, nMonth );
                pCalendar->setValue( i18n::CalendarFieldIndex::YEAR, nYear );
                sal_Int16 nHour, nMinute, nSecond;
                // #i14974# The imported value should have no fractional value, so set the
                // time fields to zero (ICU calendar instance defaults to current date/time)
                nHour = nMinute = nSecond = 0;
                if (nFound > 3)
                    nHour = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[3], nEnd[3]+1-nStart[3])));
                if (nFound > 4)
                    nMinute = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[4], nEnd[4]+1-nStart[4])));
                if (nFound > 5)
                    nSecond = static_cast<sal_Int16>(o3tl::toInt32(rStr.subView( nStart[5], nEnd[5]+1-nStart[5])));
                // do not use calendar's milliseconds, to avoid fractional part truncation
                double fFrac = 0.0;
                if (nFound > 6)
                {
                    // Parse the seventh part as fraction of a second and
                    // convert it to a fraction of a day.
                    sal_Unicode cDec = '.';
                    OUString aT = OUStringChar(cDec) + rStr.subView( nStart[6], nEnd[6]+1-nStart[6]);
                    rtl_math_ConversionStatus eStatus;
                    double fV = rtl::math::stringToDouble( aT, cDec, 0, &eStatus );
                    if (eStatus == rtl_math_ConversionStatus_Ok)
                        fFrac = fV / 86400.0;
                }
                sal_Int32 nPos;
                if (nFound > 3 && 1 <= nHour && nHour <= 12  // nHour 0 and >=13 can't be AM/PM
                        && (nPos = nEnd[nFound-1] + 1) < nLen)
                {
                    // Dreaded AM/PM may be following.
                    while (nPos < nLen && rStr[nPos] == ' ')
                        ++nPos;
                    if (nPos < nLen)
                    {
                        // Take the next blank-delimited word as AM/PM candidate.
                        sal_Int32 nStop = nPos;
                        while (nStop < nLen && rStr[nStop] != ' ')
                            ++nStop;
                        OUString aAmPm = rStr.copy( nPos, nStop - nPos);
                        // For AM only 12 needs to be treated, whereas for PM
                        // it must not. Check both, locale and second/English
                        // strings.
                        if (nHour == 12 &&
                                (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimeAM()) ||
                                 (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, u"AM"_ustr))))
                        {
                            nHour = 0;
                        }
                        else if (nHour < 12 &&
                                (rTransliteration.isEqual( aAmPm, pFormatter->GetLocaleData()->getTimePM()) ||
                                 (pSecondTransliteration && pSecondTransliteration->isEqual( aAmPm, u"PM"_ustr))))
                        {
                            nHour += 12;
                        }
                    }
                }
                pCalendar->setValue( i18n::CalendarFieldIndex::HOUR, nHour );
                pCalendar->setValue( i18n::CalendarFieldIndex::MINUTE, nMinute );
                pCalendar->setValue( i18n::CalendarFieldIndex::SECOND, nSecond );
                pCalendar->setValue( i18n::CalendarFieldIndex::MILLISECOND, 0 );
                if ( pCalendar->isValid() )
                {
                    // Whole days diff.
                    double fDiff = DateTime::Sub( DateTime(pDocFormatter->GetNullDate()),
                            pCalendar->getEpochStart());
                    // #i14974# must use getLocalDateTime to get the same
                    // date values as set above
                    double fDays = pCalendar->getLocalDateTime() + fFrac;
                    fDays -= fDiff;

                    LanguageType eLatin, eCjk, eCtl;
                    rDoc.GetLanguage( eLatin, eCjk, eCtl );
                    LanguageType eDocLang = eLatin;     //! which language for date formats?

                    // Date+time format if any time part was found, plain
                    // date format otherwise.
                    SvNumFormatType nType = (nFound > 3 ? SvNumFormatType::DATETIME : SvNumFormatType::DATE);
                    sal_uLong nFormat = pDocFormatter->GetStandardFormat( nType, eDocLang );
                    // maybe there is a special format including seconds or milliseconds
                    if (nFound > 5)
                        nFormat = pDocFormatter->GetStandardFormat( fDays, nFormat, nType, eDocLang);

                    ScAddress aPos(nCol,nRow,nTab);
                    if ( bUseDocImport )
                        rDocImport.setNumericCell(aPos, fDays);
                    else
                        rDoc.SetValue( aPos, fDays );
                    rDoc.SetNumberFormat(aPos, nFormat);

                    return bMultiLine;     // success
                }
            }
        }
    }

    // Standard or date not determined -> SetString / EditCell
    if( rStr.indexOf( '\n' ) == -1 )
    {
        if (!bDetectNumFormat && nColFormat == SC_COL_STANDARD)
        {
            // Import a strict ISO 8601 date(+time) string even without
            // "Detect special numbers" or "Date (YMD)".
            // The do/while(false) only serves as a block that can be left
            // with break.
            do
            {
                // Simple pre-check before calling more expensive parser.
                // ([+-])(Y)YYYY-MM-DD
                if (rStr.getLength() < 10)
                    break;
                const sal_Int32 n1 = rStr.indexOf('-', 1);
                if (n1 < 4)
                    break;
                const sal_Int32 n2 = rStr.indexOf('-', n1 + 1);
                if (n2 < 7 || n1 + 3 < n2)
                    break;

                css::util::DateTime aDateTime;
                if (!sax::Converter::parseDateTime( aDateTime, rStr))
                    break;

                // The parsed aDateTime is not used further; the value comes
                // from the document's number formatter.
                sal_uInt32 nFormat = 0;
                double fVal = 0.0;
                SvNumberFormatter* pDocFormatter = rDoc.GetFormatTable();
                if (pDocFormatter->IsNumberFormat( rStr, nFormat, fVal))
                {
                    if (pDocFormatter->GetType(nFormat) & SvNumFormatType::DATE)
                    {
                        ScAddress aPos(nCol,nRow,nTab);
                        if (bUseDocImport)
                            rDocImport.setNumericCell(aPos, fVal);
                        else
                            rDoc.SetValue(aPos, fVal);
                        rDoc.SetNumberFormat(aPos, nFormat);

                        return bMultiLine;     // success
                    }
                }
            }
            while(false);
        }

        // Single-line content: let the document interpret the string with
        // the requested detection settings.
        ScSetStringParam aParam;
        aParam.mpNumFormatter = pFormatter;
        aParam.mbDetectNumberFormat = bDetectNumFormat;
        aParam.mbDetectScientificNumberFormat = bDetectSciNumFormat;
        aParam.meSetTextNumFormat = ScSetStringParam::SpecialNumberOnly;
        aParam.mbHandleApostrophe = false;
        aParam.mbCheckLinkFormula = true;
        if ( bUseDocImport )
            rDocImport.setAutoInput(ScAddress(nCol, nRow, nTab), rStr, &aParam);
        else
            rDoc.SetString( nCol, nRow, nTab, rStr, &aParam );
    }
    else
    {
        // Content with line feeds is stored as an edit cell.
        bMultiLine = true;
        ScFieldEditEngine& rEngine = rDoc.GetEditEngine();
        rEngine.SetTextCurrentDefaults(rStr);
        if ( bUseDocImport )
            rDocImport.setEditCell(ScAddress(nCol, nRow, nTab), rEngine.CreateTextObject());
        else
            rDoc.SetEditText( ScAddress( nCol, nRow, nTab ), rEngine.CreateTextObject() );
    }
    return bMultiLine;
}

/** Extracts one fixed-width field from a line.

    The field covers rLine[nStart, nNext); nNext is clamped to the line
    length. Trailing blanks are stripped. If the remaining text starts and
    ends with a double quote, the enclosing quotes are removed.

    Content longer than nArbitraryCellLengthLimit is truncated to that limit
    and reported through rbOverflowCell.

    @param rLine           the complete line
    @param nStart          index of the first character of the field
    @param nNext           index of the first character of the next field
    @param rbIsQuoted      set to whether the field was enclosed in double
                           quotes; false for an empty range
    @param rbOverflowCell  set to true if the content had to be truncated,
                           left unchanged otherwise
    @return the field content, empty if the range is empty
 */
static OUString lcl_GetFixed( const OUString& rLine, sal_Int32 nStart, sal_Int32 nNext,
                     bool& rbIsQuoted, bool& rbOverflowCell )
{
    const sal_Int32 nLen = rLine.getLength();
    if (nNext > nLen)
        nNext = nLen;
    if ( nNext <= nStart )
    {
        // An empty field is never quoted; don't leave the out-parameter
        // holding whatever the caller passed in.
        rbIsQuoted = false;
        return OUString();
    }

    const sal_Unicode* pStr = rLine.getStr();

    // Strip trailing blanks; nSpace ends up one past the last non-blank.
    sal_Int32 nSpace = nNext;
    while ( nSpace > nStart && pStr[nSpace-1] == ' ' )
        --nSpace;

    // If the field is all blanks the first comparison already fails, so
    // pStr[nSpace-1] is never read before the field start.
    rbIsQuoted = (pStr[nStart] == '"' && pStr[nSpace-1] == '"');

    // Content range without the enclosing quotes. A single '"' counts as
    // both opening and closing quote, hence the clamp to 0.
    const sal_Int32 nContentStart = rbIsQuoted ? nStart + 1 : nStart;
    const sal_Int32 nContentLen = rbIsQuoted
        ? std::max< sal_Int32 >(0, nSpace - nStart - 2)
        : nSpace - nStart;

    // Compare the actual content length against the limit, for quoted and
    // unquoted fields alike.
    if (nContentLen <= nArbitraryCellLengthLimit)
        return rLine.copy(nContentStart, nContentLen);

    SAL_WARN( "sc", "lcl_GetFixed: line doesn't fit into data");
    rbOverflowCell = true;
    return rLine.copy(nContentStart, nArbitraryCellLengthLimit);
}

bool ScImportExport::ExtText2Doc( SvStream& rStrm )
{
    if (!pExtOptions)
        return Text2Doc( rStrm );

    sal_uInt64 const nOldPos = rStrm.Tell();
    sal_uInt64 const nRemaining = rStrm.remainingSize();
    std::unique_ptr<ScProgress> xProgress( new ScProgress( pDocSh,
            ScResId( STR_LOAD_DOC ), nRemaining, true ));
    rStrm.StartReadingUnicodeText( rStrm.GetStreamCharSet() );
    // tdf#82254 - check whether to include a byte-order-mark in the output
    if (nOldPos != rStrm.Tell())
        mbIncludeBOM = true;

    SCCOL nStartCol = aRange.aStart.Col();
    SCCOL nEndCol = aRange.aEnd.Col();
    SCROW nStartRow = aRange.aStart.Row();
    const SCTAB nTab = aRange.aStart.Tab();

    bool    bFixed              = pExtOptions->IsFixedLen();
    OUString aSeps              = pExtOptions->GetFieldSeps();  // Need non-const for ReadCsvLine(),
    const sal_Unicode* pSeps    = aSeps.getStr();               // but it will be const anyway (asserted below).
    bool    bMerge              = pExtOptions->IsMergeSeps();
    bool    bRemoveSpace        = pExtOptions->IsRemoveSpace();
    sal_uInt16  nInfoCount      = pExtOptions->GetInfoCount();
    const sal_Int32* pColStart  = pExtOptions->GetColStart();
    const sal_uInt8* pColFormat = pExtOptions->GetColFormat();
    tools::Long nSkipLines             = pExtOptions->GetStartRow();

    LanguageType eDocLang = pExtOptions->GetLanguage();
    SvNumberFormatter aNumFormatter( comphelper::getProcessComponentContext(), eDocLang);
    bool bDetectNumFormat = pExtOptions->IsDetectSpecialNumber();
    bool bDetectSciNumFormat = pExtOptions->IsDetectScientificNumber();
    bool bEvaluateFormulas = pExtOptions->IsEvaluateFormulas();
    bool bSkipEmptyCells = pExtOptions->IsSkipEmptyCells();

    // For date recognition
    ::utl::TransliterationWrapper aTransliteration(
        comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE );
    aTransliteration.loadModuleIfNeeded( eDocLang );
    CalendarWrapper aCalendar( comphelper::getProcessComponentContext() );
    aCalendar.loadDefaultCalendar(
        LanguageTag::convertToLocale( eDocLang ) );
    std::unique_ptr< ::utl::TransliterationWrapper > pEnglishTransliteration;
    std::unique_ptr< CalendarWrapper > pEnglishCalendar;
    if ( eDocLang != LANGUAGE_ENGLISH_US )
    {
        pEnglishTransliteration.reset(new ::utl::TransliterationWrapper (
            comphelper::getProcessComponentContext(), TransliterationFlags::IGNORE_CASE ));
        aTransliteration.loadModuleIfNeeded( LANGUAGE_ENGLISH_US );
        pEnglishCalendar.reset(new CalendarWrapper ( comphelper::getProcessComponentContext() ));
        pEnglishCalendar->loadDefaultCalendar(
            LanguageTag::convertToLocale( LANGUAGE_ENGLISH_US ) );
    }

    OUString aLine;
    OUString aCell;
    sal_uInt16 i;
    SCROW nRow = nStartRow;
    sal_Unicode cDetectSep = 0xffff;    // No separator detection here.

    while(--nSkipLines>0)
    {
        aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep); // content is ignored
        if ( rStrm.eof() )
            break;
    }

    // Determine range for Undo.
    // We don't need this during import of a file to a new sheet or document...
    bool bDetermineRange = bUndo;
    bool bColumnsAreDetermined = false;

    // Row heights don't need to be adjusted on the fly if EndPaste() is called
    // afterwards, which happens only if bDetermineRange. This variable also
    // survives the toggle of bDetermineRange down at the end of the do{} loop.
    bool bRangeIsDetermined = bDetermineRange;

    bool bQuotedAsText = pExtOptions && pExtOptions->IsQuotedAsText();

    sal_uInt64 nOriginalStreamPos = rStrm.Tell();

    SCROW nFirstUpdateRowHeight = SCROW_MAX;
    SCROW nLastUpdateRowHeight = -1;

    ScDocumentImport aDocImport(rDoc);
    do
    {
        const SCCOL nLastCol = nEndCol; // tdf#129701 preserve value of nEndCol
        for( ;; )
        {
            aLine = ReadCsvLine(rStrm, !bFixed, aSeps, cStr, cDetectSep);
            if ( rStrm.eof() && aLine.isEmpty() )
                break;

            assert(pSeps == aSeps.getStr());

            if ( nRow > rDoc.MaxRow() )
            {
                bOverflowRow = true;    // display warning on import
                break;  // for
            }

            if (!bDetermineRange)
                EmbeddedNullTreatment( aLine);

            sal_Int32 nLineLen = aLine.getLength();
            SCCOL nCol = nStartCol;
            bool bMultiLine = false;
            if ( bFixed ) //  Fixed line length
            {
                if (bDetermineRange)
                {
                    if (!bColumnsAreDetermined)
                    {
                        // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it
                        // is only an overflow if there is really data following to
                        // be put behind the last column, which doesn't happen if
                        // info is SC_COL_SKIP.
                        for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i)
                        {
                            const sal_uInt8 nFmt = pColFormat[i];
                            if (nFmt != SC_COL_SKIP)        // otherwise don't increment nCol either
                            {
                                if (nCol > rDoc.MaxCol())
                                    bOverflowCol = true;    // display warning on import
                                ++nCol;
                            }
                        }
                        bColumnsAreDetermined = true;
                    }
                }
                else
                {
                    sal_Int32 nStartIdx = 0;
                    // Same maxcol+1 check reason as above.
                    for (i=0; i < nInfoCount && nCol <= rDoc.MaxCol()+1; ++i)
                    {
                        sal_Int32 nNextIdx = nStartIdx;
                        if (i + 1 < nInfoCount)
                            CountVisualWidth( aLine, nNextIdx, pColStart[i+1] - pColStart[i] );
                        else
                            nNextIdx = nLineLen;
                        sal_uInt8 nFmt = pColFormat[i];
                        if (nFmt != SC_COL_SKIP)        // otherwise don't increment nCol either
                        {
                            if (nCol > rDoc.MaxCol())
                                bOverflowCol = true;    // display warning on import
                            else
                            {
                                bool bIsQuoted = false;
                                aCell = lcl_GetFixed( aLine, nStartIdx, nNextIdx, bIsQuoted, bOverflowCell );
                                if (bIsQuoted && bQuotedAsText)
                                    nFmt = SC_COL_TEXT;

                                bMultiLine |= lcl_PutString(
                                        aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
                                        &aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells,
                                        aTransliteration, aCalendar,
                                        pEnglishTransliteration.get(), pEnglishCalendar.get());
                            }
                            ++nCol;
                        }
                        nStartIdx = nNextIdx;
                    }
                }
            }
            else // Search for the separator
            {
                SCCOL nSourceCol = 0;
                sal_uInt16 nInfoStart = 0;
                const sal_Unicode* p = aLine.getStr();
                // tdf#129701 if there is only one column, and user wants to treat empty cells,
                // we need to detect *p = null
                bool bIsLastColEmpty = !(*p) && !bSkipEmptyCells && !bDetermineRange;
                // Yes, the check is nCol<=rDoc.MaxCol()+1, +1 because it is only an
                // overflow if there is really data following to be put behind
                // the last column, which doesn't happen if info is
                // SC_COL_SKIP.
                while ( (*p || bIsLastColEmpty) && nCol <= rDoc.MaxCol()+1)
                {
                    bool bIsQuoted = false;
                    p = ScImportExport::ScanNextFieldFromString( p, aCell,
                            cStr, pSeps, bMerge, bIsQuoted, bOverflowCell, bRemoveSpace );

                    sal_uInt8 nFmt = SC_COL_STANDARD;
                    for ( i=nInfoStart; i<nInfoCount; i++ )
                    {
                        if ( pColStart[i] == nSourceCol + 1 )       // pColStart is 1-based
                        {
                            nFmt = pColFormat[i];
                            nInfoStart = i + 1;     // ColInfos are in succession
                            break;  // for
                        }
                    }
                    if ( nFmt != SC_COL_SKIP )
                    {
                        if (nCol > rDoc.MaxCol())
                            bOverflowCol = true;    // display warning on import
                        else if (!bDetermineRange)
                        {
                            if (bIsQuoted && bQuotedAsText)
                                nFmt = SC_COL_TEXT;

                            bMultiLine |= lcl_PutString(
                                aDocImport, !mbOverwriting, nCol, nRow, nTab, aCell, nFmt,
                                &aNumFormatter, bDetectNumFormat, bDetectSciNumFormat, bEvaluateFormulas, bSkipEmptyCells,
--> --------------------

--> maximum size reached

--> --------------------

Messung V0.5

¤ Dauer der Verarbeitung: 0.57 Sekunden ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.