Quelle scanner.cxx Sprache: C

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
*   Licensed to the Apache Software Foundation (ASF) under one or more
*   contributor license agreements. See the NOTICE file distributed
*   with this work for additional information regarding copyright
*   ownership. The ASF licenses this file to you under the Apache
*   License, Version 2.0 (the "License"); you may not use this file
*   except in compliance with the License. You may obtain a copy of
*   the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/

#include <basiccharclass.hxx>
#include <scanner.hxx>
#include <sbintern.hxx>
#include <runtime.hxx>

#include <basic/sberrors.hxx>
#include <i18nlangtag/lang.h>
#include <svl/numformat.hxx>
#include <svl/zforlist.hxx>
#include <rtl/character.hxx>
#include <o3tl/string_view.hxx>
#include <utility>

SbiScanner::SbiScanner(OUString _aBuf, StarBASIC* p)
    : aBuf(std::move(_aBuf))
    , nLineIdx(-1)
    , nSaveLineIdx(-1)
    , pBasic(p)
    , eScanType(SbxVARIANT)
    , nVal(0)
    , nSavedCol1(0)
    , nCol(0)
    , nErrors(0)
    , nColLock(0)
    , nBufPos(0)
    , nLine(0)
    , nCol1(0)
    , nCol2(0)
    , bSymbol(false)
    , bNumber(false)
    , bSpaces(false)
    , bAbort(false)
    , bHash(true)
    , bError(false)
    , bCompatible(false)
    , bVBASupportOn(false)
    , bPrevLineExtentsComment(false)
    , bClosingUnderscore(false)
    , bLineEndsWithWhitespace(false)
    , bInStatement(false)
{
}

// Increments the column-lock counter. When the counter was zero before the
// call, the current token start column (nCol1) is remembered in nSavedCol1.
void SbiScanner::LockColumn()
{
    const bool bWasUnlocked = (nColLock == 0);
    ++nColLock;
    if (bWasUnlocked)
        nSavedCol1 = nCol1;
}

// Decrements the column-lock counter; a counter that is already zero is left
// untouched.
void SbiScanner::UnlockColumn()
{
    if (nColLock == 0)
        return;
    --nColLock;
}

// Records a compile error with the given code.
//  - While GetSbData()->bBlockCompilerError is set, nothing is reported or
//    counted; the scanner is only flagged to abort.
//  - Otherwise the error counter is incremented on every call, but only the
//    first error since bError was last cleared is forwarded to
//    pBasic->CError() (together with aError, the line and the column range).
//  - bAbort is set when CError() returns false or when the code is
//    ERRCODE_BASIC_NO_MEMORY / ERRCODE_BASIC_PROG_TOO_LARGE.
void SbiScanner::GenError( ErrCode code )
{
    if (GetSbData()->bBlockCompilerError)
    {
        bAbort = true;
        return;
    }

    // report only one error per statement
    if (!bError)
    {
        bError = true;

        bool bContinue = true;
        if (pBasic)
        {
            // EXPECTED / UNEXPECTED style errors always refer to the last
            // token, so they use nCol1 even while the column is locked.
            const bool bRefersToLastToken = code.anyOf(
                    ERRCODE_BASIC_EXPECTED,
                    ERRCODE_BASIC_UNEXPECTED,
                    ERRCODE_BASIC_SYMBOL_EXPECTED,
                    ERRCODE_BASIC_LABEL_EXPECTED);

            sal_Int32 nStartCol = nCol1;
            if (bRefersToLastToken)
            {
                // keep the reported range non-inverted
                if (nStartCol > nCol2)
                    nCol2 = nStartCol;
            }
            else if (nColLock)
            {
                nStartCol = nSavedCol1;
            }

            bContinue = pBasic->CError( code, aError, nLine, nStartCol, nCol2 );
        }

        const bool bFatalCode
            = ( code == ERRCODE_BASIC_NO_MEMORY || code == ERRCODE_BASIC_PROG_TOO_LARGE );
        if (!bContinue || bFatalCode)
            bAbort = true;
    }

    ++nErrors;
}

// Label detection helper (used by SbiTokenizer::MayBeLabel()).
// If the character at the current column is ':', it is consumed (nCol and
// nLineIdx advance by one) and true is returned; otherwise nothing changes
// and false is returned.
bool SbiScanner::DoesColonFollow()
{
    const bool bColonHere = nCol < aLine.getLength() && aLine[nCol] == ':';
    if (bColonHere)
    {
        ++nLineIdx;
        ++nCol;
    }
    return bColonHere;
}

// Maps a type-suffix character to the data type it denotes:
//   %  SbxINTEGER    &  SbxLONG      !  SbxSINGLE
//   #  SbxDOUBLE     @  SbxCURRENCY  $  SbxSTRING
// Any other character yields SbxVARIANT, i.e. "not a suffix".
static SbxDataType GetSuffixType( sal_Unicode c )
{
    struct SuffixEntry
    {
        sal_Unicode cSuffix;
        SbxDataType eType;
    };
    static constexpr SuffixEntry aSuffixTable[] = {
        { u'%', SbxINTEGER },
        { u'&', SbxLONG },
        { u'!', SbxSINGLE },
        { u'#', SbxDOUBLE },
        { u'@', SbxCURRENCY },
        { u'$', SbxSTRING },
    };

    for (const SuffixEntry& rEntry : aSuffixTable)
    {
        if (rEntry.cSuffix == c)
            return rEntry.eType;
    }
    return SbxVARIANT;
}

// SbiScanner::NextSym() reads the next symbol into the members aSym, nVal and
// eScanType; it returns false at EOF or when no symbol could be scanned.
//
// BUF_SIZE is the capacity (in sal_Unicode units, including the terminating
// zero) of the stack buffer in which NextSym() collects a decimal number literal.
#define BUF_SIZE 80

void SbiScanner::scanAlphanumeric()
{
    sal_Int32 n = nCol;
    while(nCol < aLine.getLength() && (BasicCharClass::isAlphaNumeric(aLine[nCol], bCompatible) || aLine[nCol] == '_'))
    {
        ++nLineIdx;
        ++nCol;
    }
    aSym = aLine.copy(n, nCol - n);
}

void SbiScanner::scanGoto()
{
    sal_Int32 n = nCol;
    while(n < aLine.getLength() && BasicCharClass::isWhitespace(aLine[n]))
        ++n;

    if(n + 1 < aLine.getLength())
    {
        std::u16string_view aTemp = aLine.subView(n, 2);
        if(o3tl::equalsIgnoreAsciiCase(aTemp, u"to"))
        {
            aSym = "goto";
            nLineIdx += n + 2 - nCol;
            nCol = n + 2;
        }
    }
}

bool SbiScanner::readLine()
{
    if(nBufPos >= aBuf.getLength())
        return false;

    sal_Int32 n = nBufPos;
    sal_Int32 nLen = aBuf.getLength();

    while(n < nLen && aBuf[n] != '\r' && aBuf[n] != '\n')
        ++n;

    // Trim trailing whitespace
    sal_Int32 nEnd = n;
    while(nBufPos < nEnd && BasicCharClass::isWhitespace(aBuf[nEnd - 1]))
        --nEnd;

    // tdf#149402 - check if line ends with a whitespace
    bLineEndsWithWhitespace = (n > nEnd);
    aLine = aBuf.copy(nBufPos, nEnd - nBufPos);

    // Fast-forward past the line ending
    if(n + 1 < nLen && aBuf[n] == '\r' && aBuf[n + 1] == '\n')
        n += 2;
    else if(n < nLen)
        ++n;

    nBufPos = n;
    nLineIdx = 0;

    ++nLine;
    nCol = nCol1 = nCol2 = 0;
    nColLock = 0;

    return true;
}

// Tells whether the text following a leading '#' starts with one of the known
// compiler directive keywords ("if", "elseif", "else", "end", "const").
// The comparison is done with o3tl::matchIgnoreAsciiCase(), i.e. ignoring
// ASCII case.
static bool isValidCompilerDirective(std::u16string_view directive)
{
    static constexpr std::string_view aKnownDirectives[]
        = { "if", "elseif", "else", "end", "const" };

    for (const std::string_view& rKeyword : aKnownDirectives)
    {
        if (o3tl::matchIgnoreAsciiCase(directive, rKeyword))
            return true;
    }
    return false;
}

bool SbiScanner::NextSym()
{
    // memorize for the EOLN-case
    sal_Int32 nOldLine = nLine;
    sal_Int32 nOldCol1 = nCol1;
    sal_Int32 nOldCol2 = nCol2;
    sal_Unicode buf[ BUF_SIZE ], *p = buf;

    eScanType = SbxVARIANT;
    aSym.clear();
    bHash = bSymbol = bNumber = bSpaces = false;

    // read in line?
    if (nLineIdx == -1)
    {
        if(!readLine())
            return false;

        nOldLine = nLine;
        nOldCol1 = nOldCol2 = 0;
    }

    const sal_Int32 nLineIdxScanStart = nLineIdx;

    if(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol]))
    {
        bSpaces = true;
        while(nCol < aLine.getLength() && BasicCharClass::isWhitespace(aLine[nCol]))
        {
            ++nLineIdx;
            ++nCol;
        }
    }

    nCol1 = nCol;

    // only blank line?
    if(nCol >= aLine.getLength())
        goto eoln;

    if( bPrevLineExtentsComment )
        goto PrevLineCommentLbl;

    if(nCol < aLine.getLength() && aLine[nCol] == '#')
    {
        sal_Int32 nLineTempIdx = nLineIdx;
        std::u16string_view candidate(aLine.subView(nCol + 1));

        do
        {
            nLineTempIdx++;
        } while (nLineTempIdx < aLine.getLength() && !BasicCharClass::isWhitespace(aLine[nLineTempIdx])
            && aLine[nLineTempIdx] != '#' && aLine[nLineTempIdx] != ',');
        // leave it if it is a date literal - it will be handled later
        if (nLineTempIdx >= aLine.getLength() || aLine[nLineTempIdx] != '#')
        {
            ++nLineIdx;
            ++nCol;
            //handle compiler directives (# is first non-space character)
            if (nOldCol2 == 0)
            {
                if (isValidCompilerDirective(candidate))
                {
                    // Skip the whole line if starts with a hash and is a valid compiler directive
                    nCol = 0;
                    goto eoln;
                }
                else
                {
                    GenError(ERRCODE_BASIC_SYNTAX);
                }
            }
            else
                bHash = true;
        }
    }

    // copy character if symbol
    if(nCol < aLine.getLength() && (BasicCharClass::isAlpha(aLine[nCol], bCompatible) || aLine[nCol] == '_'))
    {
        // if there's nothing behind '_' , it's the end of a line!
        if(nCol + 1 == aLine.getLength() && aLine[nCol] == '_')
        {
            // Note that nCol is not incremented here...
            ++nLineIdx;
            goto eoln;
        }

        bSymbol = true;

        scanAlphanumeric();

        // Special handling for "go to"
        if(nCol < aLine.getLength() && bCompatible && aSym.equalsIgnoreAsciiCase("go"))
            scanGoto();

        // tdf#125637 - check for closing underscore
        if (nCol == aLine.getLength() && aLine[nCol - 1] == '_')
        {
            bClosingUnderscore = true;
        }
        // type recognition?
        // don't test the exclamation mark
        // if there's a symbol behind it
        else if((nCol >= aLine.getLength() || aLine[nCol] != '!') ||
                (nCol + 1 >= aLine.getLength() || !BasicCharClass::isAlpha(aLine[nCol + 1], bCompatible)))
        {
            if(nCol < aLine.getLength())
            {
                SbxDataType t(GetSuffixType(aLine[nCol]));
                if( t != SbxVARIANT )
                {
                    eScanType = t;
                    ++nLineIdx;
                    ++nCol;
                }
            }
        }
    }

    // read in and convert if number
    else if((nCol < aLine.getLength() && rtl::isAsciiDigit(aLine[nCol])) ||
            (nCol + 1 < aLine.getLength() && aLine[nCol] == '.' && rtl::isAsciiDigit(aLine[nCol + 1])))
    {
        short exp = 0;
        short dec = 0;
        eScanType = SbxDOUBLE;
        bool bScanError = false;
        bool bBufOverflow = false;
        // All this because of 'D' or 'd' floating point type, sigh...
        while(!bScanError && nCol < aLine.getLength() && strchr("0123456789.DEde", aLine[nCol]))
        {
            // from 4.1.1996: buffer full? -> go on scanning empty
            if( (p-buf) == (BUF_SIZE-1) )
            {
                bBufOverflow = true;
                ++nLineIdx;
                ++nCol;
                continue;
            }
            // point or exponent?
            if(aLine[nCol] == '.')
            {
                if( ++dec > 1 )
                    bScanError = true;
                else
                    *p++ = '.';
            }
            else if(strchr("DdEe", aLine[nCol]))
            {
                if (++exp > 1)
                    bScanError = true;
                else
                {
                    *p++ = 'E';
                    if (nCol + 1 < aLine.getLength() && (aLine[nCol+1] == '+' || aLine[nCol+1] == '-'))
                    {
                        ++nLineIdx;
                        ++nCol;
                        if( (p-buf) == (BUF_SIZE-1) )
                        {
                            bBufOverflow = true;
                            continue;
                        }
                        *p++ = aLine[nCol];
                    }
                }
            }
            else
            {
                *p++ = aLine[nCol];
            }
            ++nLineIdx;
            ++nCol;
        }
        *p = 0;
        aSym = p; bNumber = true;

        // For bad characters, scan and parse errors generate only one error.
        ErrCode nError = ERRCODE_NONE;
        if (bScanError)
        {
            --nLineIdx;
            --nCol;
            aError = OUString( aLine[nCol]);
            nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER;
        }

        rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
        const sal_Unicode* pParseEnd = buf;
        nVal = rtl_math_uStringToDouble( buf, buf+(p-buf), '.', ',', &eStatus, &pParseEnd );
        if (pParseEnd != buf+(p-buf))
        {
            // e.g. "12e" or "12e+", or with bScanError "12d"+"E".
            sal_Int32 nChars = buf+(p-buf) - pParseEnd;
            nLineIdx -= nChars;
            nCol -= nChars;
            // For bScanError, nLineIdx and nCol were already decremented, just
            // add that character to the parse end.
            if (bScanError)
                ++nChars;
            // Copy error position from original string, not the buffer
            // replacement where "12dE" => "12EE".
            aError = aLine.copy( nCol, nChars);
            nError = ERRCODE_BASIC_BAD_CHAR_IN_NUMBER;
        }
        else if (eStatus != rtl_math_ConversionStatus_Ok)
        {
            // Keep the scan error and character at position, if any.
            if (!nError)
                nError = ERRCODE_BASIC_MATH_OVERFLOW;
        }

        if (nError)
            GenError( nError );

        if( !dec && !exp )
        {
            if( nVal >= SbxMININT && nVal <= SbxMAXINT )
                eScanType = SbxINTEGER;
            else if( nVal >= SbxMINLNG && nVal <= SbxMAXLNG )
                    eScanType = SbxLONG;
        }

        if( bBufOverflow )
            GenError( ERRCODE_BASIC_MATH_OVERFLOW );

        // type recognition?
        if( nCol < aLine.getLength() )
        {
            SbxDataType t(GetSuffixType(aLine[nCol]));
            if( t != SbxVARIANT )
            {
                eScanType = t;
                ++nLineIdx;
                ++nCol;
            }
            // tdf#130476 - don't allow String trailing data type character with numbers
            if ( t == SbxSTRING )
            {
                GenError( ERRCODE_BASIC_SYNTAX );
            }
        }
    }

    // Hex/octal number? Read in and convert:
    else if(aLine.getLength() - nCol > 1 && aLine[nCol] == '&')
    {
        ++nLineIdx; ++nCol;
        sal_Unicode base = 16;
        sal_Unicode xch  = aLine[nCol];
        ++nLineIdx; ++nCol;
        switch( rtl::toAsciiUpperCase( xch ) )
        {
            case 'O':
                base = 8;
                break;
            case 'H':
                break;
            default :
                // treated as an operator
                --nLineIdx; --nCol; nCol1 = nCol-1;
                aSym = "&";
                return true;
        }
        bNumber = true;
        // Hex literals are signed Integers ( as defined by basic
        // e.g. -2,147,483,648 through 2,147,483,647 (signed)
        sal_uInt64 lu = 0;
        bool bOverflow = false;
        while(nCol < aLine.getLength() && BasicCharClass::isAlphaNumeric(aLine[nCol], false))
        {
            sal_Unicode ch = rtl::toAsciiUpperCase(aLine[nCol]);
            ++nLineIdx; ++nCol;
            if( ((base == 16 ) && rtl::isAsciiHexDigit( ch ) ) ||
                     ((base == 8) && rtl::isAsciiOctalDigit( ch )))
            {
                int i = ch  - '0';
                if( i > 9 ) i -= 7;
                lu = ( lu * base ) + i;
                if( lu > SAL_MAX_UINT32 )
                {
                    bOverflow = true;
                }
            }
            else
            {
                aError = OUString(ch);
                GenError( ERRCODE_BASIC_BAD_CHAR_IN_NUMBER );
            }
        }

        // tdf#130476 - take into account trailing data type characters
        if( nCol < aLine.getLength() )
        {
            SbxDataType t(GetSuffixType(aLine[nCol]));
            if( t != SbxVARIANT )
            {
                eScanType = t;
                ++nLineIdx;
                ++nCol;
            }
            // tdf#130476 - don't allow String trailing data type character with numbers
            if ( t == SbxSTRING )
            {
                GenError( ERRCODE_BASIC_SYNTAX );
            }
        }

        // tdf#130476 - take into account trailing data type characters
        switch ( eScanType )
        {
            case SbxINTEGER:
                nVal = static_cast<double>( static_cast<sal_Int16>(lu) );
                if ( lu > SbxMAXUINT )
                {
                    bOverflow = true;
                }
                break;
            case SbxLONG: nVal = static_cast<double>( static_cast<sal_Int32>(lu) ); break;
            case SbxVARIANT:
            {
                // tdf#62326 - If the value of the hex string without explicit type character lies within
                // the range of 0x8000 (SbxMAXINT + 1) and 0xFFFF (SbxMAXUINT) inclusive, cast the value
                // to 16 bit in order to get signed integers, e.g., SbxMININT through SbxMAXINT
                sal_Int32 ls = (lu > SbxMAXINT && lu <= SbxMAXUINT) ? static_cast<sal_Int16>(lu) : static_cast<sal_Int32>(lu);
                eScanType = ( ls >= SbxMININT && ls <= SbxMAXINT ) ? SbxINTEGER : SbxLONG;
                nVal = static_cast<double>(ls);
                break;
            }
            default:
                nVal = static_cast<double>(lu);
                break;
        }
        if( bOverflow )
            GenError( ERRCODE_BASIC_MATH_OVERFLOW );
    }

    // Strings:
    else if (nLineIdx < aLine.getLength() && (aLine[nLineIdx] == '"' || aLine[nLineIdx] == '['))
    {
        sal_Unicode cSep = aLine[nLineIdx];
        if( cSep == '[' )
        {
            bSymbol = true;
            cSep = ']';
        }
        sal_Int32 n = nCol + 1;
        while (nLineIdx < aLine.getLength())
        {
            do
            {
                nLineIdx++;
                nCol++;
            }
            while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != cSep));
            if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == cSep)
            {
                nLineIdx++; nCol++;
                if (nLineIdx >= aLine.getLength() || aLine[nLineIdx] != cSep || cSep == ']')
                {
                    // If VBA Interop then doesn't eat the [] chars
                    if ( cSep == ']' && bVBASupportOn )
                        aSym = aLine.copy( n - 1, nCol - n  + 1);
                    else
                        aSym = aLine.copy( n, nCol - n - 1 );
                    // get out duplicate string delimiters
                    OUStringBuffer aSymBuf(aSym.getLength());
                    for ( sal_Int32 i = 0, len = aSym.getLength(); i < len; ++i )
                    {
                        aSymBuf.append( aSym[i] );
                        if ( aSym[i] == cSep && ( i+1 < len ) && aSym[i+1] == cSep )
                            ++i;
                    }
                    aSym = aSymBuf.makeStringAndClear();
                    if( cSep != ']' )
                        eScanType = SbxSTRING;
                    break;
                }
            }
            else
            {
                aError = OUString(cSep);
                GenError( ERRCODE_BASIC_EXPECTED );
            }
        }
    }

    // Date:
    else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#')
    {
        sal_Int32 n = nCol + 1;
        do
        {
            nLineIdx++;
            nCol++;
        }
        while (nLineIdx < aLine.getLength() && (aLine[nLineIdx] != '#'));
        if (nLineIdx < aLine.getLength() && aLine[nLineIdx] == '#')
        {
            nLineIdx++; nCol++;
            aSym = aLine.copy( n, nCol - n - 1 );

            // parse date literal
            std::shared_ptr<SvNumberFormatter> pFormatter;
            if (GetSbData()->pInst)
            {
                pFormatter = GetSbData()->pInst->GetNumberFormatter();
            }
            else
            {
                sal_uInt32 nDummy;
                pFormatter = SbiInstance::PrepareNumberFormatter( nDummy, nDummy, nDummy );
            }
            sal_uInt32 nIndex = pFormatter->GetStandardIndex( LANGUAGE_ENGLISH_US);
            bool bSuccess = pFormatter->IsNumberFormat(aSym, nIndex, nVal);
            if( bSuccess )
            {
                SvNumFormatType nType_ = pFormatter->GetType(nIndex);
                if( !(nType_ & SvNumFormatType::DATE) )
                    bSuccess = false;
            }

            if (!bSuccess)
                GenError( ERRCODE_BASIC_CONVERSION );

            bNumber = true;
            eScanType = SbxDATE;
        }
        else
        {
            aError = OUString('#');
            GenError( ERRCODE_BASIC_EXPECTED );
        }
    }
    // invalid characters:
    else if (nLineIdx < aLine.getLength() && aLine[nLineIdx] >= 0x7F)
    {
        GenError( ERRCODE_BASIC_SYNTAX ); nLineIdx++; nCol++;
    }
    // other groups:
    else
    {
        sal_Int32 n = 1;
        auto nChar = nLineIdx < aLine.getLength() ? aLine[nLineIdx] : 0;
        ++nLineIdx;
        if (nLineIdx < aLine.getLength())
        {
            switch (nChar)
            {
                case '<': if( aLine[nLineIdx] == '>' || aLine[nLineIdx] == '=' ) n = 2; break;
                case '>': if( aLine[nLineIdx] == '=' ) n = 2; break;
                case ':': if( aLine[nLineIdx] == '=' ) n = 2; break;
            }
        }
        aSym = aLine.copy(nCol, std::min(n, aLine.getLength() - nCol));
        nLineIdx += n-1; nCol = nCol + n;
    }

    nCol2 = nCol-1;

PrevLineCommentLbl:

    if (bPrevLineExtentsComment ||
        (eScanType != SbxSTRING &&
        (aSym.startsWith("'") || aSym.equalsIgnoreAsciiCase("REM") || aSym.startsWith("#"))))
    {
        bPrevLineExtentsComment = false;
        aSym = "REM";
        sal_Int32 nLen = aLine.getLength() - nLineIdx;
        // tdf#149402 - don't extend comment if line ends in a whitespace (BasicCharClass::isWhitespace)
        if (bCompatible && !bLineEndsWithWhitespace && aLine[nLineIdx + nLen - 1] == '_'
            && aLine[nLineIdx + nLen - 2] == ' ')
            bPrevLineExtentsComment = true;
        nCol2 = nCol2 + nLen;
        nLineIdx = -1;
    }

    if (nLineIdx == nLineIdxScanStart)
    {
        GenError( ERRCODE_BASIC_SYMBOL_EXPECTED );
        return false;
    }

    return true;

eoln:
    if (nCol && aLine[--nLineIdx] == '_' && !bClosingUnderscore)
    {
        nLineIdx = -1;
        bool bRes = NextSym();
        if( aSym.startsWith(".") )
        {
            // object _
            //    .Method
            // ^^^  <- spaces is legal in MSO VBA
            bSpaces = false;
        }
        return bRes;
    }
    else
    {
        nLineIdx = -1;
        nLine = nOldLine;
        nCol1 = nOldCol1;
        nCol2 = nOldCol2;
        aSym = "\n";
        nColLock = 0;
        bClosingUnderscore = false;
        // tdf#149157 - break multiline continuation in a comment after a new line
        bPrevLineExtentsComment = false;
        return true;
    }
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Messung V0.5

¤ Dauer der Verarbeitung: 0.15 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder die Vollständigkeit noch die Richtigkeit noch die Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.