/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
namespace
{

/**
 * Tells whether a character counts as alphabetic for the tokenizer.
 *
 * ASCII letters are accepted directly via rtl::isAsciiAlpha; every other
 * character is handed to u_isalpha (presumably ICU's Unicode alphabetic
 * test -- confirm against the file's includes).
 *
 * @param c  the UTF-16 code unit to classify
 * @return   true if c is an ASCII letter, otherwise the result of u_isalpha(c)
 */
bool isAlpha(sal_Unicode c)
{
    // Fast path: plain ASCII letters need no further lookup.
    if (rtl::isAsciiAlpha(c))
        return true;

    return u_isalpha(c);
}

}
class SyntaxHighlighter::Tokenizer
{ // Character information tables
CharFlags aCharTypeTab[256] = {};
// Auxiliary function: testing of the character flags bool testCharFlags(sal_Unicode c, CharFlags nTestFlags) const;
// Get new token, EmptyString == nothing more over there bool getNextToken(std::u16string_view::const_iterator& pos, std::u16string_view::const_iterator end, /*out*/TokenType& reType, /*out*/std::u16string_view::const_iterator& rpStartPos, /*out*/std::u16string_view::const_iterator& rpEndPos) const;
if( aByteStr == "rem" )
{ // Remove all characters until end of line or EOF for (;;)
{ if (pos == end) break;
sal_Unicode cPeek = *pos; if ( testCharFlags( cPeek, CharFlags::EOL ) ) break;
++pos;
}
reType = TokenType::Comment;
}
}
}
}
}
// Operator? // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there elseif ( testCharFlags( c, CharFlags::Operator ) || ( (c == '\'') && (aLanguage==HighlighterLanguage::Basic)) )
{ // parameters for SQL view if (((c==':') || (c=='?')) && (aLanguage == HighlighterLanguage::SQL))
{ if (c!='?')
{ bool bIdentifierChar; do
{ // Get next character if (pos == end) break;
c = *pos;
bIdentifierChar = isAlpha(c); if( bIdentifierChar )
++pos;
} while( bIdentifierChar );
}
reType = TokenType::Parameter;
} elseif ((c=='-') && (aLanguage == HighlighterLanguage::SQL))
{ if (pos != end && *pos=='-')
{ // Remove all characters until end of line or EOF while( pos != end && !testCharFlags( *pos, CharFlags::EOL ) )
{
++pos;
}
reType = TokenType::Comment;
} else
reType = TokenType::Operator;
} elseif ((c=='/') && (aLanguage == HighlighterLanguage::SQL))
{ if (pos != end && *pos=='/')
{ // Remove all characters until end of line or EOF while( pos != end && !testCharFlags( *pos, CharFlags::EOL ) )
{
++pos;
}
reType = TokenType::Comment;
} else
reType = TokenType::Operator;
} else
{ // Apostrophe is Basic comment if (( c == '\'') && (aLanguage == HighlighterLanguage::Basic))
{ // Skip all characters until end of input or end of line: for (;;) { if (pos == end) break;
c = *pos; if (testCharFlags(c, CharFlags::EOL)) { break;
}
++pos;
}
reType = TokenType::Comment;
}
// The real operator; can be easily used since not the actual // operator (e.g. +=) is concerned, but the fact that it is one if( reType != TokenType::Comment )
{
reType = TokenType::Operator;
}
}
}
// Object separator? Must be handled before Number elseif( c == '.' && ( pos == end || *pos < '0' || *pos > '9' ) )
{
reType = TokenType::Operator;
}
// Number system, 10 = normal, it is changed for Oct/Hex int nRadix = 10;
// Is it an Oct or a Hex number? if( c == '&' )
{ // Octal? if( pos != end && (*pos == 'o' || *pos == 'O' ))
{ // remove o
++pos;
nRadix = 8; // Octal base
// Read all numbers while( pos != end && testCharFlags( *pos, CharFlags::InOctNumber ) )
++pos;
} // Hexadecimal? elseif( pos != end && (*pos == 'h' || *pos == 'H' ))
{ // remove x
++pos;
nRadix = 16; // Hexadecimal base
// When it is not Oct or Hex, then it is double if( reType == TokenType::Number && nRadix == 10 )
{ // Flag if the last character is an exponent bool bAfterExpChar = false;
// Read all numbers while( pos != end && (testCharFlags( *pos, CharFlags::InNumber ) ||
(bAfterExpChar && *pos == '+' ) ||
(bAfterExpChar && *pos == '-' ) )) // After exponent +/- are OK, too
{
c = *pos++;
bAfterExpChar = ( c == 'e' || c == 'E' );
}
}
}
// String? elseif( testCharFlags( c, CharFlags::StartString ) )
{ // Remember which character has opened the string
sal_Unicode cEndString = c; if( c == '[' )
cEndString = ']';
// Read all characters while( pos == end || *pos != cEndString )
{ // Detect EOF before reading next char, so we do not lose EOF if( pos == end )
{ // ERROR: unterminated string literal
reType = TokenType::Error; break;
}
c = *pos++; if( testCharFlags( c, CharFlags::EOL ) )
{ // ERROR: unterminated string literal
reType = TokenType::Error; break;
}
}
// End of line? elseif( testCharFlags( c, CharFlags::EOL ) )
{ // If another EOL character comes, read it if (pos != end)
{
sal_Unicode cNext = *pos; if( cNext != c && testCharFlags( cNext, CharFlags::EOL ) )
++pos;
}
reType = TokenType::EOL;
}
// All other will remain TokenType::Unknown
// Save end position
rpEndPos = pos; returntrue;
}
/**
 * Builds the per-character classification table used by the tokenizer.
 *
 * Every entry of aCharTypeTab that corresponds to a character with a special
 * lexical role gets the matching CharFlags bits OR-ed in. The keyword list is
 * left empty (null pointer, count 0).
 *
 * @param aLang  language the tokenizer is created for; stored in aLanguage
 */
SyntaxHighlighter::Tokenizer::Tokenizer( HighlighterLanguage aLang ): aLanguage(aLang)
{
    // Allowed characters for identifiers: letters, '_' and '$'
    const CharFlags nIdentifierMask = CharFlags::StartIdentifier | CharFlags::InIdentifier;
    for( sal_uInt16 i = 'a' ; i <= 'z' ; ++i )
        aCharTypeTab[i] |= nIdentifierMask;
    for( sal_uInt16 i = 'A' ; i <= 'Z' ; ++i )
        aCharTypeTab[i] |= nIdentifierMask;
    aCharTypeTab[int('_')] |= nIdentifierMask;
    aCharTypeTab[int('$')] |= nIdentifierMask;

    // Digits (can be part of an identifier and of a number)
    const CharFlags nDigitMask = CharFlags::InIdentifier | CharFlags::StartNumber |
                                 CharFlags::InNumber | CharFlags::InHexNumber;
    for( sal_uInt16 i = '0' ; i <= '9' ; ++i )
        aCharTypeTab[i] |= nDigitMask;

    // Add e, E, . and & here manually
    aCharTypeTab[int('e')] |= CharFlags::InNumber;
    aCharTypeTab[int('E')] |= CharFlags::InNumber;
    aCharTypeTab[int('.')] |= CharFlags::InNumber | CharFlags::StartNumber;
    aCharTypeTab[int('&')] |= CharFlags::StartNumber;

    // Hexadecimal digits beyond 0-9
    for( sal_uInt16 i = 'a' ; i <= 'f' ; ++i )
        aCharTypeTab[i] |= CharFlags::InHexNumber;
    for( sal_uInt16 i = 'A' ; i <= 'F' ; ++i )
        aCharTypeTab[i] |= CharFlags::InHexNumber;

    // Octal digits
    for( sal_uInt16 i = '0' ; i <= '7' ; ++i )
        aCharTypeTab[i] |= CharFlags::InOctNumber;

    // Space
    aCharTypeTab[int(' ') ] |= CharFlags::Space;
    aCharTypeTab[int('\t')] |= CharFlags::Space;

    // End of line characters
    aCharTypeTab[int('\r')] |= CharFlags::EOL;
    aCharTypeTab[int('\n')] |= CharFlags::EOL;

    // No keyword list is installed at construction time
    ppListKeyWords = nullptr;
    nKeyWordCount = 0;
}
void SyntaxHighlighter::Tokenizer::getHighlightPortions(std::u16string_view rLine, /*out*/std::vector<HighlightPortion>& portions) const
{ // Set the position to the beginning of the source string auto pos = rLine.begin();
// Variables for the out parameter
TokenType eType;
std::u16string_view::const_iterator pStartPos;
std::u16string_view::const_iterator pEndPos;
// Loop over all the tokens while( getNextToken( pos, rLine.end(), eType, pStartPos, pEndPos ) )
{
portions.emplace_back(
pStartPos - rLine.begin(), pEndPos - rLine.begin(), eType);
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.