/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
// First character may be any alphabetic const sal_Int32 coStartFlags = KParseTokens::ANY_LETTER | KParseTokens::IGNORE_LEADING_WS;
// Continuing characters may be any alphabetic const sal_Int32 coContFlags = (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
| KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING; // First character for numbers, may be any numeric or dot const sal_Int32 coNumStartFlags
= KParseTokens::ASC_DIGIT | KParseTokens::ASC_DOT | KParseTokens::IGNORE_LEADING_WS; // Continuing characters for numbers, may be any numeric or dot or comma. // tdf#127873: additionally accept ',' comma group separator as too many // existing documents unwittingly may have used that as decimal separator // in such locales (though it never was as this is always the en-US locale // and the group separator is only parsed away). const sal_Int32 coNumContFlags = (coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
| KParseTokens::GROUP_SEPARATOR_IN_NUMBER; // First character for numbers hexadecimal const sal_Int32 coNum16StartFlags
= KParseTokens::ASC_DIGIT | KParseTokens::ASC_UPALPHA | KParseTokens::IGNORE_LEADING_WS;
// Continuing characters for numbers hexadecimal const sal_Int32 coNum16ContFlags = (coNum16StartFlags & ~KParseTokens::IGNORE_LEADING_WS); // user-defined char continuing characters may be any alphanumeric or dot. const sal_Int32 coUserDefinedCharContFlags = KParseTokens::ANY_LETTER_OR_NUMBER
| KParseTokens::ASC_DOT
| KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
//Checks if keyword is in the list. staticinlinebool findCompare(const SmTokenTableEntry& lhs, const OUString& s)
{ return s.compareToIgnoreAsciiCase(lhs.aIdent) > 0;
}
//Returns the SmTokenTableEntry for a keyword const SmTokenTableEntry* GetTokenTableEntry(const OUString& rName)
{ if (rName.isEmpty()) return nullptr; //avoid null pointer exceptions //Looks for the first keyword after or equal to rName in alphabetical order. auto findIter
= std::lower_bound(std::begin(aTokenTable), std::end(aTokenTable), rName, findCompare); if (findIter != std::end(aTokenTable) && rName.equalsIgnoreAsciiCase(findIter->aIdent)) return &*findIter; //check is equal return nullptr; //not found
}
// Rewrites `string` so that it can be used as literal text.
//
// force == true:  the input is returned wrapped in double quotes, with no
//                 per-character processing.
// force == false: the input is processed character by character:
//                   '"'                      -> the four characters "\""
//                   member of DelimiterTable1 -> that character wrapped in double quotes
//                   member of DelimiterTable2 -> that character preceded by a backslash
//                   anything else             -> copied unchanged
//                 If the processed text is itself a keyword of the token table
//                 (other than a function or operator), the whole result is
//                 additionally wrapped in double quotes.
OUString encloseOrEscapeLiteral(const OUString& string, bool force)
{
    if (force)
        return "\"" + string + "\"";

    OUStringBuffer result;

    // Whitespace and operator-like characters: emitted as a quoted single character.
    // '"' is deliberately in neither table; the loop handles it first.
    const std::unordered_set<sal_Unicode> DelimiterTable1{
        ' ', '\t', '\n', '\r', '+', '-', '*', '/', '=', '^',
        '_', '#',  '%',  '>',  '<', '&', '|', '~', '`'
    };
    // Bracket characters: emitted with a leading backslash.
    const std::unordered_set<sal_Unicode> DelimiterTable2{
        '{', '}', '(', ')', '[', ']',
    };

    for (sal_Int32 i = 0; i < string.getLength(); i++)
    {
        if (string[i] == '"')
            result.append("\"\\\"\"");
        else if (DelimiterTable1.find(string[i]) != DelimiterTable1.end())
            result.append("\"" + OUStringChar(string[i]) + "\"");
        else if (DelimiterTable2.find(string[i]) != DelimiterTable2.end())
            result.append("\\" + OUStringChar(string[i]));
        else
            result.append(string[i]);
    }

    OUString resultString = result.makeStringAndClear();
    const SmTokenTableEntry* tkn = GetTokenTableEntry(resultString);
    // excluding function and operator as they take arguments and can't treat them
    // as literal or else arguments are not displayed correctly
    if (tkn && tkn->nGroup != TG::Function && tkn->nGroup != TG::Oper)
    {
        resultString = "\"" + resultString + "\"";
    }
    return resultString;
}
staticbool IsDelimiter(const OUString& rTxt, sal_Int32 nPos)
{ // returns 'true' iff cChar is '\0' or a delimiter
assert(nPos <= rTxt.getLength()); //index out of range if (nPos == rTxt.getLength()) returntrue; //This is EOF
sal_Unicode cChar = rTxt[nPos];
// check if 'cChar' is in the delimiter table static constexpr sal_Unicode aDelimiterTable[] = { ' ', '{', '}', '(', ')', '\t', '\n', '\r', '+', '-', '*', '/', '=', '[', ']', '^', '_', '#', '%', '>', '<', '&', '|', '\\', '"', '~', '`'
}; //reordered by usage (by eye) for nanoseconds saving.
//checks the array for (autoconst& cDelimiter : aDelimiterTable)
{ if (cDelimiter == cChar) returntrue;
}
// checks number used as arguments in Math formulas (e.g. 'size' command) // Format: no negative numbers, must start with a digit, no exponent notation, ... staticbool lcl_IsNumber(const OUString& rText)
{ bool bPoint = false; const sal_Unicode* pBuffer = rText.getStr(); for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
{ const sal_Unicode cChar = *pBuffer; if (cChar == '.')
{ if (bPoint) returnfalse; else
bPoint = true;
} elseif (!rtl::isAsciiDigit(cChar)) returnfalse;
} returntrue;
} // checks number used as arguments in Math formulas (e.g. 'size' command) // Format: no negative numbers, must start with a digit, no exponent notation, ... staticbool lcl_IsNotWholeNumber(const OUString& rText)
{ const sal_Unicode* pBuffer = rText.getStr(); for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++) if (!rtl::isAsciiDigit(*pBuffer)) returntrue; returnfalse;
} // checks hex number used as arguments in Math formulas (e.g. 'hex' command) // Format: no negative numbers, must start with a digit, no exponent notation, ... staticbool lcl_IsNotWholeNumber16(const OUString& rText)
{ const sal_Unicode* pBuffer = rText.getStr(); for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++) if (!rtl::isAsciiCanonicHexDigit(*pBuffer)) returntrue; returnfalse;
}
void SmParser5::NextToken() //Central part of the parser
{
sal_Int32 nBufLen = m_aBufferString.getLength();
ParseResult aRes;
sal_Int32 nRealStart; bool bCont; do
{ // skip white spaces while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex))
++m_nBufferIndex;
// Try to parse a number in a locale-independent manner using // '.' as decimal separator. // See https://bz.apache.org/ooo/show_bug.cgi?id=45779
aRes
= m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER, m_aBufferString, m_nBufferIndex,
coNumStartFlags, u""_ustr, coNumContFlags, u""_ustr);
if (aRes.TokenType == 0)
{ // Try again with the default token parsing.
aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, u""_ustr,
coContFlags, u""_ustr);
}
// default setting for the case that no identifier // i.e. a valid symbol-name is following the '%' // character
m_aCurToken.eType = TTEXT;
m_aCurToken.cMathChar = u""_ustr;
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "%";
if (aTmpRes.TokenType & KParseType::IDENTNAME)
{
sal_Int32 n = aTmpRes.EndPos - nTmpStart;
m_aCurToken.eType = TSPECIAL;
m_aCurToken.aText = m_aBufferString.copy(nTmpStart - 1, n + 1);
rnEndPos = nRealStart + 2;
} else
{
m_aCurToken.eType = TMINUS;
m_aCurToken.setChar(MS_MINUS);
m_aCurToken.nGroup = TG::UnOper | TG::Sum;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "-";
}
} break; case'.':
{ // Only one character? Then it can't be a number. if (m_nBufferIndex < m_aBufferString.getLength() - 1)
{ // for compatibility with SO5.2 // texts like .34 ...56 ... h ...78..90 // will be treated as numbers
m_aCurToken.eType = TNUMBER;
m_aCurToken.cMathChar = u""_ustr;
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
sal_Int32 nTxtStart = m_nBufferIndex;
sal_Unicode cChar; // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only do
{
cChar = m_aBufferString[++m_nBufferIndex];
} while ((cChar == '.' || rtl::isAsciiDigit(cChar))
&& (m_nBufferIndex < m_aBufferString.getLength() - 1));
// tdf#129372: we may have to deal with surrogate pairs // (see https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates) // in this case, we must read 2 sal_Unicode instead of 1 int nOffset(rtl::isSurrogate(m_aBufferString[nRealStart]) ? 2 : 1);
m_aCurToken.aText = m_aBufferString.copy(nRealStart, nOffset);
std::unique_ptr<SmNode> SmParser5::DoAlign(bool bUseExtraSpaces) // parse alignment info (if any), then go on with rest of expression
{
DepthProtect aDepthGuard(m_nParseDepth);
std::unique_ptr<SmStructureNode> xSNode;
if (TokenInGroup(TG::Align))
{
xSNode.reset(new SmAlignNode(m_aCurToken));
xSNode->SetSelection(m_aCurESelection);
NextToken();
// allow for just one align statement in 5.0 if (TokenInGroup(TG::Align)) return DoError(SmParseError::DoubleAlign);
}
// start with single expression that may have an alignment statement // (and go on with expressions that must not have alignment // statements in 'while' loop below. See also 'Expression()'.) if (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
ExpressionArray.push_back(DoAlign());
while (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
ExpressionArray.push_back(DoExpression());
//If there's no expression, add an empty one. //this is to avoid a formula tree without any caret //positions, in visual formula editor. if (ExpressionArray.empty())
{
SmToken aTok;
aTok.eType = TNEWLINE;
ExpressionArray.emplace_back(std::unique_ptr<SmNode>(new SmExpressionNode(aTok)));
}
auto xSNode = std::make_unique<SmLineNode>(m_aCurToken);
xSNode->SetSelection(m_aCurESelection);
xSNode->SetSubNodes(buildNodeArray(ExpressionArray)); return xSNode;
}
std::vector<std::unique_ptr<SmNode>> RelationArray;
RelationArray.push_back(DoRelation()); while (m_aCurToken.nLevel >= 4)
RelationArray.push_back(DoRelation());
if (RelationArray.size() > 1)
{
std::unique_ptr<SmExpressionNode> xSNode(new SmExpressionNode(m_aCurToken));
xSNode->SetSubNodes(buildNodeArray(RelationArray));
xSNode->SetUseExtraSpaces(bUseExtraSpaces); return xSNode;
} else
{ // This expression has only one node so just push this node. return std::move(RelationArray[0]);
}
}
while (TokenInGroup(TG::Product))
{ //this linear loop builds a recursive structure, if it gets //too deep then later processing, e.g. releasing the tree, //can exhaust stack if (m_nParseDepth + nDepthLimit > DEPTH_LIMIT) throw std::range_error("parser depth limit");
case TBOPER:
xSNode.reset(new SmBinHorNode(m_aCurToken));
NextToken();
//Let the glyph node know it's a binary operation
m_aCurToken.eType = TBOPER;
m_aCurToken.nGroup = TG::Product;
xOper = DoGlyphSpecial(); break;
case TOVERBRACE: case TUNDERBRACE:
xSNode.reset(new SmVerticalBraceNode(m_aCurToken));
xSNode->SetSelection(m_aCurESelection);
xOper.reset(new SmMathSymbolNode(m_aCurToken));
xOper->SetSelection(m_aCurESelection);
NextToken(); break;
case TWIDEBACKSLASH: case TWIDESLASH:
{
SmBinDiagonalNode* pSTmp = new SmBinDiagonalNode(m_aCurToken);
pSTmp->SetAscending(eType == TWIDESLASH);
xSNode.reset(pSTmp);
std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
pNode->SetSelection(m_aCurESelection); //! Of course 'm_aCurToken' is just the first sub-/supscript token. //! It should be of no further interest. The positions of the //! sub-/supscripts will be identified by the corresponding subnodes //! index in the 'aSubNodes' array (enum value from 'SmSubSup').
// process all sub-/supscripts int nIndex = 0; while (TokenInGroup(nActiveGroup))
{
SmTokenType eType(m_aCurToken.eType);
switch (eType)
{ case TRSUB:
nIndex = static_cast<int>(RSUB); break; case TRSUP:
nIndex = static_cast<int>(RSUP); break; case TFROM: case TCSUB:
nIndex = static_cast<int>(CSUB); break; case TTO: case TCSUP:
nIndex = static_cast<int>(CSUP); break; case TLSUB:
nIndex = static_cast<int>(LSUB); break; case TLSUP:
nIndex = static_cast<int>(LSUP); break; default:
SAL_WARN("starmath", "unknown case");
}
nIndex++;
assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);
std::unique_ptr<SmNode> xENode; if (aSubNodes[nIndex]) // if already occupied at earlier iteration
{ // forget the earlier one, remember an error instead
aSubNodes[nIndex].reset();
xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
} else
{ // skip sub-/supscript token
NextToken();
}
// get sub-/supscript node // (even when we saw a double-sub/supscript error in the above // in order to minimize mess and continue parsing.)
std::unique_ptr<SmNode> xSNode; if (eType == TFROM || eType == TTO)
{ // parse limits in old 4.0 and 5.0 style
xSNode = DoRelation();
} else
xSNode = DoTerm(true);
std::unique_ptr<SmNode> xENode; if (aSubNodes[nIndex]) // if already occupied at earlier iteration
{ // forget the earlier one, remember an error instead
aSubNodes[nIndex].reset();
xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
} else
NextToken(); // skip sub-/supscript token
// get sub-/supscript node
std::unique_ptr<SmNode> xSNode;
xSNode = DoTerm(true);
// get operator symbol auto xNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
xNode->SetSelection(m_aCurESelection); // skip operator token
NextToken(); // get sub- supscripts if any if (m_aCurToken.nGroup == TG::Power) return DoSubSup(TG::Power, std::move(xNode)); return xNode;
}
switch (m_aCurToken.eType)
{ case TESCAPE: return DoEscape();
case TNOSPACE: case TLGROUP:
{ bool bNoSpace = m_aCurToken.eType == TNOSPACE; if (bNoSpace)
NextToken(); if (m_aCurToken.eType != TLGROUP) return DoTerm(false); // nospace is no longer concerned
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.80 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.