/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
// First character may be any alphabetic const sal_Int32 coStartFlags = KParseTokens::ANY_LETTER | KParseTokens::IGNORE_LEADING_WS;
// Continuing characters may be any alphabetic const sal_Int32 coContFlags = (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
| KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING; // First character for numbers, may be any numeric or dot const sal_Int32 coNumStartFlags
= KParseTokens::ASC_DIGIT | KParseTokens::ASC_DOT | KParseTokens::IGNORE_LEADING_WS; // Continuing characters for numbers, may be any numeric or dot or comma. // tdf#127873: additionally accept ',' comma group separator as too many // existing documents unwittingly may have used that as decimal separator // in such locales (though it never was as this is always the en-US locale // and the group separator is only parsed away). const sal_Int32 coNumContFlags = (coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
| KParseTokens::GROUP_SEPARATOR_IN_NUMBER; // First character for numbers hexadecimal const sal_Int32 coNum16StartFlags
= KParseTokens::ASC_DIGIT | KParseTokens::ASC_UPALPHA | KParseTokens::IGNORE_LEADING_WS;
// Continuing characters for numbers hexadecimal const sal_Int32 coNum16ContFlags = (coNum16StartFlags & ~KParseTokens::IGNORE_LEADING_WS); // user-defined char continuing characters may be any alphanumeric or dot. const sal_Int32 coUserDefinedCharContFlags = KParseTokens::ANY_LETTER_OR_NUMBER
| KParseTokens::ASC_DOT
| KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
//Checks if keyword is in the list. staticinlinebool findCompare(const SmTokenTableEntry& lhs, const OUString& s)
{ return s.compareToIgnoreAsciiCase(lhs.aIdent) > 0;
}
//Returns the SmTokenTableEntry for a keyword const SmTokenTableEntry* GetTokenTableEntry(const OUString& rName)
{ if (rName.isEmpty()) return nullptr; //avoid null pointer exceptions //Looks for the first keyword after or equal to rName in alphabetical order. auto findIter
= std::lower_bound(std::begin(aTokenTable), std::end(aTokenTable), rName, findCompare); if (findIter != std::end(aTokenTable) && rName.equalsIgnoreAsciiCase(findIter->aIdent)) return &*findIter; //check is equal return nullptr; //not found
}
// Makes `string` safe to embed as a literal.
//
// When `force` is true the text is simply wrapped in double quotes; the
// content is not inspected or escaped.
//
// Otherwise the text is rewritten character by character:
//   - a double quote becomes  "\""  (a quoted, backslash-escaped quote),
//   - whitespace and operator-like characters are individually wrapped in
//     double quotes,
//   - brackets, braces and parentheses are prefixed with a backslash,
//   - every other character is copied unchanged.
// If the rewritten text is a keyword of the token table (other than a
// function or operator keyword), the whole result is wrapped in double quotes.
//
// Returns the quoted/escaped text.
OUString encloseOrEscapeLiteral(const OUString& string, bool force)
{
    if (force)
        return "\"" + string + "\"";

    OUStringBuffer result;
    const sal_Int32 nLength = string.getLength();
    for (sal_Int32 i = 0; i < nLength; ++i)
    {
        const sal_Unicode cChar = string[i];
        // A switch replaces the two hash sets that used to be rebuilt on
        // every call just to test membership of these fixed characters.
        switch (cChar)
        {
            case '"':
                result.append("\"\\\"\"");
                break;

            // characters that get enclosed in double quotes
            case ' ':
            case '\t':
            case '\n':
            case '\r':
            case '+':
            case '-':
            case '*':
            case '/':
            case '=':
            case '^':
            case '_':
            case '#':
            case '%':
            case '>':
            case '<':
            case '&':
            case '|':
            case '~':
            case '`':
                result.append("\"" + OUStringChar(cChar) + "\"");
                break;

            // grouping characters that get a backslash prefix
            case '{':
            case '}':
            case '(':
            case ')':
            case '[':
            case ']':
                result.append("\\" + OUStringChar(cChar));
                break;

            default:
                result.append(cChar);
                break;
        }
    }

    OUString resultString = result.makeStringAndClear();
    const SmTokenTableEntry* tkn = GetTokenTableEntry(resultString);
    // Function and operator keywords are excluded: they take arguments and
    // can't be treated as literals, or else the arguments are not displayed
    // correctly.
    if (tkn && tkn->nGroup != TG::Function && tkn->nGroup != TG::Oper)
    {
        resultString = "\"" + resultString + "\"";
    }
    return resultString;
}
staticbool IsDelimiter(const OUString& rTxt, sal_Int32 nPos)
{ // returns 'true' iff cChar is '\0' or a delimiter
assert(nPos <= rTxt.getLength()); //index out of range if (nPos == rTxt.getLength()) returntrue; //This is EOF
sal_Unicode cChar = rTxt[nPos];
// check if 'cChar' is in the delimiter table static constexpr sal_Unicode aDelimiterTable[] = { ' ', '{', '}', '(', ')', '\t', '\n', '\r', '+', '-', '*', '/', '=', '[', ']', '^', '_', '#', '%', '>', '<', '&', '|', '\\', '"', '~', '`'
}; //reordered by usage (by eye) for nanoseconds saving.
//checks the array for (autoconst& cDelimiter : aDelimiterTable)
{ if (cDelimiter == cChar) returntrue;
}
// checks number used as arguments in Math formulas (e.g. 'size' command) // Format: no negative numbers, must start with a digit, no exponent notation, ... staticbool lcl_IsNumber(const OUString& rText)
{ bool bPoint = false; const sal_Unicode* pBuffer = rText.getStr(); for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
{ const sal_Unicode cChar = *pBuffer; if (cChar == '.')
{ if (bPoint) returnfalse; else
bPoint = true;
} elseif (!rtl::isAsciiDigit(cChar)) returnfalse;
} returntrue;
} // checks number used as arguments in Math formulas (e.g. 'size' command) // Format: no negative numbers, must start with a digit, no exponent notation, ... staticbool lcl_IsNotWholeNumber(const OUString& rText)
{ const sal_Unicode* pBuffer = rText.getStr(); for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++) if (!rtl::isAsciiDigit(*pBuffer)) returntrue; returnfalse;
} // checks hex number used as arguments in Math formulas (e.g. 'hex' command) // Format: no negative numbers, must start with a digit, no exponent notation, ... staticbool lcl_IsNotWholeNumber16(const OUString& rText)
{ const sal_Unicode* pBuffer = rText.getStr(); for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++) if (!rtl::isAsciiCanonicHexDigit(*pBuffer)) returntrue; returnfalse;
}
void SmParser5::NextToken() //Central part of the parser
{
sal_Int32 nBufLen = m_aBufferString.getLength();
ParseResult aRes;
sal_Int32 nRealStart; bool bCont; do
{ // skip white spaces while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex))
++m_nBufferIndex;
// Try to parse a number in a locale-independent manner using // '.' as decimal separator. // See https://bz.apache.org/ooo/show_bug.cgi?id=45779
aRes
= m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER, m_aBufferString, m_nBufferIndex,
coNumStartFlags, u""_ustr, coNumContFlags, u""_ustr);
if (aRes.TokenType == 0)
{ // Try again with the default token parsing.
aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, u""_ustr,
coContFlags, u""_ustr);
}
// default setting for the case that no identifier // i.e. a valid symbol-name is following the '%' // character
m_aCurToken.eType = TTEXT;
m_aCurToken.cMathChar = u""_ustr;
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "%";
if (aTmpRes.TokenType & KParseType::IDENTNAME)
{
sal_Int32 n = aTmpRes.EndPos - nTmpStart;
m_aCurToken.eType = TSPECIAL;
m_aCurToken.aText = m_aBufferString.copy(nTmpStart - 1, n + 1);
rnEndPos = nRealStart + 2;
} else
{
m_aCurToken.eType = TMINUS;
m_aCurToken.setChar(MS_MINUS);
m_aCurToken.nGroup = TG::UnOper | TG::Sum;
m_aCurToken.nLevel = 5;
m_aCurToken.aText = "-";
}
} break; case'.':
{ // Only one character? Then it can't be a number. if (m_nBufferIndex < m_aBufferString.getLength() - 1)
{ // for compatibility with SO5.2 // texts like .34 ...56 ... h ...78..90 // will be treated as numbers
m_aCurToken.eType = TNUMBER;
m_aCurToken.cMathChar = u""_ustr;
m_aCurToken.nGroup = TG::NONE;
m_aCurToken.nLevel = 5;
sal_Int32 nTxtStart = m_nBufferIndex;
sal_Unicode cChar; // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only do
{
cChar = m_aBufferString[++m_nBufferIndex];
} while ((cChar == '.' || rtl::isAsciiDigit(cChar))
&& (m_nBufferIndex < m_aBufferString.getLength() - 1));
// tdf#129372: we may have to deal with surrogate pairs // (see https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates) // in this case, we must read 2 sal_Unicode instead of 1 int nOffset(rtl::isSurrogate(m_aBufferString[nRealStart]) ? 2 : 1);
m_aCurToken.aText = m_aBufferString.copy(nRealStart, nOffset);
std::unique_ptr<SmNode> SmParser5::DoAlign(bool bUseExtraSpaces) // parse alignment info (if any), then go on with rest of expression
{
DepthProtect aDepthGuard(m_nParseDepth);
std::unique_ptr<SmStructureNode> xSNode;
if (TokenInGroup(TG::Align))
{
xSNode.reset(new SmAlignNode(m_aCurToken));
xSNode->SetSelection(m_aCurESelection);
NextToken();
// allow for just one align statement in 5.0 if (TokenInGroup(TG::Align)) return DoError(SmParseError::DoubleAlign);
}
// start with single expression that may have an alignment statement // (and go on with expressions that must not have alignment // statements in 'while' loop below. See also 'Expression()'.) if (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
ExpressionArray.push_back(DoAlign());
while (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
ExpressionArray.push_back(DoExpression());
//If there's no expression, add an empty one. //this is to avoid a formula tree without any caret //positions, in visual formula editor. if (ExpressionArray.empty())
{
SmToken aTok;
aTok.eType = TNEWLINE;
ExpressionArray.emplace_back(std::unique_ptr<SmNode>(new SmExpressionNode(aTok)));
}
auto xSNode = std::make_unique<SmLineNode>(m_aCurToken);
xSNode->SetSelection(m_aCurESelection);
xSNode->SetSubNodes(buildNodeArray(ExpressionArray)); return xSNode;
}
std::vector<std::unique_ptr<SmNode>> RelationArray;
RelationArray.push_back(DoRelation()); while (m_aCurToken.nLevel >= 4)
RelationArray.push_back(DoRelation());
if (RelationArray.size() > 1)
{
std::unique_ptr<SmExpressionNode> xSNode(new SmExpressionNode(m_aCurToken));
xSNode->SetSubNodes(buildNodeArray(RelationArray));
xSNode->SetUseExtraSpaces(bUseExtraSpaces); return xSNode;
} else
{ // This expression has only one node so just push this node. return std::move(RelationArray[0]);
}
}
while (TokenInGroup(TG::Product))
{ //this linear loop builds a recursive structure, if it gets //too deep then later processing, e.g. releasing the tree, //can exhaust stack if (m_nParseDepth + nDepthLimit > DEPTH_LIMIT) throw std::range_error("parser depth limit");
case TBOPER:
xSNode.reset(new SmBinHorNode(m_aCurToken));
NextToken();
//Let the glyph node know it's a binary operation
m_aCurToken.eType = TBOPER;
m_aCurToken.nGroup = TG::Product;
xOper = DoGlyphSpecial(); break;
case TOVERBRACE: case TUNDERBRACE:
xSNode.reset(new SmVerticalBraceNode(m_aCurToken));
xSNode->SetSelection(m_aCurESelection);
xOper.reset(new SmMathSymbolNode(m_aCurToken));
xOper->SetSelection(m_aCurESelection);
NextToken(); break;
case TWIDEBACKSLASH: case TWIDESLASH:
{
SmBinDiagonalNode* pSTmp = new SmBinDiagonalNode(m_aCurToken);
pSTmp->SetAscending(eType == TWIDESLASH);
xSNode.reset(pSTmp);
std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
pNode->SetSelection(m_aCurESelection); //! Of course 'm_aCurToken' is just the first sub-/supscript token. //! It should be of no further interest. The positions of the //! sub-/supscripts will be identified by the corresponding subnodes //! index in the 'aSubNodes' array (enum value from 'SmSubSup').
// process all sub-/supscripts int nIndex = 0; while (TokenInGroup(nActiveGroup))
{
SmTokenType eType(m_aCurToken.eType);
switch (eType)
{ case TRSUB:
nIndex = static_cast<int>(RSUB); break; case TRSUP:
nIndex = static_cast<int>(RSUP); break; case TFROM: case TCSUB:
nIndex = static_cast<int>(CSUB); break; case TTO: case TCSUP:
nIndex = static_cast<int>(CSUP); break; case TLSUB:
nIndex = static_cast<int>(LSUB); break; case TLSUP:
nIndex = static_cast<int>(LSUP); break; default:
SAL_WARN("starmath", "unknown case");
}
nIndex++;
assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);
std::unique_ptr<SmNode> xENode; if (aSubNodes[nIndex]) // if already occupied at earlier iteration
{ // forget the earlier one, remember an error instead
aSubNodes[nIndex].reset();
xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
} else
{ // skip sub-/supscript token
NextToken();
}
// get sub-/supscript node // (even when we saw a double-sub/supscript error in the above // in order to minimize mess and continue parsing.)
std::unique_ptr<SmNode> xSNode; if (eType == TFROM || eType == TTO)
{ // parse limits in old 4.0 and 5.0 style
xSNode = DoRelation();
} else
xSNode = DoTerm(true);
std::unique_ptr<SmNode> xENode; if (aSubNodes[nIndex]) // if already occupied at earlier iteration
{ // forget the earlier one, remember an error instead
aSubNodes[nIndex].reset();
xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
} else
NextToken(); // skip sub-/supscript token
// get sub-/supscript node
std::unique_ptr<SmNode> xSNode;
xSNode = DoTerm(true);
// get operator symbol auto xNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
xNode->SetSelection(m_aCurESelection); // skip operator token
NextToken(); // get sub- supscripts if any if (m_aCurToken.nGroup == TG::Power) return DoSubSup(TG::Power, std::move(xNode)); return xNode;
}
switch (m_aCurToken.eType)
{ case TESCAPE: return DoEscape();
case TNOSPACE: case TLGROUP:
{ bool bNoSpace = m_aCurToken.eType == TNOSPACE; if (bNoSpace)
NextToken(); if (m_aCurToken.eType != TLGROUP) return DoTerm(false); // nospace is no longer concerned
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.