// Source: parse5.cxx

// Language: C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
*   Licensed to the Apache Software Foundation (ASF) under one or more
*   contributor license agreements. See the NOTICE file distributed
*   with this work for additional information regarding copyright
*   ownership. The ASF licenses this file to you under the Apache
*   License, Version 2.0 (the "License"); you may not use this file
*   except in compliance with the License. You may obtain a copy of
*   the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/

#include <com/sun/star/i18n/UnicodeType.hpp>
#include <com/sun/star/i18n/KParseTokens.hpp>
#include <com/sun/star/i18n/KParseType.hpp>
#include <i18nlangtag/lang.h>
#include <tools/lineend.hxx>
#include <comphelper/configuration.hxx>
#include <unotools/syslocale.hxx>
#include <osl/diagnose.h>
#include <rtl/character.hxx>
#include <parse5.hxx>
#include <strings.hrc>
#include <smmod.hxx>
#include <symbol.hxx>
#include <cfgitem.hxx>
#include <starmathdatabase.hxx>

#include <stack>
#include <unordered_set>

using namespace ::com::sun::star::i18n;

// Keyword table of the math formula language.
//
// Each entry is { keyword, token type, math character, token group(s), level };
// the parser reads these back as aIdent / eType / cMathChar / nGroup / nLevel.
//
// IMPORTANT: GetTokenTableEntry() locates keywords with std::lower_bound and an
// ASCII-case-insensitive comparison, so the entries MUST stay sorted by keyword
// in exactly that order (e.g. "lrdline" before "lrline", "setC" before
// "setminus"). An entry that is out of order can no longer be found.
const SmTokenTableEntry aTokenTable[]
    = { { u"abs"_ustr, TABS, '\0', TG::UnOper, 13 },
        { u"acute"_ustr, TACUTE, MS_ACUTE, TG::Attribute, 5 },
        { u"aleph"_ustr, TALEPH, MS_ALEPH, TG::Standalone, 5 },
        { u"alignb"_ustr, TALIGNC, '\0', TG::Align, 0 },
        { u"alignc"_ustr, TALIGNC, '\0', TG::Align, 0 },
        { u"alignl"_ustr, TALIGNL, '\0', TG::Align, 0 },
        { u"alignm"_ustr, TALIGNC, '\0', TG::Align, 0 },
        { u"alignr"_ustr, TALIGNR, '\0', TG::Align, 0 },
        { u"alignt"_ustr, TALIGNC, '\0', TG::Align, 0 },
        { u"and"_ustr, TAND, MS_AND, TG::Product, 0 },
        { u"approx"_ustr, TAPPROX, MS_APPROX, TG::Relation, 0 },
        { u"arccos"_ustr, TACOS, '\0', TG::Function, 5 },
        { u"arccot"_ustr, TACOT, '\0', TG::Function, 5 },
        { u"arcosh"_ustr, TACOSH, '\0', TG::Function, 5 },
        { u"arcoth"_ustr, TACOTH, '\0', TG::Function, 5 },
        { u"arcsin"_ustr, TASIN, '\0', TG::Function, 5 },
        { u"arctan"_ustr, TATAN, '\0', TG::Function, 5 },
        { u"arsinh"_ustr, TASINH, '\0', TG::Function, 5 },
        { u"artanh"_ustr, TATANH, '\0', TG::Function, 5 },
        { u"backepsilon"_ustr, TBACKEPSILON, MS_BACKEPSILON, TG::Standalone, 5 },
        { u"bar"_ustr, TBAR, MS_BAR, TG::Attribute, 5 },
        { u"binom"_ustr, TBINOM, '\0', TG::NONE, 5 },
        { u"bold"_ustr, TBOLD, '\0', TG::FontAttr, 5 },
        { u"boper"_ustr, TBOPER, '\0', TG::Product, 0 },
        { u"breve"_ustr, TBREVE, MS_BREVE, TG::Attribute, 5 },
        { u"bslash"_ustr, TBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
        { u"cdot"_ustr, TCDOT, MS_CDOT, TG::Product, 0 },
        { u"check"_ustr, TCHECK, MS_CHECK, TG::Attribute, 5 },
        { u"circ"_ustr, TCIRC, MS_CIRC, TG::Standalone, 5 },
        { u"circle"_ustr, TCIRCLE, MS_CIRCLE, TG::Attribute, 5 },
        { u"color"_ustr, TCOLOR, '\0', TG::FontAttr, 5 },
        { u"coprod"_ustr, TCOPROD, MS_COPROD, TG::Oper, 5 },
        { u"cos"_ustr, TCOS, '\0', TG::Function, 5 },
        { u"cosh"_ustr, TCOSH, '\0', TG::Function, 5 },
        { u"cot"_ustr, TCOT, '\0', TG::Function, 5 },
        { u"coth"_ustr, TCOTH, '\0', TG::Function, 5 },
        { u"csub"_ustr, TCSUB, '\0', TG::Power, 0 },
        { u"csup"_ustr, TCSUP, '\0', TG::Power, 0 },
        { u"dddot"_ustr, TDDDOT, MS_DDDOT, TG::Attribute, 5 },
        { u"ddot"_ustr, TDDOT, MS_DDOT, TG::Attribute, 5 },
        { u"def"_ustr, TDEF, MS_DEF, TG::Relation, 0 },
        { u"div"_ustr, TDIV, MS_DIV, TG::Product, 0 },
        { u"divides"_ustr, TDIVIDES, MS_LINE, TG::Relation, 0 },
        { u"dlarrow"_ustr, TDLARROW, MS_DLARROW, TG::Standalone, 5 },
        { u"dlrarrow"_ustr, TDLRARROW, MS_DLRARROW, TG::Standalone, 5 },
        { u"dot"_ustr, TDOT, MS_DOT, TG::Attribute, 5 },
        { u"dotsaxis"_ustr, TDOTSAXIS, MS_DOTSAXIS, TG::Standalone, 5 }, // 5 to continue expression
        { u"dotsdiag"_ustr, TDOTSDIAG, MS_DOTSUP, TG::Standalone, 5 },
        { u"dotsdown"_ustr, TDOTSDOWN, MS_DOTSDOWN, TG::Standalone, 5 },
        { u"dotslow"_ustr, TDOTSLOW, MS_DOTSLOW, TG::Standalone, 5 },
        { u"dotsup"_ustr, TDOTSUP, MS_DOTSUP, TG::Standalone, 5 },
        { u"dotsvert"_ustr, TDOTSVERT, MS_DOTSVERT, TG::Standalone, 5 },
        { u"downarrow"_ustr, TDOWNARROW, MS_DOWNARROW, TG::Standalone, 5 },
        { u"drarrow"_ustr, TDRARROW, MS_DRARROW, TG::Standalone, 5 },
        { u"emptyset"_ustr, TEMPTYSET, MS_EMPTYSET, TG::Standalone, 5 },
        { u"equiv"_ustr, TEQUIV, MS_EQUIV, TG::Relation, 0 },
        { u"evaluate"_ustr, TEVALUATE, '\0', TG::NONE, 0 },
        { u"exists"_ustr, TEXISTS, MS_EXISTS, TG::Standalone, 5 },
        { u"exp"_ustr, TEXP, '\0', TG::Function, 5 },
        { u"fact"_ustr, TFACT, MS_FACT, TG::UnOper, 5 },
        { u"fixed"_ustr, TFIXED, '\0', TG::Font, 0 },
        { u"font"_ustr, TFONT, '\0', TG::FontAttr, 5 },
        { u"forall"_ustr, TFORALL, MS_FORALL, TG::Standalone, 5 },
        { u"fourier"_ustr, TFOURIER, MS_FOURIER, TG::Standalone, 5 },
        { u"frac"_ustr, TFRAC, '\0', TG::NONE, 5 },
        { u"from"_ustr, TFROM, '\0', TG::Limit, 0 },
        { u"func"_ustr, TFUNC, '\0', TG::Function, 5 },
        { u"ge"_ustr, TGE, MS_GE, TG::Relation, 0 },
        { u"geslant"_ustr, TGESLANT, MS_GESLANT, TG::Relation, 0 },
        { u"gg"_ustr, TGG, MS_GG, TG::Relation, 0 },
        { u"grave"_ustr, TGRAVE, MS_GRAVE, TG::Attribute, 5 },
        { u"gt"_ustr, TGT, MS_GT, TG::Relation, 0 },
        { u"hadd"_ustr, THADD, MS_HADD, TG::Oper, 5 },
        { u"harpoon"_ustr, THARPOON, MS_HARPOON, TG::Attribute, 5 },
        { u"hat"_ustr, THAT, MS_HAT, TG::Attribute, 5 },
        { u"hbar"_ustr, THBAR, MS_HBAR, TG::Standalone, 5 },
        { u"hex"_ustr, THEX, '\0', TG::NONE, 5 },
        { u"iiint"_ustr, TIIINT, MS_IIINT, TG::Oper, 5 },
        { u"iint"_ustr, TIINT, MS_IINT, TG::Oper, 5 },
        { u"im"_ustr, TIM, MS_IM, TG::Standalone, 5 },
        { u"in"_ustr, TIN, MS_IN, TG::Relation, 0 },
        { u"infinity"_ustr, TINFINITY, MS_INFINITY, TG::Standalone, 5 },
        { u"infty"_ustr, TINFINITY, MS_INFINITY, TG::Standalone, 5 },
        { u"int"_ustr, TINT, MS_INT, TG::Oper, 5 },
        { u"intd"_ustr, TINTD, MS_INT, TG::Oper, 5 },
        { u"intersection"_ustr, TINTERSECT, MS_INTERSECT, TG::Product, 0 },
        { u"it"_ustr, TIT, '\0', TG::Product, 0 },
        { u"ital"_ustr, TITALIC, '\0', TG::FontAttr, 5 },
        { u"italic"_ustr, TITALIC, '\0', TG::FontAttr, 5 },
        { u"lambdabar"_ustr, TLAMBDABAR, MS_LAMBDABAR, TG::Standalone, 5 },
        { u"langle"_ustr, TLANGLE, MS_LMATHANGLE, TG::LBrace, 5 },
        { u"laplace"_ustr, TLAPLACE, MS_LAPLACE, TG::Standalone, 5 },
        { u"lbrace"_ustr, TLBRACE, MS_LBRACE, TG::LBrace, 5 },
        { u"lceil"_ustr, TLCEIL, MS_LCEIL, TG::LBrace, 5 },
        { u"ldbracket"_ustr, TLDBRACKET, MS_LDBRACKET, TG::LBrace, 5 },
        { u"ldline"_ustr, TLDLINE, MS_DVERTLINE, TG::LBrace, 5 },
        { u"le"_ustr, TLE, MS_LE, TG::Relation, 0 },
        { u"left"_ustr, TLEFT, '\0', TG::NONE, 5 },
        { u"leftarrow"_ustr, TLEFTARROW, MS_LEFTARROW, TG::Standalone, 5 },
        { u"leslant"_ustr, TLESLANT, MS_LESLANT, TG::Relation, 0 },
        { u"lfloor"_ustr, TLFLOOR, MS_LFLOOR, TG::LBrace, 5 },
        { u"lim"_ustr, TLIM, '\0', TG::Oper, 5 },
        { u"liminf"_ustr, TLIMINF, '\0', TG::Oper, 5 },
        { u"limsup"_ustr, TLIMSUP, '\0', TG::Oper, 5 },
        { u"lint"_ustr, TLINT, MS_LINT, TG::Oper, 5 },
        { u"ll"_ustr, TLL, MS_LL, TG::Relation, 0 },
        { u"lline"_ustr, TLLINE, MS_VERTLINE, TG::LBrace, 5 },
        { u"llint"_ustr, TLLINT, MS_LLINT, TG::Oper, 5 },
        { u"lllint"_ustr, TLLLINT, MS_LLLINT, TG::Oper, 5 },
        { u"ln"_ustr, TLN, '\0', TG::Function, 5 },
        { u"log"_ustr, TLOG, '\0', TG::Function, 5 },
        // "lrdline" sorts before "lrline" ('d' < 'l'); keep this order for the binary search.
        { u"lrdline"_ustr, TLRDLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5 },
        { u"lrline"_ustr, TLRLINE, MS_VERTLINE, TG::LBrace | TG::RBrace, 5 },
        { u"lsub"_ustr, TLSUB, '\0', TG::Power, 0 },
        { u"lsup"_ustr, TLSUP, '\0', TG::Power, 0 },
        { u"lt"_ustr, TLT, MS_LT, TG::Relation, 0 },
        { u"maj"_ustr, TSUM, MS_MAJ, TG::Oper, 5 },
        { u"matrix"_ustr, TMATRIX, '\0', TG::NONE, 5 },
        { u"minusplus"_ustr, TMINUSPLUS, MS_MINUSPLUS, TG::UnOper | TG::Sum, 5 },
        { u"mline"_ustr, TMLINE, MS_VERTLINE, TG::NONE, 0 }, //! not in TG::RBrace, Level 0
        { u"nabla"_ustr, TNABLA, MS_NABLA, TG::Standalone, 5 },
        { u"nbold"_ustr, TNBOLD, '\0', TG::FontAttr, 5 },
        { u"ndivides"_ustr, TNDIVIDES, MS_NDIVIDES, TG::Relation, 0 },
        { u"neg"_ustr, TNEG, MS_NEG, TG::UnOper, 5 },
        { u"neq"_ustr, TNEQ, MS_NEQ, TG::Relation, 0 },
        { u"newline"_ustr, TNEWLINE, '\0', TG::NONE, 0 },
        { u"ni"_ustr, TNI, MS_NI, TG::Relation, 0 },
        { u"nitalic"_ustr, TNITALIC, '\0', TG::FontAttr, 5 },
        { u"none"_ustr, TNONE, '\0', TG::LBrace | TG::RBrace, 0 },
        { u"nospace"_ustr, TNOSPACE, '\0', TG::Standalone, 5 },
        { u"notexists"_ustr, TNOTEXISTS, MS_NOTEXISTS, TG::Standalone, 5 },
        { u"notin"_ustr, TNOTIN, MS_NOTIN, TG::Relation, 0 },
        { u"nprec"_ustr, TNOTPRECEDES, MS_NOTPRECEDES, TG::Relation, 0 },
        { u"nroot"_ustr, TNROOT, MS_SQRT, TG::UnOper, 5 },
        { u"nsubset"_ustr, TNSUBSET, MS_NSUBSET, TG::Relation, 0 },
        { u"nsubseteq"_ustr, TNSUBSETEQ, MS_NSUBSETEQ, TG::Relation, 0 },
        { u"nsucc"_ustr, TNOTSUCCEEDS, MS_NOTSUCCEEDS, TG::Relation, 0 },
        { u"nsupset"_ustr, TNSUPSET, MS_NSUPSET, TG::Relation, 0 },
        { u"nsupseteq"_ustr, TNSUPSETEQ, MS_NSUPSETEQ, TG::Relation, 0 },
        { u"odivide"_ustr, TODIVIDE, MS_ODIVIDE, TG::Product, 0 },
        { u"odot"_ustr, TODOT, MS_ODOT, TG::Product, 0 },
        { u"ominus"_ustr, TOMINUS, MS_OMINUS, TG::Sum, 0 },
        { u"oper"_ustr, TOPER, '\0', TG::Oper, 5 },
        { u"oplus"_ustr, TOPLUS, MS_OPLUS, TG::Sum, 0 },
        { u"or"_ustr, TOR, MS_OR, TG::Sum, 0 },
        { u"ortho"_ustr, TORTHO, MS_ORTHO, TG::Relation, 0 },
        { u"otimes"_ustr, TOTIMES, MS_OTIMES, TG::Product, 0 },
        { u"over"_ustr, TOVER, '\0', TG::Product, 0 },
        { u"overbrace"_ustr, TOVERBRACE, MS_OVERBRACE, TG::Product, 5 },
        { u"overline"_ustr, TOVERLINE, '\0', TG::Attribute, 5 },
        { u"overstrike"_ustr, TOVERSTRIKE, '\0', TG::Attribute, 5 },
        { u"owns"_ustr, TNI, MS_NI, TG::Relation, 0 },
        { u"parallel"_ustr, TPARALLEL, MS_DLINE, TG::Relation, 0 },
        { u"partial"_ustr, TPARTIAL, MS_PARTIAL, TG::Standalone, 5 },
        { u"phantom"_ustr, TPHANTOM, '\0', TG::FontAttr, 5 },
        { u"plusminus"_ustr, TPLUSMINUS, MS_PLUSMINUS, TG::UnOper | TG::Sum, 5 },
        { u"prec"_ustr, TPRECEDES, MS_PRECEDES, TG::Relation, 0 },
        { u"preccurlyeq"_ustr, TPRECEDESEQUAL, MS_PRECEDESEQUAL, TG::Relation, 0 },
        { u"precsim"_ustr, TPRECEDESEQUIV, MS_PRECEDESEQUIV, TG::Relation, 0 },
        { u"prod"_ustr, TPROD, MS_PROD, TG::Oper, 5 },
        { u"prop"_ustr, TPROP, MS_PROP, TG::Relation, 0 },
        { u"rangle"_ustr, TRANGLE, MS_RMATHANGLE, TG::RBrace, 0 }, //! 0 to terminate expression
        { u"rbrace"_ustr, TRBRACE, MS_RBRACE, TG::RBrace, 0 },
        { u"rceil"_ustr, TRCEIL, MS_RCEIL, TG::RBrace, 0 },
        { u"rdbracket"_ustr, TRDBRACKET, MS_RDBRACKET, TG::RBrace, 0 },
        { u"rdline"_ustr, TRDLINE, MS_DVERTLINE, TG::RBrace, 0 },
        { u"re"_ustr, TRE, MS_RE, TG::Standalone, 5 },
        { u"rfloor"_ustr, TRFLOOR, MS_RFLOOR, TG::RBrace, 0 }, //! 0 to terminate expression
        { u"right"_ustr, TRIGHT, '\0', TG::NONE, 0 },
        { u"rightarrow"_ustr, TRIGHTARROW, MS_RIGHTARROW, TG::Standalone, 5 },
        { u"rline"_ustr, TRLINE, MS_VERTLINE, TG::RBrace, 0 }, //! 0 to terminate expression
        { u"rsub"_ustr, TRSUB, '\0', TG::Power, 0 },
        { u"rsup"_ustr, TRSUP, '\0', TG::Power, 0 },
        { u"sans"_ustr, TSANS, '\0', TG::Font, 0 },
        { u"serif"_ustr, TSERIF, '\0', TG::Font, 0 },
        { u"setC"_ustr, TSETC, MS_SETC, TG::Standalone, 5 },
        { u"setminus"_ustr, TSETMINUS, MS_BACKSLASH, TG::Product, 0 },
        { u"setN"_ustr, TSETN, MS_SETN, TG::Standalone, 5 },
        { u"setQ"_ustr, TSETQ, MS_SETQ, TG::Standalone, 5 },
        { u"setquotient"_ustr, TSETQUOTIENT, MS_SLASH, TG::Product, 0 },
        { u"setR"_ustr, TSETR, MS_SETR, TG::Standalone, 5 },
        { u"setZ"_ustr, TSETZ, MS_SETZ, TG::Standalone, 5 },
        { u"sim"_ustr, TSIM, MS_SIM, TG::Relation, 0 },
        { u"simeq"_ustr, TSIMEQ, MS_SIMEQ, TG::Relation, 0 },
        { u"sin"_ustr, TSIN, '\0', TG::Function, 5 },
        { u"sinh"_ustr, TSINH, '\0', TG::Function, 5 },
        { u"size"_ustr, TSIZE, '\0', TG::FontAttr, 5 },
        { u"slash"_ustr, TSLASH, MS_SLASH, TG::Product, 0 },
        { u"sqrt"_ustr, TSQRT, MS_SQRT, TG::UnOper, 5 },
        { u"stack"_ustr, TSTACK, '\0', TG::NONE, 5 },
        { u"sub"_ustr, TRSUB, '\0', TG::Power, 0 },
        { u"subset"_ustr, TSUBSET, MS_SUBSET, TG::Relation, 0 },
        { u"subseteq"_ustr, TSUBSETEQ, MS_SUBSETEQ, TG::Relation, 0 },
        { u"succ"_ustr, TSUCCEEDS, MS_SUCCEEDS, TG::Relation, 0 },
        { u"succcurlyeq"_ustr, TSUCCEEDSEQUAL, MS_SUCCEEDSEQUAL, TG::Relation, 0 },
        { u"succsim"_ustr, TSUCCEEDSEQUIV, MS_SUCCEEDSEQUIV, TG::Relation, 0 },
        { u"sum"_ustr, TSUM, MS_SUM, TG::Oper, 5 },
        { u"sup"_ustr, TRSUP, '\0', TG::Power, 0 },
        { u"supset"_ustr, TSUPSET, MS_SUPSET, TG::Relation, 0 },
        { u"supseteq"_ustr, TSUPSETEQ, MS_SUPSETEQ, TG::Relation, 0 },
        { u"tan"_ustr, TTAN, '\0', TG::Function, 5 },
        { u"tanh"_ustr, TTANH, '\0', TG::Function, 5 },
        { u"tilde"_ustr, TTILDE, MS_TILDE, TG::Attribute, 5 },
        { u"times"_ustr, TTIMES, MS_TIMES, TG::Product, 0 },
        { u"to"_ustr, TTO, '\0', TG::Limit, 0 },
        { u"toward"_ustr, TTOWARD, MS_RIGHTARROW, TG::Relation, 0 },
        { u"transl"_ustr, TTRANSL, MS_TRANSL, TG::Relation, 0 },
        { u"transr"_ustr, TTRANSR, MS_TRANSR, TG::Relation, 0 },
        { u"underbrace"_ustr, TUNDERBRACE, MS_UNDERBRACE, TG::Product, 5 },
        { u"underline"_ustr, TUNDERLINE, '\0', TG::Attribute, 5 },
        { u"union"_ustr, TUNION, MS_UNION, TG::Sum, 0 },
        { u"uoper"_ustr, TUOPER, '\0', TG::UnOper, 5 },
        { u"uparrow"_ustr, TUPARROW, MS_UPARROW, TG::Standalone, 5 },
        { u"vec"_ustr, TVEC, MS_VEC, TG::Attribute, 5 },
        { u"widebslash"_ustr, TWIDEBACKSLASH, MS_BACKSLASH, TG::Product, 0 },
        { u"wideharpoon"_ustr, TWIDEHARPOON, MS_HARPOON, TG::Attribute, 5 },
        { u"widehat"_ustr, TWIDEHAT, MS_HAT, TG::Attribute, 5 },
        { u"wideslash"_ustr, TWIDESLASH, MS_SLASH, TG::Product, 0 },
        { u"widetilde"_ustr, TWIDETILDE, MS_TILDE, TG::Attribute, 5 },
        { u"widevec"_ustr, TWIDEVEC, MS_VEC, TG::Attribute, 5 },
        { u"wp"_ustr, TWP, MS_WP, TG::Standalone, 5 },
        { u"جا"_ustr, TSIN, '\0', TG::Function, 5 },
        { u"جاز"_ustr, TSINH, '\0', TG::Function, 5 },
        { u"جتا"_ustr, TCOS, '\0', TG::Function, 5 },
        { u"جتاز"_ustr, TCOSH, '\0', TG::Function, 5 },
        { u"حا"_ustr, TSIN, '\0', TG::Function, 5 },
        { u"حاز"_ustr, TSINH, '\0', TG::Function, 5 },
        { u"حتا"_ustr, TCOS, '\0', TG::Function, 5 },
        { u"حتاز"_ustr, TCOSH, '\0', TG::Function, 5 },
        { u"حد"_ustr, THADD, MS_HADD, TG::Oper, 5 },
        { u"طا"_ustr, TTAN, '\0', TG::Function, 5 },
        { u"طاز"_ustr, TTANH, '\0', TG::Function, 5 },
        { u"طتا"_ustr, TCOT, '\0', TG::Function, 5 },
        { u"طتاز"_ustr, TCOTH, '\0', TG::Function, 5 },
        { u"ظا"_ustr, TTAN, '\0', TG::Function, 5 },
        { u"ظاز"_ustr, TTANH, '\0', TG::Function, 5 },
        { u"ظتا"_ustr, TCOT, '\0', TG::Function, 5 },
        { u"ظتاز"_ustr, TCOTH, '\0', TG::Function, 5 },
        { u"قا"_ustr, TSEC, '\0', TG::Function, 5 },
        { u"قاز"_ustr, TSECH, '\0', TG::Function, 5 },
        { u"قتا"_ustr, TCSC, '\0', TG::Function, 5 },
        { u"قتاز"_ustr, TCSCH, '\0', TG::Function, 5 },
        { u"لو"_ustr, TLOG, '\0', TG::Function, 5 },
        { u"مجـ"_ustr, TSUM, MS_MAJ, TG::Oper, 5 },
        { u"نها"_ustr, TNAHA, '\0', TG::Oper, 5 },
        { u"ٯا"_ustr, TSEC, '\0', TG::Function, 5 },
        { u"ٯاز"_ustr, TSECH, '\0', TG::Function, 5 },
        { u"ٯتا"_ustr, TCSC, '\0', TG::Function, 5 },
        { u"ٯتاز"_ustr, TCSCH, '\0', TG::Function, 5 } };

// Start flags for generic tokens (passed to parseAnyToken in NextToken):
// the first character may be any letter; leading white space is skipped.
const sal_Int32 coStartFlags = KParseTokens::ANY_LETTER | KParseTokens::IGNORE_LEADING_WS;

// Continuation flags for generic tokens: same as the start flags without the
// leading-white-space bit, plus TWO_DOUBLE_QUOTES_BREAK_STRING.
const sal_Int32 coContFlags = (coStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
                              | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;
// Start flags for decimal numbers (passed to parsePredefinedToken in NextToken):
// the first character may be an ASCII digit or a dot; leading white space is skipped.
const sal_Int32 coNumStartFlags
    = KParseTokens::ASC_DIGIT | KParseTokens::ASC_DOT | KParseTokens::IGNORE_LEADING_WS;
// Continuation flags for decimal numbers: ASCII digit, dot, or group separator.
// tdf#127873: the ',' group separator is additionally accepted because too many
// existing documents may unwittingly have used it as decimal separator in
// such locales (it never was one, as numbers are always parsed with the en-US
// locale, where the group separator is only parsed away).
const sal_Int32 coNumContFlags = (coNumStartFlags & ~KParseTokens::IGNORE_LEADING_WS)
                                 | KParseTokens::GROUP_SEPARATOR_IN_NUMBER;
// Start flags for hexadecimal numbers: the first character may be an ASCII
// digit or an ASCII upper-case letter; leading white space is skipped.
const sal_Int32 coNum16StartFlags
    = KParseTokens::ASC_DIGIT | KParseTokens::ASC_UPALPHA | KParseTokens::IGNORE_LEADING_WS;

// Continuation flags for hexadecimal numbers: the start flags without the
// leading-white-space bit.
const sal_Int32 coNum16ContFlags = (coNum16StartFlags & ~KParseTokens::IGNORE_LEADING_WS);
// Continuation flags for the name of a user-defined character (the identifier
// following '%'): any letter or number, or a dot.
const sal_Int32 coUserDefinedCharContFlags = KParseTokens::ANY_LETTER_OR_NUMBER
                                             | KParseTokens::ASC_DOT
                                             | KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING;

//Checks if keyword is in the list.
static inline bool findCompare(const SmTokenTableEntry& lhs, const OUString& s)
{
    return s.compareToIgnoreAsciiCase(lhs.aIdent) > 0;
}

// Looks up a keyword in aTokenTable, ignoring ASCII case.
// Returns a pointer to the matching table entry, or nullptr when rName is
// empty or is not a keyword.
const SmTokenTableEntry* GetTokenTableEntry(const OUString& rName)
{
    const SmTokenTableEntry* pMatch = nullptr;
    if (!rName.isEmpty())
    {
        const auto pTableEnd = std::end(aTokenTable);
        // Binary search: first entry whose keyword does not sort before rName.
        const auto pCandidate
            = std::lower_bound(std::begin(aTokenTable), pTableEnd, rName, findCompare);
        // lower_bound only yields an insertion point, so confirm it is an exact hit.
        if (pCandidate != pTableEnd && rName.equalsIgnoreAsciiCase(pCandidate->aIdent))
            pMatch = &*pCandidate;
    }
    return pMatch;
}

// Turns arbitrary text into a form that the formula parser reads as literal text.
//
// string: the text to protect.
// force:  when true, the whole text is simply wrapped in double quotes and
//         returned without any per-character processing.
//
// Otherwise each character is handled on its own:
//   - a double quote becomes the sequence  "\""
//   - white space and operator characters are wrapped in double quotes
//   - brackets / braces / parentheses are prefixed with a backslash
//   - everything else is copied unchanged
// If the result happens to be a keyword (other than a function or operator
// keyword) it is wrapped in double quotes as a whole.
OUString encloseOrEscapeLiteral(const OUString& string, bool force)
{
    if (force)
        return "\"" + string + "\"";

    OUStringBuffer result;
    const sal_Int32 nLength = string.getLength();
    for (sal_Int32 i = 0; i < nLength; ++i)
    {
        const sal_Unicode cChar = string[i];
        // A switch over the fixed character sets avoids building lookup
        // containers on every call.
        switch (cChar)
        {
            case '"':
                result.append("\"\\\"\"");
                break;

            // characters that get enclosed in double quotes
            case ' ':
            case '\t':
            case '\n':
            case '\r':
            case '+':
            case '-':
            case '*':
            case '/':
            case '=':
            case '^':
            case '_':
            case '#':
            case '%':
            case '>':
            case '<':
            case '&':
            case '|':
            case '~':
            case '`':
                result.append("\"" + OUStringChar(cChar) + "\"");
                break;

            // characters that get a leading backslash
            case '{':
            case '}':
            case '(':
            case ')':
            case '[':
            case ']':
                result.append("\\" + OUStringChar(cChar));
                break;

            default:
                result.append(cChar);
                break;
        }
    }

    OUString resultString = result.makeStringAndClear();
    const SmTokenTableEntry* tkn = GetTokenTableEntry(resultString);
    // excluding function and operator as they take arguments and can't treat them as literal or else arguments are not displayed correctly
    if (tkn && tkn->nGroup != TG::Function && tkn->nGroup != TG::Oper)
    {
        resultString = "\"" + resultString + "\"";
    }
    return resultString;
}

static bool IsDelimiter(const OUString& rTxt, sal_Int32 nPos)
{ // returns 'true' iff cChar is '\0' or a delimiter

    assert(nPos <= rTxt.getLength()); //index out of range
    if (nPos == rTxt.getLength())
        return true; //This is EOF
    sal_Unicode cChar = rTxt[nPos];

    // check if 'cChar' is in the delimiter table
    static constexpr sal_Unicode aDelimiterTable[] = {
        ' ', '{', '}', '(', ')', '\t', '\n', '\r', '+', '-',  '*', '/', '=', '[',
        ']', '^', '_', '#', '%', '>',  '<',  '&',  '|', '\\', '"', '~', '`'
    }; //reordered by usage (by eye) for nanoseconds saving.

    //checks the array
    for (auto const& cDelimiter : aDelimiterTable)
    {
        if (cDelimiter == cChar)
            return true;
    }

    //special chars support
    sal_Int16 nTypJp = SmModule::get()->GetSysLocale().GetCharClass().getType(rTxt, nPos);
    return (nTypJp == css::i18n::UnicodeType::SPACE_SEPARATOR
            || nTypJp == css::i18n::UnicodeType::CONTROL);
}

// checks number used as arguments in Math formulas (e.g. 'size' command)
// Format: no negative numbers, must start with a digit, no exponent notation, ...
static bool lcl_IsNumber(const OUString& rText)
{
    bool bPoint = false;
    const sal_Unicode* pBuffer = rText.getStr();
    for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
    {
        const sal_Unicode cChar = *pBuffer;
        if (cChar == '.')
        {
            if (bPoint)
                return false;
            else
                bPoint = true;
        }
        else if (!rtl::isAsciiDigit(cChar))
            return false;
    }
    return true;
}
// checks number used as arguments in Math formulas (e.g. 'size' command)
// Format: no negative numbers, must start with a digit, no exponent notation, ...
static bool lcl_IsNotWholeNumber(const OUString& rText)
{
    const sal_Unicode* pBuffer = rText.getStr();
    for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
        if (!rtl::isAsciiDigit(*pBuffer))
            return true;
    return false;
}
// checks hex number used as arguments in Math formulas (e.g. 'hex' command)
// Format: no negative numbers, must start with a digit, no exponent notation, ...
static bool lcl_IsNotWholeNumber16(const OUString& rText)
{
    const sal_Unicode* pBuffer = rText.getStr();
    for (sal_Int32 nPos = 0; nPos < rText.getLength(); nPos++, pBuffer++)
        if (!rtl::isAsciiCanonicHexDigit(*pBuffer))
            return true;
    return false;
}

// Replaces nLen characters of m_aBufferString, starting at nPos, with aText
// and shifts the buffer and token indices by the resulting change in length.
void SmParser5::Replace(sal_Int32 nPos, sal_Int32 nLen, std::u16string_view aText)
{
    // the replaced range has to lie inside the buffer
    assert(nPos + nLen <= m_aBufferString.getLength());

    m_aBufferString = m_aBufferString.replaceAt(nPos, nLen, aText);

    // signed growth (or shrinkage, if negative) of the buffer
    const sal_Int32 nDelta = static_cast<sal_Int32>(aText.size()) - nLen;
    m_nBufferIndex += nDelta;
    m_nTokenIndex += nDelta;
}

void SmParser5::NextToken() //Central part of the parser
{
    sal_Int32 nBufLen = m_aBufferString.getLength();
    ParseResult aRes;
    sal_Int32 nRealStart;
    bool bCont;
    do
    {
        // skip white spaces
        while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex))
            ++m_nBufferIndex;

        // Try to parse a number in a locale-independent manner using
        // '.' as decimal separator.
        // See https://bz.apache.org/ooo/show_bug.cgi?id=45779
        aRes
            = m_aNumCC.parsePredefinedToken(KParseType::ASC_NUMBER, m_aBufferString, m_nBufferIndex,
                                            coNumStartFlags, u""_ustr, coNumContFlags, u""_ustr);

        if (aRes.TokenType == 0)
        {
            // Try again with the default token parsing.
            aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, u""_ustr,
                                           coContFlags, u""_ustr);
        }

        nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
        m_nBufferIndex = nRealStart;

        bCont = false;
        if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart])
        {
            // keep data needed for tokens row and col entry up to date
            ++m_nRow;
            m_nBufferIndex = m_nColOff = nRealStart + 1;
            bCont = true;
        }
        else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
        {
            if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
            {
                //SkipComment
                m_nBufferIndex = nRealStart + 2;
                while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex])
                    ++m_nBufferIndex;
                bCont = true;
            }
        }

    } while (bCont);

    // set index of current token
    m_nTokenIndex = m_nBufferIndex;
    sal_uInt32 nCol = nRealStart - m_nColOff;

    bool bHandled = true;
    if (nRealStart >= nBufLen)
    {
        m_aCurToken.eType = TEND;
        m_aCurToken.cMathChar = u""_ustr;
        m_aCurToken.nGroup = TG::NONE;
        m_aCurToken.nLevel = 0;
        m_aCurToken.aText.clear();
    }
    else if (aRes.TokenType & KParseType::ANY_NUMBER)
    {
        assert(aRes.EndPos > 0);
        if (m_aBufferString[aRes.EndPos - 1] == ',' && aRes.EndPos < nBufLen
            && m_pSysCC->getType(m_aBufferString, aRes.EndPos) != UnicodeType::SPACE_SEPARATOR)
        {
            // Comma followed by a non-space char is unlikely for decimal/thousands separator.
            --aRes.EndPos;
        }
        sal_Int32 n = aRes.EndPos - nRealStart;
        assert(n >= 0);
        m_aCurToken.eType = TNUMBER;
        m_aCurToken.cMathChar = u""_ustr;
        m_aCurToken.nGroup = TG::NONE;
        m_aCurToken.nLevel = 5;
        m_aCurToken.aText = m_aBufferString.copy(nRealStart, n);

        SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos), "starmath",
                    "identifier really finished? (compatibility!)");
    }
    else if (aRes.TokenType & KParseType::DOUBLE_QUOTE_STRING)
    {
        m_aCurToken.eType = TTEXT;
        m_aCurToken.cMathChar = u""_ustr;
        m_aCurToken.nGroup = TG::NONE;
        m_aCurToken.nLevel = 5;
        m_aCurToken.aText = aRes.DequotedNameOrString;
        nCol++;
    }
    else if (aRes.TokenType & KParseType::IDENTNAME)
    {
        sal_Int32 n = aRes.EndPos - nRealStart;
        assert(n >= 0);
        OUString aName(m_aBufferString.copy(nRealStart, n));
        const SmTokenTableEntry* pEntry = GetTokenTableEntry(aName);

        if (pEntry)
        {
            m_aCurToken.eType = pEntry->eType;
            m_aCurToken.setChar(pEntry->cMathChar);
            m_aCurToken.nGroup = pEntry->nGroup;
            m_aCurToken.nLevel = pEntry->nLevel;
            m_aCurToken.aText = pEntry->aIdent;
        }
        else
        {
            m_aCurToken.eType = TIDENT;
            m_aCurToken.cMathChar = u""_ustr;
            m_aCurToken.nGroup = TG::NONE;
            m_aCurToken.nLevel = 5;
            m_aCurToken.aText = aName;

            SAL_WARN_IF(!IsDelimiter(m_aBufferString, aRes.EndPos), "starmath",
                        "identifier really finished? (compatibility!)");
        }
    }
    else if (aRes.TokenType == 0 && '_' == m_aBufferString[nRealStart])
    {
        m_aCurToken.eType = TRSUB;
        m_aCurToken.cMathChar = u""_ustr;
        m_aCurToken.nGroup = TG::Power;
        m_aCurToken.nLevel = 0;
        m_aCurToken.aText = "_";

        aRes.EndPos = nRealStart + 1;
    }
    else if (aRes.TokenType & KParseType::BOOLEAN)
    {
        sal_Int32& rnEndPos = aRes.EndPos;
        if (rnEndPos - nRealStart <= 2)
        {
            sal_Unicode ch = m_aBufferString[nRealStart];
            switch (ch)
            {
                case '<':
                {
                    if (m_aBufferString.match("<<", nRealStart))
                    {
                        m_aCurToken.eType = TLL;
                        m_aCurToken.setChar(MS_LL);
                        m_aCurToken.nGroup = TG::Relation;
                        m_aCurToken.nLevel = 0;
                        m_aCurToken.aText = "<<";

                        rnEndPos = nRealStart + 2;
                    }
                    else if (m_aBufferString.match("<=", nRealStart))
                    {
                        m_aCurToken.eType = TLE;
                        m_aCurToken.setChar(MS_LE);
                        m_aCurToken.nGroup = TG::Relation;
                        m_aCurToken.nLevel = 0;
                        m_aCurToken.aText = "<=";

                        rnEndPos = nRealStart + 2;
                    }
                    else if (m_aBufferString.match("<-", nRealStart))
                    {
                        m_aCurToken.eType = TLEFTARROW;
                        m_aCurToken.setChar(MS_LEFTARROW);
                        m_aCurToken.nGroup = TG::Standalone;
                        m_aCurToken.nLevel = 5;
                        m_aCurToken.aText = "<-";

                        rnEndPos = nRealStart + 2;
                    }
                    else if (m_aBufferString.match("<>", nRealStart))
                    {
                        m_aCurToken.eType = TNEQ;
                        m_aCurToken.setChar(MS_NEQ);
                        m_aCurToken.nGroup = TG::Relation;
                        m_aCurToken.nLevel = 0;
                        m_aCurToken.aText = "<>";

                        rnEndPos = nRealStart + 2;
                    }
                    else if (m_aBufferString.match("<?>", nRealStart))
                    {
                        m_aCurToken.eType = TPLACE;
                        m_aCurToken.setChar(MS_PLACE);
                        m_aCurToken.nGroup = TG::NONE;
                        m_aCurToken.nLevel = 5;
                        m_aCurToken.aText = "<?>";

                        rnEndPos = nRealStart + 3;
                    }
                    else
                    {
                        m_aCurToken.eType = TLT;
                        m_aCurToken.setChar(MS_LT);
                        m_aCurToken.nGroup = TG::Relation;
                        m_aCurToken.nLevel = 0;
                        m_aCurToken.aText = "<";
                    }
                }
                break;
                case '>':
                {
                    if (m_aBufferString.match(">=", nRealStart))
                    {
                        m_aCurToken.eType = TGE;
                        m_aCurToken.setChar(MS_GE);
                        m_aCurToken.nGroup = TG::Relation;
                        m_aCurToken.nLevel = 0;
                        m_aCurToken.aText = ">=";

                        rnEndPos = nRealStart + 2;
                    }
                    else if (m_aBufferString.match(">>", nRealStart))
                    {
                        m_aCurToken.eType = TGG;
                        m_aCurToken.setChar(MS_GG);
                        m_aCurToken.nGroup = TG::Relation;
                        m_aCurToken.nLevel = 0;
                        m_aCurToken.aText = ">>";

                        rnEndPos = nRealStart + 2;
                    }
                    else
                    {
                        m_aCurToken.eType = TGT;
                        m_aCurToken.setChar(MS_GT);
                        m_aCurToken.nGroup = TG::Relation;
                        m_aCurToken.nLevel = 0;
                        m_aCurToken.aText = ">";
                    }
                }
                break;
                default:
                    bHandled = false;
            }
        }
    }
    else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
    {
        sal_Int32& rnEndPos = aRes.EndPos;
        if (rnEndPos - nRealStart == 1)
        {
            sal_Unicode ch = m_aBufferString[nRealStart];
            switch (ch)
            {
                case '%':
                {
                    //! modifies aRes.EndPos

                    OSL_ENSURE(rnEndPos >= nBufLen || '%' != m_aBufferString[rnEndPos],
                               "unexpected comment start");

                    // get identifier of user-defined character
                    ParseResult aTmpRes = m_pSysCC->parseAnyToken(
                        m_aBufferString, rnEndPos, KParseTokens::ANY_LETTER, u""_ustr,
                        coUserDefinedCharContFlags, u""_ustr);

                    sal_Int32 nTmpStart = rnEndPos + aTmpRes.LeadingWhiteSpace;

                    // default setting for the case that no identifier
                    // i.e. a valid symbol-name is following the '%'
                    // character
                    m_aCurToken.eType = TTEXT;
                    m_aCurToken.cMathChar = u""_ustr;
                    m_aCurToken.nGroup = TG::NONE;
                    m_aCurToken.nLevel = 5;
                    m_aCurToken.aText = "%";

                    if (aTmpRes.TokenType & KParseType::IDENTNAME)
                    {
                        sal_Int32 n = aTmpRes.EndPos - nTmpStart;
                        m_aCurToken.eType = TSPECIAL;
                        m_aCurToken.aText = m_aBufferString.copy(nTmpStart - 1, n + 1);

                        OSL_ENSURE(aTmpRes.EndPos > rnEndPos, "empty identifier");
                        if (aTmpRes.EndPos > rnEndPos)
                            rnEndPos = aTmpRes.EndPos;
                        else
                            ++rnEndPos;
                    }

                    // if no symbol-name was found we start-over with
                    // finding the next token right after the '%' sign.
                    // I.e. we leave rnEndPos unmodified.
                }
                break;
                case '[':
                {
                    m_aCurToken.eType = TLBRACKET;
                    m_aCurToken.setChar(MS_LBRACKET);
                    m_aCurToken.nGroup = TG::LBrace;
                    m_aCurToken.nLevel = 5;
                    m_aCurToken.aText = "[";
                }
                break;
                case '\\':
                {
                    m_aCurToken.eType = TESCAPE;
                    m_aCurToken.cMathChar = u""_ustr;
                    m_aCurToken.nGroup = TG::NONE;
                    m_aCurToken.nLevel = 5;
                    m_aCurToken.aText = "\\";
                }
                break;
                case ']':
                {
                    m_aCurToken.eType = TRBRACKET;
                    m_aCurToken.setChar(MS_RBRACKET);
                    m_aCurToken.nGroup = TG::RBrace;
                    m_aCurToken.nLevel = 0;
                    m_aCurToken.aText = "]";
                }
                break;
                case '^':
                {
                    m_aCurToken.eType = TRSUP;
                    m_aCurToken.cMathChar = u""_ustr;
                    m_aCurToken.nGroup = TG::Power;
                    m_aCurToken.nLevel = 0;
                    m_aCurToken.aText = "^";
                }
                break;
                case '`':
                {
                    m_aCurToken.eType = TSBLANK;
                    m_aCurToken.cMathChar = u""_ustr;
                    m_aCurToken.nGroup = TG::Blank;
                    m_aCurToken.nLevel = 5;
                    m_aCurToken.aText = "`";
                }
                break;
                case '{':
                {
                    m_aCurToken.eType = TLGROUP;
                    m_aCurToken.setChar(MS_LBRACE);
                    m_aCurToken.nGroup = TG::NONE;
                    m_aCurToken.nLevel = 5;
                    m_aCurToken.aText = "{";
                }
                break;
                case '|':
                {
                    m_aCurToken.eType = TOR;
                    m_aCurToken.setChar(MS_OR);
                    m_aCurToken.nGroup = TG::Sum;
                    m_aCurToken.nLevel = 0;
                    m_aCurToken.aText = "|";
                }
                break;
                case '}':
                {
                    m_aCurToken.eType = TRGROUP;
                    m_aCurToken.setChar(MS_RBRACE);
                    m_aCurToken.nGroup = TG::NONE;
                    m_aCurToken.nLevel = 0;
                    m_aCurToken.aText = "}";
                }
                break;
                case '~':
                {
                    m_aCurToken.eType = TBLANK;
                    m_aCurToken.cMathChar = u""_ustr;
                    m_aCurToken.nGroup = TG::Blank;
                    m_aCurToken.nLevel = 5;
                    m_aCurToken.aText = "~";
                }
                break;
                case '#':
                {
                    if (m_aBufferString.match("##", nRealStart))
                    {
                        m_aCurToken.eType = TDPOUND;
                        m_aCurToken.cMathChar = u""_ustr;
                        m_aCurToken.nGroup = TG::NONE;
                        m_aCurToken.nLevel = 0;
                        m_aCurToken.aText = "##";

                        rnEndPos = nRealStart + 2;
                    }
                    else
                    {
                        m_aCurToken.eType = TPOUND;
                        m_aCurToken.cMathChar = u""_ustr;
                        m_aCurToken.nGroup = TG::NONE;
                        m_aCurToken.nLevel = 0;
                        m_aCurToken.aText = "#";
                    }
                }
                break;
                case '&':
                {
                    m_aCurToken.eType = TAND;
                    m_aCurToken.setChar(MS_AND);
                    m_aCurToken.nGroup = TG::Product;
                    m_aCurToken.nLevel = 0;
                    m_aCurToken.aText = "&";
                }
                break;
                case '(':
                {
                    m_aCurToken.eType = TLPARENT;
                    m_aCurToken.setChar(MS_LPARENT);
                    m_aCurToken.nGroup = TG::LBrace;
                    m_aCurToken.nLevel = 5; //! 0 to continue expression
                    m_aCurToken.aText = "(";
                }
                break;
                case ')':
                {
                    m_aCurToken.eType = TRPARENT;
                    m_aCurToken.setChar(MS_RPARENT);
                    m_aCurToken.nGroup = TG::RBrace;
                    m_aCurToken.nLevel = 0; //! 0 to terminate expression
                    m_aCurToken.aText = ")";
                }
                break;
                case '*':
                {
                    m_aCurToken.eType = TMULTIPLY;
                    m_aCurToken.setChar(MS_MULTIPLY);
                    m_aCurToken.nGroup = TG::Product;
                    m_aCurToken.nLevel = 0;
                    m_aCurToken.aText = "*";
                }
                break;
                case '+':
                {
                    if (m_aBufferString.match("+-", nRealStart))
                    {
                        m_aCurToken.eType = TPLUSMINUS;
                        m_aCurToken.setChar(MS_PLUSMINUS);
                        m_aCurToken.nGroup = TG::UnOper | TG::Sum;
                        m_aCurToken.nLevel = 5;
                        m_aCurToken.aText = "+-";

                        rnEndPos = nRealStart + 2;
                    }
                    else
                    {
                        m_aCurToken.eType = TPLUS;
                        m_aCurToken.setChar(MS_PLUS);
                        m_aCurToken.nGroup = TG::UnOper | TG::Sum;
                        m_aCurToken.nLevel = 5;
                        m_aCurToken.aText = "+";
                    }
                }
                break;
                case '-':
                {
                    if (m_aBufferString.match("-+", nRealStart))
                    {
                        m_aCurToken.eType = TMINUSPLUS;
                        m_aCurToken.setChar(MS_MINUSPLUS);
                        m_aCurToken.nGroup = TG::UnOper | TG::Sum;
                        m_aCurToken.nLevel = 5;
                        m_aCurToken.aText = "-+";

                        rnEndPos = nRealStart + 2;
                    }
                    else if (m_aBufferString.match("->", nRealStart))
                    {
                        m_aCurToken.eType = TRIGHTARROW;
                        m_aCurToken.setChar(MS_RIGHTARROW);
                        m_aCurToken.nGroup = TG::Standalone;
                        m_aCurToken.nLevel = 5;
                        m_aCurToken.aText = "->";

                        rnEndPos = nRealStart + 2;
                    }
                    else
                    {
                        m_aCurToken.eType = TMINUS;
                        m_aCurToken.setChar(MS_MINUS);
                        m_aCurToken.nGroup = TG::UnOper | TG::Sum;
                        m_aCurToken.nLevel = 5;
                        m_aCurToken.aText = "-";
                    }
                }
                break;
                case '.':
                {
                    // Only one character? Then it can't be a number.
                    if (m_nBufferIndex < m_aBufferString.getLength() - 1)
                    {
                        // for compatibility with SO5.2
                        // texts like .34 ...56 ... h ...78..90
                        // will be treated as numbers
                        m_aCurToken.eType = TNUMBER;
                        m_aCurToken.cMathChar = u""_ustr;
                        m_aCurToken.nGroup = TG::NONE;
                        m_aCurToken.nLevel = 5;

                        sal_Int32 nTxtStart = m_nBufferIndex;
                        sal_Unicode cChar;
                        // if the equation ends with dot(.) then increment m_nBufferIndex till end of string only
                        do
                        {
                            cChar = m_aBufferString[++m_nBufferIndex];
                        } while ((cChar == '.' || rtl::isAsciiDigit(cChar))
                                 && (m_nBufferIndex < m_aBufferString.getLength() - 1));

                        m_aCurToken.aText
                            = m_aBufferString.copy(nTxtStart, m_nBufferIndex - nTxtStart);
                        aRes.EndPos = m_nBufferIndex;
                    }
                    else
                        bHandled = false;
                }
                break;
                case '/':
                {
                    m_aCurToken.eType = TDIVIDEBY;
                    m_aCurToken.setChar(MS_SLASH);
                    m_aCurToken.nGroup = TG::Product;
                    m_aCurToken.nLevel = 0;
                    m_aCurToken.aText = "/";
                }
                break;
                case '=':
                {
                    m_aCurToken.eType = TASSIGN;
                    m_aCurToken.setChar(MS_ASSIGN);
                    m_aCurToken.nGroup = TG::Relation;
                    m_aCurToken.nLevel = 0;
                    m_aCurToken.aText = "=";
                }
                break;
                default:
                    bHandled = false;
            }
        }
    }
    else
        bHandled = false;

    if (!bHandled)
    {
        m_aCurToken.eType = TCHARACTER;
        m_aCurToken.cMathChar = u""_ustr;
        m_aCurToken.nGroup = TG::NONE;
        m_aCurToken.nLevel = 5;

        // tdf#129372: we may have to deal with surrogate pairs
        // (see https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates)
        // in this case, we must read 2 sal_Unicode instead of 1
        int nOffset(rtl::isSurrogate(m_aBufferString[nRealStart]) ? 2 : 1);
        m_aCurToken.aText = m_aBufferString.copy(nRealStart, nOffset);

        aRes.EndPos = nRealStart + nOffset;
    }
    m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength());

    if (TEND != m_aCurToken.eType)
        m_nBufferIndex = aRes.EndPos;
}

// Scans the next token while a colour specification is expected.
//
// Blanks, line breaks and "%%" comments in front of the token are skipped;
// m_nRow and m_nColOff are updated for every line break that is passed.
// An identifier is looked up as a colour name. dvipload selects the lookup:
// TDVIPSNAMESCOL uses Identify_ColorName_DVIPSNAMES, every other value
// (TCOLOR included) uses Identify_ColorName_Parser.
// A single '#' that does not start "##" produces a THEX token whose text is
// "hex". Any other single-character result leaves m_aCurToken untouched, and
// every remaining kind of parse result only sets the token type to TNONE.
// Unless the end of the buffer was reached, m_nBufferIndex is moved behind
// the scanned token.
void SmParser5::NextTokenColor(SmTokenType dvipload)
{
    const sal_Int32 nBufLen = m_aBufferString.getLength();
    ParseResult aRes;
    sal_Int32 nRealStart;

    for (;;)
    {
        // step over blanks
        while (m_pSysCC->getType(m_aBufferString, m_nBufferIndex) == UnicodeType::SPACE_SEPARATOR)
            ++m_nBufferIndex;

        // only few inputs are possible here, so the generic flags are enough
        aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, u""_ustr,
                                       coContFlags, u""_ustr);
        nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
        m_nBufferIndex = nRealStart;

        if (aRes.TokenType == 0 && nRealStart < nBufLen && m_aBufferString[nRealStart] == '\n')
        {
            // line break: keep the row / column data of the tokens up to date
            ++m_nRow;
            m_nBufferIndex = m_nColOff = nRealStart + 1;
            continue;
        }

        const bool bComment = (aRes.TokenType & KParseType::ONE_SINGLE_CHAR) != 0
                              && nRealStart + 2 <= nBufLen
                              && m_aBufferString.match("%%", nRealStart);
        if (!bComment)
            break;

        // comment: ignore everything up to (not including) the line break
        m_nBufferIndex = nRealStart + 2;
        while (m_nBufferIndex < nBufLen && m_aBufferString[m_nBufferIndex] != '\n')
            ++m_nBufferIndex;
    }

    // set index of current token
    m_nTokenIndex = m_nBufferIndex;
    sal_uInt32 nCol = nRealStart - m_nColOff;

    if (nRealStart >= nBufLen)
        m_aCurToken.eType = TEND;
    else if (aRes.TokenType & KParseType::IDENTNAME)
    {
        const sal_Int32 nNameLen = aRes.EndPos - nRealStart;
        assert(nNameLen >= 0);
        OUString aName(m_aBufferString.copy(nRealStart, nNameLen));
        if (dvipload == TDVIPSNAMESCOL)
            m_aCurToken = starmathdatabase::Identify_ColorName_DVIPSNAMES(aName);
        else
            m_aCurToken = starmathdatabase::Identify_ColorName_Parser(aName);
    }
    else if (!(aRes.TokenType & KParseType::ONE_SINGLE_CHAR))
        m_aCurToken.eType = TNONE;
    else if (m_aBufferString[nRealStart] == '#' && !m_aBufferString.match("##", nRealStart))
    {
        // lone '#': announces a hexadecimal colour value
        m_aCurToken.eType = THEX;
        m_aCurToken.cMathChar = u""_ustr;
        m_aCurToken.nGroup = TG::Color;
        m_aCurToken.nLevel = 0;
        m_aCurToken.aText = "hex";
    }

    m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength());
    if (m_aCurToken.eType != TEND)
        m_nBufferIndex = aRes.EndPos;
}

void SmParser5::NextTokenFontSize()
{
    sal_Int32 nBufLen = m_aBufferString.getLength();
    ParseResult aRes;
    sal_Int32 nRealStart;
    bool bCont;
    bool hex = false;

    do
    {
        // skip white spaces
        while (UnicodeType::SPACE_SEPARATOR == m_pSysCC->getType(m_aBufferString, m_nBufferIndex))
            ++m_nBufferIndex;
        //hexadecimal parser
        aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coNum16StartFlags,
                                       u"."_ustr, coNum16ContFlags, u".,"_ustr);
        if (aRes.TokenType == 0)
        {
            // Try again with the default token parsing.
            aRes = m_pSysCC->parseAnyToken(m_aBufferString, m_nBufferIndex, coStartFlags, u""_ustr,
                                           coContFlags, u""_ustr);
        }
        else
            hex = true;
        nRealStart = m_nBufferIndex + aRes.LeadingWhiteSpace;
        m_nBufferIndex = nRealStart;
        bCont = false;
        if (aRes.TokenType == 0 && nRealStart < nBufLen && '\n' == m_aBufferString[nRealStart])
        {
            // keep data needed for tokens row and col entry up to date
            ++m_nRow;
            m_nBufferIndex = m_nColOff = nRealStart + 1;
            bCont = true;
        }
        else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
        {
            if (nRealStart + 2 <= nBufLen && m_aBufferString.match("%%", nRealStart))
            {
                //SkipComment
                m_nBufferIndex = nRealStart + 2;
                while (m_nBufferIndex < nBufLen && '\n' != m_aBufferString[m_nBufferIndex])
                    ++m_nBufferIndex;
                bCont = true;
            }
        }
    } while (bCont);

    // set index of current token
    m_nTokenIndex = m_nBufferIndex;
    sal_uInt32 nCol = nRealStart - m_nColOff;

    if (nRealStart >= nBufLen)
        m_aCurToken.eType = TEND;
    else if (aRes.TokenType & KParseType::ONE_SINGLE_CHAR)
    {
        if (aRes.EndPos - nRealStart == 1)
        {
            switch (m_aBufferString[nRealStart])
            {
                case '*':
                    m_aCurToken.eType = TMULTIPLY;
                    m_aCurToken.setChar(MS_MULTIPLY);
                    m_aCurToken.nGroup = TG::Product;
                    m_aCurToken.nLevel = 0;
                    m_aCurToken.aText = "*";
                    break;
                case '+':
                    m_aCurToken.eType = TPLUS;
                    m_aCurToken.setChar(MS_PLUS);
                    m_aCurToken.nGroup = TG::UnOper | TG::Sum;
                    m_aCurToken.nLevel = 5;
                    m_aCurToken.aText = "+";
                    break;
                case '-':
                    m_aCurToken.eType = TMINUS;
                    m_aCurToken.setChar(MS_MINUS);
                    m_aCurToken.nGroup = TG::UnOper | TG::Sum;
                    m_aCurToken.nLevel = 5;
                    m_aCurToken.aText = "-";
                    break;
                case '/':
                    m_aCurToken.eType = TDIVIDEBY;
                    m_aCurToken.setChar(MS_SLASH);
                    m_aCurToken.nGroup = TG::Product;
                    m_aCurToken.nLevel = 0;
                    m_aCurToken.aText = "/";
                    break;
                default:
                    m_aCurToken.eType = TNONE;
                    break;
            }
        }
        else
            m_aCurToken.eType = TNONE;
    }
    else if (hex)
    {
        assert(aRes.EndPos > 0);
        sal_Int32 n = aRes.EndPos - nRealStart;
        assert(n >= 0);
        m_aCurToken.eType = THEX;
        m_aCurToken.cMathChar = u""_ustr;
        m_aCurToken.nGroup = TG::NONE;
        m_aCurToken.nLevel = 5;
        m_aCurToken.aText = m_aBufferString.copy(nRealStart, n);
    }
    else
        m_aCurToken.eType = TNONE;

    m_aCurESelection = ESelection(m_nRow, nCol, m_nRow, nCol + m_aCurToken.aText.getLength());
    if (TEND != m_aCurToken.eType)
        m_nBufferIndex = aRes.EndPos;
}

namespace
{
// Moves the nodes held by rSubNodes into a SmNodeArray of the same length.
// Every element of rSubNodes is released, so afterwards the returned array
// holds the only pointers to the nodes; empty entries become null pointers.
SmNodeArray buildNodeArray(std::vector<std::unique_ptr<SmNode>>& rSubNodes)
{
    SmNodeArray aRawNodes(rSubNodes.size());
    size_t nPos = 0;
    for (auto& rxNode : rSubNodes)
        aRawNodes[nPos++] = rxNode.release();
    return aRawNodes;
}
} //end namespace

// grammar
/*************************************************************************************************/

std::unique_ptr<SmTableNode> SmParser5::DoTable()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    std::vector<std::unique_ptr<SmNode>> aLineArray;
    aLineArray.push_back(DoLine());
    while (m_aCurToken.eType == TNEWLINE)
    {
        NextToken();
        aLineArray.push_back(DoLine());
    }
    assert(m_aCurToken.eType == TEND);
    std::unique_ptr<SmTableNode> xSNode(new SmTableNode(m_aCurToken));
    xSNode->SetSelection(m_aCurESelection);
    xSNode->SetSubNodes(buildNodeArray(aLineArray));
    return xSNode;
}

std::unique_ptr<SmNode> SmParser5::DoAlign(bool bUseExtraSpaces)
// parse alignment info (if any), then go on with rest of expression
{
    DepthProtect aDepthGuard(m_nParseDepth);

    std::unique_ptr<SmStructureNode> xSNode;

    if (TokenInGroup(TG::Align))
    {
        xSNode.reset(new SmAlignNode(m_aCurToken));
        xSNode->SetSelection(m_aCurESelection);

        NextToken();

        // allow for just one align statement in 5.0
        if (TokenInGroup(TG::Align))
            return DoError(SmParseError::DoubleAlign);
    }

    auto pNode = DoExpression(bUseExtraSpaces);

    if (xSNode)
    {
        xSNode->SetSubNode(0, pNode.release());
        return xSNode;
    }
    return pNode;
}

// Postcondition: m_aCurToken.eType == TEND || m_aCurToken.eType == TNEWLINE
std::unique_ptr<SmNode> SmParser5::DoLine()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    std::vector<std::unique_ptr<SmNode>> ExpressionArray;

    // start with single expression that may have an alignment statement
    // (and go on with expressions that must not have alignment
    // statements in 'while' loop below. See also 'Expression()'.)
    if (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
        ExpressionArray.push_back(DoAlign());

    while (m_aCurToken.eType != TEND && m_aCurToken.eType != TNEWLINE)
        ExpressionArray.push_back(DoExpression());

    //If there's no expression, add an empty one.
    //this is to avoid a formula tree without any caret
    //positions, in visual formula editor.
    if (ExpressionArray.empty())
    {
        SmToken aTok;
        aTok.eType = TNEWLINE;
        ExpressionArray.emplace_back(std::unique_ptr<SmNode>(new SmExpressionNode(aTok)));
    }

    auto xSNode = std::make_unique<SmLineNode>(m_aCurToken);
    xSNode->SetSelection(m_aCurESelection);
    xSNode->SetSubNodes(buildNodeArray(ExpressionArray));
    return xSNode;
}

std::unique_ptr<SmNode> SmParser5::DoExpression(bool bUseExtraSpaces)
{
    DepthProtect aDepthGuard(m_nParseDepth);

    std::vector<std::unique_ptr<SmNode>> RelationArray;
    RelationArray.push_back(DoRelation());
    while (m_aCurToken.nLevel >= 4)
        RelationArray.push_back(DoRelation());

    if (RelationArray.size() > 1)
    {
        std::unique_ptr<SmExpressionNode> xSNode(new SmExpressionNode(m_aCurToken));
        xSNode->SetSubNodes(buildNodeArray(RelationArray));
        xSNode->SetUseExtraSpaces(bUseExtraSpaces);
        return xSNode;
    }
    else
    {
        // This expression has only one node so just push this node.
        return std::move(RelationArray[0]);
    }
}

// Parses a relation: a sum, optionally followed by any number of
// "<relation operator> <sum>" pairs. The pairs are folded from left to right
// into SmBinHorNode instances, i.e. the tree built so far always becomes the
// first sub node of the next one.
//
// Returns the plain sum node when no token of group TG::Relation follows.
std::unique_ptr<SmNode> SmParser5::DoRelation()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    // Remember the depth on entry: the loop below raises m_nParseDepth on
    // purpose, and the value is put back before returning.
    int nDepthLimit = m_nParseDepth;

    auto xFirst = DoSum();
    while (TokenInGroup(TG::Relation))
    {
        std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
        xSNode->SetSelection(m_aCurESelection);
        // operator symbol (this also skips the operator token), then the
        // right-hand operand
        auto xSecond = DoOpSubSup();
        auto xThird = DoSum();
        xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
        xFirst = std::move(xSNode);

        // Every chained operator nests the resulting tree one level deeper,
        // so it is counted against the parse depth as well.
        // NOTE(review): DepthProtect is defined outside this chunk; the
        // short-lived guard presumably only serves to check the raised depth
        // against the limit - confirm.
        ++m_nParseDepth;
        DepthProtect bDepthGuard(m_nParseDepth);
    }

    // undo the extra depth accumulated by the loop
    m_nParseDepth = nDepthLimit;

    return xFirst;
}

// Parses a sum: a product, optionally followed by any number of
// "<sum operator> <product>" pairs. The pairs are folded from left to right
// into SmBinHorNode instances, i.e. the tree built so far always becomes the
// first sub node of the next one.
//
// Returns the plain product node when no token of group TG::Sum follows.
std::unique_ptr<SmNode> SmParser5::DoSum()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    // Remember the depth on entry: the loop below raises m_nParseDepth on
    // purpose, and the value is put back before returning.
    int nDepthLimit = m_nParseDepth;

    auto xFirst = DoProduct();
    while (TokenInGroup(TG::Sum))
    {
        std::unique_ptr<SmStructureNode> xSNode(new SmBinHorNode(m_aCurToken));
        xSNode->SetSelection(m_aCurESelection);
        // operator symbol (this also skips the operator token), then the
        // right-hand operand
        auto xSecond = DoOpSubSup();
        auto xThird = DoProduct();
        xSNode->SetSubNodes(std::move(xFirst), std::move(xSecond), std::move(xThird));
        xFirst = std::move(xSNode);

        // Every chained operator nests the resulting tree one level deeper,
        // so it is counted against the parse depth as well.
        // NOTE(review): DepthProtect is defined outside this chunk; the
        // short-lived guard presumably only serves to check the raised depth
        // against the limit - confirm.
        ++m_nParseDepth;
        DepthProtect bDepthGuard(m_nParseDepth);
    }

    // undo the extra depth accumulated by the loop
    m_nParseDepth = nDepthLimit;

    return xFirst;
}

std::unique_ptr<SmNode> SmParser5::DoProduct()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    auto xFirst = DoPower();

    int nDepthLimit = 0;

    while (TokenInGroup(TG::Product))
    {
        //this linear loop builds a recursive structure, if it gets
        //too deep then later processing, e.g. releasing the tree,
        //can exhaust stack
        if (m_nParseDepth + nDepthLimit > DEPTH_LIMIT)
            throw std::range_error("parser depth limit");

        std::unique_ptr<SmStructureNode> xSNode;
        std::unique_ptr<SmNode> xOper;

        SmTokenType eType = m_aCurToken.eType;
        switch (eType)
        {
            case TOVER:
                xSNode.reset(new SmBinVerNode(m_aCurToken));
                xSNode->SetSelection(m_aCurESelection);
                xOper.reset(new SmRectangleNode(m_aCurToken));
                xOper->SetSelection(m_aCurESelection);
                NextToken();
                break;

            case TBOPER:
                xSNode.reset(new SmBinHorNode(m_aCurToken));

                NextToken();

                //Let the glyph node know it's a binary operation
                m_aCurToken.eType = TBOPER;
                m_aCurToken.nGroup = TG::Product;
                xOper = DoGlyphSpecial();
                break;

            case TOVERBRACE:
            case TUNDERBRACE:
                xSNode.reset(new SmVerticalBraceNode(m_aCurToken));
                xSNode->SetSelection(m_aCurESelection);
                xOper.reset(new SmMathSymbolNode(m_aCurToken));
                xOper->SetSelection(m_aCurESelection);

                NextToken();
                break;

            case TWIDEBACKSLASH:
            case TWIDESLASH:
            {
                SmBinDiagonalNode* pSTmp = new SmBinDiagonalNode(m_aCurToken);
                pSTmp->SetAscending(eType == TWIDESLASH);
                xSNode.reset(pSTmp);

                xOper.reset(new SmPolyLineNode(m_aCurToken));
                xOper->SetSelection(m_aCurESelection);
                NextToken();

                break;
            }

            default:
                xSNode.reset(new SmBinHorNode(m_aCurToken));
                xSNode->SetSelection(m_aCurESelection);

                xOper = DoOpSubSup();
        }

        auto xArg = DoPower();
        xSNode->SetSubNodesBinMo(std::move(xFirst), std::move(xOper), std::move(xArg));
        xFirst = std::move(xSNode);
        ++nDepthLimit;
    }
    return xFirst;
}

// Parses all sub-/superscripts (nActiveGroup == TG::Power) or limits
// (nActiveGroup == TG::Limit) that follow xGivenNode and returns a
// SmSubSupNode holding them.
//
// Sub node 0 is xGivenNode. The script of kind K (a value of the SmSubSup
// enum) is stored at index K + 1. When a position is given twice, the
// earlier script is dropped and an error node
// (SmParseError::DoubleSubsupscript) is stored in its place.
std::unique_ptr<SmNode> SmParser5::DoSubSup(TG nActiveGroup, std::unique_ptr<SmNode> xGivenNode)
{
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(nActiveGroup == TG::Power || nActiveGroup == TG::Limit);
    assert(m_aCurToken.nGroup == nActiveGroup);

    //! The node is created from the first sub-/supscript token only. That
    //! token is of no further interest: the position of each script is
    //! given by the index of its sub node (enum value from 'SmSubSup').
    auto xSubSup = std::make_unique<SmSubSupNode>(m_aCurToken);
    xSubSup->SetSelection(m_aCurESelection);
    xSubSup->SetUseLimits(nActiveGroup == TG::Limit);

    // slot 0 is the body, followed by one slot per script position
    std::vector<std::unique_ptr<SmNode>> aSlots(1 + SUBSUP_NUM_ENTRIES);
    aSlots[0] = std::move(xGivenNode);

    // process all sub-/supscripts
    int nIndex = 0;
    while (TokenInGroup(nActiveGroup))
    {
        const SmTokenType eScript = m_aCurToken.eType;

        switch (eScript)
        {
            case TLSUB:
                nIndex = static_cast<int>(LSUB);
                break;
            case TLSUP:
                nIndex = static_cast<int>(LSUP);
                break;
            case TCSUB:
            case TFROM:
                nIndex = static_cast<int>(CSUB);
                break;
            case TCSUP:
            case TTO:
                nIndex = static_cast<int>(CSUP);
                break;
            case TRSUB:
                nIndex = static_cast<int>(RSUB);
                break;
            case TRSUP:
                nIndex = static_cast<int>(RSUP);
                break;
            default:
                SAL_WARN("starmath", "unknown case");
        }
        // the body occupies slot 0, so the scripts are shifted by one
        nIndex++;
        assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);

        std::unique_ptr<SmNode> xError;
        if (!aSlots[nIndex])
        {
            // skip sub-/supscript token
            NextToken();
        }
        else
        {
            // position already taken in an earlier iteration: forget that
            // script and remember an error instead
            aSlots[nIndex].reset();
            xError = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
        }

        // Parse the script in any case (even after a double-script error) to
        // minimize the mess and be able to continue parsing.
        // 'from' / 'to' limits are parsed in the old 4.0 and 5.0 style.
        std::unique_ptr<SmNode> xScript
            = (eScript == TFROM || eScript == TTO) ? DoRelation() : DoTerm(true);

        aSlots[nIndex] = xError ? std::move(xError) : std::move(xScript);
    }

    xSubSup->SetSubNodes(buildNodeArray(aSlots));
    return xSubSup;
}

std::unique_ptr<SmNode> SmParser5::DoSubSupEvaluate(std::unique_ptr<SmNode> xGivenNode)
{
    DepthProtect aDepthGuard(m_nParseDepth);

    std::unique_ptr<SmSubSupNode> pNode(new SmSubSupNode(m_aCurToken));
    pNode->SetSelection(m_aCurESelection);
    pNode->SetUseLimits(true);

    // initialize subnodes array
    std::vector<std::unique_ptr<SmNode>> aSubNodes(1 + SUBSUP_NUM_ENTRIES);
    aSubNodes[0] = std::move(xGivenNode);

    // process all sub-/supscripts
    int nIndex = 0;
    while (TokenInGroup(TG::Limit))
    {
        SmTokenType eType(m_aCurToken.eType);

        switch (eType)
        {
            case TFROM:
                nIndex = static_cast<int>(RSUB);
                break;
            case TTO:
                nIndex = static_cast<int>(RSUP);
                break;
            default:
                SAL_WARN("starmath", "unknown case");
        }
        nIndex++;
        assert(1 <= nIndex && nIndex <= SUBSUP_NUM_ENTRIES);

        std::unique_ptr<SmNode> xENode;
        if (aSubNodes[nIndex]) // if already occupied at earlier iteration
        {
            // forget the earlier one, remember an error instead
            aSubNodes[nIndex].reset();
            xENode = DoError(SmParseError::DoubleSubsupscript); // this also skips current token.
        }
        else
            NextToken(); // skip sub-/supscript token

        // get sub-/supscript node
        std::unique_ptr<SmNode> xSNode;
        xSNode = DoTerm(true);

        aSubNodes[nIndex] = std::move(xENode ? xENode : xSNode);
    }

    pNode->SetSubNodes(buildNodeArray(aSubNodes));
    return pNode;
}

// Turns the current (operator) token into a SmMathSymbolNode and skips the
// token. If the token after it has the group TG::Power, the symbol is
// returned with those sub-/superscripts attached (see DoSubSup()).
std::unique_ptr<SmNode> SmParser5::DoOpSubSup()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    // operator symbol
    auto xSymbol = std::make_unique<SmMathSymbolNode>(m_aCurToken);
    xSymbol->SetSelection(m_aCurESelection);

    // step over the operator token
    NextToken();

    // no sub-/superscripts: the bare symbol is the result
    if (m_aCurToken.nGroup != TG::Power)
        return xSymbol;

    return DoSubSup(TG::Power, std::move(xSymbol));
}

// Parses one term and, if the token after it has group TG::Power, lets
// DoSubSup() attach the scripts to that term.
std::unique_ptr<SmNode> SmParser5::DoPower()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    // the term that may carry scripts
    std::unique_ptr<SmNode> xBase = DoTerm(false);

    if (m_aCurToken.nGroup != TG::Power)
        return xBase;
    return DoSubSup(TG::Power, std::move(xBase));
}

// Folds a run of consecutive TG::Blank tokens into a single SmBlankNode.
//
// The node is cleared again when the run is followed by TNEWLINE, or by TEND
// while not fuzzing and the configuration asks to ignore spaces on the right.
std::unique_ptr<SmBlankNode> SmParser5::DoBlank()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(TokenInGroup(TG::Blank));
    auto xBlank = std::make_unique<SmBlankNode>(m_aCurToken);
    xBlank->SetSelection(m_aCurESelection);

    // the current token is always counted, then every further blank token
    for (;;)
    {
        xBlank->IncreaseBy(m_aCurToken);
        NextToken();
        if (!TokenInGroup(TG::Blank))
            break;
    }

    // Ignore trailing spaces, if corresponding option is set.
    // The configuration is only consulted at TEND and when not fuzzing.
    bool bDiscard = m_aCurToken.eType == TNEWLINE;
    if (!bDiscard && m_aCurToken.eType == TEND && !comphelper::IsFuzzing())
        bDiscard = SmModule::get()->GetConfig()->IsIgnoreSpacesRight();
    if (bDiscard)
        xBlank->Clear();

    return xBlank;
}

std::unique_ptr<SmNode> SmParser5::DoTerm(bool bGroupNumberIdent)
{
    DepthProtect aDepthGuard(m_nParseDepth);

    switch (m_aCurToken.eType)
    {
        case TESCAPE:
            return DoEscape();

        case TNOSPACE:
        case TLGROUP:
        {
            bool bNoSpace = m_aCurToken.eType == TNOSPACE;
            if (bNoSpace)
                NextToken();
            if (m_aCurToken.eType != TLGROUP)
                return DoTerm(false); // nospace is no longer concerned

            NextToken();

            // allow for empty group
            if (m_aCurToken.eType == TRGROUP)
            {
                std::unique_ptr<SmStructureNode> xSNode(new SmExpressionNode(m_aCurToken));
                xSNode->SetSelection(m_aCurESelection);
                xSNode->SetSubNodes(nullptr, nullptr);

                NextToken();
                return std::unique_ptr<SmNode>(xSNode.release());
            }

            auto pNode = DoAlign(!bNoSpace);
            if (m_aCurToken.eType == TRGROUP)
            {
                NextToken();
                return pNode;
            }
            auto xSNode = std::make_unique<SmExpressionNode>(m_aCurToken);
            xSNode->SetSelection(m_aCurESelection);
            std::unique_ptr<SmNode> xError(DoError(SmParseError::RgroupExpected));
            xSNode->SetSubNodes(std::move(pNode), std::move(xError));
            return std::unique_ptr<SmNode>(xSNode.release());
        }

        case TLEFT:
            return DoBrace();
        case TEVALUATE:
            return DoEvaluate();

        case TBLANK:
        case TSBLANK:
            return DoBlank();

        case TTEXT:
        {
            auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_TEXT);
            pNode->SetSelection(m_aCurESelection);
            NextToken();
            return std::unique_ptr<SmNode>(pNode.release());
        }
        case TCHARACTER:
        {
            auto pNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_VARIABLE);
            pNode->SetSelection(m_aCurESelection);
            NextToken();
            return std::unique_ptr<SmNode>(pNode.release());
        }
        case TIDENT:
        case TNUMBER:
        {
            auto pTextNode = std::make_unique<SmTextNode>(
                m_aCurToken, m_aCurToken.eType == TNUMBER ? FNT_NUMBER : FNT_VARIABLE);
            pTextNode->SetSelection(m_aCurESelection);
            if (!bGroupNumberIdent)
            {
                NextToken();
                return std::unique_ptr<SmNode>(pTextNode.release());
            }
            std::vector<std::unique_ptr<SmNode>> aNodes;
            // Some people want to be able to write "x_2n" for "x_{2n}"
            // although e.g. LaTeX or AsciiMath interpret that as "x_2 n".
            // The tokenizer skips whitespaces so we need some additional
            // work to distinguish from "x_2 n".
            // See https://bz.apache.org/ooo/show_bug.cgi?id=11752 and
            // https://bugs.libreoffice.org/show_bug.cgi?id=55853
            sal_Int32 nBufLen = m_aBufferString.getLength();

            // We need to be careful to call NextToken() only after having
            // tested for a whitespace separator (otherwise it will be
            // skipped!)
            bool moveToNextToken = true;
            while (m_nBufferIndex < nBufLen
                   && m_pSysCC->getType(m_aBufferString, m_nBufferIndex)
                          != UnicodeType::SPACE_SEPARATOR)
            {
                NextToken();
                if (m_aCurToken.eType != TNUMBER && m_aCurToken.eType != TIDENT)
                {
                    // Neither a number nor an identifier. We just moved to
                    // the next token, so no need to do that again.
                    moveToNextToken = false;
                    break;
                }
                aNodes.emplace_back(std::unique_ptr<SmNode>(new SmTextNode(
                    m_aCurToken, m_aCurToken.eType == TNUMBER ? FNT_NUMBER : FNT_VARIABLE)));
            }
            if (moveToNextToken)
                NextToken();
            if (aNodes.empty())
                return std::unique_ptr<SmNode>(pTextNode.release());
            // We have several concatenated identifiers and numbers.
            // Let's group them into one SmExpressionNode.
            aNodes.insert(aNodes.begin(), std::move(pTextNode));
            std::unique_ptr<SmExpressionNode> xNode(new SmExpressionNode(SmToken()));
            xNode->SetSubNodes(buildNodeArray(aNodes));
            return std::unique_ptr<SmNode>(xNode.release());
        }
        case TLEFTARROW:
        case TRIGHTARROW:
        case TUPARROW:
        case TDOWNARROW:
        case TCIRC:
        case TDRARROW:
        case TDLARROW:
        case TDLRARROW:
        case TEXISTS:
        case TNOTEXISTS:
        case TFORALL:
        case TPARTIAL:
        case TNABLA:
        case TLAPLACE:
        case TFOURIER:
        case TTOWARD:
        case TDOTSAXIS:
        case TDOTSDIAG:
        case TDOTSDOWN:
        case TDOTSLOW:
        case TDOTSUP:
        case TDOTSVERT:
        {
            auto pNode = std::make_unique<SmMathSymbolNode>(m_aCurToken);
            pNode->SetSelection(m_aCurESelection);
            NextToken();
            return std::unique_ptr<SmNode>(pNode.release());
        }

        case TSETN:
        case TSETZ:
        case TSETQ:
        case TSETR:
        case TSETC:
        case THBAR:
        case TLAMBDABAR:
        case TBACKEPSILON:
        case TALEPH:
        case TIM:
        case TRE:
        case TWP:
        case TEMPTYSET:
        case TINFINITY:
        {
            auto pNode = std::make_unique<SmMathIdentifierNode>(m_aCurToken);
            pNode->SetSelection(m_aCurESelection);
            NextToken();
            return std::unique_ptr<SmNode>(pNode.release());
        }

        case TPLACE:
        {
            auto pNode = std::make_unique<SmPlaceNode>(m_aCurToken);
            pNode->SetSelection(m_aCurESelection);
            NextToken();
            return std::unique_ptr<SmNode>(pNode.release());
        }

        case TSPECIAL:
            return DoSpecial();

        case TBINOM:
            return DoBinom();

        case TFRAC:
            return DoFrac();

        case TSTACK:
            return DoStack();

        case TMATRIX:
            return DoMatrix();

        case THEX:
            NextTokenFontSize();
            if (m_aCurToken.eType == THEX)
            {
                auto pTextNode = std::make_unique<SmTextNode>(m_aCurToken, FNT_NUMBER);
                pTextNode->SetSelection(m_aCurESelection);
                NextToken();
                return pTextNode;
            }
            else
                return DoError(SmParseError::NumberExpected);
        default:
            if (TokenInGroup(TG::LBrace))
                return DoBrace();
            if (TokenInGroup(TG::Oper))
                return DoOperator();
            if (TokenInGroup(TG::UnOper))
                return DoUnOper();
            if (TokenInGroup(TG::Attribute) || TokenInGroup(TG::FontAttr))
            {
                std::stack<std::unique_ptr<SmStructureNode>,
                           std::vector<std::unique_ptr<SmStructureNode>>>
                    aStack;
                bool bIsAttr;
                for (;;)
                {
                    bIsAttr = TokenInGroup(TG::Attribute);
                    if (!bIsAttr && !TokenInGroup(TG::FontAttr))
                        break;
                    aStack.push(bIsAttr ? DoAttribute() : DoFontAttribute());
                }

                auto xFirstNode = DoPower();
                while (!aStack.empty())
                {
                    std::unique_ptr<SmStructureNode> xNode = std::move(aStack.top());
                    aStack.pop();
                    xNode->SetSubNodes(nullptr, std::move(xFirstNode));
                    xFirstNode = std::move(xNode);
                }
                return xFirstNode;
            }
            if (TokenInGroup(TG::Function))
                return DoFunction();
            return DoError(SmParseError::UnexpectedChar);
    }
}

// Handles the token that follows an escape token: bracket-like tokens become
// a plain math symbol node, anything else yields an error node
// (SmParseError::UnexpectedToken).
std::unique_ptr<SmNode> SmParser5::DoEscape()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    // step from the escape token to the token it applies to
    NextToken();

    bool bEscapable = false;
    switch (m_aCurToken.eType)
    {
        case TLPARENT:
        case TRPARENT:
        case TLBRACKET:
        case TRBRACKET:
        case TLDBRACKET:
        case TRDBRACKET:
        case TLBRACE:
        case TLGROUP:
        case TRBRACE:
        case TRGROUP:
        case TLANGLE:
        case TRANGLE:
        case TLCEIL:
        case TRCEIL:
        case TLFLOOR:
        case TRFLOOR:
        case TLLINE:
        case TRLINE:
        case TLDLINE:
        case TRDLINE:
            bEscapable = true;
            break;
        default:
            break;
    }

    if (!bEscapable)
        return DoError(SmParseError::UnexpectedToken);

    auto xSymbol = std::make_unique<SmMathSymbolNode>(m_aCurToken);
    xSymbol->SetSelection(m_aCurESelection);
    NextToken();
    return xSymbol;
}

std::unique_ptr<SmOperNode> SmParser5::DoOperator()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(TokenInGroup(TG::Oper));

    auto xSNode = std::make_unique<SmOperNode>(m_aCurToken);
    xSNode->SetSelection(m_aCurESelection);

    // get operator
    auto xOperator = DoOper();

    if (m_aCurToken.nGroup == TG::Limit || m_aCurToken.nGroup == TG::Power)
        xOperator = DoSubSup(m_aCurToken.nGroup, std::move(xOperator));

    // get argument
    auto xArg = DoPower();

    xSNode->SetSubNodes(std::move(xOperator), std::move(xArg));
    return xSNode;
}

std::unique_ptr<SmNode> SmParser5::DoOper()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    SmTokenType eType(m_aCurToken.eType);
    std::unique_ptr<SmNode> pNode;

    switch (eType)
    {
        case TSUM:
        case TPROD:
        case TCOPROD:
        case TINT:
        case TINTD:
        case TIINT:
        case TIIINT:
        case TLINT:
        case TLLINT:
        case TLLLINT:
            pNode.reset(new SmMathSymbolNode(m_aCurToken));
            pNode->SetSelection(m_aCurESelection);
            break;

        case TLIM:
        case TLIMSUP:
        case TLIMINF:
        case THADD:
        case TNAHA:
            if (eType == TLIMSUP)
                m_aCurToken.aText = u"lim sup"_ustr;
            else if (eType == TLIMINF)
                m_aCurToken.aText = u"lim inf"_ustr;
            else if (eType == TNAHA)
                m_aCurToken.aText = u"نها"_ustr;
            else if (eType == THADD)
                m_aCurToken.aText = OUString(&MS_HADD, 1);
            else
                m_aCurToken.aText = u"lim"_ustr;
            pNode.reset(new SmTextNode(m_aCurToken, FNT_TEXT));
            pNode->SetSelection(m_aCurESelection);
            break;

        case TOPER:
            NextToken();
            OSL_ENSURE(m_aCurToken.eType == TSPECIAL, "Sm: wrong token");
            m_aCurToken.eType = TOPER;
            pNode.reset(new SmGlyphSpecialNode(m_aCurToken));
            pNode->SetSelection(m_aCurESelection);
            break;

        default:
            assert(false && "unknown case");
    }

    NextToken();
    return pNode;
}

std::unique_ptr<SmStructureNode> SmParser5::DoUnOper()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(TokenInGroup(TG::UnOper));

    SmToken aNodeToken = m_aCurToken;
    ESelection aESelection = m_aCurESelection;
    SmTokenType eType = m_aCurToken.eType;
    bool bIsPostfix = eType == TFACT;

    std::unique_ptr<SmStructureNode> xSNode;
    std::unique_ptr<SmNode> xOper;
    std::unique_ptr<SmNode> xExtra;
    std::unique_ptr<SmNode> xArg;

    switch (eType)
    {
        case TABS:
        case TSQRT:
            NextToken();
            break;

        case TNROOT:
            NextToken();
            xExtra = DoPower();
            break;

        case TUOPER:
            NextToken();
            //Let the glyph know what it is...
            m_aCurToken.eType = TUOPER;
            m_aCurToken.nGroup = TG::UnOper;
            xOper = DoGlyphSpecial();
            break;

        case TPLUS:
        case TMINUS:
        case TPLUSMINUS:
        case TMINUSPLUS:
        case TNEG:
        case TFACT:
            xOper = DoOpSubSup();
            break;

        default:
            assert(false);
    }

    // get argument
    xArg = DoPower();

    if (eType == TABS)
    {
        xSNode.reset(new SmBraceNode(aNodeToken));
        xSNode->SetSelection(aESelection);
        xSNode->SetScaleMode(SmScaleMode::Height);

        // build nodes for left & right lines
        // (text, group, level of the used token are of no interest here)
        // we'll use row & column of the keyword for abs
        aNodeToken.eType = TABS;

        aNodeToken.setChar(MS_VERTLINE);
        std::unique_ptr<SmNode> xLeft(new SmMathSymbolNode(aNodeToken));
        xLeft->SetSelection(aESelection);
        std::unique_ptr<SmNode> xRight(new SmMathSymbolNode(aNodeToken));
        xRight->SetSelection(aESelection);

        xSNode->SetSubNodes(std::move(xLeft), std::move(xArg), std::move(xRight));
    }
    else if (eType == TSQRT || eType == TNROOT)
    {
        xSNode.reset(new SmRootNode(aNodeToken));
        xSNode->SetSelection(aESelection);
        xOper.reset(new SmRootSymbolNode(aNodeToken));
        xOper->SetSelection(aESelection);
        xSNode->SetSubNodes(std::move(xExtra), std::move(xOper), std::move(xArg));
    }
    else
    {
        xSNode.reset(new SmUnHorNode(aNodeToken));
        xSNode->SetSelection(aESelection);
        if (bIsPostfix)
            xSNode->SetSubNodes(std::move(xArg), std::move(xOper));
        else
        {
            // prefix operator
            xSNode->SetSubNodes(std::move(xOper), std::move(xArg));
        }
    }
    return xSNode;
}

std::unique_ptr<SmStructureNode> SmParser5::DoAttribute()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(TokenInGroup(TG::Attribute));

    auto xSNode = std::make_unique<SmAttributeNode>(m_aCurToken);
    xSNode->SetSelection(m_aCurESelection);
    std::unique_ptr<SmNode> xAttr;
    SmScaleMode eScaleMode = SmScaleMode::None;

    // get appropriate node for the attribute itself
    switch (m_aCurToken.eType)
    {
        case TUNDERLINE:
        case TOVERLINE:
        case TOVERSTRIKE:
            xAttr.reset(new SmRectangleNode(m_aCurToken));
            xAttr->SetSelection(m_aCurESelection);
            eScaleMode = SmScaleMode::Width;
            break;

        case TWIDEVEC:
        case TWIDEHARPOON:
        case TWIDEHAT:
        case TWIDETILDE:
            xAttr.reset(new SmMathSymbolNode(m_aCurToken));
            xAttr->SetSelection(m_aCurESelection);
            eScaleMode = SmScaleMode::Width;
            break;

        default:
            xAttr.reset(new SmMathSymbolNode(m_aCurToken));
            xAttr->SetSelection(m_aCurESelection);
    }

    NextToken();

    xSNode->SetSubNodes(std::move(xAttr), nullptr); // the body will be filled later
    xSNode->SetScaleMode(eScaleMode);
    return xSNode;
}

std::unique_ptr<SmStructureNode> SmParser5::DoFontAttribute()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(TokenInGroup(TG::FontAttr));

    switch (m_aCurToken.eType)
    {
        case TITALIC:
        case TNITALIC:
        case TBOLD:
        case TNBOLD:
        case TPHANTOM:
        {
            auto pNode = std::make_unique<SmFontNode>(m_aCurToken);
            pNode->SetSelection(m_aCurESelection);
            NextToken();
            return pNode;
        }

        case TSIZE:
            return DoFontSize();

        case TFONT:
            return DoFont();

        case TCOLOR:
            return DoColor();

        default:
            assert(false);
            return {};
    }
}

std::unique_ptr<SmStructureNode> SmParser5::DoColor()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(m_aCurToken.eType == TCOLOR);
    sal_Int32 nBufferIndex = m_nBufferIndex;
    NextTokenColor(TCOLOR);
    SmToken aToken;
    ESelection aESelection;

    if (m_aCurToken.eType == TDVIPSNAMESCOL)
        NextTokenColor(TDVIPSNAMESCOL);
    if (m_aCurToken.eType == TERROR)
        return DoError(SmParseError::ColorExpected);
    if (TokenInGroup(TG::Color))
    {
        aToken = m_aCurToken;
        aESelection = m_aCurESelection;
        if (m_aCurToken.eType == TRGB) //loads r, g and b
        {
            sal_uInt32 nr, ng, nb, nc;
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            nr = m_aCurToken.aText.toUInt32();
            if (nr > 255)
                return DoError(SmParseError::ColorExpected);
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            ng = m_aCurToken.aText.toUInt32();
            if (ng > 255)
                return DoError(SmParseError::ColorExpected);
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            nb = m_aCurToken.aText.toUInt32();
            if (nb > 255)
                return DoError(SmParseError::ColorExpected);
            nc = nb | ng << 8 | nr << 16 | sal_uInt32(0) << 24;
            aToken.cMathChar = OUString::number(nc, 16);
        }
        else if (m_aCurToken.eType == TRGBA) //loads r, g and b
        {
            sal_uInt32 nr, na, ng, nb, nc;
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            nr = m_aCurToken.aText.toUInt32();
            if (nr > 255)
                return DoError(SmParseError::ColorExpected);
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            ng = m_aCurToken.aText.toUInt32();
            if (ng > 255)
                return DoError(SmParseError::ColorExpected);
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            nb = m_aCurToken.aText.toUInt32();
            if (nb > 255)
                return DoError(SmParseError::ColorExpected);
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            na = m_aCurToken.aText.toUInt32();
            if (na > 255)
                return DoError(SmParseError::ColorExpected);
            nc = nb | ng << 8 | nr << 16 | na << 24;
            aToken.cMathChar = OUString::number(nc, 16);
        }
        else if (m_aCurToken.eType == THEX) //loads hex code
        {
            sal_uInt32 nc;
            NextTokenFontSize();
            if (lcl_IsNotWholeNumber16(m_aCurToken.aText))
                return DoError(SmParseError::ColorExpected);
            nc = m_aCurToken.aText.toUInt32(16);
            aToken.cMathChar = OUString::number(nc, 16);
        }
        aToken.aText = m_aBufferString.subView(nBufferIndex, m_nBufferIndex - nBufferIndex);
        NextToken();
    }
    else
        return DoError(SmParseError::ColorExpected);

    std::unique_ptr<SmStructureNode> xNode;
    xNode.reset(new SmFontNode(aToken));
    xNode->SetSelection(aESelection);
    return xNode;
}

std::unique_ptr<SmStructureNode> SmParser5::DoFont()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(m_aCurToken.eType == TFONT);

    std::unique_ptr<SmStructureNode> xNode;
    // last font rules, get that one
    SmToken aToken;
    ESelection aESelection = m_aCurESelection;
    do
    {
        NextToken();

        if (TokenInGroup(TG::Font))
        {
            aToken = m_aCurToken;
            NextToken();
        }
        else
        {
            return DoError(SmParseError::FontExpected);
        }
    } while (m_aCurToken.eType == TFONT);

    xNode.reset(new SmFontNode(aToken));
    xNode->SetSelection(aESelection);
    return xNode;
}

std::unique_ptr<SmStructureNode> SmParser5::DoFontSize()
{
    DepthProtect aDepthGuard(m_nParseDepth);
    std::unique_ptr<SmFontNode> pFontNode(new SmFontNode(m_aCurToken));
    pFontNode->SetSelection(m_aCurESelection);
    NextTokenFontSize();
    FontSizeType Type;

    switch (m_aCurToken.eType)
    {
        case THEX:
            Type = FontSizeType::ABSOLUT;
            break;
        case TPLUS:
            Type = FontSizeType::PLUS;
            break;
        case TMINUS:
            Type = FontSizeType::MINUS;
            break;
        case TMULTIPLY:
            Type = FontSizeType::MULTIPLY;
            break;
        case TDIVIDEBY:
            Type = FontSizeType::DIVIDE;
            break;

        default:
            return DoError(SmParseError::SizeExpected);
    }

    if (Type != FontSizeType::ABSOLUT)
    {
        NextTokenFontSize();
        if (m_aCurToken.eType != THEX)
            return DoError(SmParseError::SizeExpected);
    }

    // get number argument
    Fraction aValue(1);
    if (lcl_IsNumber(m_aCurToken.aText))
    {
        aValue = m_aCurToken.aText.toDouble();
        //!! Reduce values in order to avoid numerical errors
        if (aValue.GetDenominator() > 1000)
        {
            tools::Long nNum = aValue.GetNumerator();
            tools::Long nDenom = aValue.GetDenominator();
            while (nDenom > 1000) //remove big denominator
            {
                nNum /= 10;
                nDenom /= 10;
            }
            aValue = Fraction(nNum, nDenom);
        }
    }
    else
        return DoError(SmParseError::SizeExpected);

    pFontNode->SetSizeParameter(aValue, Type);
    NextToken();
    return pFontNode;
}

std::unique_ptr<SmStructureNode> SmParser5::DoBrace()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    assert(m_aCurToken.eType == TLEFT || TokenInGroup(TG::LBrace));

    std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
    xSNode->SetSelection(m_aCurESelection);
    std::unique_ptr<SmNode> pBody, pLeft, pRight;
    SmScaleMode eScaleMode = SmScaleMode::None;
    SmParseError eError = SmParseError::None;

    if (m_aCurToken.eType == TLEFT)
    {
        NextToken();

        eScaleMode = SmScaleMode::Height;

        // check for left bracket
        if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
        {
            pLeft.reset(new SmMathSymbolNode(m_aCurToken));
            pLeft->SetSelection(m_aCurESelection);

            NextToken();
            pBody = DoBracebody(true);

            if (m_aCurToken.eType == TRIGHT)
            {
                NextToken();

                // check for right bracket
                if (TokenInGroup(TG::LBrace) || TokenInGroup(TG::RBrace))
                {
                    pRight.reset(new SmMathSymbolNode(m_aCurToken));
                    pRight->SetSelection(m_aCurESelection);
                    NextToken();
                }
                else
                    eError = SmParseError::RbraceExpected;
            }
            else
                eError = SmParseError::RightExpected;
        }
        else
            eError = SmParseError::LbraceExpected;
    }
    else
    {
        assert(TokenInGroup(TG::LBrace));

        pLeft.reset(new SmMathSymbolNode(m_aCurToken));
        pLeft->SetSelection(m_aCurESelection);

        NextToken();
        pBody = DoBracebody(false);

        SmTokenType eExpectedType = TUNKNOWN;
        switch (pLeft->GetToken().eType)
        {
            case TLPARENT:
                eExpectedType = TRPARENT;
                break;
            case TLBRACKET:
                eExpectedType = TRBRACKET;
                break;
            case TLBRACE:
                eExpectedType = TRBRACE;
                break;
            case TLDBRACKET:
                eExpectedType = TRDBRACKET;
                break;
            case TLLINE:
                eExpectedType = TRLINE;
                break;
            case TLDLINE:
                eExpectedType = TRDLINE;
                break;
            case TLANGLE:
                eExpectedType = TRANGLE;
                break;
            case TLFLOOR:
                eExpectedType = TRFLOOR;
                break;
            case TLCEIL:
                eExpectedType = TRCEIL;
                break;
            case TLRLINE:
                eExpectedType = TLRLINE;
                break;
            case TLRDLINE:
                eExpectedType = TLRDLINE;
                break;
            default:
                SAL_WARN("starmath", "unknown case");
        }

        if (m_aCurToken.eType == eExpectedType)
        {
            pRight.reset(new SmMathSymbolNode(m_aCurToken));
            pRight->SetSelection(m_aCurESelection);
            NextToken();
        }
        else
            eError = SmParseError::ParentMismatch;
    }

    if (eError == SmParseError::None)
    {
        assert(pLeft);
        assert(pRight);
        xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
        xSNode->SetScaleMode(eScaleMode);
        return xSNode;
    }
    return DoError(eError);
}

std::unique_ptr<SmBracebodyNode> SmParser5::DoBracebody(bool bIsLeftRight)
{
    DepthProtect aDepthGuard(m_nParseDepth);

    auto pBody = std::make_unique<SmBracebodyNode>(m_aCurToken);
    pBody->SetSelection(m_aCurESelection);

    std::vector<std::unique_ptr<SmNode>> aNodes;
    // get body if any
    if (bIsLeftRight)
    {
        do
        {
            if (m_aCurToken.eType == TMLINE)
            {
                SmMathSymbolNode* pTempNode = new SmMathSymbolNode(m_aCurToken);
                pTempNode->SetSelection(m_aCurESelection);
                aNodes.emplace_back(std::unique_ptr<SmMathSymbolNode>(pTempNode));
                NextToken();
            }
            else if (m_aCurToken.eType != TRIGHT)
            {
                aNodes.push_back(DoAlign());
                if (m_aCurToken.eType != TMLINE && m_aCurToken.eType != TRIGHT)
                    aNodes.emplace_back(DoError(SmParseError::RightExpected));
            }
        } while (m_aCurToken.eType != TEND && m_aCurToken.eType != TRIGHT);
    }
    else
    {
        do
        {
            if (m_aCurToken.eType == TMLINE)
            {
                SmMathSymbolNode* pTempNode = new SmMathSymbolNode(m_aCurToken);
                pTempNode->SetSelection(m_aCurESelection);
                aNodes.emplace_back(std::unique_ptr<SmMathSymbolNode>(pTempNode));
                NextToken();
            }
            else if (!TokenInGroup(TG::RBrace))
            {
                aNodes.push_back(DoAlign());
                if (m_aCurToken.eType != TMLINE && !TokenInGroup(TG::RBrace))
                    aNodes.emplace_back(DoError(SmParseError::RbraceExpected));
            }
        } while (m_aCurToken.eType != TEND && !TokenInGroup(TG::RBrace));
    }

    pBody->SetSubNodes(buildNodeArray(aNodes));
    pBody->SetScaleMode(bIsLeftRight ? SmScaleMode::Height : SmScaleMode::None);
    return pBody;
}

std::unique_ptr<SmNode> SmParser5::DoEvaluate()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    // Create node
    std::unique_ptr<SmStructureNode> xSNode(new SmBraceNode(m_aCurToken));
    xSNode->SetSelection(m_aCurESelection);
    SmToken aToken(TRLINE, MS_VERTLINE, u"evaluate"_ustr, TG::RBrace, 5);

    // Parse body && left none
    NextToken();
    std::unique_ptr<SmNode> pBody = DoPower();
    SmToken bToken(TNONE, '\0', u""_ustr, TG::LBrace, 5);
    std::unique_ptr<SmNode> pLeft;
    pLeft.reset(new SmMathSymbolNode(bToken));

    // Mount nodes
    std::unique_ptr<SmNode> pRight;
    pRight.reset(new SmMathSymbolNode(aToken));
    xSNode->SetSubNodes(std::move(pLeft), std::move(pBody), std::move(pRight));
    xSNode->SetScaleMode(SmScaleMode::Height); // scalable line

    // Parse from to
    if (m_aCurToken.nGroup == TG::Limit)
    {
        std::unique_ptr<SmNode> rSNode;
        rSNode = DoSubSupEvaluate(std::move(xSNode));
        rSNode->GetToken().eType = TEVALUATE;
        return rSNode;
    }

    return xSNode;
}

// Creates a function text node (FNT_FUNCTION) from the current token.
// After a TFUNC keyword the following token is used instead and is retyped
// as TFUNC / TG::Function.
std::unique_ptr<SmTextNode> SmParser5::DoFunction()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    const bool bHasFuncKeyword = m_aCurToken.eType == TFUNC;
    if (bHasFuncKeyword)
    {
        // skip "FUNC"-statement and mark the next token as the function
        NextToken();
        m_aCurToken.eType = TFUNC;
        m_aCurToken.nGroup = TG::Function;
    }

    auto xFunction = std::make_unique<SmTextNode>(m_aCurToken, FNT_FUNCTION);
    xFunction->SetSelection(m_aCurESelection);
    NextToken();
    return xFunction;
}

// Parses a binom construct: the keyword followed by two sums, which become
// the two sub nodes of an SmTableNode.
std::unique_ptr<SmTableNode> SmParser5::DoBinom()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    auto xBinom = std::make_unique<SmTableNode>(m_aCurToken);
    xBinom->SetSelection(m_aCurESelection);

    // skip the keyword
    NextToken();

    // the operands are parsed in source order: upper first, then lower
    auto xUpper = DoSum();
    auto xLower = DoSum();
    xBinom->SetSubNodes(std::move(xUpper), std::move(xLower));
    return xBinom;
}

std::unique_ptr<SmBinVerNode> SmParser5::DoFrac()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    std::unique_ptr<SmBinVerNode> xSNode = std::make_unique<SmBinVerNode>(m_aCurToken);
    xSNode->SetSelection(m_aCurESelection);
    std::unique_ptr<SmNode> xOper = std::make_unique<SmRectangleNode>(m_aCurToken);
    xOper->SetSelection(m_aCurESelection);

    NextToken();

    auto xFirst = DoSum();
    auto xSecond = DoSum();
    xSNode->SetSubNodes(std::move(xFirst), std::move(xOper), std::move(xSecond));
    return xSNode;
}

std::unique_ptr<SmStructureNode> SmParser5::DoStack()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    std::unique_ptr<SmStructureNode> xSNode(new SmTableNode(m_aCurToken));
    xSNode->SetSelection(m_aCurESelection);
    NextToken();
    if (m_aCurToken.eType != TLGROUP)
        return DoError(SmParseError::LgroupExpected);
    std::vector<std::unique_ptr<SmNode>> aExprArr;
    do
    {
        NextToken();
        aExprArr.push_back(DoAlign());
    } while (m_aCurToken.eType == TPOUND);

    if (m_aCurToken.eType == TRGROUP)
        NextToken();
    else
        aExprArr.emplace_back(DoError(SmParseError::RgroupExpected));

    xSNode->SetSubNodes(buildNodeArray(aExprArr));
    return xSNode;
}

std::unique_ptr<SmStructureNode> SmParser5::DoMatrix()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    std::unique_ptr<SmMatrixNode> xMNode(new SmMatrixNode(m_aCurToken));
    xMNode->SetSelection(m_aCurESelection);
    NextToken();
    if (m_aCurToken.eType != TLGROUP)
        return DoError(SmParseError::LgroupExpected);

    std::vector<std::unique_ptr<SmNode>> aExprArr;
    do
    {
        NextToken();
        aExprArr.push_back(DoAlign());
    } while (m_aCurToken.eType == TPOUND);

    size_t nCol = aExprArr.size();
    size_t nRow = 1;
    while (m_aCurToken.eType == TDPOUND)
    {
        NextToken();
        for (size_t i = 0; i < nCol; i++)
        {
            auto xNode = DoAlign();
            if (i < (nCol - 1))
            {
                if (m_aCurToken.eType == TPOUND)
                    NextToken();
                else
                    xNode = DoError(SmParseError::PoundExpected);
            }
            aExprArr.emplace_back(std::move(xNode));
        }
        ++nRow;
    }

    if (m_aCurToken.eType == TRGROUP)
        NextToken();
    else
    {
        std::unique_ptr<SmNode> xENode(DoError(SmParseError::RgroupExpected));
        if (aExprArr.empty())
            nRow = nCol = 1;
        else
            aExprArr.pop_back();
        aExprArr.emplace_back(std::move(xENode));
    }

    xMNode->SetSubNodes(buildNodeArray(aExprArr));
    xMNode->SetRowCol(static_cast<sal_uInt16>(nRow), static_cast<sal_uInt16>(nCol));
    return std::unique_ptr<SmStructureNode>(xMNode.release());
}

std::unique_ptr<SmSpecialNode> SmParser5::DoSpecial()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    bool bReplace = false;
    OUString& rName = m_aCurToken.aText;
    OUString aNewName;

    // conversion of symbol names for 6.0 (XML) file format
    // (name change on import / export.
    // UI uses localized names XML file format does not.)
    if (rName.startsWith("%"))
    {
        if (IsImportSymbolNames())
        {
            const SmSym* pSym
                = SmModule::get()->GetSymbolManager().GetSymbolByExportName(rName.subView(1));
            if (pSym)
            {
                aNewName = pSym->GetUiName();
                bReplace = true;
            }
        }
        else if (IsExportSymbolNames())
        {
            const SmSym* pSym
                = SmModule::get()->GetSymbolManager().GetSymbolByUiName(rName.subView(1));
            if (pSym)
            {
                aNewName = pSym->GetExportName();
                bReplace = true;
            }
        }
    }
    if (!aNewName.isEmpty())
        aNewName = "%" + aNewName;

    if (bReplace && !aNewName.isEmpty() && rName != aNewName)
    {
        Replace(GetTokenIndex(), rName.getLength(), aNewName);
        rName = aNewName;
    }

    // add symbol name to list of used symbols
    const OUString aSymbolName(m_aCurToken.aText.copy(1));
    if (!aSymbolName.isEmpty())
        m_aUsedSymbols.insert(aSymbolName);

    auto pNode = std::make_unique<SmSpecialNode>(m_aCurToken);
    pNode->SetSelection(m_aCurESelection);
    NextToken();
    return pNode;
}

std::unique_ptr<SmGlyphSpecialNode> SmParser5::DoGlyphSpecial()
{
    DepthProtect aDepthGuard(m_nParseDepth);

    auto pNode = std::make_unique<SmGlyphSpecialNode>(m_aCurToken);
    NextToken();
    return pNode;
}

// Records a parse error at the current token and skips that token.
//
// The current token is turned into a TERROR token whose cMathChar holds the
// error message. The result is an SmExpressionNode with a single SmErrorNode
// child; a matching SmErrorDesc is appended to the error list.
std::unique_ptr<SmExpressionNode> SmParser5::DoError(SmParseError eError)
{
    DepthProtect aDepthGuard(m_nParseDepth);

    // turn the current token into the error token and attach the message
    m_aCurToken.eType = TERROR;
    m_aCurToken.cMathChar = SmResId(RID_ERR_IDENT) + starmathdatabase::getParseErrorDesc(eError);

    // wrapper expression with the error node as its first sub node
    auto xWrapper = std::make_unique<SmExpressionNode>(m_aCurToken);
    auto xErrorNode = std::make_unique<SmErrorNode>(m_aCurToken);
    xErrorNode->SetSelection(m_aCurESelection);
    xWrapper->SetSubNode(0, xErrorNode.release());

    // Append error to the error list
    m_aErrDescList.push_back(SmErrorDesc(eError, xWrapper.get(), m_aCurToken.cMathChar));

    NextToken();

    return xWrapper;
}

// end grammar

// Creates a parser with all buffer/token positions zeroed and both
// symbol-name flags (import/export) switched off.
// Note that m_nCurError starts at 0 here, whereas Parse() and
// ParseExpression() reset it to -1 before every run.
SmParser5::SmParser5()
    : m_nCurError(0)
    , m_nBufferIndex(0)
    , m_nTokenIndex(0)
    , m_nRow(0)
    , m_nColOff(0)
    , m_bImportSymNames(false)
    , m_bExportSymNames(false)
    , m_nParseDepth(0)
    // Always built from the en-US language tag, independent of the system locale
    // (presumably so that number scanning is locale-neutral -- confirm at use site).
    , m_aNumCC(LanguageTag(LANGUAGE_ENGLISH_US))
    // Points at the character classification of the module's system locale.
    , m_pSysCC(&SmModule::get()->GetSysLocale().GetCharClass())
{
}

// Nothing to release by hand; the members clean up after themselves.
SmParser5::~SmParser5() = default;

// Parses a complete formula text and returns the resulting table node.
//
// All per-run state is reset first: the set of used symbols, the scan
// positions, the error cursor (-1) and the error list. Line ends in rBuffer
// are normalised to LF before scanning starts.
std::unique_ptr<SmTableNode> SmParser5::Parse(const OUString& rBuffer)
{
    // Symbols collected by a previous run are no longer relevant.
    m_aUsedSymbols.clear();

    // Work on an LF-only copy of the input.
    m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);

    // Rewind the scanner to the very beginning of the buffer.
    m_nTokenIndex = 0;
    m_nBufferIndex = 0;
    m_nColOff = 0;
    m_nRow = 0;

    // No error selected yet, and none recorded.
    m_nCurError = -1;
    m_aErrDescList.clear();

    // Prime m_aCurToken, then descend into the grammar.
    NextToken();
    return DoTable();
}

// Parses rBuffer as a single expression and returns the resulting node.
//
// Scan positions, the error cursor (-1) and the error list are reset; the
// set of used symbols is left untouched. Line ends in rBuffer are normalised
// to LF before scanning starts.
std::unique_ptr<SmNode> SmParser5::ParseExpression(const OUString& rBuffer)
{
    // Work on an LF-only copy of the input.
    m_aBufferString = convertLineEnd(rBuffer, LINEEND_LF);

    // Rewind the scanner to the very beginning of the buffer.
    m_nTokenIndex = 0;
    m_nBufferIndex = 0;
    m_nColOff = 0;
    m_nRow = 0;

    // No error selected yet, and none recorded.
    m_nCurError = -1;
    m_aErrDescList.clear();

    // Prime m_aCurToken, then descend into the grammar.
    NextToken();
    return DoExpression();
}

// Moves the error cursor one step towards the front of the error list
// (index 0) and returns the entry it then points at. The cursor is clamped
// at 0, so repeated calls keep returning the first entry.
// Returns nullptr when no errors have been recorded.
const SmErrorDesc* SmParser5::NextError()
{
    if (m_aErrDescList.empty())
        return nullptr;

    if (m_nCurError > 0)
        --m_nCurError;
    else
        m_nCurError = 0; // also covers the initial -1 set by Parse()

    return &m_aErrDescList[m_nCurError];
}

// Moves the error cursor one step towards the back of the error list and
// returns the entry it then points at. The cursor is clamped at the last
// index, so repeated calls keep returning the last entry.
// Returns nullptr when no errors have been recorded.
const SmErrorDesc* SmParser5::PrevError()
{
    if (m_aErrDescList.empty())
        return nullptr;

    const int nLastIndex = static_cast<int>(m_aErrDescList.size() - 1);
    if (m_nCurError < nLastIndex)
        ++m_nCurError;
    else
        m_nCurError = nLastIndex;

    return &m_aErrDescList[m_nCurError];
}

// Returns the first recorded error without touching the error cursor,
// or nullptr when the error list is empty.
const SmErrorDesc* SmParser5::GetError() const
{
    return m_aErrDescList.empty() ? nullptr : &m_aErrDescList.front();
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.54 Sekunden (vorverarbeitet am 2026-05-05) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.