/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
// white space list: obtained from the fonts.config.txt of a Linux system. const sal_Unicode aWhiteSpaces[] =
{
0x0020, /* SPACE */
0x00a0, /* NO-BREAK SPACE */
0x00ad, /* SOFT HYPHEN */
0x115f, /* HANGUL CHOSEONG FILLER */
0x1160, /* HANGUL JUNGSEONG FILLER */
0x1680, /* OGHAM SPACE MARK */
0x2000, /* EN QUAD */
0x2001, /* EM QUAD */
0x2002, /* EN SPACE */
0x2003, /* EM SPACE */
0x2004, /* THREE-PER-EM SPACE */
0x2005, /* FOUR-PER-EM SPACE */
0x2006, /* SIX-PER-EM SPACE */
0x2007, /* FIGURE SPACE */
0x2008, /* PUNCTUATION SPACE */
0x2009, /* THIN SPACE */
0x200a, /* HAIR SPACE */
0x200b, /* ZERO WIDTH SPACE */
0x200c, /* ZERO WIDTH NON-JOINER */
0x200d, /* ZERO WIDTH JOINER */
0x200e, /* LEFT-TO-RIGHT MARK */
0x200f, /* RIGHT-TO-LEFT MARK */
0x2028, /* LINE SEPARATOR */
0x2029, /* PARAGRAPH SEPARATOR */
0x202a, /* LEFT-TO-RIGHT EMBEDDING */
0x202b, /* RIGHT-TO-LEFT EMBEDDING */
0x202c, /* POP DIRECTIONAL FORMATTING */
0x202d, /* LEFT-TO-RIGHT OVERRIDE */
0x202e, /* RIGHT-TO-LEFT OVERRIDE */
0x202f, /* NARROW NO-BREAK SPACE */
0x205f, /* MEDIUM MATHEMATICAL SPACE */
0x2060, /* WORD JOINER */
0x2061, /* FUNCTION APPLICATION */
0x2062, /* INVISIBLE TIMES */
0x2063, /* INVISIBLE SEPARATOR */
0x206A, /* INHIBIT SYMMETRIC SWAPPING */
0x206B, /* ACTIVATE SYMMETRIC SWAPPING */
0x206C, /* INHIBIT ARABIC FORM SHAPING */
0x206D, /* ACTIVATE ARABIC FORM SHAPING */
0x206E, /* NATIONAL DIGIT SHAPES */
0x206F, /* NOMINAL DIGIT SHAPES */
0x3000, /* IDEOGRAPHIC SPACE */
0x3164, /* HANGUL FILLER */
0xfeff, /* ZERO WIDTH NO-BREAK SPACE */
0xffa0, /* HALFWIDTH HANGUL FILLER */
0xfff9, /* INTERLINEAR ANNOTATION ANCHOR */
0xfffa, /* INTERLINEAR ANNOTATION SEPARATOR */
0xfffb /* INTERLINEAR ANNOTATION TERMINATOR */
};
// Information about reason for proofreading (ProofInfo) const sal_Int32 PROOFINFO_GET_PROOFRESULT = 1; const sal_Int32 PROOFINFO_MARK_PARAGRAPH = 2;
staticbool lcl_IsWhiteSpace( sal_Unicode cChar )
{ return std::any_of(std::begin(aWhiteSpaces), std::end(aWhiteSpaces),
[&cChar](const sal_Unicode c) { return c == cChar; });
}
// Returns a position in rText derived from nStartPos; callers in this file
// use the result as the start position of the next sentence.
static sal_Int32 lcl_SkipWhiteSpaces( const OUString &rText, sal_Int32 nStartPos )
{
    // note: having nStartPos point right behind the string is OK since that one
    // is a correct end-of-sentence position to be returned from a grammar checker...

    // NOTE(review): the statements that declare and compute nLen and nRes are
    // not visible in this view of the file. Presumably nLen is the length of
    // rText and nRes the first non-white-space position at or behind
    // nStartPos -- confirm against the complete source.

    // sanity check: the returned position has to lie within [0, nLen]
    DBG_ASSERT( 0 <= nRes && nRes <= nLen, "lcl_SkipWhiteSpaces return value out of range" );
    return nRes;
}
static sal_Int32 lcl_BacktraceWhiteSpaces( const OUString &rText, sal_Int32 nStartPos )
{ // note: having nStartPos point right behind the string is OK since that one // is a correct end-of-sentence position to be returned from a grammar checker...
// Determines the language to be used for the sentence that starts at
// nStartIndex in the given flat paragraph.
//
// xFlatPara    the flat paragraph to query; it is dereferenced without a
//              check, so the caller has to pass a valid reference
// nStartIndex  start position of the sentence within the paragraph
//
// Returns whatever locale the paragraph reports for that position.
static lang::Locale lcl_GetPrimaryLanguageOfSentence(
    const uno::Reference< text::XFlatParagraph >& xFlatPara,
    sal_Int32 nStartIndex )
{
    // get the language of the first word
    // (only a text range of length 1 starting at nStartIndex is queried)
    return xFlatPara->getLanguageOfText( nStartIndex, 1 );
}
// Asks the worker thread (if there is one) to quit, waits until it has
// finished and releases its handle. Afterwards m_bEnd is cleared again so
// that a new worker thread can be started later on.
void GrammarCheckingIterator::TerminateThread()
{
    oslThread t;
    {
        // With the mutex locked: take over the thread handle, flag the end
        // and set the wake-up condition so that a waiting worker gets to see
        // the flag.
        ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
        t = m_thread;
        m_thread = nullptr;
        m_bEnd = true;
        m_aWakeUpThread.set();
    }
    // The guard above has already been released here; the worker code locks
    // MyMutex() itself, so joining is done without holding the mutex.
    if (t != nullptr)
    {
        osl_joinWithThread(t);
        osl_destroyThread(t);
    }
    // After m_bEnd was used to flag lcl_workerfunc to quit, now
    // reset it so lcl_workerfunc could be relaunched later.
    {
        ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
        m_bEnd = false;
    }
}
OUString GrammarCheckingIterator::GetOrCreateDocId( const uno::Reference< lang::XComponent > &xComponent )
{ // internal method; will always be called with locked mutex
// Queues a flat paragraph for grammar checking and makes sure the worker
// thread is running and gets woken up.
//
// xFlatParaIterator  iterator the paragraph belongs to (may be empty)
// xFlatPara          the paragraph to check; nothing is queued if it is empty
// rDocId             identifier of the document the paragraph belongs to
// nStartIndex        position in the paragraph where checking is to start
// bAutomatic         whether the request stems from automatic checking
void GrammarCheckingIterator::AddEntry(
    const uno::Reference< text::XFlatParagraphIterator >& xFlatParaIterator,
    const uno::Reference< text::XFlatParagraph >& xFlatPara,
    const OUString & rDocId,
    sal_Int32 nStartIndex,
    bool bAutomatic )
{
    // we may not need/have a xFlatParaIterator (e.g. if checkGrammarAtPos was called)
    // but we always need a xFlatPara...
    if (!xFlatPara.is())
        return;

    // NOTE(review): the declaration and initialization of aNewFPEntry is not
    // visible in this view of the file; it has to precede the push_back
    // below and presumably is filled from the parameters of this function --
    // confirm against the complete source.

    // add new entry to the end of this queue
    ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
    // start the worker thread on demand
    if (!m_thread)
        m_thread = osl_createThread( lcl_workerfunc, this );
    m_aFPEntriesQueue.push_back( aNewFPEntry );

    // wake up the thread in order to do grammar checking
    m_aWakeUpThread.set();
}
// Evaluates the result of checking one sentence and schedules the follow-up
// work: either the remainder of the same paragraph or the next paragraph
// delivered by the iterator.
//
// rRes                     the proofreading result for the sentence
// rxFlatParagraphIterator  iterator used to fetch the next paragraph
//                          (may be empty, then nothing further is queued
//                          once the current paragraph is done)
// bIsAutomaticChecking     passed on unchanged to AddEntry
void GrammarCheckingIterator::ProcessResult(
    const linguistic2::ProofreadingResult &rRes,
    const uno::Reference< text::XFlatParagraphIterator > &rxFlatParagraphIterator,
    bool bIsAutomaticChecking )
{
    DBG_ASSERT( rRes.xFlatParagraph.is(), "xFlatParagraph is missing" );
    //no guard necessary as no members are used

    bool bContinueWithNextPara = false;
    if (!rRes.xFlatParagraph.is() || rRes.xFlatParagraph->isModified())
    {
        // if paragraph was modified/deleted meanwhile continue with the next one...
        bContinueWithNextPara = true;
    }
    else    // paragraph is still unchanged...
    {
        // mark found errors...

        // NOTE(review): the code that declares rDesc (and whatever else
        // belongs to marking the errors) is not visible in this view of the
        // file; only the statement below is. Confirm against the complete
        // source before relying on this part.

        // the proofreader may return SPELLING but right now our core
        // does only handle PROOFREADING if the result is from the proofreader...
        // (later on we may wish to color spelling errors found by the proofreader
        // differently for example. But no special handling right now.)
        if (rDesc.nType == text::TextMarkupType::SPELLCHECK)
            rDesc.nType = text::TextMarkupType::PROOFREADING;

        // other sentences left to be checked in this paragraph?
        if (rRes.nStartOfNextSentencePosition < rRes.aText.getLength())
        {
            AddEntry( rxFlatParagraphIterator, rRes.xFlatParagraph, rRes.aDocumentIdentifier, rRes.nStartOfNextSentencePosition, bIsAutomaticChecking );
        }
        else    // current paragraph finished
        {
            // set "already checked" flag for the current flat paragraph
            if (rRes.xFlatParagraph.is())
                rRes.xFlatParagraph->setChecked( text::TextMarkupType::PROOFREADING, true );

            bContinueWithNextPara = true;
        }
    }

    if (bContinueWithNextPara)
    {
        // we need to continue with the next paragraph
        if (rxFlatParagraphIterator.is())
            AddEntry(rxFlatParagraphIterator, rxFlatParagraphIterator->getNextPara(),
                     rRes.aDocumentIdentifier, 0, bIsAutomaticChecking);
    }
}
// check supported locales for each grammarchecker if not already done if (!m_bGCServicesChecked)
{
GetConfiguredGCSvcs_Impl();
m_bGCServicesChecked = true;
}
if (constauto [aSvcImplName, oFallbackBcp47] = getServiceForLocale(rLocale);
!aSvcImplName.isEmpty()) // matching configured language found?
{ if (oFallbackBcp47)
rLocale = LanguageTag::convertToLocale(*oFallbackBcp47, false);
GCReferences_t::const_iterator aImplNameIt( m_aGCReferencesByService.find( aSvcImplName ) ); if (aImplNameIt != m_aGCReferencesByService.end()) // matching impl name found?
{
xRes = aImplNameIt->second;
} else// the service is to be instantiated here for the first time...
{ try
{ const uno::Reference< uno::XComponentContext >& xContext( comphelper::getProcessComponentContext() );
uno::Reference< linguistic2::XProofreader > xGC(
xContext->getServiceManager()->createInstanceWithContext(aSvcImplName, xContext),
uno::UNO_QUERY_THROW );
uno::Reference< linguistic2::XSupportedLocales > xSuppLoc( xGC, uno::UNO_QUERY_THROW );
//!! work-around to prevent looping if the grammar checker //!! failed to properly identify the sentence end if (aRes.nBehindEndOfSentencePosition <= nStartPos
&& aRes.nBehindEndOfSentencePosition != nSuggestedEnd)
{
SAL_WARN( "linguistic", "!! Grammarchecker failed to provide end of sentence !!");
aRes.nBehindEndOfSentencePosition = nSuggestedEnd;
}
aRes.xFlatParagraph = std::move(xFlatPara);
aRes.nStartOfSentencePosition = nStartPos;
} else
{ // no grammar checker -> no error // but we need to provide the data below in order to continue with the next sentence
aRes.aDocumentIdentifier = aCurDocId;
aRes.xFlatParagraph = std::move(xFlatPara);
aRes.aText = aCurTxt;
aRes.aLocale = std::move(aCurLocale);
aRes.nStartOfSentencePosition = nStartPos;
aRes.nBehindEndOfSentencePosition = nSuggestedEnd;
}
aRes.nStartOfNextSentencePosition
= lcl_SkipWhiteSpaces(aCurTxt, aRes.nBehindEndOfSentencePosition);
aRes.nBehindEndOfSentencePosition = lcl_BacktraceWhiteSpaces(
aCurTxt, aRes.nStartOfNextSentencePosition);
//guard has to be cleared as ProcessResult calls out of this class
} // ---- THREAD SAFE END ----
ProcessResult( aRes, xFPIterator, aFPEntryItem.m_bAutomatic );
} else
{ // the paragraph changed meanwhile... (and maybe is still edited) // thus we simply continue to ask for the next to be checked.
uno::Reference< text::XFlatParagraph > xFlatParaNext( xFPIterator->getNextPara() );
AddEntry( xFPIterator, xFlatParaNext, aCurDocId, 0, aFPEntryItem.m_bAutomatic );
}
} catch (css::uno::Exception &)
{
TOOLS_WARN_EXCEPTION("linguistic", "GrammarCheckingIterator::DequeueAndCheck ignoring");
}
}
// ---- THREAD SAFE START ----
{
::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
m_aCurCheckedDocId.clear();
} // ---- THREAD SAFE END ----
} else
{ // ---- THREAD SAFE START ----
{
::osl::Guard< ::osl::Mutex > aGuard( MyMutex() ); if (m_bEnd)
{ break;
} // Check queue state again if (m_aFPEntriesQueue.empty())
m_aWakeUpThread.reset();
} // ---- THREAD SAFE END ----
//if the queue is empty // IMPORTANT: Don't call condition.wait() with locked // mutex. Otherwise you would keep out other threads // to add entries to the queue! A condition is thread- // safe implemented.
m_aWakeUpThread.wait();
}
}
}
if (!bDoCheck) return linguistic2::ProofreadingResult();
// iterate through paragraph until we find the sentence we are interested in
linguistic2::ProofreadingResult aTmpRes;
sal_Int32 nStartPos = nStartOfSentencePos >= 0 ? nStartOfSentencePos : 0;
//!! work-around to prevent looping if the grammar checker //!! failed to properly identify the sentence end if (aTmpRes.nBehindEndOfSentencePosition <= nStartPos)
{
SAL_WARN( "linguistic", "!! Grammarchecker failed to provide end of sentence !!" );
aTmpRes.nBehindEndOfSentencePosition = nSuggestedEndOfSentencePos;
}
if ((nErrorPosInPara< 0 || nStartPos <= nErrorPosInPara) && nErrorPosInPara < nEndPos)
bFound = true;
} if (nEndPos == -1) // no result from grammar checker
nEndPos = nSuggestedEndOfSentencePos;
nStartPos = lcl_SkipWhiteSpaces( rText, nEndPos );
aTmpRes.nBehindEndOfSentencePosition = nEndPos;
aTmpRes.nStartOfNextSentencePosition = nStartPos;
aTmpRes.nBehindEndOfSentencePosition = lcl_BacktraceWhiteSpaces( rText, aTmpRes.nStartOfNextSentencePosition );
// prevent endless loop by forcefully advancing if needs be... if (nStartPos <= nOldStartOfSentencePos)
{
SAL_WARN( "linguistic", "end-of-sentence detection failed?" );
nStartPos = nOldStartOfSentencePos + 1;
}
} while (!bFound && nStartPos < rText.getLength());
if (bFound && !xFlatPara->isModified()) return aTmpRes;
return linguistic2::ProofreadingResult();
}
// Asks the break iterator for the end of the sentence that starts at
// nSentenceStartPos in rText (for the given locale).
//
// The returned position never exceeds the length of rText. If the break
// iterator does not deliver a usable position the end of the text is
// returned instead.
sal_Int32 GrammarCheckingIterator::GetSuggestedEndOfSentence(
    const OUString &rText,
    sal_Int32 nSentenceStartPos,
    const lang::Locale &rLocale )
{
    // internal method; will always be called with locked mutex

    // the break iterator is created on first use only
    if (!m_xBreakIterator.is())
        m_xBreakIterator = i18n::BreakIterator::create( ::comphelper::getProcessComponentContext() );

    const sal_Int32 nLen = rText.getLength();
    sal_Int32 nCandidate = 0;
    for (sal_Int32 nProbePos = nSentenceStartPos; ; ++nProbePos)
    {
        const sal_Int32 nLastCandidate = nCandidate;
        if (nProbePos < nLen)
        {
            nCandidate = m_xBreakIterator->endOfSentence( rText, nProbePos, rLocale );
            // fdo#68750 if there's no progress at all then presumably
            // there's no end of sentence in this paragraph so just
            // set the end position to end of paragraph
            if (nCandidate <= nLastCandidate)
                nCandidate = nLen;
        }
        else
        {
            // probing behind the text: the end of the text is the answer
            nCandidate = nLen;
        }
        if (nCandidate < 0)
            nCandidate = nLen;

        // done as soon as the candidate lies behind the sentence start
        // or the end of the text has been reached
        if (nCandidate > nSentenceStartPos || nCandidate >= nLen)
            break;
    }

    // never report a position behind the end of the text
    return nCandidate > nLen ? nLen : nCandidate;
}
// Forwards the request to reset the ignored rules to every grammar checker
// that is currently held in m_aGCReferencesByService.
void SAL_CALL GrammarCheckingIterator::resetIgnoreRules(  )
{
    for (const auto& rEntry : m_aGCReferencesByService)
    {
        // entries holding an empty reference are skipped
        uno::Reference< linguistic2::XProofreader > xGC( rEntry.second );
        if (xGC.is())
            xGC->resetIgnoreRules();
    }
}
uno::Reference< lang::XComponent > xComponent( xDoc, uno::UNO_QUERY ); if (xComponent.is())
{ // if the component was already used in one of the two calls to check text // i.e. in startGrammarChecking or checkGrammarAtPos it will be found in the // m_aDocIdMap unless the document already disposed. // If it is not found then it is not yet being checked (or requested to being checked) const DocMap_t::const_iterator aIt( m_aDocIdMap.find( xComponent.get() ) ); if (aIt != m_aDocIdMap.end())
{ // check in document is checked automatically in the background...
OUString aDocId = aIt->second; if (!m_aCurCheckedDocId.isEmpty() && m_aCurCheckedDocId == aDocId)
{ // an entry for that document was dequeued and is currently being checked.
bRes = true;
} else
{ // we need to check if there is an entry for that document in the queue... // That is the document is going to be checked sooner or later.
sal_Int32 nSize = m_aFPEntriesQueue.size(); for (sal_Int32 i = 0; i < nSize && !bRes; ++i)
{ if (aDocId == m_aFPEntriesQueue[i].m_aDocId)
bRes = true;
}
}
}
} // ---- THREAD SAFE END ----
// clear containers with UNO references AND have those references released
GCReferences_t aTmpEmpty1;
DocMap_t aTmpEmpty2;
FPQueue_t aTmpEmpty3;
m_aGCReferencesByService.swap( aTmpEmpty1 );
m_aDocIdMap.swap( aTmpEmpty2 );
m_aFPEntriesQueue.swap( aTmpEmpty3 );
} // ---- THREAD SAFE END ----
}
// Called when a component (document) this object listens to gets disposed:
// drops the entry for that component from m_aDocIdMap.
void SAL_CALL GrammarCheckingIterator::disposing( const lang::EventObject &rSource )
{
    // if the component (document) is disposing release all references
    //!! Entries of this document that are still in the queue need not be
    //!! removed: their xFlatParagraphs should become invalid
    //!! (isModified() == true) and xFlatParagraphIterator->getNextPara()
    //!! will then yield an empty reference.
    //!! Results for an entry that is being checked right now should be
    //!! ignored once the grammar checker returns.
    //!! GetOrCreateDocId will not hand out that very same Id again either.
    //!! Hence all that is left to do here is to get rid of the
    //!! implementation pointer.
    uno::Reference< lang::XComponent > xDisposedDoc( rSource.Source, uno::UNO_QUERY );
    if (!xDisposedDoc.is())
        return;     // source is no component, nothing to forget

    // ---- THREAD SAFE START ----
    ::osl::Guard< ::osl::Mutex > aGuard( MyMutex() );
    m_aDocIdMap.erase( xDisposedDoc.get() );
    // ---- THREAD SAFE END ----
}
for (const OUString& rElementName : aElementNames)
{
uno::Sequence< OUString > aImplNames;
uno::Any aTmp( xNA->getByName( rElementName ) ); if (aTmp >>= aImplNames)
{ if (aImplNames.hasElements())
{ // only the first entry is used, there should be only one grammar checker per language
aTmpGCImplNamesByLang[rElementName] = aImplNames[0];
}
} else
{
SAL_WARN( "linguistic", "failed to get aImplNames. Wrong type?" );
}
}
} catch (uno::Exception const &)
{
TOOLS_WARN_EXCEPTION( "linguistic", "exception caught. Failed to get configured services" );
}
OUString sBcp47 = LanguageTag::convertToBcp47(rLocale, false);
OUString aImplName; if (rSvcImplNames.hasElements())
aImplName = rSvcImplNames[0]; // there is only one grammar checker per language
if (!LinguIsUnspecified(sBcp47) && !sBcp47.isEmpty())
{ if (!aImplName.isEmpty())
m_aGCImplNamesByLang[sBcp47] = aImplName; else
m_aGCImplNamesByLang.erase(sBcp47);
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.