/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
/** Unescapes line-ending characters in the input string.

    Line feeds, carriage returns and backslashes arrive encoded as the
    two-character sequences '\\' 'n', '\\' 'r' and '\\' '\\'. This function
    converts them back to '\n', '\r' and '\\'. A backslash followed by any
    other character, and a lone backslash at the very end of the input, are
    copied through unchanged.

    Only the bytes inside [data(), data() + size()) are read, so the view does
    not have to be NUL-terminated.

    @param i_rStr  the escaped text
    @return the unescaped text. It is built from a NUL-terminated buffer and
            therefore ends at the first NUL byte, should the input contain one.
 */
OString lcl_unescapeLineFeeds(std::string_view i_rStr)
{
    const size_t nOrigLen(i_rStr.size());

    // Unescaping never makes the text longer, so nOrigLen + 1 bytes always
    // hold the result plus the terminating NUL.
    auto pBuffer = std::make_unique<char[]>(nOrigLen + 1);
    char* pWrite(pBuffer.get());

    // Scan by index: a std::string_view carries no NUL-termination guarantee,
    // so C string functions such as strchr must not be used on its data().
    size_t nRead = 0;
    while (nRead < nOrigLen)
    {
        const char cCur = i_rStr[nRead];
        if (cCur == '\\' && nRead + 1 < nOrigLen)
        {
            const char cNext = i_rStr[nRead + 1];
            if (cNext == 'n' || cNext == 'r' || cNext == '\\')
            {
                // Recognised escape: emit the single character it stands for
                // and skip both input characters.
                *pWrite = cNext == 'n' ? '\n' : (cNext == 'r' ? '\r' : '\\');
                ++pWrite;
                nRead += 2;
                continue;
            }
        }

        // Ordinary character, or a backslash that does not start an escape.
        *pWrite = cCur;
        ++pWrite;
        ++nRead;
    }
    *pWrite = '\0';

    return OString(pBuffer.get());
}
// have two control points before us. the current one // is a normal point - thus, convert previous points // into bezier segment const sal_uInt32 nPoints( aSubPath.count() ); const basegfx::B2DPoint aCtrlA( aSubPath.getB2DPoint(nPoints-3) ); const basegfx::B2DPoint aCtrlB( aSubPath.getB2DPoint(nPoints-2) ); const basegfx::B2DPoint aEnd( aSubPath.getB2DPoint(nPoints-1) );
aSubPath.remove(nPoints-3, 3);
aSubPath.appendBezierSegment(aCtrlA, aCtrlB, aEnd);
/* Turn the font family name passed from xpdfimport into a plain font name and
   pick up the style it encodes, e.g.
       TimesNewRomanPSMT            -> TimesNewRoman
       TimesNewRomanPS-BoldMT       -> TimesNewRoman
       TimesNewRomanPS-BoldItalicMT -> TimesNewRoman

   The name is trimmed first. Then, for every entry of fontAttributesSuffixes
   (in list order) that the name currently ends with, all occurrences of that
   entry are removed from the name via replaceAll(), and the matching font
   weight / italic flag is stored in rResult.

   TODO: Further convert the font names to real font names in the system
   rather than the PS names, e.g. TimesNewRoman -> Times New Roman
*/
void LineParser::parseFontFamilyName( FontAttributes& rResult )
{
    SAL_INFO("sdext.pdfimport", "Processing " << rResult.familyName << " ---");
    rResult.familyName = rResult.familyName.trim();

    for (const OUString& rSuffix : fontAttributesSuffixes)
    {
        // Entries the name does not end with leave rResult untouched.
        if (!rResult.familyName.endsWith(rSuffix))
            continue;

        rResult.familyName = rResult.familyName.replaceAll(rSuffix, "");
        SAL_INFO("sdext.pdfimport", rResult.familyName);

        // Weight keywords, lightest first. The comparisons are mutually
        // exclusive, so at most one branch assigns fontWeight.
        if (rSuffix == u"Thin")
        {
            rResult.fontWeight = u"100"_ustr;
        }
        else if (rSuffix == u"ExtraLight" || rSuffix == u"UltraLight")
        {
            rResult.fontWeight = u"200"_ustr;
        }
        else if (rSuffix == u"Light")
        {
            rResult.fontWeight = u"300"_ustr;
        }
        else if (rSuffix == u"Normal" || rSuffix == u"Regular" || rSuffix == u"Book")
        {
            rResult.fontWeight = u"400"_ustr;
        }
        else if (rSuffix == u"Medium")
        {
            rResult.fontWeight = u"500"_ustr;
        }
        else if (rSuffix == u"Semibold")
        {
            rResult.fontWeight = u"600"_ustr;
        }
        else if (rSuffix == u"Bold")
        {
            rResult.fontWeight = u"bold"_ustr;
        }
        else if (rSuffix == u"ExtraBold" || rSuffix == u"UltraBold")
        {
            rResult.fontWeight = u"800"_ustr;
        }
        else if (rSuffix == u"Heavy" || rSuffix == u"Black")
        {
            rResult.fontWeight = u"900"_ustr;
        }

        // Slant keywords are checked independently of the weight keywords.
        const bool bSlanted = (rSuffix == "Italic") || (rSuffix == "Oblique");
        if (bSlanted)
        {
            rResult.isItalic = true;
        }
    }
}
// Handles an "updateFont" line; the field layout is described in the block
// comment at the top of the body.
//
// As written here the function:
//  - takes the rest of the current line, unescaped, as the font name;
//  - looks nFontID up in m_parser.m_aFontMap; on a hit it copies the stored
//    FontAttributes, overwrites only the size, passes the copy to the sink's
//    setFont() and returns;
//  - otherwise maps the numeric weight code to a font-weight string and
//    fills aResult's family name / weight / italic either from
//    aFontReadResult (when nFileLen > 0 and a family name was detected) or
//    from the font name via parseFontFamilyName(), with "Arial" as the
//    fallback family name.
//
// NOTE(review): in this copy nFontID, nFontWeight, nSize and nFileLen are
// read without ever being assigned, and aResult, aFontReadResult and aBounds
// are used without a visible declaration. The statements that populate or
// declare them appear to be missing from this view - confirm against the
// complete file before changing anything here.
// NOTE(review): several `if (...)` conditions are fused into the preceding
// `//` comment and `else if` appears as `elseif`; this looks like whitespace
// lost during extraction, so do not rely on the control flow exactly as
// printed - compare with the upstream source.
void LineParser::readFont()
{ /* xpdf line is like (separated by space): updateFont <FontID> <isEmbedded> <maFontWeight> <isItalic> <isUnderline> <TransformedFontSize> <nEmbedSize> <FontName> updateFont 14 1 4 0 0 1200.000000 23068 TimesNewRomanPSMT
If nEmbedSize > 0, then a fontFile is followed as a stream.
*/
sal_Int64 nFontID;
sal_Int32 nIsEmbedded;
sal_Int32 nFontWeight;
sal_Int32 nIsItalic;
sal_Int32 nIsUnderline; double nSize;
sal_Int32 nFileLen;
OString aFontName;
// Only the magnitude of the size is kept; a negative value is flipped.
nSize = nSize < 0.0 ? -nSize : nSize; // Read FontName. From the current position to the end (any white spaces will be included).
aFontName = lcl_unescapeLineFeeds(m_aLine.substr(m_nCharIndex));
// name gobbles up rest of line
m_nCharIndex = std::string_view::npos;
// Check if this font is already in our font map list. // If yes, update the font size and skip.
Parser::FontMapType::const_iterator pFont( m_parser.m_aFontMap.find(nFontID) ); if( pFont != m_parser.m_aFontMap.end() )
{
OSL_PRECOND(nFileLen==0,"font data for known font");
// Work on a copy of the stored attributes; only the size is replaced.
FontAttributes aRes(pFont->second);
aRes.size = nSize;
m_parser.m_pSink->setFont( aRes );
return;
}
// The font is not yet in the map list - get info and add to map
OUString sFontWeight; // font weight name per ODF specifications if (nFontWeight == 0 or nFontWeight == 4) // WeightNotDefined or W400, map to normal font
sFontWeight = u"normal"_ustr; elseif (nFontWeight == 1) // W100, Thin
sFontWeight = u"100"_ustr; elseif (nFontWeight == 2) // W200, Extra-Light
sFontWeight = u"200"_ustr; elseif (nFontWeight == 3) // W300, Light
sFontWeight = u"300"_ustr; elseif (nFontWeight == 5) // W500, Medium. Is this supported by ODF?
sFontWeight = u"500"_ustr; elseif (nFontWeight == 6) // W600, Semi-Bold
sFontWeight = u"600"_ustr; elseif (nFontWeight == 7) // W700, Bold
sFontWeight = u"bold"_ustr; elseif (nFontWeight == 8) // W800, Extra-Bold
sFontWeight = u"800"_ustr; elseif (nFontWeight == 9) // W900, Black
sFontWeight = u"900"_ustr;
SAL_INFO("sdext.pdfimport", "Font weight passed from xpdfimport is: " << sFontWeight);
/* The above font attributes (fontName, fontWeight, italic) are based on xpdf line output and may not be reliable. To get correct attributes, we do the following: 1. Read the embedded font file and determine the attributes based on the font file. 2. If we failed to read the font file, or empty result is returned, then determine the font attributes from the font name. 3. If all these attempts have failed, then use a fallback font.
*/ if (nFileLen > 0)
{
// The font file is read as nFileLen bytes of binary data.
uno::Sequence<sal_Int8> aFontFile(nFileLen);
readBinaryData(aFontFile); // Read fontFile.
if (!aFontReadResult.GetFamilyName().isEmpty()) // font detection successful
{ // Family name
aResult.familyName = aFontReadResult.GetFamilyName();
SAL_INFO("sdext.pdfimport", aResult.familyName); // tdf#143959: there are cases when the family name returned by font descriptor // is like "AAAAAA+TimesNewRoman,Bold". In this case, use the font name // determined by parseFontFamilyName instead, but still determine the font // attributes (bold italic etc) from the font descriptor. if (aResult.familyName.getLength() > 7 and aResult.familyName.indexOf(u"+", 6) == 6)
{
// Drop the first seven characters (the part up to and including the '+').
aResult.familyName = aResult.familyName.copy(7, aResult.familyName.getLength() - 7);
parseFontFamilyName(aResult);
} if (aResult.familyName.endsWithIgnoreAsciiCase("-VKana"))
{
parseFontFamilyName(aResult);
}
// Font weight if (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_THIN)
aResult.fontWeight = u"100"_ustr; elseif (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_ULTRALIGHT)
aResult.fontWeight = u"200"_ustr; elseif (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_LIGHT)
aResult.fontWeight = u"300"_ustr; elseif (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_SEMILIGHT)
aResult.fontWeight = u"350"_ustr; // no need to check "normal" here as this is default in nFontWeight above elseif (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_SEMIBOLD)
aResult.fontWeight = u"600"_ustr; elseif (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_BOLD)
aResult.fontWeight = u"bold"_ustr; elseif (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_ULTRABOLD)
aResult.fontWeight = u"800"_ustr; elseif (aFontReadResult.GetWeightMaybeAskConfig() == WEIGHT_BLACK)
aResult.fontWeight = u"900"_ustr;
SAL_INFO("sdext.pdfimport", aResult.fontWeight);
// Italic
// Both ITALIC_OBLIQUE and ITALIC_NORMAL are reported as italic.
aResult.isItalic = (aFontReadResult.GetItalicMaybeAskConfig() == ITALIC_OBLIQUE ||
aFontReadResult.GetItalicMaybeAskConfig() == ITALIC_NORMAL);
} else// font detection failed
{
SAL_WARN("sdext.pdfimport", "Font detection from fontFile returned empty result. Guessing font info from font name.");
parseFontFamilyName(aResult);
}
} else// no embedded font file - guess font attributes from font name
{
parseFontFamilyName(aResult);
}
// last fallback if (aResult.familyName.isEmpty())
{
SAL_WARN("sdext.pdfimport", "Failed to determine the font, using a fallback font Arial.");
aResult.familyName = "Arial";
}
// Create the virtual device if the parser does not have one yet.
if (!m_parser.m_xDev)
m_parser.m_xDev.disposeAndReset(VclPtr<VirtualDevice>::Create());
// Passes the remainder of the line, unescaped and converted from UTF-8,
// to the sink's hyperLink() together with aBounds.
// NOTE(review): a hyperLink() call at the end of readFont() looks like the
// tail of a different handler spliced in here - verify against the full file.
m_parser.m_pSink->hyperLink( aBounds,
OStringToOUString( lcl_unescapeLineFeeds(
m_aLine.substr(m_nCharIndex) ),
RTL_TEXTENCODING_UTF8 ) ); // name gobbles up rest of line
m_nCharIndex = std::string_view::npos;
}
// Read a line and return any error // Note: It skips leading \n and \r // It clears the line buffer at the start
oslFileError readLine(OStringBuffer& line)
{ char aChar('\n');
sal_uInt64 nBytesRead;
oslFileError nRes;
line.setLength(0);
// skip garbage \r \n at start of line for (;;)
{
nRes = read(&aChar, 1, &nBytesRead); if (osl_File_E_None != nRes || nBytesRead != 1 || (aChar != '\n' && aChar != '\r')) break;
} if (osl_File_E_None != nRes) return nRes;
if (aChar != '\n' && aChar != '\r')
line.append(aChar);
// Loop possibly asking for a password if needed bool bEntered = false; do
{ // Password lines are Pmypassword\n followed by "O\n" to try to open
OString aBuf = "P" + OUStringToOString(aPwd, RTL_TEXTENCODING_ISO_8859_1) + "\nO\n";
// Check for a header saying if the child managed to open the document
OStringBuffer aHeaderLine;
pBuffering = std::unique_ptr<Buffering>(new Buffering(pOut));
oslFileError eFileErr = pBuffering->readLine(aHeaderLine); if (osl_File_E_None == eFileErr)
{ auto aHeaderString = aHeaderLine.toString();
SAL_INFO("sdext.pdfimport", "Header line:" << aHeaderString); if (aHeaderString.startsWith("#OPEN"))
{ // Great - it opened! break;
}
// The only other thing we expect here is a line starting with // #ERROR: if (!aHeaderString.startsWith("#ERROR:"))
{
SAL_WARN("sdext.pdfimport", "Bad parser answer:: " << aHeaderString);
bRet = false; break;
}
if (!aHeaderString.endsWith(":ENCRYPTED"))
{ // Some other type of parser error
SAL_WARN("sdext.pdfimport", "Error from parser: " << aHeaderString);
bRet = false; break;
}
// Must be a failure to decrypt, prompt for a password unless we've // already got one (e.g. if the hybrid detect prompted for one) if (!bPasswordOnEntry)
{
bEntered = getPassword(xIHdl, aPwd, !bEntered, aDocName); if (!bEntered)
{ // User cancelled password input
SAL_INFO("sdext.pdfimport", "User cancelled password input");
bRet = false; break;
}
}
// user entered a password, just loop around again
} else
{
SAL_WARN("sdext.pdfimport", "Unable to read header line; " << eFileErr);
bRet = false;
}
} while (bRet);
if (bRet && pOut && pErr)
{ // Start the rendering by sending G command
osl_writeFile(pIn, "G\n", 2, &nWritten);
SAL_INFO("sdext.pdfimport", "Sent Go command: " << nWritten);
// read results of PDF parser. One line - one call to // OutputDev. stderr is used for alternate streams, like // embedded fonts and bitmaps
Parser aParser(rSink,pErr,xContext);
OStringBuffer line; for( ;; )
{
oslFileError nRes = pBuffering->readLine(line);
if ( osl_File_E_None != nRes ) break; if ( line.isEmpty() ) break;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.