Quellcodebibliothek Statistik Leitseite products/Sources/formale Sprachen/C/LibreOffice/sdext/source/pdfimport/test/   (Office von Apache Version 25.8.3.2©)  Datei vom 5.10.2025 mit Größe 18 kB image not shown  

Quelle  pdfunzip.cxx   Sprache: C

 
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */



#include <stdio.h>
#include <string_view>

#include <sal/main.h>
#include <osl/file.h>
#include <osl/thread.h>
#include <rtl/alloc.h>
#include <rtl/ustring.hxx>
#include <rtl/strbuf.hxx>
#include <o3tl/string_view.hxx>

#include <pdfparse.hxx>

using namespace pdfparse;


/** Print the command-line usage summary for this tool to stdout.

    @param pExe  name of the executable as invoked (argv[0]); it is
                 substituted into each of the five usage lines.
*/
static void printHelp( const char* pExe )
{
    // The angle-bracket placeholders name the value expected at each position;
    // without them the usage lines read as "[-pw, --password ] []".
    fprintf( stdout,
    "USAGE: %s [-h,--help]\n"
    "       %s [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    "       %s <-a, --extract-add-streams> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    "       %s <-f, --extract-fonts> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    "       %s <-o, --extract-objects> <o0>[:<g0>][,<o1>[:<g1>][,...]] [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    "  -h, --help: show help\n"
    "  -a, --extract-add-streams: extracts additional streams to outputfile_object\n"
    "      and prints the mimetype found to stdout\n"
    "  -f, --extract-fonts: extracts fonts (currently only type1 and truetype are supported)\n"
    "  -o, --extract-objects: extracts object streams, the syntax of the argument is comma separated\n"
    "      object numbers, where object number and generation number are separated by \':\'\n"
    "      an omitted generation number defaults to 0\n"
    "  -pw, --password: use password for decryption\n"
    "\n"
    "note: -f, -a, -o and normal unzip operation are mutually exclusive\n"
    , pExe, pExe, pExe, pExe, pExe );
}

namespace {

class FileEmitContext : public EmitContext
{
    oslFileHandle m_aHandle;
    oslFileHandle m_aReadHandle;
    unsigned int  m_nReadLen;

    void openReadFile( const char* pOrigName );

    public:
    FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop );
    virtual ~FileEmitContext() override;

    virtual bool write( const void* pBuf, unsigned int nLen ) noexcept override;
    virtual unsigned int getCurPos() noexcept override;
    virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) noexcept override;
    virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) noexcept override;
};

}

FileEmitContext::FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop )
    : EmitContext( pTop ),
      m_aHandle( nullptr ),
      m_aReadHandle( nullptr ),
      m_nReadLen( 0 )
{
    OUString aSysFile(
        OStringToOUString( std::string_view( pFileName ), osl_getThreadTextEncoding() ) );
    OUString aURL;
    if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
    {
        fprintf( stderr, "filename conversion \"%s\" failed\n", pFileName );
        return;
    }

    if( osl_openFile( aURL.pData, &m_aHandle, osl_File_OpenFlag_Write ) == osl_File_E_None )
    {
        if( osl_setFileSize( m_aHandle, 0 ) != osl_File_E_None )
        {
            fprintf( stderr, "could not truncate %s\n", pFileName );
            osl_closeFile( m_aHandle );
            m_aHandle = nullptr;
        }
    }
    else if( osl_openFile( aURL.pData, &m_aHandle,
            osl_File_OpenFlag_Write |osl_File_OpenFlag_Create ) != osl_File_E_None )
    {
        fprintf( stderr, "could not open %s\n", pFileName );
        return;
    }
    m_bDeflate = true;

    openReadFile( pOrigName );
}

/** Close the output file and the original file, if they are open. */
FileEmitContext::~FileEmitContext()
{
    if( m_aHandle != nullptr )
    {
        osl_closeFile( m_aHandle );
    }

    if( m_aReadHandle != nullptr )
    {
        osl_closeFile( m_aReadHandle );
    }
}

void FileEmitContext::openReadFile( const char* pInFile )
{
    OUString aSysFile(
        OStringToOUString( std::string_view( pInFile ), osl_getThreadTextEncoding() ) );
    OUString aURL;
    if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
    {
        fprintf( stderr, "filename conversion \"%s\" failed\n", pInFile );
        return;
    }

    if( osl_openFile( aURL.pData, &m_aReadHandle, osl_File_OpenFlag_Read ) != osl_File_E_None )
    {
        fprintf( stderr, "could not open %s\n", pInFile );
        return;
    }

    if( osl_setFilePos( m_aReadHandle, osl_Pos_End, 0 ) != osl_File_E_None )
    {
        fprintf( stderr, "could not seek to end of %s\n", pInFile );
        osl_closeFile( m_aReadHandle );
        return;
    }

    sal_uInt64 nFileSize = 0;
    if( osl_getFilePos( m_aReadHandle, &nFileSize ) != osl_File_E_None )
    {
        fprintf( stderr, "could not get end pos of %s\n", pInFile );
        osl_closeFile( m_aReadHandle );
        return;
    }

    m_nReadLen = static_cast<unsigned int>(nFileSize);
}

bool FileEmitContext::write( const void* pBuf, unsigned int nLen ) noexcept
{
    if( ! m_aHandle )
        return false;

    sal_uInt64 nWrite = static_cast<sal_uInt64>(nLen);
    sal_uInt64 nWritten = 0;
    return (osl_writeFile( m_aHandle, pBuf, nWrite, &nWritten ) == osl_File_E_None)
           && nWrite == nWritten;
}

unsigned int FileEmitContext::getCurPos() noexcept
{
    sal_uInt64 nFileSize = 0;
    if( m_aHandle )
    {
        if( osl_getFilePos( m_aHandle, &nFileSize ) != osl_File_E_None )
            nFileSize = 0;
    }
    return static_cast<unsigned int>(nFileSize);
}

bool FileEmitContext::copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) noexcept
{
    if( nOrigOffset + nLen > m_nReadLen )
        return false;

    if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
    {
        fprintf( stderr, "could not seek to offset %u\n", nOrigOffset );
        return false;
    }
    void* pBuf = std::malloc( nLen );
    if( ! pBuf )
        return false;
    sal_uInt64 nBytesRead = 0;
    if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None
        || nBytesRead != static_cast<sal_uInt64>(nLen) )
    {
        fprintf( stderr, "could not read %u bytes\n", nLen );
        std::free( pBuf );
        return false;
    }
    bool bRet = write( pBuf, nLen );
    std::free( pBuf );
    return bRet;
}

unsigned int FileEmitContext::readOrigBytes( unsigned int nOrigOffset, unsigned int nLenvoid* pBuf ) noexcept
{
    if( nOrigOffset + nLen > m_nReadLen )
        return 0;

    if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
    {
        fprintf( stderr, "could not seek to offset %u\n", nOrigOffset );
        return 0;
    }
    sal_uInt64 nBytesRead = 0;
    if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None )
        return 0;
    return static_cast<unsigned int>(nBytesRead);
}

typedef int(*PDFFileHdl)(const char*, const char*, PDFFile*);

static int handleFile( const char* pInFile, const char* pOutFile, const char* pPassword, PDFFileHdl pHdl )
{
    int nRet = 0;
    std::unique_ptr<PDFEntry> pEntry
        = pdfparse::PDFReader::read(OStringToOUString(pInFile, osl_getThreadTextEncoding()));
    if( pEntry )
    {
        PDFFile* pPDFFile = dynamic_cast<PDFFile*>(pEntry.get());
        if( pPDFFile )
        {
            fprintf( stdout, "have a %s PDF file\n", pPDFFile->isEncrypted() ? "encrypted" : "unencrypted" );
            if( pPassword )
                fprintf( stdout, "password %s\n",
                         pPDFFile->setupDecryptionData( pPassword ) ? "matches" : "does not match" );
            nRet = pHdl( pInFile, pOutFile, pPDFFile );
        }
        else
            nRet = 20;
    }
    return nRet;
}

static int write_unzipFile( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile )
{
    FileEmitContext aContext( pOutFile, pInFile, pPDFFile );
    aContext.m_bDecrypt = pPDFFile->isEncrypted();
    pPDFFile->emit(aContext);
    return 0;
}

static int write_addStreamArray( const char* pOutFile, PDFArray* pStreams, PDFFile* pPDFFile, const char* pInFile )
{
    int nRet = 0;
    unsigned int nArrayElements = pStreams->m_aSubElements.size();
    forunsigned int i = 0; i < nArrayElements-1 && nRet == 0; i++ )
    {
        PDFName* pMimeType = dynamic_cast<PDFName*>(pStreams->m_aSubElements[i].get());
        PDFObjectRef* pStreamRef = dynamic_cast<PDFObjectRef*>(pStreams->m_aSubElements[i+1].get());
        if( ! pMimeType )
            fprintf( stderr, "error: no mimetype element\n" );
        if( ! pStreamRef )
            fprintf( stderr, "error: no stream ref element\n" );
        if( pMimeType && pStreamRef )
        {
            fprintf( stdout, "found stream %d %d with mimetype %s\n",
                     pStreamRef->m_nNumber, pStreamRef->m_nGeneration,
                     pMimeType->m_aName.getStr() );
            PDFObject* pObject = pPDFFile->findObject( pStreamRef->m_nNumber, pStreamRef->m_nGeneration );
            if( pObject )
            {
                OString aOutStream = pOutFile +
                    OString::Concat("_stream_") +
                    OString::number( sal_Int32(pStreamRef->m_nNumber) ) +
                    "_" +
                    OString::number( sal_Int32(pStreamRef->m_nGeneration) );
                FileEmitContext aContext( aOutStream.getStr(), pInFile, pPDFFile );
                aContext.m_bDecrypt = pPDFFile->isEncrypted();
                pObject->writeStream( aContext, pPDFFile );
            }
            else
            {
                fprintf( stderr, "object not found\n" );
                nRet = 121;
            }
        }
        else
            nRet = 120;
    }
    return nRet;
}

static int write_addStreams( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile )
{
    // find all trailers
    int nRet = 0;
    unsigned int nElements = pPDFFile->m_aSubElements.size();
    forunsigned i = 0; i < nElements && nRet == 0; i++ )
    {
        PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pPDFFile->m_aSubElements[i].get());
        if( pTrailer && pTrailer->m_pDict )
        {
            // search for AdditionalStreams entry
            auto add_stream = pTrailer->m_pDict->m_aMap.find( "AdditionalStreams"_ostr );
            if( add_stream != pTrailer->m_pDict->m_aMap.end() )
            {
                PDFArray* pStreams = dynamic_cast<PDFArray*>(add_stream->second);
                if( pStreams )
                    nRet = write_addStreamArray( pOutFile, pStreams, pPDFFile, pInFile );
            }
        }
    }
    return nRet;
}

static int write_fonts( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
{
    unsigned int nElements = i_pPDFFile->m_aSubElements.size();
    for (unsigned i = 0; i < nElements; i++)
    {
        // search FontDescriptors
        PDFObject* pObj = dynamic_cast<PDFObject*>(i_pPDFFile->m_aSubElements[i].get());
        if( ! pObj )
            continue;
        PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
        if( ! pDict )
            continue;

        std::unordered_map<OString,PDFEntry*>::iterator map_it =
                pDict->m_aMap.find( "Type"_ostr );
        if( map_it == pDict->m_aMap.end() )
            continue;

        PDFName* pName = dynamic_cast<PDFName*>(map_it->second);
        if( ! pName )
            continue;
        if( pName->m_aName != "FontDescriptor" )
            continue;

        // the font name will be helpful, also there must be one in
        // a font descriptor
        map_it = pDict->m_aMap.find( "FontName"_ostr );
        if( map_it == pDict->m_aMap.end() )
            continue;
        pName = dynamic_cast<PDFName*>(map_it->second);
        if( ! pName )
            continue;
        OString aFontName( pName->m_aName );

        PDFObjectRef* pStreamRef = nullptr;
        const char* pFileType = nullptr;
        // we have a font descriptor, try for a type 1 font
        map_it = pDict->m_aMap.find( "FontFile"_ostr );
        if( map_it != pDict->m_aMap.end() )
        {
            pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
            if( pStreamRef )
                pFileType = "pfa";
        }

        // perhaps it's a truetype file ?
        if( ! pStreamRef )
        {
            map_it  = pDict->m_aMap.find( "FontFile2"_ostr );
            if( map_it != pDict->m_aMap.end() )
            {
                pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
                if( pStreamRef )
                    pFileType = "ttf";
            }
        }

        if( ! pStreamRef )
            continue;

        PDFObject* pStream = i_pPDFFile->findObject( pStreamRef );
        if( ! pStream )
            continue;

        OStringBuffer aOutStream( OString::Concat(i_pOutFile)
            + "_font_"
            + OString::number( sal_Int32(pStreamRef->m_nNumber) )
            + "_"
            + OString::number( sal_Int32(pStreamRef->m_nGeneration) )
            + "_"
            + aFontName );
        if( pFileType )
        {
            aOutStream.append( OString::Concat(".") + pFileType );
        }
        FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
        aContext.m_bDecrypt = i_pPDFFile->isEncrypted();
        pStream->writeStream( aContext, i_pPDFFile );
    }
    return 0;
}

// (object number, generation number) pairs requested with -o / --extract-objects;
// filled while parsing the command line, consumed by write_objects()
static std::vector< std::pair< sal_Int32, sal_Int32 > > s_aEmitObjects;

static int write_objects( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
{
    unsigned int nElements = s_aEmitObjects.size();
    for (unsigned i = 0; i < nElements; i++)
    {
        sal_Int32 nObject     = s_aEmitObjects[i].first;
        sal_Int32 nGeneration = s_aEmitObjects[i].second;
        PDFObject* pStream = i_pPDFFile->findObject( nObject, nGeneration );
        if( ! pStream )
        {
            fprintf( stderr, "object %d %d not found !\n"static_cast<int>(nObject), static_cast<int>(nGeneration) );
            continue;
        }

        OString aOutStream = i_pOutFile +
            OString::Concat("_stream_") +
            OString::number( nObject ) +
            "_"  +
            OString::number( nGeneration );
        FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
        aContext.m_bDecrypt = i_pPDFFile->isEncrypted();
        pStream->writeStream( aContext, i_pPDFFile );
    }
    return 0;
}

SAL_IMPLEMENT_MAIN_WITH_ARGS( argc, argv )
{
    const char* pInFile = nullptr;
    const char* pOutFile = nullptr;
    const char* pPassword = nullptr;
    OStringBuffer aOutFile( 256 );
    PDFFileHdl aHdl = write_unzipFile;

    forint nArg = 1; nArg < argc; nArg++ )
    {
        if( argv[nArg][0] == '-' )
        {
            if( ! rtl_str_compare( "-pw", argv[nArg] ) ||
                ! rtl_str_compare( "--password" , argv[nArg] ) )
            {
                if( nArg == argc-1 )
                {
                    fprintf( stderr, "no password given\n" );
                    return 1;
                }
                nArg++;
                pPassword = argv[nArg];
            }
            else if( ! rtl_str_compare( "-h", argv[nArg] ) ||
                ! rtl_str_compare( "--help", argv[nArg] ) )
            {
                printHelp( argv[0] );
                return 0;
            }
            else if( ! rtl_str_compare( "-a", argv[nArg] ) ||
                ! rtl_str_compare( "--extract-add-streams", argv[nArg] ) )
            {
                aHdl = write_addStreams;
            }
            else if( ! rtl_str_compare( "-f", argv[nArg] ) ||
                ! rtl_str_compare( "--extract-fonts", argv[nArg] ) )
            {
                aHdl = write_fonts;
            }
            else if( ! rtl_str_compare( "-o", argv[nArg] ) ||
                ! rtl_str_compare( "--extract-objects", argv[nArg] ) )
            {
                aHdl = write_objects;
                nArg++;
                if( nArg < argc )
                {
                    OString aObjs( argv[nArg] );
                    sal_Int32 nIndex = 0;
                    while( nIndex != -1 )
                    {
                        OString aToken( aObjs.getToken( 0, ',', nIndex ) );
                        sal_Int32 nObject = 0;
                        sal_Int32 nGeneration = 0;
                        sal_Int32 nGenIndex = 0;
                        nObject = o3tl::toInt32( o3tl::getToken( aToken, 0, ':', nGenIndex ) );
                        if( nGenIndex != -1 )
                            nGeneration = o3tl::toInt32( o3tl::getToken(aToken, 0, ':', nGenIndex ));
                        s_aEmitObjects.push_back( std::pair<sal_Int32,sal_Int32>(nObject,nGeneration) );
                    }
                }
            }
            else
            {
                fprintf( stderr, "unrecognized option \"%s\"\n",
                         argv[nArg] );
                printHelp( argv[0] );
                return 1;
            }
        }
        else if( pInFile == nullptr )
            pInFile = argv[nArg];
        else if( pOutFile == nullptr )
            pOutFile = argv[nArg];
    }
    if( ! pInFile )
    {
        fprintf( stderr, "no input file given\n" );
        return 10;
    }
    if( ! pOutFile )
    {
        OString aFile( pInFile );
        if( aFile.getLength() > 0 )
        {
            if( aFile.getLength() > 4 )
            {
                if( aFile.matchIgnoreAsciiCase( ".pdf", aFile.getLength()-4 ) )
                    aOutFile.append( pInFile, aFile.getLength() - 4 );
                else
                    aOutFile.append( aFile );
            }
            aOutFile.append( "_unzip.pdf" );
            pOutFile = aOutFile.getStr();
        }
        else
        {
            fprintf( stderr, "no output file given\n" );
            return 11;
        }
    }

    return handleFile( pInFile, pOutFile, pPassword, aHdl );
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Messung V0.5
C=96 H=98 G=96

¤ Dauer der Verarbeitung: 0.12 Sekunden  (vorverarbeitet)  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.