[poppler] poppler/Catalog.cc poppler/Catalog.h poppler/Dict.cc poppler/Dict.h poppler/FileSpec.cc poppler/FileSpec.h poppler/PDFDoc.cc poppler/Stream.cc poppler/Stream.h utils/CMakeLists.txt utils/pdfattach.1 utils/pdfattach.cc

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Feb 28 16:32:51 UTC 2019


 poppler/Catalog.cc   |   81 ++++++++++++++++++++++++++++++++++++
 poppler/Catalog.h    |    8 +++
 poppler/Dict.cc      |   13 +++++
 poppler/Dict.h       |    1 
 poppler/FileSpec.cc  |   28 ++++++++++++
 poppler/FileSpec.h   |    2 
 poppler/PDFDoc.cc    |    6 ++
 poppler/Stream.cc    |    1 
 poppler/Stream.h     |    5 ++
 utils/CMakeLists.txt |    9 ++++
 utils/pdfattach.1    |   60 +++++++++++++++++++++++++++
 utils/pdfattach.cc   |  112 +++++++++++++++++++++++++++++++++++++++++++++++++++
 12 files changed, 326 insertions(+)

New commits:
commit 4402e335d6a907c3eb73708a6cd50061625d431f
Author: Albert Astals Cid <aacid at kde.org>
Date:   Sun Feb 10 10:00:32 2019 +0100

    Add new util: pdfattach

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index e958ad12..4955088a 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -432,6 +432,87 @@ FileSpec *Catalog::embeddedFile(int i)
     return embeddedFile;
 }
 
+bool Catalog::hasEmbeddedFile(const std::string &fileName)
+{
+  NameTree *ef = getEmbeddedFileNameTree();
+  for (int i = 0; i < ef->numEntries(); ++i) {
+    if (fileName == ef->getName(i)->toStr())
+      return true;
+  }
+  return false;
+}
+
+void Catalog::addEmbeddedFile(GooFile *file, const std::string &fileName)
+{
+  catalogLocker();
+
+  Object fileSpecObj = FileSpec::newFileSpecObject(xref, file, fileName);
+  const Ref fileSpecRef = xref->addIndirectObject(&fileSpecObj);
+
+  Object catDict = xref->getCatalog();
+  Ref namesObjRef;
+  Object namesObj = catDict.getDict()->lookup("Names", &namesObjRef);
+  if (!namesObj.isDict()) {
+    // Need to create the names Dict
+    catDict.dictSet("Names", Object(new Dict(xref)));
+    namesObj = catDict.getDict()->lookup("Names");
+
+    // Trigger getting the names dict again when needed
+    names = Object();
+  }
+
+  Dict *namesDict = namesObj.getDict();
+
+  // We create a new EmbeddedFiles nametree, this replaces the existing one (if any), but it's not a problem
+  Object embeddedFilesObj = Object(new Dict(xref));
+  const Ref embeddedFilesRef = xref->addIndirectObject(&embeddedFilesObj);
+
+  Array *embeddedFilesNamesArray = new Array(xref);
+
+  // This flattens out the existing EmbeddedFiles nametree (if any), should not be a problem
+  NameTree *ef = getEmbeddedFileNameTree();
+  bool fileAlreadyAdded = false;
+  for (int i = 0; i < ef->numEntries(); ++i) {
+    GooString *efNameI = ef->getName(i);
+
+    // we need to add the file if it has not been added yet and the name is smaller or equal lexicographically
+    // than the current item
+    const bool sameFileName = fileName == efNameI->toStr();
+    const bool addFile = !fileAlreadyAdded && (sameFileName || fileName < efNameI->toStr());
+    if (addFile) {
+      // If the new name is smaller lexicographically than an existing file add it in its correct position
+      embeddedFilesNamesArray->add(Object(new GooString(fileName)));
+      embeddedFilesNamesArray->add(Object(fileSpecRef));
+      fileAlreadyAdded = true;
+    }
+    if (sameFileName) {
+      // If the new name is the same lexicographically than an existing file then don't add the existing file (i.e. replace)
+      continue;
+    }
+    embeddedFilesNamesArray->add(Object(efNameI->copy()));
+    embeddedFilesNamesArray->add(ef->getValue(i)->copy());
+  }
+
+  if (!fileAlreadyAdded) {
+    // The new file is bigger lexicographically than the existing ones
+    embeddedFilesNamesArray->add(Object(new GooString(fileName)));
+    embeddedFilesNamesArray->add(Object(fileSpecRef));
+  }
+
+  embeddedFilesObj.dictSet("Names", Object(embeddedFilesNamesArray));
+  namesDict->set("EmbeddedFiles", Object(embeddedFilesRef));
+
+  if (namesObjRef.num != 0) {
+    xref->setModifiedObject(&namesObj, namesObjRef);
+  } else {
+    xref->setModifiedObject(&catDict, { xref->getRootNum(), xref->getRootGen() });
+  }
+
+  // recreate Nametree on next call that uses it
+  delete embeddedFileNameTree;
+  embeddedFileNameTree = nullptr;
+}
+
 GooString *Catalog::getJS(int i)
 {
   Object obj;
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 97ac35a0..f4c3fe6c 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -178,6 +178,14 @@ public:
   // Get the i'th file embedded (at the Document level) in the document
   FileSpec *embeddedFile(int i);
 
+  // Is there an embedded file with the given name?
+  bool hasEmbeddedFile(const std::string &fileName);
+
+  // Adds an embedded file
+  // If there is already an existing embedded file with the given fileName
+  // it gets replaced, if that's not what you want check hasEmbeddedFile first
+  void addEmbeddedFile(GooFile *file, const std::string &fileName);
+
   // Get the number of javascript scripts
   int numJS() { return getJSNameTree()->numEntries(); }
   GooString *getJSName(int i) { return getJSNameTree()->getName(i); }
diff --git a/poppler/Dict.cc b/poppler/Dict.cc
index 383f7fd4..f1d6c6e1 100644
--- a/poppler/Dict.cc
+++ b/poppler/Dict.cc
@@ -168,6 +168,19 @@ Object Dict::lookup(const char *key, int recursion) const {
   return Object(objNull);
 }
 
+Object Dict::lookup(const char *key, Ref *returnRef, int recursion) const {
+  if (const auto *entry = find(key)) {
+    if (entry->second.getType() == objRef) {
+      *returnRef = entry->second.getRef();
+    } else {
+      *returnRef = { 0, 0 };
+    }
+    return entry->second.fetch(xref, recursion);
+  }
+  *returnRef = { 0, 0 };
+  return Object(objNull);
+}
+
 const Object &Dict::lookupNF(const char *key) const {
   if (const auto *entry = find(key)) {
     return entry->second;
diff --git a/poppler/Dict.h b/poppler/Dict.h
index db381310..13da4822 100644
--- a/poppler/Dict.h
+++ b/poppler/Dict.h
@@ -76,6 +76,7 @@ public:
   // Look up an entry and return the value.  Returns a null object
   // if <key> is not in the dictionary.
   Object lookup(const char *key, int recursion = 0) const;
+  Object lookup(const char *key, Ref *returnRef, int recursion = 0) const;
   const Object &lookupNF(const char *key) const;
   bool lookupInt(const char *key, const char *alt_key, int *value) const;
 
diff --git a/poppler/FileSpec.cc b/poppler/FileSpec.cc
index 463e5514..be262ad4 100644
--- a/poppler/FileSpec.cc
+++ b/poppler/FileSpec.cc
@@ -29,6 +29,7 @@
 #include <config.h>
 
 #include "FileSpec.h"
+#include "XRef.h"
 #include "goo/gfile.h"
 
 EmbFile::EmbFile(Object &&efStream)
@@ -165,6 +166,33 @@ EmbFile *FileSpec::getEmbeddedFile()
   return embFile;
 }
 
+Object FileSpec::newFileSpecObject(XRef *xref, GooFile *file, const std::string &fileName)
+{
+  Object paramsDict = Object(new Dict(xref));
+  paramsDict.dictSet("Size", Object(file->size()));
+
+  // No Subtype in the embedded file stream dictionary for now
+  Object streamDict = Object(new Dict(xref));
+  streamDict.dictSet("Length", Object(file->size()));
+  streamDict.dictSet("Params", std::move(paramsDict));
+
+  FileStream *fStream = new FileStream(file, 0, false, file->size(), std::move(streamDict));
+  fStream->setNeedsEncryptionOnSave(true);
+  Stream *stream = fStream;
+  Object streamObj = Object(stream);
+  const Ref streamRef = xref->addIndirectObject(&streamObj);
+
+  Dict *efDict = new Dict(xref);
+  efDict->set("F", Object(streamRef));
+
+  Dict *fsDict = new Dict(xref);
+  fsDict->set("Type", Object(objName, "Filespec"));
+  fsDict->set("UF", Object(new GooString(fileName)));
+  fsDict->set("EF", Object(efDict));
+
+  return Object(fsDict);
+}
+
 GooString *FileSpec::getFileNameForPlatform()
 {
   if (platformFileName)
diff --git a/poppler/FileSpec.h b/poppler/FileSpec.h
index a4fcc725..f205ac32 100644
--- a/poppler/FileSpec.h
+++ b/poppler/FileSpec.h
@@ -62,6 +62,8 @@ public:
   const GooString *getDescription() const { return desc; }
   EmbFile *getEmbeddedFile();
 
+  static Object newFileSpecObject(XRef *xref, GooFile *file, const std::string &fileName);
+
 private:
   bool ok;
 
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 75f169e8..26842f84 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -1469,6 +1469,12 @@ void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, unsigned i
           writeDictionnary (stream->getDict(),outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts);
           writeStream (stream,outStr);
           delete encStream;
+        } else if (fileKey != nullptr && stream->getKind() == strFile && static_cast<FileStream*>(stream)->getNeedsEncryptionOnSave()) {
+          EncryptStream *encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, objNum, objGen);
+          encStream->setAutoDelete(false);
+          writeDictionnary (encStream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts);
+          writeStream (encStream, outStr);
+          delete encStream;
         } else {
           //raw stream copy
           FilterStream *fs = dynamic_cast<FilterStream*>(stream);
diff --git a/poppler/Stream.cc b/poppler/Stream.cc
index a41435ab..99ccbd22 100644
--- a/poppler/Stream.cc
+++ b/poppler/Stream.cc
@@ -759,6 +759,7 @@ FileStream::FileStream(GooFile* fileA, Goffset startA, bool limitedA,
   bufPos = start;
   savePos = 0;
   saved = false;
+  needsEncryptionOnSave = false;
 }
 
 FileStream::~FileStream() {
diff --git a/poppler/Stream.h b/poppler/Stream.h
index f6aa41cd..851d2fe3 100644
--- a/poppler/Stream.h
+++ b/poppler/Stream.h
@@ -470,6 +470,9 @@ public:
   int getUnfilteredChar () override { return getChar(); }
   void unfilteredReset () override { reset(); }
 
+  bool getNeedsEncryptionOnSave() const { return needsEncryptionOnSave; }
+  void setNeedsEncryptionOnSave(bool needsEncryptionOnSaveA) { needsEncryptionOnSave = needsEncryptionOnSaveA; }
+
 private:
 
   bool fillBuf();
@@ -508,6 +511,8 @@ private:
   Goffset bufPos;
   Goffset savePos;
   bool saved;
+  bool needsEncryptionOnSave;   // Needed for FileStreams that point to "external" files
+                                // and thus when saving we can't do a raw copy
 };
 
 //------------------------------------------------------------------------
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index 3516479e..3a1e0d1a 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -57,6 +57,15 @@ target_link_libraries(pdfdetach ${common_libs})
 install(TARGETS pdfdetach DESTINATION bin)
 install(FILES pdfdetach.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
 
+# pdfattach
+set(pdfattach_SOURCES ${common_srcs}
+  pdfattach.cc
+)
+add_executable(pdfattach ${pdfattach_SOURCES})
+target_link_libraries(pdfattach ${common_libs})
+install(TARGETS pdfattach DESTINATION bin)
+install(FILES pdfattach.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
+
 # pdffonts
 set(pdffonts_SOURCES ${common_srcs}
   pdffonts.cc
diff --git a/utils/pdfattach.1 b/utils/pdfattach.1
new file mode 100644
index 00000000..c9589a78
--- /dev/null
+++ b/utils/pdfattach.1
@@ -0,0 +1,60 @@
+.\" Copyright 2019 Albert Astals Cid
+.TH pdfattach 1 "10 February 2019"
+.SH NAME
+pdfattach \- Portable Document Format (PDF) document embedded file
+creator (version 3.03)
+.SH SYNOPSIS
+.B pdfattach
+[options]
+.I input-PDF-file file-to-attach output-PDF-file
+.SH DESCRIPTION
+.B Pdfattach
+adds a new embedded file (attachment) to an existing Portable
+Document Format (PDF) file.
+.SH OPTIONS
+.TP
+.B \-replace
+Replace embedded file with same name (if it exists)
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+.TP
+0
+No error.
+.TP
+1
+Error opening input PDF file.
+.TP
+2
+Error opening file to attach.
+.TP
+3
+Output file already exists.
+.TP
+4
+There is already an attached file with that name.
+.TP
+5
+Error saving the output file.
+.SH AUTHOR
+The pdfattach software and documentation are copyright 2019 The Poppler developers
+.SH "SEE ALSO"
+.BR pdfdetach (1),
+.BR pdfimages (1),
+.BR pdfinfo (1),
+.BR pdftocairo (1),
+.BR pdftohtml (1),
+.BR pdftoppm (1),
+.BR pdftops (1),
+.BR pdftotext (1),
+.BR pdfseparate (1),
+.BR pdfsig (1),
+.BR pdfunite (1)
diff --git a/utils/pdfattach.cc b/utils/pdfattach.cc
new file mode 100644
index 00000000..e4f40a32
--- /dev/null
+++ b/utils/pdfattach.cc
@@ -0,0 +1,112 @@
+//========================================================================
+//
+// pdfattach.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2019 Albert Astals Cid <aacid at kde.org>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include "gbasename.h"
+#include "parseargs.h"
+#include "GlobalParams.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "Error.h"
+#include "ErrorCodes.h"
+#include "Win32Console.h"
+
+static bool doReplace = false;
+static bool printVersion = false;
+static bool printHelp = false;
+
+static ArgDesc argDesc[] = {
+  {"-replace",   argFlag,     &doReplace,        0,
+   "replace embedded file with same name (if it exists)"},
+  {"-v",      argFlag,     &printVersion,  0,
+   "print copyright and version info"},
+  {"-h",      argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"-help",   argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"--help",  argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"-?",      argFlag,     &printHelp,     0,
+   "print usage information"},
+  { }
+};
+
+static bool fileExists(const char *filePath)
+{
+  FILE *f = openFile(filePath, "r");
+  if (f != nullptr) {
+    fclose(f);
+    return true;
+  }
+  return false;
+}
+
+int main(int argc, char *argv[]) {
+  Win32Console win32Console(&argc, &argv);
+
+  // parse args
+  const bool ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc != 4 || printVersion || printHelp) {
+    fprintf(stderr, "pdfattach version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfattach", "<input-PDF-file> <file-to-attach> <output-PDF-file>", argDesc);
+    }
+    return 99;
+  }
+  const GooString pdfFileName(argv[1]);
+  const GooString attachFilePath(argv[2]);
+
+  // init GlobalParams
+  auto gp = std::make_unique<GlobalParams>();
+  globalParams = gp.get();
+
+  // open PDF file
+  std::unique_ptr<PDFDoc> doc(PDFDocFactory().createPDFDoc(pdfFileName, nullptr, nullptr));
+
+  if (!doc->isOk()) {
+    fprintf(stderr, "Couldn't open %s\n", pdfFileName.c_str());
+    return 1;
+  }
+
+  std::unique_ptr<GooFile> attachFile(GooFile::open(&attachFilePath));
+  if (!attachFile) {
+    fprintf(stderr, "Couldn't open %s\n", attachFilePath.c_str());
+    return 2;
+  }
+
+  if (fileExists(argv[3])) {
+    fprintf(stderr, "File %s already exists.\n", argv[3]);
+    return 3;
+  }
+
+  const std::string attachFileName = gbasename(attachFilePath.c_str());
+
+  if (!doReplace && doc->getCatalog()->hasEmbeddedFile(attachFileName)) {
+    fprintf(stderr, "There is already an embedded file named %s.\n", attachFileName.c_str());
+    return 4;
+  }
+
+  doc->getCatalog()->addEmbeddedFile(attachFile.get(), attachFileName);
+
+  const GooString outputPdfFilePath(argv[3]);
+  const int saveResult = doc->saveAs(&outputPdfFilePath);
+  if (saveResult != errNone) {
+    fprintf(stderr, "Couldn't save the file properly.\n");
+    return 5;
+  }
+
+  return 0;
+}


More information about the poppler mailing list