[poppler] Branch 'xpdf303merge' - poppler/FileSpec.cc poppler/FileSpec.h utils/CMakeLists.txt utils/Makefile.am utils/pdfdetach.1 utils/pdfdetach.cc

Carlos Garcia Campos carlosgc at kemper.freedesktop.org
Sun Sep 25 03:24:59 PDT 2011


 poppler/FileSpec.cc  |   22 +++
 poppler/FileSpec.h   |    3 
 utils/CMakeLists.txt |    9 +
 utils/Makefile.am    |    6 
 utils/pdfdetach.1    |  105 ++++++++++++++++
 utils/pdfdetach.cc   |  318 +++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 463 insertions(+)

New commits:
commit bba57e588fd1ee3a61f18405d1d1bf89fceb5b96
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date:   Sun Sep 25 12:21:02 2011 +0200

    xpdf303: Added the pdfdetach tool
    
    I haven't merged xpdf code for embedded files, I think our
    implementation is better and more complete. I've adapted pdfdetach
    code to use our code and return also embedded files of file attachment
    annotations to match what xpdf does.

diff --git a/poppler/FileSpec.cc b/poppler/FileSpec.cc
index 0366acc..1360608 100644
--- a/poppler/FileSpec.cc
+++ b/poppler/FileSpec.cc
@@ -79,6 +79,28 @@ EmbFile::~EmbFile()
   m_objStr.free();
 }
 
+// Writes the embedded file's stream to a newly created file at <path>.
+//
+// The file is opened in binary mode ("wb"), so an existing file at
+// <path> is truncated.  Returns gTrue only if the file was opened, the
+// stream was copied, and the file was closed without error; returns
+// gFalse otherwise.
+GBool EmbFile::save(const char *path) {
+  FILE *f;
+  GBool ret;
+
+  // Without a stream object there is nothing to write; bail out before
+  // creating an empty output file.
+  if (!isOk()) {
+    return gFalse;
+  }
+  if (!(f = fopen(path, "wb"))) {
+    return gFalse;
+  }
+  ret = save2(f);
+  // fclose() flushes any buffered data, so a failure here means the
+  // file on disk is incomplete even if every earlier write succeeded.
+  if (fclose(f) != 0) {
+    ret = gFalse;
+  }
+  return ret;
+}
+
+// Copies the whole embedded stream, byte by byte, to the already open
+// file <f>.  The stream is reset first so the copy always starts at its
+// beginning.  Returns gFalse as soon as a write fails, gTrue once the
+// stream has been read to EOF.  The caller keeps ownership of <f>.
+GBool EmbFile::save2(FILE *f) {
+  int c;
+
+  m_objStr.streamReset();
+  while ((c = m_objStr.streamGetChar()) != EOF) {
+    // fputc() returns EOF on a write error (e.g. disk full); stop
+    // instead of silently producing a truncated file.
+    if (fputc(c, f) == EOF) {
+      return gFalse;
+    }
+  }
+  return gTrue;
+}
+
 FileSpec::FileSpec(Object *fileSpecA)
 {
   ok = gTrue;
diff --git a/poppler/FileSpec.h b/poppler/FileSpec.h
index e26cdcf..9f2f6fc 100644
--- a/poppler/FileSpec.h
+++ b/poppler/FileSpec.h
@@ -33,8 +33,11 @@ public:
   GooString *mimeType() { return m_mimetype; }
   Stream *stream() { return isOk() ? m_objStr.getStream() : NULL; }
   GBool isOk() { return m_objStr.isStream(); }
+  // Writes the embedded stream to the file at <path> (opened with "wb").
+  // Returns gFalse if the file cannot be opened for writing.
+  GBool save(const char *path);
 
 private:
+  // Helper for save(): copies the embedded stream to the open file <f>.
+  GBool save2(FILE *f);
+
   int m_size;
   GooString *m_createDate;
   GooString *m_modDate;
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index a36616d..c0d1cd3 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -43,6 +43,15 @@ if (HAVE_CAIRO)
   install(FILES pdftocairo.1 DESTINATION share/man/man1)
 endif (HAVE_CAIRO)
 
+# pdfdetach
+set(pdfdetach_SOURCES ${common_srcs}
+  pdfdetach.cc
+)
+add_executable(pdfdetach ${pdfdetach_SOURCES})
+target_link_libraries(pdfdetach ${common_libs})
+install(TARGETS pdfdetach DESTINATION bin)
+install(FILES pdfdetach.1 DESTINATION share/man/man1)
+
 # pdffonts
 set(pdffonts_SOURCES ${common_srcs}
   pdffonts.cc
diff --git a/utils/Makefile.am b/utils/Makefile.am
index ac2a15e..3a0c742 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -44,6 +44,7 @@ endif
 AM_LDFLAGS = @auto_import_flags@
 
 bin_PROGRAMS =					\
+	pdfdetach				\
 	pdffonts				\
 	pdfimages				\
 	pdfinfo					\
@@ -56,6 +57,7 @@ bin_PROGRAMS =					\
 	$(pdftocairo_binary)
 
 dist_man1_MANS =				\
+	pdfdetach.1				\
 	pdffonts.1				\
 	pdfimages.1				\
 	pdfinfo.1				\
@@ -69,6 +71,10 @@ dist_man1_MANS =				\
 
 common = parseargs.cc parseargs.h
 
+pdfdetach_SOURCES = 				\
+	pdfdetach.cc				\
+	$(common)
+
 pdffonts_SOURCES =				\
 	pdffonts.cc				\
 	$(common)
diff --git a/utils/pdfdetach.1 b/utils/pdfdetach.1
new file mode 100644
index 0000000..1e2f8aa
--- /dev/null
+++ b/utils/pdfdetach.1
@@ -0,0 +1,105 @@
+.\" Copyright 2011 Glyph & Cog, LLC
+.TH pdfdetach 1 "15 August 2011"
+.SH NAME
+pdfdetach \- Portable Document Format (PDF) document embedded file
+extractor (version 3.03)
+.SH SYNOPSIS
+.B pdfdetach
+[options]
+.RI [ PDF-file ]
+.SH DESCRIPTION
+.B Pdfdetach
+lists or extracts embedded files (attachments) from a Portable
+Document Format (PDF) file.
+.SH CONFIGURATION FILE
+Pdfdetach reads a configuration file at startup.  It first tries to
+find the user's private config file, ~/.xpdfrc.  If that doesn't
+exist, it looks for a system-wide config file, typically
+/usr/local/etc/xpdfrc (but this location can be changed when pdfdetach
+is built).  See the
+.BR xpdfrc (5)
+man page for details.
+.SH OPTIONS
+Some of the following options can be set with configuration file
+commands.  These are listed in square brackets with the description of
+the corresponding command line option.
+.TP
+.B \-list
+List all of the embedded files in the PDF file.  File names are
+converted to the text encoding specified by the "\-enc" switch.
+.TP
+.BI \-save " number"
+Save the specified embedded file.  By default, this uses the file name
+associated with the embedded file (as printed by the "\-list" switch);
+the file name can be changed with the "\-o" switch.
+.TP
+.BI \-saveall
+Save all of the embedded files.  This uses the file names associated
+with the embedded files (as printed by the "\-list" switch).  By
+default, the files are saved in the current directory; this can be
+changed with the "\-o" switch.
+.TP
+.BI \-o " path"
+Set the file name used when saving an embedded file with the "\-save"
+switch, or the directory used by "\-saveall".
+.TP
+.BI \-enc " encoding-name"
+Sets the encoding to use for text output (embedded file names).  The
+.I encoding\-name
+must be defined with the unicodeMap command (see
+.BR xpdfrc (5)).
+This defaults to "Latin1" (which is a built-in encoding).
+.RB "[config file: " textEncoding ]
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file.  Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.BI \-cfg " config-file"
+Read
+.I config-file
+in place of ~/.xpdfrc or the system-wide config file.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdfdetach software and documentation are copyright 1996-2011 Glyph &
+Cog, LLC.
+.SH "SEE ALSO"
+.BR xpdf (1),
+.BR pdftops (1),
+.BR pdftotext (1),
+.BR pdfinfo (1),
+.BR pdffonts (1),
+.BR pdftoppm (1),
+.BR pdfimages (1),
+.BR xpdfrc (5)
+.br
+.B http://www.foolabs.com/xpdf/
diff --git a/utils/pdfdetach.cc b/utils/pdfdetach.cc
new file mode 100644
index 0000000..3fbdfb7
--- /dev/null
+++ b/utils/pdfdetach.cc
@@ -0,0 +1,318 @@
+//========================================================================
+//
+// pdfdetach.cc
+//
+// Copyright 2010 Glyph & Cog, LLC
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2011 Carlos Garcia Campos <carlosgc at gnome.org>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include "goo/gtypes.h"
+#include "goo/gmem.h"
+#include "goo/GooList.h"
+#include "parseargs.h"
+#include "Annot.h"
+#include "GlobalParams.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "FileSpec.h"
+#include "CharTypes.h"
+#include "Catalog.h"
+#include "UnicodeMap.h"
+#include "PDFDocEncoding.h"
+#include "Error.h"
+
+// Command line option state, filled in by parseArgs() through argDesc.
+// Exactly one of doList, saveNum (non-zero) and saveAll must be selected;
+// main() rejects any other combination.
+static GBool doList = gFalse;
+// 1-based index of the embedded file to save; 0 means "-save" was not given.
+static int saveNum = 0;
+static GBool saveAll = gFalse;
+// Output file name for "-save", or output directory for "-saveall".
+static char savePath[1024] = "";
+static char textEncName[128] = "";
+// The passwords start with the byte \001 as a "not supplied" sentinel;
+// main() only passes a password on when the first byte differs from it.
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static char cfgFileName[256] = "";
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+// Option table handed to parseArgs() and printUsage().  Each entry gives
+// the switch, its argument kind, the variable that receives the value,
+// the buffer size (string options only) and the usage text.  The table
+// is terminated by a {NULL} entry.
+static ArgDesc argDesc[] = {
+  {"-list",   argFlag,     &doList,        0,
+   "list all embedded files"},
+  {"-save",   argInt,      &saveNum,       0,
+   "save the specified embedded file"},
+  {"-saveall", argFlag,    &saveAll,       0,
+   "save all embedded files"},
+  {"-o",      argString,   savePath,       sizeof(savePath),
+   "file name for the saved embedded file"},
+  {"-enc",    argString,   textEncName,    sizeof(textEncName),
+   "output text encoding name"},
+  {"-opw",    argString,   ownerPassword,  sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw",    argString,   userPassword,   sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-cfg",        argString,      cfgFileName,    sizeof(cfgFileName),
+   "configuration file to use in place of .xpdfrc"},
+  {"-v",      argFlag,     &printVersion,  0,
+   "print copyright and version info"},
+  {"-h",      argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"-help",   argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"--help",  argFlag,     &printHelp,     0,
+   "print usage information"},
+  {"-?",      argFlag,     &printHelp,     0,
+   "print usage information"},
+  {NULL}
+};
+
+// Returns gTrue when <name> starts with the bytes 0xFE 0xFF, in which
+// case the rest of the string is read as two-byte big-endian code units;
+// otherwise the string is read byte by byte through pdfDocEncoding.
+static GBool fileNameIsUnicode(GooString *name) {
+  return (name->getChar(0) & 0xff) == 0xfe &&
+         (name->getChar(1) & 0xff) == 0xff;
+}
+
+// Decodes the character of <name> that starts at index *pos and advances
+// *pos past it (two bytes in Unicode mode, one byte otherwise).
+static Unicode nextFileNameChar(GooString *name, GBool isUnicode, int *pos) {
+  Unicode u;
+
+  if (isUnicode) {
+    u = ((name->getChar(*pos) & 0xff) << 8) |
+        (name->getChar(*pos + 1) & 0xff);
+    *pos += 2;
+  } else {
+    u = pdfDocEncoding[name->getChar(*pos) & 0xff];
+    ++*pos;
+  }
+  return u;
+}
+
+// Writes <name>, converted to the output encoding <uMap>, to stdout.
+static void printFileName(GooString *name, UnicodeMap *uMap) {
+  char uBuf[8];
+  GBool isUnicode;
+  int pos, n;
+
+  isUnicode = fileNameIsUnicode(name);
+  pos = isUnicode ? 2 : 0;
+  while (pos < name->getLength()) {
+    n = uMap->mapUnicode(nextFileNameChar(name, isUnicode, &pos),
+                         uBuf, sizeof(uBuf));
+    fwrite(uBuf, 1, n, stdout);
+  }
+}
+
+// Converts <name> to the output encoding <uMap> and stores it as a
+// NUL-terminated string starting at <dst>.  <dstEnd> points one past the
+// end of the destination buffer; the name is silently truncated at the
+// first character that would not leave room for the terminator.
+static void encodeFileName(GooString *name, UnicodeMap *uMap,
+                           char *dst, char *dstEnd) {
+  char uBuf[8];
+  GBool isUnicode;
+  int pos, n;
+
+  isUnicode = fileNameIsUnicode(name);
+  pos = isUnicode ? 2 : 0;
+  while (pos < name->getLength()) {
+    n = uMap->mapUnicode(nextFileNameChar(name, isUnicode, &pos),
+                         uBuf, sizeof(uBuf));
+    if (dst + n >= dstEnd)
+      break;
+    memcpy(dst, uBuf, n);
+    dst += n;
+  }
+  *dst = '\0';
+}
+
+// pdfdetach entry point: lists the files embedded in a PDF document
+// (-list), saves one of them (-save <n>) or saves all of them
+// (-saveall).  Both the catalog's embedded files and the files attached
+// through file attachment annotations are considered.
+//
+// Exit codes: 0 on success, 1 if the PDF file cannot be opened, 2 if an
+// output file cannot be written, 99 for any other error (including bad
+// command line usage and -v / -h).
+int main(int argc, char *argv[]) {
+  GooString *fileName;
+  UnicodeMap *uMap;
+  GooString *ownerPW, *userPW;
+  PDFDoc *doc;
+  char path[1024];
+  char *p;
+  GBool ok;
+  int exitCode;
+  GooList *embeddedFiles = NULL;
+  int nFiles, nPages, n, i, j;
+  FileSpec *fileSpec;
+  EmbFile *embFile;
+  Page *page;
+  Annots *annots;
+  Annot *annot;
+
+  exitCode = 99;
+
+  // parse args; exactly one of -list, -save and -saveall must be given
+  ok = parseArgs(argDesc, &argc, argv);
+  if ((doList ? 1 : 0) +
+      ((saveNum != 0) ? 1 : 0) +
+      (saveAll ? 1 : 0) != 1) {
+    ok = gFalse;
+  }
+  if (!ok || argc != 2 || printVersion || printHelp) {
+    fprintf(stderr, "pdfdetach version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfdetach", "<PDF-file>", argDesc);
+    }
+    goto err0;
+  }
+  fileName = new GooString(argv[1]);
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+  if (textEncName[0]) {
+    globalParams->setTextEncoding(textEncName);
+  }
+
+  // get mapping to output encoding
+  if (!(uMap = globalParams->getTextEncoding())) {
+    error(errConfig, -1, "Couldn't get text encoding");
+    delete fileName;
+    goto err1;
+  }
+
+  // open PDF file; a password buffer still holding the \001 sentinel
+  // means the corresponding option was not given
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+
+  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  // The name is handed to the factory by reference, so it is still owned
+  // here and would otherwise show up in the leak report below.
+  // NOTE(review): assumes createPDFDoc() keeps its own copy -- confirm.
+  delete fileName;
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err2;
+  }
+
+  // collect the embedded files listed in the catalog ...
+  embeddedFiles = new GooList();
+  for (i = 0; i < doc->getCatalog()->numEmbeddedFiles(); ++i)
+    embeddedFiles->append(doc->getCatalog()->embeddedFile(i));
+
+  // ... followed by the files of every file attachment annotation
+  nPages = doc->getCatalog()->getNumPages();
+  for (i = 0; i < nPages; ++i) {
+    page = doc->getCatalog()->getPage(i + 1);
+    if (!page)
+      continue;
+    annots = page->getAnnots();
+    // a page without annotations must not end the scan of later pages
+    if (!annots)
+      continue;
+
+    for (j = 0; j < annots->getNumAnnots(); ++j) {
+      annot = annots->getAnnot(j);
+      if (annot->getType() != Annot::typeFileAttachment)
+        continue;
+      embeddedFiles->append(new FileSpec(static_cast<AnnotFileAttachment *>(annot)->getFile()));
+    }
+  }
+
+  nFiles = embeddedFiles->getLength();
+
+  // list embedded files
+  if (doList) {
+    printf("%d embedded files\n", nFiles);
+    for (i = 0; i < nFiles; ++i) {
+      fileSpec = static_cast<FileSpec *>(embeddedFiles->get(i));
+      printf("%d: ", i+1);
+      printFileName(fileSpec->getFileName(), uMap);
+      fputc('\n', stdout);
+    }
+
+  // save all embedded files
+  } else if (saveAll) {
+    for (i = 0; i < nFiles; ++i) {
+      fileSpec = static_cast<FileSpec *>(embeddedFiles->get(i));
+      // build "<savePath>/<name>", or just "<name>" without -o
+      if (savePath[0]) {
+        n = strlen(savePath);
+        if (n > (int)sizeof(path) - 2) {
+          n = sizeof(path) - 2;
+        }
+        memcpy(path, savePath, n);
+        path[n] = '/';
+        p = path + n + 1;
+      } else {
+        p = path;
+      }
+      // NOTE(review): the name comes straight from the PDF file and is
+      // used as an output path without sanitising (it may contain
+      // directory separators or ".." components) -- consider restricting
+      // it to a base name.
+      encodeFileName(fileSpec->getFileName(), uMap, p, path + sizeof(path));
+
+      embFile = fileSpec->getEmbeddedFile();
+      if (!embFile || !embFile->save(path)) {
+        // report the complete output path
+        error(errIO, -1, "Error saving embedded file as '{0:s}'", path);
+        exitCode = 2;
+        goto err2;
+      }
+    }
+
+  // save an embedded file
+  } else {
+    if (saveNum < 1 || saveNum > nFiles) {
+      error(errCommandLine, -1, "Invalid file number");
+      goto err2;
+    }
+
+    fileSpec = static_cast<FileSpec *>(embeddedFiles->get(saveNum - 1));
+    if (savePath[0]) {
+      // -o names the output file directly
+      p = savePath;
+    } else {
+      // NOTE(review): same unsanitised-name caveat as for -saveall.
+      encodeFileName(fileSpec->getFileName(), uMap, path, path + sizeof(path));
+      p = path;
+    }
+
+    embFile = fileSpec->getEmbeddedFile();
+    if (!embFile || !embFile->save(p)) {
+      error(errIO, -1, "Error saving embedded file as '{0:s}'", p);
+      exitCode = 2;
+      goto err2;
+    }
+  }
+
+  exitCode = 0;
+
+  // clean up
+ err2:
+  if (embeddedFiles)
+    deleteGooList(embeddedFiles, FileSpec);
+  uMap->decRefCnt();
+  delete doc;
+ err1:
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}


More information about the poppler mailing list