[poppler] utils/CMakeLists.txt utils/.gitignore utils/Makefile.am utils/pdfextract.1 utils/pdfextract.cc utils/pdfmerge.1 utils/pdfmerge.cc utils/pdfseparate.1 utils/pdfseparate.cc utils/pdfunite.1 utils/pdfunite.cc

Albert Astals Cid aacid at kemper.freedesktop.org
Mon Sep 26 15:12:21 PDT 2011


 utils/.gitignore     |    4 -
 utils/CMakeLists.txt |   28 ++++----
 utils/Makefile.am    |   16 ++--
 utils/pdfextract.1   |   49 --------------
 utils/pdfextract.cc  |  115 ---------------------------------
 utils/pdfmerge.1     |   33 ---------
 utils/pdfmerge.cc    |  176 ---------------------------------------------------
 utils/pdfseparate.1  |   49 ++++++++++++++
 utils/pdfseparate.cc |  115 +++++++++++++++++++++++++++++++++
 utils/pdfunite.1     |   33 +++++++++
 utils/pdfunite.cc    |  176 +++++++++++++++++++++++++++++++++++++++++++++++++++
 11 files changed, 397 insertions(+), 397 deletions(-)

New commits:
commit 6d34d4af90b8b41360de4dabb000bbcc894775d0
Author: Albert Astals Cid <aacid at kde.org>
Date:   Tue Sep 27 00:09:54 2011 +0200

    Rename pdfmerge and pdfextract
    
    To pdfunite and pdfseparate, the old names were taken

diff --git a/utils/.gitignore b/utils/.gitignore
index d18e8c9..71779f0 100644
--- a/utils/.gitignore
+++ b/utils/.gitignore
@@ -2,11 +2,11 @@
 .libs
 Makefile
 Makefile.in
-pdfextract
+pdfseparate
 pdffonts
 pdfimages
 pdfinfo
-pdfmerge
+pdfunite
 pdftohtml
 pdftoppm
 pdftops
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index a36616d..daea075 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -102,20 +102,20 @@ target_link_libraries(pdftohtml ${common_libs})
 install(TARGETS pdftohtml DESTINATION bin)
 install(FILES pdftohtml.1 DESTINATION share/man/man1)
 
-# pdfextract
-set(pdfextract_SOURCES ${common_srcs}
-  pdfextract.cc
+# pdfseparate
+set(pdfseparate_SOURCES ${common_srcs}
+  pdfseparate.cc
 )
-add_executable(pdfextract ${pdfextract_SOURCES})
-target_link_libraries(pdfextract ${common_libs})
-install(TARGETS pdfextract DESTINATION bin)
-install(FILES pdfextract.1 DESTINATION share/man/man1)
+add_executable(pdfseparate ${pdfseparate_SOURCES})
+target_link_libraries(pdfseparate ${common_libs})
+install(TARGETS pdfseparate DESTINATION bin)
+install(FILES pdfseparate.1 DESTINATION share/man/man1)
 
-# pdfmerge
-set(pdfmerge_SOURCES ${common_srcs}
-  pdfmerge.cc
+# pdfunite
+set(pdfunite_SOURCES ${common_srcs}
+  pdfunite.cc
 )
-add_executable(pdfmerge ${pdfmerge_SOURCES})
-target_link_libraries(pdfmerge ${common_libs})
-install(TARGETS pdfmerge DESTINATION bin)
-install(FILES pdfmerge.1 DESTINATION share/man/man1)
+add_executable(pdfunite ${pdfunite_SOURCES})
+target_link_libraries(pdfunite ${common_libs})
+install(TARGETS pdfunite DESTINATION bin)
+install(FILES pdfunite.1 DESTINATION share/man/man1)
diff --git a/utils/Makefile.am b/utils/Makefile.am
index ac2a15e..144d812 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -50,8 +50,8 @@ bin_PROGRAMS =					\
 	pdftops					\
 	pdftotext				\
 	pdftohtml				\
-	pdfextract				\
-	pdfmerge				\
+	pdfseparate				\
+	pdfunite				\
 	$(pdftoppm_binary)			\
 	$(pdftocairo_binary)
 
@@ -62,8 +62,8 @@ dist_man1_MANS =				\
 	pdftops.1				\
 	pdftotext.1				\
 	pdftohtml.1				\
-	pdfextract.1				\
-	pdfmerge.1				\
+	pdfseparate.1				\
+	pdfunite.1				\
 	$(pdftoppm_manpage)			\
 	$(pdftocairo_manpage)
 
@@ -106,12 +106,12 @@ pdftohtml_SOURCES =				\
 	HtmlUtils.h				\
 	$(common)
 
-pdfextract_SOURCES =				\
-	pdfextract.cc				\
+pdfseparate_SOURCES =				\
+	pdfseparate.cc				\
 	$(common)
 
-pdfmerge_SOURCES =				\
-	pdfmerge.cc				\
+pdfunite_SOURCES =				\
+	pdfunite.cc				\
 	$(common)
 
 # Yay, automake!  It should be able to figure out that it has to dist
diff --git a/utils/pdfextract.1 b/utils/pdfextract.1
deleted file mode 100644
index 2760045..0000000
--- a/utils/pdfextract.1
+++ /dev/null
@@ -1,49 +0,0 @@
-.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org
-.TH pdfextract 1 "15 September 2011"
-.SH NAME
-pdfextract \- Portable Document Format (PDF) page extractor
-.SH SYNOPSIS
-.B pdfextract
-[options]
-.I PDF-file PDF-page-pattern
-.SH DESCRIPTION
-.B pdfextract
-extract single pages from a Portable Document Format (PDF).
-.PP
-pdfextract reads the PDF file
-.IR PDF-file ,
-extracts one or more pages, and writes one PDF file for each page to
-.IR PDF-page-pattern,
-PDF-page-pattern should contain
-.B %d
-.%d is replaced by the page number
-.TP
-The PDF-file should not be encrypted.
-.SH OPTIONS
-.TP
-.BI \-f " number"
-Specifies the first page to extract. If -f is omitted, extraction starts with page 1.
-.TP
-.BI \-l " number"
-Specifies the last page to extract. if -p is omitted, extraction ends with the last page.
-.TP
-.B \-v
-Print copyright and version information.
-.TP
-.B \-h
-Print usage information.
-.RB ( \-help
-and
-.B \-\-help
-are equivalent.)
-.SH EXAMPLE
-pdfextract sample.pdf sample-%d.pdf
-.TP
-extracts all pages from sample.pdf, if i.e. sample.pdf has 3 pages, it produces
-.TP
-sample-1.pdf, sample-2.pdf, sample-3.pdf
-.SH AUTHOR
-The pdfextract software and documentation are copyright 1996-2004 Glyph
-& Cog, LLC and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org
-.SH "SEE ALSO"
-.BR pdfmerge (1),
diff --git a/utils/pdfextract.cc b/utils/pdfextract.cc
deleted file mode 100644
index d6a7eb5..0000000
--- a/utils/pdfextract.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-//========================================================================
-//
-// pdfextract.cc
-//
-// This file is licensed under the GPLv2 or later
-//
-// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag at alfa.de>
-//
-//========================================================================
-#include "config.h"
-#include <poppler-config.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <string.h>
-#include "parseargs.h"
-#include "goo/GooString.h"
-#include "PDFDoc.h"
-#include "ErrorCodes.h"
-
-static int firstPage = 0;
-static int lastPage = 0;
-static GBool printVersion = gFalse;
-static GBool printHelp = gFalse;
-
-static const ArgDesc argDesc[] = {
-  {"-f", argInt, &firstPage, 0,
-   "first page to extract"},
-  {"-l", argInt, &lastPage, 0,
-   "last page to extract"},
-  {"-v", argFlag, &printVersion, 0,
-   "print copyright and version info"},
-  {"-h", argFlag, &printHelp, 0,
-   "print usage information"},
-  {"-help", argFlag, &printHelp, 0,
-   "print usage information"},
-  {"--help", argFlag, &printHelp, 0,
-   "print usage information"},
-  {"-?", argFlag, &printHelp, 0,
-   "print usage information"},
-  {NULL}
-};
-
-bool extractPages (const char *srcFileName, const char *destFileName) {
-  char pathName[1024];
-  GooString *gfileName = new GooString (srcFileName);
-  PDFDoc *doc = new PDFDoc (gfileName, NULL, NULL, NULL);
-
-  if (!doc->isOk()) {
-    error(-1, "Could not extract page(s) from damaged file ('%s')", srcFileName);
-    return false;
-  }
-  if (doc->isEncrypted()) {
-    error(-1, "Could not extract page(s) from encrypted file ('%s')", srcFileName);
-    return false;
-  }
-
-  if (firstPage == 0 && lastPage == 0) {
-    firstPage = 1;
-    lastPage = doc->getNumPages();
-  }
-  if (lastPage == 0)
-    lastPage = doc->getNumPages();
-  if (firstPage == 0)
-    firstPage = 1;
-  if (firstPage != lastPage && strstr(destFileName, "%d") == NULL) {
-    error(-1, "'%s' must contain '%%d' if more than one page should be extracted", destFileName);
-    return false;
-  }
-  for (int pageNo = firstPage; pageNo <= lastPage; pageNo++) {
-    sprintf (pathName, destFileName, pageNo);
-    GooString *gpageName = new GooString (pathName);
-    int errCode = doc->savePageAs(gpageName, pageNo);
-    if ( errCode != errNone) {
-      delete gpageName;
-      delete gfileName;
-      return false;
-    }
-    delete gpageName;
-  }
-  delete gfileName;
-  return true;
-}
-
-int
-main (int argc, char *argv[])
-{
-  Object info;
-  GBool ok;
-  int exitCode;
-
-  exitCode = 99;
-
-  // parse args
-  ok = parseArgs (argDesc, &argc, argv);
-  if (!ok || argc != 3 || printVersion || printHelp)
-    {
-      fprintf (stderr, "pdfextract version %s\n", PACKAGE_VERSION);
-      fprintf (stderr, "%s\n", popplerCopyright);
-      fprintf (stderr, "%s\n", xpdfCopyright);
-      if (!printVersion)
-	{
-	  printUsage ("pdfextract", "<PDF-sourcefile> <PDF-pattern-destfile>",
-		      argDesc);
-	}
-      if (printVersion || printHelp)
-	exitCode = 0;
-      goto err0;
-    }
-  extractPages (argv[1], argv[2]);
-
-err0:
-
-  return exitCode;
-}
diff --git a/utils/pdfmerge.1 b/utils/pdfmerge.1
deleted file mode 100644
index aecf555..0000000
--- a/utils/pdfmerge.1
+++ /dev/null
@@ -1,33 +0,0 @@
-.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org
-.TH pdfmerge 1 "15 September 2011"
-.SH NAME
-pdfmerge \- Portable Document Format (PDF) page merger
-.SH SYNOPSIS
-.B pdfmerge
-[options]
-.I PDF-sourcefile1..PDF-sourcefilen PDF-destfile
-.SH DESCRIPTION
-.B pdfmerge
-merges several PDF (Portable Document Format)  files in order of their occurence on command line to one PDF result file.
-.TP
-Neither of the PDF-sourcefile1 to PDF-sourcefilen should be encrypted.
-.SH OPTIONS
-.TP
-.B \-v
-Print copyright and version information.
-.TP
-.B \-h
-Print usage information.
-.RB ( \-help
-and
-.B \-\-help
-are equivalent.)
-.SH EXAMPLE
-pdfmerge sample1.pdf sample2.pdf sample.pdf
-.TP
-merges all pages from sample1.pdf and sample2.pdf (in that order) and creates sample.pdf
-.SH AUTHOR
-The pdfmerge software and documentation are copyright 1996-2004 Glyph & Cog, LLC
-and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org
-.SH "SEE ALSO"
-.BR pdfextract (1),
diff --git a/utils/pdfmerge.cc b/utils/pdfmerge.cc
deleted file mode 100644
index 28f7265..0000000
--- a/utils/pdfmerge.cc
+++ /dev/null
@@ -1,176 +0,0 @@
-//========================================================================
-//
-// pdfmerge.cc
-//
-// This file is licensed under the GPLv2 or later
-//
-// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag at alfa.de>
-//
-//========================================================================
-#include <PDFDoc.h>
-#include "parseargs.h"
-#include "config.h"
-#include <poppler-config.h>
-#include <vector>
-
-static GBool printVersion = gFalse;
-static GBool printHelp = gFalse;
-
-static const ArgDesc argDesc[] = {
-  {"-v", argFlag, &printVersion, 0,
-   "print copyright and version info"},
-  {"-h", argFlag, &printHelp, 0,
-   "print usage information"},
-  {"-help", argFlag, &printHelp, 0,
-   "print usage information"},
-  {"--help", argFlag, &printHelp, 0,
-   "print usage information"},
-  {"-?", argFlag, &printHelp, 0,
-   "print usage information"},
-  {NULL}
-};
-
-///////////////////////////////////////////////////////////////////////////
-int main (int argc, char *argv[])
-///////////////////////////////////////////////////////////////////////////
-// Merge PDF files given by arguments 1 to argc-2 and write the result
-// to the file specified by argument argc-1.
-///////////////////////////////////////////////////////////////////////////
-{
-  int objectsCount = 0;
-  Guint numOffset = 0;
-  std::vector<Object> pages;
-  std::vector<Guint> offsets;
-  XRef *yRef, *countRef;
-  FILE *f;
-  OutStream *outStr;
-  int i;
-  int j, rootNum;
-  std::vector<PDFDoc *>docs;
-  int majorVersion = 0;
-  int minorVersion = 0;
-  char *fileName = argv[argc - 1];
-  int exitCode;
-
-  exitCode = 99;
-  if (argc <= 3 || printVersion || printHelp) {
-    fprintf(stderr, "pdfmerge version %s\n", PACKAGE_VERSION);
-    fprintf(stderr, "%s\n", popplerCopyright);
-    fprintf(stderr, "%s\n", xpdfCopyright);
-    if (!printVersion) {
-      printUsage("pdfmerge", "<PDF-sourcefile-1>..<PDF-sourcefile-n> <PDF-destfile>",
-	argDesc);
-    }
-    if (printVersion || printHelp)
-      exitCode = 0;
-    return exitCode;
-  }
-  exitCode = 0;
-
-  for (i = 1; i < argc - 1; i++) {
-    GooString *gfileName = new GooString(argv[i]);
-    PDFDoc *doc = new PDFDoc(gfileName, NULL, NULL, NULL);
-    if (doc->isOk() && !doc->isEncrypted()) {
-      docs.push_back(doc);
-      if (doc->getPDFMajorVersion() > majorVersion) {
-        majorVersion = doc->getPDFMajorVersion();
-        minorVersion = doc->getPDFMinorVersion();
-      } else if (doc->getPDFMajorVersion() == majorVersion) {
-        if (doc->getPDFMinorVersion() > minorVersion) {
-          minorVersion = doc->getPDFMinorVersion();
-        }
-      }
-    } else if (doc->isOk()) {
-      error(-1, "Could not merge encrypted files ('%s')", argv[i]);
-      return -1;
-    } else {
-      error(-1, "Could not merge damaged documents ('%s')", argv[i]);
-      return -1;
-    }
-  }
-
-  if (!(f = fopen(fileName, "wb"))) {
-    error(-1, "Could not open file '%s'", fileName);
-    return -1;
-  }
-  outStr = new FileOutStream(f, 0);
-
-  yRef = new XRef();
-  countRef = new XRef();
-  yRef->add(0, 65535, 0, gFalse);
-  PDFDoc::writeHeader(outStr, majorVersion, minorVersion);
-
-  for (i = 0; i < (int) docs.size(); i++) {
-    for (j = 1; j <= docs[i]->getNumPages(); j++) {
-      PDFRectangle *cropBox = NULL;
-      if (docs[i]->getCatalog()->getPage(j)->isCropped())
-        cropBox = docs[i]->getCatalog()->getPage(j)->getCropBox();
-      docs[i]->replacePageDict(j,
-	    docs[i]->getCatalog()->getPage(j)->getRotate(),
-	    docs[i]->getCatalog()->getPage(j)->getMediaBox(), cropBox, NULL);
-      Ref *refPage = docs[i]->getCatalog()->getPageRef(j);
-      Object page;
-      docs[i]->getXRef()->fetch(refPage->num, refPage->gen, &page);
-      pages.push_back(page);
-      offsets.push_back(numOffset);
-      Dict *pageDict = page.getDict();
-      docs[i]->markPageObjects(pageDict, yRef, countRef, numOffset);
-    }
-    objectsCount += docs[i]->writePageObjects(outStr, yRef, numOffset);
-    numOffset = yRef->getNumObjects() + 1;
-  }
-
-  rootNum = yRef->getNumObjects() + 1;
-  yRef->add(rootNum, 0, outStr->getPos(), gTrue);
-  outStr->printf("%d 0 obj\n", rootNum);
-  outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
-  outStr->printf(">>\nendobj\n");
-  objectsCount++;
-
-  yRef->add(rootNum + 1, 0, outStr->getPos(), gTrue);
-  outStr->printf("%d 0 obj\n", rootNum + 1);
-  outStr->printf("<< /Type /Pages /Kids [");
-  for (j = 0; j < (int) pages.size(); j++)
-    outStr->printf(" %d 0 R", rootNum + j + 2);
-  outStr->printf(" ] /Count %d >>\nendobj\n", pages.size());
-  objectsCount++;
-
-  for (i = 0; i < (int) pages.size(); i++) {
-    yRef->add(rootNum + i + 2, 0, outStr->getPos(), gTrue);
-    outStr->printf("%d 0 obj\n", rootNum + i + 2);
-    outStr->printf("<< ");
-    Dict *pageDict = pages[i].getDict();
-    for (j = 0; j < pageDict->getLength(); j++) {
-      if (j > 0)
-	outStr->printf(" ");
-      const char *key = pageDict->getKey(j);
-      Object value;
-      pageDict->getValNF(j, &value);
-      if (strcmp(key, "Parent") == 0) {
-        outStr->printf("/Parent %d 0 R", rootNum + 1);
-      } else {
-        outStr->printf("/%s ", key);
-        PDFDoc::writeObject(&value, NULL, outStr, yRef, offsets[i]);
-      }
-      value.free();
-    }
-    outStr->printf(" >>\nendobj\n");
-    objectsCount++;
-  }
-  Guint uxrefOffset = outStr->getPos();
-  yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */ );
-
-  Ref ref;
-  ref.num = rootNum;
-  ref.gen = 0;
-  PDFDoc::writeTrailer(uxrefOffset, objectsCount, outStr, (GBool) gFalse, 0,
-	&ref, yRef, fileName, outStr->getPos());
-
-  outStr->close();
-  fclose(f);
-  delete yRef;
-  delete countRef;
-  for (j = 0; j < (int) pages.size (); j++) pages[j].free();
-  for (i = 0; i < (int) docs.size (); i++) delete docs[i];
-  return exitCode;
-}
diff --git a/utils/pdfseparate.1 b/utils/pdfseparate.1
new file mode 100644
index 0000000..f871d3b
--- /dev/null
+++ b/utils/pdfseparate.1
@@ -0,0 +1,49 @@
+.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org
+.TH pdfseparate 1 "15 September 2011"
+.SH NAME
+pdfseparate \- Portable Document Format (PDF) page extractor
+.SH SYNOPSIS
+.B pdfseparate
+[options]
+.I PDF-file PDF-page-pattern
+.SH DESCRIPTION
+.B pdfseparate
+extracts single pages from a Portable Document Format (PDF) file.
+.PP
+pdfseparate reads the PDF file
+.IR PDF-file ,
+extracts one or more pages, and writes one PDF file for each page to
+.IR PDF-page-pattern .
+PDF-page-pattern should contain
+.BR %d .
+%d is replaced by the page number.
+.TP
+The PDF-file should not be encrypted.
+.SH OPTIONS
+.TP
+.BI \-f " number"
+Specifies the first page to extract. If -f is omitted, extraction starts with page 1.
+.TP
+.BI \-l " number"
+Specifies the last page to extract. If -l is omitted, extraction ends with the last page.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXAMPLE
+pdfseparate sample.pdf sample-%d.pdf
+.TP
+extracts all pages from sample.pdf. If, for example, sample.pdf has 3 pages, it produces
+.TP
+sample-1.pdf, sample-2.pdf, sample-3.pdf
+.SH AUTHOR
+The pdfseparate software and documentation are copyright 1996-2004 Glyph
+& Cog, LLC and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org
+.SH "SEE ALSO"
+.BR pdfunite (1)
diff --git a/utils/pdfseparate.cc b/utils/pdfseparate.cc
new file mode 100644
index 0000000..9e62705
--- /dev/null
+++ b/utils/pdfseparate.cc
@@ -0,0 +1,115 @@
+//========================================================================
+//
+// pdfseparate.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag at alfa.de>
+//
+//========================================================================
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "PDFDoc.h"
+#include "ErrorCodes.h"
+
+static int firstPage = 0;
+static int lastPage = 0;
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static const ArgDesc argDesc[] = {
+  {"-f", argInt, &firstPage, 0,
+   "first page to extract"},
+  {"-l", argInt, &lastPage, 0,
+   "last page to extract"},
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+
+bool extractPages (const char *srcFileName, const char *destFileName) {
+  char pathName[1024];
+  GooString *gfileName = new GooString (srcFileName);
+  PDFDoc *doc = new PDFDoc (gfileName, NULL, NULL, NULL);
+
+  if (!doc->isOk()) {
+    error(-1, "Could not extract page(s) from damaged file ('%s')", srcFileName);
+    return false;
+  }
+  if (doc->isEncrypted()) {
+    error(-1, "Could not extract page(s) from encrypted file ('%s')", srcFileName);
+    return false;
+  }
+
+  if (firstPage == 0 && lastPage == 0) {
+    firstPage = 1;
+    lastPage = doc->getNumPages();
+  }
+  if (lastPage == 0)
+    lastPage = doc->getNumPages();
+  if (firstPage == 0)
+    firstPage = 1;
+  if (firstPage != lastPage && strstr(destFileName, "%d") == NULL) {
+    error(-1, "'%s' must contain '%%d' if more than one page should be extracted", destFileName);
+    return false;
+  }
+  for (int pageNo = firstPage; pageNo <= lastPage; pageNo++) {
+    sprintf (pathName, destFileName, pageNo);
+    GooString *gpageName = new GooString (pathName);
+    int errCode = doc->savePageAs(gpageName, pageNo);
+    if ( errCode != errNone) {
+      delete gpageName;
+      delete gfileName;
+      return false;
+    }
+    delete gpageName;
+  }
+  delete gfileName;
+  return true;
+}
+
+int
+main (int argc, char *argv[])
+{
+  Object info;
+  GBool ok;
+  int exitCode;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs (argDesc, &argc, argv);
+  if (!ok || argc != 3 || printVersion || printHelp)
+    {
+      fprintf (stderr, "pdfseparate version %s\n", PACKAGE_VERSION);
+      fprintf (stderr, "%s\n", popplerCopyright);
+      fprintf (stderr, "%s\n", xpdfCopyright);
+      if (!printVersion)
+	{
+	  printUsage ("pdfseparate", "<PDF-sourcefile> <PDF-pattern-destfile>",
+		      argDesc);
+	}
+      if (printVersion || printHelp)
+	exitCode = 0;
+      goto err0;
+    }
+  extractPages (argv[1], argv[2]);
+
+err0:
+
+  return exitCode;
+}
diff --git a/utils/pdfunite.1 b/utils/pdfunite.1
new file mode 100644
index 0000000..9b1f2e8
--- /dev/null
+++ b/utils/pdfunite.1
@@ -0,0 +1,33 @@
+.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org
+.TH pdfunite 1 "15 September 2011"
+.SH NAME
+pdfunite \- Portable Document Format (PDF) page merger
+.SH SYNOPSIS
+.B pdfunite
+[options]
+.I PDF-sourcefile1..PDF-sourcefilen PDF-destfile
+.SH DESCRIPTION
+.B pdfunite
+merges several PDF (Portable Document Format) files, in order of their occurrence on the command line, into one PDF result file.
+.TP
+None of PDF-sourcefile1 to PDF-sourcefilen should be encrypted.
+.SH OPTIONS
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXAMPLE
+pdfunite sample1.pdf sample2.pdf sample.pdf
+.TP
+merges all pages from sample1.pdf and sample2.pdf (in that order) and creates sample.pdf
+.SH AUTHOR
+The pdfunite software and documentation are copyright 1996-2004 Glyph & Cog, LLC
+and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org
+.SH "SEE ALSO"
+.BR pdfseparate (1)
diff --git a/utils/pdfunite.cc b/utils/pdfunite.cc
new file mode 100644
index 0000000..3b3d2bb
--- /dev/null
+++ b/utils/pdfunite.cc
@@ -0,0 +1,176 @@
+//========================================================================
+//
+// pdfunite.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag at alfa.de>
+//
+//========================================================================
+#include <PDFDoc.h>
+#include "parseargs.h"
+#include "config.h"
+#include <poppler-config.h>
+#include <vector>
+
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static const ArgDesc argDesc[] = {
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+
+///////////////////////////////////////////////////////////////////////////
+int main (int argc, char *argv[])
+///////////////////////////////////////////////////////////////////////////
+// Merge PDF files given by arguments 1 to argc-2 and write the result
+// to the file specified by argument argc-1.
+///////////////////////////////////////////////////////////////////////////
+{
+  int objectsCount = 0;
+  Guint numOffset = 0;
+  std::vector<Object> pages;
+  std::vector<Guint> offsets;
+  XRef *yRef, *countRef;
+  FILE *f;
+  OutStream *outStr;
+  int i;
+  int j, rootNum;
+  std::vector<PDFDoc *>docs;
+  int majorVersion = 0;
+  int minorVersion = 0;
+  char *fileName = argv[argc - 1];
+  int exitCode;
+
+  exitCode = 99;
+  if (argc <= 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdfunite version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfunite", "<PDF-sourcefile-1>..<PDF-sourcefile-n> <PDF-destfile>",
+	argDesc);
+    }
+    if (printVersion || printHelp)
+      exitCode = 0;
+    return exitCode;
+  }
+  exitCode = 0;
+
+  for (i = 1; i < argc - 1; i++) {
+    GooString *gfileName = new GooString(argv[i]);
+    PDFDoc *doc = new PDFDoc(gfileName, NULL, NULL, NULL);
+    if (doc->isOk() && !doc->isEncrypted()) {
+      docs.push_back(doc);
+      if (doc->getPDFMajorVersion() > majorVersion) {
+        majorVersion = doc->getPDFMajorVersion();
+        minorVersion = doc->getPDFMinorVersion();
+      } else if (doc->getPDFMajorVersion() == majorVersion) {
+        if (doc->getPDFMinorVersion() > minorVersion) {
+          minorVersion = doc->getPDFMinorVersion();
+        }
+      }
+    } else if (doc->isOk()) {
+      error(-1, "Could not merge encrypted files ('%s')", argv[i]);
+      return -1;
+    } else {
+      error(-1, "Could not merge damaged documents ('%s')", argv[i]);
+      return -1;
+    }
+  }
+
+  if (!(f = fopen(fileName, "wb"))) {
+    error(-1, "Could not open file '%s'", fileName);
+    return -1;
+  }
+  outStr = new FileOutStream(f, 0);
+
+  yRef = new XRef();
+  countRef = new XRef();
+  yRef->add(0, 65535, 0, gFalse);
+  PDFDoc::writeHeader(outStr, majorVersion, minorVersion);
+
+  for (i = 0; i < (int) docs.size(); i++) {
+    for (j = 1; j <= docs[i]->getNumPages(); j++) {
+      PDFRectangle *cropBox = NULL;
+      if (docs[i]->getCatalog()->getPage(j)->isCropped())
+        cropBox = docs[i]->getCatalog()->getPage(j)->getCropBox();
+      docs[i]->replacePageDict(j,
+	    docs[i]->getCatalog()->getPage(j)->getRotate(),
+	    docs[i]->getCatalog()->getPage(j)->getMediaBox(), cropBox, NULL);
+      Ref *refPage = docs[i]->getCatalog()->getPageRef(j);
+      Object page;
+      docs[i]->getXRef()->fetch(refPage->num, refPage->gen, &page);
+      pages.push_back(page);
+      offsets.push_back(numOffset);
+      Dict *pageDict = page.getDict();
+      docs[i]->markPageObjects(pageDict, yRef, countRef, numOffset);
+    }
+    objectsCount += docs[i]->writePageObjects(outStr, yRef, numOffset);
+    numOffset = yRef->getNumObjects() + 1;
+  }
+
+  rootNum = yRef->getNumObjects() + 1;
+  yRef->add(rootNum, 0, outStr->getPos(), gTrue);
+  outStr->printf("%d 0 obj\n", rootNum);
+  outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
+  outStr->printf(">>\nendobj\n");
+  objectsCount++;
+
+  yRef->add(rootNum + 1, 0, outStr->getPos(), gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 1);
+  outStr->printf("<< /Type /Pages /Kids [");
+  for (j = 0; j < (int) pages.size(); j++)
+    outStr->printf(" %d 0 R", rootNum + j + 2);
+  outStr->printf(" ] /Count %d >>\nendobj\n", pages.size());
+  objectsCount++;
+
+  for (i = 0; i < (int) pages.size(); i++) {
+    yRef->add(rootNum + i + 2, 0, outStr->getPos(), gTrue);
+    outStr->printf("%d 0 obj\n", rootNum + i + 2);
+    outStr->printf("<< ");
+    Dict *pageDict = pages[i].getDict();
+    for (j = 0; j < pageDict->getLength(); j++) {
+      if (j > 0)
+	outStr->printf(" ");
+      const char *key = pageDict->getKey(j);
+      Object value;
+      pageDict->getValNF(j, &value);
+      if (strcmp(key, "Parent") == 0) {
+        outStr->printf("/Parent %d 0 R", rootNum + 1);
+      } else {
+        outStr->printf("/%s ", key);
+        PDFDoc::writeObject(&value, NULL, outStr, yRef, offsets[i]);
+      }
+      value.free();
+    }
+    outStr->printf(" >>\nendobj\n");
+    objectsCount++;
+  }
+  Guint uxrefOffset = outStr->getPos();
+  yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */ );
+
+  Ref ref;
+  ref.num = rootNum;
+  ref.gen = 0;
+  PDFDoc::writeTrailer(uxrefOffset, objectsCount, outStr, (GBool) gFalse, 0,
+	&ref, yRef, fileName, outStr->getPos());
+
+  outStr->close();
+  fclose(f);
+  delete yRef;
+  delete countRef;
+  for (j = 0; j < (int) pages.size (); j++) pages[j].free();
+  for (i = 0; i < (int) docs.size (); i++) delete docs[i];
+  return exitCode;
+}


More information about the poppler mailing list