[poppler] utils/CMakeLists.txt utils/.gitignore utils/Makefile.am utils/pdfextract.1 utils/pdfextract.cc utils/pdfmerge.1 utils/pdfmerge.cc utils/pdfseparate.1 utils/pdfseparate.cc utils/pdfunite.1 utils/pdfunite.cc
Albert Astals Cid
aacid at kemper.freedesktop.org
Mon Sep 26 15:12:21 PDT 2011
utils/.gitignore | 4 -
utils/CMakeLists.txt | 28 ++++----
utils/Makefile.am | 16 ++--
utils/pdfextract.1 | 49 --------------
utils/pdfextract.cc | 115 ---------------------------------
utils/pdfmerge.1 | 33 ---------
utils/pdfmerge.cc | 176 ---------------------------------------------------
utils/pdfseparate.1 | 49 ++++++++++++++
utils/pdfseparate.cc | 115 +++++++++++++++++++++++++++++++++
utils/pdfunite.1 | 33 +++++++++
utils/pdfunite.cc | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++
11 files changed, 397 insertions(+), 397 deletions(-)
New commits:
commit 6d34d4af90b8b41360de4dabb000bbcc894775d0
Author: Albert Astals Cid <aacid at kde.org>
Date: Tue Sep 27 00:09:54 2011 +0200
Rename pdfmerge and pdfextract
To pdfunite and pdfseparate, the old names were taken
diff --git a/utils/.gitignore b/utils/.gitignore
index d18e8c9..71779f0 100644
--- a/utils/.gitignore
+++ b/utils/.gitignore
@@ -2,11 +2,11 @@
.libs
Makefile
Makefile.in
-pdfextract
+pdfseparate
pdffonts
pdfimages
pdfinfo
-pdfmerge
+pdfunite
pdftohtml
pdftoppm
pdftops
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index a36616d..daea075 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -102,20 +102,20 @@ target_link_libraries(pdftohtml ${common_libs})
install(TARGETS pdftohtml DESTINATION bin)
install(FILES pdftohtml.1 DESTINATION share/man/man1)
-# pdfextract
-set(pdfextract_SOURCES ${common_srcs}
- pdfextract.cc
+# pdfseparate
+set(pdfseparate_SOURCES ${common_srcs}
+ pdfseparate.cc
)
-add_executable(pdfextract ${pdfextract_SOURCES})
-target_link_libraries(pdfextract ${common_libs})
-install(TARGETS pdfextract DESTINATION bin)
-install(FILES pdfextract.1 DESTINATION share/man/man1)
+add_executable(pdfseparate ${pdfseparate_SOURCES})
+target_link_libraries(pdfseparate ${common_libs})
+install(TARGETS pdfseparate DESTINATION bin)
+install(FILES pdfseparate.1 DESTINATION share/man/man1)
-# pdfmerge
-set(pdfmerge_SOURCES ${common_srcs}
- pdfmerge.cc
+# pdfunite
+set(pdfunite_SOURCES ${common_srcs}
+ pdfunite.cc
)
-add_executable(pdfmerge ${pdfmerge_SOURCES})
-target_link_libraries(pdfmerge ${common_libs})
-install(TARGETS pdfmerge DESTINATION bin)
-install(FILES pdfmerge.1 DESTINATION share/man/man1)
+add_executable(pdfunite ${pdfunite_SOURCES})
+target_link_libraries(pdfunite ${common_libs})
+install(TARGETS pdfunite DESTINATION bin)
+install(FILES pdfunite.1 DESTINATION share/man/man1)
diff --git a/utils/Makefile.am b/utils/Makefile.am
index ac2a15e..144d812 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -50,8 +50,8 @@ bin_PROGRAMS = \
pdftops \
pdftotext \
pdftohtml \
- pdfextract \
- pdfmerge \
+ pdfseparate \
+ pdfunite \
$(pdftoppm_binary) \
$(pdftocairo_binary)
@@ -62,8 +62,8 @@ dist_man1_MANS = \
pdftops.1 \
pdftotext.1 \
pdftohtml.1 \
- pdfextract.1 \
- pdfmerge.1 \
+ pdfseparate.1 \
+ pdfunite.1 \
$(pdftoppm_manpage) \
$(pdftocairo_manpage)
@@ -106,12 +106,12 @@ pdftohtml_SOURCES = \
HtmlUtils.h \
$(common)
-pdfextract_SOURCES = \
- pdfextract.cc \
+pdfseparate_SOURCES = \
+ pdfseparate.cc \
$(common)
-pdfmerge_SOURCES = \
- pdfmerge.cc \
+pdfunite_SOURCES = \
+ pdfunite.cc \
$(common)
# Yay, automake! It should be able to figure out that it has to dist
diff --git a/utils/pdfextract.1 b/utils/pdfextract.1
deleted file mode 100644
index 2760045..0000000
--- a/utils/pdfextract.1
+++ /dev/null
@@ -1,49 +0,0 @@
-.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org
-.TH pdfextract 1 "15 September 2011"
-.SH NAME
-pdfextract \- Portable Document Format (PDF) page extractor
-.SH SYNOPSIS
-.B pdfextract
-[options]
-.I PDF-file PDF-page-pattern
-.SH DESCRIPTION
-.B pdfextract
-extract single pages from a Portable Document Format (PDF).
-.PP
-pdfextract reads the PDF file
-.IR PDF-file ,
-extracts one or more pages, and writes one PDF file for each page to
-.IR PDF-page-pattern,
-PDF-page-pattern should contain
-.B %d
-.%d is replaced by the page number
-.TP
-The PDF-file should not be encrypted.
-.SH OPTIONS
-.TP
-.BI \-f " number"
-Specifies the first page to extract. If -f is omitted, extraction starts with page 1.
-.TP
-.BI \-l " number"
-Specifies the last page to extract. if -p is omitted, extraction ends with the last page.
-.TP
-.B \-v
-Print copyright and version information.
-.TP
-.B \-h
-Print usage information.
-.RB ( \-help
-and
-.B \-\-help
-are equivalent.)
-.SH EXAMPLE
-pdfextract sample.pdf sample-%d.pdf
-.TP
-extracts all pages from sample.pdf, if i.e. sample.pdf has 3 pages, it produces
-.TP
-sample-1.pdf, sample-2.pdf, sample-3.pdf
-.SH AUTHOR
-The pdfextract software and documentation are copyright 1996-2004 Glyph
-& Cog, LLC and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org
-.SH "SEE ALSO"
-.BR pdfmerge (1),
diff --git a/utils/pdfextract.cc b/utils/pdfextract.cc
deleted file mode 100644
index d6a7eb5..0000000
--- a/utils/pdfextract.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-//========================================================================
-//
-// pdfextract.cc
-//
-// This file is licensed under the GPLv2 or later
-//
-// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag at alfa.de>
-//
-//========================================================================
-#include "config.h"
-#include <poppler-config.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <string.h>
-#include "parseargs.h"
-#include "goo/GooString.h"
-#include "PDFDoc.h"
-#include "ErrorCodes.h"
-
-static int firstPage = 0;
-static int lastPage = 0;
-static GBool printVersion = gFalse;
-static GBool printHelp = gFalse;
-
-static const ArgDesc argDesc[] = {
- {"-f", argInt, &firstPage, 0,
- "first page to extract"},
- {"-l", argInt, &lastPage, 0,
- "last page to extract"},
- {"-v", argFlag, &printVersion, 0,
- "print copyright and version info"},
- {"-h", argFlag, &printHelp, 0,
- "print usage information"},
- {"-help", argFlag, &printHelp, 0,
- "print usage information"},
- {"--help", argFlag, &printHelp, 0,
- "print usage information"},
- {"-?", argFlag, &printHelp, 0,
- "print usage information"},
- {NULL}
-};
-
-bool extractPages (const char *srcFileName, const char *destFileName) {
- char pathName[1024];
- GooString *gfileName = new GooString (srcFileName);
- PDFDoc *doc = new PDFDoc (gfileName, NULL, NULL, NULL);
-
- if (!doc->isOk()) {
- error(-1, "Could not extract page(s) from damaged file ('%s')", srcFileName);
- return false;
- }
- if (doc->isEncrypted()) {
- error(-1, "Could not extract page(s) from encrypted file ('%s')", srcFileName);
- return false;
- }
-
- if (firstPage == 0 && lastPage == 0) {
- firstPage = 1;
- lastPage = doc->getNumPages();
- }
- if (lastPage == 0)
- lastPage = doc->getNumPages();
- if (firstPage == 0)
- firstPage = 1;
- if (firstPage != lastPage && strstr(destFileName, "%d") == NULL) {
- error(-1, "'%s' must contain '%%d' if more than one page should be extracted", destFileName);
- return false;
- }
- for (int pageNo = firstPage; pageNo <= lastPage; pageNo++) {
- sprintf (pathName, destFileName, pageNo);
- GooString *gpageName = new GooString (pathName);
- int errCode = doc->savePageAs(gpageName, pageNo);
- if ( errCode != errNone) {
- delete gpageName;
- delete gfileName;
- return false;
- }
- delete gpageName;
- }
- delete gfileName;
- return true;
-}
-
-int
-main (int argc, char *argv[])
-{
- Object info;
- GBool ok;
- int exitCode;
-
- exitCode = 99;
-
- // parse args
- ok = parseArgs (argDesc, &argc, argv);
- if (!ok || argc != 3 || printVersion || printHelp)
- {
- fprintf (stderr, "pdfextract version %s\n", PACKAGE_VERSION);
- fprintf (stderr, "%s\n", popplerCopyright);
- fprintf (stderr, "%s\n", xpdfCopyright);
- if (!printVersion)
- {
- printUsage ("pdfextract", "<PDF-sourcefile> <PDF-pattern-destfile>",
- argDesc);
- }
- if (printVersion || printHelp)
- exitCode = 0;
- goto err0;
- }
- extractPages (argv[1], argv[2]);
-
-err0:
-
- return exitCode;
-}
diff --git a/utils/pdfmerge.1 b/utils/pdfmerge.1
deleted file mode 100644
index aecf555..0000000
--- a/utils/pdfmerge.1
+++ /dev/null
@@ -1,33 +0,0 @@
-.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org
-.TH pdfmerge 1 "15 September 2011"
-.SH NAME
-pdfmerge \- Portable Document Format (PDF) page merger
-.SH SYNOPSIS
-.B pdfmerge
-[options]
-.I PDF-sourcefile1..PDF-sourcefilen PDF-destfile
-.SH DESCRIPTION
-.B pdfmerge
-merges several PDF (Portable Document Format) files in order of their occurence on command line to one PDF result file.
-.TP
-Neither of the PDF-sourcefile1 to PDF-sourcefilen should be encrypted.
-.SH OPTIONS
-.TP
-.B \-v
-Print copyright and version information.
-.TP
-.B \-h
-Print usage information.
-.RB ( \-help
-and
-.B \-\-help
-are equivalent.)
-.SH EXAMPLE
-pdfmerge sample1.pdf sample2.pdf sample.pdf
-.TP
-merges all pages from sample1.pdf and sample2.pdf (in that order) and creates sample.pdf
-.SH AUTHOR
-The pdfmerge software and documentation are copyright 1996-2004 Glyph & Cog, LLC
-and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org
-.SH "SEE ALSO"
-.BR pdfextract (1),
diff --git a/utils/pdfmerge.cc b/utils/pdfmerge.cc
deleted file mode 100644
index 28f7265..0000000
--- a/utils/pdfmerge.cc
+++ /dev/null
@@ -1,176 +0,0 @@
-//========================================================================
-//
-// pdfmerge.cc
-//
-// This file is licensed under the GPLv2 or later
-//
-// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag at alfa.de>
-//
-//========================================================================
-#include <PDFDoc.h>
-#include "parseargs.h"
-#include "config.h"
-#include <poppler-config.h>
-#include <vector>
-
-static GBool printVersion = gFalse;
-static GBool printHelp = gFalse;
-
-static const ArgDesc argDesc[] = {
- {"-v", argFlag, &printVersion, 0,
- "print copyright and version info"},
- {"-h", argFlag, &printHelp, 0,
- "print usage information"},
- {"-help", argFlag, &printHelp, 0,
- "print usage information"},
- {"--help", argFlag, &printHelp, 0,
- "print usage information"},
- {"-?", argFlag, &printHelp, 0,
- "print usage information"},
- {NULL}
-};
-
-///////////////////////////////////////////////////////////////////////////
-int main (int argc, char *argv[])
-///////////////////////////////////////////////////////////////////////////
-// Merge PDF files given by arguments 1 to argc-2 and write the result
-// to the file specified by argument argc-1.
-///////////////////////////////////////////////////////////////////////////
-{
- int objectsCount = 0;
- Guint numOffset = 0;
- std::vector<Object> pages;
- std::vector<Guint> offsets;
- XRef *yRef, *countRef;
- FILE *f;
- OutStream *outStr;
- int i;
- int j, rootNum;
- std::vector<PDFDoc *>docs;
- int majorVersion = 0;
- int minorVersion = 0;
- char *fileName = argv[argc - 1];
- int exitCode;
-
- exitCode = 99;
- if (argc <= 3 || printVersion || printHelp) {
- fprintf(stderr, "pdfmerge version %s\n", PACKAGE_VERSION);
- fprintf(stderr, "%s\n", popplerCopyright);
- fprintf(stderr, "%s\n", xpdfCopyright);
- if (!printVersion) {
- printUsage("pdfmerge", "<PDF-sourcefile-1>..<PDF-sourcefile-n> <PDF-destfile>",
- argDesc);
- }
- if (printVersion || printHelp)
- exitCode = 0;
- return exitCode;
- }
- exitCode = 0;
-
- for (i = 1; i < argc - 1; i++) {
- GooString *gfileName = new GooString(argv[i]);
- PDFDoc *doc = new PDFDoc(gfileName, NULL, NULL, NULL);
- if (doc->isOk() && !doc->isEncrypted()) {
- docs.push_back(doc);
- if (doc->getPDFMajorVersion() > majorVersion) {
- majorVersion = doc->getPDFMajorVersion();
- minorVersion = doc->getPDFMinorVersion();
- } else if (doc->getPDFMajorVersion() == majorVersion) {
- if (doc->getPDFMinorVersion() > minorVersion) {
- minorVersion = doc->getPDFMinorVersion();
- }
- }
- } else if (doc->isOk()) {
- error(-1, "Could not merge encrypted files ('%s')", argv[i]);
- return -1;
- } else {
- error(-1, "Could not merge damaged documents ('%s')", argv[i]);
- return -1;
- }
- }
-
- if (!(f = fopen(fileName, "wb"))) {
- error(-1, "Could not open file '%s'", fileName);
- return -1;
- }
- outStr = new FileOutStream(f, 0);
-
- yRef = new XRef();
- countRef = new XRef();
- yRef->add(0, 65535, 0, gFalse);
- PDFDoc::writeHeader(outStr, majorVersion, minorVersion);
-
- for (i = 0; i < (int) docs.size(); i++) {
- for (j = 1; j <= docs[i]->getNumPages(); j++) {
- PDFRectangle *cropBox = NULL;
- if (docs[i]->getCatalog()->getPage(j)->isCropped())
- cropBox = docs[i]->getCatalog()->getPage(j)->getCropBox();
- docs[i]->replacePageDict(j,
- docs[i]->getCatalog()->getPage(j)->getRotate(),
- docs[i]->getCatalog()->getPage(j)->getMediaBox(), cropBox, NULL);
- Ref *refPage = docs[i]->getCatalog()->getPageRef(j);
- Object page;
- docs[i]->getXRef()->fetch(refPage->num, refPage->gen, &page);
- pages.push_back(page);
- offsets.push_back(numOffset);
- Dict *pageDict = page.getDict();
- docs[i]->markPageObjects(pageDict, yRef, countRef, numOffset);
- }
- objectsCount += docs[i]->writePageObjects(outStr, yRef, numOffset);
- numOffset = yRef->getNumObjects() + 1;
- }
-
- rootNum = yRef->getNumObjects() + 1;
- yRef->add(rootNum, 0, outStr->getPos(), gTrue);
- outStr->printf("%d 0 obj\n", rootNum);
- outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
- outStr->printf(">>\nendobj\n");
- objectsCount++;
-
- yRef->add(rootNum + 1, 0, outStr->getPos(), gTrue);
- outStr->printf("%d 0 obj\n", rootNum + 1);
- outStr->printf("<< /Type /Pages /Kids [");
- for (j = 0; j < (int) pages.size(); j++)
- outStr->printf(" %d 0 R", rootNum + j + 2);
- outStr->printf(" ] /Count %d >>\nendobj\n", pages.size());
- objectsCount++;
-
- for (i = 0; i < (int) pages.size(); i++) {
- yRef->add(rootNum + i + 2, 0, outStr->getPos(), gTrue);
- outStr->printf("%d 0 obj\n", rootNum + i + 2);
- outStr->printf("<< ");
- Dict *pageDict = pages[i].getDict();
- for (j = 0; j < pageDict->getLength(); j++) {
- if (j > 0)
- outStr->printf(" ");
- const char *key = pageDict->getKey(j);
- Object value;
- pageDict->getValNF(j, &value);
- if (strcmp(key, "Parent") == 0) {
- outStr->printf("/Parent %d 0 R", rootNum + 1);
- } else {
- outStr->printf("/%s ", key);
- PDFDoc::writeObject(&value, NULL, outStr, yRef, offsets[i]);
- }
- value.free();
- }
- outStr->printf(" >>\nendobj\n");
- objectsCount++;
- }
- Guint uxrefOffset = outStr->getPos();
- yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */ );
-
- Ref ref;
- ref.num = rootNum;
- ref.gen = 0;
- PDFDoc::writeTrailer(uxrefOffset, objectsCount, outStr, (GBool) gFalse, 0,
- &ref, yRef, fileName, outStr->getPos());
-
- outStr->close();
- fclose(f);
- delete yRef;
- delete countRef;
- for (j = 0; j < (int) pages.size (); j++) pages[j].free();
- for (i = 0; i < (int) docs.size (); i++) delete docs[i];
- return exitCode;
-}
diff --git a/utils/pdfseparate.1 b/utils/pdfseparate.1
new file mode 100644
index 0000000..f871d3b
--- /dev/null
+++ b/utils/pdfseparate.1
@@ -0,0 +1,49 @@
+.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org
+.TH pdfseparate 1 "15 September 2011"
+.SH NAME
+pdfseparate \- Portable Document Format (PDF) page extractor
+.SH SYNOPSIS
+.B pdfseparate
+[options]
+.I PDF-file PDF-page-pattern
+.SH DESCRIPTION
+.B pdfseparate
+extracts single pages from a Portable Document Format (PDF) file.
+.PP
+pdfseparate reads the PDF file
+.IR PDF-file ,
+extracts one or more pages, and writes one PDF file for each page to
+.IR PDF-page-pattern .
+PDF-page-pattern should contain
+.BR %d .
+%d is replaced by the page number.
+.TP
+The PDF-file should not be encrypted.
+.SH OPTIONS
+.TP
+.BI \-f " number"
+Specifies the first page to extract. If -f is omitted, extraction starts with page 1.
+.TP
+.BI \-l " number"
+Specifies the last page to extract. If -l is omitted, extraction ends with the last page.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXAMPLE
+pdfseparate sample.pdf sample-%d.pdf
+.TP
+extracts all pages from sample.pdf; e.g. if sample.pdf has 3 pages, it produces
+.TP
+sample-1.pdf, sample-2.pdf, sample-3.pdf
+.SH AUTHOR
+The pdfseparate software and documentation are copyright 1996-2004 Glyph
+& Cog, LLC and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org
+.SH "SEE ALSO"
+.BR pdfunite (1),
diff --git a/utils/pdfseparate.cc b/utils/pdfseparate.cc
new file mode 100644
index 0000000..9e62705
--- /dev/null
+++ b/utils/pdfseparate.cc
@@ -0,0 +1,115 @@
+//========================================================================
+//
+// pdfseparate.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag at alfa.de>
+//
+//========================================================================
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "PDFDoc.h"
+#include "ErrorCodes.h"
+
+static int firstPage = 0;
+static int lastPage = 0;
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static const ArgDesc argDesc[] = {
+ {"-f", argInt, &firstPage, 0,
+ "first page to extract"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to extract"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+bool extractPages (const char *srcFileName, const char *destFileName) {
+ char pathName[1024];
+ GooString *gfileName = new GooString (srcFileName);
+ PDFDoc *doc = new PDFDoc (gfileName, NULL, NULL, NULL);
+
+ if (!doc->isOk()) {
+ error(-1, "Could not extract page(s) from damaged file ('%s')", srcFileName);
+ return false;
+ }
+ if (doc->isEncrypted()) {
+ error(-1, "Could not extract page(s) from encrypted file ('%s')", srcFileName);
+ return false;
+ }
+
+ if (firstPage == 0 && lastPage == 0) {
+ firstPage = 1;
+ lastPage = doc->getNumPages();
+ }
+ if (lastPage == 0)
+ lastPage = doc->getNumPages();
+ if (firstPage == 0)
+ firstPage = 1;
+ if (firstPage != lastPage && strstr(destFileName, "%d") == NULL) {
+ error(-1, "'%s' must contain '%%d' if more than one page should be extracted", destFileName);
+ return false;
+ }
+ for (int pageNo = firstPage; pageNo <= lastPage; pageNo++) {
+ sprintf (pathName, destFileName, pageNo);
+ GooString *gpageName = new GooString (pathName);
+ int errCode = doc->savePageAs(gpageName, pageNo);
+ if ( errCode != errNone) {
+ delete gpageName;
+ delete gfileName;
+ return false;
+ }
+ delete gpageName;
+ }
+ delete gfileName;
+ return true;
+}
+
+int
+main (int argc, char *argv[])
+{
+ Object info;
+ GBool ok;
+ int exitCode;
+
+ exitCode = 99;
+
+ // parse args
+ ok = parseArgs (argDesc, &argc, argv);
+ if (!ok || argc != 3 || printVersion || printHelp)
+ {
+ fprintf (stderr, "pdfseparate version %s\n", PACKAGE_VERSION);
+ fprintf (stderr, "%s\n", popplerCopyright);
+ fprintf (stderr, "%s\n", xpdfCopyright);
+ if (!printVersion)
+ {
+ printUsage ("pdfseparate", "<PDF-sourcefile> <PDF-pattern-destfile>",
+ argDesc);
+ }
+ if (printVersion || printHelp)
+ exitCode = 0;
+ goto err0;
+ }
+ extractPages (argv[1], argv[2]);
+
+err0:
+
+ return exitCode;
+}
diff --git a/utils/pdfunite.1 b/utils/pdfunite.1
new file mode 100644
index 0000000..9b1f2e8
--- /dev/null
+++ b/utils/pdfunite.1
@@ -0,0 +1,33 @@
+.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org
+.TH pdfunite 1 "15 September 2011"
+.SH NAME
+pdfunite \- Portable Document Format (PDF) page merger
+.SH SYNOPSIS
+.B pdfunite
+[options]
+.I PDF-sourcefile1..PDF-sourcefilen PDF-destfile
+.SH DESCRIPTION
+.B pdfunite
+merges several PDF (Portable Document Format) files in order of their occurrence on command line into one PDF result file.
+.TP
+None of the files PDF-sourcefile1 to PDF-sourcefilen should be encrypted.
+.SH OPTIONS
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXAMPLE
+pdfunite sample1.pdf sample2.pdf sample.pdf
+.TP
+merges all pages from sample1.pdf and sample2.pdf (in that order) and creates sample.pdf
+.SH AUTHOR
+The pdfunite software and documentation are copyright 1996-2004 Glyph & Cog, LLC
+and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org
+.SH "SEE ALSO"
+.BR pdfseparate (1),
diff --git a/utils/pdfunite.cc b/utils/pdfunite.cc
new file mode 100644
index 0000000..3b3d2bb
--- /dev/null
+++ b/utils/pdfunite.cc
@@ -0,0 +1,176 @@
+//========================================================================
+//
+// pdfunite.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag at alfa.de>
+//
+//========================================================================
+#include <PDFDoc.h>
+#include "parseargs.h"
+#include "config.h"
+#include <poppler-config.h>
+#include <vector>
+
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static const ArgDesc argDesc[] = {
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+///////////////////////////////////////////////////////////////////////////
+int main (int argc, char *argv[])
+///////////////////////////////////////////////////////////////////////////
+// Merge PDF files given by arguments 1 to argc-2 and write the result
+// to the file specified by argument argc-1.
+///////////////////////////////////////////////////////////////////////////
+{
+ int objectsCount = 0;
+ Guint numOffset = 0;
+ std::vector<Object> pages;
+ std::vector<Guint> offsets;
+ XRef *yRef, *countRef;
+ FILE *f;
+ OutStream *outStr;
+ int i;
+ int j, rootNum;
+ std::vector<PDFDoc *>docs;
+ int majorVersion = 0;
+ int minorVersion = 0;
+ char *fileName = argv[argc - 1];
+ int exitCode;
+
+ exitCode = 99;
+ if (argc <= 3 || printVersion || printHelp) {
+ fprintf(stderr, "pdfunite version %s\n", PACKAGE_VERSION);
+ fprintf(stderr, "%s\n", popplerCopyright);
+ fprintf(stderr, "%s\n", xpdfCopyright);
+ if (!printVersion) {
+ printUsage("pdfunite", "<PDF-sourcefile-1>..<PDF-sourcefile-n> <PDF-destfile>",
+ argDesc);
+ }
+ if (printVersion || printHelp)
+ exitCode = 0;
+ return exitCode;
+ }
+ exitCode = 0;
+
+ for (i = 1; i < argc - 1; i++) {
+ GooString *gfileName = new GooString(argv[i]);
+ PDFDoc *doc = new PDFDoc(gfileName, NULL, NULL, NULL);
+ if (doc->isOk() && !doc->isEncrypted()) {
+ docs.push_back(doc);
+ if (doc->getPDFMajorVersion() > majorVersion) {
+ majorVersion = doc->getPDFMajorVersion();
+ minorVersion = doc->getPDFMinorVersion();
+ } else if (doc->getPDFMajorVersion() == majorVersion) {
+ if (doc->getPDFMinorVersion() > minorVersion) {
+ minorVersion = doc->getPDFMinorVersion();
+ }
+ }
+ } else if (doc->isOk()) {
+ error(-1, "Could not merge encrypted files ('%s')", argv[i]);
+ return -1;
+ } else {
+ error(-1, "Could not merge damaged documents ('%s')", argv[i]);
+ return -1;
+ }
+ }
+
+ if (!(f = fopen(fileName, "wb"))) {
+ error(-1, "Could not open file '%s'", fileName);
+ return -1;
+ }
+ outStr = new FileOutStream(f, 0);
+
+ yRef = new XRef();
+ countRef = new XRef();
+ yRef->add(0, 65535, 0, gFalse);
+ PDFDoc::writeHeader(outStr, majorVersion, minorVersion);
+
+ for (i = 0; i < (int) docs.size(); i++) {
+ for (j = 1; j <= docs[i]->getNumPages(); j++) {
+ PDFRectangle *cropBox = NULL;
+ if (docs[i]->getCatalog()->getPage(j)->isCropped())
+ cropBox = docs[i]->getCatalog()->getPage(j)->getCropBox();
+ docs[i]->replacePageDict(j,
+ docs[i]->getCatalog()->getPage(j)->getRotate(),
+ docs[i]->getCatalog()->getPage(j)->getMediaBox(), cropBox, NULL);
+ Ref *refPage = docs[i]->getCatalog()->getPageRef(j);
+ Object page;
+ docs[i]->getXRef()->fetch(refPage->num, refPage->gen, &page);
+ pages.push_back(page);
+ offsets.push_back(numOffset);
+ Dict *pageDict = page.getDict();
+ docs[i]->markPageObjects(pageDict, yRef, countRef, numOffset);
+ }
+ objectsCount += docs[i]->writePageObjects(outStr, yRef, numOffset);
+ numOffset = yRef->getNumObjects() + 1;
+ }
+
+ rootNum = yRef->getNumObjects() + 1;
+ yRef->add(rootNum, 0, outStr->getPos(), gTrue);
+ outStr->printf("%d 0 obj\n", rootNum);
+ outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
+ outStr->printf(">>\nendobj\n");
+ objectsCount++;
+
+ yRef->add(rootNum + 1, 0, outStr->getPos(), gTrue);
+ outStr->printf("%d 0 obj\n", rootNum + 1);
+ outStr->printf("<< /Type /Pages /Kids [");
+ for (j = 0; j < (int) pages.size(); j++)
+ outStr->printf(" %d 0 R", rootNum + j + 2);
+ outStr->printf(" ] /Count %d >>\nendobj\n", pages.size());
+ objectsCount++;
+
+ for (i = 0; i < (int) pages.size(); i++) {
+ yRef->add(rootNum + i + 2, 0, outStr->getPos(), gTrue);
+ outStr->printf("%d 0 obj\n", rootNum + i + 2);
+ outStr->printf("<< ");
+ Dict *pageDict = pages[i].getDict();
+ for (j = 0; j < pageDict->getLength(); j++) {
+ if (j > 0)
+ outStr->printf(" ");
+ const char *key = pageDict->getKey(j);
+ Object value;
+ pageDict->getValNF(j, &value);
+ if (strcmp(key, "Parent") == 0) {
+ outStr->printf("/Parent %d 0 R", rootNum + 1);
+ } else {
+ outStr->printf("/%s ", key);
+ PDFDoc::writeObject(&value, NULL, outStr, yRef, offsets[i]);
+ }
+ value.free();
+ }
+ outStr->printf(" >>\nendobj\n");
+ objectsCount++;
+ }
+ Guint uxrefOffset = outStr->getPos();
+ yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */ );
+
+ Ref ref;
+ ref.num = rootNum;
+ ref.gen = 0;
+ PDFDoc::writeTrailer(uxrefOffset, objectsCount, outStr, (GBool) gFalse, 0,
+ &ref, yRef, fileName, outStr->getPos());
+
+ outStr->close();
+ fclose(f);
+ delete yRef;
+ delete countRef;
+ for (j = 0; j < (int) pages.size (); j++) pages[j].free();
+ for (i = 0; i < (int) docs.size (); i++) delete docs[i];
+ return exitCode;
+}
More information about the poppler
mailing list