[poppler] 3 commits - poppler/PDFDoc.cc poppler/PDFDoc.h utils/CMakeLists.txt utils/Makefile.am utils/pdfextract.cc utils/pdfmerge.cc
Albert Astals Cid
aacid at kemper.freedesktop.org
Mon Aug 29 13:22:30 PDT 2011
poppler/PDFDoc.cc | 389 +++++++++++++++++++++++++++++++++++++++++++++------
poppler/PDFDoc.h | 30 +++
utils/CMakeLists.txt | 15 +
utils/Makefile.am | 15 +
utils/pdfextract.cc | 111 ++++++++++++++
utils/pdfmerge.cc | 176 +++++++++++++++++++++++
6 files changed, 690 insertions(+), 46 deletions(-)
New commits:
commit 1431564f3363a63a8669c8dd15970db814f4969f
Author: Thomas Freitag <Thomas.Freitag at alfa.de>
Date: Mon Aug 29 22:22:02 2011 +0200
Add pdfextract and pdfmerge
See "Creating PDF with poppler ?" thread for more info
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index beeedb8..90030cd 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -102,3 +102,18 @@ target_link_libraries(pdftohtml ${common_libs})
install(TARGETS pdftohtml DESTINATION bin)
install(FILES pdftohtml.1 DESTINATION share/man/man1)
+# pdfextract
+set(pdfextract_SOURCES ${common_srcs}
+ pdfextract.cc
+)
+add_executable(pdfextract ${pdfextract_SOURCES})
+target_link_libraries(pdfextract ${common_libs})
+install(TARGETS pdfextract DESTINATION bin)
+
+# pdfmerge
+set(pdfmerge_SOURCES ${common_srcs}
+ pdfmerge.cc
+)
+add_executable(pdfmerge ${pdfmerge_SOURCES})
+target_link_libraries(pdfmerge ${common_libs})
+install(TARGETS pdfmerge DESTINATION bin)
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 4faddad..30328f2 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -50,6 +50,8 @@ bin_PROGRAMS = \
pdftops \
pdftotext \
pdftohtml \
+ pdfextract \
+ pdfmerge \
$(pdftoppm_binary) \
$(pdftocairo_binary)
@@ -102,6 +104,14 @@ pdftohtml_SOURCES = \
HtmlUtils.h \
$(common)
+pdfextract_SOURCES = \
+ pdfextract.cc \
+ $(common)
+
+pdfmerge_SOURCES = \
+ pdfmerge.cc \
+ $(common)
+
# Yay, automake! It should be able to figure out that it has to dist
# pdftoppm.1, but nooo. So we just add it here.
diff --git a/utils/pdfextract.cc b/utils/pdfextract.cc
new file mode 100644
index 0000000..c8c4749
--- /dev/null
+++ b/utils/pdfextract.cc
@@ -0,0 +1,111 @@
+//========================================================================
+//
+// pdfextract.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag at alfa.de>
+//
+//========================================================================
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "PDFDoc.h"
+#include "ErrorCodes.h"
+
+static int firstPage = 0; // value of -f; 0 means "not given" (extractPages then starts at page 1)
+static int lastPage = 0; // value of -l; 0 means "not given" (extractPages then stops at the last page)
+static GBool printVersion = gFalse; // set by -v
+static GBool printHelp = gFalse; // set by -h, -help, --help or -?
+
+static const ArgDesc argDesc[] = { // option table handed to parseArgs() and printUsage() in main()
+ {"-f", argInt, &firstPage, 0,
+ "first page to extract"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to extract"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL} // terminating entry
+};
+
+// Write the pages firstPage..lastPage of srcFileName to separate PDF files.
+//
+// destFileName is used as a printf-style pattern that receives the page
+// number as its only argument (e.g. "page-%d.pdf").  The globals firstPage
+// and lastPage select the range; 0 means "from page 1" / "up to the last
+// page" respectively.
+//
+// Returns true when every page was saved, false when the source document
+// is damaged or encrypted, a file name does not fit the buffer, or
+// savePageAs() reports an error.
+bool extractPages (const char *srcFileName, const char *destFileName) {
+  char pathName[1024];
+  // NOTE(review): PDFDoc is assumed to take ownership of the file name
+  // string (pdfmerge deletes only its PDFDoc objects as well), so the
+  // GooString is not deleted separately -- confirm against PDFDoc.
+  PDFDoc *doc = new PDFDoc (new GooString (srcFileName), NULL, NULL, NULL);
+
+  if (!doc->isOk()) {
+    error(-1, "Could not extract page(s) from damaged file ('%s')", srcFileName);
+    delete doc;
+    return false;
+  }
+  if (doc->isEncrypted()) {
+    error(-1, "Could not extract page(s) from encrypted file ('%s')", srcFileName);
+    delete doc;
+    return false;
+  }
+
+  // fill in the defaults for the bounds the user did not specify
+  if (lastPage == 0)
+    lastPage = doc->getNumPages();
+  if (firstPage == 0)
+    firstPage = 1;
+
+  bool ok = true;
+  for (int pageNo = firstPage; ok && pageNo <= lastPage; pageNo++) {
+    // The pattern comes from the command line, so bound the write and
+    // refuse names that would have been truncated.
+    int len = snprintf (pathName, sizeof (pathName), destFileName, pageNo);
+    if (len < 0 || len >= (int) sizeof (pathName)) {
+      error(-1, "Destination file name too long for page %d", pageNo);
+      ok = false;
+      break;
+    }
+    GooString *gpageName = new GooString (pathName);
+    ok = doc->savePageAs(gpageName, pageNo) == errNone;
+    delete gpageName;
+  }
+  delete doc;
+  return ok;
+}
+
+// Entry point of pdfextract:
+//   pdfextract [options] <PDF-sourcefile> <PDF-pattern-destfile>
+//
+// Exit code: 0 when the pages were extracted or when version/help output
+// was requested, 99 for a usage error or a failed extraction.
+int
+main (int argc, char *argv[])
+{
+  int exitCode = 99;
+
+  // parse args
+  GBool ok = parseArgs (argDesc, &argc, argv);
+  if (!ok || argc != 3 || printVersion || printHelp)
+    {
+      fprintf (stderr, "pdfextract version %s\n", PACKAGE_VERSION);
+      fprintf (stderr, "%s\n", popplerCopyright);
+      fprintf (stderr, "%s\n", xpdfCopyright);
+      if (!printVersion)
+        {
+          printUsage ("pdfextract", "<PDF-sourcefile> <PDF-pattern-destfile>",
+                      argDesc);
+        }
+      if (printVersion || printHelp)
+        exitCode = 0;
+      return exitCode;
+    }
+
+  // report success only when the extraction actually worked; previously
+  // the result was dropped and every real run exited with 99
+  if (extractPages (argv[1], argv[2]))
+    exitCode = 0;
+
+  return exitCode;
+}
diff --git a/utils/pdfmerge.cc b/utils/pdfmerge.cc
new file mode 100644
index 0000000..28f7265
--- /dev/null
+++ b/utils/pdfmerge.cc
@@ -0,0 +1,176 @@
+//========================================================================
+//
+// pdfmerge.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag at alfa.de>
+//
+//========================================================================
+#include <PDFDoc.h>
+#include "parseargs.h"
+#include "config.h"
+#include <poppler-config.h>
+#include <vector>
+
+static GBool printVersion = gFalse; // target of the -v entry in argDesc
+static GBool printHelp = gFalse; // target of the -h, -help, --help and -? entries in argDesc
+
+static const ArgDesc argDesc[] = { // option table; main() passes it to printUsage()
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL} // terminating entry
+};
+
+///////////////////////////////////////////////////////////////////////////
+// Merge PDF files given by arguments 1 to argc-2 and write the result
+// to the file specified by argument argc-1.
+//
+// Exit code: 0 on success or when version/help output was requested,
+// 99 on a usage error, -1 when a source file is damaged or encrypted or
+// the destination file cannot be opened.
+///////////////////////////////////////////////////////////////////////////
+int main (int argc, char *argv[])
+{
+  int objectsCount = 0;
+  Guint numOffset = 0;
+  std::vector<Object> pages;
+  std::vector<Guint> offsets;
+  std::vector<PDFDoc *> docs;
+  XRef *yRef, *countRef;
+  FILE *f;
+  OutStream *outStr;
+  int i, j, rootNum;
+  int majorVersion = 0;
+  int minorVersion = 0;
+
+  // Parse the option flags; without this call -v / -h are never
+  // recognised and would be treated as file names.
+  GBool ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc <= 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdfmerge version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfmerge", "<PDF-sourcefile-1>..<PDF-sourcefile-n> <PDF-destfile>",
+                 argDesc);
+    }
+    return (printVersion || printHelp) ? 0 : 99;
+  }
+  // parseArgs may rewrite argc/argv, so pick the destination only now
+  char *fileName = argv[argc - 1];
+
+  // Load all source documents and determine the highest PDF version.
+  for (i = 1; i < argc - 1; i++) {
+    PDFDoc *doc = new PDFDoc(new GooString(argv[i]), NULL, NULL, NULL);
+    if (!doc->isOk() || doc->isEncrypted()) {
+      if (doc->isOk()) {
+        error(-1, "Could not merge encrypted files ('%s')", argv[i]);
+      } else {
+        error(-1, "Could not merge damaged documents ('%s')", argv[i]);
+      }
+      // release everything loaded so far before bailing out
+      delete doc;
+      for (j = 0; j < (int) docs.size(); j++) delete docs[j];
+      return -1;
+    }
+    docs.push_back(doc);
+    if (doc->getPDFMajorVersion() > majorVersion) {
+      majorVersion = doc->getPDFMajorVersion();
+      minorVersion = doc->getPDFMinorVersion();
+    } else if (doc->getPDFMajorVersion() == majorVersion) {
+      if (doc->getPDFMinorVersion() > minorVersion) {
+        minorVersion = doc->getPDFMinorVersion();
+      }
+    }
+  }
+
+  if (!(f = fopen(fileName, "wb"))) {
+    error(-1, "Could not open file '%s'", fileName);
+    for (j = 0; j < (int) docs.size(); j++) delete docs[j];
+    return -1;
+  }
+  outStr = new FileOutStream(f, 0);
+
+  yRef = new XRef();
+  countRef = new XRef();
+  yRef->add(0, 65535, 0, gFalse);
+  PDFDoc::writeHeader(outStr, majorVersion, minorVersion);
+
+  // Write the objects used by every page; the object numbers of each
+  // document are shifted by numOffset so they do not collide.
+  for (i = 0; i < (int) docs.size(); i++) {
+    for (j = 1; j <= docs[i]->getNumPages(); j++) {
+      PDFRectangle *cropBox = NULL;
+      if (docs[i]->getCatalog()->getPage(j)->isCropped())
+        cropBox = docs[i]->getCatalog()->getPage(j)->getCropBox();
+      docs[i]->replacePageDict(j,
+        docs[i]->getCatalog()->getPage(j)->getRotate(),
+        docs[i]->getCatalog()->getPage(j)->getMediaBox(), cropBox, NULL);
+      Ref *refPage = docs[i]->getCatalog()->getPageRef(j);
+      Object page;
+      docs[i]->getXRef()->fetch(refPage->num, refPage->gen, &page);
+      pages.push_back(page);
+      offsets.push_back(numOffset);
+      Dict *pageDict = page.getDict();
+      docs[i]->markPageObjects(pageDict, yRef, countRef, numOffset);
+    }
+    objectsCount += docs[i]->writePageObjects(outStr, yRef, numOffset);
+    numOffset = yRef->getNumObjects() + 1;
+  }
+
+  // catalog
+  rootNum = yRef->getNumObjects() + 1;
+  yRef->add(rootNum, 0, outStr->getPos(), gTrue);
+  outStr->printf("%d 0 obj\n", rootNum);
+  outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
+  outStr->printf(">>\nendobj\n");
+  objectsCount++;
+
+  // page tree root listing all merged pages
+  yRef->add(rootNum + 1, 0, outStr->getPos(), gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 1);
+  outStr->printf("<< /Type /Pages /Kids [");
+  for (j = 0; j < (int) pages.size(); j++)
+    outStr->printf(" %d 0 R", rootNum + j + 2);
+  // size() is a size_t; cast so the argument matches the %d conversion
+  outStr->printf(" ] /Count %d >>\nendobj\n", (int) pages.size());
+  objectsCount++;
+
+  // page dictionaries, re-parented to the new page tree root
+  for (i = 0; i < (int) pages.size(); i++) {
+    yRef->add(rootNum + i + 2, 0, outStr->getPos(), gTrue);
+    outStr->printf("%d 0 obj\n", rootNum + i + 2);
+    outStr->printf("<< ");
+    Dict *pageDict = pages[i].getDict();
+    for (j = 0; j < pageDict->getLength(); j++) {
+      if (j > 0)
+        outStr->printf(" ");
+      const char *key = pageDict->getKey(j);
+      Object value;
+      pageDict->getValNF(j, &value);
+      if (strcmp(key, "Parent") == 0) {
+        outStr->printf("/Parent %d 0 R", rootNum + 1);
+      } else {
+        outStr->printf("/%s ", key);
+        PDFDoc::writeObject(&value, NULL, outStr, yRef, offsets[i]);
+      }
+      value.free();
+    }
+    outStr->printf(" >>\nendobj\n");
+    objectsCount++;
+  }
+  Guint uxrefOffset = outStr->getPos();
+  yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */ );
+
+  Ref ref;
+  ref.num = rootNum;
+  ref.gen = 0;
+  PDFDoc::writeTrailer(uxrefOffset, objectsCount, outStr, (GBool) gFalse, 0,
+                       &ref, yRef, fileName, outStr->getPos());
+
+  outStr->close();
+  delete outStr;
+  fclose(f);
+  delete yRef;
+  delete countRef;
+  for (j = 0; j < (int) pages.size (); j++) pages[j].free();
+  for (i = 0; i < (int) docs.size (); i++) delete docs[i];
+  return 0;
+}
commit 8ca2f41089bc6402baf9b24428af04314c037b54
Author: Thomas Freitag <Thomas.Freitag at alfa.de>
Date: Mon Aug 29 22:20:52 2011 +0200
Rework writing of PDF files
Makes it more compatible with other PDF readers
See "Creating PDF with poppler ?" thread in the mailing list for more info
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index aa52140..01d2759 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -26,6 +26,7 @@
// Copyright (C) 2010 Ilya Gorenbein <igorenbein at finjan.com>
// Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla at geodesic.com>
// Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop at gmail.com>
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag at alfa.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -573,6 +574,121 @@ Hints *PDFDoc::getHints()
return hints;
}
+// Save page pageNo (1-based) of this document as a new single-page PDF
+// file called name.
+//
+// The page dictionary of this document is rewritten through
+// replacePageDict() before anything is written.  Returns errNone on
+// success and errOpenFile when pageNo is out of range or the output file
+// cannot be opened.
+int PDFDoc::savePageAs(GooString *name, int pageNo)
+{
+  FILE *f;
+  OutStream *outStr;
+  XRef *yRef, *countRef;
+  // The new catalog, page tree and page objects get the numbers rootNum,
+  // rootNum + 1 and rootNum + 2.
+  int rootNum = getXRef()->getSize() + 1;
+
+  if (pageNo < 1 || pageNo > getNumPages()) {
+    error(-1, "Illegal pageNo: %d(%d)", pageNo, getNumPages() );
+    return errOpenFile;
+  }
+  PDFRectangle *cropBox = NULL;
+  if (getCatalog()->getPage(pageNo)->isCropped()) {
+    cropBox = getCatalog()->getPage(pageNo)->getCropBox();
+  }
+  replacePageDict(pageNo,
+    getCatalog()->getPage(pageNo)->getRotate(),
+    getCatalog()->getPage(pageNo)->getMediaBox(),
+    cropBox, NULL);
+  Ref *refPage = getCatalog()->getPageRef(pageNo);
+  Object page;
+  getXRef()->fetch(refPage->num, refPage->gen, &page);
+
+  if (!(f = fopen(name->getCString(), "wb"))) {
+    error(-1, "Couldn't open file '%s'", name->getCString());
+    page.free(); // do not leak the fetched page on the error path
+    return errOpenFile;
+  }
+  outStr = new FileOutStream(f,0);
+
+  yRef = new XRef();
+  countRef = new XRef();
+  yRef->add(0, 65535, 0, gFalse);
+  writeHeader(outStr, getPDFMajorVersion(), getPDFMinorVersion());
+
+  // get and mark optional content groups
+  OCGs *ocgs = getCatalog()->getOptContentConfig();
+  if (ocgs != NULL) {
+    Object catDict, optContentProps;
+    getXRef()->getCatalog(&catDict);
+    catDict.dictLookup("OCProperties", &optContentProps);
+    Dict *ocDict = optContentProps.getDict();
+    markPageObjects(ocDict, yRef, countRef, 0);
+    catDict.free();
+    optContentProps.free();
+  }
+
+  // mark and write everything the page refers to
+  Dict *pageDict = page.getDict();
+  markPageObjects(pageDict, yRef, countRef, 0);
+  Guint objectsCount = writePageObjects(outStr, yRef, 0);
+
+  // catalog, carrying over the OCProperties entries if there are any
+  yRef->add(rootNum,0,outStr->getPos(),gTrue);
+  outStr->printf("%d 0 obj\n", rootNum);
+  outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
+  if (ocgs != NULL) {
+    Object catDict, optContentProps;
+    getXRef()->getCatalog(&catDict);
+    catDict.dictLookup("OCProperties", &optContentProps);
+    outStr->printf(" /OCProperties <<");
+    Dict *ocDict = optContentProps.getDict();
+    for (int n = 0; n < ocDict->getLength(); n++) {
+      if (n > 0) outStr->printf(" ");
+      const char *key = ocDict->getKey(n);
+      Object value; ocDict->getValNF(n, &value);
+      outStr->printf("/%s ", key);
+      writeObject(&value, NULL, outStr, getXRef(), 0);
+      value.free();
+    }
+    outStr->printf(" >> ");
+    catDict.free();
+    optContentProps.free();
+  }
+  outStr->printf(">>\nendobj\n");
+  objectsCount++;
+
+  // page tree root with the single page as its only kid
+  yRef->add(rootNum + 1,0,outStr->getPos(),gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 1);
+  outStr->printf("<< /Type /Pages /Kids [ %d 0 R ] /Count 1 >>\n", rootNum + 2);
+  outStr->printf("endobj\n");
+  objectsCount++;
+
+  // page dictionary, re-parented to the new page tree root
+  yRef->add(rootNum + 2,0,outStr->getPos(),gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 2);
+  outStr->printf("<< ");
+  for (int n = 0; n < pageDict->getLength(); n++) {
+    if (n > 0) outStr->printf(" ");
+    const char *key = pageDict->getKey(n);
+    Object value; pageDict->getValNF(n, &value);
+    if (strcmp(key, "Parent") == 0) {
+      outStr->printf("/Parent %d 0 R", rootNum + 1);
+    } else {
+      outStr->printf("/%s ", key);
+      writeObject(&value, NULL, outStr, getXRef(), 0);
+    }
+    value.free();
+  }
+  outStr->printf(" >>\nendobj\n");
+  objectsCount++;
+  page.free();
+
+  Guint uxrefOffset = outStr->getPos();
+  yRef->writeToFile(outStr, gFalse /* do not write unnecessary entries */);
+
+  Ref ref;
+  ref.num = rootNum;
+  ref.gen = 0;
+  writeTrailer(uxrefOffset, objectsCount, outStr, gFalse, 0, &ref, getXRef(), name->getCString(), outStr->getPos());
+
+  outStr->close();
+  delete outStr; // the stream wrapper was previously leaked
+  fclose(f);
+  delete yRef;
+  delete countRef;
+
+  return errNone;
+}
+
int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
FILE *f;
OutStream *outStr;
@@ -740,7 +856,7 @@ void PDFDoc::saveCompleteRewrite (OutStream* outStr)
}
-void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr)
+void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset)
{
Object obj1;
outStr->printf("<<");
@@ -749,7 +865,7 @@ void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr)
GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */);
outStr->printf("/%s ", keyNameToPrint->getCString());
delete keyNameToPrint;
- writeObject(dict->getValNF(i, &obj1), NULL, outStr);
+ writeObject(dict->getValNF(i, &obj1), NULL, outStr, xRef, numOffset);
obj1.free();
}
outStr->printf(">> ");
@@ -805,18 +921,24 @@ void PDFDoc::writeString (GooString* s, OutStream* outStr)
const char* c = s->getCString();
outStr->printf("(");
for(int i=0; i<s->getLength(); i++) {
- char unescaped = (*c)&0x000000ff;
+ char unescaped = *(c+i)&0x000000ff;
//escape if needed
- if (unescaped == '(' || unescaped == ')' || unescaped == '\\')
- outStr->printf("%c", '\\');
- outStr->printf("%c", unescaped);
- c++;
+ if (unescaped == '\r')
+ outStr->printf("\\r");
+ else if (unescaped == '\n')
+ outStr->printf("\\n");
+ else {
+ if (unescaped == '(' || unescaped == ')' || unescaped == '\\') {
+ outStr->printf("%c", '\\');
+ }
+ outStr->printf("%c", unescaped);
+ }
}
outStr->printf(") ");
}
}
-Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
+Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr, XRef *xRef, Guint numOffset)
{
Array *array;
Object obj1;
@@ -858,13 +980,13 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
array = obj->getArray();
outStr->printf("[");
for (int i=0; i<array->getLength(); i++) {
- writeObject(array->getNF(i, &obj1), NULL,outStr);
+ writeObject(array->getNF(i, &obj1), NULL,outStr, xRef, numOffset);
obj1.free();
}
outStr->printf("] ");
break;
case objDict:
- writeDictionnary (obj->getDict(),outStr);
+ writeDictionnary (obj->getDict(),outStr, xRef, numOffset);
break;
case objStream:
{
@@ -886,7 +1008,7 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
stream->getDict()->remove("Filter");
stream->getDict()->remove("DecodeParms");
- writeDictionnary (stream->getDict(),outStr);
+ writeDictionnary (stream->getDict(),outStr, xRef, numOffset);
writeStream (stream,outStr);
obj1.free();
} else {
@@ -896,23 +1018,23 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
BaseStream *bs = fs->getBaseStream();
if (bs) {
Guint streamEnd;
- if (xref->getStreamEnd(bs->getStart(), &streamEnd)) {
+ if (xRef->getStreamEnd(bs->getStart(), &streamEnd)) {
Object val;
val.initInt(streamEnd - bs->getStart());
stream->getDict()->set("Length", &val);
}
}
}
- writeDictionnary (stream->getDict(), outStr);
+ writeDictionnary (stream->getDict(), outStr, xRef, numOffset);
writeRawStream (stream, outStr);
}
break;
}
case objRef:
- outStr->printf("%i %i R ", obj->getRef().num, obj->getRef().gen);
+ outStr->printf("%i %i R ", obj->getRef().num + numOffset, obj->getRef().gen);
break;
case objCmd:
- outStr->printf("cmd\r\n");
+ outStr->printf("%s\n", obj->getCmd());
break;
case objError:
outStr->printf("error\r\n");
@@ -932,9 +1054,12 @@ Guint PDFDoc::writeObject (Object* obj, Ref* ref, OutStream* outStr)
return offset;
}
-void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate)
+void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize,
+ OutStream* outStr, GBool incrUpdate,
+ Guint startxRef, Ref *root, XRef *xRef, const char *fileName,
+ Guint fileSize)
{
- Dict *trailerDict = new Dict(xref);
+ Dict *trailerDict = new Dict(xRef);
Object obj1;
obj1.initInt(uxrefSize);
trailerDict->set("Size", &obj1);
@@ -950,23 +1075,13 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
char buffer[256];
sprintf(buffer, "%i", (int)time(NULL));
message.append(buffer);
- if (fileName)
- message.append(fileName);
- else
- message.append("streamwithoutfilename.pdf");
- // file size
- unsigned int fileSize = 0;
- int c;
- str->reset();
- while ((c = str->getChar()) != EOF) {
- fileSize++;
- }
- str->close();
+ message.append(fileName);
+
sprintf(buffer, "%i", fileSize);
message.append(buffer);
//info dict -- only use text string
- if (xref->getDocInfo(&obj1)->isDict()) {
+ if (!xRef->getTrailerDict()->isNone() && xRef->getDocInfo(&obj1)->isDict()) {
for(int i=0; i<obj1.getDict()->getLength(); i++) {
Object obj2;
obj1.getDict()->getVal(i, &obj2);
@@ -985,12 +1100,12 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
//create ID array
Object obj2,obj3,obj5;
- obj2.initArray(xref);
+ obj2.initArray(xRef);
if (incrUpdate) {
Object obj4;
//only update the second part of the array
- xref->getTrailerDict()->getDict()->lookup("ID", &obj4);
+ xRef->getTrailerDict()->getDict()->lookup("ID", &obj4);
if (!obj4.isArray()) {
error(-1, "PDFDoc::writeTrailer original file's ID entry isn't an array. Trying to continue");
} else {
@@ -1010,22 +1125,23 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
trailerDict->set("ID", &obj2);
}
-
- obj1.initRef(xref->getRootNum(), xref->getRootGen());
+ obj1.initRef(root->num, root->gen);
trailerDict->set("Root", &obj1);
if (incrUpdate) {
- obj1.initInt(getStartXRef());
+ obj1.initInt(startxRef);
trailerDict->set("Prev", &obj1);
}
- xref->getDocInfoNF(&obj5);
- if (!obj5.isNull()) {
- trailerDict->set("Info", &obj5);
+ if (!xRef->getTrailerDict()->isNone()) {
+ xRef->getDocInfoNF(&obj5);
+ if (!obj5.isNull()) {
+ trailerDict->set("Info", &obj5);
+ }
}
outStr->printf( "trailer\r\n");
- writeDictionnary(trailerDict, outStr);
+ writeDictionnary(trailerDict, outStr, xRef, 0);
outStr->printf( "\r\nstartxref\r\n");
outStr->printf( "%i\r\n", uxrefOffset);
outStr->printf( "%%%%EOF\r\n");
@@ -1033,6 +1149,201 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
delete trailerDict;
}
+// Write the trailer for this document: collects the file name, the size
+// of the original stream, the root reference and the previous startxref
+// from this PDFDoc and forwards them to the static writeTrailer overload.
+void PDFDoc::writeTrailer(Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate)
+{
+  // const: one of the two sources is a string literal
+  const char *fileNameA;
+  if (fileName)
+    fileNameA = fileName->getCString();
+  else
+    fileNameA = "streamwithoutfilename.pdf";
+  // file size: count the bytes of the original stream
+  unsigned int fileSize = 0;
+  int c;
+  str->reset();
+  while ((c = str->getChar()) != EOF) {
+    fileSize++;
+  }
+  str->close();
+  Ref ref;
+  ref.num = getXRef()->getRootNum();
+  ref.gen = getXRef()->getRootGen();
+  writeTrailer(uxrefOffset, uxrefSize, outStr, incrUpdate, getStartXRef(), &ref, getXRef(), fileNameA, fileSize);
+}
+
+// Emit the file header to outStr: the "%PDF-<major>.<minor>" version line
+// followed by a comment line consisting of four bytes >= 0x80.
+void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
+{
+  // version line
+  outStr->printf("%%PDF-%d.%d\n", major, minor);
+
+  // binary comment line
+  outStr->printf("%%\xE2\xE3\xCF\xD3\n");
+}
+
+// Run markObject() over every value stored in dict (values are taken
+// without resolving references).
+void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset)
+{
+  int idx = 0;
+  while (idx < dict->getLength()) {
+    Object entry;
+    dict->getValNF(idx, &entry);
+    markObject(&entry, xRef, countRef, numOffset);
+    entry.free();
+    idx++;
+  }
+}
+
+// Recursively walk obj and register every indirect object it refers to.
+//
+// For each reference, an entry with number (num + numOffset) is added to
+// xRef unless one is already in use there, and the matching entry in
+// countRef is created with gen 1 or has its gen field incremented.  The
+// referenced object is then fetched from this document and walked too.
+// Arrays, dictionaries and stream dictionaries are traversed element by
+// element; all other object types are ignored.
+void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset)
+{
+  switch (obj->getType()) {
+    case objArray:
+      {
+        Array *items = obj->getArray();
+        for (int idx = 0; idx < items->getLength(); idx++) {
+          Object item;
+          markObject(items->getNF(idx, &item), xRef, countRef, numOffset);
+          item.free();
+        }
+      }
+      break;
+    case objDict:
+      markDictionnary (obj->getDict(), xRef, countRef, numOffset);
+      break;
+    case objStream:
+      markDictionnary (obj->getStream()->getDict(), xRef, countRef, numOffset);
+      break;
+    case objRef:
+      {
+        const Ref src = obj->getRef();
+        // number of the object in the output numbering
+        const int dstNum = src.num + (int) numOffset;
+
+        if (dstNum >= xRef->getNumObjects() || xRef->getEntry(dstNum)->type == xrefEntryFree) {
+          if (getXRef()->getEntry(src.num)->type == xrefEntryFree) {
+            return;  // already marked as free => should be replaced
+          }
+          xRef->add(dstNum, src.gen, 0, gTrue);
+          if (getXRef()->getEntry(src.num)->type == xrefEntryCompressed) {
+            xRef->getEntry(dstNum)->type = xrefEntryCompressed;
+          }
+        }
+
+        if (dstNum >= countRef->getNumObjects() ||
+            countRef->getEntry(dstNum)->type == xrefEntryFree) {
+          countRef->add(dstNum, 1, 0, gTrue);
+        } else {
+          countRef->getEntry(dstNum)->gen++;
+        }
+
+        // follow the reference into this document
+        Object target;
+        getXRef()->fetch(src.num, src.gen, &target);
+        markObject(&target, xRef, countRef, numOffset);
+        target.free();
+      }
+      break;
+    default:
+      break;
+  }
+}
+
+// Rewrite the dictionary of page pageNo in this document's XRef.
+//
+// MediaBox, CropBox, ArtBox, BleedBox, TrimBox and Rotate are removed;
+// MediaBox and Rotate are then set from the arguments, and CropBox too
+// when cropBox is not NULL.  When pageCTM is not NULL it is added as a
+// new indirect object and a reference to it is placed in front of the
+// page's existing Contents.  The page is finally registered as modified.
+//
+// Objects are built on the stack: Dict and Array keep their own copy of
+// what is added (the same convention as Dict::set() elsewhere in this
+// file), so heap-allocated wrappers would only be leaked.
+void PDFDoc::replacePageDict(int pageNo, int rotate,
+                             PDFRectangle *mediaBox,
+                             PDFRectangle *cropBox, Object *pageCTM)
+{
+  Ref *refPage = getCatalog()->getPageRef(pageNo);
+  Object page;
+  getXRef()->fetch(refPage->num, refPage->gen, &page);
+  Dict *pageDict = page.getDict();
+  pageDict->remove("MediaBox");
+  pageDict->remove("CropBox");
+  pageDict->remove("ArtBox");
+  pageDict->remove("BleedBox");
+  pageDict->remove("TrimBox");
+  pageDict->remove("Rotate");
+
+  const double mediaCoords[4] =
+    { mediaBox->x1, mediaBox->y1, mediaBox->x2, mediaBox->y2 };
+  Object mediaBoxObj;
+  mediaBoxObj.initArray(getXRef());
+  for (int i = 0; i < 4; i++) {
+    Object coord;
+    coord.initReal(mediaCoords[i]);
+    mediaBoxObj.arrayAdd(&coord);
+  }
+  pageDict->add(copyString("MediaBox"), &mediaBoxObj);
+
+  if (cropBox != NULL) {
+    const double cropCoords[4] =
+      { cropBox->x1, cropBox->y1, cropBox->x2, cropBox->y2 };
+    Object cropBoxObj;
+    cropBoxObj.initArray(getXRef());
+    for (int i = 0; i < 4; i++) {
+      Object coord;
+      coord.initReal(cropCoords[i]);
+      cropBoxObj.arrayAdd(&coord);
+    }
+    pageDict->add(copyString("CropBox"), &cropBoxObj);
+  }
+
+  Object rotateObj;
+  rotateObj.initInt(rotate);
+  pageDict->add(copyString("Rotate"), &rotateObj);
+
+  if (pageCTM != NULL) {
+    Ref cmRef = getXRef()->addIndirectObject(pageCTM);
+    Object ctmRef;
+    ctmRef.initRef(cmRef.num, cmRef.gen);
+    Object contents;
+    pageDict->lookupNF("Contents", &contents);
+    Object newContents;
+    newContents.initArray(getXRef());
+    // the CTM stream always comes first
+    newContents.arrayAdd(&ctmRef);
+    if (contents.getType() == objRef) {
+      newContents.arrayAdd(&contents);
+    } else if (contents.isArray()) {
+      for (int i = 0; i < contents.arrayGetLength(); i++) {
+        Object contentEle;
+        contents.arrayGetNF(i, &contentEle);
+        newContents.arrayAdd(&contentEle);
+      }
+      // the elements were copied; drop our reference to the old array
+      contents.free();
+    }
+    // any other type (e.g. a page without Contents) contributes nothing
+    pageDict->remove("Contents");
+    pageDict->add(copyString("Contents"), &newContents);
+  }
+  getXRef()->setModifiedObject(&page, *refPage);
+  page.free();
+}
+
+// Mark every object reachable from pageDict via markObject(), skipping
+// the "Parent" entry.
+void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset)
+{
+  int idx = 0;
+  while (idx < pageDict->getLength()) {
+    const char *entryKey = pageDict->getKey(idx);
+    Object entry;
+    pageDict->getValNF(idx, &entry);
+    const bool isParent = (strcmp(entryKey, "Parent") == 0);
+    if (!isParent) {
+      markObject(&entry, xRef, countRef, numOffset);
+    }
+    entry.free();
+    idx++;
+  }
+}
+
+// Write every non-free entry of xRef, starting at number numOffset, to
+// outStr.  Each object is fetched from this document under the number
+// (entry number - numOffset), written with writeObject(), and its xRef
+// entry is re-added with the offset writeObject() returned.
+// Returns the number of objects written.
+Guint PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset)
+{
+  Guint written = 0;
+
+  for (int num = numOffset; num < xRef->getNumObjects(); num++) {
+    if (xRef->getEntry(num)->type == xrefEntryFree) {
+      continue;
+    }
+    Ref dst;
+    dst.num = num;
+    dst.gen = xRef->getEntry(num)->gen;
+    written++;
+
+    Object fetched;
+    getXRef()->fetch(dst.num - numOffset, dst.gen, &fetched);
+    Guint offset = writeObject(&fetched, &dst, outStr, xRef, numOffset);
+    xRef->add(dst.num, dst.gen, offset, gTrue);
+    fetched.free();
+  }
+  return written;
+}
+
#ifndef DISABLE_OUTLINE
Outline *PDFDoc::getOutline()
{
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index a7113c8..92cee78 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -22,6 +22,7 @@
// Copyright (C) 2009 Kovid Goyal <kovid at kovidgoyal.net>
// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
// Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla at geodesic.com>
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag at alfa.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -219,6 +220,8 @@ public:
//Return the PDF ID in the trailer dictionary (if any).
GBool getID(GooString *permanent_id, GooString *update_id);
+ // Save one page with another name.
+ int savePageAs(GooString *name, int pageNo);
// Save this file with another name.
int saveAs(GooString *name, PDFWriteMode mode=writeStandard);
// Save this file in the given output stream.
@@ -231,14 +234,31 @@ public:
// Return a pointer to the GUI (XPDFCore or WinPDFCore object).
void *getGUIData() { return guiData; }
+ // rewrite pageDict with MediaBox, CropBox and new page CTM
+ void replacePageDict(int pageNo, int rotate, PDFRectangle *mediaBox, PDFRectangle *cropBox, Object *pageCTM);
+ void markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset);
+ // write all objects used by pageDict to outStr
+ Guint writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset);
+ static Guint writeObject (Object *obj, Ref *ref, OutStream* outStr, XRef *xref, Guint numOffset);
+ static void writeHeader(OutStream *outStr, int major, int minor);
+ static void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate,
+ Guint startxRef, Ref *root, XRef *xRef, const char *fileName, Guint fileSize);
+
private:
+ // insert referenced objects in XRef
+ void markDictionnary (Dict* dict, XRef *xRef, XRef *countRef, Guint numOffset);
+ void markObject (Object *obj, XRef *xRef, XRef *countRef, Guint numOffset);
+ static void writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset);
+
// Add object to current file stream and return the offset of the beginning of the object
- Guint writeObject (Object *obj, Ref *ref, OutStream* outStr);
- void writeDictionnary (Dict* dict, OutStream* outStr);
- void writeStream (Stream* str, OutStream* outStr);
- void writeRawStream (Stream* str, OutStream* outStr);
+ Guint writeObject (Object *obj, Ref *ref, OutStream* outStr)
+ { return writeObject(obj, ref, outStr, getXRef(), 0); }
+ void writeDictionnary (Dict* dict, OutStream* outStr)
+ { writeDictionnary(dict, outStr, getXRef(), 0); }
+ static void writeStream (Stream* str, OutStream* outStr);
+ static void writeRawStream (Stream* str, OutStream* outStr);
void writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr, GBool incrUpdate);
- void writeString (GooString* s, OutStream* outStr);
+ static void writeString (GooString* s, OutStream* outStr);
void saveIncrementalUpdate (OutStream* outStr);
void saveCompleteRewrite (OutStream* outStr);
commit 33da7e270431e8e4c500e7573b3ca0dddd9f237e
Author: suzuki toshiya <mpsuzuki at hiroshima-u.ac.jp>
Date: Sun Aug 28 22:07:38 2011 +0200
Fix building static-linked pdftocairo
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 373d049..4faddad 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -30,8 +30,9 @@ pdftocairo_SOURCES = \
pdftocairo.cc \
$(common)
-pdftocairo_LDADD = $(LDADD) $(CAIRO_LIBS) \
- $(top_builddir)/poppler/libpoppler-cairo.la
+pdftocairo_LDADD = \
+ $(top_builddir)/poppler/libpoppler-cairo.la \
+ $(LDADD) $(CAIRO_LIBS)
pdftocairo_binary = pdftocairo
More information about the poppler
mailing list