[PATCH] save space in written files (saveCompleteRewrite): introduce two helpers, in Linearization.cc/Linearization.h and in XRef.h, to get the object number of the linearization dict and the xref-stream flag, respectively; adapt saveCompleteRewrite so that it does not write the content of object streams, xref streams and the linearization dict; write a free entry to the new xref for each of these objects
Axel Struebing
axel.struebing at freenet.de
Wed Oct 19 06:12:44 PDT 2011
---
poppler/Linearization.cc | 4 ++++
poppler/Linearization.h | 3 ++-
poppler/PDFDoc.cc | 39 ++++++++++++++++++++++++++++++++++++---
poppler/XRef.h | 2 ++
4 files changed, 44 insertions(+), 4 deletions(-)
diff --git a/poppler/Linearization.cc b/poppler/Linearization.cc
index 73dc5ad..51c534e 100644
--- a/poppler/Linearization.cc
+++ b/poppler/Linearization.cc
@@ -34,7 +34,11 @@ Linearization::Linearization (BaseStream *str)
parser->getObj(&linDict);
if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") && linDict.isDict()) {
linDict.dictLookup("Linearized", &obj5);
+ linRef.num = obj1.getInt();
+ linRef.gen = obj2.getInt();
if (!(obj5.isNum() && obj5.getNum() > 0)) {
+ linRef.num = -1;
+ linRef.gen = -1;
linDict.free();
linDict.initNull();
}
diff --git a/poppler/Linearization.h b/poppler/Linearization.h
index 6728a75..55cd57b 100644
--- a/poppler/Linearization.h
+++ b/poppler/Linearization.h
@@ -35,11 +35,12 @@ public:
int getNumPages();
Guint getMainXRefEntriesOffset();
int getPageFirst();
+ Ref getLinRef() { return linRef; } // num/gen of the linearization dict object; the constructor sets both to -1 when the Linearized entry is not a positive number
private:
Object linDict;
-
+ Ref linRef;
};
#endif
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 01d2759..68c0c1e 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -814,15 +814,50 @@ void PDFDoc::saveIncrementalUpdate (OutStream* outStr)
delete uxref;
}
+/*
+ We can save space in saveCompleteRewrite:
+ - it writes every non-free object to the new file, including its
+ object number, in a standard xref table
+ - objects originating from object streams are expanded to normal objects
+ - the input object streams and xref streams were nevertheless still copied to the output file
+ - so we can save space by not copying them
+ - a further saving comes from not writing the linearization dict
+*/
void PDFDoc::saveCompleteRewrite (OutStream* outStr)
{
- outStr->printf("%%PDF-%d.%d\r\n",pdfMajorVersion,pdfMinorVersion);
+ Ref linRef = getLinearization()->getLinRef();
+ GBool noObjstrm = xref->getxRefStream();//file used xref stream
+
+ writeHeader(outStr, pdfMajorVersion, pdfMinorVersion);
XRef *uxref = new XRef();
uxref->add(0, 65535, 0, gFalse);
for(int i=0; i<xref->getNumObjects(); i++) {
Object obj1;
Ref ref;
XRefEntryType type = xref->getEntry(i)->type;
+ // set entry to free if ObjStm or XRef
+ if ( noObjstrm && ( type == xrefEntryUncompressed || type == xrefEntryCompressed ) ) {
+ ref.num = i;
+ if ( type == xrefEntryUncompressed ) {
+ ref.gen = xref->getEntry(i)->gen;
+ } else {
+ ref.gen = 0; //compressed entries have gen == 0
+ }
+ xref->fetch(ref.num, ref.gen, &obj1);
+ if ( obj1.isStream("XRef") || obj1.isStream("ObjStm") ) {
+ // do not write content and add as free
+ uxref->add(ref.num, ref.gen, 0, gFalse);
+ obj1.free();
+ continue;
+ }
+ obj1.free();
+ }
+ // do not write linearization dict, add as free (ref may still be unset here, so use i and linRef.gen)
+ if ( linRef.num == i ) {
+ uxref->add(i, linRef.gen, 0, gFalse);
+ continue;
+ }
+
if (type == xrefEntryFree) {
ref.num = i;
ref.gen = xref->getEntry(i)->gen;
@@ -851,9 +886,7 @@ void PDFDoc::saveCompleteRewrite (OutStream* outStr)
writeTrailer(uxrefOffset, uxref->getSize(), outStr, gFalse);
-
delete uxref;
-
}
void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset)
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 8b77b6c..697d2c0 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -131,6 +131,8 @@ public:
Ref addIndirectObject (Object* o);
void add(int num, int gen, Guint offs, GBool used);
void writeToFile(OutStream* outStr, GBool writeAllEntries);
+ // Returns the xRefStream flag, i.e. whether this is a cross-reference-stream-only file
+ GBool getxRefStream() { return xRefStream; }
private:
--
1.7.3.4
--nextPart6438959.7A3akJjkYm--
More information about the poppler
mailing list