[PATCH] Save space in written files (saveCompleteRewrite): introduce two helpers, Linearization::getLinRef() (in Linearization.cc/Linearization.h) and XRef::getxRefStream() (in XRef.h), which return the object reference of the linearization dict and the xref-stream flag, respectively; adapt saveCompleteRewrite so that it does not write the content of object streams, xref streams, or the linearization dict; write a free entry to the new xref for each of these objects.

Axel Struebing axel.struebing at freenet.de
Wed Oct 19 06:12:44 PDT 2011


---
 poppler/Linearization.cc |    4 ++++
 poppler/Linearization.h  |    3 ++-
 poppler/PDFDoc.cc        |   39 ++++++++++++++++++++++++++++++++++++---
 poppler/XRef.h           |    2 ++
 4 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/poppler/Linearization.cc b/poppler/Linearization.cc
index 73dc5ad..51c534e 100644
--- a/poppler/Linearization.cc
+++ b/poppler/Linearization.cc
@@ -34,7 +34,11 @@ Linearization::Linearization (BaseStream *str)
   parser->getObj(&linDict);
   if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") && linDict.isDict()) {
     linDict.dictLookup("Linearized", &obj5);
+    linRef.num = obj1.getInt();
+    linRef.gen = obj2.getInt();
     if (!(obj5.isNum() && obj5.getNum() > 0)) {
+       linRef.num = -1;
+       linRef.gen = -1;
        linDict.free();
        linDict.initNull();
     }
diff --git a/poppler/Linearization.h b/poppler/Linearization.h
index 6728a75..55cd57b 100644
--- a/poppler/Linearization.h
+++ b/poppler/Linearization.h
@@ -35,11 +35,12 @@ public:
   int getNumPages();
   Guint getMainXRefEntriesOffset();
   int getPageFirst();
+  Ref getLinRef() { return linRef; }
 
 private:
 
   Object linDict;
-
+  Ref linRef;
 };
 
 #endif
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 01d2759..68c0c1e 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -814,15 +814,50 @@ void PDFDoc::saveIncrementalUpdate (OutStream* outStr)
   delete uxref;
 }
 
+/*
+  Saving space in saveCompleteRewrite:
+  - every non-free object is written to the new file, and its
+    object number is recorded in a standard xref table
+  - objects originating from object streams are expanded to normal objects
+  - previously, the input object/xref streams were still copied to the output
+  - those streams are then redundant, so we save space by not writing them
+  - we save further space by not writing the linearization dict
+*/
 void PDFDoc::saveCompleteRewrite (OutStream* outStr)
 {
-  outStr->printf("%%PDF-%d.%d\r\n",pdfMajorVersion,pdfMinorVersion);
+  Ref linRef = getLinearization()->getLinRef();
+  GBool noObjstrm = xref->getxRefStream();//file used xref stream
+
+  writeHeader(outStr, pdfMajorVersion, pdfMinorVersion);
   XRef *uxref = new XRef();
   uxref->add(0, 65535, 0, gFalse);
   for(int i=0; i<xref->getNumObjects(); i++) {
     Object obj1;
     Ref ref;
     XRefEntryType type = xref->getEntry(i)->type;
+    // set entry to free if ObjStm or XRef
+    if ( noObjstrm && ( type == xrefEntryUncompressed || type == xrefEntryCompressed ) ) {
+      ref.num = i;
+      if ( type == xrefEntryUncompressed ) {
+	ref.gen = xref->getEntry(i)->gen;
+      } else {
+	ref.gen = 0; //compressed entries have gen == 0
+      }
+      xref->fetch(ref.num, ref.gen, &obj1);
+      if ( obj1.isStream("XRef") || obj1.isStream("ObjStm") ) {
+	// do not write content and add as free
+	uxref->add(ref.num, ref.gen, 0, gFalse);
+	obj1.free();
+	continue;
+      }
+      obj1.free();
+    }
+    // do not write linearization dict, add as free
+    if ( linRef.num == i ) {
+      uxref->add(ref.num, ref.gen, 0, gFalse);
+      continue;
+    }
+
     if (type == xrefEntryFree) {
       ref.num = i;
       ref.gen = xref->getEntry(i)->gen;
@@ -851,9 +886,7 @@ void PDFDoc::saveCompleteRewrite (OutStream* outStr)
 
   writeTrailer(uxrefOffset, uxref->getSize(), outStr, gFalse);
 
-
   delete uxref;
-
 }
 
 void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset)
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 8b77b6c..697d2c0 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -131,6 +131,8 @@ public:
   Ref addIndirectObject (Object* o);
   void add(int num, int gen,  Guint offs, GBool used);
   void writeToFile(OutStream* outStr, GBool writeAllEntries);
+  // is this a file that uses only cross-reference streams?
+  GBool getxRefStream() { return xRefStream; }
 
 private:
 
-- 
1.7.3.4


--nextPart6438959.7A3akJjkYm--



More information about the poppler mailing list