[poppler] poppler/PDFDoc.cc poppler/PDFDoc.h

Albert Astals Cid aacid at kemper.freedesktop.org
Mon Jul 3 20:45:06 UTC 2017


 poppler/PDFDoc.cc |   70 ++++++++++++++++++++++++++++++++++++++++--------------
 poppler/PDFDoc.h  |   21 ++++++----------
 2 files changed, 62 insertions(+), 29 deletions(-)

New commits:
commit 771c82623e8e1e0c92b8ca6f7c2b8a81ccbb60d3
Author: Albert Astals Cid <aacid at kde.org>
Date:   Mon Jul 3 22:44:42 2017 +0200

    pdfunite: fix crash in broken documents
    
    Bug #101208

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index f02f192f..ec5c7002 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -1127,8 +1127,22 @@ void PDFDoc::saveCompleteRewrite (OutStream* outStr)
 }
 
 void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
-                               CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
+                               CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict*> *alreadyWrittenDicts)
 {
+  bool deleteSet = false;
+  if (!alreadyWrittenDicts) {
+    alreadyWrittenDicts = new std::set<Dict*>;
+    deleteSet = true;
+  }
+
+  if (alreadyWrittenDicts->find(dict) != alreadyWrittenDicts->end()) {
+    error(errSyntaxWarning, -1, "PDFDoc::writeDictionnary: Found recursive dicts");
+    if (deleteSet) delete alreadyWrittenDicts;
+    return;
+  } else {
+    alreadyWrittenDicts->insert(dict);
+  }
+
   Object obj1;
   outStr->printf("<<");
   for (int i=0; i<dict->getLength(); i++) {
@@ -1136,10 +1150,14 @@ void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint
     GooString *keyNameToPrint = keyName.sanitizedName(gFalse /* non ps mode */);
     outStr->printf("/%s ", keyNameToPrint->getCString());
     delete keyNameToPrint;
-    writeObject(dict->getValNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
+    writeObject(dict->getValNF(i, &obj1), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts);
     obj1.free();
   }
   outStr->printf(">> ");
+
+  if (deleteSet) {
+    delete alreadyWrittenDicts;
+  }
 }
 
 void PDFDoc::writeStream (Stream* str, OutStream* outStr)
@@ -1246,7 +1264,7 @@ Goffset PDFDoc::writeObjectHeader (Ref *ref, OutStream* outStr)
 }
 
 void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
-                          CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
+                          CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict*> *alreadyWrittenDicts)
 {
   Array *array;
   Object obj1;
@@ -1293,7 +1311,7 @@ void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numO
       outStr->printf("] ");
       break;
     case objDict:
-      writeDictionnary (obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
+      writeDictionnary (obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts);
       break;
     case objStream: 
       {
@@ -1356,7 +1374,7 @@ void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numO
           }
           stream->getDict()->remove("DecodeParms");
 
-          writeDictionnary (stream->getDict(),outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
+          writeDictionnary (stream->getDict(),outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts);
           writeStream (stream,outStr);
           delete encStream;
           obj1.free();
@@ -1374,7 +1392,7 @@ void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numO
                 }
               }
           }
-          writeDictionnary (stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen);
+          writeDictionnary (stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts);
           writeRawStream (stream, outStr);
         }
         break;
@@ -1509,7 +1527,7 @@ void PDFDoc::writeXRefTableTrailer(Dict *trailerDict, XRef *uxref, GBool writeAl
 {
   uxref->writeTableToFile( outStr, writeAllEntries );
   outStr->printf( "trailer\r\n");
-  writeDictionnary(trailerDict, outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0);
+  writeDictionnary(trailerDict, outStr, xRef, 0, NULL, cryptRC4, 0, 0, 0, nullptr);
   outStr->printf( "\r\nstartxref\r\n");
   outStr->printf( "%lli\r\n", uxrefOffset);
   outStr->printf( "%%%%EOF\r\n");
@@ -1563,26 +1581,44 @@ void PDFDoc::writeHeader(OutStream *outStr, int major, int minor)
    outStr->printf("%%\xE2\xE3\xCF\xD3\n");
 }
 
-void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
+void PDFDoc::markDictionnary (Dict* dict, XRef * xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum, std::set<Dict*> *alreadyMarkedDicts)
 {
+  bool deleteSet = false;
+  if (!alreadyMarkedDicts) {
+    alreadyMarkedDicts = new std::set<Dict*>;
+    deleteSet = true;
+  }
+
+  if (alreadyMarkedDicts->find(dict) != alreadyMarkedDicts->end()) {
+    error(errSyntaxWarning, -1, "PDFDoc::markDictionnary: Found recursive dicts");
+    if (deleteSet) delete alreadyMarkedDicts;
+    return;
+  } else {
+    alreadyMarkedDicts->insert(dict);
+  }
+
   Object obj1;
   for (int i=0; i<dict->getLength(); i++) {
     const char *key = dict->getKey(i);
     if (strcmp(key, "Annots") != 0) {
-      markObject(dict->getValNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum);
+      markObject(dict->getValNF(i, &obj1), xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
     } else {
       Object annotsObj;
       dict->getValNF(i, &annotsObj);
       if (!annotsObj.isNull()) {
-        markAnnotations(&annotsObj, xRef, countRef, 0, oldRefNum, newRefNum);
+        markAnnotations(&annotsObj, xRef, countRef, 0, oldRefNum, newRefNum, alreadyMarkedDicts);
         annotsObj.free();
       }
     }
     obj1.free();
   }
+
+  if (deleteSet) {
+    delete alreadyMarkedDicts;
+  }
 }
 
-void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum)
+void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum, std::set<Dict*> *alreadyMarkedDicts)
 {
   Array *array;
   Object obj1;
@@ -1596,12 +1632,12 @@ void PDFDoc::markObject (Object* obj, XRef *xRef, XRef *countRef, Guint numOffse
       }
       break;
     case objDict:
-      markDictionnary (obj->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
+      markDictionnary (obj->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
       break;
     case objStream: 
       {
         Stream *stream = obj->getStream();
-        markDictionnary (stream->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum);
+        markDictionnary (stream->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
       }
       break;
     case objRef:
@@ -1695,7 +1731,7 @@ void PDFDoc::replacePageDict(int pageNo, int rotate,
   page.free();
 }
 
-void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum) 
+void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum, std::set<Dict*> *alreadyMarkedDicts)
 {
   pageDict->remove("OpenAction");
   pageDict->remove("Outlines");
@@ -1710,13 +1746,13 @@ void PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint n
 	      strcmp(key, "Annots") != 0 &&
 	      strcmp(key, "P") != 0 &&
         strcmp(key, "Root") != 0) {
-      markObject(&value, xRef, countRef, numOffset, oldRefNum, newRefNum);
+      markObject(&value, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts);
     }
     value.free();
   }
 }
 
-GBool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldPageNum, int newPageNum) {
+GBool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, Guint numOffset, int oldPageNum, int newPageNum, std::set<Dict*> *alreadyMarkedDicts) {
   Object annots;
   GBool modified = gFalse;
   annotsObj->fetch(getXRef(), &annots);
@@ -1775,7 +1811,7 @@ GBool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, Gui
             obj2.free();
           }
           type.free();
-          markPageObjects(dict, xRef, countRef, numOffset, oldPageNum, newPageNum);
+          markPageObjects(dict, xRef, countRef, numOffset, oldPageNum, newPageNum, alreadyMarkedDicts);
         }
         obj1.free();
         array->getNF(i, &obj1);
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 99de8a21..c5621ec2 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -14,7 +14,7 @@
 // under GPL version 2 or later
 //
 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh at frogmouth.net>
-// Copyright (C) 2005, 2009, 2014, 2015 Albert Astals Cid <aacid at kde.org>
+// Copyright (C) 2005, 2009, 2014, 2015, 2017 Albert Astals Cid <aacid at kde.org>
 // Copyright (C) 2008 Julien Rebetez <julienr at svn.gnome.org>
 // Copyright (C) 2008 Pino Toscano <pino at kde.org>
 // Copyright (C) 2008 Carlos Garcia Campos <carlosgc at gnome.org>
@@ -292,13 +292,13 @@ public:
 
   // rewrite pageDict with MediaBox, CropBox and new page CTM
   void replacePageDict(int pageNo, int rotate, PDFRectangle *mediaBox, PDFRectangle *cropBox);
-  void markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum);
-  GBool markAnnotations(Object *annots, XRef *xRef, XRef *countRef, Guint numOffset, int oldPageNum, int newPageNum);
+  void markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum, std::set<Dict*> *alreadyMarkedDicts = nullptr);
+  GBool markAnnotations(Object *annots, XRef *xRef, XRef *countRef, Guint numOffset, int oldPageNum, int newPageNum, std::set<Dict*> *alreadyMarkedDicts = nullptr);
   void markAcroForm(Object *acrpForm, XRef *xRef, XRef *countRef, Guint numOffset, int oldPageNum, int newPageNum);
   // write all objects used by pageDict to outStr
   Guint writePageObjects(OutStream *outStr, XRef *xRef, Guint numOffset, GBool combine = gFalse);
   static void writeObject (Object *obj, OutStream* outStr, XRef *xref, Guint numOffset, Guchar *fileKey,
-                           CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen);
+                           CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict*> *alreadyWrittenDicts = nullptr);
   static void writeHeader(OutStream *outStr, int major, int minor);
 
   // Ownership goes to the caller
@@ -311,21 +311,18 @@ public:
 
 private:
   // insert referenced objects in XRef
-  void markDictionnary (Dict* dict, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum);
-  void markObject (Object *obj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum);
+  void markDictionnary (Dict* dict, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum, std::set<Dict*> *alreadyMarkedDicts);
+  void markObject (Object *obj, XRef *xRef, XRef *countRef, Guint numOffset, int oldRefNum, int newRefNum, std::set<Dict*> *alreadyMarkedDicts = nullptr);
   static void writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey,
-                                CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen);
+                                CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict*> *alreadyWrittenDicts);
 
   // Write object header to current file stream and return its offset
   static Goffset writeObjectHeader (Ref *ref, OutStream* outStr);
   static void writeObjectFooter (OutStream* outStr);
 
   void writeObject (Object *obj, OutStream* outStr, Guchar *fileKey, CryptAlgorithm encAlgorithm,
-                    int keyLength, int objNum, int objGen)
-  { writeObject(obj, outStr, getXRef(), 0, fileKey, encAlgorithm, keyLength, objNum, objGen); }
-  void writeDictionnary (Dict* dict, OutStream* outStr, Guchar *fileKey, CryptAlgorithm encAlgorithm,
-                         int keyLength, int objNum, int objGen)
-  { writeDictionnary(dict, outStr, getXRef(), 0, fileKey, encAlgorithm, keyLength, objNum, objGen); }
+                    int keyLength, int objNum, int objGen, std::set<Dict*> *alreadyWrittenDicts = nullptr)
+  { writeObject(obj, outStr, getXRef(), 0, fileKey, encAlgorithm, keyLength, objNum, objGen, alreadyWrittenDicts); }
   static void writeStream (Stream* str, OutStream* outStr);
   static void writeRawStream (Stream* str, OutStream* outStr);
   void writeXRefTableTrailer (Goffset uxrefOffset, XRef *uxref, GBool writeAllEntries,


More information about the poppler mailing list