[poppler] Branch 'xpdf303merge' - poppler/Dict.cc poppler/Dict.h poppler/Object.cc poppler/Object.h poppler/Parser.cc poppler/Parser.h poppler/XRef.cc poppler/XRef.h

Albert Astals Cid aacid at kemper.freedesktop.org
Tue Dec 6 11:31:29 PST 2011


 poppler/Dict.cc   |    4 ++--
 poppler/Dict.h    |    2 +-
 poppler/Object.cc |    4 ++--
 poppler/Object.h  |    8 ++++----
 poppler/Parser.cc |   32 +++++++++++++++-----------------
 poppler/Parser.h  |   10 +++-------
 poppler/XRef.cc   |   33 +++++++--------------------------
 poppler/XRef.h    |    2 +-
 8 files changed, 35 insertions(+), 60 deletions(-)

New commits:
commit 743f70f594bf3c9a58d1ff0738b9a2bc3ea03382
Author: Albert Astals Cid <aacid at kde.org>
Date:   Tue Dec 6 20:27:03 2011 +0100

    xpdf303: Use xpdf method against recursion while parsing
    
    Our previous approach detected loops correctly, but it did not cope with "valid" files containing lots of arrays nested one inside the other, e.g. [[[[[[[[[[[[[[[[[[[
    So switch to the cruder "fix" used in xpdf, which limits the recursion depth while parsing.

diff --git a/poppler/Dict.cc b/poppler/Dict.cc
index d0d4b85..2615fde 100644
--- a/poppler/Dict.cc
+++ b/poppler/Dict.cc
@@ -193,10 +193,10 @@ GBool Dict::is(const char *type) {
   return (e = find("Type")) && e->val.isName(type);
 }
 
-Object *Dict::lookup(const char *key, Object *obj, std::set<int> *fetchOriginatorNums) {
+Object *Dict::lookup(const char *key, Object *obj, int recursion) {
   DictEntry *e;
 
-  return (e = find(key)) ? e->val.fetch(xref, obj, fetchOriginatorNums) : obj->initNull();
+  return (e = find(key)) ? e->val.fetch(xref, obj, recursion) : obj->initNull();
 }
 
 Object *Dict::lookupNF(const char *key, Object *obj) {
diff --git a/poppler/Dict.h b/poppler/Dict.h
index a589377..897f221 100644
--- a/poppler/Dict.h
+++ b/poppler/Dict.h
@@ -72,7 +72,7 @@ public:
 
   // Look up an entry and return the value.  Returns a null object
   // if <key> is not in the dictionary.
-  Object *lookup(const char *key, Object *obj, std::set<int> *fetchOriginatorNums = NULL);
+  Object *lookup(const char *key, Object *obj, int recursion = 0);
   Object *lookupNF(const char *key, Object *obj);
   GBool lookupInt(const char *key, const char *alt_key, int *value);
 
diff --git a/poppler/Object.cc b/poppler/Object.cc
index 84b5583..1cedba4 100644
--- a/poppler/Object.cc
+++ b/poppler/Object.cc
@@ -115,9 +115,9 @@ Object *Object::copy(Object *obj) {
   return obj;
 }
 
-Object *Object::fetch(XRef *xref, Object *obj, std::set<int> *fetchOriginatorNums) {
+Object *Object::fetch(XRef *xref, Object *obj, int recursion) {
   return (type == objRef && xref) ?
-         xref->fetch(ref.num, ref.gen, obj, fetchOriginatorNums) : copy(obj);
+         xref->fetch(ref.num, ref.gen, obj, recursion) : copy(obj);
 }
 
 void Object::free() {
diff --git a/poppler/Object.h b/poppler/Object.h
index 1b58037..a67b403 100644
--- a/poppler/Object.h
+++ b/poppler/Object.h
@@ -154,7 +154,7 @@ public:
 
   // If object is a Ref, fetch and return the referenced object.
   // Otherwise, return a copy of the object.
-  Object *fetch(XRef *xref, Object *obj, std::set<int> *fetchOriginatorNums = NULL);
+  Object *fetch(XRef *xref, Object *obj, int recursion = 0);
 
   // Free object contents.
   void free();
@@ -213,7 +213,7 @@ public:
   void dictAdd(char *key, Object *val);
   void dictSet(const char *key, Object *val);
   GBool dictIs(const char *dictType);
-  Object *dictLookup(const char *key, Object *obj, std::set<int> *fetchOriginatorNums = NULL);
+  Object *dictLookup(const char *key, Object *obj, int recursion = 0);
   Object *dictLookupNF(const char *key, Object *obj);
   char *dictGetKey(int i);
   Object *dictGetVal(int i, Object *obj);
@@ -300,8 +300,8 @@ inline GBool Object::dictIs(const char *dictType)
 inline GBool Object::isDict(const char *dictType)
   { return type == objDict && dictIs(dictType); }
 
-inline Object *Object::dictLookup(const char *key, Object *obj, std::set<int> *fetchOriginatorNums)
-  { OBJECT_TYPE_CHECK(objDict); return dict->lookup(key, obj, fetchOriginatorNums); }
+inline Object *Object::dictLookup(const char *key, Object *obj, int recursion)
+  { OBJECT_TYPE_CHECK(objDict); return dict->lookup(key, obj, recursion); }
 
 inline Object *Object::dictLookupNF(const char *key, Object *obj)
   { OBJECT_TYPE_CHECK(objDict); return dict->lookupNF(key, obj); }
diff --git a/poppler/Parser.cc b/poppler/Parser.cc
index 790ec3c..03b836e 100644
--- a/poppler/Parser.cc
+++ b/poppler/Parser.cc
@@ -37,6 +37,11 @@
 #include "XRef.h"
 #include "Error.h"
 
+// Max number of nested objects.  This is used to catch infinite loops
+// in the object structure. And also technically valid files with
+// lots of nested arrays that made us consume all the stack
+#define recursionLimit 500
+
 Parser::Parser(XRef *xrefA, Lexer *lexerA, GBool allowStreamsA) {
   xref = xrefA;
   lexer = lexerA;
@@ -52,21 +57,14 @@ Parser::~Parser() {
   delete lexer;
 }
 
-Object *Parser::getObj(Object *obj, Guchar *fileKey,
-           CryptAlgorithm encAlgorithm, int keyLength,
-           int objNum, int objGen) {
-  std::set<int> fetchOriginatorNums;
-  return getObj(obj, fileKey, encAlgorithm, keyLength, objNum, objGen, &fetchOriginatorNums);
-}
-
-Object *Parser::getObj(Object *obj, std::set<int> *fetchOriginatorNums)
+Object *Parser::getObj(Object *obj, int recursion)
 {
-  return getObj(obj, NULL, cryptRC4, 0, 0, 0, fetchOriginatorNums);
+  return getObj(obj, NULL, cryptRC4, 0, 0, 0, recursion);
 }
 
 Object *Parser::getObj(Object *obj, Guchar *fileKey,
 		       CryptAlgorithm encAlgorithm, int keyLength,
-		       int objNum, int objGen, std::set<int> *fetchOriginatorNums) {
+		       int objNum, int objGen, int recursion) {
   char *key;
   Stream *str;
   Object obj2;
@@ -85,18 +83,18 @@ Object *Parser::getObj(Object *obj, Guchar *fileKey,
   }
 
   // array
-  if (buf1.isCmd("[")) {
+  if (likely(recursion < recursionLimit) && buf1.isCmd("[")) {
     shift();
     obj->initArray(xref);
     while (!buf1.isCmd("]") && !buf1.isEOF())
       obj->arrayAdd(getObj(&obj2, fileKey, encAlgorithm, keyLength,
-			   objNum, objGen, fetchOriginatorNums));
+			   objNum, objGen, recursion + 1));
     if (buf1.isEOF())
       error(errSyntaxError, getPos(), "End of file inside array");
     shift();
 
   // dictionary or stream
-  } else if (buf1.isCmd("<<")) {
+  } else if (likely(recursion < recursionLimit) && buf1.isCmd("<<")) {
     shift(objNum);
     obj->initDict(xref);
     while (!buf1.isCmd(">>") && !buf1.isEOF()) {
@@ -111,7 +109,7 @@ Object *Parser::getObj(Object *obj, Guchar *fileKey,
 	  gfree(key);
 	  break;
 	}
-	obj->dictAdd(key, getObj(&obj2, fileKey, encAlgorithm, keyLength, objNum, objGen, fetchOriginatorNums));
+	obj->dictAdd(key, getObj(&obj2, fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1));
       }
     }
     if (buf1.isEOF())
@@ -120,7 +118,7 @@ Object *Parser::getObj(Object *obj, Guchar *fileKey,
     // object streams
     if (allowStreams && buf2.isCmd("stream")) {
       if ((str = makeStream(obj, fileKey, encAlgorithm, keyLength,
-			    objNum, objGen, fetchOriginatorNums))) {
+			    objNum, objGen, recursion + 1))) {
 	obj->initStream(str);
       } else {
 	obj->free();
@@ -174,7 +172,7 @@ Object *Parser::getObj(Object *obj, Guchar *fileKey,
 
 Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
 			   CryptAlgorithm encAlgorithm, int keyLength,
-			   int objNum, int objGen, std::set<int> *fetchOriginatorNums) {
+			   int objNum, int objGen, int recursion) {
   Object obj;
   BaseStream *baseStr;
   Stream *str;
@@ -188,7 +186,7 @@ Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
   pos = str->getPos();
 
   // get length
-  dict->dictLookup("Length", &obj, fetchOriginatorNums);
+  dict->dictLookup("Length", &obj, recursion);
   if (obj.isInt()) {
     length = (Guint)obj.getInt();
     obj.free();
diff --git a/poppler/Parser.h b/poppler/Parser.h
index 3d8a831..f1fa765 100644
--- a/poppler/Parser.h
+++ b/poppler/Parser.h
@@ -45,13 +45,9 @@ public:
   // Get the next object from the input stream.
   Object *getObj(Object *obj, Guchar *fileKey = NULL,
 		 CryptAlgorithm encAlgorithm = cryptRC4, int keyLength = 0,
-		 int objNum = 0, int objGen = 0);
+		 int objNum = 0, int objGen = 0, int recursion = 0);
   
-  Object *getObj(Object *obj, Guchar *fileKey,
-     CryptAlgorithm encAlgorithm, int keyLength,
-     int objNum, int objGen, std::set<int> *fetchOriginatorNums);
-
-  Object *getObj(Object *obj, std::set<int> *fetchOriginatorNums);
+  Object *getObj(Object *obj, int recursion);
 
   // Get stream.
   Stream *getStream() { return lexer->getStream(); }
@@ -69,7 +65,7 @@ private:
 
   Stream *makeStream(Object *dict, Guchar *fileKey,
 		     CryptAlgorithm encAlgorithm, int keyLength,
-		     int objNum, int objGen, std::set<int> *fetchOriginatorNums);
+		     int objNum, int objGen, int recursion);
   void shift(int objNum = -1);
 };
 
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index e1115c0..bacd540 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -967,16 +967,14 @@ GBool XRef::okToAssemble(GBool ignoreOwnerPW) {
   return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permAssemble);
 }
 
-Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginatorNums) {
+Object *XRef::fetch(int num, int gen, Object *obj, int recursion) {
   XRefEntry *e;
   Parser *parser;
   Object obj1, obj2, obj3;
-  bool deleteFetchOriginatorNums = false;
-  std::pair<std::set<int>::iterator, bool> fetchInsertResult;
 
   // check for bogus ref - this can happen in corrupted PDF files
-  if (num < 0 || num >= size || (fetchOriginatorNums != NULL && fetchOriginatorNums->find(num) != fetchOriginatorNums->end())) {
-    goto err2;
+  if (num < 0 || num >= size) {
+    goto err;
   }
 
   e = getEntry(num);
@@ -985,12 +983,6 @@ Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginato
     return obj;
   }
 
-  if (fetchOriginatorNums == NULL) {
-    fetchOriginatorNums = new std::set<int>();
-    deleteFetchOriginatorNums = true;
-  }
-  fetchInsertResult = fetchOriginatorNums->insert(num);
-
   switch (e->type) {
 
   case xrefEntryUncompressed:
@@ -1002,9 +994,9 @@ Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginato
 	       new Lexer(this,
 		 str->makeSubStream(start + e->offset, gFalse, 0, &obj1)),
 	       gTrue);
-    parser->getObj(&obj1, fetchOriginatorNums);
-    parser->getObj(&obj2, fetchOriginatorNums);
-    parser->getObj(&obj3, fetchOriginatorNums);
+    parser->getObj(&obj1, recursion);
+    parser->getObj(&obj2, recursion);
+    parser->getObj(&obj3, recursion);
     if (!obj1.isInt() || obj1.getInt() != num ||
 	!obj2.isInt() || obj2.getInt() != gen ||
 	!obj3.isCmd("obj")) {
@@ -1039,7 +1031,7 @@ Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginato
       goto err;
     }
     parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL,
-		   encAlgorithm, keyLength, num, gen, fetchOriginatorNums);
+		   encAlgorithm, keyLength, num, gen, recursion);
     obj1.free();
     obj2.free();
     obj3.free();
@@ -1087,20 +1079,9 @@ Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginato
     goto err;
   }
   
-  if (deleteFetchOriginatorNums) {
-    delete fetchOriginatorNums;
-  } else {
-    fetchOriginatorNums->erase(fetchInsertResult.first);
-  }
   return obj;
 
  err:
-  if (deleteFetchOriginatorNums) {
-    delete fetchOriginatorNums;
-  } else {
-    fetchOriginatorNums->erase(fetchInsertResult.first);
-  }
- err2:
   return obj->initNull();
 }
 
diff --git a/poppler/XRef.h b/poppler/XRef.h
index ecb1706..adfdc1a 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -102,7 +102,7 @@ public:
   Object *getCatalog(Object *obj) { return fetch(rootNum, rootGen, obj); }
 
   // Fetch an indirect reference.
-  Object *fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginatorNums = NULL);
+  Object *fetch(int num, int gen, Object *obj, int recursion = 0);
 
   // Return the document's Info dictionary (if any).
   Object *getDocInfo(Object *obj);


More information about the poppler mailing list