[poppler] Branch 'xpdf303merge' - poppler/Dict.cc poppler/Dict.h poppler/Object.cc poppler/Object.h poppler/Parser.cc poppler/Parser.h poppler/XRef.cc poppler/XRef.h
Albert Astals Cid
aacid at kemper.freedesktop.org
Tue Dec 6 11:31:29 PST 2011
poppler/Dict.cc | 4 ++--
poppler/Dict.h | 2 +-
poppler/Object.cc | 4 ++--
poppler/Object.h | 8 ++++----
poppler/Parser.cc | 32 +++++++++++++++-----------------
poppler/Parser.h | 10 +++-------
poppler/XRef.cc | 33 +++++++--------------------------
poppler/XRef.h | 2 +-
8 files changed, 35 insertions(+), 60 deletions(-)
New commits:
commit 743f70f594bf3c9a58d1ff0738b9a2bc3ea03382
Author: Albert Astals Cid <aacid at kde.org>
Date: Tue Dec 6 20:27:03 2011 +0100
xpdf303: Use xpdf method against recursion while parsing
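Our previous approach (tracking the object numbers being fetched) detected reference loops correctly, but it did not protect against "valid" files containing lots of arrays nested one inside the other ([[[[[[[[[[[[[[[[[[[...), which made the parser consume all the stack.
So switch to the cruder recursion-depth-limit "fix" used in xpdf.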
diff --git a/poppler/Dict.cc b/poppler/Dict.cc
index d0d4b85..2615fde 100644
--- a/poppler/Dict.cc
+++ b/poppler/Dict.cc
@@ -193,10 +193,10 @@ GBool Dict::is(const char *type) {
return (e = find("Type")) && e->val.isName(type);
}
-Object *Dict::lookup(const char *key, Object *obj, std::set<int> *fetchOriginatorNums) {
+Object *Dict::lookup(const char *key, Object *obj, int recursion) {
DictEntry *e;
- return (e = find(key)) ? e->val.fetch(xref, obj, fetchOriginatorNums) : obj->initNull();
+ return (e = find(key)) ? e->val.fetch(xref, obj, recursion) : obj->initNull();
}
Object *Dict::lookupNF(const char *key, Object *obj) {
diff --git a/poppler/Dict.h b/poppler/Dict.h
index a589377..897f221 100644
--- a/poppler/Dict.h
+++ b/poppler/Dict.h
@@ -72,7 +72,7 @@ public:
// Look up an entry and return the value. Returns a null object
// if <key> is not in the dictionary.
- Object *lookup(const char *key, Object *obj, std::set<int> *fetchOriginatorNums = NULL);
+ Object *lookup(const char *key, Object *obj, int recursion = 0);
Object *lookupNF(const char *key, Object *obj);
GBool lookupInt(const char *key, const char *alt_key, int *value);
diff --git a/poppler/Object.cc b/poppler/Object.cc
index 84b5583..1cedba4 100644
--- a/poppler/Object.cc
+++ b/poppler/Object.cc
@@ -115,9 +115,9 @@ Object *Object::copy(Object *obj) {
return obj;
}
-Object *Object::fetch(XRef *xref, Object *obj, std::set<int> *fetchOriginatorNums) {
+Object *Object::fetch(XRef *xref, Object *obj, int recursion) {
return (type == objRef && xref) ?
- xref->fetch(ref.num, ref.gen, obj, fetchOriginatorNums) : copy(obj);
+ xref->fetch(ref.num, ref.gen, obj, recursion) : copy(obj);
}
void Object::free() {
diff --git a/poppler/Object.h b/poppler/Object.h
index 1b58037..a67b403 100644
--- a/poppler/Object.h
+++ b/poppler/Object.h
@@ -154,7 +154,7 @@ public:
// If object is a Ref, fetch and return the referenced object.
// Otherwise, return a copy of the object.
- Object *fetch(XRef *xref, Object *obj, std::set<int> *fetchOriginatorNums = NULL);
+ Object *fetch(XRef *xref, Object *obj, int recursion = 0);
// Free object contents.
void free();
@@ -213,7 +213,7 @@ public:
void dictAdd(char *key, Object *val);
void dictSet(const char *key, Object *val);
GBool dictIs(const char *dictType);
- Object *dictLookup(const char *key, Object *obj, std::set<int> *fetchOriginatorNums = NULL);
+ Object *dictLookup(const char *key, Object *obj, int recursion = 0);
Object *dictLookupNF(const char *key, Object *obj);
char *dictGetKey(int i);
Object *dictGetVal(int i, Object *obj);
@@ -300,8 +300,8 @@ inline GBool Object::dictIs(const char *dictType)
inline GBool Object::isDict(const char *dictType)
{ return type == objDict && dictIs(dictType); }
-inline Object *Object::dictLookup(const char *key, Object *obj, std::set<int> *fetchOriginatorNums)
- { OBJECT_TYPE_CHECK(objDict); return dict->lookup(key, obj, fetchOriginatorNums); }
+inline Object *Object::dictLookup(const char *key, Object *obj, int recursion)
+ { OBJECT_TYPE_CHECK(objDict); return dict->lookup(key, obj, recursion); }
inline Object *Object::dictLookupNF(const char *key, Object *obj)
{ OBJECT_TYPE_CHECK(objDict); return dict->lookupNF(key, obj); }
diff --git a/poppler/Parser.cc b/poppler/Parser.cc
index 790ec3c..03b836e 100644
--- a/poppler/Parser.cc
+++ b/poppler/Parser.cc
@@ -37,6 +37,11 @@
#include "XRef.h"
#include "Error.h"
+// Max number of nested objects. This is used to catch infinite loops
+// in the object structure, as well as technically valid files with so
+// many nested arrays that parsing them would consume all the stack.
+#define recursionLimit 500
+
Parser::Parser(XRef *xrefA, Lexer *lexerA, GBool allowStreamsA) {
xref = xrefA;
lexer = lexerA;
@@ -52,21 +57,14 @@ Parser::~Parser() {
delete lexer;
}
-Object *Parser::getObj(Object *obj, Guchar *fileKey,
- CryptAlgorithm encAlgorithm, int keyLength,
- int objNum, int objGen) {
- std::set<int> fetchOriginatorNums;
- return getObj(obj, fileKey, encAlgorithm, keyLength, objNum, objGen, &fetchOriginatorNums);
-}
-
-Object *Parser::getObj(Object *obj, std::set<int> *fetchOriginatorNums)
+Object *Parser::getObj(Object *obj, int recursion)
{
- return getObj(obj, NULL, cryptRC4, 0, 0, 0, fetchOriginatorNums);
+ return getObj(obj, NULL, cryptRC4, 0, 0, 0, recursion);
}
Object *Parser::getObj(Object *obj, Guchar *fileKey,
CryptAlgorithm encAlgorithm, int keyLength,
- int objNum, int objGen, std::set<int> *fetchOriginatorNums) {
+ int objNum, int objGen, int recursion) {
char *key;
Stream *str;
Object obj2;
@@ -85,18 +83,18 @@ Object *Parser::getObj(Object *obj, Guchar *fileKey,
}
// array
- if (buf1.isCmd("[")) {
+ if (likely(recursion < recursionLimit) && buf1.isCmd("[")) {
shift();
obj->initArray(xref);
while (!buf1.isCmd("]") && !buf1.isEOF())
obj->arrayAdd(getObj(&obj2, fileKey, encAlgorithm, keyLength,
- objNum, objGen, fetchOriginatorNums));
+ objNum, objGen, recursion + 1));
if (buf1.isEOF())
error(errSyntaxError, getPos(), "End of file inside array");
shift();
// dictionary or stream
- } else if (buf1.isCmd("<<")) {
+ } else if (likely(recursion < recursionLimit) && buf1.isCmd("<<")) {
shift(objNum);
obj->initDict(xref);
while (!buf1.isCmd(">>") && !buf1.isEOF()) {
@@ -111,7 +109,7 @@ Object *Parser::getObj(Object *obj, Guchar *fileKey,
gfree(key);
break;
}
- obj->dictAdd(key, getObj(&obj2, fileKey, encAlgorithm, keyLength, objNum, objGen, fetchOriginatorNums));
+ obj->dictAdd(key, getObj(&obj2, fileKey, encAlgorithm, keyLength, objNum, objGen, recursion + 1));
}
}
if (buf1.isEOF())
@@ -120,7 +118,7 @@ Object *Parser::getObj(Object *obj, Guchar *fileKey,
// object streams
if (allowStreams && buf2.isCmd("stream")) {
if ((str = makeStream(obj, fileKey, encAlgorithm, keyLength,
- objNum, objGen, fetchOriginatorNums))) {
+ objNum, objGen, recursion + 1))) {
obj->initStream(str);
} else {
obj->free();
@@ -174,7 +172,7 @@ Object *Parser::getObj(Object *obj, Guchar *fileKey,
Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
CryptAlgorithm encAlgorithm, int keyLength,
- int objNum, int objGen, std::set<int> *fetchOriginatorNums) {
+ int objNum, int objGen, int recursion) {
Object obj;
BaseStream *baseStr;
Stream *str;
@@ -188,7 +186,7 @@ Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
pos = str->getPos();
// get length
- dict->dictLookup("Length", &obj, fetchOriginatorNums);
+ dict->dictLookup("Length", &obj, recursion);
if (obj.isInt()) {
length = (Guint)obj.getInt();
obj.free();
diff --git a/poppler/Parser.h b/poppler/Parser.h
index 3d8a831..f1fa765 100644
--- a/poppler/Parser.h
+++ b/poppler/Parser.h
@@ -45,13 +45,9 @@ public:
// Get the next object from the input stream.
Object *getObj(Object *obj, Guchar *fileKey = NULL,
CryptAlgorithm encAlgorithm = cryptRC4, int keyLength = 0,
- int objNum = 0, int objGen = 0);
+ int objNum = 0, int objGen = 0, int recursion = 0);
- Object *getObj(Object *obj, Guchar *fileKey,
- CryptAlgorithm encAlgorithm, int keyLength,
- int objNum, int objGen, std::set<int> *fetchOriginatorNums);
-
- Object *getObj(Object *obj, std::set<int> *fetchOriginatorNums);
+ Object *getObj(Object *obj, int recursion);
// Get stream.
Stream *getStream() { return lexer->getStream(); }
@@ -69,7 +65,7 @@ private:
Stream *makeStream(Object *dict, Guchar *fileKey,
CryptAlgorithm encAlgorithm, int keyLength,
- int objNum, int objGen, std::set<int> *fetchOriginatorNums);
+ int objNum, int objGen, int recursion);
void shift(int objNum = -1);
};
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index e1115c0..bacd540 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -967,16 +967,14 @@ GBool XRef::okToAssemble(GBool ignoreOwnerPW) {
return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permAssemble);
}
-Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginatorNums) {
+Object *XRef::fetch(int num, int gen, Object *obj, int recursion) {
XRefEntry *e;
Parser *parser;
Object obj1, obj2, obj3;
- bool deleteFetchOriginatorNums = false;
- std::pair<std::set<int>::iterator, bool> fetchInsertResult;
// check for bogus ref - this can happen in corrupted PDF files
- if (num < 0 || num >= size || (fetchOriginatorNums != NULL && fetchOriginatorNums->find(num) != fetchOriginatorNums->end())) {
- goto err2;
+ if (num < 0 || num >= size) {
+ goto err;
}
e = getEntry(num);
@@ -985,12 +983,6 @@ Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginato
return obj;
}
- if (fetchOriginatorNums == NULL) {
- fetchOriginatorNums = new std::set<int>();
- deleteFetchOriginatorNums = true;
- }
- fetchInsertResult = fetchOriginatorNums->insert(num);
-
switch (e->type) {
case xrefEntryUncompressed:
@@ -1002,9 +994,9 @@ Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginato
new Lexer(this,
str->makeSubStream(start + e->offset, gFalse, 0, &obj1)),
gTrue);
- parser->getObj(&obj1, fetchOriginatorNums);
- parser->getObj(&obj2, fetchOriginatorNums);
- parser->getObj(&obj3, fetchOriginatorNums);
+ parser->getObj(&obj1, recursion);
+ parser->getObj(&obj2, recursion);
+ parser->getObj(&obj3, recursion);
if (!obj1.isInt() || obj1.getInt() != num ||
!obj2.isInt() || obj2.getInt() != gen ||
!obj3.isCmd("obj")) {
@@ -1039,7 +1031,7 @@ Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginato
goto err;
}
parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL,
- encAlgorithm, keyLength, num, gen, fetchOriginatorNums);
+ encAlgorithm, keyLength, num, gen, recursion);
obj1.free();
obj2.free();
obj3.free();
@@ -1087,20 +1079,9 @@ Object *XRef::fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginato
goto err;
}
- if (deleteFetchOriginatorNums) {
- delete fetchOriginatorNums;
- } else {
- fetchOriginatorNums->erase(fetchInsertResult.first);
- }
return obj;
err:
- if (deleteFetchOriginatorNums) {
- delete fetchOriginatorNums;
- } else {
- fetchOriginatorNums->erase(fetchInsertResult.first);
- }
- err2:
return obj->initNull();
}
diff --git a/poppler/XRef.h b/poppler/XRef.h
index ecb1706..adfdc1a 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -102,7 +102,7 @@ public:
Object *getCatalog(Object *obj) { return fetch(rootNum, rootGen, obj); }
// Fetch an indirect reference.
- Object *fetch(int num, int gen, Object *obj, std::set<int> *fetchOriginatorNums = NULL);
+ Object *fetch(int num, int gen, Object *obj, int recursion = 0);
// Return the document's Info dictionary (if any).
Object *getDocInfo(Object *obj);
More information about the poppler mailing list