[poppler] poppler/PDFDoc.cc poppler/XRef.cc poppler/XRef.h

Albert Astals Cid aacid at kemper.freedesktop.org
Fri Aug 20 12:26:17 PDT 2010


 poppler/PDFDoc.cc |   24 ++++++++++++----
 poppler/XRef.cc   |   80 +++++++++++++++++++++++++++++++-----------------------
 poppler/XRef.h    |    5 ++-
 3 files changed, 68 insertions(+), 41 deletions(-)

New commits:
commit 7fc3c21a8c5d6cf8517100427b182887a9569ed0
Author: Ilya Gorenbein <igorenbein at finjan.com>
Date:   Fri Aug 20 20:24:31 2010 +0100

    Fix failure to parse PDF with damaged internal structure
    
    Patch in bug 29189, fixes bug 3870

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 33a2b4d..e4ac639 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -23,6 +23,7 @@
 // Copyright (C) 2009 Axel Struebing <axel.struebing at freenet.de>
 // Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
 // Copyright (C) 2010 Jakub Wilk <ubanus at users.sf.net>
+// Copyright (C) 2010 Ilya Gorenbein <igorenbein at finjan.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -205,8 +206,10 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
   // check header
   checkHeader();
 
+  GBool wasReconstructed = false;
+
   // read xref table
-  xref = new XRef(str);
+  xref = new XRef(str, &wasReconstructed);
   if (!xref->isOk()) {
     error(-1, "Couldn't read xref table");
     errCode = xref->getErrorCode();
@@ -221,10 +224,21 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
 
   // read catalog
   catalog = new Catalog(xref);
-  if (!catalog->isOk()) {
-    error(-1, "Couldn't read page catalog");
-    errCode = errBadCatalog;
-    return gFalse;
+  if (catalog && !catalog->isOk()) {
+    if (!wasReconstructed)
+    {
+      // try one more time to construct the Catalog, maybe the problem is damaged XRef
+      delete catalog;
+      delete xref;
+      xref = new XRef(str, NULL, true);
+      catalog = new Catalog(xref);
+    }
+
+    if (catalog && !catalog->isOk()) {
+      error(-1, "Couldn't read page catalog");
+      errCode = errBadCatalog;
+      return gFalse;
+    }
   }
 
   // done
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index a9cf571..0cd4be0 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -18,7 +18,7 @@
 // Copyright (C) 2006, 2008, 2010 Albert Astals Cid <aacid at kde.org>
 // Copyright (C) 2007-2008 Julien Rebetez <julienr at svn.gnome.org>
 // Copyright (C) 2007 Carlos Garcia Campos <carlosgc at gnome.org>
-// Copyright (C) 2009 Ilya Gorenbein <igorenbein at finjan.com>
+// Copyright (C) 2009, 2010 Ilya Gorenbein <igorenbein at finjan.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -268,7 +268,7 @@ XRef::XRef() {
   objStrs = new PopplerCache(5);
 }
 
-XRef::XRef(BaseStream *strA) {
+XRef::XRef(BaseStream *strA, GBool *wasReconstructed, GBool reconstruct) {
   Guint pos;
   Object obj;
 
@@ -289,43 +289,50 @@ XRef::XRef(BaseStream *strA) {
   start = str->getStart();
   pos = getStartXref();
 
-  // if there was a problem with the 'startxref' position, try to
-  // reconstruct the xref table
-  if (pos == 0) {
-    if (!(ok = constructXRef())) {
-      errCode = errDamaged;
-      return;
-    }
-
-  // read the xref table
-  } else {
-    GooVector<Guint> followedXRefStm;
-    while (readXRef(&pos, &followedXRefStm)) ;
+  if (reconstruct && !(ok = constructXRef(wasReconstructed)))
+  {
+    errCode = errDamaged;
+    return;
+  }
+  else
+  {
+    // if there was a problem with the 'startxref' position, try to
+    // reconstruct the xref table
+    if (pos == 0) {
+      if (!(ok = constructXRef(wasReconstructed))) {
+        errCode = errDamaged;
+        return;
+      }
 
-    // if there was a problem with the xref table,
-    // try to reconstruct it
-    if (!ok) {
-      if (!(ok = constructXRef())) {
-	errCode = errDamaged;
-	return;
+    // read the xref table
+    } else {
+      GooVector<Guint> followedXRefStm;
+      while (readXRef(&pos, &followedXRefStm)) ;
+
+      // if there was a problem with the xref table,
+      // try to reconstruct it
+      if (!ok) {
+        if (!(ok = constructXRef(wasReconstructed))) {
+          errCode = errDamaged;
+          return;
+        }
       }
     }
-  }
 
-  // get the root dictionary (catalog) object
-  trailerDict.dictLookupNF("Root", &obj);
-  if (obj.isRef()) {
-    rootNum = obj.getRefNum();
-    rootGen = obj.getRefGen();
-    obj.free();
-  } else {
-    obj.free();
-    if (!(ok = constructXRef())) {
-      errCode = errDamaged;
-      return;
+    // get the root dictionary (catalog) object
+    trailerDict.dictLookupNF("Root", &obj);
+    if (obj.isRef()) {
+      rootNum = obj.getRefNum();
+      rootGen = obj.getRefGen();
+      obj.free();
+    } else {
+      obj.free();
+      if (!(ok = constructXRef(wasReconstructed))) {
+        errCode = errDamaged;
+        return;
+      }
     }
   }
-
   // now set the trailer dictionary's xref pointer so we can fetch
   // indirect objects from it
   trailerDict.getDict()->setXRef(this);
@@ -746,7 +753,7 @@ GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) {
 }
 
 // Attempt to construct an xref table for a damaged file.
-GBool XRef::constructXRef() {
+GBool XRef::constructXRef(GBool *wasReconstructed) {
   Parser *parser;
   Object newTrailerDict, obj;
   char buf[256];
@@ -769,6 +776,11 @@ GBool XRef::constructXRef() {
   gotRoot = gFalse;
   streamEndsLen = streamEndsSize = 0;
 
+  if (wasReconstructed)
+  {
+    *wasReconstructed = true;
+  }
+
   str->reset();
   while (1) {
     pos = str->getPos();
diff --git a/poppler/XRef.h b/poppler/XRef.h
index be19e23..1f4ec6a 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -17,6 +17,7 @@
 // Copyright (C) 2006, 2008, 2010 Albert Astals Cid <aacid at kde.org>
 // Copyright (C) 2007-2008 Julien Rebetez <julienr at svn.gnome.org>
 // Copyright (C) 2007 Carlos Garcia Campos <carlosgc at gnome.org>
+// Copyright (C) 2010 Ilya Gorenbein <igorenbein at finjan.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -63,7 +64,7 @@ public:
   // Constructor, create an empty XRef, used for PDF writing
   XRef();
   // Constructor.  Read xref table from stream.
-  XRef(BaseStream *strA);
+  XRef(BaseStream *strA, GBool *wasReconstructed = NULL, GBool reconstruct = false);
 
   // Destructor.
   ~XRef();
@@ -161,7 +162,7 @@ private:
   GBool readXRefTable(Parser *parser, Guint *pos, GooVector<Guint> *followedXRefStm);
   GBool readXRefStreamSection(Stream *xrefStr, int *w, int first, int n);
   GBool readXRefStream(Stream *xrefStr, Guint *pos);
-  GBool constructXRef();
+  GBool constructXRef(GBool *wasReconstructed);
   Guint strToUnsigned(char *s);
 };
 


More information about the poppler mailing list