[poppler] poppler/Catalog.cc

Albert Astals Cid aacid at kemper.freedesktop.org
Thu Jan 8 11:14:22 PST 2015


 poppler/Catalog.cc |   37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

New commits:
commit d91876a0c7a936b1f6f461d80131d7586a6c1a5e
Author: Albert Astals Cid <aacid at kde.org>
Date:   Thu Jan 8 20:13:06 2015 +0100

    Accept malformed documents whose root is a Page instead of a Pages
    
    gs and Adobe Reader both accept such documents, so they are "common" enough to be worth supporting
    
    Bug #88172

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 039f63e..f75f4e1 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -14,7 +14,7 @@
 // under GPL version 2 or later
 //
 // Copyright (C) 2005 Kristian Høgsberg <krh at redhat.com>
-// Copyright (C) 2005-2013 Albert Astals Cid <aacid at kde.org>
+// Copyright (C) 2005-2013, 2015 Albert Astals Cid <aacid at kde.org>
 // Copyright (C) 2005 Jeff Muizelaar <jrmuizel at nit.ca>
 // Copyright (C) 2005 Jonathan Blandford <jrb at redhat.com>
 // Copyright (C) 2005 Marco Pesenti Gritti <mpg at redhat.com>
@@ -807,7 +807,6 @@ int Catalog::getNumPages()
       return 0;
     }
     catDict.dictLookup("Pages", &pagesDict);
-    catDict.free();
 
     // This should really be isDict("Pages"), but I've seen at least one
     // PDF file where the /Type entry is missing.
@@ -815,19 +814,47 @@ int Catalog::getNumPages()
       error(errSyntaxError, -1, "Top-level pages object is wrong type ({0:s})",
           pagesDict.getTypeName());
       pagesDict.free();
+      catDict.free();
       return 0;
     }
 
     pagesDict.dictLookup("Count", &obj);
     // some PDF files actually use real numbers here ("/Count 9.0")
     if (!obj.isNum()) {
-      error(errSyntaxError, -1, "Page count in top-level pages object is wrong type ({0:s})",
-         obj.getTypeName());
-      numPages = 0;
+      if (pagesDict.dictIs("Page")) {
+	Object pageRootRef;
+	catDict.dictLookupNF("Pages", &pageRootRef);
+
+	error(errSyntaxError, -1, "Pages top-level is a single Page. The document is mal-formet, trying to recover...");
+
+	Dict *pageDict = pagesDict.getDict();
+	const Ref pageRef = pageRootRef.getRef();
+	Page *p = new Page(doc, 1, pageDict, pageRef, new PageAttrs(NULL, pageDict), form);
+	if (p->isOk()) {
+	  pages = (Page **)gmallocn(1, sizeof(Page *));
+	  pageRefs = (Ref *)gmallocn(1, sizeof(Ref));
+
+	  pages[0] = p;
+	  pageRefs[0].num = pageRef.num;
+	  pageRefs[0].gen = pageRef.gen;
+
+	  numPages = 1;
+	  lastCachedPage = 1;
+	  pagesSize = 1;
+	} else {
+	  delete p;
+	  numPages = 0;
+	}
+      } else {
+	error(errSyntaxError, -1, "Page count in top-level pages object is wrong type ({0:s})",
+	  obj.getTypeName());
+	numPages = 0;
+      }
     } else {
       numPages = (int)obj.getNum();
     }
 
+    catDict.free();
     obj.free();
     pagesDict.free();
   }


More information about the poppler mailing list