[poppler] Towards support for linearized PDFs

Hib Eris hib at hiberis.nl
Mon Apr 5 12:37:18 PDT 2010


Hi,

As a first step towards support for linearized PDFs I have some
patches that change the way the Catalog is initialized. Many of the
things in the Catalog are not always needed, and their initialization
can be deferred until they are needed. This first batch of patches
is fairly simple. The more invasive ones will come later.

The last patch in this series is almost certainly incorrect, but I
would like some input on it. It deals with form widgets.
Question: can this postWidgetsLoad() be done on a per-page basis, or
is it something global to the whole document?

Cheers,

Hib
-------------- next part --------------
From c2c8b384788fe521386371e04dad69227817c7fe Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Sat, 27 Mar 2010 14:43:57 +0100
Subject: [PATCH 01/10] Parse PageMode and PageLayout on demand

---
 poppler/Catalog.cc |  103 ++++++++++++++++++++++++++++++++++------------------
 poppler/Catalog.h  |    7 +++-
 2 files changed, 73 insertions(+), 37 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index b659180..cdcaf58 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -23,6 +23,7 @@
 // Copyright (C) 2007 Julien Rebetez <julienr at svn.gnome.org>
 // Copyright (C) 2008 Pino Toscano <pino at kde.org>
 // Copyright (C) 2009 Ilya Gorenbein <igorenbein at finjan.com>
+// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -148,41 +149,6 @@ Catalog::Catalog(XRef *xrefA) {
     pageLabelInfo = new PageLabelInfo(&obj, numPages);
   obj.free();
 
-  // read page mode
-  pageMode = pageModeNone;
-  if (catDict.dictLookup("PageMode", &obj)->isName()) {
-    if (obj.isName("UseNone"))
-      pageMode = pageModeNone;
-    else if (obj.isName("UseOutlines"))
-      pageMode = pageModeOutlines;
-    else if (obj.isName("UseThumbs"))
-      pageMode = pageModeThumbs;
-    else if (obj.isName("FullScreen"))
-      pageMode = pageModeFullScreen;
-    else if (obj.isName("UseOC"))
-      pageMode = pageModeOC;
-    else if (obj.isName("UseAttachments"))
-      pageMode = pageModeAttach;
-  }
-  obj.free();
-
-  pageLayout = pageLayoutNone;
-  if (catDict.dictLookup("PageLayout", &obj)->isName()) {
-    if (obj.isName("SinglePage"))
-      pageLayout = pageLayoutSinglePage;
-    if (obj.isName("OneColumn"))
-      pageLayout = pageLayoutOneColumn;
-    if (obj.isName("TwoColumnLeft"))
-      pageLayout = pageLayoutTwoColumnLeft;
-    if (obj.isName("TwoColumnRight"))
-      pageLayout = pageLayoutTwoColumnRight;
-    if (obj.isName("TwoPageLeft"))
-      pageLayout = pageLayoutTwoPageLeft;
-    if (obj.isName("TwoPageRight"))
-      pageLayout = pageLayoutTwoPageRight;
-  }
-  obj.free();
-
   // read base URI
   if (catDict.dictLookup("URI", &obj)->isDict()) {
     if (obj.dictLookup("Base", &obj2)->isString()) {
@@ -473,6 +439,73 @@ GooString *Catalog::getJS(int i)
   return js;
 }
 
+Catalog::PageMode Catalog::getPageMode() {
+
+  if (pageMode == pageModeNull) {
+
+    Object catDict, obj;
+
+    pageMode = pageModeNone;
+
+    xref->getCatalog(&catDict);
+    if (!catDict.isDict()) {
+      error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+      return pageMode;
+    }
+
+    if (catDict.dictLookup("PageMode", &obj)->isName()) {
+      if (obj.isName("UseNone"))
+        pageMode = pageModeNone;
+      else if (obj.isName("UseOutlines"))
+        pageMode = pageModeOutlines;
+      else if (obj.isName("UseThumbs"))
+        pageMode = pageModeThumbs;
+      else if (obj.isName("FullScreen"))
+        pageMode = pageModeFullScreen;
+      else if (obj.isName("UseOC"))
+        pageMode = pageModeOC;
+      else if (obj.isName("UseAttachments"))
+        pageMode = pageModeAttach;
+    }
+    obj.free();
+  }
+  return pageMode;
+}
+
+Catalog::PageLayout Catalog::getPageLayout() {
+
+  if (pageLayout == pageLayoutNull) {
+
+    Object catDict, obj;
+
+    pageLayout = pageLayoutNone;
+
+    xref->getCatalog(&catDict);
+    if (!catDict.isDict()) {
+      error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+      return pageLayout;
+    }
+
+    pageLayout = pageLayoutNone;
+    if (catDict.dictLookup("PageLayout", &obj)->isName()) {
+      if (obj.isName("SinglePage"))
+        pageLayout = pageLayoutSinglePage;
+      if (obj.isName("OneColumn"))
+        pageLayout = pageLayoutOneColumn;
+      if (obj.isName("TwoColumnLeft"))
+        pageLayout = pageLayoutTwoColumnLeft;
+      if (obj.isName("TwoColumnRight"))
+        pageLayout = pageLayoutTwoColumnRight;
+      if (obj.isName("TwoPageLeft"))
+        pageLayout = pageLayoutTwoPageLeft;
+      if (obj.isName("TwoPageRight"))
+        pageLayout = pageLayoutTwoPageRight;
+    }
+    obj.free();
+  }
+  return pageLayout;
+}
+
 NameTree::NameTree()
 {
   size = 0;
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index f5b389f..52ee966 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -19,6 +19,7 @@
 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh at frogmouth.net>
 // Copyright (C) 2007 Julien Rebetez <julienr at svn.gnome.org>
 // Copyright (C) 2008 Pino Toscano <pino at kde.org>
+// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -200,6 +201,7 @@ public:
   Form* getForm() { return form; }
 
   enum PageMode {
+    pageModeNull,
     pageModeNone,
     pageModeOutlines,
     pageModeThumbs,
@@ -208,6 +210,7 @@ public:
     pageModeAttach
   };
   enum PageLayout {
+    pageLayoutNull,
     pageLayoutNone,
     pageLayoutSinglePage,
     pageLayoutOneColumn,
@@ -218,8 +221,8 @@ public:
   };
 
   // Returns the page mode.
-  PageMode getPageMode() { return pageMode; }
-  PageLayout getPageLayout() { return pageLayout; }
+  PageMode getPageMode();
+  PageLayout getPageLayout();
 
 private:
 
-- 
1.6.4.2


From 8083b1fcc95f6c8ab504d223a6b86a90a514542c Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 15:09:58 +0100
Subject: [PATCH 02/10] Parse PageLabelInfo on demand

---
 poppler/Catalog.cc |   35 +++++++++++++++++++++++++++--------
 poppler/Catalog.h  |    3 +++
 2 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index cdcaf58..c537e7d 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -145,10 +145,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   obj.free();
 
-  if (catDict.dictLookup("PageLabels", &obj)->isDict())
-    pageLabelInfo = new PageLabelInfo(&obj, numPages);
-  obj.free();
-
   // read base URI
   if (catDict.dictLookup("URI", &obj)->isDict()) {
     if (obj.dictLookup("Base", &obj2)->isString()) {
@@ -636,8 +632,9 @@ GBool Catalog::labelToIndex(GooString *label, int *index)
 {
   char *end;
 
-  if (pageLabelInfo != NULL) {
-    if (!pageLabelInfo->labelToIndex(label, index))
+  PageLabelInfo *pli = getPageLabelInfo();
+  if (pli != NULL) {
+    if (!pli->labelToIndex(label, index))
       return gFalse;
   } else {
     *index = strtol(label->getCString(), &end, 10) - 1;
@@ -658,8 +655,9 @@ GBool Catalog::indexToLabel(int index, GooString *label)
   if (index < 0 || index >= numPages)
     return gFalse;
 
-  if (pageLabelInfo != NULL) {
-    return pageLabelInfo->indexToLabel(index, label);
+  PageLabelInfo *pli = getPageLabelInfo();
+  if (pli != NULL) {
+    return pli->indexToLabel(index, label);
   } else {
     snprintf(buffer, sizeof (buffer), "%d", index + 1);
     label->append(buffer);	      
@@ -769,3 +767,24 @@ EmbFile::EmbFile(Object *efDict, GooString *description)
   if (!m_mimetype)
     m_mimetype = new GooString();
 }
+
+PageLabelInfo *Catalog::getPageLabelInfo()
+{
+  if (!pageLabelInfo) {
+     Object catDict;
+     Object obj;
+
+     xref->getCatalog(&catDict);
+     if (!catDict.isDict()) {
+       error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+       return NULL;
+     }
+
+    if (catDict.dictLookup("PageLabels", &obj)->isDict()) {
+      pageLabelInfo = new PageLabelInfo(&obj, getNumPages());
+    }
+    obj.free();
+  }
+
+  return pageLabelInfo;
+}
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 52ee966..3f65404 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -226,6 +226,9 @@ public:
 
 private:
 
+  // Get page label info.
+  PageLabelInfo *getPageLabelInfo();
+
   XRef *xref;			// the xref table for this PDF file
   Page **pages;			// array of pages
   Ref *pageRefs;		// object ID for each page
-- 
1.6.4.2


From 96b2e0ab586f0086ae0f1d3f50f9866fbf1b8556 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 14:55:22 +0100
Subject: [PATCH 03/10] Parse Metadata on demand

---
 poppler/Catalog.cc |   15 ++++++++++++---
 1 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index c537e7d..bc3deb1 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -154,9 +154,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   obj.free();
 
-  // get the metadata stream
-  catDict.dictLookup("Metadata", &metadata);
-
   // get the structure tree root
   catDict.dictLookup("StructTreeRoot", &structTreeRoot);
 
@@ -222,6 +219,18 @@ GooString *Catalog::readMetadata() {
   Object obj;
   int c;
 
+  if (metadata.isNone()) {
+     Object catDict;
+
+     xref->getCatalog(&catDict);
+     if (catDict.isDict()) {
+       catDict.dictLookup("Metadata", &metadata);
+     } else {
+       error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+       metadata.initNull();
+     }
+  }
+
   if (!metadata.isStream()) {
     return NULL;
   }
-- 
1.6.4.2


From b8247769b0f4f35fbaea13c9fee830655cfc4e63 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 15:51:51 +0100
Subject: [PATCH 04/10] Parse StructTreeRoot on demand

---
 poppler/Catalog.cc |   21 ++++++++++++++++++---
 poppler/Catalog.h  |    2 +-
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index bc3deb1..9f5d5e8 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -154,9 +154,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   obj.free();
 
-  // get the structure tree root
-  catDict.dictLookup("StructTreeRoot", &structTreeRoot);
-
   // get the outline dictionary
   catDict.dictLookup("Outlines", &outline);
 
@@ -797,3 +794,21 @@ PageLabelInfo *Catalog::getPageLabelInfo()
 
   return pageLabelInfo;
 }
+
+Object *Catalog::getStructTreeRoot()
+{
+  if (structTreeRoot.isNone())
+  {
+     Object catDict;
+
+     xref->getCatalog(&catDict);
+     if (catDict.isDict()) {
+       catDict.dictLookup("StructTreeRoot", &structTreeRoot);
+     } else {
+       error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+       structTreeRoot.initNull();
+     }
+  }
+
+  return &structTreeRoot;
+}
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 3f65404..24b0295 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -164,7 +164,7 @@ public:
   GooString *readMetadata();
 
   // Return the structure tree root object.
-  Object *getStructTreeRoot() { return &structTreeRoot; }
+  Object *getStructTreeRoot();
 
   // Find a page, given its object ID.  Returns page number, or 0 if
   // not found.
-- 
1.6.4.2


From e4a5621ef5d28e165667ab655c0bc466099763bb Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 16:05:02 +0100
Subject: [PATCH 05/10] Parse Outline on demand

---
 poppler/Catalog.cc |   22 +++++++++++++++++++---
 poppler/Catalog.h  |    2 +-
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 9f5d5e8..6918f08 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -154,9 +154,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   obj.free();
 
-  // get the outline dictionary
-  catDict.dictLookup("Outlines", &outline);
-
   // get the Optional Content dictionary
   if (catDict.dictLookup("OCProperties", &optContentProps)->isDict()) {
     optContent = new OCGs(&optContentProps, xref);
@@ -812,3 +809,22 @@ Object *Catalog::getStructTreeRoot()
 
   return &structTreeRoot;
 }
+
+Object *Catalog::getOutline()
+{
+  if (outline.isNone())
+  {
+     Object catDict;
+
+     xref->getCatalog(&catDict);
+     if (catDict.isDict()) {
+       catDict.dictLookup("Outlines", &outline);
+     } else {
+       error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+       outline.initNull();
+     }
+  }
+
+  return &outline;
+}
+
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 24b0295..c8c30ec 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -192,7 +192,7 @@ public:
   GBool labelToIndex(GooString *label, int *index);
   GBool indexToLabel(int index, GooString *label);
 
-  Object *getOutline() { return &outline; }
+  Object *getOutline();
 
   Object *getAcroForm() { return &acroForm; }
 
-- 
1.6.4.2


From d04ce9305cf7f2da151d05d37f555ff505a93cd4 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 16:48:07 +0100
Subject: [PATCH 06/10] Parse Outline on demand in PDFDoc

---
 poppler/PDFDoc.cc |   17 ++++++++++++-----
 poppler/PDFDoc.h  |    2 +-
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 78b6593..ef8748a 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -226,11 +226,6 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
     return gFalse;
   }
 
-#ifndef DISABLE_OUTLINE
-  // read outline
-  outline = new Outline(catalog->getOutline(), xref);
-#endif
-
   // done
   return gTrue;
 }
@@ -908,6 +903,18 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
   delete trailerDict;
 }
 
+#ifndef DISABLE_OUTLINE
+Outline *PDFDoc::getOutline()
+{
+  if (!outline) {
+    // read outline
+    outline = new Outline(catalog->getOutline(), xref);
+  }
+
+  return outline;
+}
+#endif
+
 PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
 {
   PDFDoc *doc = new PDFDoc();
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 79f6d6d..6d7dea2 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -170,7 +170,7 @@ public:
 
 #ifndef DISABLE_OUTLINE
   // Return the outline object.
-  Outline *getOutline() { return outline; }
+  Outline *getOutline();
 #endif
 
   // Is the file encrypted?
-- 
1.6.4.2


From 3f4fbd983212817f30b99d8b81eeb15fc29a315a Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 16:32:22 +0100
Subject: [PATCH 07/10] Parse Dests on demand

---
 poppler/Catalog.cc |   26 ++++++++++++++++++++------
 poppler/Catalog.h  |    2 +-
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 6918f08..549544a 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -128,9 +128,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   pagesDict.free();
 
-  // read named destination dictionary
-  catDict.dictLookup("Dests", &dests);
-
   // read root of named destination tree - PDF1.6 table 3.28
   if (catDict.dictLookup("Names", &obj)->isDict()) {
     obj.dictLookup("Dests", &obj2);
@@ -175,7 +172,6 @@ Catalog::Catalog(XRef *xrefA) {
   pagesDict.free();
  err1:
   catDict.free();
-  dests.initNull();
   ok = gFalse;
 }
 
@@ -340,8 +336,8 @@ LinkDest *Catalog::findDest(GooString *name) {
 
   // try named destination dictionary then name tree
   found = gFalse;
-  if (dests.isDict()) {
-    if (!dests.dictLookup(name->getCString(), &obj1)->isNull())
+  if (getDests()->isDict()) {
+    if (!getDests()->dictLookup(name->getCString(), &obj1)->isNull())
       found = gTrue;
     else
       obj1.free();
@@ -828,3 +824,21 @@ Object *Catalog::getOutline()
   return &outline;
 }
 
+Object *Catalog::getDests()
+{
+  if (dests.isNone())
+  {
+     Object catDict;
+
+     xref->getCatalog(&catDict);
+     if (catDict.isDict()) {
+       catDict.dictLookup("Dests", &dests);
+     } else {
+       error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+       dests.initNull();
+     }
+  }
+
+  return &dests;
+}
+
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index c8c30ec..3d95b2e 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -174,7 +174,7 @@ public:
   // NULL if <name> is not a destination.
   LinkDest *findDest(GooString *name);
 
-  Object *getDests() { return &dests; }
+  Object *getDests();
 
   // Get the number of embedded files
   int numEmbeddedFiles() { return embeddedFileNameTree.numEntries(); }
-- 
1.6.4.2


From c2351c4ab7059f65817ae75423a7d093bc85fa7f Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 17:33:11 +0100
Subject: [PATCH 08/10] Parse Names on demand

---
 poppler/Catalog.cc |   93 ++++++++++++++++++++++++++++++++++++++++++----------
 poppler/Catalog.h  |    9 +++++
 2 files changed, 84 insertions(+), 18 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 549544a..3ad9040 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -128,20 +128,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   pagesDict.free();
 
-  // read root of named destination tree - PDF1.6 table 3.28
-  if (catDict.dictLookup("Names", &obj)->isDict()) {
-    obj.dictLookup("Dests", &obj2);
-    destNameTree.init(xref, &obj2);
-    obj2.free();
-    obj.dictLookup("EmbeddedFiles", &obj2);
-    embeddedFileNameTree.init(xref, &obj2);
-    obj2.free();
-    obj.dictLookup("JavaScript", &obj2);
-    jsNameTree.init(xref, &obj2);
-    obj2.free();
-  }
-  obj.free();
-
   // read base URI
   if (catDict.dictLookup("URI", &obj)->isDict()) {
     if (obj.dictLookup("Base", &obj2)->isString()) {
@@ -343,7 +329,7 @@ LinkDest *Catalog::findDest(GooString *name) {
       obj1.free();
   }
   if (!found) {
-    if (destNameTree.lookup(name, &obj1))
+    if (getDestNameTree()->lookup(name, &obj1))
       found = gTrue;
     else
       obj1.free();
@@ -377,10 +363,10 @@ EmbFile *Catalog::embeddedFile(int i)
 {
     Object efDict;
     Object obj;
-    obj = embeddedFileNameTree.getValue(i);
+    obj = getEmbeddedFileNameTree()->getValue(i);
     EmbFile *embeddedFile = 0;
     if (obj.isRef()) {
-        GooString desc(embeddedFileNameTree.getName(i));
+        GooString desc(getEmbeddedFileNameTree()->getName(i));
         embeddedFile = new EmbFile(obj.fetch(xref, &efDict), &desc);
         efDict.free();
     } else {
@@ -392,7 +378,7 @@ EmbFile *Catalog::embeddedFile(int i)
 
 GooString *Catalog::getJS(int i)
 {
-  Object obj = jsNameTree.getValue(i);
+  Object obj = getJSNameTree()->getValue(i);
   if (obj.isRef()) {
     Ref r = obj.getRef();
     obj.free();
@@ -503,6 +489,7 @@ Catalog::PageLayout Catalog::getPageLayout() {
 
 NameTree::NameTree()
 {
+  initialized = gFalse;
   size = 0;
   length = 0;
   entries = NULL;
@@ -543,6 +530,7 @@ void NameTree::addEntry(Entry *entry)
 void NameTree::init(XRef *xrefA, Object *tree) {
   xref = xrefA;
   parse(tree);
+  initialized = gTrue;
 }
 
 void NameTree::parse(Object *tree) {
@@ -842,3 +830,72 @@ Object *Catalog::getDests()
   return &dests;
 }
 
+Object *Catalog::getNames()
+{
+  if (names.isNone())
+  {
+     Object catDict;
+
+     xref->getCatalog(&catDict);
+     if (catDict.isDict()) {
+       catDict.dictLookup("Names", &names);
+     } else {
+       error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+       names.initNull();
+     }
+  }
+
+  return &names;
+}
+
+NameTree *Catalog::getDestNameTree()
+{
+  if (!destNameTree.isInitialized()) {
+
+    if (getNames()->isDict()) {
+       Object obj;
+
+       getNames()->dictLookup("Dests", &obj);
+       destNameTree.init(xref, &obj);
+       obj.free();
+    }
+
+  }
+
+  return &destNameTree;
+}
+
+NameTree *Catalog::getEmbeddedFileNameTree()
+{
+  if (!embeddedFileNameTree.isInitialized()) {
+
+    if (getNames()->isDict()) {
+       Object obj;
+
+       getNames()->dictLookup("EmbeddedFiles", &obj);
+       embeddedFileNameTree.init(xref, &obj);
+       obj.free();
+    }
+
+  }
+
+  return &embeddedFileNameTree;
+}
+
+NameTree *Catalog::getJSNameTree()
+{
+  if (!jsNameTree.isInitialized()) {
+
+    if (getNames()->isDict()) {
+       Object obj;
+
+       getNames()->dictLookup("JavaScript", &obj);
+       jsNameTree.init(xref, &obj);
+       obj.free();
+    }
+
+  }
+
+  return &jsNameTree;
+}
+
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 3d95b2e..2c17060 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -50,6 +50,7 @@ class OCGs;
 class NameTree {
 public:
   NameTree();
+  GBool isInitialized() { return initialized; };
   void init(XRef *xref, Object *tree);
   void parse(Object *tree);
   GBool lookup(GooString *name, Object *obj);
@@ -60,6 +61,7 @@ public:
   GooString *getName(int i);
 
 private:
+  GBool initialized;
   struct Entry {
     Entry(Array *array, int index);
     ~Entry();
@@ -236,6 +238,7 @@ private:
   int numPages;			// number of pages
   int pagesSize;		// size of pages array
   Object dests;			// named destination dictionary
+  Object names;			// named names dictionary
   NameTree destNameTree;	// named destination name-tree
   NameTree embeddedFileNameTree;  // embedded file name-tree
   NameTree jsNameTree;		// Java Script name-tree
@@ -253,6 +256,12 @@ private:
   int readPageTree(Dict *pages, PageAttrs *attrs, int start,
 		   char *alreadyRead);
   Object *findDestInTree(Object *tree, GooString *name, Object *obj);
+
+  Object *getNames();
+  NameTree *getDestNameTree();
+  NameTree *getEmbeddedFileNameTree();
+  NameTree *getJSNameTree();
+
 };
 
 #endif
-- 
1.6.4.2


From 1da5a475e0de07bf7f07b866f6b6360a63a1ff5c Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 15:33:33 +0100
Subject: [PATCH 09/10] Parse Form on demand

---
 poppler/Catalog.cc |   23 ++++++++++++++---------
 poppler/Catalog.h  |    2 +-
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 3ad9040..94aa817 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -81,13 +81,7 @@ Catalog::Catalog(XRef *xrefA) {
   // get the AcroForm dictionary
   catDict.dictLookup("AcroForm", &acroForm);
 
-  // load Forms
-  if (acroForm.isDict()) {
-    form = new Form(xref,&acroForm);
-  }
-
-
-  // read page tree
+  // get page count
   catDict.dictLookup("Pages", &pagesDict);
   // This should really be isDict("Pages"), but I've seen at least one
   // PDF file where the /Type entry is missing.
@@ -148,8 +142,8 @@ Catalog::Catalog(XRef *xrefA) {
   optContentProps.free();
 
   // perform form-related loading after all widgets have been loaded
-  if (form) 
-    form->postWidgetsLoad();
+  if (getForm())
+    getForm()->postWidgetsLoad();
 
   catDict.free();
   return;
@@ -830,6 +824,17 @@ Object *Catalog::getDests()
   return &dests;
 }
 
+Form *Catalog::getForm()
+{
+  if (!form) {
+    if (acroForm.isDict()) {
+      form = new Form(xref,&acroForm);
+    }
+  }
+
+  return form;
+}
+
 Object *Catalog::getNames()
 {
   if (names.isNone())
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 2c17060..b0e1286 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -200,7 +200,7 @@ public:
 
   OCGs *getOptContentConfig() { return optContent; }
 
-  Form* getForm() { return form; }
+  Form* getForm();
 
   enum PageMode {
     pageModeNull,
-- 
1.6.4.2


From 42439114cdb527bcf9019dc3171f04fe2b958c96 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 16:14:35 +0100
Subject: [PATCH 10/10] WIP Fix form loading

I am not sure this can be done.
---
 poppler/Catalog.cc |    7 +++----
 1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 94aa817..ba93405 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -141,10 +141,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   optContentProps.free();
 
-  // perform form-related loading after all widgets have been loaded
-  if (getForm())
-    getForm()->postWidgetsLoad();
-
   catDict.free();
   return;
 
@@ -829,6 +825,9 @@ Form *Catalog::getForm()
   if (!form) {
     if (acroForm.isDict()) {
       form = new Form(xref,&acroForm);
+
+      // perform form-related loading after all widgets have been loaded
+      if (form) form->postWidgetsLoad();
     }
   }
 
-- 
1.6.4.2


More information about the poppler mailing list