[poppler] Towards support for linearized PDFs

Hib Eris hib at hiberis.nl
Tue Apr 6 11:21:39 PDT 2010


Hi all,

I have updated the patches taking Albert's comments into account.

On Mon, Apr 5, 2010 at 11:50 PM, Albert Astals Cid <aacid at kde.org> wrote:
> Some comments:
>  * You don't initialize pageMode nor pageLayout to their null enum values.
>  * I'd like the new enum values to be added to the end to be more safe in case
> anyone was using the int values to save them somewhere
>  * You should be freeing catDict

>> The last patch in this series is almost certainly incorrect, but I
>> would like some input on it. It is dealing with form widgets.
>> Question: can this postWidgetsLoad() be done on a per page basis, or
>> is it something global to the whole document?
>
> Probably it could be done per page, if you give a patch i can test all docs in
> my regression suite to see if we get the same behaviour with your patches or
> not.

Okay, for now I have left this last patch out of the new patch set. I
will study the details of annotations and form fields some more to come
up with a better patch.

> Also, can you please enlighten us as to what this gives us towards getting
> linearized support?

The goal of linearized support is to be able to render any page in a
document without downloading any unrelated/unnecessary objects.  To
prevent downloading unnecessary objects, the Catalog should only
initialize what is absolutely needed and defer initializing anything
else until it is actually needed.

Linearized support while still initializing objects from all over the
document is just not very useful, so I thought it would be wise to
first do these things.

Hib
-------------- next part --------------
From 104ad5659bf0d8584387db77507e16abe86d1e4f Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Sat, 27 Mar 2010 14:43:57 +0100
Subject: [PATCH 1/9] Parse PageMode and PageLayout on demand

---
 poppler/Catalog.cc |  109 +++++++++++++++++++++++++++++++++++-----------------
 poppler/Catalog.h  |   11 +++--
 2 files changed, 81 insertions(+), 39 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index b659180..94ccff2 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -23,6 +23,7 @@
 // Copyright (C) 2007 Julien Rebetez <julienr at svn.gnome.org>
 // Copyright (C) 2008 Pino Toscano <pino at kde.org>
 // Copyright (C) 2009 Ilya Gorenbein <igorenbein at finjan.com>
+// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -71,6 +72,8 @@ Catalog::Catalog(XRef *xrefA) {
   pageLabelInfo = NULL;
   form = NULL;
   optContent = NULL;
+  pageMode = pageModeNull;
+  pageLayout = pageLayoutNull;
 
   xref->getCatalog(&catDict);
   if (!catDict.isDict()) {
@@ -148,41 +151,6 @@ Catalog::Catalog(XRef *xrefA) {
     pageLabelInfo = new PageLabelInfo(&obj, numPages);
   obj.free();
 
-  // read page mode
-  pageMode = pageModeNone;
-  if (catDict.dictLookup("PageMode", &obj)->isName()) {
-    if (obj.isName("UseNone"))
-      pageMode = pageModeNone;
-    else if (obj.isName("UseOutlines"))
-      pageMode = pageModeOutlines;
-    else if (obj.isName("UseThumbs"))
-      pageMode = pageModeThumbs;
-    else if (obj.isName("FullScreen"))
-      pageMode = pageModeFullScreen;
-    else if (obj.isName("UseOC"))
-      pageMode = pageModeOC;
-    else if (obj.isName("UseAttachments"))
-      pageMode = pageModeAttach;
-  }
-  obj.free();
-
-  pageLayout = pageLayoutNone;
-  if (catDict.dictLookup("PageLayout", &obj)->isName()) {
-    if (obj.isName("SinglePage"))
-      pageLayout = pageLayoutSinglePage;
-    if (obj.isName("OneColumn"))
-      pageLayout = pageLayoutOneColumn;
-    if (obj.isName("TwoColumnLeft"))
-      pageLayout = pageLayoutTwoColumnLeft;
-    if (obj.isName("TwoColumnRight"))
-      pageLayout = pageLayoutTwoColumnRight;
-    if (obj.isName("TwoPageLeft"))
-      pageLayout = pageLayoutTwoPageLeft;
-    if (obj.isName("TwoPageRight"))
-      pageLayout = pageLayoutTwoPageRight;
-  }
-  obj.free();
-
   // read base URI
   if (catDict.dictLookup("URI", &obj)->isDict()) {
     if (obj.dictLookup("Base", &obj2)->isString()) {
@@ -473,6 +441,77 @@ GooString *Catalog::getJS(int i)
   return js;
 }
 
+Catalog::PageMode Catalog::getPageMode() {
+
+  if (pageMode == pageModeNull) {
+
+    Object catDict, obj;
+
+    pageMode = pageModeNone;
+
+    xref->getCatalog(&catDict);
+    if (!catDict.isDict()) {
+      error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+      catDict.free();
+      return pageMode;
+    }
+
+    if (catDict.dictLookup("PageMode", &obj)->isName()) {
+      if (obj.isName("UseNone"))
+        pageMode = pageModeNone;
+      else if (obj.isName("UseOutlines"))
+        pageMode = pageModeOutlines;
+      else if (obj.isName("UseThumbs"))
+        pageMode = pageModeThumbs;
+      else if (obj.isName("FullScreen"))
+        pageMode = pageModeFullScreen;
+      else if (obj.isName("UseOC"))
+        pageMode = pageModeOC;
+      else if (obj.isName("UseAttachments"))
+        pageMode = pageModeAttach;
+    }
+    obj.free();
+    catDict.free();
+  }
+  return pageMode;
+}
+
+Catalog::PageLayout Catalog::getPageLayout() {
+
+  if (pageLayout == pageLayoutNull) {
+
+    Object catDict, obj;
+
+    pageLayout = pageLayoutNone;
+
+    xref->getCatalog(&catDict);
+    if (!catDict.isDict()) {
+      error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+      catDict.free();
+      return pageLayout;
+    }
+
+    pageLayout = pageLayoutNone;
+    if (catDict.dictLookup("PageLayout", &obj)->isName()) {
+      if (obj.isName("SinglePage"))
+        pageLayout = pageLayoutSinglePage;
+      if (obj.isName("OneColumn"))
+        pageLayout = pageLayoutOneColumn;
+      if (obj.isName("TwoColumnLeft"))
+        pageLayout = pageLayoutTwoColumnLeft;
+      if (obj.isName("TwoColumnRight"))
+        pageLayout = pageLayoutTwoColumnRight;
+      if (obj.isName("TwoPageLeft"))
+        pageLayout = pageLayoutTwoPageLeft;
+      if (obj.isName("TwoPageRight"))
+        pageLayout = pageLayoutTwoPageRight;
+    }
+    obj.free();
+    catDict.free();
+  }
+  return pageLayout;
+}
+
 NameTree::NameTree()
 {
   size = 0;
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index f5b389f..5e84679 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -19,6 +19,7 @@
 // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh at frogmouth.net>
 // Copyright (C) 2007 Julien Rebetez <julienr at svn.gnome.org>
 // Copyright (C) 2008 Pino Toscano <pino at kde.org>
+// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -205,7 +206,8 @@ public:
     pageModeThumbs,
     pageModeFullScreen,
     pageModeOC,
-    pageModeAttach
+    pageModeAttach,
+    pageModeNull
   };
   enum PageLayout {
     pageLayoutNone,
@@ -214,12 +216,13 @@ public:
     pageLayoutTwoColumnLeft,
     pageLayoutTwoColumnRight,
     pageLayoutTwoPageLeft,
-    pageLayoutTwoPageRight
+    pageLayoutTwoPageRight,
+    pageLayoutNull
   };
 
   // Returns the page mode.
-  PageMode getPageMode() { return pageMode; }
-  PageLayout getPageLayout() { return pageLayout; }
+  PageMode getPageMode();
+  PageLayout getPageLayout();
 
 private:
 
-- 
1.6.4.2


From 9adff5602d841a781f3dec5629c7d1e37b073572 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 15:09:58 +0100
Subject: [PATCH 2/9] Parse PageLabelInfo on demand

---
 poppler/Catalog.cc |   37 +++++++++++++++++++++++++++++--------
 poppler/Catalog.h  |    3 +++
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 94ccff2..aa21b45 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -147,10 +147,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   obj.free();
 
-  if (catDict.dictLookup("PageLabels", &obj)->isDict())
-    pageLabelInfo = new PageLabelInfo(&obj, numPages);
-  obj.free();
-
   // read base URI
   if (catDict.dictLookup("URI", &obj)->isDict()) {
     if (obj.dictLookup("Base", &obj2)->isString()) {
@@ -642,8 +638,9 @@ GBool Catalog::labelToIndex(GooString *label, int *index)
 {
   char *end;
 
-  if (pageLabelInfo != NULL) {
-    if (!pageLabelInfo->labelToIndex(label, index))
+  PageLabelInfo *pli = getPageLabelInfo();
+  if (pli != NULL) {
+    if (!pli->labelToIndex(label, index))
       return gFalse;
   } else {
     *index = strtol(label->getCString(), &end, 10) - 1;
@@ -664,8 +661,9 @@ GBool Catalog::indexToLabel(int index, GooString *label)
   if (index < 0 || index >= numPages)
     return gFalse;
 
-  if (pageLabelInfo != NULL) {
-    return pageLabelInfo->indexToLabel(index, label);
+  PageLabelInfo *pli = getPageLabelInfo();
+  if (pli != NULL) {
+    return pli->indexToLabel(index, label);
   } else {
     snprintf(buffer, sizeof (buffer), "%d", index + 1);
     label->append(buffer);	      
@@ -775,3 +773,26 @@ EmbFile::EmbFile(Object *efDict, GooString *description)
   if (!m_mimetype)
     m_mimetype = new GooString();
 }
+
+PageLabelInfo *Catalog::getPageLabelInfo()
+{
+  if (!pageLabelInfo) {
+    Object catDict;
+    Object obj;
+
+    xref->getCatalog(&catDict);
+    if (!catDict.isDict()) {
+      error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+      catDict.free();
+      return NULL;
+    }
+
+    if (catDict.dictLookup("PageLabels", &obj)->isDict()) {
+      pageLabelInfo = new PageLabelInfo(&obj, getNumPages());
+    }
+    obj.free();
+    catDict.free();
+  }
+
+  return pageLabelInfo;
+}
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 5e84679..2f7c616 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -226,6 +226,9 @@ public:
 
 private:
 
+  // Get page label info.
+  PageLabelInfo *getPageLabelInfo();
+
   XRef *xref;			// the xref table for this PDF file
   Page **pages;			// array of pages
   Ref *pageRefs;		// object ID for each page
-- 
1.6.4.2


From d5239aa800f9a4e047f670ea8968d28e7ae5dd9d Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 14:55:22 +0100
Subject: [PATCH 3/9] Parse Metadata on demand

---
 poppler/Catalog.cc |   16 +++++++++++++---
 1 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index aa21b45..d27b6f1 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -156,9 +156,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   obj.free();
 
-  // get the metadata stream
-  catDict.dictLookup("Metadata", &metadata);
-
   // get the structure tree root
   catDict.dictLookup("StructTreeRoot", &structTreeRoot);
 
@@ -224,6 +221,19 @@ GooString *Catalog::readMetadata() {
   Object obj;
   int c;
 
+  if (metadata.isNone()) {
+    Object catDict;
+
+    xref->getCatalog(&catDict);
+    if (catDict.isDict()) {
+      catDict.dictLookup("Metadata", &metadata);
+    } else {
+      error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+      metadata.initNull();
+    }
+    catDict.free();
+  }
+
   if (!metadata.isStream()) {
     return NULL;
   }
-- 
1.6.4.2


From 6f2306a1ae83fe74d0809f4bc36d109bd3e2118d Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 15:51:51 +0100
Subject: [PATCH 4/9] Parse StructTreeRoot on demand

---
 poppler/Catalog.cc |   22 +++++++++++++++++++---
 poppler/Catalog.h  |    2 +-
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index d27b6f1..bcae737 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -156,9 +156,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   obj.free();
 
-  // get the structure tree root
-  catDict.dictLookup("StructTreeRoot", &structTreeRoot);
-
   // get the outline dictionary
   catDict.dictLookup("Outlines", &outline);
 
@@ -806,3 +803,22 @@ PageLabelInfo *Catalog::getPageLabelInfo()
 
   return pageLabelInfo;
 }
+
+Object *Catalog::getStructTreeRoot()
+{
+  if (structTreeRoot.isNone())
+  {
+     Object catDict;
+
+     xref->getCatalog(&catDict);
+     if (catDict.isDict()) {
+       catDict.dictLookup("StructTreeRoot", &structTreeRoot);
+     } else {
+       error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+       structTreeRoot.initNull();
+     }
+     catDict.free();
+  }
+
+  return &structTreeRoot;
+}
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 2f7c616..a55d449 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -164,7 +164,7 @@ public:
   GooString *readMetadata();
 
   // Return the structure tree root object.
-  Object *getStructTreeRoot() { return &structTreeRoot; }
+  Object *getStructTreeRoot();
 
   // Find a page, given its object ID.  Returns page number, or 0 if
   // not found.
-- 
1.6.4.2


From 05ae1c68c1d80c2cb9f86428c3b6f5b60cc56ac5 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 16:05:02 +0100
Subject: [PATCH 5/9] Parse Outline on demand

---
 poppler/Catalog.cc |   23 ++++++++++++++++++++---
 poppler/Catalog.h  |    2 +-
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index bcae737..f6a8bef 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -156,9 +156,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   obj.free();
 
-  // get the outline dictionary
-  catDict.dictLookup("Outlines", &outline);
-
   // get the Optional Content dictionary
   if (catDict.dictLookup("OCProperties", &optContentProps)->isDict()) {
     optContent = new OCGs(&optContentProps, xref);
@@ -822,3 +819,23 @@ Object *Catalog::getStructTreeRoot()
 
   return &structTreeRoot;
 }
+
+Object *Catalog::getOutline()
+{
+  if (outline.isNone())
+  {
+     Object catDict;
+
+     xref->getCatalog(&catDict);
+     if (catDict.isDict()) {
+       catDict.dictLookup("Outlines", &outline);
+     } else {
+       error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+       outline.initNull();
+     }
+     catDict.free();
+  }
+
+  return &outline;
+}
+
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index a55d449..cfae726 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -192,7 +192,7 @@ public:
   GBool labelToIndex(GooString *label, int *index);
   GBool indexToLabel(int index, GooString *label);
 
-  Object *getOutline() { return &outline; }
+  Object *getOutline();
 
   Object *getAcroForm() { return &acroForm; }
 
-- 
1.6.4.2


From cf6f71c0a35a72ecb4a61fe80556e3c5d058fe76 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 16:48:07 +0100
Subject: [PATCH 6/9] Parse Outline on demand in PDFDoc

---
 poppler/PDFDoc.cc |   17 ++++++++++++-----
 poppler/PDFDoc.h  |    2 +-
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 78b6593..ef8748a 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -226,11 +226,6 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
     return gFalse;
   }
 
-#ifndef DISABLE_OUTLINE
-  // read outline
-  outline = new Outline(catalog->getOutline(), xref);
-#endif
-
   // done
   return gTrue;
 }
@@ -908,6 +903,18 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
   delete trailerDict;
 }
 
+#ifndef DISABLE_OUTLINE
+Outline *PDFDoc::getOutline()
+{
+  if (!outline) {
+    // read outline
+    outline = new Outline(catalog->getOutline(), xref);
+  }
+
+  return outline;
+}
+#endif
+
 PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
 {
   PDFDoc *doc = new PDFDoc();
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 79f6d6d..6d7dea2 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -170,7 +170,7 @@ public:
 
 #ifndef DISABLE_OUTLINE
   // Return the outline object.
-  Outline *getOutline() { return outline; }
+  Outline *getOutline();
 #endif
 
   // Is the file encrypted?
-- 
1.6.4.2


From 735125f91dbc5be222257d39a901f70575c9f799 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 16:32:22 +0100
Subject: [PATCH 7/9] Parse Dests on demand

---
 poppler/Catalog.cc |   27 +++++++++++++++++++++------
 poppler/Catalog.h  |    2 +-
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index f6a8bef..07b1e20 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -130,9 +130,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   pagesDict.free();
 
-  // read named destination dictionary
-  catDict.dictLookup("Dests", &dests);
-
   // read root of named destination tree - PDF1.6 table 3.28
   if (catDict.dictLookup("Names", &obj)->isDict()) {
     obj.dictLookup("Dests", &obj2);
@@ -177,7 +174,6 @@ Catalog::Catalog(XRef *xrefA) {
   pagesDict.free();
  err1:
   catDict.free();
-  dests.initNull();
   ok = gFalse;
 }
 
@@ -343,8 +339,8 @@ LinkDest *Catalog::findDest(GooString *name) {
 
   // try named destination dictionary then name tree
   found = gFalse;
-  if (dests.isDict()) {
-    if (!dests.dictLookup(name->getCString(), &obj1)->isNull())
+  if (getDests()->isDict()) {
+    if (!getDests()->dictLookup(name->getCString(), &obj1)->isNull())
       found = gTrue;
     else
       obj1.free();
@@ -839,3 +835,22 @@ Object *Catalog::getOutline()
   return &outline;
 }
 
+Object *Catalog::getDests()
+{
+  if (dests.isNone())
+  {
+     Object catDict;
+
+     xref->getCatalog(&catDict);
+     if (catDict.isDict()) {
+       catDict.dictLookup("Dests", &dests);
+     } else {
+       error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+       dests.initNull();
+     }
+     catDict.free();
+  }
+
+  return &dests;
+}
+
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index cfae726..134f1db 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -174,7 +174,7 @@ public:
   // NULL if <name> is not a destination.
   LinkDest *findDest(GooString *name);
 
-  Object *getDests() { return &dests; }
+  Object *getDests();
 
   // Get the number of embedded files
   int numEmbeddedFiles() { return embeddedFileNameTree.numEntries(); }
-- 
1.6.4.2


From f41b8a40f37cb1105695a5d40299c593f6f9e692 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 17:33:11 +0100
Subject: [PATCH 8/9] Parse Names on demand

---
 poppler/Catalog.cc |  112 ++++++++++++++++++++++++++++++++++++++++++----------
 poppler/Catalog.h  |   18 ++++++--
 2 files changed, 104 insertions(+), 26 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 07b1e20..4135d79 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -74,6 +74,9 @@ Catalog::Catalog(XRef *xrefA) {
   optContent = NULL;
   pageMode = pageModeNull;
   pageLayout = pageLayoutNull;
+  destNameTree = NULL;
+  embeddedFileNameTree = NULL;
+  jsNameTree = NULL;
 
   xref->getCatalog(&catDict);
   if (!catDict.isDict()) {
@@ -130,20 +133,6 @@ Catalog::Catalog(XRef *xrefA) {
   }
   pagesDict.free();
 
-  // read root of named destination tree - PDF1.6 table 3.28
-  if (catDict.dictLookup("Names", &obj)->isDict()) {
-    obj.dictLookup("Dests", &obj2);
-    destNameTree.init(xref, &obj2);
-    obj2.free();
-    obj.dictLookup("EmbeddedFiles", &obj2);
-    embeddedFileNameTree.init(xref, &obj2);
-    obj2.free();
-    obj.dictLookup("JavaScript", &obj2);
-    jsNameTree.init(xref, &obj2);
-    obj2.free();
-  }
-  obj.free();
-
   // read base URI
   if (catDict.dictLookup("URI", &obj)->isDict()) {
     if (obj.dictLookup("Base", &obj2)->isString()) {
@@ -190,9 +179,9 @@ Catalog::~Catalog() {
     gfree(pageRefs);
   }
   dests.free();
-  destNameTree.free();
-  embeddedFileNameTree.free();
-  jsNameTree.free();
+  delete destNameTree;
+  delete embeddedFileNameTree;
+  delete jsNameTree;
   if (baseURI) {
     delete baseURI;
   }
@@ -346,7 +335,7 @@ LinkDest *Catalog::findDest(GooString *name) {
       obj1.free();
   }
   if (!found) {
-    if (destNameTree.lookup(name, &obj1))
+    if (getDestNameTree()->lookup(name, &obj1))
       found = gTrue;
     else
       obj1.free();
@@ -380,10 +369,10 @@ EmbFile *Catalog::embeddedFile(int i)
 {
     Object efDict;
     Object obj;
-    obj = embeddedFileNameTree.getValue(i);
+    obj = getEmbeddedFileNameTree()->getValue(i);
     EmbFile *embeddedFile = 0;
     if (obj.isRef()) {
-        GooString desc(embeddedFileNameTree.getName(i));
+        GooString desc(getEmbeddedFileNameTree()->getName(i));
         embeddedFile = new EmbFile(obj.fetch(xref, &efDict), &desc);
         efDict.free();
     } else {
@@ -395,7 +384,7 @@ EmbFile *Catalog::embeddedFile(int i)
 
 GooString *Catalog::getJS(int i)
 {
-  Object obj = jsNameTree.getValue(i);
+  Object obj = getJSNameTree()->getValue(i);
   if (obj.isRef()) {
     Ref r = obj.getRef();
     obj.free();
@@ -515,6 +504,11 @@ NameTree::NameTree()
   entries = NULL;
 }
 
+NameTree::~NameTree()
+{
+  this->free();
+}
+
 NameTree::Entry::Entry(Array *array, int index) {
     if (!array->getString(index, &name) || !array->getNF(index + 1, &value)) {
       Object aux;
@@ -854,3 +848,79 @@ Object *Catalog::getDests()
   return &dests;
 }
 
+Object *Catalog::getNames()
+{
+  if (names.isNone())
+  {
+     Object catDict;
+
+     xref->getCatalog(&catDict);
+     if (catDict.isDict()) {
+       catDict.dictLookup("Names", &names);
+     } else {
+       error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+       names.initNull();
+     }
+     catDict.free();
+  }
+
+  return &names;
+}
+
+NameTree *Catalog::getDestNameTree()
+{
+  if (!destNameTree) {
+
+    destNameTree = new NameTree();
+
+    if (getNames()->isDict()) {
+       Object obj;
+
+       getNames()->dictLookup("Dests", &obj);
+       destNameTree->init(xref, &obj);
+       obj.free();
+    }
+
+  }
+
+  return destNameTree;
+}
+
+NameTree *Catalog::getEmbeddedFileNameTree()
+{
+  if (!embeddedFileNameTree) {
+
+    embeddedFileNameTree = new NameTree();
+
+    if (getNames()->isDict()) {
+       Object obj;
+
+       getNames()->dictLookup("EmbeddedFiles", &obj);
+       embeddedFileNameTree->init(xref, &obj);
+       obj.free();
+    }
+
+  }
+
+  return embeddedFileNameTree;
+}
+
+NameTree *Catalog::getJSNameTree()
+{
+  if (!jsNameTree) {
+
+    jsNameTree = new NameTree();
+
+    if (getNames()->isDict()) {
+       Object obj;
+
+       getNames()->dictLookup("JavaScript", &obj);
+       jsNameTree->init(xref, &obj);
+       obj.free();
+    }
+
+  }
+
+  return jsNameTree;
+}
+
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 134f1db..6021eed 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -50,6 +50,7 @@ class OCGs;
 class NameTree {
 public:
   NameTree();
+  ~NameTree();
   void init(XRef *xref, Object *tree);
   void parse(Object *tree);
   GBool lookup(GooString *name, Object *obj);
@@ -177,13 +178,13 @@ public:
   Object *getDests();
 
   // Get the number of embedded files
-  int numEmbeddedFiles() { return embeddedFileNameTree.numEntries(); }
+  int numEmbeddedFiles() { return getEmbeddedFileNameTree()->numEntries(); }
 
   // Get the i'th file embedded (at the Document level) in the document
   EmbFile *embeddedFile(int i);
 
   // Get the number of javascript scripts
-  int numJS() { return jsNameTree.numEntries(); }
+  int numJS() { return getJSNameTree()->numEntries(); }
 
   // Get the i'th JavaScript script (at the Document level) in the document
   GooString *getJS(int i);
@@ -236,9 +237,10 @@ private:
   int numPages;			// number of pages
   int pagesSize;		// size of pages array
   Object dests;			// named destination dictionary
-  NameTree destNameTree;	// named destination name-tree
-  NameTree embeddedFileNameTree;  // embedded file name-tree
-  NameTree jsNameTree;		// Java Script name-tree
+  Object names;			// named names dictionary
+  NameTree *destNameTree;	// named destination name-tree
+  NameTree *embeddedFileNameTree;  // embedded file name-tree
+  NameTree *jsNameTree;		// Java Script name-tree
   GooString *baseURI;		// base URI for URI-type links
   Object metadata;		// metadata stream
   Object structTreeRoot;	// structure tree root dictionary
@@ -253,6 +255,12 @@ private:
   int readPageTree(Dict *pages, PageAttrs *attrs, int start,
 		   char *alreadyRead);
   Object *findDestInTree(Object *tree, GooString *name, Object *obj);
+
+  Object *getNames();
+  NameTree *getDestNameTree();
+  NameTree *getEmbeddedFileNameTree();
+  NameTree *getJSNameTree();
+
 };
 
 #endif
-- 
1.6.4.2


From 9535240d32caafadc15bfa113b841e57f8972339 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 15:33:33 +0100
Subject: [PATCH 9/9] Parse Form on demand

---
 poppler/Catalog.cc |   21 +++++++++++++--------
 poppler/Catalog.h  |    2 +-
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 4135d79..a0d33cf 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -86,12 +86,6 @@ Catalog::Catalog(XRef *xrefA) {
   // get the AcroForm dictionary
   catDict.dictLookup("AcroForm", &acroForm);
 
-  // load Forms
-  if (acroForm.isDict()) {
-    form = new Form(xref,&acroForm);
-  }
-
-
   // read page tree
   catDict.dictLookup("Pages", &pagesDict);
   // This should really be isDict("Pages"), but I've seen at least one
@@ -153,8 +147,8 @@ Catalog::Catalog(XRef *xrefA) {
   optContentProps.free();
 
   // perform form-related loading after all widgets have been loaded
-  if (form) 
-    form->postWidgetsLoad();
+  if (getForm())
+    getForm()->postWidgetsLoad();
 
   catDict.free();
   return;
@@ -848,6 +842,17 @@ Object *Catalog::getDests()
   return &dests;
 }
 
+Form *Catalog::getForm()
+{
+  if (!form) {
+    if (acroForm.isDict()) {
+      form = new Form(xref,&acroForm);
+    }
+  }
+
+  return form;
+}
+
 Object *Catalog::getNames()
 {
   if (names.isNone())
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 6021eed..fd1c32e 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -199,7 +199,7 @@ public:
 
   OCGs *getOptContentConfig() { return optContent; }
 
-  Form* getForm() { return form; }
+  Form* getForm();
 
   enum PageMode {
     pageModeNone,
-- 
1.6.4.2


More information about the poppler mailing list