[poppler] Towards support for linearized PDFs
Hib Eris
hib at hiberis.nl
Tue Apr 6 11:21:39 PDT 2010
Hi all,
I have updated the patches taking Albert's comments into account.
On Mon, Apr 5, 2010 at 11:50 PM, Albert Astals Cid <aacid at kde.org> wrote:
> Some comments:
> * You don't initialize pageMode nor pageLayout to their null enum values.
> * I'd like the new enum values to be added to the end to be more safe in case
> anyone was using the int values to save them somewhere
> * You should be freeing catDict
>> The last patch in this series is almost certainly incorrect, but I
>> would like some input on it. It is dealing with form widgets.
>> Question: can this postWidgetsLoad() be done on a per page basis, or
>> is it something global to the whole document?
>
> Probably it could be done per page, if you give a patch i can test all docs in
> my regression suite to see if we get the same behaviour with your patches or
> not.
Okay, for now I have left this last patch out of the new patch set. I
will study the details of annotations and formfields some more to come
up with a better patch.
> Also, can you please enlighten us as to what this gives us towards getting
> linearized support?
The goal of linearized support is to be able to render any page in a
document without downloading any unrelated/unnecessary objects. To
prevent downloading unnecessary objects, the Catalog should only
initialize what is absolutely needed and defer initializing anything
else until it is needed.
Linearized support while still initializing objects from all over the
document is just not very useful, so I thought it would be wise to
first do these things.
Hib
-------------- next part --------------
From 104ad5659bf0d8584387db77507e16abe86d1e4f Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Sat, 27 Mar 2010 14:43:57 +0100
Subject: [PATCH 1/9] Parse PageMode and PageLayout on demand
---
poppler/Catalog.cc | 109 +++++++++++++++++++++++++++++++++++-----------------
poppler/Catalog.h | 11 +++--
2 files changed, 81 insertions(+), 39 deletions(-)
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index b659180..94ccff2 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -23,6 +23,7 @@
// Copyright (C) 2007 Julien Rebetez <julienr at svn.gnome.org>
// Copyright (C) 2008 Pino Toscano <pino at kde.org>
// Copyright (C) 2009 Ilya Gorenbein <igorenbein at finjan.com>
+// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -71,6 +72,8 @@ Catalog::Catalog(XRef *xrefA) {
pageLabelInfo = NULL;
form = NULL;
optContent = NULL;
+ pageMode = pageModeNull;
+ pageLayout = pageLayoutNull;
xref->getCatalog(&catDict);
if (!catDict.isDict()) {
@@ -148,41 +151,6 @@ Catalog::Catalog(XRef *xrefA) {
pageLabelInfo = new PageLabelInfo(&obj, numPages);
obj.free();
- // read page mode
- pageMode = pageModeNone;
- if (catDict.dictLookup("PageMode", &obj)->isName()) {
- if (obj.isName("UseNone"))
- pageMode = pageModeNone;
- else if (obj.isName("UseOutlines"))
- pageMode = pageModeOutlines;
- else if (obj.isName("UseThumbs"))
- pageMode = pageModeThumbs;
- else if (obj.isName("FullScreen"))
- pageMode = pageModeFullScreen;
- else if (obj.isName("UseOC"))
- pageMode = pageModeOC;
- else if (obj.isName("UseAttachments"))
- pageMode = pageModeAttach;
- }
- obj.free();
-
- pageLayout = pageLayoutNone;
- if (catDict.dictLookup("PageLayout", &obj)->isName()) {
- if (obj.isName("SinglePage"))
- pageLayout = pageLayoutSinglePage;
- if (obj.isName("OneColumn"))
- pageLayout = pageLayoutOneColumn;
- if (obj.isName("TwoColumnLeft"))
- pageLayout = pageLayoutTwoColumnLeft;
- if (obj.isName("TwoColumnRight"))
- pageLayout = pageLayoutTwoColumnRight;
- if (obj.isName("TwoPageLeft"))
- pageLayout = pageLayoutTwoPageLeft;
- if (obj.isName("TwoPageRight"))
- pageLayout = pageLayoutTwoPageRight;
- }
- obj.free();
-
// read base URI
if (catDict.dictLookup("URI", &obj)->isDict()) {
if (obj.dictLookup("Base", &obj2)->isString()) {
@@ -473,6 +441,77 @@ GooString *Catalog::getJS(int i)
return js;
}
+Catalog::PageMode Catalog::getPageMode() {
+
+ if (pageMode == pageModeNull) {
+
+ Object catDict, obj;
+
+ pageMode = pageModeNone;
+
+ xref->getCatalog(&catDict);
+ if (!catDict.isDict()) {
+ error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+ catDict.free();
+ return pageMode;
+ }
+
+ if (catDict.dictLookup("PageMode", &obj)->isName()) {
+ if (obj.isName("UseNone"))
+ pageMode = pageModeNone;
+ else if (obj.isName("UseOutlines"))
+ pageMode = pageModeOutlines;
+ else if (obj.isName("UseThumbs"))
+ pageMode = pageModeThumbs;
+ else if (obj.isName("FullScreen"))
+ pageMode = pageModeFullScreen;
+ else if (obj.isName("UseOC"))
+ pageMode = pageModeOC;
+ else if (obj.isName("UseAttachments"))
+ pageMode = pageModeAttach;
+ }
+ obj.free();
+ catDict.free();
+ }
+ return pageMode;
+}
+
+Catalog::PageLayout Catalog::getPageLayout() {
+
+ if (pageLayout == pageLayoutNull) {
+
+ Object catDict, obj;
+
+ pageLayout = pageLayoutNone;
+
+ xref->getCatalog(&catDict);
+ if (!catDict.isDict()) {
+ error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+ catDict.free();
+ return pageLayout;
+ }
+
+ pageLayout = pageLayoutNone;
+ if (catDict.dictLookup("PageLayout", &obj)->isName()) {
+ if (obj.isName("SinglePage"))
+ pageLayout = pageLayoutSinglePage;
+ if (obj.isName("OneColumn"))
+ pageLayout = pageLayoutOneColumn;
+ if (obj.isName("TwoColumnLeft"))
+ pageLayout = pageLayoutTwoColumnLeft;
+ if (obj.isName("TwoColumnRight"))
+ pageLayout = pageLayoutTwoColumnRight;
+ if (obj.isName("TwoPageLeft"))
+ pageLayout = pageLayoutTwoPageLeft;
+ if (obj.isName("TwoPageRight"))
+ pageLayout = pageLayoutTwoPageRight;
+ }
+ obj.free();
+ catDict.free();
+ }
+ return pageLayout;
+}
+
NameTree::NameTree()
{
size = 0;
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index f5b389f..5e84679 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -19,6 +19,7 @@
// Copyright (C) 2005, 2006, 2008 Brad Hards <bradh at frogmouth.net>
// Copyright (C) 2007 Julien Rebetez <julienr at svn.gnome.org>
// Copyright (C) 2008 Pino Toscano <pino at kde.org>
+// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -205,7 +206,8 @@ public:
pageModeThumbs,
pageModeFullScreen,
pageModeOC,
- pageModeAttach
+ pageModeAttach,
+ pageModeNull
};
enum PageLayout {
pageLayoutNone,
@@ -214,12 +216,13 @@ public:
pageLayoutTwoColumnLeft,
pageLayoutTwoColumnRight,
pageLayoutTwoPageLeft,
- pageLayoutTwoPageRight
+ pageLayoutTwoPageRight,
+ pageLayoutNull
};
// Returns the page mode.
- PageMode getPageMode() { return pageMode; }
- PageLayout getPageLayout() { return pageLayout; }
+ PageMode getPageMode();
+ PageLayout getPageLayout();
private:
--
1.6.4.2
From 9adff5602d841a781f3dec5629c7d1e37b073572 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 15:09:58 +0100
Subject: [PATCH 2/9] Parse PageLabelInfo on demand
---
poppler/Catalog.cc | 37 +++++++++++++++++++++++++++++--------
poppler/Catalog.h | 3 +++
2 files changed, 32 insertions(+), 8 deletions(-)
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 94ccff2..aa21b45 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -147,10 +147,6 @@ Catalog::Catalog(XRef *xrefA) {
}
obj.free();
- if (catDict.dictLookup("PageLabels", &obj)->isDict())
- pageLabelInfo = new PageLabelInfo(&obj, numPages);
- obj.free();
-
// read base URI
if (catDict.dictLookup("URI", &obj)->isDict()) {
if (obj.dictLookup("Base", &obj2)->isString()) {
@@ -642,8 +638,9 @@ GBool Catalog::labelToIndex(GooString *label, int *index)
{
char *end;
- if (pageLabelInfo != NULL) {
- if (!pageLabelInfo->labelToIndex(label, index))
+ PageLabelInfo *pli = getPageLabelInfo();
+ if (pli != NULL) {
+ if (!pli->labelToIndex(label, index))
return gFalse;
} else {
*index = strtol(label->getCString(), &end, 10) - 1;
@@ -664,8 +661,9 @@ GBool Catalog::indexToLabel(int index, GooString *label)
if (index < 0 || index >= numPages)
return gFalse;
- if (pageLabelInfo != NULL) {
- return pageLabelInfo->indexToLabel(index, label);
+ PageLabelInfo *pli = getPageLabelInfo();
+ if (pli != NULL) {
+ return pli->indexToLabel(index, label);
} else {
snprintf(buffer, sizeof (buffer), "%d", index + 1);
label->append(buffer);
@@ -775,3 +773,26 @@ EmbFile::EmbFile(Object *efDict, GooString *description)
if (!m_mimetype)
m_mimetype = new GooString();
}
+
+PageLabelInfo *Catalog::getPageLabelInfo()
+{
+ if (!pageLabelInfo) {
+ Object catDict;
+ Object obj;
+
+ xref->getCatalog(&catDict);
+ if (!catDict.isDict()) {
+ error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+ catDict.free();
+ return NULL;
+ }
+
+ if (catDict.dictLookup("PageLabels", &obj)->isDict()) {
+ pageLabelInfo = new PageLabelInfo(&obj, getNumPages());
+ }
+ obj.free();
+ catDict.free();
+ }
+
+ return pageLabelInfo;
+}
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 5e84679..2f7c616 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -226,6 +226,9 @@ public:
private:
+ // Get page label info.
+ PageLabelInfo *getPageLabelInfo();
+
XRef *xref; // the xref table for this PDF file
Page **pages; // array of pages
Ref *pageRefs; // object ID for each page
--
1.6.4.2
From d5239aa800f9a4e047f670ea8968d28e7ae5dd9d Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 14:55:22 +0100
Subject: [PATCH 3/9] Parse Metadata on demand
---
poppler/Catalog.cc | 16 +++++++++++++---
1 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index aa21b45..d27b6f1 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -156,9 +156,6 @@ Catalog::Catalog(XRef *xrefA) {
}
obj.free();
- // get the metadata stream
- catDict.dictLookup("Metadata", &metadata);
-
// get the structure tree root
catDict.dictLookup("StructTreeRoot", &structTreeRoot);
@@ -224,6 +221,19 @@ GooString *Catalog::readMetadata() {
Object obj;
int c;
+ if (metadata.isNone()) {
+ Object catDict;
+
+ xref->getCatalog(&catDict);
+ if (catDict.isDict()) {
+ catDict.dictLookup("Metadata", &metadata);
+ } else {
+ error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+ metadata.initNull();
+ }
+ catDict.free();
+ }
+
if (!metadata.isStream()) {
return NULL;
}
--
1.6.4.2
From 6f2306a1ae83fe74d0809f4bc36d109bd3e2118d Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 15:51:51 +0100
Subject: [PATCH 4/9] Parse StructTreeRoot on demand
---
poppler/Catalog.cc | 22 +++++++++++++++++++---
poppler/Catalog.h | 2 +-
2 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index d27b6f1..bcae737 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -156,9 +156,6 @@ Catalog::Catalog(XRef *xrefA) {
}
obj.free();
- // get the structure tree root
- catDict.dictLookup("StructTreeRoot", &structTreeRoot);
-
// get the outline dictionary
catDict.dictLookup("Outlines", &outline);
@@ -806,3 +803,22 @@ PageLabelInfo *Catalog::getPageLabelInfo()
return pageLabelInfo;
}
+
+Object *Catalog::getStructTreeRoot()
+{
+ if (structTreeRoot.isNone())
+ {
+ Object catDict;
+
+ xref->getCatalog(&catDict);
+ if (catDict.isDict()) {
+ catDict.dictLookup("StructTreeRoot", &structTreeRoot);
+ } else {
+ error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+ structTreeRoot.initNull();
+ }
+ catDict.free();
+ }
+
+ return &structTreeRoot;
+}
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 2f7c616..a55d449 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -164,7 +164,7 @@ public:
GooString *readMetadata();
// Return the structure tree root object.
- Object *getStructTreeRoot() { return &structTreeRoot; }
+ Object *getStructTreeRoot();
// Find a page, given its object ID. Returns page number, or 0 if
// not found.
--
1.6.4.2
From 05ae1c68c1d80c2cb9f86428c3b6f5b60cc56ac5 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 16:05:02 +0100
Subject: [PATCH 5/9] Parse Outline on demand
---
poppler/Catalog.cc | 23 ++++++++++++++++++++---
poppler/Catalog.h | 2 +-
2 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index bcae737..f6a8bef 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -156,9 +156,6 @@ Catalog::Catalog(XRef *xrefA) {
}
obj.free();
- // get the outline dictionary
- catDict.dictLookup("Outlines", &outline);
-
// get the Optional Content dictionary
if (catDict.dictLookup("OCProperties", &optContentProps)->isDict()) {
optContent = new OCGs(&optContentProps, xref);
@@ -822,3 +819,23 @@ Object *Catalog::getStructTreeRoot()
return &structTreeRoot;
}
+
+Object *Catalog::getOutline()
+{
+ if (outline.isNone())
+ {
+ Object catDict;
+
+ xref->getCatalog(&catDict);
+ if (catDict.isDict()) {
+ catDict.dictLookup("Outlines", &outline);
+ } else {
+ error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+ outline.initNull();
+ }
+ catDict.free();
+ }
+
+ return &outline;
+}
+
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index a55d449..cfae726 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -192,7 +192,7 @@ public:
GBool labelToIndex(GooString *label, int *index);
GBool indexToLabel(int index, GooString *label);
- Object *getOutline() { return &outline; }
+ Object *getOutline();
Object *getAcroForm() { return &acroForm; }
--
1.6.4.2
From cf6f71c0a35a72ecb4a61fe80556e3c5d058fe76 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 16:48:07 +0100
Subject: [PATCH 6/9] Parse Outline on demand in PDFDoc
---
poppler/PDFDoc.cc | 17 ++++++++++++-----
poppler/PDFDoc.h | 2 +-
2 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 78b6593..ef8748a 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -226,11 +226,6 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
return gFalse;
}
-#ifndef DISABLE_OUTLINE
- // read outline
- outline = new Outline(catalog->getOutline(), xref);
-#endif
-
// done
return gTrue;
}
@@ -908,6 +903,18 @@ void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, OutStream* outStr,
delete trailerDict;
}
+#ifndef DISABLE_OUTLINE
+Outline *PDFDoc::getOutline()
+{
+ if (!outline) {
+ // read outline
+ outline = new Outline(catalog->getOutline(), xref);
+ }
+
+ return outline;
+}
+#endif
+
PDFDoc *PDFDoc::ErrorPDFDoc(int errorCode, GooString *fileNameA)
{
PDFDoc *doc = new PDFDoc();
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 79f6d6d..6d7dea2 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -170,7 +170,7 @@ public:
#ifndef DISABLE_OUTLINE
// Return the outline object.
- Outline *getOutline() { return outline; }
+ Outline *getOutline();
#endif
// Is the file encrypted?
--
1.6.4.2
From 735125f91dbc5be222257d39a901f70575c9f799 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 16:32:22 +0100
Subject: [PATCH 7/9] Parse Dests on demand
---
poppler/Catalog.cc | 27 +++++++++++++++++++++------
poppler/Catalog.h | 2 +-
2 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index f6a8bef..07b1e20 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -130,9 +130,6 @@ Catalog::Catalog(XRef *xrefA) {
}
pagesDict.free();
- // read named destination dictionary
- catDict.dictLookup("Dests", &dests);
-
// read root of named destination tree - PDF1.6 table 3.28
if (catDict.dictLookup("Names", &obj)->isDict()) {
obj.dictLookup("Dests", &obj2);
@@ -177,7 +174,6 @@ Catalog::Catalog(XRef *xrefA) {
pagesDict.free();
err1:
catDict.free();
- dests.initNull();
ok = gFalse;
}
@@ -343,8 +339,8 @@ LinkDest *Catalog::findDest(GooString *name) {
// try named destination dictionary then name tree
found = gFalse;
- if (dests.isDict()) {
- if (!dests.dictLookup(name->getCString(), &obj1)->isNull())
+ if (getDests()->isDict()) {
+ if (!getDests()->dictLookup(name->getCString(), &obj1)->isNull())
found = gTrue;
else
obj1.free();
@@ -839,3 +835,22 @@ Object *Catalog::getOutline()
return &outline;
}
+Object *Catalog::getDests()
+{
+ if (dests.isNone())
+ {
+ Object catDict;
+
+ xref->getCatalog(&catDict);
+ if (catDict.isDict()) {
+ catDict.dictLookup("Dests", &dests);
+ } else {
+ error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+ dests.initNull();
+ }
+ catDict.free();
+ }
+
+ return &dests;
+}
+
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index cfae726..134f1db 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -174,7 +174,7 @@ public:
// NULL if <name> is not a destination.
LinkDest *findDest(GooString *name);
- Object *getDests() { return &dests; }
+ Object *getDests();
// Get the number of embedded files
int numEmbeddedFiles() { return embeddedFileNameTree.numEntries(); }
--
1.6.4.2
From f41b8a40f37cb1105695a5d40299c593f6f9e692 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 17:33:11 +0100
Subject: [PATCH 8/9] Parse Names on demand
---
poppler/Catalog.cc | 112 ++++++++++++++++++++++++++++++++++++++++++----------
poppler/Catalog.h | 18 ++++++--
2 files changed, 104 insertions(+), 26 deletions(-)
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 07b1e20..4135d79 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -74,6 +74,9 @@ Catalog::Catalog(XRef *xrefA) {
optContent = NULL;
pageMode = pageModeNull;
pageLayout = pageLayoutNull;
+ destNameTree = NULL;
+ embeddedFileNameTree = NULL;
+ jsNameTree = NULL;
xref->getCatalog(&catDict);
if (!catDict.isDict()) {
@@ -130,20 +133,6 @@ Catalog::Catalog(XRef *xrefA) {
}
pagesDict.free();
- // read root of named destination tree - PDF1.6 table 3.28
- if (catDict.dictLookup("Names", &obj)->isDict()) {
- obj.dictLookup("Dests", &obj2);
- destNameTree.init(xref, &obj2);
- obj2.free();
- obj.dictLookup("EmbeddedFiles", &obj2);
- embeddedFileNameTree.init(xref, &obj2);
- obj2.free();
- obj.dictLookup("JavaScript", &obj2);
- jsNameTree.init(xref, &obj2);
- obj2.free();
- }
- obj.free();
-
// read base URI
if (catDict.dictLookup("URI", &obj)->isDict()) {
if (obj.dictLookup("Base", &obj2)->isString()) {
@@ -190,9 +179,9 @@ Catalog::~Catalog() {
gfree(pageRefs);
}
dests.free();
- destNameTree.free();
- embeddedFileNameTree.free();
- jsNameTree.free();
+ delete destNameTree;
+ delete embeddedFileNameTree;
+ delete jsNameTree;
if (baseURI) {
delete baseURI;
}
@@ -346,7 +335,7 @@ LinkDest *Catalog::findDest(GooString *name) {
obj1.free();
}
if (!found) {
- if (destNameTree.lookup(name, &obj1))
+ if (getDestNameTree()->lookup(name, &obj1))
found = gTrue;
else
obj1.free();
@@ -380,10 +369,10 @@ EmbFile *Catalog::embeddedFile(int i)
{
Object efDict;
Object obj;
- obj = embeddedFileNameTree.getValue(i);
+ obj = getEmbeddedFileNameTree()->getValue(i);
EmbFile *embeddedFile = 0;
if (obj.isRef()) {
- GooString desc(embeddedFileNameTree.getName(i));
+ GooString desc(getEmbeddedFileNameTree()->getName(i));
embeddedFile = new EmbFile(obj.fetch(xref, &efDict), &desc);
efDict.free();
} else {
@@ -395,7 +384,7 @@ EmbFile *Catalog::embeddedFile(int i)
GooString *Catalog::getJS(int i)
{
- Object obj = jsNameTree.getValue(i);
+ Object obj = getJSNameTree()->getValue(i);
if (obj.isRef()) {
Ref r = obj.getRef();
obj.free();
@@ -515,6 +504,11 @@ NameTree::NameTree()
entries = NULL;
}
+NameTree::~NameTree()
+{
+ this->free();
+}
+
NameTree::Entry::Entry(Array *array, int index) {
if (!array->getString(index, &name) || !array->getNF(index + 1, &value)) {
Object aux;
@@ -854,3 +848,79 @@ Object *Catalog::getDests()
return &dests;
}
+Object *Catalog::getNames()
+{
+ if (names.isNone())
+ {
+ Object catDict;
+
+ xref->getCatalog(&catDict);
+ if (catDict.isDict()) {
+ catDict.dictLookup("Names", &names);
+ } else {
+ error(-1, "Catalog object is wrong type (%s)", catDict.getTypeName());
+ names.initNull();
+ }
+ catDict.free();
+ }
+
+ return &names;
+}
+
+NameTree *Catalog::getDestNameTree()
+{
+ if (!destNameTree) {
+
+ destNameTree = new NameTree();
+
+ if (getNames()->isDict()) {
+ Object obj;
+
+ getNames()->dictLookup("Dests", &obj);
+ destNameTree->init(xref, &obj);
+ obj.free();
+ }
+
+ }
+
+ return destNameTree;
+}
+
+NameTree *Catalog::getEmbeddedFileNameTree()
+{
+ if (!embeddedFileNameTree) {
+
+ embeddedFileNameTree = new NameTree();
+
+ if (getNames()->isDict()) {
+ Object obj;
+
+ getNames()->dictLookup("EmbeddedFiles", &obj);
+ embeddedFileNameTree->init(xref, &obj);
+ obj.free();
+ }
+
+ }
+
+ return embeddedFileNameTree;
+}
+
+NameTree *Catalog::getJSNameTree()
+{
+ if (!jsNameTree) {
+
+ jsNameTree = new NameTree();
+
+ if (getNames()->isDict()) {
+ Object obj;
+
+ getNames()->dictLookup("JavaScript", &obj);
+ jsNameTree->init(xref, &obj);
+ obj.free();
+ }
+
+ }
+
+ return jsNameTree;
+}
+
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 134f1db..6021eed 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -50,6 +50,7 @@ class OCGs;
class NameTree {
public:
NameTree();
+ ~NameTree();
void init(XRef *xref, Object *tree);
void parse(Object *tree);
GBool lookup(GooString *name, Object *obj);
@@ -177,13 +178,13 @@ public:
Object *getDests();
// Get the number of embedded files
- int numEmbeddedFiles() { return embeddedFileNameTree.numEntries(); }
+ int numEmbeddedFiles() { return getEmbeddedFileNameTree()->numEntries(); }
// Get the i'th file embedded (at the Document level) in the document
EmbFile *embeddedFile(int i);
// Get the number of javascript scripts
- int numJS() { return jsNameTree.numEntries(); }
+ int numJS() { return getJSNameTree()->numEntries(); }
// Get the i'th JavaScript script (at the Document level) in the document
GooString *getJS(int i);
@@ -236,9 +237,10 @@ private:
int numPages; // number of pages
int pagesSize; // size of pages array
Object dests; // named destination dictionary
- NameTree destNameTree; // named destination name-tree
- NameTree embeddedFileNameTree; // embedded file name-tree
- NameTree jsNameTree; // Java Script name-tree
+ Object names; // named names dictionary
+ NameTree *destNameTree; // named destination name-tree
+ NameTree *embeddedFileNameTree; // embedded file name-tree
+ NameTree *jsNameTree; // Java Script name-tree
GooString *baseURI; // base URI for URI-type links
Object metadata; // metadata stream
Object structTreeRoot; // structure tree root dictionary
@@ -253,6 +255,12 @@ private:
int readPageTree(Dict *pages, PageAttrs *attrs, int start,
char *alreadyRead);
Object *findDestInTree(Object *tree, GooString *name, Object *obj);
+
+ Object *getNames();
+ NameTree *getDestNameTree();
+ NameTree *getEmbeddedFileNameTree();
+ NameTree *getJSNameTree();
+
};
#endif
--
1.6.4.2
From 9535240d32caafadc15bfa113b841e57f8972339 Mon Sep 17 00:00:00 2001
From: Hib Eris <hib at hiberis.nl>
Date: Thu, 25 Mar 2010 15:33:33 +0100
Subject: [PATCH 9/9] Parse Form on demand
---
poppler/Catalog.cc | 21 +++++++++++++--------
poppler/Catalog.h | 2 +-
2 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 4135d79..a0d33cf 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -86,12 +86,6 @@ Catalog::Catalog(XRef *xrefA) {
// get the AcroForm dictionary
catDict.dictLookup("AcroForm", &acroForm);
- // load Forms
- if (acroForm.isDict()) {
- form = new Form(xref,&acroForm);
- }
-
-
// read page tree
catDict.dictLookup("Pages", &pagesDict);
// This should really be isDict("Pages"), but I've seen at least one
@@ -153,8 +147,8 @@ Catalog::Catalog(XRef *xrefA) {
optContentProps.free();
// perform form-related loading after all widgets have been loaded
- if (form)
- form->postWidgetsLoad();
+ if (getForm())
+ getForm()->postWidgetsLoad();
catDict.free();
return;
@@ -848,6 +842,17 @@ Object *Catalog::getDests()
return &dests;
}
+Form *Catalog::getForm()
+{
+ if (!form) {
+ if (acroForm.isDict()) {
+ form = new Form(xref,&acroForm);
+ }
+ }
+
+ return form;
+}
+
Object *Catalog::getNames()
{
if (names.isNone())
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 6021eed..fd1c32e 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -199,7 +199,7 @@ public:
OCGs *getOptContentConfig() { return optContent; }
- Form* getForm() { return form; }
+ Form* getForm();
enum PageMode {
pageModeNone,
--
1.6.4.2
More information about the poppler
mailing list