[poppler] 2 commits - poppler/Object.h poppler/StructElement.cc poppler/StructTreeRoot.cc poppler/StructTreeRoot.h utils/pdfinfo.cc
Adrian Johnson
ajohnson at kemper.freedesktop.org
Thu Jan 4 05:14:24 UTC 2018
poppler/Object.h | 8 ++
poppler/StructElement.cc | 2
poppler/StructTreeRoot.cc | 124 ++++++++++++++++++++++++++--------------------
poppler/StructTreeRoot.h | 14 +++--
utils/pdfinfo.cc | 6 --
5 files changed, 91 insertions(+), 63 deletions(-)
New commits:
commit 6233710068e8406cb44741bdc74d1a0e2582e5cf
Author: Adrian Johnson <ajohnson at redneon.com>
Date: Thu Jan 4 15:41:11 2018 +1030
Remove error for wrong child type for tagged pdf
It is harmless and as a few PDFs do this it just adds noise to the output.
Bug #103587
diff --git a/poppler/StructTreeRoot.cc b/poppler/StructTreeRoot.cc
index efcb6fb4..fed05b1b 100644
--- a/poppler/StructTreeRoot.cc
+++ b/poppler/StructTreeRoot.cc
@@ -95,9 +95,6 @@ void StructTreeRoot::parse(Dict *root)
}
}
} else if (kids.isDict()) {
- if (marked) {
- error(errSyntaxWarning, -1, "K has a child of wrong type for a tagged PDF ({0:s})", kids.getTypeName());
- }
StructElement *child = new StructElement(kids.getDict(), this, NULL, seenElements);
if (child->isOk()) {
appendChild(child);
commit 321538259a9c79a99ce846a6ea2d94dd7fa56f61
Author: Adrian Johnson <ajohnson at redneon.com>
Date: Sun Nov 26 20:43:15 2017 +1030
Fix some bugs in StructTreeRoot parsing of parent tree
- Add support for parsing child nodes in the number tree
- Number tree keys do not have to be consecutive numbers. Use
map instead of vector for parentTree.
- Due to performance impact of iterating a map instead of
vector in parentTreeAdd, add a reverse mapping from Ref
to parentTree.
- Add mcid parameter to findParentElement() to enable finding
the parent when there are multiple MCIDs on the same page.
- Move RefCompare from pdfinfo.cc to Object.h so it can be
used by other files.
Bug #103912
diff --git a/poppler/Object.h b/poppler/Object.h
index f2ca20d1..a333e7c8 100644
--- a/poppler/Object.h
+++ b/poppler/Object.h
@@ -86,6 +86,14 @@ struct Ref {
int gen; // generation number
};
+struct RefCompare { // "less than" functor for Ref, usable as the comparator of ordered containers keyed by Ref
+ bool operator() (const Ref& lhs, const Ref& rhs) const {
+ if (lhs.num != rhs.num) // object numbers differ: they decide the order
+ return lhs.num < rhs.num;
+ return lhs.gen < rhs.gen; // same object number: order by generation number
+ }
+};
+
//------------------------------------------------------------------------
// object types
//------------------------------------------------------------------------
diff --git a/poppler/StructElement.cc b/poppler/StructElement.cc
index 62925474..e46fafea 100644
--- a/poppler/StructElement.cc
+++ b/poppler/StructElement.cc
@@ -1198,7 +1198,7 @@ StructElement *StructElement::parseChild(Object *ref,
* page will be always scanned when calling StructElement::getText().
*/
Object mcidObj = childObj->dictLookup("MCID");
- if (mcidObj.isInt()) {
+ if (!mcidObj.isInt()) {
error(errSyntaxError, -1, "MCID object is wrong type ({0:s})", mcidObj.getTypeName());
return NULL;
}
diff --git a/poppler/StructTreeRoot.cc b/poppler/StructTreeRoot.cc
index 5f561115..efcb6fb4 100644
--- a/poppler/StructTreeRoot.cc
+++ b/poppler/StructTreeRoot.cc
@@ -55,50 +55,7 @@ void StructTreeRoot::parse(Dict *root)
// be filled-in later when parsing them.
Object obj = root->lookup("ParentTree");
if (obj.isDict()) {
- Object nums = obj.dictLookup("Nums");
- if (nums.isArray()) {
- if (nums.arrayGetLength() % 2 == 0) {
- parentTree.resize(nums.arrayGetLength() / 2);
- // Index numbers in even positions, references in odd ones
- for (int i = 0; i < nums.arrayGetLength(); i += 2) {
- Object index = nums.arrayGet(i);
-
- if (!index.isInt()) {
- error(errSyntaxError, -1, "Nums item at position {0:d} is wrong type ({1:s})", i, index.getTypeName());
- continue;
- }
- const int idx = index.getInt();
- if (idx < 0 || idx >= (int)parentTree.size()) {
- error(errSyntaxError, -1, "Nums item at position {0:d} is invalid value ({1:d}): [0..{2:d}]", i, idx, parentTree.size() - 1);
- continue;
- }
-
- Object value = nums.arrayGetNF(i + 1);
- if (value.isRef()) {
- parentTree[idx].resize(1);
- parentTree[idx][0].ref = value.getRef();
- } else {
- value = nums.arrayGet(i + 1);
- if (value.isArray()) {
- parentTree[idx].resize(value.arrayGetLength());
- for (int j = 0; j < value.arrayGetLength(); j++) {
- Object itemvalue = value.arrayGetNF(j);
- if (itemvalue.isRef())
- parentTree[idx][j].ref = itemvalue.getRef();
- else
- error(errSyntaxError, -1, "Nums array item at position {0:d}/{1:d} is invalid type ({2:s})", i, j, itemvalue.getTypeName());
- }
- } else {
- error(errSyntaxError, -1, "Nums item at position {0:d} is wrong type ({1:s})", i + 1, value.getTypeName());
- }
- }
- }
- } else {
- error(errSyntaxError, -1, "Nums array length is not a even ({0:d})", nums.arrayGetLength());
- }
- } else {
- error(errSyntaxError, -1, "Nums object is wrong type ({0:s})", nums.getTypeName());
- }
+ parseNumberTreeNode(obj.getDict());
}
std::set<int> seenElements;
@@ -154,14 +111,80 @@ void StructTreeRoot::parse(Dict *root)
} else if (!kids.isNull()) {
error(errSyntaxWarning, -1, "K in StructTreeRoot is wrong type ({0:s})", kids.getTypeName());
}
+
+ // refToParentMap is only used during parsing. Ensure all memory used by it is freed.
+ std::multimap<Ref, Parent*, RefCompare>().swap(refToParentMap);
}
-void StructTreeRoot::parentTreeAdd(const Ref &objectRef, StructElement *element)
+void StructTreeRoot::parseNumberTreeNode(Dict *node) // Loads one number-tree node (and, via Kids, its descendants) into parentTree and refToParentMap.
{
- for (std::vector< std::vector<Parent> >::iterator i = parentTree.begin(); i != parentTree.end(); ++i) {
- for (std::vector<Parent>::iterator j = i->begin(); j != i->end(); ++j) {
- if (j->ref.num == objectRef.num && j->ref.gen == objectRef.gen)
- j->element = element;
+ Object kids = node->lookup("Kids");
+ if (kids.isArray()) {
+ for (int i = 0; i < kids.arrayGetLength(); i++) {
+ Object obj = kids.arrayGet(i);
+ if (obj.isDict()) {
+ parseNumberTreeNode(obj.getDict()); // NOTE(review): recursion has no depth/cycle guard; a self-referencing Kids entry would not terminate
+ } else {
+ error(errSyntaxError, -1, "Kids item at position {0:d} is wrong type ({1:s})", i, obj.getTypeName());
+ }
}
+ return; // a node with a Kids array is treated as intermediate: its own Nums entry is not read
+ } else if (!kids.isNull()) {
+ error(errSyntaxError, -1, "Kids object is wrong type ({0:s})", kids.getTypeName());
}
+
+ Object nums = node->lookup("Nums");
+ if (nums.isArray()) {
+ if (nums.arrayGetLength() % 2 == 0) {
+ // keys in even positions, references in odd ones
+ for (int i = 0; i < nums.arrayGetLength(); i += 2) {
+ Object key = nums.arrayGet(i);
+
+ if (!key.isInt()) {
+ error(errSyntaxError, -1, "Nums item at position {0:d} is wrong type ({1:s})", i, key.getTypeName());
+ continue;
+ }
+ int keyVal = key.getInt();
+ std::vector<Parent>& vec = parentTree[keyVal]; // operator[] creates the (empty) entry for a new key
+
+ Object value = nums.arrayGet(i + 1);
+ if (value.isArray()) {
+ // assign() instead of resize()+memset(): a byte clear sized by sizeof(Parent*) does not cover whole Parent objects.
+ vec.assign(value.arrayGetLength(), Parent());
+ for (int j = 0; j < value.arrayGetLength(); j++) {
+ Object itemvalue = value.arrayGetNF(j);
+ if (itemvalue.isRef()) {
+ Ref ref = itemvalue.getRef();
+ vec[j].ref = ref;
+ refToParentMap.insert(std::pair<Ref, Parent*>(ref, &vec[j])); // reverse index: Ref -> slot, consumed by parentTreeAdd()
+ } else if (!itemvalue.isNull()) {
+ error(errSyntaxError, -1, "Nums array item at position {0:d}/{1:d} is invalid type ({2:s})", i, j, itemvalue.getTypeName());
+ }
+ }
+ } else {
+ value = nums.arrayGetNF(i + 1); // re-read without resolving so a direct reference can be detected
+ if (value.isRef()) {
+ Ref ref = value.getRef();
+ vec.resize(1);
+ vec[0].ref = ref;
+ refToParentMap.insert(std::pair<Ref, Parent*>(ref, &vec[0]));
+ } else {
+ error(errSyntaxError, -1, "Nums item at position {0:d} is wrong type ({1:s})", i + 1, value.getTypeName());
+ }
+ }
+ }
+ } else {
+ error(errSyntaxError, -1, "Nums array length is not a even ({0:d})", nums.arrayGetLength());
+ }
+ } else {
+ error(errSyntaxError, -1, "Nums object is wrong type ({0:s})", nums.getTypeName());
+ }
+}
+
+
+void StructTreeRoot::parentTreeAdd(const Ref &objectRef, StructElement *element) // Stores `element` in every Parent slot registered under objectRef.
+{
+ auto range = refToParentMap.equal_range(objectRef); // multimap: one Ref may map to several Parent slots
+ for (auto it = range.first; it !=range.second; ++it)
+ it->second->element = element; // refToParentMap is emptied at the end of parse(), so nothing matches after parsing
}
diff --git a/poppler/StructTreeRoot.h b/poppler/StructTreeRoot.h
index 3b1f3c84..ca688499 100644
--- a/poppler/StructTreeRoot.h
+++ b/poppler/StructTreeRoot.h
@@ -18,6 +18,7 @@
#include "goo/gtypes.h"
#include "Object.h"
#include "StructElement.h"
+#include <map>
#include <vector>
class Dict;
@@ -43,9 +44,12 @@ public:
}
}
- const StructElement *findParentElement(unsigned index) const {
- if (index < parentTree.size() && parentTree[index].size() == 1) {
- return parentTree[index][0].element;
+ const StructElement *findParentElement(int key, unsigned mcid = 0) const { // element recorded for parent-tree key `key` at position `mcid`; NULL if the key is absent or mcid is out of range
+ auto it = parentTree.find(key); // find() does not insert a missing key, unlike operator[]
+ if (it != parentTree.end()) {
+ if (mcid < it->second.size()) { // bounds-check mcid against the entries stored for this key
+ return it->second[mcid].element;
+ }
}
return NULL;
}
@@ -71,9 +75,11 @@ private:
Object roleMap;
Object classMap;
ElemPtrArray elements;
- std::vector< std::vector<Parent> > parentTree;
+ std::map<int, std::vector<Parent> > parentTree;
+ std::multimap<Ref, Parent*, RefCompare> refToParentMap;
void parse(Dict *rootDict);
+ void parseNumberTreeNode(Dict *node);
void parentTreeAdd(const Ref &objectRef, StructElement *element);
friend class StructElement;
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index a3099cf6..4cb569c5 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -291,12 +291,6 @@ static void printStruct(const StructElement *element, unsigned indent) {
}
}
-struct RefCompare {
- bool operator() (const Ref& lhs, const Ref& rhs) const {
- return lhs.num < rhs.num;
- }
-};
-
struct GooStringCompare {
bool operator() (GooString* lhs, GooString* rhs) const {
return lhs->cmp(const_cast<GooString*>(rhs)) < 0;
More information about the poppler
mailing list