[poppler] [PATCH] Allow properly identifying grouping elements
Adrian Perez de Castro
aperez at igalia.com
Tue Feb 4 09:48:04 PST 2014
Grouping elements in the Tagged-PDF structure should be identifiable as
such (see section 14.8.4.2 "Grouping Elements" of the PDF standard). Those
were previously reported as inline elements, which is not quite correct.
This patch introduces a new StructElement::isGrouping() method which
correctly reports grouping elements as such.
---
poppler/StructElement.cc | 31 +++++++++++++++++++------------
poppler/StructElement.h | 1 +
2 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/poppler/StructElement.cc b/poppler/StructElement.cc
index 6392433..1c5ba28 100644
--- a/poppler/StructElement.cc
+++ b/poppler/StructElement.cc
@@ -506,6 +506,7 @@ static GBool ownerHasMorePriority(Attribute::Owner a, Attribute::Owner b)
enum ElementType {
elementTypeUndefined,
+ elementTypeGrouping,
elementTypeInline,
elementTypeBlock,
};
@@ -516,16 +517,16 @@ static const struct TypeMapEntry {
ElementType elementType;
const AttributeMapEntry **attributes;
} typeMap[] = {
- { StructElement::Document, "Document", elementTypeInline, attributeMapShared },
- { StructElement::Part, "Part", elementTypeInline, attributeMapShared },
- { StructElement::Art, "Art", elementTypeInline, attributeMapColumns },
- { StructElement::Sect, "Sect", elementTypeInline, attributeMapColumns },
- { StructElement::Div, "Div", elementTypeInline, attributeMapColumns },
- { StructElement::BlockQuote, "BlockQuote", elementTypeInline, attributeMapInline },
- { StructElement::Caption, "Caption", elementTypeInline, attributeMapInline },
- { StructElement::NonStruct, "NonStruct", elementTypeInline, attributeMapInline },
- { StructElement::Index, "Index", elementTypeInline, attributeMapInline },
- { StructElement::Private, "Private", elementTypeInline, attributeMapInline },
+ { StructElement::Document, "Document", elementTypeGrouping, attributeMapShared },
+ { StructElement::Part, "Part", elementTypeGrouping, attributeMapShared },
+ { StructElement::Art, "Art", elementTypeGrouping, attributeMapColumns },
+ { StructElement::Sect, "Sect", elementTypeGrouping, attributeMapColumns },
+ { StructElement::Div, "Div", elementTypeGrouping, attributeMapColumns },
+ { StructElement::BlockQuote, "BlockQuote", elementTypeGrouping, attributeMapInline },
+ { StructElement::Caption, "Caption", elementTypeGrouping, attributeMapInline },
+ { StructElement::NonStruct, "NonStruct", elementTypeGrouping, attributeMapInline },
+ { StructElement::Index, "Index", elementTypeGrouping, attributeMapInline },
+ { StructElement::Private, "Private", elementTypeGrouping, attributeMapInline },
{ StructElement::Span, "Span", elementTypeInline, attributeMapInline },
{ StructElement::Quote, "Quote", elementTypeInline, attributeMapInline },
{ StructElement::Note, "Note", elementTypeInline, attributeMapInline },
@@ -563,8 +564,8 @@ static const struct TypeMapEntry {
{ StructElement::Figure, "Figure", elementTypeUndefined, attributeMapIllustration },
{ StructElement::Formula, "Formula", elementTypeUndefined, attributeMapIllustration },
{ StructElement::Form, "Form", elementTypeUndefined, attributeMapIllustration },
- { StructElement::TOC, "TOC", elementTypeUndefined, attributeMapShared },
- { StructElement::TOCI, "TOCI", elementTypeUndefined, attributeMapShared },
+ { StructElement::TOC, "TOC", elementTypeGrouping, attributeMapShared },
+ { StructElement::TOCI, "TOCI", elementTypeGrouping, attributeMapShared },
};
@@ -913,6 +914,12 @@ GBool StructElement::isInline() const
return entry ? (entry->elementType == elementTypeInline) : gFalse;
}
+GBool StructElement::isGrouping() const
+{
+ const TypeMapEntry *entry = getTypeMapEntry(type);
+ return entry ? (entry->elementType == elementTypeGrouping) : gFalse;
+}
+
GBool StructElement::hasPageRef() const
{
return pageRef.isRef() || (parent && parent->hasPageRef());
diff --git a/poppler/StructElement.h b/poppler/StructElement.h
index b9eef8a..7de0082 100644
--- a/poppler/StructElement.h
+++ b/poppler/StructElement.h
@@ -153,6 +153,7 @@ public:
GBool isOk() const { return type != Unknown; }
GBool isBlock() const;
GBool isInline() const;
+ GBool isGrouping() const;
inline GBool isContent() const { return (type == MCID) || isObjectRef(); }
inline GBool isObjectRef() const { return (type == OBJR && c->ref.num != -1 && c->ref.gen != -1); }
--
1.8.5.3
More information about the poppler
mailing list