[poppler] [PATCH] Allow properly identifying grouping elements

Adrian Perez de Castro aperez at igalia.com
Tue Feb 4 09:48:04 PST 2014


Grouping elements in the Tagged-PDF structure should be identifiable as
such (see section 14.8.4.2 "Grouping Elements" of the PDF standard). Those
were previously reported as inline elements, which is not quite correct.
This patch introduces a new StructElement::isGrouping() method which
correctly reports grouping elements as such.
---
 poppler/StructElement.cc | 31 +++++++++++++++++++------------
 poppler/StructElement.h  |  1 +
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/poppler/StructElement.cc b/poppler/StructElement.cc
index 6392433..1c5ba28 100644
--- a/poppler/StructElement.cc
+++ b/poppler/StructElement.cc
@@ -506,6 +506,7 @@ static GBool ownerHasMorePriority(Attribute::Owner a, Attribute::Owner b)
 
 enum ElementType {
   elementTypeUndefined,
+  elementTypeGrouping,
   elementTypeInline,
   elementTypeBlock,
 };
@@ -516,16 +517,16 @@ static const struct TypeMapEntry {
   ElementType               elementType;
   const AttributeMapEntry **attributes;
 } typeMap[] = {
-  { StructElement::Document,   "Document",   elementTypeInline,    attributeMapShared       },
-  { StructElement::Part,       "Part",       elementTypeInline,    attributeMapShared       },
-  { StructElement::Art,        "Art",        elementTypeInline,    attributeMapColumns      },
-  { StructElement::Sect,       "Sect",       elementTypeInline,    attributeMapColumns      },
-  { StructElement::Div,        "Div",        elementTypeInline,    attributeMapColumns      },
-  { StructElement::BlockQuote, "BlockQuote", elementTypeInline,    attributeMapInline       },
-  { StructElement::Caption,    "Caption",    elementTypeInline,    attributeMapInline       },
-  { StructElement::NonStruct,  "NonStruct",  elementTypeInline,    attributeMapInline       },
-  { StructElement::Index,      "Index",      elementTypeInline,    attributeMapInline       },
-  { StructElement::Private,    "Private",    elementTypeInline,    attributeMapInline       },
+  { StructElement::Document,   "Document",   elementTypeGrouping,  attributeMapShared       },
+  { StructElement::Part,       "Part",       elementTypeGrouping,  attributeMapShared       },
+  { StructElement::Art,        "Art",        elementTypeGrouping,  attributeMapColumns      },
+  { StructElement::Sect,       "Sect",       elementTypeGrouping,  attributeMapColumns      },
+  { StructElement::Div,        "Div",        elementTypeGrouping,  attributeMapColumns      },
+  { StructElement::BlockQuote, "BlockQuote", elementTypeGrouping,  attributeMapInline       },
+  { StructElement::Caption,    "Caption",    elementTypeGrouping,  attributeMapInline       },
+  { StructElement::NonStruct,  "NonStruct",  elementTypeGrouping,  attributeMapInline       },
+  { StructElement::Index,      "Index",      elementTypeGrouping,  attributeMapInline       },
+  { StructElement::Private,    "Private",    elementTypeGrouping,  attributeMapInline       },
   { StructElement::Span,       "Span",       elementTypeInline,    attributeMapInline       },
   { StructElement::Quote,      "Quote",      elementTypeInline,    attributeMapInline       },
   { StructElement::Note,       "Note",       elementTypeInline,    attributeMapInline       },
@@ -563,8 +564,8 @@ static const struct TypeMapEntry {
   { StructElement::Figure,     "Figure",     elementTypeUndefined, attributeMapIllustration },
   { StructElement::Formula,    "Formula",    elementTypeUndefined, attributeMapIllustration },
   { StructElement::Form,       "Form",       elementTypeUndefined, attributeMapIllustration },
-  { StructElement::TOC,        "TOC",        elementTypeUndefined, attributeMapShared       },
-  { StructElement::TOCI,       "TOCI",       elementTypeUndefined, attributeMapShared       },
+  { StructElement::TOC,        "TOC",        elementTypeGrouping,  attributeMapShared       },
+  { StructElement::TOCI,       "TOCI",       elementTypeGrouping,  attributeMapShared       },
 };
 
 
@@ -913,6 +914,12 @@ GBool StructElement::isInline() const
   return entry ? (entry->elementType == elementTypeInline) : gFalse;
 }
 
+GBool StructElement::isGrouping() const
+{
+  const TypeMapEntry *entry = getTypeMapEntry(type);
+  return entry ? (entry->elementType == elementTypeGrouping) : gFalse;
+}
+
 GBool StructElement::hasPageRef() const
 {
   return pageRef.isRef() || (parent && parent->hasPageRef());
diff --git a/poppler/StructElement.h b/poppler/StructElement.h
index b9eef8a..7de0082 100644
--- a/poppler/StructElement.h
+++ b/poppler/StructElement.h
@@ -153,6 +153,7 @@ public:
   GBool isOk() const { return type != Unknown; }
   GBool isBlock() const;
   GBool isInline() const;
+  GBool isGrouping() const;
 
   inline GBool isContent() const { return (type == MCID) || isObjectRef(); }
   inline GBool isObjectRef() const { return (type == OBJR && c->ref.num != -1 && c->ref.gen != -1); }
-- 
1.8.5.3



More information about the poppler mailing list