[poppler] [PATCH 2/6] Tagged-PDF: Interpret the document structure
Adrian Perez
aperez at igalia.com
Wed May 29 16:47:27 PDT 2013
From: Adrian Perez de Castro <aperez at igalia.com>
Starting from the StructTreeRoot, recursively create a tree of StructTreeNode
objects representing the structure of the document. The biggest missing
pieces are:
- Exposing more information in StructTreeNode; the most important
part is reading the structure and attribute dictionaries for
the elements in the tree.
- Resolving marked-content identifiers which refer to information
stored in page object streams.
- Creating a synthetic tree when the PDF is not tagged to use as
fall-back.
---
poppler/Catalog.cc | 36 +-
poppler/Catalog.h | 5 +-
poppler/MCOutputDev.cc | 145 +++++
poppler/MCOutputDev.h | 108 ++++
poppler/Makefile.am | 6 +
poppler/PDFDoc.h | 3 +-
poppler/StructElement.cc | 1361 +++++++++++++++++++++++++++++++++++++++++++++
poppler/StructElement.h | 273 +++++++++
poppler/StructTreeRoot.cc | 120 ++++
poppler/StructTreeRoot.h | 56 ++
10 files changed, 2095 insertions(+), 18 deletions(-)
create mode 100644 poppler/MCOutputDev.cc
create mode 100644 poppler/MCOutputDev.h
create mode 100644 poppler/StructElement.cc
create mode 100644 poppler/StructElement.h
create mode 100644 poppler/StructTreeRoot.cc
create mode 100644 poppler/StructTreeRoot.h
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index c365e06..f24f8a3 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -55,6 +55,7 @@
#include "OptionalContent.h"
#include "ViewerPreferences.h"
#include "FileSpec.h"
+#include "StructTreeRoot.h"
#if MULTITHREADED
# define catalogLocker() MutexLocker locker(&mutex)
@@ -90,6 +91,7 @@ Catalog::Catalog(PDFDoc *docA) {
embeddedFileNameTree = NULL;
jsNameTree = NULL;
viewerPrefs = NULL;
+ structTreeRoot = NULL;
pagesList = NULL;
pagesRefList = NULL;
@@ -175,8 +177,8 @@ Catalog::~Catalog() {
delete form;
delete optContent;
delete viewerPrefs;
+ delete structTreeRoot;
metadata.free();
- structTreeRoot.free();
outline.free();
acroForm.free();
viewerPreferences.free();
@@ -837,24 +839,28 @@ PageLabelInfo *Catalog::getPageLabelInfo()
return pageLabelInfo;
}
-Object *Catalog::getStructTreeRoot()
+StructTreeRoot *Catalog::getStructTreeRoot()
{
catalogLocker();
- if (structTreeRoot.isNone())
- {
- Object catDict;
+ if (!structTreeRoot) {
+ Object catalog;
+ Object root;
- xref->getCatalog(&catDict);
- if (catDict.isDict()) {
- catDict.dictLookup("StructTreeRoot", &structTreeRoot);
- } else {
- error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catDict.getTypeName());
- structTreeRoot.initNull();
- }
- catDict.free();
+ xref->getCatalog(&catalog);
+ if (!catalog.isDict()) {
+ error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", catalog.getTypeName());
+ catalog.free();
+ return NULL;
+ }
+
+ if (catalog.dictLookup("StructTreeRoot", &root)->isDict("StructTreeRoot")) {
+ structTreeRoot = new StructTreeRoot(doc, root.getDict(), getMarkInfo() & markInfoMarked);
+ }
+
+ root.free();
+ catalog.free();
}
-
- return &structTreeRoot;
+ return structTreeRoot;
}
Guint Catalog::getMarkInfo()
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 35b4f87..bdba3ce 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -53,6 +53,7 @@ class Form;
class OCGs;
class ViewerPreferences;
class FileSpec;
+class StructTreeRoot;
//------------------------------------------------------------------------
// NameTree
@@ -123,7 +124,7 @@ public:
GooString *readMetadata();
// Return the structure tree root object.
- Object *getStructTreeRoot();
+ StructTreeRoot* getStructTreeRoot();
// Return values from the MarkInfo dictionary as flags in a bitfield.
enum MarkInfoFlags {
@@ -227,8 +228,8 @@ private:
NameTree *jsNameTree; // Java Script name-tree
GooString *baseURI; // base URI for URI-type links
Object metadata; // metadata stream
- Object structTreeRoot; // structure tree root dictionary
int markInfo; // Flags from MarkInfo dictionary
+ StructTreeRoot *structTreeRoot; // structure tree root
Object outline; // outline dictionary
Object acroForm; // AcroForm dictionary
Object viewerPreferences; // ViewerPreference dictionary
diff --git a/poppler/MCOutputDev.cc b/poppler/MCOutputDev.cc
new file mode 100644
index 0000000..e593c78
--- /dev/null
+++ b/poppler/MCOutputDev.cc
@@ -0,0 +1,145 @@
+//========================================================================
+//
+// MCOutputDev.cc
+//
+// Copyright 2013 Igalia S.L.
+//
+//========================================================================
+
+#include "MCOutputDev.h"
+#include "GfxFont.h"
+#include "GfxState.h"
+#include "Annot.h"
+#include "Link.h"
+#include <vector>
+
+// Private state of MCOutputDev.
+struct MCOutputDev::Priv
+{
+  MCOpArray commands;   // Operations captured so far.
+  bool capturing;       // True while inside the marked-content sequence being captured.
+  int depth;            // Nesting depth of marked-content sequences while capturing.
+  int mcid;             // Marked-content identifier to capture.
+  GfxFont *lastFont;    // Last font seen by updateFont(); a reference is held on it.
+  Guint lastFlags;      // Last font flags seen by updateFont().
+  double pageWidth;
+  double pageHeight;
+
+  Priv(int mcidA):
+    commands(),
+    capturing(false),
+    depth(0),
+    mcid(mcidA),
+    lastFont(0),
+    lastFlags(0),
+    pageWidth(0.0),
+    pageHeight(0.0)
+  {}
+
+  // updateFont() takes a reference on the font it stores in lastFont;
+  // release it here so the font is not leaked when the device goes away.
+  ~Priv()
+  {
+    if (lastFont)
+      lastFont->decRefCnt();
+  }
+};
+
+
+// Creates an output device which captures the contents of the
+// marked-content sequence identified by "mcid".
+MCOutputDev::MCOutputDev(int mcid):
+  p(new Priv(mcid))
+{
+}
+
+
+MCOutputDev::~MCOutputDev()
+{
+  delete p;
+}
+
+
+// Records the page size, which drawChar() uses to discard characters
+// placed outside of the page.
+void MCOutputDev::startPage(int pageNum, GfxState *state, XRef *xref)
+{
+  if (state) {
+    p->pageWidth = state->getPageWidth();
+    p->pageHeight = state->getPageHeight();
+  } else {
+    p->pageWidth = p->pageHeight = 0.0;
+  }
+}
+
+
+void MCOutputDev::endPage()
+{
+  p->pageWidth = p->pageHeight = 0.0;
+}
+
+
+// Starts capturing when the sequence carrying the wanted MCID begins.
+// Sequences opened while already capturing only increase the nesting
+// depth, so that their end does not stop the capture prematurely.
+void MCOutputDev::beginMarkedContent(char *name, Dict *properties)
+{
+  if (p->capturing) {
+    p->depth++;
+    return;
+  }
+
+  int id = -1;
+  if (properties && properties->lookupInt("MCID", NULL, &id) && id == p->mcid) {
+    p->capturing = true;
+    p->depth = 1;
+  }
+}
+
+
+// Stops capturing only when the sequence which started the capture ends.
+void MCOutputDev::endMarkedContent(GfxState *state)
+{
+  if (p->capturing && --p->depth == 0)
+    p->capturing = false;
+}
+
+
+// Appends the Unicode code points of a drawn character to the list of
+// captured operations. Characters are ignored when not capturing, when
+// they have no Unicode mapping, when they fall outside of the page, or
+// when their transformed geometry is not a number.
+void MCOutputDev::drawChar(GfxState *state,
+                           double xx, double yy,
+                           double dx, double dy,
+                           double ox, double oy,
+                           CharCode c, int nBytes,
+                           Unicode *u, int uLen)
+{
+  if (uLen == 0 || !p->capturing)
+    return;
+
+  // Character spacing (plus word spacing for the space character) is
+  // part of (dx,dy) and has to be taken out of the glyph advance.
+  double spacing = state->getCharSpace();
+  if (c == (CharCode) 0x20)
+    spacing += state->getWordSpace();
+
+  double spaceDx, spaceDy;
+  state->textTransformDelta(spacing * state->getHorizScaling(), 0, &spaceDx, &spaceDy);
+  dx -= spaceDx;
+  dy -= spaceDy;
+
+  double width, height, left, top;
+  state->transformDelta(dx, dy, &width, &height);
+  state->transform(xx, yy, &left, &top);
+
+  // Characters which are not inside the page boundaries are dropped.
+  const bool outsidePage = left + width < 0 || left > p->pageWidth
+                        || top + height < 0 || top > p->pageHeight;
+
+  // Sanity check on the character geometry. Note: (x != x) <-> isnan(x)
+  const bool notANumber = left != left || top != top
+                       || width != width || height != height;
+
+  if (outsidePage || notANumber)
+    return;
+
+  for (Unicode *codePoint = u; codePoint != u + uLen; ++codePoint)
+    p->commands.push_back(MCOp(*codePoint));
+}
+
+
+// Tracks font changes. While capturing, a mcOpFontName operation is
+// recorded when the font family changes, and a mcOpFlags operation when
+// the bold/italic/fixed-width flags change.
+void MCOutputDev::updateFont(GfxState *state)
+{
+  GfxFont *font = state->getFont();
+  if (!font || font == p->lastFont)
+    return;
+
+  GooString *family = font->getFamily();
+  GooString *lastFamily = p->lastFont ? p->lastFont->getFamily() : NULL;
+
+  // Either family may be missing, so only compare the strings when both
+  // are present; otherwise the family changed if exactly one is missing.
+  bool familyChanged;
+  if (!p->lastFont)
+    familyChanged = true;
+  else if (family && lastFamily)
+    familyChanged = family->cmp(lastFamily) != 0;
+  else
+    familyChanged = family != lastFamily;
+
+  if (familyChanged && p->capturing && family)
+    p->commands.push_back(MCOp(mcOpFontName, family->getCString()));
+
+  // Always remember the current font, so that the comparisons above (and
+  // the early return for an unchanged font) are made against the font
+  // that was actually seen last. Take the new reference before dropping
+  // the old one.
+  font->incRefCnt();
+  if (p->lastFont)
+    p->lastFont->decRefCnt();
+  p->lastFont = font;
+
+  Guint flags = 0;
+  if (font->isBold()) flags |= mcOpFlagFontBold;
+  if (font->isItalic()) flags |= mcOpFlagFontItalic;
+  if (font->isFixedWidth()) flags |= mcOpFlagFontFixed;
+
+  if (p->lastFlags != flags) {
+    if (p->capturing)
+      p->commands.push_back(MCOp(mcOpFlags, flags));
+    p->lastFlags = flags;
+  }
+}
+
+
+// Gives read-only access to the operations captured so far.
+const MCOpArray& MCOutputDev::getMCOps() const
+{
+  const MCOpArray& captured = p->commands;
+  return captured;
+}
diff --git a/poppler/MCOutputDev.h b/poppler/MCOutputDev.h
new file mode 100644
index 0000000..cd7c4f5
--- /dev/null
+++ b/poppler/MCOutputDev.h
@@ -0,0 +1,108 @@
+//========================================================================
+//
+// MCOutputDev.h
+//
+// Copyright 2013 Igalia S.L.
+//
+//========================================================================
+
+#ifndef MCOUTPUTDEV_H
+#define MCOUTPUTDEV_H
+
+#include "goo/gtypes.h"
+#include "goo/gmem.h"
+#include "OutputDev.h"
+#include <vector>
+
+class GfxState;
+class GooString;
+class Dict;
+
+
+// Kind of operation stored in a MCOp; selects which member of the
+// union inside MCOp is in use.
+enum MCOpType {
+ mcOpUnichar, // MCOp::unichar: one Unicode code point
+ mcOpFontName, // MCOp::value: font family name
+ mcOpFlags, // MCOp::flags: combination of MCOpFlags
+ mcOpColor, // MCOp::color: RGB color
+};
+
+// Bit flags carried by mcOpFlags operations, describing the font in use.
+enum MCOpFlags {
+ mcOpFlagFontBold = (1 << 0),
+ mcOpFlagFontItalic = (1 << 1),
+ mcOpFlagFontFixed = (1 << 2),
+};
+
+// RGB color with floating point components.
+struct MCColor {
+  double r, g, b;
+
+  // Packs the color as 0xRRGGBB, scaling each component by 255 and
+  // keeping only its lowest 8 bits.
+  Guint rgbPixel() const {
+    const Guint red   = (Guint) (r * 255) & 0xFF;
+    const Guint green = (Guint) (g * 255) & 0xFF;
+    const Guint blue  = (Guint) (b * 255) & 0xFF;
+    return (red << 16) | (green << 8) | blue;
+  }
+};
+
+// A single operation captured from a marked-content sequence. The "type"
+// member selects which member of the anonymous union is meaningful.
+// Only mcOpFontName operations own heap memory (the "value" string), so
+// copying and assignment duplicate that string instead of sharing it.
+struct MCOp {
+  MCOpType type;
+  union {
+    Unicode unichar;
+    char *value;
+    Guint flags;
+    MCColor color;
+  };
+
+  MCOp(const MCOp& op): type(op.type) { copyPayload(op); }
+  MCOp(): type(mcOpFontName), value(NULL) {}
+  MCOp(Unicode u): type(mcOpUnichar), unichar(u) {}
+  MCOp(MCOpType t, Guint f): type(t), flags(f) {}
+  // A NULL string is stored as NULL; strdup() must not be called on it.
+  MCOp(MCOpType t, const char *s = NULL): type(t), value(s ? strdup(s) : NULL) {}
+  ~MCOp() { release(); }
+
+  // Needed because std::vector<MCOp> assigns elements: the implicit
+  // assignment would share "value" between two objects and free it twice.
+  MCOp& operator=(const MCOp& op) {
+    if (this != &op) {
+      release();
+      type = op.type;
+      copyPayload(op);
+    }
+    return *this;
+  }
+
+private:
+  // Frees the owned string, if this operation has one.
+  void release() { if (type == mcOpFontName) gfree(value); }
+
+  // Copies the union member selected by "type" (already set) from "op".
+  void copyPayload(const MCOp& op) {
+    switch (type) {
+      case mcOpFlags: flags = op.flags; break;
+      case mcOpUnichar: unichar = op.unichar; break;
+      case mcOpFontName: value = op.value ? strdup(op.value) : NULL; break;
+      case mcOpColor: color = op.color; break;
+    }
+  }
+};
+
+
+typedef std::vector<MCOp> MCOpArray;
+
+
+// Output device which records, as a list of MCOp operations, the text
+// drawn inside the marked-content sequence with a given MCID.
+class MCOutputDev: public OutputDev {
+public:
+ // mcid: identifier of the marked-content sequence to capture.
+ MCOutputDev(int mcid);
+ virtual ~MCOutputDev();
+
+ // Fixed answers to the OutputDev capability queries.
+ virtual GBool isOk() { return gTrue; }
+ virtual GBool upsideDown() { return gTrue; }
+ virtual GBool useDrawChar() { return gTrue; }
+ virtual GBool interpretType3Chars() { return gFalse; }
+ virtual GBool needNonText() { return gFalse; }
+ virtual GBool needCharCount() { return gFalse; }
+
+ virtual void startPage(int pageNum, GfxState *state, XRef *xref);
+ virtual void endPage();
+
+ // Restoring the graphics state may change the font, so it is handled
+ // as a font update.
+ virtual void restoreState(GfxState *state) { updateFont(state); }
+ virtual void updateFont(GfxState *state);
+
+ virtual void drawChar(GfxState *state,
+ double xx, double yy,
+ double dx, double dy,
+ double ox, double oy,
+ CharCode c, int nBytes,
+ Unicode *u, int uLen);
+
+ virtual void beginMarkedContent(char *name, Dict *properties);
+ virtual void endMarkedContent(GfxState *state);
+
+ // Operations captured so far.
+ const MCOpArray& getMCOps() const;
+
+private:
+ struct Priv;
+ Priv *p;
+};
+
+#endif /* !MCOUTPUTDEV_H */
diff --git a/poppler/Makefile.am b/poppler/Makefile.am
index ac51d05..eaff39d 100644
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -236,6 +236,8 @@ poppler_include_HEADERS = \
StdinPDFDocBuilder.h \
Stream-CCITT.h \
Stream.h \
+ StructElement.h \
+ StructTreeRoot.h \
UnicodeMap.h \
UnicodeMapTables.h \
UnicodeTypeTable.h \
@@ -250,6 +252,7 @@ poppler_include_HEADERS = \
NameToUnicodeTable.h \
PSOutputDev.h \
TextOutputDev.h \
+ MCOutputDev.h \
SecurityHandler.h \
UTF.h \
UTF8.h \
@@ -315,6 +318,8 @@ libpoppler_la_SOURCES = \
StdinCachedFile.cc \
StdinPDFDocBuilder.cc \
Stream.cc \
+ StructElement.cc \
+ StructTreeRoot.cc \
strtok_r.cpp \
UnicodeMap.cc \
UnicodeTypeTable.cc \
@@ -323,6 +328,7 @@ libpoppler_la_SOURCES = \
XRef.cc \
PSOutputDev.cc \
TextOutputDev.cc \
+ MCOutputDev.cc \
PageLabelInfo.h \
PageLabelInfo.cc \
SecurityHandler.cc \
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index da9bf5b..48189bc 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -60,6 +60,7 @@ class Outline;
class Linearization;
class SecurityHandler;
class Hints;
+class StructTreeRoot;
enum PDFWriteMode {
writeStandard,
@@ -139,7 +140,7 @@ public:
GooString *readMetadata() { return catalog->readMetadata(); }
// Return the structure tree root object.
- Object *getStructTreeRoot() { return catalog->getStructTreeRoot(); }
+ StructTreeRoot *getStructTreeRoot() { return catalog->getStructTreeRoot(); }
// Get page.
Page *getPage(int page);
diff --git a/poppler/StructElement.cc b/poppler/StructElement.cc
new file mode 100644
index 0000000..c99c9fa
--- /dev/null
+++ b/poppler/StructElement.cc
@@ -0,0 +1,1361 @@
+//========================================================================
+//
+// StructElement.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2013 Igalia S.L.
+//
+//========================================================================
+
+// Implementation files use "#pragma implementation"; "#pragma interface"
+// belongs in the corresponding header.
+#ifdef USE_GCC_PRAGMAS
+#pragma implementation
+#endif
+
+#include "StructElement.h"
+#include "StructTreeRoot.h"
+#include "TextOutputDev.h"
+#include "GlobalParams.h"
+#include "UnicodeMap.h"
+#include "PDFDoc.h"
+#include "Dict.h"
+
+#include <assert.h>
+
+class GfxState;
+
+
+// Checks for a name accepted as value of the Placement attribute.
+static GBool isPlacementName(Object* value)
+{
+  if (value->isName("Block")) return gTrue;
+  if (value->isName("Inline")) return gTrue;
+  if (value->isName("Before")) return gTrue;
+  if (value->isName("Start")) return gTrue;
+  if (value->isName("End")) return gTrue;
+  return gFalse;
+}
+
+// Checks for a name accepted as value of the WritingMode attribute.
+static GBool isWritingModeName(Object* value)
+{
+  if (value->isName("LrTb")) return gTrue;
+  if (value->isName("RlTb")) return gTrue;
+  if (value->isName("TbRl")) return gTrue;
+  return gFalse;
+}
+
+// Checks for a name accepted as a single border style.
+static GBool isBorderStyleName(Object* value)
+{
+  if (value->isName("None")) return gTrue;
+  if (value->isName("Hidden")) return gTrue;
+  if (value->isName("Dotted")) return gTrue;
+  if (value->isName("Dashed")) return gTrue;
+  if (value->isName("Solid")) return gTrue;
+  if (value->isName("Double")) return gTrue;
+  if (value->isName("Groove")) return gTrue;
+  if (value->isName("Ridge")) return gTrue;
+  if (value->isName("Inset")) return gTrue;
+  if (value->isName("Outset")) return gTrue;
+  return gFalse;
+}
+
+// Checks for a name accepted as value of the TextAlign attribute.
+static GBool isTextAlignName(Object* value)
+{
+  if (value->isName("Start")) return gTrue;
+  if (value->isName("End")) return gTrue;
+  if (value->isName("Center")) return gTrue;
+  if (value->isName("Justify")) return gTrue;
+  return gFalse;
+}
+
+// Checks for a name accepted as value of the BlockAlign attribute.
+static GBool isBlockAlignName(Object* value)
+{
+  if (value->isName("Before")) return gTrue;
+  if (value->isName("Middle")) return gTrue;
+  if (value->isName("After")) return gTrue;
+  if (value->isName("Justify")) return gTrue;
+  return gFalse;
+}
+
+// Checks for a name accepted as value of the InlineAlign attribute.
+static GBool isInlineAlignName(Object* value)
+{
+  if (value->isName("Start")) return gTrue;
+  if (value->isName("End")) return gTrue;
+  if (value->isName("Center")) return gTrue;
+  return gFalse;
+}
+
+static GBool isNumber(Object* value);
+
+// Checks for a value accepted by the LineHeight attribute: one of the
+// names "Normal" and "Auto", or a number.
+static GBool isLineHeight(Object* value)
+{
+  if (value->isName("Normal")) return gTrue;
+  if (value->isName("Auto")) return gTrue;
+  return isNumber(value);
+}
+
+// Checks for a name accepted as value of the TextDecorationType attribute.
+static GBool isTextDecorationName(Object* value)
+{
+  if (value->isName("None")) return gTrue;
+  if (value->isName("Underline")) return gTrue;
+  if (value->isName("Overline")) return gTrue;
+  if (value->isName("LineThrough")) return gTrue;
+  return gFalse;
+}
+
+// Checks for a name accepted as value of the RubyAlign attribute.
+static GBool isRubyAlignName(Object* value)
+{
+  if (value->isName("Start")) return gTrue;
+  if (value->isName("End")) return gTrue;
+  if (value->isName("Center")) return gTrue;
+  if (value->isName("Justify")) return gTrue;
+  if (value->isName("Distribute")) return gTrue;
+  return gFalse;
+}
+
+// Checks for a name accepted as value of the RubyPosition attribute.
+static GBool isRubyPositionName(Object* value)
+{
+  if (value->isName("Before")) return gTrue;
+  if (value->isName("After")) return gTrue;
+  if (value->isName("Warichu")) return gTrue;
+  if (value->isName("Inline")) return gTrue;
+  return gFalse;
+}
+
+// Checks for a name accepted as value of the GlyphOrientationVertical
+// attribute: "Auto" or one of the rotation angles, in degrees. The
+// angle "0" (no rotation) is a valid value as well.
+static GBool isGlyphOrientationName(Object* value)
+{
+  return value->isName("Auto")
+      || value->isName("0")
+      || value->isName("90")
+      || value->isName("180")
+      || value->isName("270")
+      || value->isName("360")
+      || value->isName("-90")
+      || value->isName("-180");
+}
+
+// Checks for a name accepted as value of the ListNumbering attribute.
+static GBool isListNumberingName(Object* value)
+{
+  if (value->isName("None")) return gTrue;
+  if (value->isName("Disc")) return gTrue;
+  if (value->isName("Circle")) return gTrue;
+  if (value->isName("Square")) return gTrue;
+  if (value->isName("Decimal")) return gTrue;
+  if (value->isName("UpperRoman")) return gTrue;
+  if (value->isName("LowerRoman")) return gTrue;
+  if (value->isName("UpperAlpha")) return gTrue;
+  if (value->isName("LowerAlpha")) return gTrue;
+  return gFalse;
+}
+
+// Checks for a name accepted as value of the Role attribute.
+static GBool isFieldRoleName(Object* value)
+{
+  if (value->isName("rb")) return gTrue;
+  if (value->isName("cb")) return gTrue;
+  if (value->isName("pb")) return gTrue;
+  if (value->isName("tv")) return gTrue;
+  return gFalse;
+}
+
+// Checks for a name accepted as value of the "checked" attribute.
+static GBool isFieldCheckedName(Object* value)
+{
+  if (value->isName("on")) return gTrue;
+  if (value->isName("off")) return gTrue;
+  if (value->isName("neutral")) return gTrue;
+  return gFalse;
+}
+
+// Checks for a name accepted as value of the Scope attribute.
+static GBool isTableScopeName(Object* value)
+{
+  if (value->isName("Row")) return gTrue;
+  if (value->isName("Column")) return gTrue;
+  if (value->isName("Both")) return gTrue;
+  return gFalse;
+}
+
+// Checks for an array of exactly three numbers, each of them in the
+// range [0, 1].
+static GBool isRGBColor(Object* value)
+{
+  if (!value->isArray() || value->arrayGetLength() != 3)
+    return gFalse;
+
+  for (int i = 0; i < 3; i++) {
+    Object component;
+    value->arrayGet(i, &component);
+    const GBool valid = component.isNum()
+                     && component.getNum() >= 0.0
+                     && component.getNum() <= 1.0;
+    component.free();
+    if (!valid)
+      return gFalse;
+  }
+
+  return gTrue;
+}
+
+// Checks for an integer (32 or 64 bit) strictly greater than zero.
+static GBool isNatural(Object* value)
+{
+  if (value->isInt() && value->getInt() > 0)
+    return gTrue;
+  if (value->isInt64() && value->getInt64() > 0)
+    return gTrue;
+  return gFalse;
+}
+
+// Checks for a number which is not negative. Note that zero is accepted.
+static GBool isPositive(Object* value)
+{
+  if (!value->isNum())
+    return gFalse;
+  return value->getNum() >= 0.0;
+}
+
+// Checks for a numeric value.
+static GBool isNumber(Object* value)
+{
+  const GBool numeric = value->isNum();
+  return numeric;
+}
+
+// Checks for a number or the name "Auto".
+static GBool isNumber_or_AutoName(Object* value)
+{
+  if (isNumber(value))
+    return gTrue;
+  return value->isName("Auto");
+}
+
+// Checks for a string value.
+static GBool isTextString(Object* value)
+{
+  // XXX: Shall isName() also be checked?
+  const GBool textual = value->isString();
+  return textual;
+}
+
+
+// Defines a checker function "name" for values which are arrays of items
+// validated with "checkItem":
+//  - length: number of items the array must have, or 0 for any length.
+//  - allowSingle: when true, a non-array value is accepted if it passes
+//    "checkItem" itself; when false non-array values are rejected.
+//  - allowNulls: when true, null items are accepted without passing them
+//    to "checkItem" (which would otherwise reject them); when false null
+//    items are rejected.
+#define ARRAY_CHECKER(name, checkItem, length, allowSingle, allowNulls) \
+  static GBool name(Object* value) {                                    \
+    if (!value->isArray())                                              \
+      return allowSingle ? checkItem(value) : gFalse;                   \
+                                                                        \
+    if (length && value->arrayGetLength() != length)                    \
+      return gFalse;                                                    \
+                                                                        \
+    for (int i = 0; i < value->arrayGetLength(); i++) {                 \
+      Object obj;                                                       \
+      value->arrayGet(i, &obj);                                         \
+      const GBool valid = obj.isNull() ? allowNulls : checkItem(&obj);  \
+      obj.free();                                                       \
+      if (!valid)                                                       \
+        return gFalse;                                                  \
+    }                                                                   \
+    return gTrue;                                                       \
+  }
+
+// Array checkers used by the attribute tables. The arguments are: name of
+// the generated function, checker applied to each item, required array
+// length (0 for any length), whether a single non-array value is accepted,
+// and the allowNulls flag of ARRAY_CHECKER.
+ARRAY_CHECKER(isRGBColor_or_OptX4, isRGBColor, 4, gTrue, gTrue );
+ARRAY_CHECKER(isPositive_or_OptX4, isPositive, 4, gTrue, gTrue );
+ARRAY_CHECKER(isPositive_or_X4, isPositive, 4, gTrue, gFalse);
+ARRAY_CHECKER(isBorderStyle, isBorderStyleName, 4, gTrue, gTrue );
+ARRAY_CHECKER(isNumber_X4, isNumber, 4, gFalse, gFalse);
+ARRAY_CHECKER(isNumber_or_Xn, isNumber, 0, gTrue, gFalse);
+ARRAY_CHECKER(isTableHeaders, isTextString, 0, gFalse, gFalse);
+
+
+// Type of functions used to do type-checking on attribute values
+typedef GBool (*AttributeCheckFunc)(Object*);
+
+// Maps attributes to their names and whether the attribute can be inherited.
+// A list of entries is terminated by an entry of type Attribute::Unknown.
+struct AttributeMapEntry {
+ Attribute::Type type; // Attribute being described
+ const char* name; // Name of the attribute
+ const Object* defval; // Default value, or NULL when there is none
+ GBool inherit; // Whether the attribute can be inherited
+ AttributeCheckFunc check; // Type-checking function for values, may be NULL
+};
+
+// Holds the objects used as default values in the attribute tables.
+// The single instance below is referenced by the ATTR_D() entries.
+struct AttributeDefaults {
+  Object Inline;
+  Object LrTb;
+  Object Normal;
+  Object Distribute;
+  Object off;
+  Object Zero;
+  Object Auto;
+  Object Start;
+  Object None;
+  Object Before;
+  Object Nat1;
+
+  AttributeDefaults() {
+    // Numeric defaults.
+    Zero.initReal(0.0);
+    Nat1.initInt(1);
+
+    // Name defaults.
+    Auto.initName("Auto");
+    Before.initName("Before");
+    Distribute.initName("Distribute");
+    Inline.initName("Inline");
+    LrTb.initName("LrTb");
+    None.initName("None");
+    Normal.initName("Normal");
+    Start.initName("Start");
+    off.initName("off");
+  }
+};
+
+static const AttributeDefaults attributeDefaults;
+
+
+// Helpers to write AttributeMapEntry initializers:
+// ATTR_LIST_END terminates a table;
+// ATTR_D(attribute, inheritable, checker, default) has a default value,
+// which names a member of attributeDefaults;
+// ATTR_N(attribute, inheritable, checker) has no default value.
+#define ATTR_LIST_END { Attribute::Unknown, NULL, NULL, gFalse, NULL }
+#define ATTR_D(x, i, c, v) { Attribute::x, #x, &attributeDefaults.v, i, c }
+#define ATTR_N(x, i, c) { Attribute::x, #x, NULL, i, c }
+
+// Attributes which are part of every attributeMap* list defined below.
+static const AttributeMapEntry attributeMapCommonShared[] =
+{
+ ATTR_D(Placement, gFalse, isPlacementName, Inline),
+ ATTR_D(WritingMode, gFalse, isWritingModeName, LrTb),
+ ATTR_N(BackgroundColor, gFalse, isRGBColor),
+ ATTR_N(BorderColor, gTrue, isRGBColor_or_OptX4),
+ ATTR_D(BorderStyle, gFalse, isBorderStyle, None),
+ ATTR_N(BorderThickness, gTrue, isPositive_or_OptX4),
+ ATTR_D(Padding, gFalse, isPositive_or_X4, Zero),
+ ATTR_N(Color, gTrue, isRGBColor),
+ ATTR_LIST_END
+};
+
+// Additional attributes for the "block" attribute sets.
+static const AttributeMapEntry attributeMapCommonBlock[] =
+{
+ ATTR_D(SpaceBefore, gFalse, isPositive, Zero),
+ ATTR_D(SpaceAfter, gFalse, isPositive, Zero),
+ ATTR_D(StartIndent, gTrue, isNumber, Zero),
+ ATTR_D(EndIndent, gTrue, isNumber, Zero),
+ ATTR_D(TextIndent, gTrue, isNumber, Zero),
+ ATTR_D(TextAlign, gTrue, isTextAlignName, Start),
+ ATTR_N(BBox, gFalse, isNumber_X4),
+ ATTR_D(Width, gFalse, isNumber_or_AutoName, Auto),
+ ATTR_D(Height, gFalse, isNumber_or_AutoName, Auto),
+ ATTR_D(BlockAlign, gTrue, isBlockAlignName, Before),
+ ATTR_D(InlineAlign, gTrue, isInlineAlignName, Start),
+ ATTR_LIST_END
+};
+
+// Additional attributes for the "inline" attribute sets.
+static const AttributeMapEntry attributeMapCommonInline[] =
+{
+ ATTR_D(BaselineShift, gFalse, isNumber, Zero),
+ ATTR_D(LineHeight, gTrue, isLineHeight, Normal),
+ ATTR_N(TextDecorationColor, gTrue, isRGBColor),
+ ATTR_N(TextDecorationThickness, gTrue, isPositive),
+ ATTR_D(TextDecorationType, gFalse, isTextDecorationName, None),
+ ATTR_D(GlyphOrientationVertical, gTrue, isGlyphOrientationName, Auto),
+ ATTR_LIST_END
+};
+
+// Additional attributes for the ruby text attribute set.
+static const AttributeMapEntry attributeMapCommonRubyText[] =
+{
+ ATTR_D(RubyPosition, gTrue, isRubyPositionName, Before),
+ ATTR_D(RubyAlign, gTrue, isRubyAlignName, Distribute),
+ ATTR_LIST_END
+};
+
+// Additional attributes for the columns attribute set.
+static const AttributeMapEntry attributeMapCommonColumns[] =
+{
+ ATTR_D(ColumnCount, gFalse, isNatural, Nat1),
+ ATTR_N(ColumnGap, gFalse, isNumber_or_Xn),
+ ATTR_N(ColumnWidths, gFalse, isNumber_or_Xn),
+ ATTR_LIST_END
+};
+
+// Additional attributes for the list attribute set.
+static const AttributeMapEntry attributeMapCommonList[] = {
+ ATTR_D(ListNumbering, gFalse, isListNumberingName, None),
+ ATTR_LIST_END
+};
+
+// Additional attributes for the print field attribute set.
+static const AttributeMapEntry attributeMapCommonPrintField[] =
+{
+ ATTR_N(Role, gFalse, isFieldRoleName),
+ ATTR_D(checked, gFalse, isFieldCheckedName, off),
+ ATTR_N(Desc, gFalse, isTextString),
+ ATTR_LIST_END
+};
+
+// Additional attributes for the table and table cell attribute sets.
+static const AttributeMapEntry attributeMapCommonTable[] =
+{
+ ATTR_N(Headers, gFalse, isTableHeaders),
+ ATTR_N(Scope, gFalse, isTableScopeName),
+ ATTR_N(Summary, gFalse, isTextString),
+ ATTR_LIST_END
+};
+
+// Additional attributes for the table cell attribute set.
+static const AttributeMapEntry attributeMapCommonTableCell[] =
+{
+ ATTR_D(RowSpan, gFalse, isNatural, Nat1),
+ ATTR_D(ColSpan, gFalse, isNatural, Nat1),
+ ATTR_D(TBorderStyle, gTrue, isBorderStyle, None),
+ ATTR_D(TPadding, gTrue, isPositive_or_X4, Zero),
+ ATTR_LIST_END
+};
+
+
+// NULL-terminated list of all the attribute tables; used for lookups
+// which are not restricted to a particular element type.
+static const AttributeMapEntry* attributeMapAll[] = {
+ attributeMapCommonShared,
+ attributeMapCommonBlock,
+ attributeMapCommonInline,
+ attributeMapCommonRubyText,
+ attributeMapCommonColumns,
+ attributeMapCommonList,
+ attributeMapCommonPrintField,
+ attributeMapCommonTable,
+ attributeMapCommonTableCell,
+ NULL,
+};
+
+// Attribute set containing only the shared attributes.
+static const AttributeMapEntry* attributeMapShared[] = {
+ attributeMapCommonShared,
+ NULL,
+};
+
+// Attribute set: shared and block attributes.
+static const AttributeMapEntry* attributeMapBlock[] = {
+ attributeMapCommonShared,
+ attributeMapCommonBlock,
+ NULL,
+};
+
+// Attribute set: shared and inline attributes.
+static const AttributeMapEntry* attributeMapInline[] = {
+ attributeMapCommonShared,
+ attributeMapCommonInline,
+ NULL,
+};
+
+// Attribute set: shared, block, table and table cell attributes.
+static const AttributeMapEntry* attributeMapTableCell[] = {
+ attributeMapCommonShared,
+ attributeMapCommonBlock,
+ attributeMapCommonTable,
+ attributeMapCommonTableCell,
+ NULL,
+};
+
+// Attribute set: shared, inline and ruby text attributes.
+static const AttributeMapEntry* attributeMapRubyText[] = {
+ attributeMapCommonShared,
+ attributeMapCommonInline,
+ attributeMapCommonRubyText,
+ NULL,
+};
+
+// Attribute set: shared, inline and columns attributes.
+static const AttributeMapEntry* attributeMapColumns[] = {
+ attributeMapCommonShared,
+ attributeMapCommonInline,
+ attributeMapCommonColumns,
+ NULL,
+};
+
+// Attribute set: shared and list attributes.
+static const AttributeMapEntry* attributeMapList[] = {
+ attributeMapCommonShared,
+ attributeMapCommonList,
+ NULL,
+};
+
+// Attribute set: shared and print field attributes.
+static const AttributeMapEntry* attributeMapPrintField[] = {
+ attributeMapCommonShared,
+ attributeMapCommonPrintField,
+ NULL,
+};
+
+// Attribute set: shared, block and table attributes.
+static const AttributeMapEntry* attributeMapTable[] = {
+ attributeMapCommonShared,
+ attributeMapCommonBlock,
+ attributeMapCommonTable,
+ NULL,
+};
+
+// Attribute set: shared, block and inline attributes.
+static const AttributeMapEntry* attributeMapIllustration[] = {
+ // XXX: Illustrations may have some attributes from the "shared", "inline",
+ // and "block" sets. This is a loose specification; making it stricter
+ // means duplicating entries from the sets. This seems good enough...
+ attributeMapCommonShared,
+ attributeMapCommonBlock,
+ attributeMapCommonInline,
+ NULL,
+};
+
+// Table mapping owners of attributes to their names.
+static const struct OwnerMapEntry {
+ Attribute::Owner owner;
+ const char* name;
+} ownerMap[] = {
+ // XXX: These are sorted in the owner priority resolution order. If the
+ // same attribute is defined with two owners, the order in the table
+ // can be used to know which one has more priority: entries which
+ // come first are the ones ownerHasMorePriority() favours.
+ { Attribute::XML_1_00, "XML-1.00" },
+ { Attribute::HTML_3_20, "HTML-3.20" },
+ { Attribute::HTML_4_01, "HTML-4.01" },
+ { Attribute::OEB_1_00, "OEB-1.00" },
+ { Attribute::RTF_1_05, "RTF-1.05" },
+ { Attribute::CSS_1_00, "CSS-1.00" },
+ { Attribute::CSS_2_00, "CSS-2.00" },
+ { Attribute::Layout, "Layout" },
+ { Attribute::PrintField, "PrintField" },
+ { Attribute::Table, "Table" },
+ { Attribute::List, "List" },
+ { Attribute::UserProperties, "UserProperties" },
+};
+
+
+// Tells whether owner "a" comes before owner "b" in ownerMap. An owner
+// which is not in the table is handled as if it were at position zero.
+static GBool ownerHasMorePriority(Attribute::Owner a, Attribute::Owner b)
+{
+  const unsigned count = sizeof(ownerMap) / sizeof(ownerMap[0]);
+  unsigned positionA = 0;
+  unsigned positionB = 0;
+
+  for (unsigned idx = 0; idx < count; ++idx) {
+    const Attribute::Owner current = ownerMap[idx].owner;
+    if (current == a)
+      positionA = idx;
+    if (current == b)
+      positionB = idx;
+  }
+
+  return positionA < positionB;
+}
+
+
+// Maps element types to their names and also serves as lookup table
+// for additional element type attributes.
+
+// Classification of structure element types, stored for each entry of
+// the typeMap table.
+enum ElementType {
+ elementTypeUndefined,
+ elementTypeInline,
+ elementTypeBlock,
+};
+
+// One entry per known structure element type: its name, its
+// classification, and the attribute set which is valid for it.
+// NOTE(review): Document, Part, Art, Sect, Div and similar elements are
+// classified as elementTypeInline here; presumably a separate "grouping"
+// classification would be more accurate -- confirm against the users of
+// the elementType field.
+static const struct TypeMapEntry {
+ StructElement::Type type;
+ const char* name;
+ ElementType elementType;
+ const AttributeMapEntry** attributes;
+} typeMap[] = {
+ { StructElement::Document, "Document", elementTypeInline, attributeMapShared },
+ { StructElement::Part, "Part", elementTypeInline, attributeMapShared },
+ { StructElement::Art, "Art", elementTypeInline, attributeMapColumns },
+ { StructElement::Sect, "Sect", elementTypeInline, attributeMapColumns },
+ { StructElement::Div, "Div", elementTypeInline, attributeMapColumns },
+ { StructElement::BlockQuote, "BlockQuote", elementTypeInline, attributeMapInline },
+ { StructElement::Caption, "Caption", elementTypeInline, attributeMapInline },
+ { StructElement::NonStruct, "NonStruct", elementTypeInline, attributeMapInline },
+ { StructElement::Index, "Index", elementTypeInline, attributeMapInline },
+ { StructElement::Private, "Private", elementTypeInline, attributeMapInline },
+ { StructElement::Span, "Span", elementTypeInline, attributeMapInline },
+ { StructElement::Quote, "Quote", elementTypeInline, attributeMapInline },
+ { StructElement::Note, "Note", elementTypeInline, attributeMapInline },
+ { StructElement::Reference, "Reference", elementTypeInline, attributeMapInline },
+ { StructElement::BibEntry, "BibEntry", elementTypeInline, attributeMapInline },
+ { StructElement::Code, "Code", elementTypeInline, attributeMapInline },
+ { StructElement::Link, "Link", elementTypeInline, attributeMapInline },
+ { StructElement::Annot, "Annot", elementTypeInline, attributeMapInline },
+ { StructElement::Ruby, "Ruby", elementTypeInline, attributeMapRubyText },
+ { StructElement::RB, "RB", elementTypeUndefined, attributeMapRubyText },
+ { StructElement::RT, "RT", elementTypeUndefined, attributeMapRubyText },
+ { StructElement::RP, "RP", elementTypeUndefined, attributeMapShared },
+ { StructElement::Warichu, "Warichu", elementTypeInline, attributeMapRubyText },
+ { StructElement::WT, "WT", elementTypeUndefined, attributeMapShared },
+ { StructElement::WP, "WP", elementTypeUndefined, attributeMapShared },
+ { StructElement::P, "P", elementTypeBlock, attributeMapBlock },
+ { StructElement::H, "H", elementTypeBlock, attributeMapBlock },
+ { StructElement::H1, "H1", elementTypeBlock, attributeMapBlock },
+ { StructElement::H2, "H2", elementTypeBlock, attributeMapBlock },
+ { StructElement::H3, "H3", elementTypeBlock, attributeMapBlock },
+ { StructElement::H4, "H4", elementTypeBlock, attributeMapBlock },
+ { StructElement::H5, "H5", elementTypeBlock, attributeMapBlock },
+ { StructElement::H6, "H6", elementTypeBlock, attributeMapBlock },
+ { StructElement::L, "L", elementTypeBlock, attributeMapList },
+ { StructElement::LI, "LI", elementTypeBlock, attributeMapBlock },
+ { StructElement::Lbl, "Lbl", elementTypeBlock, attributeMapBlock },
+ { StructElement::Table, "Table", elementTypeBlock, attributeMapTable },
+ { StructElement::TR, "TR", elementTypeUndefined, attributeMapShared },
+ { StructElement::TH, "TH", elementTypeUndefined, attributeMapTableCell },
+ { StructElement::TD, "TD", elementTypeUndefined, attributeMapTableCell },
+ { StructElement::THead, "THead", elementTypeUndefined, attributeMapShared },
+ { StructElement::TFoot, "TFoot", elementTypeUndefined, attributeMapShared },
+ { StructElement::TBody, "TBody", elementTypeUndefined, attributeMapShared },
+ { StructElement::Figure, "Figure", elementTypeUndefined, attributeMapIllustration },
+ { StructElement::Formula, "Formula", elementTypeUndefined, attributeMapIllustration },
+ { StructElement::Form, "Form", elementTypeUndefined, attributeMapIllustration },
+ { StructElement::TOC, "TOC", elementTypeUndefined, attributeMapShared },
+ { StructElement::TOCI, "TOCI", elementTypeUndefined, attributeMapShared },
+};
+
+
+//------------------------------------------------------------------------
+// Helpers for the attribute and structure type tables
+//------------------------------------------------------------------------
+
+// Searches a NULL-terminated list of attribute tables for the entry of
+// the given attribute type. Returns NULL when there is no such entry.
+static inline const AttributeMapEntry*
+getAttributeMapEntry(const AttributeMapEntry** entryList, Attribute::Type type)
+{
+  assert(entryList);
+  for (const AttributeMapEntry** table = entryList; *table; ++table) {
+    // Each table ends with an entry of type Attribute::Unknown.
+    for (const AttributeMapEntry* item = *table; item->type != Attribute::Unknown; ++item) {
+      assert(item->name);
+      if (item->type == type)
+        return item;
+    }
+  }
+  return NULL;
+}
+
+// Searches a NULL-terminated list of attribute tables for the entry of
+// the attribute with the given name. Returns NULL when there is no such
+// entry.
+static inline const AttributeMapEntry*
+getAttributeMapEntry(const AttributeMapEntry** entryList, const char* name)
+{
+  assert(entryList);
+  for (const AttributeMapEntry** table = entryList; *table; ++table) {
+    // Each table ends with an entry of type Attribute::Unknown.
+    for (const AttributeMapEntry* item = *table; item->type != Attribute::Unknown; ++item) {
+      assert(item->name);
+      if (strcmp(name, item->name) == 0)
+        return item;
+    }
+  }
+  return NULL;
+}
+
+// Finds the ownerMap entry for an owner, or NULL if it is not known.
+static inline const OwnerMapEntry* getOwnerMapEntry(Attribute::Owner owner)
+{
+  const OwnerMapEntry* const end = ownerMap + sizeof(ownerMap) / sizeof(ownerMap[0]);
+  for (const OwnerMapEntry* item = ownerMap; item != end; ++item) {
+    if (item->owner == owner)
+      return item;
+  }
+  return NULL;
+}
+
+// Finds the ownerMap entry for an owner name, or NULL if it is not known.
+static inline const OwnerMapEntry* getOwnerMapEntry(const char* name)
+{
+  const OwnerMapEntry* const end = ownerMap + sizeof(ownerMap) / sizeof(ownerMap[0]);
+  for (const OwnerMapEntry* item = ownerMap; item != end; ++item) {
+    if (strcmp(name, item->name) == 0)
+      return item;
+  }
+  return NULL;
+}
+
+// Name of an attribute owner, "UnknownOwner" for owners not in ownerMap.
+static const char* ownerToName(Attribute::Owner owner)
+{
+  const OwnerMapEntry* found = getOwnerMapEntry(owner);
+  if (!found)
+    return "UnknownOwner";
+  return found->name;
+}
+
+// Owner with the given name, Attribute::UnknownOwner for names which are
+// not in ownerMap.
+// NOTE(review): unlike the other helpers in this section this function is
+// not "static" -- confirm whether it is meant to be used from other files.
+Attribute::Owner nameToOwner(const char* name)
+{
+ const OwnerMapEntry* entry = getOwnerMapEntry(name);
+ return entry ? entry->owner : Attribute::UnknownOwner;
+}
+
+// Finds the typeMap entry for an element type, or NULL if it is not known.
+static inline const TypeMapEntry* getTypeMapEntry(StructElement::Type type)
+{
+  const TypeMapEntry* const end = typeMap + sizeof(typeMap) / sizeof(typeMap[0]);
+  for (const TypeMapEntry* item = typeMap; item != end; ++item) {
+    if (item->type == type)
+      return item;
+  }
+  return NULL;
+}
+
+// Finds the typeMap entry for an element type name, or NULL if it is not
+// known.
+static inline const TypeMapEntry* getTypeMapEntry(const char* name)
+{
+  const TypeMapEntry* const end = typeMap + sizeof(typeMap) / sizeof(typeMap[0]);
+  for (const TypeMapEntry* item = typeMap; item != end; ++item) {
+    if (strcmp(name, item->name) == 0)
+      return item;
+  }
+  return NULL;
+}
+
+// Name of an element type: "MarkedContent" for StructElement::MCID, the
+// name from typeMap for known types, and "Unknown" otherwise.
+static const char* typeToName(StructElement::Type type)
+{
+  // Marked content items do not have an entry in typeMap.
+  if (type == StructElement::MCID)
+    return "MarkedContent";
+
+  const TypeMapEntry* found = getTypeMapEntry(type);
+  if (!found)
+    return "Unknown";
+  return found->name;
+}
+
+// Element type with the given name, StructElement::Unknown for names
+// which are not in typeMap.
+static StructElement::Type nameToType(const char* name)
+{
+  const TypeMapEntry* found = getTypeMapEntry(name);
+  if (!found)
+    return StructElement::Unknown;
+  return found->type;
+}
+
+
+//------------------------------------------------------------------------
+// Attribute
+//------------------------------------------------------------------------
+
+// Creates a user property attribute with the given name. The value is
+// copied with Object::copy() when copyValue is true, and with
+// Object::shallowCopy() otherwise.
+Attribute::Attribute(const char* nameA, Object* valueA, GBool copyValue):
+  type(UserProperty),
+  owner(UserProperties),
+  revision(0),
+  name(nameA),
+  value(),
+  hidden(gFalse),
+  formatted(NULL)
+{
+  assert(valueA);
+
+  if (!copyValue) {
+    valueA->shallowCopy(&value);
+  } else {
+    valueA->copy(&value);
+  }
+}
+
+// Creates an attribute of a standard type. The value is copied with
+// Object::copy() when copyValue is true, and with Object::shallowCopy()
+// otherwise. If the value does not pass typeCheck(), the attribute type
+// is set to Unknown.
+Attribute::Attribute(Type type, Object* valueA, GBool copyValue):
+  type(type),
+  owner(UserProperties), // TODO: Determine corresponding owner from Type
+  revision(0),
+  name(),
+  value(),
+  hidden(gFalse),
+  formatted(NULL)
+{
+  assert(valueA);
+
+  if (copyValue)
+    valueA->copy(&value);
+  else
+    valueA->shallowCopy(&value);
+
+  if (!typeCheck()) {
+    // The "type" parameter shadows the member of the same name: the
+    // member has to be named explicitly, otherwise only the parameter
+    // would be modified.
+    this->type = Unknown;
+  }
+}
+
+// Releases the value and the formatted representation, if there is one.
+Attribute::~Attribute()
+{
+  value.free();
+  delete formatted;
+}
+
+// Name of the attribute: the stored name for user properties, the name
+// from the attribute tables for standard attributes, and "Unknown" when
+// the type is not in the tables.
+const char* Attribute::getTypeName() const
+{
+  // User properties carry their own name.
+  if (type == UserProperty)
+    return name.getCString();
+
+  const AttributeMapEntry* known = getAttributeMapEntry(attributeMapAll, type);
+  return known ? known->name : "Unknown";
+}
+
+// Returns the printable name of this attribute's owner, as produced by
+// ownerToName().
+const char* Attribute::getOwnerName() const
+{
+  const char* const ownerName = ownerToName(owner);
+  return ownerName;
+}
+
+// Returns the default value recorded in attributeMapAll for the given
+// attribute type, or NULL when the type has no entry there.
+Object* Attribute::getDefaultValue(Attribute::Type type)
+{
+  const AttributeMapEntry* found = getAttributeMapEntry(attributeMapAll, type);
+  if (!found)
+    return NULL;
+  // The map stores the default as a const object; callers get a non-const
+  // pointer to that same object.
+  return const_cast<Object*>(found->defval);
+}
+
+// Sets the formatted representation of the attribute value. Passing NULL
+// discards any representation previously set.
+void Attribute::setFormattedValue(const char* formattedA)
+{
+  if (formattedA) {
+    if (formatted)
+      formatted->Set(formattedA);
+    else
+      formatted = new GooString(formattedA);
+  } else {
+    delete formatted;
+    // Reset the pointer: it is tested above on the next call and deleted
+    // again by the destructor, so it must not be left dangling.
+    formatted = NULL;
+  }
+}
+
+// Checks the attribute against the attribute table of the given element.
+// Without an element, or when the element type has no attribute table,
+// the check passes. Otherwise the attribute type must be listed for the
+// element and, if the entry has a check function, the value must pass it.
+GBool Attribute::typeCheck(StructElement* element)
+{
+  // Tighter type-checking is only possible when an element is passed.
+  if (!element)
+    return gTrue;
+
+  const TypeMapEntry* elementInfo = getTypeMapEntry(element->getType());
+  if (!elementInfo || !elementInfo->attributes)
+    return gTrue;
+
+  const AttributeMapEntry* attrInfo = getAttributeMapEntry(elementInfo->attributes, type);
+  if (!attrInfo) {
+    // No entry: the attribute is not valid for the containing element.
+    return gFalse;
+  }
+
+  if (attrInfo->check && !((*attrInfo->check)(&value)))
+    return gFalse;
+
+  return gTrue;
+}
+
+// Resolves an attribute name to its Attribute::Type. When an element is
+// given and its type has an attribute table, only that table is searched;
+// otherwise attributeMapAll is used. Unmatched names yield Unknown.
+Attribute::Type Attribute::typeForName(const char* name, StructElement* element)
+{
+  const TypeMapEntry* elementInfo = element ? getTypeMapEntry(element->getType()) : NULL;
+
+  const AttributeMapEntry** candidates =
+    (elementInfo && elementInfo->attributes) ? elementInfo->attributes : attributeMapAll;
+
+  if (const AttributeMapEntry* found = getAttributeMapEntry(candidates, name))
+    return found->type;
+  return Unknown;
+}
+
+// Builds an Attribute from a user property dictionary.
+//
+//  - N (string or name) gives the property name; any other type is an error.
+//  - V gives the value; a null value is an error.
+//  - F (string) gives the formatted value, H (boolean) the hidden flag. A
+//    wrong type for either only produces a warning.
+//
+// Returns a newly allocated Attribute owned by the caller, or NULL on error.
+Attribute* Attribute::parseUserProperty(Dict* property)
+{
+  Object nameObj;
+  const char* propertyName = NULL;
+
+  property->lookup("N", &nameObj);
+  if (nameObj.isString()) {
+    propertyName = nameObj.getString()->getCString();
+  } else if (nameObj.isName()) {
+    propertyName = nameObj.getName();
+  } else {
+    error(errSyntaxError, -1, "N object is wrong type ({0:s})", nameObj.getTypeName());
+    nameObj.free();
+    return NULL;
+  }
+
+  Object valueObj;
+  property->lookup("V", &valueObj);
+  if (valueObj.isNull()) {
+    error(errSyntaxError, -1, "V object is wrong type ({0:s})", valueObj.getTypeName());
+    valueObj.free();
+    nameObj.free();
+    return NULL;
+  }
+
+  // The value is handed over with a shallow copy (copyValue is gFalse), so
+  // valueObj is deliberately not freed here. propertyName points into
+  // nameObj, which is therefore released only after construction.
+  Attribute* result = new Attribute(propertyName, &valueObj, gFalse);
+  nameObj.free();
+
+  Object extra;
+
+  property->lookup("F", &extra);
+  if (extra.isString())
+    result->setFormattedValue(extra.getString()->getCString());
+  else if (!extra.isNull())
+    error(errSyntaxWarning, -1, "F object is wrong type ({0:s})", extra.getTypeName());
+  extra.free();
+
+  property->lookup("H", &extra);
+  if (extra.isBool())
+    result->setHidden(extra.getBool());
+  else if (!extra.isNull())
+    error(errSyntaxWarning, -1, "H object is wrong type ({0:s})", extra.getTypeName());
+  extra.free();
+
+  return result;
+}
+
+
+//------------------------------------------------------------------------
+// StructElement
+//------------------------------------------------------------------------
+
+// Starts with no optional strings set, revision zero, and empty child and
+// attribute lists.
+StructElement::StructData::StructData():
+  parentRef(),
+  altText(NULL),
+  actualText(NULL),
+  id(NULL),
+  title(NULL),
+  expandedAbbr(NULL),
+  language(NULL),
+  revision(0),
+  elements(),
+  attributes()
+{
+}
+
+// Releases everything owned by the element data: the optional strings,
+// the parent reference, the child elements and the attributes.
+StructElement::StructData::~StructData()
+{
+  delete altText;
+  delete actualText;
+  delete id;
+  delete title;
+  // expandedAbbr is owned just like the other optional strings and has to
+  // be released as well, otherwise it leaks.
+  delete expandedAbbr;
+  // language is a plain C string released with gfree().
+  gfree(language);
+  parentRef.free();
+  for (std::vector<StructElement*>::iterator i = elements.begin(); i != elements.end(); ++i)
+    delete *i;
+  for (std::vector<Attribute*>::iterator i = attributes.begin(); i != attributes.end(); ++i)
+    delete *i;
+}
+
+
+// Creates a structure element from its dictionary. The element starts as
+// Unknown with fresh StructData, then parse() fills it in. Both treeRootA
+// and element must be non-NULL.
+StructElement::StructElement(Dict* element, StructTreeRoot* treeRootA, StructElement* parentA):
+  type(Unknown),
+  treeRoot(treeRootA),
+  parent(parentA),
+  pageRef(),
+  s(new StructData())
+{
+  assert(treeRoot != NULL);
+  assert(element != NULL);
+
+  parse(element);
+}
+
+// Creates a content element referring to the marked content with the
+// given identifier. treeRootA and parentA must be non-NULL, and mcid must
+// differ from InvalidMCID.
+StructElement::StructElement(int mcid, StructTreeRoot* treeRootA, StructElement* parentA):
+  type(MCID),
+  treeRoot(treeRootA),
+  parent(parentA),
+  pageRef(),
+  c(new ContentData(mcid))
+{
+  assert(treeRoot != NULL);
+  assert(parent != NULL);
+  assert(c->mcid != InvalidMCID);
+}
+
+// Releases the element payload and the page reference.
+StructElement::~StructElement()
+{
+  // Select the union member by element type alone. isContent() is also
+  // false for an MCID element holding InvalidMCID, and relying on it would
+  // delete the ContentData through the StructData pointer.
+  if (type == MCID)
+    delete c;
+  else
+    delete s;
+  pageRef.free();
+}
+
+// True when typeMap classifies this element type as a block element.
+GBool StructElement::isBlock() const
+{
+  const TypeMapEntry* info = getTypeMapEntry(type);
+  if (!info)
+    return gFalse;
+  return info->elementType == elementTypeBlock;
+}
+
+// True when typeMap classifies this element type as an inline element.
+GBool StructElement::isInline() const
+{
+  const TypeMapEntry* info = getTypeMapEntry(type);
+  if (!info)
+    return gFalse;
+  return info->elementType == elementTypeInline;
+}
+
+// True when this element, or any of its ancestors, holds a page reference.
+GBool StructElement::hasPageRef() const
+{
+  for (const StructElement* node = this; node; node = node->parent) {
+    if (node->pageRef.isRef())
+      return gTrue;
+  }
+  return gFalse;
+}
+
+// Returns the page reference of the nearest element, starting at this one
+// and walking up through the parents, that holds one. When none does, the
+// reference { -1, -1 } is returned.
+Ref StructElement::getPageRef() const
+{
+  for (const StructElement* node = this; node; node = node->parent) {
+    if (node->pageRef.isRef())
+      return node->pageRef.getRef();
+  }
+
+  static const Ref invalidRef = { -1, -1 };
+  return invalidRef;
+}
+
+// Returns the printable name of this element's type, as produced by
+// typeToName().
+const char* StructElement::getTypeName() const
+{
+  const char* const typeName = typeToName(type);
+  return typeName;
+}
+
+// Looks up an attribute of the given type.
+//
+//  - Content elements forward the query to their parent.
+//  - Unknown and UserProperty types are never matched (NULL is returned).
+//  - With attributeOwner == UnknownOwner, every attribute of the type is
+//    considered and the one whose owner has the highest priority, as
+//    decided by ownerHasMorePriority(), wins. Otherwise the first
+//    attribute with matching type and owner is returned.
+//  - When nothing is found locally and "inherit" is set, the parent is
+//    searched if attributeMapAll marks the attribute as inheritable.
+const Attribute* StructElement::findAttribute(Attribute::Type attributeType, GBool inherit,
+                                              Attribute::Owner attributeOwner) const
+{
+  if (isContent())
+    return parent->findAttribute(attributeType, inherit, attributeOwner);
+
+  if (attributeType == Attribute::Unknown || attributeType == Attribute::UserProperty)
+    return NULL;
+
+  const GBool anyOwner = (attributeOwner == Attribute::UnknownOwner);
+  const unsigned count = getNumAttributes();
+  const Attribute* match = NULL;
+
+  for (unsigned idx = 0; idx < count; idx++) {
+    const Attribute* candidate = getAttribute(idx);
+    if (candidate->getType() != attributeType)
+      continue;
+
+    if (anyOwner) {
+      // No owner requested: keep the candidate with the best owner priority.
+      if (!match || ownerHasMorePriority(candidate->getOwner(), match->getOwner()))
+        match = candidate;
+    } else if (candidate->getOwner() == attributeOwner) {
+      // Specific owner requested: the first hit is the answer.
+      match = candidate;
+      break;
+    }
+  }
+
+  if (match)
+    return match;
+
+  if (!inherit || !parent)
+    return NULL;
+
+  const AttributeMapEntry* info = getAttributeMapEntry(attributeMapAll, attributeType);
+  assert(info);
+  // TODO: Take into account special inheritance cases, for example:
+  //       inline elements which have been changed to be block using
+  //       "/Placement/Block" have slightly different rules.
+  if (info->inherit)
+    return parent->findAttribute(attributeType, inherit, attributeOwner);
+
+  return NULL;
+}
+
+// Collects the text referenced by this element.
+//
+//  - For content elements, the Unicode characters reported by getMCOps()
+//    are encoded with the global text encoding (UTF-8 when none is
+//    configured) and appended to "string". NULL is returned when there are
+//    no operations at all.
+//  - For other elements, when "recursive" is set, the text of all children
+//    is appended in order; otherwise NULL is returned.
+//
+// When "string" is NULL a new GooString is allocated and returned.
+GooString* StructElement::getText(GooString *string, GBool recursive) const
+{
+  if (isContent()) {
+    // Fetch the operations before acquiring the encoding map: the map is
+    // released with decRefCnt() below, and acquiring it ahead of this
+    // early return would leak that reference.
+    const MCOpArray ops(getMCOps());
+    if (!ops.size())
+      return NULL;
+
+    UnicodeMap *map = globalParams->getTextEncoding();
+    if (!map) {
+      GooString mapName("UTF-8");
+      map = UnicodeMap::parse(&mapName);
+    }
+    assert(map);
+
+    if (!string)
+      string = new GooString();
+
+    char buf[9];
+    int n;
+
+    for (MCOpArray::const_iterator i = ops.begin(); i != ops.end(); ++i) {
+      if (i->type == mcOpUnichar) {
+        n = map->mapUnicode(i->unichar, buf, sizeof(buf));
+        string->append(buf, n);
+      }
+    }
+    map->decRefCnt();
+    return string;
+  }
+
+  if (!recursive)
+    return NULL;
+
+  // Do a depth-first traversal, to get elements in logical order
+  if (!string)
+    string = new GooString();
+
+  for (unsigned i = 0; i < getNumElements(); i++)
+    getElement(i)->getText(string, recursive);
+
+  return string;
+}
+
+
+// Returns the marked-content operations gathered by an MCOutputDev for
+// this element's MCID. Non-content elements yield an empty array. When
+// the element's page can be determined only that page is displayed;
+// otherwise every page of the document is.
+const MCOpArray StructElement::getMCOps() const
+{
+  if (!isContent())
+    return MCOpArray(); // Empty array
+
+  MCOutputDev mcdev(getMCID());
+
+  // Zero means that the page could not be determined.
+  int page = 0;
+  if (hasPageRef()) {
+    const Ref ref = getPageRef();
+    page = treeRoot->getDoc()->findPage(ref.num, ref.gen);
+  }
+
+  int firstPage = page;
+  int lastPage = page;
+  if (page == 0) {
+    firstPage = 1;
+    lastPage = treeRoot->getDoc()->getNumPages();
+  }
+
+  treeRoot->getDoc()->displayPages(&mcdev, firstPage, lastPage, 72.0, 72.0, 0, gTrue, gFalse, gFalse);
+  return mcdev.getMCOps();
+}
+
+
+// Fills in this element from its structure element dictionary.
+//
+// Required entries are checked first (Type if present, P, S). On any of
+// those errors the function returns early and "type" stays Unknown, so
+// isOk() reports the failure. The element type is taken from S, resolved
+// through the RoleMap of the tree root when one exists. Optional entries
+// (ID, Pg, R, T, Lang, Alt, E, ActualText, A, C) only produce a warning
+// when they have the wrong type. Child elements (K) are parsed last.
+void StructElement::parse(Dict* element)
+{
+  Object obj;
+
+  // Type is optional, but if present must be StructElem
+  if (!element->lookup("Type", &obj)->isNull() && !obj.isName("StructElem")) {
+    error(errSyntaxError, -1, "Type of StructElem object is wrong");
+    obj.free();
+    return;
+  }
+  obj.free();
+
+  // Parent object reference (required). The looked-up object lives in
+  // s->parentRef, so that is the one whose type gets reported.
+  if (!element->lookupNF("P", &s->parentRef)->isRef()) {
+    error(errSyntaxError, -1, "P object is wrong type ({0:s})", s->parentRef.getTypeName());
+    return;
+  }
+
+  // Check whether the S-type is valid for the top level
+  // element and create a node of the appropriate type.
+  if (!element->lookup("S", &obj)->isName()) {
+    error(errSyntaxError, -1, "S object is wrong type ({0:s})", obj.getTypeName());
+    obj.free();
+    return;
+  }
+
+  // Type name may not be standard, resolve through RoleMap first.
+  // TODO: roleMap entries may need to be resolved recursively until
+  //       a known standard name is found, cycles may be present.
+  if (treeRoot->getRoleMap()) {
+    Object resolved;
+    if (treeRoot->getRoleMap()->lookup(obj.getName(), &resolved)->isName()) {
+      type = nameToType(resolved.getName());
+    } else if (resolved.isNull()) {
+      type = nameToType(obj.getName());
+    } else {
+      error(errSyntaxError, -1, "Value in RoleMap is wrong type ({0:s})", resolved.getTypeName());
+      resolved.free();
+      obj.free();
+      return;
+    }
+    resolved.free();
+  } else {
+    type = nameToType(obj.getName());
+  }
+  if (type == Unknown) {
+    error(errSyntaxError, -1, "StructElem object is wrong type ({0:s})", obj.getName());
+    obj.free();
+    return;
+  }
+  obj.free();
+
+  // Object ID (optional), to be looked at the IDTree in the tree root.
+  if (element->lookup("ID", &obj)->isString()) {
+    s->id = new GooString(obj.getString());
+  } else if (!obj.isNull()) {
+    error(errSyntaxWarning, -1, "ID object is wrong type ({0:s})", obj.getTypeName());
+  }
+  obj.free();
+
+  // Page reference (optional) in which at least one of the child items
+  // is to be rendered in. Note: each element stores only the /Pg value
+  // contained by it, and StructElement::getPageRef() may look in parent
+  // elements to find the page where an element belongs.
+  if (!element->lookupNF("Pg", &pageRef)->isRef() && !pageRef.isNull()) {
+    error(errSyntaxWarning, -1, "Pg object is wrong type ({0:s})", pageRef.getTypeName());
+  }
+
+  // Revision number (optional).
+  if (element->lookup("R", &obj)->isInt()) {
+    s->revision = obj.getInt();
+  } else if (!obj.isNull()) {
+    error(errSyntaxWarning, -1, "R object is wrong type ({0:s})", obj.getTypeName());
+  }
+  obj.free();
+
+  // Element title (optional).
+  if (element->lookup("T", &obj)->isString()) {
+    s->title = new GooString(obj.getString());
+  } else if (!obj.isNull()) {
+    error(errSyntaxWarning, -1, "T object is wrong type ({0:s})", obj.getTypeName());
+  }
+  obj.free();
+
+  // Language (optional). Keep a private copy of the characters: the
+  // StructData destructor releases "language" with gfree(), which must not
+  // be handed a pointer into the GooString's own storage. The GooString
+  // stays owned by "obj" and is released by obj.free() below.
+  if (element->lookup("Lang", &obj)->isString()) {
+    s->language = copyString(obj.getString()->getCString());
+  } else if (!obj.isNull()) {
+    error(errSyntaxWarning, -1, "Lang object is wrong type ({0:s})", obj.getTypeName());
+  }
+  obj.free();
+
+  // Alternative text (optional).
+  if (element->lookup("Alt", &obj)->isString()) {
+    s->altText = obj.getString();
+    obj.initNull(); // The StructElement takes ownership of the GooString
+  } else if (!obj.isNull()) {
+    error(errSyntaxWarning, -1, "Alt object is wrong type ({0:s})", obj.getTypeName());
+  }
+  obj.free();
+
+  // Expanded form of an abbreviation (optional).
+  if (element->lookup("E", &obj)->isString()) {
+    s->expandedAbbr = obj.getString();
+    obj.initNull(); // The StructElement takes ownership of the GooString
+  } else if (!obj.isNull()) {
+    error(errSyntaxWarning, -1, "E object is wrong type ({0:s})", obj.getTypeName());
+  }
+  obj.free();
+
+  // Actual text (optional).
+  if (element->lookup("ActualText", &obj)->isString()) {
+    s->actualText = obj.getString();
+    obj.initNull(); // The StructElement takes ownership of the GooString
+  } else if (!obj.isNull()) {
+    error(errSyntaxWarning, -1, "ActualText object is wrong type ({0:s})", obj.getTypeName());
+  }
+  obj.free();
+
+  // Attributes directly attached to the element (optional). The value is
+  // either one attribute dictionary, or an array of dictionaries where a
+  // dictionary may be followed by an integer revision number.
+  if (element->lookup("A", &obj)->isDict()) {
+    parseAttributes(obj.getDict());
+  } else if (obj.isArray()) {
+    Object iobj;
+    // Index of the first attribute created from the latest dictionary.
+    unsigned attrIndex = getNumAttributes();
+    for (int i = 0; i < obj.arrayGetLength(); i++) {
+      if (obj.arrayGet(i, &iobj)->isDict()) {
+        attrIndex = getNumAttributes();
+        // Parse the array item, not the enclosing array object.
+        parseAttributes(iobj.getDict());
+      } else if (iobj.isInt()) {
+        const int revision = iobj.getInt();
+        // Set revision numbers for the elements previously created.
+        for (unsigned j = attrIndex; j < getNumAttributes(); j++)
+          getAttribute(j)->setRevision(revision);
+      } else {
+        error(errSyntaxWarning, -1, "A item is wrong type ({0:s})", iobj.getTypeName());
+      }
+      iobj.free();
+    }
+  } else if (!obj.isNull()) {
+    error(errSyntaxWarning, -1, "A is wrong type ({0:s})", obj.getTypeName());
+  }
+  obj.free();
+
+  // Attributes referenced indirectly through the ClassMap (optional).
+  // Attributes already defined on the element are kept (keepExisting).
+  if (treeRoot->getClassMap()) {
+    Object classes;
+    if (element->lookup("C", &classes)->isName()) {
+      Object attr;
+      if (treeRoot->getClassMap()->lookup(classes.getName(), &attr)->isDict()) {
+        parseAttributes(attr.getDict(), gTrue);
+      } else if (attr.isArray()) {
+        // Kept across iterations, so that a revision number can reach the
+        // attributes created from the dictionary preceding it.
+        unsigned attrIndex = getNumAttributes();
+        for (int i = 0; i < attr.arrayGetLength(); i++) {
+          Object iobj;
+          if (attr.arrayGet(i, &iobj)->isDict()) {
+            attrIndex = getNumAttributes();
+            parseAttributes(iobj.getDict(), gTrue);
+          } else if (iobj.isInt()) {
+            // Set revision numbers for the elements previously created.
+            const int revision = iobj.getInt();
+            for (unsigned j = attrIndex; j < getNumAttributes(); j++)
+              getAttribute(j)->setRevision(revision);
+          } else {
+            error(errSyntaxWarning, -1, "C item is wrong type ({0:s})", iobj.getTypeName());
+          }
+          iobj.free();
+        }
+      } else if (!attr.isNull()) {
+        // The offending object is the ClassMap value, report its type.
+        error(errSyntaxWarning, -1, "C object is wrong type ({0:s})", attr.getTypeName());
+      }
+      attr.free();
+    }
+    // Released whatever its type was, not only when it is a name.
+    classes.free();
+  }
+
+  parseChildren(element);
+}
+
+// Creates a child element from one item of the K entry and appends it to
+// this element.
+//
+//  - An integer is a marked-content identifier.
+//  - An MCR dictionary carries the identifier in MCID and may carry its
+//    own page reference in Pg.
+//  - OBJR dictionaries are not handled yet.
+//  - Any other dictionary is parsed as a nested structure element.
+//
+// Returns the appended child, or NULL when nothing was created or the
+// child turned out to be invalid.
+StructElement* StructElement::parseChild(Object* childObj)
+{
+  assert(childObj);
+  StructElement* child = NULL;
+
+  if (childObj->isInt()) {
+    const int mcid = childObj->getInt();
+    // Negative identifiers are rejected: InvalidMCID (-1) is reserved as a
+    // sentinel, and the content element constructor asserts against it.
+    if (mcid < 0) {
+      error(errSyntaxError, -1, "MCID is invalid ({0:d})", mcid);
+      return NULL;
+    }
+    child = new StructElement(mcid, treeRoot, this);
+  } else if (childObj->isDict("MCR")) {
+    /*
+     * TODO: The optional Stm/StmOwn attributes are not handled, so all the
+     *       page will be always scanned when calling StructElement::getText().
+     */
+    Object mcidObj;
+    Object refObj;
+
+    if (!childObj->dictLookup("MCID", &mcidObj)->isInt()) {
+      error(errSyntaxError, -1, "MCID object is wrong type ({0:s})", mcidObj.getTypeName());
+      mcidObj.free();
+      return NULL;
+    }
+
+    const int mcid = mcidObj.getInt();
+    mcidObj.free();
+    if (mcid < 0) {
+      error(errSyntaxError, -1, "MCID is invalid ({0:d})", mcid);
+      return NULL;
+    }
+
+    child = new StructElement(mcid, treeRoot, this);
+
+    if (childObj->dictLookupNF("Pg", &refObj)->isRef()) {
+      // XXX Unclassy manipulation of the page reference (ugh!)
+      // The child takes over the contents of refObj, so it is not freed.
+      child->pageRef = refObj;
+    } else {
+      refObj.free();
+    }
+  } else if (childObj->isDict("OBJR")) {
+    // TODO: PDF Object Reference
+  } else if (childObj->isDict()) {
+    child = new StructElement(childObj->getDict(), treeRoot, this);
+  } else {
+    error(errSyntaxWarning, -1, "K has a child of wrong type ({0:s})", childObj->getTypeName());
+  }
+
+  if (child) {
+    if (child->isOk())
+      appendElement(child);
+    else {
+      delete child;
+      child = NULL;
+    }
+  }
+
+  return child;
+}
+
+// Parses the K entry of a structure element dictionary. K may be an array
+// of children, or a single child given directly as a dictionary or an
+// integer. Any other non-null value only produces a warning.
+void StructElement::parseChildren(Dict* element)
+{
+  Object kids;
+  element->lookup("K", &kids);
+
+  if (kids.isArray()) {
+    const int count = kids.arrayGetLength();
+    for (int idx = 0; idx < count; idx++) {
+      Object kid;
+      kids.arrayGet(idx, &kid);
+      parseChild(&kid);
+      kid.free();
+    }
+  } else if (kids.isDict() || kids.isInt()) {
+    parseChild(&kids);
+  } else if (!kids.isNull()) {
+    error(errSyntaxWarning, -1, "K in StructElement is wrong type ({0:s})", kids.getTypeName());
+  }
+
+  kids.free();
+}
+
+// Parses one attribute dictionary and appends the resulting attributes to
+// this element.
+//
+//  - With /O /UserProperties, /P must be an array of user property
+//    dictionaries, each handled by Attribute::parseUserProperty().
+//  - With any other owner name known to nameToOwner(), every key except
+//    /O is taken as a standard attribute name and type-checked against
+//    this element.
+//
+// When keepExisting is set, standard attributes whose type is already
+// present on the element are skipped. Problems only produce warnings.
+void StructElement::parseAttributes(Dict* attributes, GBool keepExisting)
+{
+  Object owner;
+  if (attributes->lookup("O", &owner)->isName("UserProperties")) {
+    // In this case /P is an array of UserProperty dictionaries
+    Object userProperties;
+    if (attributes->lookup("P", &userProperties)->isArray()) {
+      for (int i = 0; i < userProperties.arrayGetLength(); i++) {
+        Object property;
+        if (userProperties.arrayGet(i, &property)->isDict()) {
+          Attribute* attribute = Attribute::parseUserProperty(property.getDict());
+          if (attribute && attribute->isOk()) {
+            appendAttribute(attribute);
+          } else {
+            error(errSyntaxWarning, -1, "Item in P is invalid");
+            delete attribute;
+          }
+        } else {
+          error(errSyntaxWarning, -1, "Item in P is wrong type ({0:s})", property.getTypeName());
+        }
+        property.free();
+      }
+    } else {
+      error(errSyntaxWarning, -1, "P is wrong type ({0:s})", userProperties.getTypeName());
+    }
+    userProperties.free();
+  } else if (owner.isName()) {
+    // In this case the dictionary itself contains standard attributes.
+    // Check first if the owner is a valid standard one.
+    Attribute::Owner ownerValue = nameToOwner(owner.getName());
+    if (ownerValue != Attribute::UnknownOwner) {
+      // Iterate over the entries of the "attributes" dictionary.
+      // The /O entry (owner) is skipped.
+      for (int i = 0; i < attributes->getLength(); i++) {
+        const char* key = attributes->getKey(i);
+        if (strcmp(key, "O") == 0)
+          continue;
+
+        Attribute::Type type = Attribute::typeForName(key, this);
+
+        // Check if the attribute is already defined.
+        if (keepExisting) {
+          GBool exists = gFalse;
+          for (unsigned j = 0; j < getNumAttributes(); j++) {
+            if (getAttribute(j)->getType() == type) {
+              exists = gTrue;
+              break;
+            }
+          }
+          if (exists)
+            continue;
+        }
+
+        if (type == Attribute::Unknown) {
+          error(errSyntaxWarning, -1, "Wrong Attribute '{0:s}' in element {1:s}", key, getTypeName());
+          continue;
+        }
+
+        Object value;
+        GBool typeCheckOk = gTrue;
+        Attribute* attribute = new Attribute(type, attributes->getVal(i, &value), gFalse);
+        // Record the owner read from /O. The constructor cannot know it
+        // and defaults to UserProperties, which would make every standard
+        // attribute report the wrong owner.
+        attribute->owner = ownerValue;
+        if (attribute->isOk() && (typeCheckOk = attribute->typeCheck(this))) {
+          appendAttribute(attribute);
+        } else {
+          // It is not needed to free "value", the Attribute instance
+          // owns the contents, so deleting "attribute" is enough.
+          if (!typeCheckOk) {
+            error(errSyntaxWarning, -1, "Attribute {0:s} value is of wrong type ({1:s})",
+                  attribute->getTypeName(), attribute->getValue()->getTypeName());
+          }
+          delete attribute;
+        }
+      }
+    } else {
+      error(errSyntaxWarning, -1, "O object is invalid value ({0:s})", owner.getName());
+    }
+  } else if (!owner.isNull()) {
+    error(errSyntaxWarning, -1, "O is wrong type ({0:s})", owner.getTypeName());
+  }
+  owner.free();
+}
diff --git a/poppler/StructElement.h b/poppler/StructElement.h
new file mode 100644
index 0000000..63fb051
--- /dev/null
+++ b/poppler/StructElement.h
@@ -0,0 +1,273 @@
+//========================================================================
+//
+// StructElement.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2013 Igalia S.L.
+//
+//========================================================================
+
+#ifndef STRUCTELEMENT_H
+#define STRUCTELEMENT_H
+
+#ifdef USE_GCC_PRAGMAS
+#pragma interface
+#endif
+
+#include "goo/gtypes.h"
+#include "goo/GooString.h"
+#include "MCOutputDev.h"
+#include "Object.h"
+#include <vector>
+
+class GooString;
+class Dict;
+class StructElement;
+class StructTreeRoot;
+class TextWordList;
+
+
+// An attribute attached to a structure element: either one of the
+// standard attribute types listed in Type, or a user-defined property
+// identified by an arbitrary name.
+class Attribute {
+public:
+  enum Type {
+    Unknown = 0, // Uninitialized, parsing error, etc.
+    UserProperty, // User defined attribute (i.e. non-standard)
+
+    // Common standard attributes
+    Placement, WritingMode, BackgroundColor, BorderColor, BorderStyle,
+    BorderThickness, Color, Padding,
+
+    // Block element standard attributes
+    SpaceBefore, SpaceAfter, StartIndent, EndIndent, TextIndent, TextAlign,
+    BBox, Width, Height, BlockAlign, InlineAlign, TBorderStyle, TPadding,
+
+    // Inline element standard attributes
+    BaselineShift, LineHeight, TextDecorationColor, TextDecorationThickness,
+    TextDecorationType, RubyAlign, RubyPosition, GlyphOrientationVertical,
+
+    // Column-only standard attributes
+    ColumnCount, ColumnGap, ColumnWidths,
+
+    // List-only standard attributes
+    ListNumbering,
+
+    // PrintField-only standard attributes
+    Role, checked, Desc,
+
+    // Table-only standard attributes
+    RowSpan, ColSpan, Headers, Scope, Summary,
+  };
+
+  enum Owner {
+    UnknownOwner = 0,
+    // User-defined attributes
+    UserProperties,
+    // Standard attributes
+    Layout, List, PrintField, Table,
+    // Translation to other formats
+    XML_1_00, HTML_3_20, HTML_4_01, OEB_1_00, RTF_1_05, CSS_1_00, CSS_2_00,
+  };
+
+  // Creates a standard attribute. The name is predefined, and the
+  // value is type-checked to conform to the PDF specification.
+  Attribute(Type type, Object* value, GBool copyValue = gTrue);
+
+  // Creates an UserProperty attribute, with an arbitrary name and value.
+  Attribute(const char* name, Object* value, GBool copyValue = gTrue);
+
+  // False when the attribute type is Unknown.
+  GBool isOk() const { return type != Unknown; }
+
+  // Name, type and value can be set only on construction.
+  Type getType() const { return type; }
+  Owner getOwner() const { return owner; }
+  const char* getTypeName() const;
+  const char* getOwnerName() const;
+  Object* getValue() const { return &value; }
+  static Object* getDefaultValue(Type type);
+
+  const char* getName() const { return type == UserProperty ? name.getCString() : getTypeName(); }
+
+  // The revision is optional, and defaults to zero.
+  Guint getRevision() const { return revision; }
+  void setRevision(Guint revisionA) { revision = revisionA; }
+
+  // Hidden elements should not be displayed by the user agent
+  GBool isHidden() const { return hidden; }
+  void setHidden(GBool hiddenA) { hidden = hiddenA; }
+
+  // The formatted value may be in the PDF, or be left undefined (NULL).
+  // In the latter case the user agent should provide a default representation.
+  const char* getFormattedValue() const { return formatted ? formatted->getCString() : NULL; }
+  void setFormattedValue(const char *formattedA);
+
+  ~Attribute();
+
+private:
+  // Not copyable: an instance owns "formatted" and the contents of
+  // "value", and the destructor releases both, so a member-wise copy
+  // would release them twice. Declared but intentionally not defined.
+  Attribute(const Attribute&);
+  Attribute& operator=(const Attribute&);
+
+  Type type;
+  Owner owner;
+  Guint revision;
+  mutable GooString name;
+  mutable Object value;
+  GBool hidden;
+  GooString *formatted;
+
+  GBool typeCheck(StructElement* element = NULL);
+  static Type typeForName(const char* name, StructElement* element = NULL);
+  static Attribute* parseUserProperty(Dict* property);
+
+  friend class StructElement;
+};
+
+
+// A node of the document structure tree. A node is either a structure
+// element, holding StructData (children, attributes, optional strings),
+// or a content element of type MCID, holding only a marked-content
+// identifier. Instances are created by StructTreeRoot and by the parsing
+// functions of this class.
+class StructElement {
+public:
+  enum Type {
+    Unknown = 0,
+    MCID, // MCID reference, used internally
+
+    Document, Part, Art, Sect, Div, // Structural elements
+
+    Span, Quote, Note, Reference, BibEntry, // Inline elements
+    Code, Link, Annot,
+    BlockQuote, Caption, NonStruct,
+    TOC, TOCI, Index, Private,
+
+    P, H, H1, H2, H3, H4, H5, H6, // Paragraph-like
+
+    L, LI, Lbl, // List elements
+
+    Table, TR, TH, TD, THead, TFoot, TBody, // Table elements
+
+    Ruby, RB, RT, RP, // Ruby text elements
+    Warichu, WT, WP,
+
+    Figure, Formula, Form, // Illustration-like elements
+  };
+
+  static const int InvalidMCID = -1;
+
+  const char* getTypeName() const;
+  Type getType() const { return type; }
+  // False when the element type is Unknown, e.g. after a parsing error.
+  GBool isOk() const { return type != Unknown; }
+  GBool isBlock() const;
+  GBool isInline() const;
+
+  // TODO Handle object references (OBJR)
+  inline GBool isContent() const { return (type == MCID) && (c->mcid != InvalidMCID); }
+
+  int getMCID() const { return isContent() ? c->mcid : InvalidMCID; }
+  Ref getParentRef() { return isContent() ? parent->getParentRef() : s->parentRef.getRef(); }
+  GBool hasPageRef() const;
+  Ref getPageRef() const;
+  StructTreeRoot* getStructTreeRoot() const { return treeRoot; }
+
+  // Optional element identifier.
+  const GooString* getID() const { return isContent() ? NULL : s->id; }
+
+  // Optional ISO language name, e.g. en_US
+  const char* getLang(GBool recursive = gTrue) const
+    { return isContent() ? parent->getLang(recursive)
+                         : (s->language ? s->language : (recursive && parent ? parent->getLang() : NULL)); }
+
+  // Optional revision number, defaults to zero. Content elements have no
+  // revision of their own: reads give zero and writes are ignored.
+  Guint getRevision() const { return isContent() ? 0 : s->revision; }
+  void setRevision(Guint revision) { if (!isContent()) s->revision = revision; }
+
+  // Optional element title, in human-readable form.
+  const GooString* getTitle() const { return isContent() ? NULL : s->title; }
+
+  // Optional element expanded abbreviation text.
+  const GooString* getExpandedAbbr() const { return isContent() ? NULL : s->expandedAbbr; }
+
+  // Child elements. Content elements have none.
+  unsigned getNumElements() const { return isContent() ? 0 : s->elements.size(); }
+  const StructElement* getElement(int i) const { return isContent() ? NULL : s->elements.at(i); }
+  StructElement* getElement(int i) { return isContent() ? NULL : s->elements.at(i); }
+
+  void appendElement(StructElement* element)
+    { if (!isContent() && element && element->isOk()) s->elements.push_back(element); }
+
+  // Attributes. Content elements have none.
+  unsigned getNumAttributes() const { return isContent() ? 0 : s->attributes.size(); }
+  const Attribute* getAttribute(int i) const { return isContent() ? NULL : s->attributes.at(i); }
+  Attribute* getAttribute(int i) { return isContent() ? NULL : s->attributes.at(i); }
+
+  void appendAttribute(Attribute* attribute)
+    { if (!isContent() && attribute) s->attributes.push_back(attribute); }
+
+  const Attribute* findAttribute(Attribute::Type attributeType, GBool inherit = gFalse,
+                                 Attribute::Owner owner = Attribute::UnknownOwner) const;
+
+  GooString* getAltText() const { return isContent() ? NULL : s->altText; }
+  GooString* getActualText() const { return isContent() ? NULL : s->actualText; }
+
+  // Content text referenced by the element:
+  //
+  // - For MCID reference elements, this is just the text of the
+  //   corresponding marked content object in the page stream, regardless
+  //   of the setting of the "recursive" flag.
+  // - For other elements, if the "recursive" flag is set, the text
+  //   enclosed by *all* the child MCID reference elements of the subtree
+  //   is returned. The text is assembled by traversing the leaf MCID
+  //   reference elements in logical order.
+  // - In any other case, the function returns NULL.
+  //
+  // The text will be appended to the passed GooString. If NULL is passed,
+  // a new string is returned, and the ownership passed to the caller.
+  //
+  GooString* getText(GooString *string = NULL, GBool recursive = gTrue) const;
+
+  const MCOpArray getMCOps() const;
+
+  ~StructElement();
+
+private:
+  typedef std::vector<Attribute*> AttrPtrArray;
+  typedef std::vector<StructElement*> ElemPtrArray;
+
+  // Data in structure elements (anything which is not an MCID reference).
+  struct StructData {
+    Object parentRef;
+    GooString *altText;
+    GooString *actualText;
+    GooString *id;
+    GooString *title;
+    GooString *expandedAbbr;
+    char *language;
+    Guint revision;
+    ElemPtrArray elements;
+    AttrPtrArray attributes;
+
+    StructData();
+    ~StructData();
+  };
+
+  // Data in content elements (MCID, MCR)
+  struct ContentData {
+    int mcid;
+
+    ContentData(int mcidA = InvalidMCID): mcid(mcidA) {}
+  };
+
+  // Common data
+  Type type;
+  StructTreeRoot* treeRoot;
+  StructElement* parent;
+  mutable Object pageRef;
+
+  // Which member is valid depends on "type": "c" for MCID, "s" otherwise.
+  union {
+    StructData *s;
+    ContentData *c;
+  };
+
+  // Not copyable: the destructor deletes the payload and frees pageRef,
+  // so a member-wise copy would release them twice. Declared but
+  // intentionally not defined.
+  StructElement(const StructElement&);
+  StructElement& operator=(const StructElement&);
+
+  StructElement(Dict* elementDict, StructTreeRoot* treeRootA, StructElement* parentA = 0);
+  StructElement(int mcid, StructTreeRoot* treeRootA, StructElement* parentA);
+
+  void parse(Dict* elementDict);
+  StructElement* parseChild(Object* childObj);
+  void parseChildren(Dict* element);
+  void parseAttributes(Dict* element, GBool keepExisting = gFalse);
+
+  friend class StructTreeRoot;
+};
+
+#endif
+
diff --git a/poppler/StructTreeRoot.cc b/poppler/StructTreeRoot.cc
new file mode 100644
index 0000000..727bfe9
--- /dev/null
+++ b/poppler/StructTreeRoot.cc
@@ -0,0 +1,120 @@
+//========================================================================
+//
+// StructTreeRoot.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2013 Igalia S.L.
+//
+//========================================================================
+
+// This is an implementation file, so it carries the "implementation"
+// counterpart of the "#pragma interface" used by the headers.
+#ifdef USE_GCC_PRAGMAS
+#pragma implementation
+#endif
+
+#include "goo/GooString.h"
+#include "StructTreeRoot.h"
+#include "StructElement.h"
+#include "Object.h"
+#include "Dict.h"
+
+#include <assert.h>
+
+
+// Builds the structure tree from the StructTreeRoot dictionary of the
+// given document. Both docA and structTreeRootDict must be non-NULL.
+// "marked" is forwarded to parse().
+StructTreeRoot::StructTreeRoot(PDFDoc *docA, Dict* structTreeRootDict, GBool marked):
+  doc(docA),
+  roleMap(),
+  classMap(),
+  elements()
+{
+  assert(doc != NULL);
+  assert(structTreeRootDict != NULL);
+
+  parse(structTreeRootDict, marked);
+}
+
+// Deletes every top-level element and releases the RoleMap and ClassMap
+// objects.
+StructTreeRoot::~StructTreeRoot()
+{
+  for (unsigned idx = 0; idx < elements.size(); idx++)
+    delete elements[idx];
+  roleMap.free();
+  classMap.free();
+}
+
+// Reads the RoleMap and ClassMap entries, then creates the top-level
+// structure elements from the K entry, which may be an array of
+// dictionaries or a single dictionary. When "marked" is set, additional
+// warnings are issued for trees which do not look like those of a tagged
+// PDF: more than one child, a single non-array child, or a top-level
+// element which is not Document, Part, Art or Div.
+void StructTreeRoot::parse(Dict* root, GBool marked)
+{
+  // The RoleMap/ClassMap dictionaries are needed by all the parsing
+  // functions, which will resolve the custom names to canonical
+  // standard names.
+  root->lookup("RoleMap", &roleMap);
+  root->lookup("ClassMap", &classMap);
+
+  Object kids;
+  root->lookup("K", &kids);
+
+  if (kids.isArray()) {
+    const int count = kids.arrayGetLength();
+    if (marked && count > 1) {
+      error(errSyntaxWarning, -1, "K in StructTreeRoot has more than one children in a tagged PDF");
+    }
+
+    for (int idx = 0; idx < count; idx++) {
+      Object kid;
+      kids.arrayGet(idx, &kid);
+
+      if (!kid.isDict()) {
+        error(errSyntaxWarning, -1, "K has a child of wrong type ({0:s})", kid.getTypeName());
+        kid.free();
+        continue;
+      }
+
+      StructElement* child = new StructElement(kid.getDict(), this);
+      if (!child->isOk()) {
+        error(errSyntaxWarning, -1, "StructTreeRoot element could not be parsed");
+        delete child;
+        kid.free();
+        continue;
+      }
+
+      if (marked) {
+        switch (child->getType()) {
+          case StructElement::Document:
+          case StructElement::Part:
+          case StructElement::Art:
+          case StructElement::Div:
+            break;
+          default:
+            error(errSyntaxWarning, -1, "StructTreeRoot element of tagged PDF is wrong type ({0:s})", child->getTypeName());
+            break;
+        }
+      }
+      appendElement(child);
+      kid.free();
+    }
+  } else if (kids.isDict()) {
+    if (marked) {
+      error(errSyntaxWarning, -1, "K has a child of wrong type for a tagged PDF ({0:s})", kids.getTypeName());
+    }
+    StructElement* child = new StructElement(kids.getDict(), this);
+    if (!child->isOk()) {
+      error(errSyntaxWarning, -1, "StructTreeRoot element could not be parsed");
+      delete child;
+    } else {
+      appendElement(child);
+    }
+  } else if (!kids.isNull()) {
+    error(errSyntaxWarning, -1, "K in StructTreeRoot is wrong type ({0:s})", kids.getTypeName());
+  }
+
+  kids.free();
+}
+
+// Depth-first search, starting at "element", for the structure element
+// whose ID compares equal to elementId. Returns NULL when the subtree
+// contains no such element.
+static const StructElement* findElementAux(GooString* elementId, const StructElement* element)
+{
+  // The ID is optional: getID() returns NULL for content elements and for
+  // elements which carry no ID, so it must be checked before comparing.
+  const GooString* id = element->getID();
+  if (id && id->cmp(elementId) == 0) {
+    return element;
+  }
+  for (unsigned i = 0; i < element->getNumElements(); i++) {
+    const StructElement* child = findElementAux(elementId, element->getElement(i));
+    if (child) {
+      return child;
+    }
+  }
+  return NULL;
+}
+
+// Searches every top-level subtree, in order, for the element with the
+// given ID. Returns the first match, or NULL when there is none.
+// elementId must not be NULL.
+const StructElement* StructTreeRoot::findElement(GooString* elementId) const
+{
+  assert(elementId);
+
+  const unsigned count = getNumElements();
+  unsigned idx = 0;
+  while (idx < count) {
+    if (const StructElement* found = findElementAux(elementId, getElement(idx)))
+      return found;
+    idx++;
+  }
+  return NULL;
+}
diff --git a/poppler/StructTreeRoot.h b/poppler/StructTreeRoot.h
new file mode 100644
index 0000000..2952d93
--- /dev/null
+++ b/poppler/StructTreeRoot.h
@@ -0,0 +1,56 @@
+//========================================================================
+//
+// StructTreeRoot.h
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright 2013 Igalia S.L.
+//
+//========================================================================
+
+#ifndef STRUCTTREEROOT_H
+#define STRUCTTREEROOT_H
+
+#ifdef USE_GCC_PRAGMAS
+#pragma interface
+#endif
+
+#include "goo/gtypes.h"
+#include "Object.h"
+#include "StructElement.h"
+#include <vector>
+
+class Dict;
+class PDFDoc;
+
+
+// Root of the document structure tree. Holds the RoleMap and ClassMap
+// objects read from the StructTreeRoot dictionary and owns the top-level
+// structure elements.
+class StructTreeRoot
+{
+public:
+  StructTreeRoot(PDFDoc *docA, Dict* rootDict, GBool marked);
+  ~StructTreeRoot();
+
+  PDFDoc* getDoc() { return doc; }
+  // Both return NULL when the corresponding entry is not a dictionary.
+  Dict* getRoleMap() { return roleMap.isDict() ? roleMap.getDict() : NULL; }
+  Dict* getClassMap() { return classMap.isDict() ? classMap.getDict() : NULL; }
+  unsigned getNumElements() const { return elements.size(); }
+  const StructElement* getElement(int i) const { return elements.at(i); }
+  StructElement* getElement(int i) { return elements.at(i); }
+  // Appends a top-level element; NULL and invalid elements are ignored.
+  void appendElement(StructElement* element)
+    { if (element && element->isOk()) elements.push_back(element); }
+  // Returns the element with the given ID, or NULL when there is none.
+  const StructElement* findElement(GooString* elementId) const;
+
+private:
+  // Not copyable: the destructor deletes the elements and frees both map
+  // objects, so a member-wise copy would release them twice. Declared but
+  // intentionally not defined.
+  StructTreeRoot(const StructTreeRoot&);
+  StructTreeRoot& operator=(const StructTreeRoot&);
+
+  PDFDoc *doc;
+
+  Object roleMap;
+  Object classMap;
+
+  typedef std::vector<StructElement*> ElemPtrArray;
+  ElemPtrArray elements;
+
+  void parse(Dict* rootDict, GBool marked);
+};
+
+#endif
+
--
1.8.3
More information about the poppler
mailing list