[poppler] Branch 'tagged-pdf' - 3 commits - glib/demo glib/Makefile.am glib/poppler-document.cc glib/poppler-document.h glib/poppler.h glib/poppler-private.h glib/poppler-structure.cc glib/poppler-structure-element.cc glib/poppler-structure-element.h glib/poppler-structure.h glib/reference
Carlos Garcia Campos
carlosgc at kemper.freedesktop.org
Thu Aug 1 06:53:22 PDT 2013
glib/Makefile.am | 4
glib/demo/Makefile.am | 2
glib/demo/main.c | 2
glib/demo/taggedstruct.c | 264 +++++
glib/demo/taggedstruct.h | 31
glib/poppler-document.cc | 22
glib/poppler-document.h | 1
glib/poppler-private.h | 32
glib/poppler-structure-element.cc | 1787 ++++++++++++++++++++++++++++++++++++
glib/poppler-structure-element.h | 368 +++++++
glib/poppler-structure.cc | 362 +++++++
glib/poppler-structure.h | 43
glib/poppler.h | 3
glib/reference/poppler-docs.sgml | 2
glib/reference/poppler-sections.txt | 86 +
glib/reference/poppler.types | 2
16 files changed, 3011 insertions(+)
New commits:
commit 5e41d451f45615ede34577cd4e3da33a8404c895
Author: Adrian Perez de Castro <aperez at igalia.com>
Date: Thu Jun 13 21:50:53 2013 +0300
Tagged-PDF: Heuristics in poppler-glib for data/layout table identification
Add functions in poppler-glib which, used on a PopplerStructureElement of
type POPPLER_STRUCTURE_TABLE, determine whether a table is used for layout
or contains actual data. The heuristic is quite simple so far:
- poppler_structure_element_is_data_table(): checks for table headings
and that THead/TBody elements are present.
- poppler_structure_element_is_layout_table(): negates the result of
the previous function.
The idea is that in the latter case a different heuristic could be
implemented later on, so that at some point tables could be "data tables",
"layout tables", or undetermined when the heuristics cannot tell for sure.
diff --git a/glib/poppler-structure-element.cc b/glib/poppler-structure-element.cc
index 5998c7c..12a3946 100644
--- a/glib/poppler-structure-element.cc
+++ b/glib/poppler-structure-element.cc
@@ -769,6 +769,77 @@ poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_e
return poppler_structure_element->elem->isBlock ();
}
+
+/*
+ * Walks the subtree rooted at @elem and returns how many THead/TBody
+ * elements it contains, @elem itself included. *has_th is set to TRUE
+ * whenever a TH element is visited; it is never reset to FALSE here.
+ */
+static guint
+data_table_score (const StructElement *elem, gboolean *has_th)
+{
+  g_assert (elem);
+  g_assert (has_th);
+
+  const StructElement::Type type = elem->getType ();
+  guint total = 0;
+
+  if (type == StructElement::THead || type == StructElement::TBody)
+    total = 1;
+  else if (type == StructElement::TH)
+    *has_th = TRUE;
+
+  const unsigned n_children = elem->getNumElements ();
+  for (unsigned child = 0; child < n_children; child++)
+    total += data_table_score (elem->getElement (child), has_th);
+
+  return total;
+}
+
+/**
+ * poppler_structure_element_is_data_table:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Note that there is no proper metadata in PDF documents which identifies
+ * data tables, so heuristics are used to determine whether a table is
+ * <em>likely</em> to contain data.
+ *
+ * Return value: Whether an element is a %POPPLER_STRUCTURE_ELEMENT_TABLE
+ * and the table contains series of data. %FALSE is returned for elements
+ * which are not tables.
+ */
+gboolean
+poppler_structure_element_is_data_table (PopplerStructureElement *poppler_structure_element)
+{
+ g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+ g_assert (poppler_structure_element->elem);
+
+ if (poppler_structure_element->elem->getType () != StructElement::Table)
+ return FALSE;
+
+ /*
+ * Data tables are likely to have table-header cells, and at least have
+ * the contents divided in THead and/or TBody elements. The scoring
+ * function counts the latter and also sets has_th to TRUE when it finds
+ * a TH cell. If the score is more than zero and there are header cells,
+ * assume there is a data table.
+ */
+ gboolean has_th = FALSE;
+ return data_table_score (poppler_structure_element->elem, &has_th) && has_th;
+}
+
+/**
+ * poppler_structure_element_is_layout_table:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Note that there is no proper metadata in PDF documents which identifies
+ * layout tables, so heuristics are used to determine whether a table is
+ * <em>likely</em> to be used for layout purposes. At the moment a table
+ * is considered a layout table when it is not detected as a data table.
+ *
+ * Return value: Whether an element is a %POPPLER_STRUCTURE_ELEMENT_TABLE
+ * and the table is used as aid for layout of page elements. %FALSE is
+ * returned for elements which are not tables.
+ */
+gboolean
+poppler_structure_element_is_layout_table (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+  g_assert (poppler_structure_element->elem);
+
+  /*
+   * The data-table check returns FALSE for every non-table element, so
+   * negating it alone would report paragraphs, spans, etc. as layout
+   * tables. Rule those out first.
+   */
+  if (poppler_structure_element->elem->getType () != StructElement::Table)
+    return FALSE;
+
+  return !poppler_structure_element_is_data_table (poppler_structure_element);
+}
+
/**
* poppler_structure_element_get_n_children:
* @poppler_structure_element: A #PopplerStructureElement
diff --git a/glib/poppler-structure-element.h b/glib/poppler-structure-element.h
index b3076d2..474bcba 100644
--- a/glib/poppler-structure-element.h
+++ b/glib/poppler-structure-element.h
@@ -337,6 +337,8 @@ gint poppler_structure_element_get_page (Poppl
gboolean poppler_structure_element_is_content (PopplerStructureElement *poppler_structure_element);
gboolean poppler_structure_element_is_inline (PopplerStructureElement *poppler_structure_element);
gboolean poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_element);
+gboolean poppler_structure_element_is_data_table (PopplerStructureElement *poppler_structure_element);
+gboolean poppler_structure_element_is_layout_table (PopplerStructureElement *poppler_structure_element);
guint poppler_structure_element_get_n_children (PopplerStructureElement *poppler_structure_element);
PopplerStructureElement *poppler_structure_element_get_child (PopplerStructureElement *poppler_structure_element,
guint index);
commit bc44e292ae5a4f67d60ad328a29af2f9956cad60
Author: Adrian Perez de Castro <aperez at igalia.com>
Date: Wed May 29 23:44:03 2013 +0300
Tagged-PDF: Pane in poppler-glib demo showing the structure
Adds a new pane in poppler-glib-demo showing the structure for Tagged-PDF
documents. It also serves as an example on how to use the API for
PopplerStructure and PopplerStructureElement.
diff --git a/glib/demo/Makefile.am b/glib/demo/Makefile.am
index 8120bc9..e5df9d0 100644
--- a/glib/demo/Makefile.am
+++ b/glib/demo/Makefile.am
@@ -42,6 +42,8 @@ poppler_glib_demo_SOURCES = \
selections.h \
selections.c \
text.h \
+ taggedstruct.h \
+ taggedstruct.c \
text.c \
transitions.h \
transitions.c \
diff --git a/glib/demo/main.c b/glib/demo/main.c
index 2523800..f07700e 100644
--- a/glib/demo/main.c
+++ b/glib/demo/main.c
@@ -34,6 +34,7 @@
#include "attachments.h"
#include "layers.h"
#include "text.h"
+#include "taggedstruct.h"
#include "find.h"
#include "print.h"
#include "selections.h"
@@ -65,6 +66,7 @@ static const PopplerGlibDemo demo_list[] = {
{ "Attachments", pgd_attachments_create_widget },
{ "Layers", pgd_layers_create_widget },
{ "Text", pgd_text_create_widget },
+ { "Tagged Structure", pgd_taggedstruct_create_widget },
{ "Find", pgd_find_create_widget },
{ "Print", pgd_print_create_widget }
};
diff --git a/glib/demo/taggedstruct.c b/glib/demo/taggedstruct.c
new file mode 100644
index 0000000..c64419b
--- /dev/null
+++ b/glib/demo/taggedstruct.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <string.h>
+
+#include "text.h"
+#include "utils.h"
+
+typedef struct { /* State of the "Tagged Structure" demo pane, passed to its callbacks as user data */
+ PopplerStructure *structure; /* result of poppler_document_get_structure(); NULL when there is none */
+ GtkWidget *view; /* tree view showing the structure elements */
+ GtkTreeStore *store; /* model of the view: column 0 label, column 1 element pointer */
+
+ GtkWidget *type_value; /* the *_value/link_target widgets are updated with gtk_label_set_text() on row activation */
+ GtkWidget *lang_value;
+ GtkWidget *abbr_value;
+ GtkWidget *id_value;
+ GtkWidget *title_value;
+ GtkWidget *link_target;
+ GtkTextBuffer *text_buffer; /* buffer of the read-only text view showing the element text */
+} PgdTaggedStructDemo;
+
+
+/*
+ * Releases the demo state: drops the references held on the
+ * PopplerStructure and on the tree store (when set) and frees @demo.
+ * A NULL @demo is ignored.
+ */
+static void
+pgd_taggedstruct_free (PgdTaggedStructDemo *demo)
+{
+  if (demo != NULL)
+    {
+      if (demo->structure != NULL)
+        g_object_unref (demo->structure);
+
+      if (demo->store != NULL)
+        {
+          g_object_unref (demo->store);
+          demo->store = NULL;
+        }
+
+      g_free (demo);
+    }
+}
+
+
+/*
+ * Appends a row for @element under @parent in @store (column 0: nick of
+ * the element kind, column 1: the element pointer), then recurses into
+ * the children of @element. @parent may be NULL for top-level rows.
+ */
+static void
+populate_store_aux (GtkTreeStore *store, GtkTreeIter *parent, PopplerStructureElement *element)
+{
+  GEnumClass *enum_class = G_ENUM_CLASS (g_type_class_ref (POPPLER_TYPE_STRUCTURE_ELEMENT_KIND));
+  GEnumValue *enum_value = g_enum_get_value (enum_class, poppler_structure_element_get_kind (element));
+  GtkTreeIter pos;
+  guint n_children;
+  guint i;
+
+  gtk_tree_store_append (store, &pos, parent);
+  /* g_enum_get_value() returns NULL for a value that is not in the enum. */
+  gtk_tree_store_set (store, &pos,
+                      0, enum_value ? enum_value->value_nick : "unknown",
+                      1, element,
+                      -1);
+
+  /*
+   * The G_TYPE_STRING column keeps its own copy of the nick, so the class
+   * reference taken above can be released here. Without this every element
+   * in the tree would leak one reference.
+   */
+  g_type_class_unref (enum_class);
+
+  n_children = poppler_structure_element_get_n_children (element);
+  for (i = 0; i < n_children; i++)
+    populate_store_aux (store, &pos, poppler_structure_element_get_child (element, i));
+}
+
+
+/*
+ * Creates the two-column model (string label, element pointer) of the
+ * pane. With a NULL @structure the model contains a single placeholder
+ * row whose element pointer is NULL; otherwise every child of @structure
+ * is added, with its descendants, through populate_store_aux().
+ */
+static GtkTreeStore *
+populate_store (PopplerStructure *structure)
+{
+  GtkTreeStore *model = gtk_tree_store_new (2, G_TYPE_STRING, G_TYPE_POINTER);
+  guint idx;
+
+  if (!structure)
+    {
+      GtkTreeIter placeholder;
+
+      gtk_tree_store_append (model, &placeholder, NULL);
+      gtk_tree_store_set (model, &placeholder, 0, "<b>Not a Tagged-PDF</b>", 1, NULL, -1);
+      return model;
+    }
+
+  for (idx = 0; idx < poppler_structure_get_n_children (structure); idx++)
+    populate_store_aux (model, NULL, poppler_structure_get_child (structure, idx));
+
+  return model;
+}
+
+
+/*
+ * "row-activated" handler of the tree view, also called directly from
+ * pgd_cursor_changed(). Shows the properties of the structure element
+ * stored in column 1 of the row at @path: id, title, language,
+ * abbreviation, kind of element, its text (for content elements) and the
+ * URI of its link target, if any. @user_data is the PgdTaggedStructDemo.
+ */
+static void
+pgd_row_activated (GtkTreeView *tree_view, GtkTreePath *path, GtkTreeViewColumn *column, gpointer user_data)
+{
+  PgdTaggedStructDemo *demo = (PgdTaggedStructDemo*) user_data;
+  GtkTreeModel *model = gtk_tree_view_get_model (tree_view);
+  PopplerStructureElement *element;
+  GtkTreeIter iter;
+  gpointer elementptr = NULL;
+
+  if (!gtk_tree_model_get_iter (model, &iter, path))
+    return;
+
+  gtk_tree_model_get (model, &iter, 1, &elementptr, -1);
+
+  /* The "Not a Tagged-PDF" placeholder row carries a NULL element. */
+  if (!elementptr)
+    return;
+
+  element = POPPLER_STRUCTURE_ELEMENT (elementptr);
+
+  gtk_label_set_text (GTK_LABEL (demo->id_value),
+                      poppler_structure_element_get_id (element));
+  gtk_label_set_text (GTK_LABEL (demo->title_value),
+                      poppler_structure_element_get_title (element));
+  gtk_label_set_text (GTK_LABEL (demo->lang_value),
+                      poppler_structure_element_get_language (element));
+  gtk_label_set_text (GTK_LABEL (demo->abbr_value),
+                      poppler_structure_element_get_abbreviation (element));
+  gtk_text_buffer_set_text (demo->text_buffer, "", -1);
+
+  if (poppler_structure_element_is_content (element))
+    {
+      const gchar *text = poppler_structure_element_get_text (element, FALSE);
+      if (text)
+        gtk_text_buffer_set_text (demo->text_buffer, text, -1);
+      gtk_label_set_text (GTK_LABEL (demo->type_value), "Content");
+    }
+  else
+    {
+      if (poppler_structure_element_is_inline (element))
+        gtk_label_set_text (GTK_LABEL (demo->type_value), "Inline");
+      else if (poppler_structure_element_is_block (element))
+        gtk_label_set_text (GTK_LABEL (demo->type_value), "Block");
+      else
+        gtk_label_set_text (GTK_LABEL (demo->type_value), "Structure");
+    }
+
+  gtk_label_set_text (GTK_LABEL (demo->link_target), "");
+  if (poppler_structure_element_get_reference_type (element) ==
+      POPPLER_STRUCTURE_REFERENCE_LINK)
+    {
+      PopplerLinkMapping *mapping = poppler_structure_element_get_reference_link (element);
+      if (mapping)
+        {
+          /*
+           * Only URI actions have a target to show. text has to start out
+           * as NULL, otherwise any other action type would leave it
+           * uninitialized when it is tested below.
+           */
+          const char *text = NULL;
+          switch (mapping->action->type)
+            {
+            case POPPLER_ACTION_URI:
+              text = mapping->action->uri.uri;
+              break;
+            default:
+              break;
+            }
+          if (text)
+            gtk_label_set_text (GTK_LABEL (demo->link_target), text);
+          poppler_link_mapping_free (mapping);
+        }
+    }
+}
+
+
+/*
+ * "cursor-changed" handler: when the tree view has a cursor row, treats it
+ * as activated so the details pane follows the cursor.
+ */
+static void
+pgd_cursor_changed (GtkTreeView *tree_view, gpointer user_data)
+{
+  GtkTreePath *cursor_path;
+
+  gtk_tree_view_get_cursor (tree_view, &cursor_path, NULL);
+  if (cursor_path == NULL)
+    return;
+
+  pgd_row_activated (tree_view, cursor_path, NULL, user_data);
+  gtk_tree_path_free (cursor_path);
+}
+
+
+/*
+ * Builds the "Tagged Structure" demo pane for @document: a tree view with
+ * the structure elements on one side and, on the other, a grid with the
+ * properties of the selected element above a read-only text view.
+ *
+ * Returns the top-level box of the pane. The demo state is released by
+ * pgd_taggedstruct_free() through a weak reference on that box.
+ */
+GtkWidget *
+pgd_taggedstruct_create_widget (PopplerDocument *document)
+{
+  PgdTaggedStructDemo *demo;
+  GtkCellRenderer *renderer;
+  GtkWidget *hbox;
+  GtkWidget *vbox;
+  GtkWidget *grid;
+  GtkWidget *scroll;
+  GtkWidget *w;
+  gint row;
+
+  demo = g_new0 (PgdTaggedStructDemo, 1);
+  /*
+   * poppler_document_get_structure() is documented as (transfer full):
+   * the reference it returns is the one released in
+   * pgd_taggedstruct_free(). Taking an additional reference here would
+   * leak the object.
+   */
+  demo->structure = poppler_document_get_structure (document);
+
+  demo->store = populate_store (demo->structure);
+  demo->view = gtk_tree_view_new_with_model (GTK_TREE_MODEL (demo->store));
+
+  renderer = gtk_cell_renderer_text_new ();
+  gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (demo->view),
+                                               0, "Type",
+                                               renderer,
+                                               "markup", 0,
+                                               NULL);
+  g_object_set (G_OBJECT (gtk_tree_view_get_column (GTK_TREE_VIEW (demo->view), 0)),
+                "expand", TRUE, NULL);
+
+  gtk_tree_view_expand_all (GTK_TREE_VIEW (demo->view));
+  gtk_tree_view_set_show_expanders (GTK_TREE_VIEW (demo->view), TRUE);
+  gtk_tree_view_set_headers_visible (GTK_TREE_VIEW (demo->view), TRUE);
+  gtk_tree_view_set_headers_clickable (GTK_TREE_VIEW (demo->view), FALSE);
+  gtk_tree_view_set_activate_on_single_click (GTK_TREE_VIEW (demo->view), TRUE);
+
+  hbox = gtk_box_new (GTK_ORIENTATION_HORIZONTAL, 6);
+  scroll = gtk_scrolled_window_new (NULL, NULL);
+  gtk_container_add (GTK_CONTAINER (scroll), demo->view);
+  gtk_widget_show (demo->view);
+  gtk_box_pack_start (GTK_BOX (hbox), scroll, TRUE, TRUE, 0);
+  gtk_widget_show (scroll);
+
+  row = 0;
+  grid = gtk_grid_new ();
+  gtk_container_set_border_width (GTK_CONTAINER (grid), 12);
+  gtk_grid_set_row_homogeneous (GTK_GRID (grid), FALSE);
+  gtk_grid_set_column_spacing (GTK_GRID (grid), 6);
+  gtk_grid_set_row_spacing (GTK_GRID (grid), 6);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Type:</b>", &demo->type_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>ID:</b>", &demo->id_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Title:</b>", &demo->title_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Language:</b>", &demo->lang_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Abbreviation:</b>", &demo->abbr_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Link Target:</b>", &demo->link_target, NULL, &row);
+
+  vbox = gtk_box_new (GTK_ORIENTATION_VERTICAL, 6);
+  gtk_box_pack_start (GTK_BOX (vbox), grid, FALSE, FALSE, 0);
+  gtk_widget_show (grid);
+
+  scroll = gtk_scrolled_window_new (NULL, NULL);
+  gtk_container_set_border_width (GTK_CONTAINER (scroll), 12);
+  gtk_box_pack_end (GTK_BOX (vbox), scroll, TRUE, TRUE, 0);
+  gtk_widget_show (scroll);
+
+  /* Read-only view showing the text of the selected content element. */
+  w = gtk_text_view_new ();
+  gtk_container_add (GTK_CONTAINER (scroll), w);
+
+  demo->text_buffer = gtk_text_view_get_buffer (GTK_TEXT_VIEW (w));
+  gtk_text_view_set_wrap_mode (GTK_TEXT_VIEW (w), GTK_WRAP_WORD_CHAR);
+  gtk_text_view_set_editable (GTK_TEXT_VIEW (w), FALSE);
+  gtk_text_buffer_set_text (demo->text_buffer, "", -1);
+  gtk_widget_show (w);
+
+  g_signal_connect (demo->view, "row-activated",
+                    G_CALLBACK (pgd_row_activated),
+                    demo);
+  g_signal_connect (demo->view, "cursor-changed",
+                    G_CALLBACK (pgd_cursor_changed),
+                    demo);
+
+  gtk_box_pack_end (GTK_BOX (hbox), vbox, TRUE, TRUE, 0);
+  gtk_widget_show (vbox);
+
+  /* Tie the lifetime of the demo state to the top-level box. */
+  g_object_weak_ref (G_OBJECT (hbox),
+                     (GWeakNotify) pgd_taggedstruct_free,
+                     demo);
+
+  gtk_widget_show (hbox);
+  return hbox;
+}
diff --git a/glib/demo/taggedstruct.h b/glib/demo/taggedstruct.h
new file mode 100644
index 0000000..3a38727
--- /dev/null
+++ b/glib/demo/taggedstruct.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <gtk/gtk.h>
+#include <poppler.h>
+
+#ifndef _TAGGEDSTRUCT_H_
+#define _TAGGEDSTRUCT_H_
+
+G_BEGIN_DECLS
+
+GtkWidget *pgd_taggedstruct_create_widget (PopplerDocument *document);
+
+G_END_DECLS
+
+#endif /* _TAGGEDSTRUCT_H_ */
commit e6074f17b46e692281aa657320be67d97aed91e8
Author: Adrian Perez de Castro <aperez at igalia.com>
Date: Thu May 9 12:01:59 2013 +0300
Tagged-PDF: Expose the structure tree in poppler-glib
Implements two new GObject classes, which build upon StructTreeRoot
and StructElement to expose the document structure of tagged PDFs
in the GLib binding:
- PopplerStructure wraps StructTreeRoot, and contains additional utility
methods to search for elements in the structure tree.
- PopplerStructureElement wraps StructElement, and does the heavy lifting
of exposing data in GLib-friendly data types.
For standard attributes, to avoid cluttering the class with methods, a
single poppler_structure_element_get_attribute() method is implemented,
which returns either NULL (for undefined attributes) or a GVariant
containing a sensible representation of the value.
diff --git a/glib/Makefile.am b/glib/Makefile.am
index a38e052..645cfd3 100644
--- a/glib/Makefile.am
+++ b/glib/Makefile.am
@@ -41,6 +41,8 @@ poppler_glib_public_headers = \
poppler-layer.h \
poppler-media.h \
poppler-movie.h \
+ poppler-structure.h \
+ poppler-structure-element.h \
poppler.h
poppler_glib_includedir = $(includedir)/poppler/glib
@@ -67,6 +69,8 @@ libpoppler_glib_la_SOURCES = \
poppler-cached-file-loader.h \
poppler-input-stream.cc \
poppler-input-stream.h \
+ poppler-structure.cc \
+ poppler-structure-element.cc \
poppler.cc \
poppler-private.h
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index 61d92e8..bc39314 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -1220,6 +1220,28 @@ poppler_document_get_metadata (PopplerDocument *document)
return retval;
}
+/**
+ * poppler_document_get_structure:
+ * @document: A #PopplerDocument
+ *
+ * Returns the #PopplerStructure of the document. This object is owned by
+ * the caller.
+ *
+ * Return value: (transfer full): The #PopplerStructure of the document,
+ * or %NULL when the document has no structure tree.
+ */
+PopplerStructure *
+poppler_document_get_structure (PopplerDocument *document)
+{
+ StructTreeRoot *tree_root;
+
+ g_return_val_if_fail (POPPLER_IS_DOCUMENT (document), NULL);
+
+ tree_root = document->doc->getStructTreeRoot ();
+ if (!tree_root) return NULL;
+
+ return _poppler_structure_new (document, tree_root);
+}
+
static void
poppler_document_get_property (GObject *object,
guint prop_id,
diff --git a/glib/poppler-document.h b/glib/poppler-document.h
index a34e88c..97cea4a 100644
--- a/glib/poppler-document.h
+++ b/glib/poppler-document.h
@@ -213,6 +213,7 @@ PopplerPageLayout poppler_document_get_page_layout (PopplerDocument *doc
PopplerPageMode poppler_document_get_page_mode (PopplerDocument *document);
PopplerPermissions poppler_document_get_permissions (PopplerDocument *document);
gchar *poppler_document_get_metadata (PopplerDocument *document);
+PopplerStructure *poppler_document_get_structure (PopplerDocument *document);
/* Attachments */
guint poppler_document_get_n_attachments (PopplerDocument *document);
diff --git a/glib/poppler-private.h b/glib/poppler-private.h
index ab39b49..4719a08 100644
--- a/glib/poppler-private.h
+++ b/glib/poppler-private.h
@@ -17,6 +17,7 @@
#include <OptionalContent.h>
#include <CairoOutputDev.h>
#include <FileSpec.h>
+#include <StructElement.h>
#endif
struct _PopplerDocument
@@ -95,6 +96,33 @@ struct _PopplerLayer
gchar *title;
};
+struct _PopplerStructure /* instance structure of the PopplerStructure GObject */
+{
+ /*< private >*/
+ GObject parent_instance;
+ PopplerDocument *document; /* presumably the document given to _poppler_structure_new() -- TODO confirm ownership */
+ StructTreeRoot *root; /* presumably the tree root given to _poppler_structure_new() -- TODO confirm */
+ PopplerStructureElement **children; /* NOTE(review): looks like an array of wrappers for the root's children -- confirm in poppler-structure.cc */
+};
+
+struct _PopplerStructureElement /* instance structure of the PopplerStructureElement GObject */
+{
+ /*< private >*/
+ GObject parent_instance;
+ StructElement *elem; /* wrapped core element; the is_block()/is_data_table() accessors read through it */
+ gchar *id; /* NOTE(review): the gchar* fields look like cached string results of the getters -- confirm in poppler-structure-element.cc */
+ gchar *title;
+ gchar *text;
+ gchar *text_r;
+ gchar *text_abbrev;
+ gchar *alt_text;
+ gchar *actual_text;
+ gchar *language;
+ GList *text_spans;
+ PopplerStructure *structure; /* presumably the structure given to _poppler_structure_element_new() -- TODO confirm */
+ PopplerStructureElement **children; /* NOTE(review): looks like an array of wrappers for the child elements -- confirm */
+};
+
GList *_poppler_document_get_layers (PopplerDocument *document);
GList *_poppler_document_get_layer_rbgroup (PopplerDocument *document,
Layer *layer);
@@ -120,6 +148,10 @@ PopplerAnnot *_poppler_annot_free_text_new (Annot *annot);
PopplerAnnot *_poppler_annot_file_attachment_new (Annot *annot);
PopplerAnnot *_poppler_annot_movie_new (Annot *annot);
PopplerAnnot *_poppler_annot_screen_new (Annot *annot);
+PopplerStructure *_poppler_structure_new (PopplerDocument *poppler_document,
+ StructTreeRoot *struct_tree_root);
+PopplerStructureElement *_poppler_structure_element_new (PopplerStructure *structure,
+ StructElement *struct_element);
char *_poppler_goo_string_to_utf8(GooString *s);
gboolean _poppler_convert_pdf_date_to_gtime (GooString *date,
diff --git a/glib/poppler-structure-element.cc b/glib/poppler-structure-element.cc
new file mode 100644
index 0000000..5998c7c
--- /dev/null
+++ b/glib/poppler-structure-element.cc
@@ -0,0 +1,1716 @@
+/* poppler-structure-element.cc: glib interface to poppler
+ *
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+
+#ifndef __GI_SCANNER__
+#include <StructTreeRoot.h>
+#include <StructElement.h>
+#include <GlobalParams.h>
+#include <UnicodeMap.h>
+#endif /* !__GI_SCANNER__ */
+
+#include "poppler.h"
+#include "poppler-private.h"
+#include "poppler-structure-element.h"
+
+
+/*
+ * Maps a core StructElement::Type to the PopplerStructureElementKind
+ * exposed by the GLib API. Types without a mapping trip
+ * g_assert_not_reached(); when assertions are compiled out they are
+ * reported as POPPLER_STRUCTURE_ELEMENT_UNKNOWN instead of running off
+ * the end of the function.
+ */
+static inline PopplerStructureElementKind
+_poppler_structelement_type_to_poppler_structure_element_kind (StructElement::Type type)
+{
+  switch (type)
+    {
+    case StructElement::Unknown:
+      return POPPLER_STRUCTURE_ELEMENT_UNKNOWN;
+    case StructElement::MCID:
+      return POPPLER_STRUCTURE_ELEMENT_CONTENT;
+    case StructElement::OBJR:
+      return POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE;
+    case StructElement::Document:
+      return POPPLER_STRUCTURE_ELEMENT_DOCUMENT;
+    case StructElement::Part:
+      return POPPLER_STRUCTURE_ELEMENT_PART;
+    case StructElement::Sect:
+      return POPPLER_STRUCTURE_ELEMENT_SECTION;
+    case StructElement::Div:
+      return POPPLER_STRUCTURE_ELEMENT_DIV;
+    case StructElement::Span:
+      return POPPLER_STRUCTURE_ELEMENT_SPAN;
+    case StructElement::Quote:
+      return POPPLER_STRUCTURE_ELEMENT_QUOTE;
+    case StructElement::Note:
+      return POPPLER_STRUCTURE_ELEMENT_NOTE;
+    case StructElement::Reference:
+      return POPPLER_STRUCTURE_ELEMENT_REFERENCE;
+    case StructElement::BibEntry:
+      return POPPLER_STRUCTURE_ELEMENT_BIBENTRY;
+    case StructElement::Code:
+      return POPPLER_STRUCTURE_ELEMENT_CODE;
+    case StructElement::Link:
+      return POPPLER_STRUCTURE_ELEMENT_LINK;
+    case StructElement::Annot:
+      return POPPLER_STRUCTURE_ELEMENT_ANNOT;
+    case StructElement::Ruby:
+      return POPPLER_STRUCTURE_ELEMENT_RUBY;
+    case StructElement::Warichu:
+      return POPPLER_STRUCTURE_ELEMENT_WARICHU;
+    case StructElement::BlockQuote:
+      return POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE;
+    case StructElement::Caption:
+      return POPPLER_STRUCTURE_ELEMENT_CAPTION;
+    case StructElement::NonStruct:
+      return POPPLER_STRUCTURE_ELEMENT_NONSTRUCT;
+    case StructElement::TOC:
+      return POPPLER_STRUCTURE_ELEMENT_TOC;
+    case StructElement::TOCI:
+      return POPPLER_STRUCTURE_ELEMENT_TOC_ITEM;
+    case StructElement::Index:
+      return POPPLER_STRUCTURE_ELEMENT_INDEX;
+    case StructElement::Private:
+      return POPPLER_STRUCTURE_ELEMENT_PRIVATE;
+    case StructElement::P:
+      return POPPLER_STRUCTURE_ELEMENT_PARAGRAPH;
+    case StructElement::H:
+      return POPPLER_STRUCTURE_ELEMENT_HEADING;
+    case StructElement::H1:
+      return POPPLER_STRUCTURE_ELEMENT_HEADING_1;
+    case StructElement::H2:
+      return POPPLER_STRUCTURE_ELEMENT_HEADING_2;
+    case StructElement::H3:
+      return POPPLER_STRUCTURE_ELEMENT_HEADING_3;
+    case StructElement::H4:
+      return POPPLER_STRUCTURE_ELEMENT_HEADING_4;
+    case StructElement::H5:
+      return POPPLER_STRUCTURE_ELEMENT_HEADING_5;
+    case StructElement::H6:
+      return POPPLER_STRUCTURE_ELEMENT_HEADING_6;
+    case StructElement::L:
+      return POPPLER_STRUCTURE_ELEMENT_LIST;
+    case StructElement::LI:
+      return POPPLER_STRUCTURE_ELEMENT_LIST_ITEM;
+    case StructElement::Lbl:
+      return POPPLER_STRUCTURE_ELEMENT_LIST_LABEL;
+    case StructElement::Table:
+      return POPPLER_STRUCTURE_ELEMENT_TABLE;
+    case StructElement::TR:
+      return POPPLER_STRUCTURE_ELEMENT_TABLE_ROW;
+    case StructElement::TH:
+      return POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING;
+    case StructElement::TD:
+      return POPPLER_STRUCTURE_ELEMENT_TABLE_DATA;
+    case StructElement::THead:
+      return POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER;
+    case StructElement::TFoot:
+      return POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER;
+    case StructElement::TBody:
+      return POPPLER_STRUCTURE_ELEMENT_TABLE_BODY;
+    case StructElement::Figure:
+      return POPPLER_STRUCTURE_ELEMENT_FIGURE;
+    case StructElement::Formula:
+      return POPPLER_STRUCTURE_ELEMENT_FORMULA;
+    case StructElement::Form:
+      return POPPLER_STRUCTURE_ELEMENT_FORM;
+    default:
+      g_assert_not_reached ();
+    }
+
+  /* Only reached when assertions are disabled (G_DISABLE_ASSERT). */
+  return POPPLER_STRUCTURE_ELEMENT_UNKNOWN;
+}
+
+/*
+ * Reads a three-component colour from @array into @rgb.
+ *
+ * Returns TRUE on success. Returns FALSE, leaving @rgb untouched, when
+ * @array does not have exactly three elements or one of them is not a
+ * number. Elements are fetched with getNF(), as before, so indirect
+ * references are not resolved.
+ */
+static GBool
+_rgb_array_to_doubles(Array *array, double rgb[3])
+{
+  double components[3];
+
+  if (array->getLength() != 3)
+    return FALSE;
+
+  for (int i = 0; i < 3; i++)
+    {
+      Object obj;
+      /*
+       * isNum() accepts integer as well as real objects, so colours
+       * written with integer components, e.g. [1 0 0], are not rejected
+       * the way they were with isReal().
+       */
+      GBool is_number = array->getNF(i, &obj)->isNum();
+
+      if (is_number)
+        components[i] = obj.getNum();
+      obj.free();
+
+      if (!is_number)
+        return FALSE;
+    }
+
+  rgb[0] = components[0];
+  rgb[1] = components[1];
+  rgb[2] = components[2];
+
+  return TRUE;
+}
+
+
+/*
+ * Builds a "(ddd)" GVariant from a three-element colour array. Returns
+ * NULL when the array cannot be converted by _rgb_array_to_doubles(); an
+ * array whose length is not 3 fails the g_return_val_if_fail() check.
+ */
+static GVariant*
+_g_variant_new_from_rgb_array (Array *array)
+{
+  double rgb[3];
+
+  g_return_val_if_fail (array->getLength () == 3, NULL);
+
+  return _rgb_array_to_doubles (array, rgb)
+    ? g_variant_new ("(ddd)", rgb[0], rgb[1], rgb[2])
+    : NULL;
+}
+
+
+/*
+ * Builds a "((ddd)(ddd)(ddd)(ddd))" GVariant from @array, which is either
+ * a single colour (three elements, replicated four times) or an array of
+ * four colour arrays. Returns NULL for any other length, or when a colour
+ * cannot be converted by _rgb_array_to_doubles().
+ */
+static GVariant*
+_g_variant_new_from_rgb_array_or_x4 (Array *array)
+{
+  double v[12];
+
+  if (array->getLength() == 3)
+    {
+      if (!_rgb_array_to_doubles (array, v))
+        return NULL;
+
+      /* Replicate the single colour into the other three slots. */
+      v[ 9] = v[6] = v[3] = v[0];
+      v[10] = v[7] = v[4] = v[1];
+      v[11] = v[8] = v[5] = v[2];
+    }
+  else if (array->getLength () == 4)
+    {
+      for (int i = 0; i < 4; i++)
+        {
+          Object item;
+          GBool converted = array->get(i, &item)->isArray() &&
+                            _rgb_array_to_doubles (item.getArray(), &v[i * 3]);
+
+          /*
+           * The fetched object has to be released on every path, the same
+           * way _rgb_array_to_doubles() frees the objects it fetches.
+           */
+          item.free();
+
+          if (!converted)
+            return NULL;
+        }
+    }
+  else
+    return NULL;
+
+  return g_variant_new ("((ddd)(ddd)(ddd)(ddd))",
+                        v[ 0], v[ 1], v[ 2],
+                        v[ 3], v[ 4], v[ 5],
+                        v[ 6], v[ 7], v[ 8],
+                        v[ 9], v[10], v[11]);
+}
+
+
+template <typename EnumType> /* one name/value entry of a name-to-enum lookup table */
+struct EnumNameValue {
+ const gchar *name; /* name matched with Object::isName(); a NULL name ends values[] */
+ EnumType value; /* enum value returned for that name */
+
+ static const EnumNameValue<EnumType> values[]; /* table, defined by one explicit specialization per enum type */
+ static const EnumType null = static_cast<EnumType> (-1); /* sentinel meaning "no value" */
+};
+
+template<> /* name -> PopplerStructurePlacement lookup table; the { NULL } entry terminates it */
+const EnumNameValue<PopplerStructurePlacement> EnumNameValue<PopplerStructurePlacement>::values[] =
+{
+ { "Block", POPPLER_STRUCTURE_PLACEMENT_BLOCK },
+ { "Inline", POPPLER_STRUCTURE_PLACEMENT_INLINE },
+ { "Before", POPPLER_STRUCTURE_PLACEMENT_BEFORE },
+ { "Start", POPPLER_STRUCTURE_PLACEMENT_START },
+ { "End", POPPLER_STRUCTURE_PLACEMENT_END },
+ { NULL }
+};
+
+template<> /* name -> PopplerStructureWritingMode lookup table; the { NULL } entry terminates it */
+const EnumNameValue<PopplerStructureWritingMode> EnumNameValue<PopplerStructureWritingMode>::values[] =
+{
+ { "LrTb", POPPLER_STRUCTURE_WRITING_MODE_LR_TB },
+ { "RlTb", POPPLER_STRUCTURE_WRITING_MODE_RL_TB },
+ { "TbRl", POPPLER_STRUCTURE_WRITING_MODE_TB_RL },
+ { NULL }
+};
+
+template<> /* name -> PopplerStructureBorderStyle lookup table; the { NULL } entry terminates it */
+const EnumNameValue<PopplerStructureBorderStyle> EnumNameValue<PopplerStructureBorderStyle>::values[] =
+{
+ { "None", POPPLER_STRUCTURE_BORDER_STYLE_NONE },
+ { "Hidden", POPPLER_STRUCTURE_BORDER_STYLE_HIDDEN },
+ { "Dotted", POPPLER_STRUCTURE_BORDER_STYLE_DOTTED },
+ { "Dashed", POPPLER_STRUCTURE_BORDER_STYLE_DASHED },
+ { "Solid", POPPLER_STRUCTURE_BORDER_STYLE_SOLID },
+ { "Double", POPPLER_STRUCTURE_BORDER_STYLE_DOUBLE },
+ { "Groove", POPPLER_STRUCTURE_BORDER_STYLE_GROOVE },
+ { "Inset", POPPLER_STRUCTURE_BORDER_STYLE_INSET },
+ { "Outset", POPPLER_STRUCTURE_BORDER_STYLE_OUTSET },
+ { NULL }
+};
+
+template<> /* name -> PopplerStructureTextAlign lookup table; the { NULL } entry terminates it */
+const EnumNameValue<PopplerStructureTextAlign> EnumNameValue<PopplerStructureTextAlign>::values[] =
+{
+ { "Start", POPPLER_STRUCTURE_TEXT_ALIGN_START },
+ { "Center", POPPLER_STRUCTURE_TEXT_ALIGN_CENTER },
+ { "End", POPPLER_STRUCTURE_TEXT_ALIGN_END },
+ { "Justify", POPPLER_STRUCTURE_TEXT_ALIGN_JUSTIFY },
+ { NULL }
+};
+
+template<> /* name -> PopplerStructureBlockAlign lookup table; the { NULL } entry terminates it */
+const EnumNameValue<PopplerStructureBlockAlign> EnumNameValue<PopplerStructureBlockAlign>::values[] =
+{
+ { "Before", POPPLER_STRUCTURE_BLOCK_ALIGN_BEFORE },
+ { "Middle", POPPLER_STRUCTURE_BLOCK_ALIGN_MIDDLE },
+ { "After", POPPLER_STRUCTURE_BLOCK_ALIGN_AFTER },
+ { "Justify", POPPLER_STRUCTURE_BLOCK_ALIGN_JUSTIFY },
+ { NULL }
+};
+
+template<> /* name -> PopplerStructureInlineAlign lookup table; the { NULL } entry terminates it */
+const EnumNameValue<PopplerStructureInlineAlign> EnumNameValue<PopplerStructureInlineAlign>::values[] =
+{
+ { "Start", POPPLER_STRUCTURE_INLINE_ALIGN_START },
+ { "Center", POPPLER_STRUCTURE_INLINE_ALIGN_CENTER },
+ { "End", POPPLER_STRUCTURE_INLINE_ALIGN_END },
+ { NULL }
+};
+
+template<> /* name -> PopplerStructureTextDecoration lookup table; the { NULL } entry terminates it */
+const EnumNameValue<PopplerStructureTextDecoration> EnumNameValue<PopplerStructureTextDecoration>::values[] =
+{
+ { "None", POPPLER_STRUCTURE_TEXT_DECORATION_NONE },
+ { "Underline", POPPLER_STRUCTURE_TEXT_DECORATION_UNDERLINE },
+ { "Overline", POPPLER_STRUCTURE_TEXT_DECORATION_OVERLINE },
+ { "LineThrough", POPPLER_STRUCTURE_TEXT_DECORATION_LINETHROUGH },
+ { NULL }
+};
+
+template<> /* name -> PopplerStructureRubyAlign lookup table; the { NULL } entry terminates it */
+const EnumNameValue<PopplerStructureRubyAlign> EnumNameValue<PopplerStructureRubyAlign>::values[] =
+{
+ { "Start", POPPLER_STRUCTURE_RUBY_ALIGN_START },
+ { "Center", POPPLER_STRUCTURE_RUBY_ALIGN_CENTER },
+ { "End", POPPLER_STRUCTURE_RUBY_ALIGN_END },
+ { "Justify", POPPLER_STRUCTURE_RUBY_ALIGN_JUSTIFY },
+ { "Distribute", POPPLER_STRUCTURE_RUBY_ALIGN_DISTRIBUTE },
+ { NULL }
+};
+
+template<> /* name -> PopplerStructureRubyPosition lookup table; the { NULL } entry terminates it */
+const EnumNameValue<PopplerStructureRubyPosition> EnumNameValue<PopplerStructureRubyPosition>::values[] =
+{
+ { "Before", POPPLER_STRUCTURE_RUBY_POSITION_BEFORE },
+ { "After", POPPLER_STRUCTURE_RUBY_POSITION_AFTER },
+ { "Warichu", POPPLER_STRUCTURE_RUBY_POSITION_WARICHU },
+ { "Inline", POPPLER_STRUCTURE_RUBY_POSITION_INLINE },
+ { NULL }
+};
+
+template<> /* name -> PopplerStructureGlyphOrientation lookup table; the { NULL } entry terminates it */
+const EnumNameValue<PopplerStructureGlyphOrientation> EnumNameValue<PopplerStructureGlyphOrientation>::values[] =
+{
+ { "Auto", POPPLER_STRUCTURE_GLYPH_ORIENTATION_AUTO },
+ { "90", POPPLER_STRUCTURE_GLYPH_ORIENTATION_90 },
+ { "180", POPPLER_STRUCTURE_GLYPH_ORIENTATION_180 },
+ { "270", POPPLER_STRUCTURE_GLYPH_ORIENTATION_270 },
+ { "360", POPPLER_STRUCTURE_GLYPH_ORIENTATION_0 }, /* NOTE(review): there is no "0" entry in this table -- confirm that is intended */
+ { "-90", POPPLER_STRUCTURE_GLYPH_ORIENTATION_270 }, /* negative angles share the value of their positive equivalent */
+ { "-180", POPPLER_STRUCTURE_GLYPH_ORIENTATION_180 },
+ { NULL }
+};
+
+template<> /* name -> PopplerStructureListNumbering lookup table; the { NULL } entry terminates it */
+const EnumNameValue<PopplerStructureListNumbering> EnumNameValue<PopplerStructureListNumbering>::values[] =
+{
+ { "None", POPPLER_STRUCTURE_LIST_NUMBERING_NONE },
+ { "Disc", POPPLER_STRUCTURE_LIST_NUMBERING_DISC },
+ { "Circle", POPPLER_STRUCTURE_LIST_NUMBERING_CIRCLE },
+ { "Square", POPPLER_STRUCTURE_LIST_NUMBERING_SQUARE },
+ { "Decimal", POPPLER_STRUCTURE_LIST_NUMBERING_DECIMAL },
+ { "UpperRoman", POPPLER_STRUCTURE_LIST_NUMBERING_UPPER_ROMAN },
+ { "LowerRoman", POPPLER_STRUCTURE_LIST_NUMBERING_LOWER_ROMAN },
+ { "UpperAlpha", POPPLER_STRUCTURE_LIST_NUMBERING_UPPER_ALPHA },
+ { "LowerAlpha", POPPLER_STRUCTURE_LIST_NUMBERING_LOWER_ALPHA },
+ { NULL }
+};
+
+/*
+ * Names accepted by name_to_enum() for #PopplerStructureRole.
+ * The entry with a NULL name terminates the table.
+ */
+template<>
+const EnumNameValue<PopplerStructureRole>
+EnumNameValue<PopplerStructureRole>::values[] =
+{
+  { "rb", POPPLER_STRUCTURE_ROLE_RADIO_BUTTON },
+  { "cb", POPPLER_STRUCTURE_ROLE_CHECKBOX     },
+  { "pb", POPPLER_STRUCTURE_ROLE_PUSH_BUTTON  },
+  { "tv", POPPLER_STRUCTURE_ROLE_TEXT_VALUE   },
+  { NULL }
+};
+
+/*
+ * Names accepted by name_to_enum() for #PopplerStructureChecked.
+ * The entry with a NULL name terminates the table.
+ */
+template<>
+const EnumNameValue<PopplerStructureChecked>
+EnumNameValue<PopplerStructureChecked>::values[] =
+{
+  { "on",      POPPLER_STRUCTURE_CHECKED_ON      },
+  { "off",     POPPLER_STRUCTURE_CHECKED_OFF     },
+  { "neutral", POPPLER_STRUCTURE_CHECKED_NEUTRAL },
+  { NULL }
+};
+
+/*
+ * Names accepted by name_to_enum() for #PopplerStructureScope.
+ * The entry with a NULL name terminates the table.
+ */
+template<>
+const EnumNameValue<PopplerStructureScope>
+EnumNameValue<PopplerStructureScope>::values[] =
+{
+  { "Row",    POPPLER_STRUCTURE_SCOPE_ROW    },
+  { "Column", POPPLER_STRUCTURE_SCOPE_COLUMN },
+  { "Both",   POPPLER_STRUCTURE_SCOPE_BOTH   },
+  { NULL }
+};
+
+/*
+ * Maps a name object to the corresponding EnumType value by scanning
+ * EnumNameValue<EnumType>::values up to its NULL-named terminator.
+ *
+ * Returns default_value when name_value is NULL or when it does not match
+ * any name in the table. The default for default_value is the "null"
+ * marker of the table, EnumNameValue<EnumType>::null, the same marker
+ * name_to_variant_enum() uses (EnumType itself is a plain enum and has no
+ * "null" member to refer to).
+ */
+template <typename EnumType>
+static EnumType
+name_to_enum (Object *name_value,
+              EnumType default_value = EnumNameValue<EnumType>::null)
+{
+  if (!name_value)
+    return default_value;
+
+  for (const EnumNameValue<EnumType> *item = EnumNameValue<EnumType>::values; item->name; item++)
+    if (name_value->isName (item->name))
+      return item->value;
+
+  return default_value;
+}
+
+
+/*
+ * Same lookup as name_to_enum(), with the result wrapped in a uint32
+ * GVariant. Yields NULL when the lookup ends up with the table's "null"
+ * marker, EnumNameValue<EnumType>::null.
+ */
+template <typename EnumType>
+static GVariant*
+name_to_variant_enum (Object *name_value,
+                      EnumType default_value = EnumNameValue<EnumType>::null)
+{
+  const EnumType resolved = name_to_enum<EnumType> (name_value, default_value);
+
+  if (resolved == EnumNameValue<EnumType>::null)
+    return NULL;
+
+  return g_variant_new_uint32 (resolved);
+}
+
+
+/*
+ * Builds a string GVariant out of a name or string object. String objects
+ * go through _poppler_goo_string_to_utf8() first. Any other kind of object
+ * yields NULL.
+ */
+static GVariant*
+string_to_variant (Object *object)
+{
+  GVariant *variant = NULL;
+
+  if (object->isName ())
+    {
+      variant = g_variant_new_string (object->getName ());
+    }
+  else if (object->isString ())
+    {
+      gchar *converted = _poppler_goo_string_to_utf8 (object->getString ());
+      variant = g_variant_new_string (converted);
+      g_free (converted);
+    }
+
+  return variant;
+}
+
+
+/*
+ * Builds a "(uuuu)" GVariant holding four #PopplerStructureBorderStyle
+ * values.
+ *
+ * Accepted inputs:
+ *  - an array of exactly four items, each one looked up individually;
+ *  - a single name, which is applied to all four positions.
+ * Items that are not recognized names become
+ * POPPLER_STRUCTURE_BORDER_STYLE_NONE. Any other input yields NULL.
+ */
+static GVariant*
+_g_variant_new_from_border_style (Object *object)
+{
+  PopplerStructureBorderStyle border_style[4];
+
+  if (object->isArray () && object->arrayGetLength () == 4)
+    {
+      for (int i = 0; i < 4; i++)
+        {
+          Object item;
+          border_style[i] = name_to_enum<PopplerStructureBorderStyle> (object->arrayGet (i, &item),
+                                                                       POPPLER_STRUCTURE_BORDER_STYLE_NONE);
+          /* arrayGet() hands out a copy which has to be released. */
+          item.free ();
+        }
+    }
+  else if (object->isName ())
+    {
+      border_style[0] = border_style[1] = border_style[2] = border_style[3] =
+        name_to_enum<PopplerStructureBorderStyle> (object, POPPLER_STRUCTURE_BORDER_STYLE_NONE);
+    }
+  else
+    return NULL;
+
+  return g_variant_new ("(uuuu)",
+                        border_style[0],
+                        border_style[1],
+                        border_style[2],
+                        border_style[3]);
+}
+
+
+/*
+ * Builds a "(dddd)" GVariant from either a single number, which is
+ * replicated four times, or an array of exactly four numbers. Integers
+ * are converted to double. Returns NULL for any other input, including
+ * arrays that contain a non-numeric item.
+ */
+static GVariant*
+_g_variant_new_from_number_or_x4 (Object *object)
+{
+  double v[4];
+
+  if (object->isArray () && object->arrayGetLength () == 4)
+    {
+      for (int i = 0; i < 4; i++)
+        {
+          Object item;
+          gboolean is_number = TRUE;
+
+          object->arrayGet (i, &item);
+          if (item.isReal ())
+            v[i] = item.getReal ();
+          else if (item.isInt ())
+            v[i] = (double) item.getInt ();
+          else
+            is_number = FALSE;
+
+          /* Release the fetched copy on every path, including the
+           * early exit below. */
+          item.free ();
+
+          if (!is_number)
+            return NULL;
+        }
+    }
+  else if (object->isReal ())
+    v[0] = v[1] = v[2] = v[3] = object->getReal ();
+  else if (object->isInt ())
+    v[0] = v[1] = v[2] = v[3] = (double) object->getInt ();
+  else
+    return NULL;
+
+  return g_variant_new ("(dddd)", v[0], v[1], v[2], v[3]);
+}
+
+
+/*
+ * Array-only variant of _g_variant_new_from_number_or_x4(): a lone number
+ * is rejected with NULL instead of being replicated.
+ */
+static inline GVariant*
+_g_variant_new_from_number_x4 (Object *object)
+{
+  if (!object->isArray ())
+    return NULL;
+
+  return _g_variant_new_from_number_or_x4 (object);
+}
+
+
+/*
+ * Wraps a real or integer object in a double GVariant. Objects of any
+ * other kind yield NULL.
+ */
+static GVariant*
+_g_variant_new_from_number (Object *object)
+{
+  GVariant *variant = NULL;
+
+  if (object->isReal ())
+    variant = g_variant_new_double (object->getReal ());
+  else if (object->isInt ())
+    variant = g_variant_new_double ((double) object->getInt ());
+
+  return variant;
+}
+
+
+/*
+ * Builds a maybe-double ("md") GVariant:
+ *  - the name "Auto" becomes an empty maybe (Nothing);
+ *  - a real or integer becomes a maybe holding that number as a double.
+ * Any other object yields NULL.
+ *
+ * g_variant_new_maybe() is used instead of g_variant_new ("md", ...):
+ * with a non-pointer child type the "m" format expects a gboolean before
+ * the value, so passing only the double (or only NULL) makes the varargs
+ * be misread.
+ */
+static GVariant*
+_g_variant_new_from_number_or_auto (Object *object)
+{
+  if (object->isName ("Auto"))
+    return g_variant_new_maybe (G_VARIANT_TYPE_DOUBLE, NULL);
+  if (object->isReal ())
+    return g_variant_new_maybe (G_VARIANT_TYPE_DOUBLE,
+                                g_variant_new_double (object->getReal ()));
+  if (object->isInt ())
+    return g_variant_new_maybe (G_VARIANT_TYPE_DOUBLE,
+                                g_variant_new_double ((double) object->getInt ()));
+  return NULL;
+}
+
+
+/*
+ * Like _g_variant_new_from_number_or_auto(), but the name "Normal" is
+ * also mapped to an empty maybe-double.
+ *
+ * The empty maybe is created with g_variant_new_maybe(): the "md" format
+ * of g_variant_new() expects a gboolean followed by a double, so passing
+ * a lone NULL does not match the format.
+ */
+static inline GVariant*
+_g_variant_new_from_number_or_auto_or_normal (Object *object)
+{
+  if (object->isName ("Normal"))
+    return g_variant_new_maybe (G_VARIANT_TYPE_DOUBLE, NULL);
+
+  return _g_variant_new_from_number_or_auto (object);
+}
+
+
+/*
+ * Builds an "ad" GVariant (array of doubles) from a single number or from
+ * an array of numbers. Integers are converted to double; array items that
+ * are not numbers are skipped. An input of any other kind produces an
+ * empty array, never NULL.
+ */
+static GVariant*
+_g_variant_new_number_array (Object *object)
+{
+  GVariantBuilder *builder = g_variant_builder_new (G_VARIANT_TYPE ("ad"));
+
+  if (object->isReal ())
+    g_variant_builder_add (builder, "d", object->getReal ());
+  else if (object->isInt ())
+    g_variant_builder_add (builder, "d", (double) object->getInt ());
+  else if (object->isArray ())
+    {
+      for (int i = 0; i < object->arrayGetLength (); i++)
+        {
+          Object item;
+
+          object->arrayGet (i, &item);
+          if (item.isReal ())
+            g_variant_builder_add (builder, "d", item.getReal ());
+          else if (item.isInt ())
+            g_variant_builder_add (builder, "d", (double) item.getInt ());
+
+          /* Release the copy made by arrayGet(), whatever its type. */
+          item.free ();
+        }
+    }
+
+  GVariant *value = g_variant_new ("ad", builder);
+  g_variant_builder_unref (builder);
+  return value;
+}
+
+
+/*
+ * Builds an "as" GVariant (array of strings) from a single name or
+ * string, or from an array of names and strings. String objects are
+ * converted with _poppler_goo_string_to_utf8(); array items of any other
+ * kind are skipped. An input of any other kind produces an empty array,
+ * never NULL.
+ */
+static GVariant*
+_g_variant_new_string_array (Object *object)
+{
+  GVariantBuilder *builder = g_variant_builder_new (G_VARIANT_TYPE ("as"));
+
+  if (object->isName ())
+    g_variant_builder_add (builder, "s", object->getName ());
+  else if (object->isString ())
+    {
+      gchar *utf8_string = _poppler_goo_string_to_utf8 (object->getString ());
+      g_variant_builder_add (builder, "s", utf8_string);
+      g_free (utf8_string);
+    }
+  else if (object->isArray ())
+    {
+      for (int i = 0; i < object->arrayGetLength (); i++)
+        {
+          Object item;
+
+          /* Inspect the fetched item, not the enclosing array: "object"
+           * is an array here, so asking it for a name or string is wrong. */
+          object->arrayGet (i, &item);
+          if (item.isName ())
+            g_variant_builder_add (builder, "s", item.getName ());
+          else if (item.isString ())
+            {
+              gchar *utf8_string = _poppler_goo_string_to_utf8 (item.getString ());
+              g_variant_builder_add (builder, "s", utf8_string);
+              g_free (utf8_string);
+            }
+
+          /* Release the copy made by arrayGet(). */
+          item.free ();
+        }
+    }
+
+  GVariant *value = g_variant_new ("as", builder);
+  g_variant_builder_unref (builder);
+  return value;
+}
+
+
+/*
+ * Returns the value of the given attribute for the element. When
+ * findAttribute() does not find the attribute (searching ancestors too if
+ * inherit is set), the result of Attribute::getDefaultValue() for that
+ * attribute type is returned instead.
+ */
+static inline Object*
+attr_value_or_default (PopplerStructureElement *poppler_structure_element,
+                       Attribute::Type          attribute_type,
+                       gboolean                 inherit)
+{
+  Object *fallback = Attribute::getDefaultValue (attribute_type);
+  const Attribute *found = poppler_structure_element->elem->findAttribute (attribute_type, inherit);
+
+  return found ? found->getValue () : fallback;
+}
+
+
+/*
+ * Destroy-notify for #PopplerTextSpan items: frees the strings held by
+ * the span and then the slice-allocated span itself.
+ */
+static void
+_poppler_text_span_free (gpointer data)
+{
+  PopplerTextSpan *text_span = static_cast<PopplerTextSpan*> (data);
+
+  g_free (text_span->link_target);
+  g_free (text_span->font_name);
+  g_free (text_span->text);
+  g_slice_free (PopplerTextSpan, text_span);
+}
+
+
+/**
+ * SECTION:poppler-structure-element
+ * @short_description: Document structure element.
+ * @title: PopplerStructureElement
+ * @see_also: #PopplerStructure
+ *
+ * Instances of #PopplerStructureElement are used to describe the structure
+ * of a #PopplerDocument. To access the elements in the structure of the
+ * document, first use poppler_document_get_structure() to obtain its
+ * #PopplerStructure, and then use poppler_structure_get_n_children()
+ * and poppler_structure_get_child() to enumerate the top level elements.
+ */
+
+typedef struct _PopplerStructureElementClass PopplerStructureElementClass;
+struct _PopplerStructureElementClass
+{
+ GObjectClass parent_class;
+};
+
+G_DEFINE_TYPE (PopplerStructureElement, poppler_structure_element, G_TYPE_OBJECT);
+
+
+/*
+ * Creates the wrapper object for a StructElement belonging to the given
+ * structure. Neither pointer may be NULL. When the element has children,
+ * a zero-filled array with one slot per child is allocated; the slots are
+ * filled on demand by poppler_structure_element_get_child().
+ */
+PopplerStructureElement*
+_poppler_structure_element_new (PopplerStructure *structure, StructElement *element)
+{
+  g_assert (structure);
+  g_assert (element);
+
+  PopplerStructureElement *wrapper =
+    (PopplerStructureElement *) g_object_new (POPPLER_TYPE_STRUCTURE_ELEMENT, NULL);
+
+  wrapper->structure = structure;
+  wrapper->elem = element;
+  wrapper->text = NULL;
+  wrapper->text_r = NULL;
+  wrapper->children = NULL;
+
+  if (element->getNumElements ())
+    {
+      wrapper->children = (PopplerStructureElement**) g_new0 (PopplerStructureElement*,
+                                                              element->getNumElements ());
+    }
+
+  return wrapper;
+}
+
+
+/*
+ * Instance initializer required by G_DEFINE_TYPE. It is intentionally
+ * empty: the fields are filled in by _poppler_structure_element_new().
+ */
+static void
+poppler_structure_element_init (PopplerStructureElement *poppler_structure_element)
+{
+}
+
+
+/*
+ * GObject finalizer: releases every string cached by the getters, the
+ * cached list of text spans and the child wrappers created so far.
+ */
+static void
+poppler_structure_element_finalize (GObject *object)
+{
+  PopplerStructureElement *poppler_structure_element = POPPLER_STRUCTURE_ELEMENT (object);
+
+  /* poppler_structure_element->elem is owned by the StructTreeRoot */
+  g_free (poppler_structure_element->language);
+  g_free (poppler_structure_element->text_r);
+  g_free (poppler_structure_element->text);
+  g_free (poppler_structure_element->title);
+  g_free (poppler_structure_element->id);
+  /* These three are cached by the abbreviation, alternate text and
+   * actual text getters and have to be released as well. */
+  g_free (poppler_structure_element->text_abbrev);
+  g_free (poppler_structure_element->alt_text);
+  g_free (poppler_structure_element->actual_text);
+  g_list_free_full (poppler_structure_element->text_spans, _poppler_text_span_free);
+
+  if (poppler_structure_element->children)
+    {
+      for (unsigned i = 0; i < poppler_structure_element->elem->getNumElements (); i++)
+        {
+          /* Child wrappers are created lazily, so a slot may still be
+           * NULL; g_object_unref() must not be called on NULL. */
+          if (poppler_structure_element->children[i])
+            g_object_unref (poppler_structure_element->children[i]);
+        }
+      g_free (poppler_structure_element->children);
+    }
+
+  G_OBJECT_CLASS (poppler_structure_element_parent_class)->finalize (object);
+}
+
+
+/* Class initializer: installs the finalizer that releases cached data. */
+static void
+poppler_structure_element_class_init (PopplerStructureElementClass *klass)
+{
+  G_OBJECT_CLASS (klass)->finalize = poppler_structure_element_finalize;
+}
+
+
+/**
+ * poppler_structure_element_get_kind:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the kind of the element, converted from the type reported by
+ * the underlying structure element.
+ *
+ * Return value: A #PopplerStructureElementKind value, or
+ *    %POPPLER_STRUCTURE_ELEMENT_UNKNOWN if @poppler_structure_element is
+ *    not a valid #PopplerStructureElement.
+ */
+PopplerStructureElementKind
+poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element),
+                        POPPLER_STRUCTURE_ELEMENT_UNKNOWN);
+  g_assert (poppler_structure_element->elem);
+
+  const PopplerStructureElementKind kind =
+    _poppler_structelement_type_to_poppler_structure_element_kind (poppler_structure_element->elem->getType ());
+
+  return kind;
+}
+
+/**
+ * poppler_structure_element_get_page:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the page the element refers to, if it has a page reference.
+ * The result is the page number found in the document for that reference,
+ * minus one.
+ *
+ * Return value: Number of the page that contains the element, or
+ *    <code>-1</code> if not defined.
+ */
+gint
+poppler_structure_element_get_page (PopplerStructureElement *poppler_structure_element)
+{
+  /* Fail with the documented "not defined" value; an element kind
+   * constant is not a meaningful page number. */
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), -1);
+  g_assert (poppler_structure_element->elem);
+
+  if (poppler_structure_element->elem->hasPageRef ())
+    {
+      const Ref ref = poppler_structure_element->elem->getPageRef ();
+      return poppler_structure_element->structure->document->doc->findPage(ref.num, ref.gen) - 1;
+    }
+
+  return -1;
+}
+
+/**
+ * poppler_structure_element_is_content:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Tells whether the element is actual document content, as reported by
+ * the underlying structure element.
+ *
+ * Return value: %TRUE if the element is content, %FALSE otherwise.
+ */
+gboolean
+poppler_structure_element_is_content (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+  g_assert (poppler_structure_element->elem);
+
+  const gboolean is_content = poppler_structure_element->elem->isContent ();
+  return is_content;
+}
+
+/**
+ * poppler_structure_element_is_inline:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Tells whether the element is an inline element, as reported by the
+ * underlying structure element.
+ *
+ * Return value: %TRUE if the element is inline, %FALSE otherwise.
+ */
+gboolean
+poppler_structure_element_is_inline (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+  g_assert (poppler_structure_element->elem);
+
+  const gboolean is_inline = poppler_structure_element->elem->isInline ();
+  return is_inline;
+}
+
+/**
+ * poppler_structure_element_is_block:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Tells whether the element is a block element, as reported by the
+ * underlying structure element.
+ *
+ * Return value: %TRUE if the element is block, %FALSE otherwise.
+ */
+gboolean
+poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+  g_assert (poppler_structure_element->elem);
+
+  const gboolean is_block = poppler_structure_element->elem->isBlock ();
+  return is_block;
+}
+
+/**
+ * poppler_structure_element_get_n_children:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Gets the number of children of @poppler_structure_element.
+ *
+ * Return value: Number of children elements, or <code>0</code> if
+ *    @poppler_structure_element is not a valid #PopplerStructureElement.
+ */
+guint
+poppler_structure_element_get_n_children (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), 0);
+  g_assert (poppler_structure_element->elem);
+
+  const guint n_children = poppler_structure_element->elem->getNumElements ();
+  return n_children;
+}
+
+/**
+ * poppler_structure_element_get_child:
+ * @poppler_structure_element: A #PopplerStructureElement
+ * @index: Index of the children element to obtain.
+ *
+ * Obtains the child element at position @index. The wrapper object for
+ * the child is created the first time it is requested and is kept by
+ * @poppler_structure_element afterwards, so repeated calls return the
+ * same object.
+ *
+ * Return value: (transfer none): A #PopplerStructureElement, or %NULL if
+ *    @index is out of range.
+ */
+PopplerStructureElement*
+poppler_structure_element_get_child (PopplerStructureElement *poppler_structure_element,
+                                     guint                    index)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+  g_assert (poppler_structure_element->elem->getNumElements () >= 0);
+  g_return_val_if_fail (index < (guint) poppler_structure_element->elem->getNumElements (), NULL);
+
+  if (!poppler_structure_element->children[index])
+    {
+      /* The reference returned by g_object_new() is the one owned by the
+       * children array and dropped in finalize. The object is not
+       * floating, so g_object_ref_sink() would add a second reference
+       * that nobody releases. */
+      poppler_structure_element->children[index] = _poppler_structure_element_new (poppler_structure_element->structure,
+                                                                                   poppler_structure_element->elem->getElement (index));
+    }
+  return poppler_structure_element->children[index];
+}
+
+/**
+ * poppler_structure_element_get_id:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the identifier of the element. The UTF-8 conversion is done on
+ * the first call and cached in the element.
+ *
+ * Return value: (transfer none): The identifier of the element (if
+ *    defined), or %NULL.
+ */
+const gchar*
+poppler_structure_element_get_id (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (poppler_structure_element->id)
+    return poppler_structure_element->id;
+
+  if (poppler_structure_element->elem->getID ())
+    {
+      poppler_structure_element->id =
+        _poppler_goo_string_to_utf8 (poppler_structure_element->elem->getID ());
+    }
+
+  return poppler_structure_element->id;
+}
+
+/**
+ * poppler_structure_element_get_title:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the title of the element. The UTF-8 conversion is done on the
+ * first call and cached in the element.
+ *
+ * Return value: (transfer none): The title of the element (if defined),
+ *    or %NULL.
+ */
+const gchar*
+poppler_structure_element_get_title (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (poppler_structure_element->title)
+    return poppler_structure_element->title;
+
+  if (poppler_structure_element->elem->getTitle ())
+    {
+      poppler_structure_element->title =
+        _poppler_goo_string_to_utf8 (poppler_structure_element->elem->getTitle ());
+    }
+
+  return poppler_structure_element->title;
+}
+
+/**
+ * poppler_structure_element_get_abbreviation:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Acronyms and abbreviations contained in elements of type
+ * #POPPLER_STRUCTURE_ELEMENT_SPAN may have an associated expanded
+ * text form, which can be retrieved using this function. For elements
+ * of any other type the result is always %NULL.
+ *
+ * Return value: (transfer none): Text of the expanded abbreviation, if the
+ *    element text is an abbreviation or acronym, or %NULL otherwise.
+ */
+const gchar*
+poppler_structure_element_get_abbreviation (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (poppler_structure_element->elem->getType () != StructElement::Span)
+    return NULL;
+
+  if (poppler_structure_element->text_abbrev)
+    return poppler_structure_element->text_abbrev;
+
+  if (poppler_structure_element->elem->getExpandedAbbr ())
+    {
+      poppler_structure_element->text_abbrev =
+        _poppler_goo_string_to_utf8 (poppler_structure_element->elem->getExpandedAbbr ());
+    }
+
+  return poppler_structure_element->text_abbrev;
+}
+
+/**
+ * poppler_structure_element_get_language:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the language of the element. The UTF-8 conversion is done on
+ * the first call and cached in the element.
+ *
+ * Return value: (transfer none): language and country code, in two-letter
+ *    ISO format, e.g. <code>en_US</code>, or %NULL if not defined.
+ */
+const gchar*
+poppler_structure_element_get_language (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (poppler_structure_element->language)
+    return poppler_structure_element->language;
+
+  if (poppler_structure_element->elem->getLanguage ())
+    {
+      poppler_structure_element->language =
+        _poppler_goo_string_to_utf8 (poppler_structure_element->elem->getLanguage ());
+    }
+
+  return poppler_structure_element->language;
+}
+
+/**
+ * poppler_structure_element_get_alt_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the “alternate” text representation of the element (and its child
+ * elements). This is mostly used for non-text elements like images and
+ * figures, to specify a textual description of the element.
+ *
+ * Note that for elements containing proper text, the function
+ * poppler_structure_element_get_text() must be used instead.
+ *
+ * The result is cached in the element after the first successful call.
+ *
+ * Return value: (transfer none): The alternate text representation for the
+ *    element, or %NULL if not defined.
+ */
+const gchar*
+poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (!poppler_structure_element->alt_text)
+    {
+      /* Query the text exactly once: the returned string is deleted
+       * below, so a second call made only to test for NULL would leave
+       * its result unreleased. */
+      GooString *s = poppler_structure_element->elem->getAltText ();
+      if (s)
+        poppler_structure_element->alt_text = _poppler_goo_string_to_utf8 (s);
+      delete s;
+    }
+
+  return poppler_structure_element->alt_text;
+}
+
+/**
+ * poppler_structure_element_get_actual_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the actual text enclosed by the element (and its child elements).
+ * The actual text is mostly used for non-text elements like images and
+ * figures which <em>do</em> have the graphical appearance of text, like
+ * a logo. For those the actual text is the equivalent text to those
+ * graphical elements which look like text when rendered.
+ *
+ * Note that for elements containing proper text, the function
+ * poppler_structure_element_get_text() must be used instead.
+ *
+ * The result is cached in the element after the first successful call.
+ *
+ * Return value: (transfer none): The actual text for the element, or %NULL
+ *    if not defined.
+ */
+const gchar*
+poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (!poppler_structure_element->actual_text)
+    {
+      /* Query the text exactly once: the returned string is deleted
+       * below, so a second call made only to test for NULL would leave
+       * its result unreleased. */
+      GooString *s = poppler_structure_element->elem->getActualText ();
+      if (s)
+        poppler_structure_element->actual_text = _poppler_goo_string_to_utf8 (s);
+      delete s;
+    }
+
+  return poppler_structure_element->actual_text;
+}
+
+/**
+ * poppler_structure_element_get_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ * @recursive: If %TRUE, the text of child elements is gathered recursively
+ *    in logical order and returned as part of the result.
+ *
+ * Obtains the text enclosed by an element, or the subtree under an element.
+ * The recursive and non-recursive results are cached separately in the
+ * element.
+ *
+ * Return value: (transfer none): A string, or %NULL if no text could be
+ *    obtained.
+ */
+const gchar*
+poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_element,
+                                    gboolean                 recursive)
+{
+  /* Same argument checks as the other public getters in this file. */
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (recursive)
+    {
+      if (!poppler_structure_element->text_r)
+        {
+          GooString *s = poppler_structure_element->elem->getText (NULL, gTrue);
+          if (s)
+            poppler_structure_element->text_r = _poppler_goo_string_to_utf8 (s);
+          delete s;
+        }
+      return poppler_structure_element->text_r;
+    }
+
+  if (!poppler_structure_element->text)
+    {
+      GooString *s = poppler_structure_element->elem->getText (NULL, gFalse);
+      if (s)
+        poppler_structure_element->text = _poppler_goo_string_to_utf8 (s);
+      delete s;
+    }
+  return poppler_structure_element->text;
+}
+
+
+/*
+ * Helper which turns a sequence of marked-content operations (MCOp) into
+ * a GList of PopplerTextSpan items.
+ *
+ * Character operations accumulate text; attribute operations (font flags,
+ * color, font name) close the span being accumulated and change the
+ * attributes used for the text that follows. Call end() once all the
+ * operations have been processed to obtain the list.
+ *
+ * NOTE(review): this assumes attribute operations precede the text they
+ * apply to -- confirm against the producer of the MCOp stream.
+ */
+class SpanBuilder {
+public:
+  SpanBuilder():
+    font(), text(), link(),
+    map(globalParams->getTextEncoding()),
+    glist(NULL),
+    flags(0),
+    color(0)
+  {}
+
+  ~SpanBuilder() {
+    map->decRefCnt();
+    // Spans not handed out through end() are still owned by the builder.
+    g_list_free_full (glist, _poppler_text_span_free);
+  }
+
+  // Processes every operation of the array, in order.
+  void process(const MCOpArray& ops) {
+    for (MCOpArray::const_iterator i = ops.begin(); i != ops.end(); ++i)
+      process(*i);
+  }
+
+  // Processes a single operation.
+  void process(const MCOp& op) {
+    if (op.type == MCOp::Unichar) {
+      int n = map->mapUnicode(op.unichar, buf, sizeof(buf));
+      text.append(buf, n);
+      return;
+    }
+
+    // Work out the attributes in effect after this operation without
+    // touching the current ones yet: pending text still belongs to them.
+    Guint newFlags = flags;
+    Guint newColor = color;
+
+    if (op.type == MCOp::Flags) {
+      if (op.flags & MCOp::FlagFontBold)
+        newFlags |= POPPLER_TEXT_SPAN_BOLD;
+      else
+        newFlags &= ~POPPLER_TEXT_SPAN_BOLD;
+
+      if (op.flags & MCOp::FlagFontFixed)
+        newFlags |= POPPLER_TEXT_SPAN_FIXED_WIDTH;
+      else
+        newFlags &= ~POPPLER_TEXT_SPAN_FIXED_WIDTH;
+
+      if (op.flags & MCOp::FlagFontItalic)
+        newFlags |= POPPLER_TEXT_SPAN_ITALIC;
+      else
+        newFlags &= ~POPPLER_TEXT_SPAN_ITALIC;
+    }
+
+    // Only a color operation may change the color state; other attribute
+    // operations must leave it alone. A zero pixel value clears the flag.
+    if (op.type == MCOp::Color) {
+      newColor = op.color.rgbPixel ();
+      if (newColor)
+        newFlags |= POPPLER_TEXT_SPAN_COLOR;
+      else
+        newFlags &= ~POPPLER_TEXT_SPAN_COLOR;
+    }
+
+    if (op.type == MCOp::FontName) {
+      if (op.value)
+        newFlags |= POPPLER_TEXT_SPAN_FONT;
+      else
+        newFlags &= ~POPPLER_TEXT_SPAN_FONT;
+    }
+
+    // Close the pending span while the attributes it was collected under
+    // are still current. A change of color value or of font name also
+    // starts a new span, even when the flag bits stay the same.
+    if (newFlags != flags || newColor != color || op.type == MCOp::FontName)
+      newSpan();
+
+    flags = newFlags;
+    color = newColor;
+
+    if (op.type == MCOp::FontName) {
+      // Replace the font name instead of appending to the previous one.
+      font.clear();
+      if (op.value)
+        font.append(op.value);
+    }
+  }
+
+  // Closes the span being accumulated, using the current attributes.
+  void newSpan() {
+    // If there is no text, do not append a new PopplerTextSpan
+    // and keep the attributes/flags for the next span.
+    if (text.getLength ()) {
+      PopplerTextSpan *span = g_slice_new0 (PopplerTextSpan);
+      span->color = color;
+      span->flags = flags;
+      span->text = _poppler_goo_string_to_utf8 (&text);
+      text.clear();
+
+      // The font name stays in effect until the next font name
+      // operation, so it is not cleared here.
+      if (font.getLength())
+        span->font_name = _poppler_goo_string_to_utf8 (&font);
+
+      if (link.getLength()) {
+        assert(flags & POPPLER_TEXT_SPAN_LINK);
+        span->link_target = _poppler_goo_string_to_utf8 (&link);
+      }
+
+      glist = g_list_append (glist, span);
+    }
+
+    // Link is always cleared
+    link.clear();
+  }
+
+  // Returns the list of spans built so far and transfers its ownership
+  // to the caller. Text still pending is flushed into a final span first,
+  // so text after the last attribute change is not lost.
+  GList* end() {
+    newSpan();
+    GList *result = glist;
+    glist = NULL;
+    return result;
+  }
+
+private:
+  GooString font;    // Name of the font currently in effect
+  GooString text;    // Text accumulated for the span being built
+  GooString link;    // Link target for the span being built
+  UnicodeMap *map;   // Text encoding map, released in the destructor
+  GList *glist;      // Spans completed so far
+  char buf[8];       // Scratch buffer for mapUnicode()
+  Guint flags;       // POPPLER_TEXT_SPAN_* flags currently in effect
+  Guint color;       // Color currently in effect
+};
+
+
+/**
+ * poppler_structure_element_get_text_spans:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the text enclosed by an element, as a #GList of #PopplerTextSpan
+ * structures. Each item in the list is a piece of text which share the same
+ * attributes, plus its attributes. Only content elements have text spans;
+ * the list is built on demand and kept by the element.
+ *
+ * Return value: (transfer none) (element-type PopplerTextSpan): A #GList
+ *    of #PopplerTextSpan structures, or %NULL if the element is not a
+ *    content element.
+ */
+GList*
+poppler_structure_element_get_text_spans (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (!poppler_structure_element->elem->isContent ())
+    return NULL;
+
+  /* Reuse the list built by an earlier call, if any. */
+  if (poppler_structure_element->text_spans)
+    return poppler_structure_element->text_spans;
+
+  SpanBuilder span_builder;
+  span_builder.process(poppler_structure_element->elem->getMCOps ());
+  poppler_structure_element->text_spans = span_builder.end();
+
+  return poppler_structure_element->text_spans;
+}
+
+/**
+ * poppler_structure_element_get_attribute:
+ * @poppler_structure_element: A #PopplerStructureElement.
+ * @attribute: A #PopplerStructureAttribute value.
+ * @value (out): A #GValue in which to return the value of the attribute.
+ * @inherit: Whether to look up for inheritable attribute values in the
+ * ancestors of the element, if the attribute is not defined in the
+ * element.
+ *
+ * <table>
+ * <title>Types returned for each attribute</title>
+ * <thead>
+ * <tr>
+ * <th>Attributes</th>
+ * <th>Returned type</th>
+ * </tr>
+ * </thead>
+ * <tbody>
+ * <tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_PLACEMENT</code></td>
+ * <td>A #PopplerStructurePlacement value, as a <code>guint32</code>.</td>
+ * </tr><tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_WRITING_MODE</code></td>
+ * <td>A #PopplerStructureWritingMode value, as a <code>guint32</code>.</td>
+ * </tr><tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_TEXT_ALIGN</code></td>
+ * <td>A #PopplerStructureTextAlign, as a <code>guint32</code>.</td>
+ * </tr><tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_BLOCK_ALIGN</code></td>
+ * <td>A #PopplerStructureBlockAlign, as a <code>guint32</code>.</td>
+ * </tr><tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_INLINE_ALIGN</code></td>
+ * <td>A #PopplerStructureInlineAlign, as a <code>guint32</code>.</td>
+ * </tr><tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION</code></td>
+ * <td>A #PopplerStructureTextDecoration value, as a <code>guint32</code>.</td>
+ * </tr><tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_RUBY_ALIGN</code></td>
+ * <td>A #PopplerStructureRubyAlign value, as a <code>guint32</code>.</td>
+ * </tr><tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_RUBY_POSITION</code></td>
+ * <td>A #PopplerStructureRubyPosition value, as a <code>guint32</code>.</td>
+ * </tr><tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_GLYPH_ORIENTATION</code></td>
+ * <td>A #PopplerStructureGlyphOrientation value, as a <code>guint32</code>.</td>
+ * </tr><tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_LIST_NUMBERING</code></td>
+ * <td>A #PopplerStructureListNumbering value, as a <code>guint32</code>.</td>
+ * </tr><tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_ROLE</code></td>
+ * <td>A #PopplerStructureRole value, as a <code>guint32</code>.</td>
+ * </tr><tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_CHECKED</code></td>
+ * <td>A #PopplerStructureChecked value, as a <code>guint32</code>.</td>
+ * </tr><tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_SCOPE</code></td>
+ * <td>A #PopplerStructureScope value, as a <code>guint32</code>.</td>
+ * </tr>
+ * <tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_DESCRIPTION</code></td>
+ * <td rowspan="2">A string, as a <code>const gchar*</code>.</td>
+ * </tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_SUMMARY</code></td></tr>
+ * <tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_SPACE_BEFORE</code></td>
+ * <td rowspan="10">Number, as a <code>double</code>.</td>
+ * </tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_SPACE_AFTER</code></td></tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_START_INDENT</code></td></tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_END_INDENT</code></td></tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_TEXT_INDENT</code></td></tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_BASELINE_SHIFT</code></td></tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION_THICKNESS</code></td></tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_COUNT</code></td></tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_ROW_SPAN</code></td></tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_SPAN</code></td></tr>
+ * <tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_GAP</code></td>
+ * <td rowspan="2">
+ * An array of <code>double</code> numbers. The type of the
+ * returned #GVariant is <code>ad</code>.
+ * </td>
+ * </tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_WIDTHS</code></td></tr>
+ * <tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_WIDTH</code></td>
+ * <td rowspan="3">
+ * A maybe-double number. That is, a #GVariant with type
+ * <code>md</code>. If the number is undefined, the value
+ * is meant to be calculated automatically.
+ * </td>
+ * </tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_HEIGHT</code></td></tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_LINE_HEIGHT</code></td></tr>
+ * <tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_COLOR</code></td>
+ * <td rowspan="3">
+ * A 3-tuple of doubles, with values in the <code>[0, 1]</code> range,
+ * in red-green-blue (RGB) order. The type of the returned #GVariant is
+ * <code>(ddd)</code>.
+ * </td>
+ * </tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_BACKGROUND_COLOR</code></td></tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION_COLOR</code></td></tr>
+ * <tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_BORDER_COLOR</code></td>
+ * <td>A 4-tuple of 3-tuples. Each one of the tuples is a RGB color,
+ * being each color component a double in the <code>[0, 1]</code>
+ * range. The four returned colors are in top-right-bottom-left
+ * order. The type of the returned #GVariant is
+ * <code>((ddd)(ddd)(ddd)(ddd))</code>.
+ * </td>
+ * </tr>
+ * <tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_BORDER_STYLE</code></td>
+ * <td rowspan="2">
+ * A 4-tuple of #PopplerStructureBorderStyle values, each one as a
+ * %guint32, in top-right-bottom-left order. The type of the
+ * returned #GVariant is <code>(uuuu)</code>.
+ * </td>
+ * </tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_TABLE_BORDER_STYLE</code></td></tr>
+ * <tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_BORDER_THICKNESS</code></td>
+ * <td rowspan="4">
+ * A 4-tuple of #double numbers, in top-right-bottom-left order.
+ * The type of the returned #GVariant is <code>(dddd)</code>.
+ * </td>
+ * </tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_TABLE_PADDING</code></td></tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_PADDING</code></td></tr>
+ * <tr><td><code>POPPLER_STRUCTURE_ATTRIBUTE_BBOX</code></td></tr>
+ * <tr>
+ * <td><code>POPPLER_STRUCTURE_ATTRIBUTE_HEADERS</code></td>
+ * <td>An array of strings, each string being a <code>const gchar*</code>.
+ * The type of the returned #GVariant is <code>as</code>.</td>
+ * </tr>
+ * </tbody>
+ * </table>
+ *
+ * Return value: (transfer full): A #GVariant, with value varying depending
+ * on the attribute requested, as specified in the table. If the
+ * attribute is not defined, <code>NULL</code> is returned.
+ */
+GVariant*
+poppler_structure_element_get_attribute (PopplerStructureElement  *poppler_structure_element,
+                                         PopplerStructureAttribute attribute,
+                                         gboolean                  inherit)
+{
+  /* Scratch pointer for the looked-up attribute value. The cases that
+   * use it return NULL when the lookup yields no value. */
+  Object *value = NULL;
+
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (attribute != POPPLER_STRUCTURE_ATTRIBUTE_UNKNOWN, NULL);
+  g_return_val_if_fail (attribute != POPPLER_STRUCTURE_ATTRIBUTE_USER_PROPERTY, NULL);
+
+  /* Each case looks up the core attribute matching the requested
+   * PopplerStructureAttribute and hands the value to the converter
+   * that builds the GVariant for it. */
+  switch (attribute)
+    {
+      case POPPLER_STRUCTURE_ATTRIBUTE_PLACEMENT:
+        return name_to_variant_enum<PopplerStructurePlacement> (attr_value_or_default (poppler_structure_element,
+                                                                                       Attribute::Placement, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_WRITING_MODE:
+        return name_to_variant_enum<PopplerStructureWritingMode> (attr_value_or_default (poppler_structure_element,
+                                                                                         Attribute::WritingMode, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_BACKGROUND_COLOR:
+        value = attr_value_or_default (poppler_structure_element, Attribute::BackgroundColor, inherit);
+        return (value && value->isArray ()) ? _g_variant_new_from_rgb_array (value->getArray ()) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_BORDER_COLOR:
+        value = attr_value_or_default (poppler_structure_element, Attribute::BorderColor, inherit);
+        return (value && value->isArray ()) ? _g_variant_new_from_rgb_array_or_x4 (value->getArray ()) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_BORDER_STYLE:
+        value = attr_value_or_default (poppler_structure_element, Attribute::BorderStyle, inherit);
+        return value ? _g_variant_new_from_border_style (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_BORDER_THICKNESS:
+        value = attr_value_or_default (poppler_structure_element, Attribute::BorderThickness, inherit);
+        return value ? _g_variant_new_from_number_or_x4 (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_COLOR:
+        value = attr_value_or_default (poppler_structure_element, Attribute::Color, inherit);
+        return (value && value->isArray ()) ? _g_variant_new_from_rgb_array (value->getArray ()) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_PADDING:
+        value = attr_value_or_default (poppler_structure_element, Attribute::Padding, inherit);
+        return value ? _g_variant_new_from_number_or_x4 (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_SPACE_BEFORE:
+        value = attr_value_or_default (poppler_structure_element, Attribute::SpaceBefore, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_SPACE_AFTER:
+        value = attr_value_or_default (poppler_structure_element, Attribute::SpaceAfter, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_START_INDENT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::StartIndent, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_END_INDENT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::EndIndent, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_TEXT_INDENT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::TextIndent, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_TEXT_ALIGN:
+        return name_to_variant_enum<PopplerStructureTextAlign> (attr_value_or_default (poppler_structure_element,
+                                                                                       Attribute::TextAlign, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_BBOX:
+        value = attr_value_or_default (poppler_structure_element, Attribute::BBox, inherit);
+        return value ? _g_variant_new_from_number_x4 (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_WIDTH:
+        value = attr_value_or_default (poppler_structure_element, Attribute::Width, inherit);
+        return value ? _g_variant_new_from_number_or_auto (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_HEIGHT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::Height, inherit);
+        return value ? _g_variant_new_from_number_or_auto (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_BLOCK_ALIGN:
+        return name_to_variant_enum<PopplerStructureBlockAlign> (attr_value_or_default (poppler_structure_element,
+                                                                                        Attribute::BlockAlign, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_INLINE_ALIGN:
+        return name_to_variant_enum<PopplerStructureInlineAlign> (attr_value_or_default (poppler_structure_element,
+                                                                                         Attribute::InlineAlign, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_TABLE_BORDER_STYLE:
+        value = attr_value_or_default (poppler_structure_element, Attribute::TBorderStyle, inherit);
+        return value ? _g_variant_new_from_border_style (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_TABLE_PADDING:
+        value = attr_value_or_default (poppler_structure_element, Attribute::TPadding, inherit);
+        return value ? _g_variant_new_from_number_or_x4 (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_BASELINE_SHIFT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::BaselineShift, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_LINE_HEIGHT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::LineHeight, inherit);
+        return value ? _g_variant_new_from_number_or_auto_or_normal (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION_COLOR:
+        value = attr_value_or_default (poppler_structure_element, Attribute::TextDecorationColor, inherit);
+        return (value && value->isArray ()) ? _g_variant_new_from_rgb_array (value->getArray ()) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION_THICKNESS:
+        /* Must query TextDecorationThickness itself; querying LineHeight
+         * here would return the line height under the wrong name. */
+        value = attr_value_or_default (poppler_structure_element, Attribute::TextDecorationThickness, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION:
+        return name_to_variant_enum<PopplerStructureTextDecoration> (attr_value_or_default (poppler_structure_element,
+                                                                                            Attribute::TextDecorationType, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_RUBY_ALIGN:
+        return name_to_variant_enum<PopplerStructureRubyAlign> (attr_value_or_default (poppler_structure_element,
+                                                                                       Attribute::RubyAlign, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_RUBY_POSITION:
+        return name_to_variant_enum<PopplerStructureRubyPosition> (attr_value_or_default (poppler_structure_element,
+                                                                                          Attribute::RubyPosition, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_GLYPH_ORIENTATION:
+        return name_to_variant_enum<PopplerStructureGlyphOrientation> (attr_value_or_default (poppler_structure_element,
+                                                                                              Attribute::GlyphOrientationVertical, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_COUNT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::ColumnCount, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_GAP:
+        value = attr_value_or_default (poppler_structure_element, Attribute::ColumnGap, inherit);
+        return value ? _g_variant_new_number_array (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_WIDTHS:
+        /* Must query ColumnWidths; querying ColumnGap here would make
+         * this case a duplicate of POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_GAP. */
+        value = attr_value_or_default (poppler_structure_element, Attribute::ColumnWidths, inherit);
+        return value ? _g_variant_new_number_array (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_LIST_NUMBERING:
+        return name_to_variant_enum<PopplerStructureListNumbering> (attr_value_or_default (poppler_structure_element,
+                                                                                           Attribute::ListNumbering, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_ROLE:
+        return name_to_variant_enum<PopplerStructureRole> (attr_value_or_default (poppler_structure_element,
+                                                                                  Attribute::Role, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_CHECKED:
+        return name_to_variant_enum<PopplerStructureChecked> (attr_value_or_default (poppler_structure_element,
+                                                                                     Attribute::checked, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_DESCRIPTION:
+        return string_to_variant (attr_value_or_default (poppler_structure_element,
+                                                         Attribute::Desc, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_ROW_SPAN:
+        value = attr_value_or_default (poppler_structure_element, Attribute::RowSpan, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_SPAN:
+        value = attr_value_or_default (poppler_structure_element, Attribute::ColSpan, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_HEADERS:
+        value = attr_value_or_default (poppler_structure_element, Attribute::Headers, inherit);
+        return value ? _g_variant_new_string_array (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_SCOPE:
+        return name_to_variant_enum<PopplerStructureScope> (attr_value_or_default (poppler_structure_element,
+                                                                                   Attribute::Scope, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_SUMMARY:
+        return string_to_variant (attr_value_or_default (poppler_structure_element, Attribute::Summary, inherit));
+
+      /* UNKNOWN and USER_PROPERTY were already rejected by the
+       * precondition checks above, so reaching this point is a bug. */
+      case POPPLER_STRUCTURE_ATTRIBUTE_USER_PROPERTY:
+      case POPPLER_STRUCTURE_ATTRIBUTE_UNKNOWN:
+      default:
+        g_assert_not_reached ();
+        return NULL;
+    }
+}
+
+/**
+ * poppler_structure_element_is_reference:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Tells whether the element is an object reference, as reported by the
+ * underlying structure element.
+ *
+ * Return value: %TRUE if the element is a reference to another object,
+ *    %FALSE otherwise.
+ */
+gboolean
+poppler_structure_element_is_reference (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+  g_assert (poppler_structure_element->elem);
+
+  const gboolean is_object_ref = poppler_structure_element->elem->isObjectRef ();
+
+  return is_object_ref;
+}
+
+/**
+ * poppler_structure_element_get_reference_type:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Classifies the object that an object-reference element points to by
+ * fetching it from the document and inspecting its dictionary.
+ *
+ * Return value: %POPPLER_STRUCTURE_REFERENCE_LINK if the referenced
+ *    object is an "Annot" dictionary whose "Subtype" is "Link",
+ *    %POPPLER_STRUCTURE_REFERENCE_ANNOT for any other "Annot"
+ *    dictionary, and %POPPLER_STRUCTURE_REFERENCE_UNKNOWN otherwise
+ *    (including when the element is not an object reference).
+ */
+PopplerStructureReference
+poppler_structure_element_get_reference_type (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element),
+                        POPPLER_STRUCTURE_REFERENCE_UNKNOWN);
+  g_assert (poppler_structure_element->elem);
+
+  /* Only object references can be classified. */
+  if (!poppler_structure_element->elem->isObjectRef ())
+    return POPPLER_STRUCTURE_REFERENCE_UNKNOWN;
+
+  PopplerStructureReference kind = POPPLER_STRUCTURE_REFERENCE_UNKNOWN;
+  Object target;
+  const Ref target_ref = poppler_structure_element->elem->getObjectRef ();
+  XRef *xref_table = poppler_structure_element->structure->document->doc->getXRef ();
+
+  Object *fetched = xref_table->fetch (target_ref.num, target_ref.gen, &target);
+  if (fetched->isDict ("Annot"))
+    {
+      Object annot_subtype;
+      Object *looked_up = target.dictLookup ("Subtype", &annot_subtype);
+
+      /* Any annotation is at least ANNOT; link annotations are refined to LINK. */
+      kind = looked_up->isName ("Link") ? POPPLER_STRUCTURE_REFERENCE_LINK
+                                        : POPPLER_STRUCTURE_REFERENCE_ANNOT;
+      annot_subtype.free ();
+    }
+
+  target.free ();
+
+  return kind;
+}
+
+/**
+ * poppler_structure_element_get_reference_link:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Finds the link annotation associated with an object-reference element
+ * on the element's page, and returns its action together with its area.
+ * The area is made relative to the page crop box and adjusted for the
+ * page rotation.
+ *
+ * Return value: (transfer full): A #PopplerLinkMapping for the link
+ *    pointed to by the object reference, or %NULL if the element is not
+ *    a reference pointing to a link, its page cannot be determined, or
+ *    no matching link annotation is found on that page.
+ */
+PopplerLinkMapping*
+poppler_structure_element_get_reference_link (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (poppler_structure_element_get_reference_type (poppler_structure_element)
+      != POPPLER_STRUCTURE_REFERENCE_LINK)
+    return NULL;
+
+  gint num = poppler_structure_element_get_page (poppler_structure_element);
+  if (num < 0 || num >= poppler_document_get_n_pages (poppler_structure_element->structure->document))
+    return NULL;
+
+  /* The page index obtained above is 0-based, getPage() is called with
+   * index + 1. Guard against a missing page, as
+   * poppler_structure_element_get_form_field_mapping() does. */
+  Page *page = poppler_structure_element->structure->document->doc->getPage (num + 1);
+  if (!page)
+    return NULL;
+
+  AnnotLink *link = NULL;
+  Links links(page->getAnnots ());
+
+  /* Pick the first link annotation whose parent structure element is
+   * this element. */
+  for (gint i = 0; i < links.getNumLinks(); i++)
+    {
+      AnnotLink *l = links.getLink (i);
+      const StructElement *parent = poppler_structure_element->structure->root->findParentElement (l->getTreeKey ());
+      if (parent == poppler_structure_element->elem)
+        {
+          link = l;
+          break;
+        }
+    }
+
+  if (!link)
+    return NULL;
+
+  PopplerRectangle rect;
+  LinkAction *link_action = link->getAction ();
+  PopplerLinkMapping *mapping = poppler_link_mapping_new ();
+  mapping->action = _poppler_action_new (poppler_structure_element->structure->document, link_action, NULL);
+
+  link->getRect (&rect.x1, &rect.y1, &rect.x2, &rect.y2);
+
+  /* Make the rectangle relative to the crop box origin. */
+  rect.x1 -= page->getCropBox()->x1;
+  rect.x2 -= page->getCropBox()->x1;
+  rect.y1 -= page->getCropBox()->y1;
+  rect.y2 -= page->getCropBox()->y1;
+
+  switch (page->getRotate ())
+    {
+      case 90:
+        mapping->area.x1 = rect.y1;
+        mapping->area.y1 = page->getCropWidth () - rect.x2;
+        mapping->area.x2 = mapping->area.x1 + (rect.y2 - rect.y1);
+        mapping->area.y2 = mapping->area.y1 + (rect.x2 - rect.x1);
+        break;
+      case 180:
+        mapping->area.x1 = page->getCropWidth () - rect.x2;
+        mapping->area.y1 = page->getCropHeight () - rect.y2;
+        mapping->area.x2 = mapping->area.x1 + (rect.x2 - rect.x1);
+        mapping->area.y2 = mapping->area.y1 + (rect.y2 - rect.y1);
+        break;
+      case 270:
+        mapping->area.x1 = page->getCropHeight () - rect.y2;
+        mapping->area.y1 = rect.x1;
+        mapping->area.x2 = mapping->area.x1 + (rect.y2 - rect.y1);
+        mapping->area.y2 = mapping->area.y1 + (rect.x2 - rect.x1);
+        break;
+      default:
+        /* No rotation: copy the rectangle as is. All four coordinates
+         * must be assigned, y1 included. */
+        mapping->area.x1 = rect.x1;
+        mapping->area.y1 = rect.y1;
+        mapping->area.x2 = rect.x2;
+        mapping->area.y2 = rect.y2;
+    }
+
+  return mapping;
+}
+
+/**
+ * poppler_structure_element_find_link:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * For an element of Link type, walks its direct children looking for an
+ * object reference that points to a link, and returns the mapping for
+ * the first one that can be resolved.
+ *
+ * Return value: (transfer full): A #PopplerLinkMapping, or %NULL if the
+ *    element is not a Link element or the link cannot be found.
+ */
+PopplerLinkMapping*
+poppler_structure_element_find_link (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (poppler_structure_element->elem->getType () != StructElement::Link)
+    return NULL;
+
+  /* The child count does not change while iterating; read it once. */
+  const guint n_children = poppler_structure_element_get_n_children (poppler_structure_element);
+
+  for (guint i = 0; i < n_children; i++)
+    {
+      PopplerStructureElement *child = poppler_structure_element_get_child (poppler_structure_element, i);
+      if (poppler_structure_element_get_reference_type (child) == POPPLER_STRUCTURE_REFERENCE_LINK)
+        {
+          /* Resolve the child that was just identified as a link
+           * reference. Resolving the Link element itself cannot work:
+           * poppler_structure_element_get_reference_link() only accepts
+           * elements whose reference type is LINK. */
+          PopplerLinkMapping *mapping = poppler_structure_element_get_reference_link (child);
+          if (mapping)
+            return mapping;
+        }
+      /* NOTE(review): ownership of the object returned by
+       * poppler_structure_element_get_child() is not visible here;
+       * confirm whether child needs to be unreferenced. */
+    }
+
+  return NULL;
+}
+
+/**
+ * poppler_structure_element_get_form_field:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Resolves the form field described by a Form structure element. Only
+ * elements with exactly one child are handled, and that child must be
+ * a content item which is not an object reference; its MCID is used as
+ * the form field identifier.
+ *
+ * Return value: (transfer full): A #PopplerFormField, or %NULL if
+ *    the element is not a %POPPLER_STRUCTURE_ELEMENT_FORM or no field
+ *    identifier could be obtained from it.
+ */
+PopplerFormField*
+poppler_structure_element_get_form_field (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (poppler_structure_element->elem->getType () != StructElement::Form)
+    return NULL;
+
+  // TODO Handle elements which have a Role attribute (used sometimes for
+  //      non-editable widgets, to describe their appearance). Editable
+  //      fields have only a single child, with the field identifier.
+  if (poppler_structure_element->elem->getNumElements () != 1)
+    return NULL;
+
+  const StructElement *only_child = poppler_structure_element->elem->getElement (0);
+
+  // TODO Handle content children which are object references -- I have
+  //      yet to see a PDF using this. For now they yield no field, the
+  //      same as non-content children.
+  if (!only_child->isContent () || only_child->isObjectRef ())
+    return NULL;
+
+  // Element contains the form field ID as the MCID attribute.
+  const gint form_field_id = only_child->getMCID ();
+  if (form_field_id < 0)
+    return NULL;
+
+  return poppler_document_get_form_field (poppler_structure_element->structure->document,
+                                          form_field_id);
+}
+
+/**
+ * poppler_structure_element_get_form_field_mapping:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Resolves the form field of a Form structure element and pairs it with
+ * the area of the matching widget on the element's page. The area is
+ * made relative to the page crop box.
+ *
+ * Return value: (transfer full): A #PopplerFormFieldMapping, or %NULL if
+ *    the element is not a %POPPLER_STRUCTURE_ELEMENT_FORM, its page or
+ *    form field cannot be determined, or the page has no widget for
+ *    the field.
+ */
+PopplerFormFieldMapping*
+poppler_structure_element_get_form_field_mapping (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  /* An out-of-range page index is reported as "no mapping" instead of
+   * aborting, as poppler_structure_element_get_reference_link() does. */
+  gint page_num = poppler_structure_element_get_page (poppler_structure_element);
+  if (page_num < 0 || page_num >= poppler_document_get_n_pages (poppler_structure_element->structure->document))
+    return NULL;
+
+  Page *page = poppler_structure_element->structure->document->doc->getPage (page_num + 1);
+  if (!page)
+    return NULL;
+
+  FormPageWidgets *forms = page->getFormWidgets ();
+  if (!forms)
+    return NULL;
+
+  PopplerFormField *field = poppler_structure_element_get_form_field (poppler_structure_element);
+  if (!field)
+    return NULL;
+
+  /* Search the page widgets for the one carrying the field's ID. */
+  FormWidget *widget = NULL;
+  for (int i = 0; i < forms->getNumWidgets (); i++)
+    {
+      FormWidget *w = forms->getWidget (i);
+      if ((gint) w->getID () == poppler_form_field_get_id (field))
+        {
+          widget = w;
+          break;
+        }
+    }
+
+  /* No widget on this page: drop the field reference obtained above and
+   * bail out before the widget is dereferenced. */
+  if (!widget)
+    {
+      g_object_unref (field);
+      return NULL;
+    }
+
+  /* The mapping takes over the field reference. */
+  PopplerFormFieldMapping *mapping = poppler_form_field_mapping_new ();
+  mapping->field = field;
+
+  widget->getRect (&mapping->area.x1, &mapping->area.y1,
+                   &mapping->area.x2, &mapping->area.y2);
+  mapping->area.x1 -= page->getCropBox ()->x1;
+  mapping->area.x2 -= page->getCropBox ()->x1;
+  mapping->area.y1 -= page->getCropBox ()->y1;
+  mapping->area.y2 -= page->getCropBox ()->y1;
+
+  return mapping;
+}
diff --git a/glib/poppler-structure-element.h b/glib/poppler-structure-element.h
new file mode 100644
index 0000000..b3076d2
--- /dev/null
+++ b/glib/poppler-structure-element.h
@@ -0,0 +1,366 @@
+/* poppler-structure-element.h: glib interface to poppler
+ *
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __POPPLER_STRUCTURE_ELEMENT_H__
+#define __POPPLER_STRUCTURE_ELEMENT_H__
+
+#include <glib-object.h>
+#include "poppler.h"
+
+G_BEGIN_DECLS
+
+/* GObject type macros for PopplerStructureElement: the GType accessor,
+ * a checked instance cast, and an instance type test. */
+#define POPPLER_TYPE_STRUCTURE_ELEMENT (poppler_structure_element_get_type ())
+#define POPPLER_STRUCTURE_ELEMENT(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), POPPLER_TYPE_STRUCTURE_ELEMENT, PopplerStructureElement))
+#define POPPLER_IS_STRUCTURE_ELEMENT(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), POPPLER_TYPE_STRUCTURE_ELEMENT))
+
+/**
+ * PopplerStructureElementKind:
+ *
+ * Kind of a structure element, as returned by
+ * poppler_structure_element_get_kind(). Each value is converted to a
+ * core StructElement type in poppler-structure.cc; for instance
+ * %POPPLER_STRUCTURE_ELEMENT_CONTENT maps to the MCID type and
+ * %POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE to the OBJR type.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_ELEMENT_UNKNOWN,
+ POPPLER_STRUCTURE_ELEMENT_CONTENT,
+ POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE,
+ POPPLER_STRUCTURE_ELEMENT_DOCUMENT,
+ POPPLER_STRUCTURE_ELEMENT_PART,
+ POPPLER_STRUCTURE_ELEMENT_ARTICLE,
+ POPPLER_STRUCTURE_ELEMENT_SECTION,
+ POPPLER_STRUCTURE_ELEMENT_DIV,
+ POPPLER_STRUCTURE_ELEMENT_SPAN,
+ POPPLER_STRUCTURE_ELEMENT_QUOTE,
+ POPPLER_STRUCTURE_ELEMENT_NOTE,
+ POPPLER_STRUCTURE_ELEMENT_REFERENCE,
+ POPPLER_STRUCTURE_ELEMENT_BIBENTRY,
+ POPPLER_STRUCTURE_ELEMENT_CODE,
+ POPPLER_STRUCTURE_ELEMENT_LINK,
+ POPPLER_STRUCTURE_ELEMENT_ANNOT,
+ POPPLER_STRUCTURE_ELEMENT_RUBY,
+ POPPLER_STRUCTURE_ELEMENT_WARICHU,
+ POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE,
+ POPPLER_STRUCTURE_ELEMENT_CAPTION,
+ POPPLER_STRUCTURE_ELEMENT_NONSTRUCT,
+ POPPLER_STRUCTURE_ELEMENT_TOC,
+ POPPLER_STRUCTURE_ELEMENT_TOC_ITEM,
+ POPPLER_STRUCTURE_ELEMENT_INDEX,
+ POPPLER_STRUCTURE_ELEMENT_PRIVATE,
+ POPPLER_STRUCTURE_ELEMENT_PARAGRAPH,
+ POPPLER_STRUCTURE_ELEMENT_HEADING,
+ POPPLER_STRUCTURE_ELEMENT_HEADING_1,
+ POPPLER_STRUCTURE_ELEMENT_HEADING_2,
+ POPPLER_STRUCTURE_ELEMENT_HEADING_3,
+ POPPLER_STRUCTURE_ELEMENT_HEADING_4,
+ POPPLER_STRUCTURE_ELEMENT_HEADING_5,
+ POPPLER_STRUCTURE_ELEMENT_HEADING_6,
+ POPPLER_STRUCTURE_ELEMENT_LIST,
+ POPPLER_STRUCTURE_ELEMENT_LIST_ITEM,
+ POPPLER_STRUCTURE_ELEMENT_LIST_LABEL,
+ POPPLER_STRUCTURE_ELEMENT_TABLE,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_ROW,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_DATA,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_BODY,
+ POPPLER_STRUCTURE_ELEMENT_FIGURE,
+ POPPLER_STRUCTURE_ELEMENT_FORMULA,
+ POPPLER_STRUCTURE_ELEMENT_FORM,
+} PopplerStructureElementKind;
+
+/**
+ * PopplerStructureAttribute:
+ *
+ * Identifies the attribute requested from
+ * poppler_structure_element_get_attribute(). That function rejects
+ * %POPPLER_STRUCTURE_ATTRIBUTE_UNKNOWN and
+ * %POPPLER_STRUCTURE_ATTRIBUTE_USER_PROPERTY with a precondition
+ * failure and returns %NULL for them; every other value selects one
+ * attribute whose value is returned as a #GVariant.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_ATTRIBUTE_UNKNOWN,
+ POPPLER_STRUCTURE_ATTRIBUTE_USER_PROPERTY,
+ POPPLER_STRUCTURE_ATTRIBUTE_PLACEMENT,
+ POPPLER_STRUCTURE_ATTRIBUTE_WRITING_MODE,
+ POPPLER_STRUCTURE_ATTRIBUTE_BACKGROUND_COLOR,
+ POPPLER_STRUCTURE_ATTRIBUTE_BORDER_COLOR,
+ POPPLER_STRUCTURE_ATTRIBUTE_BORDER_STYLE,
+ POPPLER_STRUCTURE_ATTRIBUTE_BORDER_THICKNESS,
+ POPPLER_STRUCTURE_ATTRIBUTE_COLOR,
+ POPPLER_STRUCTURE_ATTRIBUTE_PADDING,
+ POPPLER_STRUCTURE_ATTRIBUTE_SPACE_BEFORE,
+ POPPLER_STRUCTURE_ATTRIBUTE_SPACE_AFTER,
+ POPPLER_STRUCTURE_ATTRIBUTE_START_INDENT,
+ POPPLER_STRUCTURE_ATTRIBUTE_END_INDENT,
+ POPPLER_STRUCTURE_ATTRIBUTE_TEXT_INDENT,
+ POPPLER_STRUCTURE_ATTRIBUTE_TEXT_ALIGN,
+ POPPLER_STRUCTURE_ATTRIBUTE_BBOX,
+ POPPLER_STRUCTURE_ATTRIBUTE_WIDTH,
+ POPPLER_STRUCTURE_ATTRIBUTE_HEIGHT,
+ POPPLER_STRUCTURE_ATTRIBUTE_BLOCK_ALIGN,
+ POPPLER_STRUCTURE_ATTRIBUTE_INLINE_ALIGN,
+ POPPLER_STRUCTURE_ATTRIBUTE_TABLE_BORDER_STYLE,
+ POPPLER_STRUCTURE_ATTRIBUTE_TABLE_PADDING,
+ POPPLER_STRUCTURE_ATTRIBUTE_BASELINE_SHIFT,
+ POPPLER_STRUCTURE_ATTRIBUTE_LINE_HEIGHT,
+ POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION_COLOR,
+ POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION_THICKNESS,
+ POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION,
+ POPPLER_STRUCTURE_ATTRIBUTE_RUBY_ALIGN,
+ POPPLER_STRUCTURE_ATTRIBUTE_RUBY_POSITION,
+ POPPLER_STRUCTURE_ATTRIBUTE_GLYPH_ORIENTATION,
+ POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_COUNT,
+ POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_GAP,
+ POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_WIDTHS,
+ POPPLER_STRUCTURE_ATTRIBUTE_LIST_NUMBERING,
+ POPPLER_STRUCTURE_ATTRIBUTE_ROLE,
+ POPPLER_STRUCTURE_ATTRIBUTE_CHECKED,
+ POPPLER_STRUCTURE_ATTRIBUTE_DESCRIPTION,
+ POPPLER_STRUCTURE_ATTRIBUTE_ROW_SPAN,
+ POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_SPAN,
+ POPPLER_STRUCTURE_ATTRIBUTE_HEADERS,
+ POPPLER_STRUCTURE_ATTRIBUTE_SCOPE,
+ POPPLER_STRUCTURE_ATTRIBUTE_SUMMARY,
+} PopplerStructureAttribute;
+
+/**
+ * PopplerStructurePlacement:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_PLACEMENT.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_PLACEMENT_BLOCK,
+ POPPLER_STRUCTURE_PLACEMENT_INLINE,
+ POPPLER_STRUCTURE_PLACEMENT_BEFORE,
+ POPPLER_STRUCTURE_PLACEMENT_START,
+ POPPLER_STRUCTURE_PLACEMENT_END,
+} PopplerStructurePlacement;
+
+/**
+ * PopplerStructureWritingMode:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_WRITING_MODE.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_WRITING_MODE_LR_TB,
+ POPPLER_STRUCTURE_WRITING_MODE_RL_TB,
+ POPPLER_STRUCTURE_WRITING_MODE_TB_RL,
+} PopplerStructureWritingMode;
+
+/**
+ * PopplerStructureBorderStyle:
+ *
+ * Border styles. Presumably the values reported for
+ * %POPPLER_STRUCTURE_ATTRIBUTE_BORDER_STYLE and
+ * %POPPLER_STRUCTURE_ATTRIBUTE_TABLE_BORDER_STYLE; confirm against the
+ * border-style conversion helper in poppler-structure-element.cc.
+ *
+ * NOTE(review): the PDF standard layout attributes also define a
+ * "Ridge" border style, which has no counterpart here; confirm whether
+ * the omission is intentional.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_BORDER_STYLE_NONE,
+ POPPLER_STRUCTURE_BORDER_STYLE_HIDDEN,
+ POPPLER_STRUCTURE_BORDER_STYLE_DOTTED,
+ POPPLER_STRUCTURE_BORDER_STYLE_DASHED,
+ POPPLER_STRUCTURE_BORDER_STYLE_SOLID,
+ POPPLER_STRUCTURE_BORDER_STYLE_DOUBLE,
+ POPPLER_STRUCTURE_BORDER_STYLE_GROOVE,
+ POPPLER_STRUCTURE_BORDER_STYLE_INSET,
+ POPPLER_STRUCTURE_BORDER_STYLE_OUTSET,
+} PopplerStructureBorderStyle;
+
+/**
+ * PopplerStructureTextAlign:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_TEXT_ALIGN.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_TEXT_ALIGN_START,
+ POPPLER_STRUCTURE_TEXT_ALIGN_CENTER,
+ POPPLER_STRUCTURE_TEXT_ALIGN_END,
+ POPPLER_STRUCTURE_TEXT_ALIGN_JUSTIFY,
+} PopplerStructureTextAlign;
+
+/**
+ * PopplerStructureBlockAlign:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_BLOCK_ALIGN.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_BLOCK_ALIGN_BEFORE,
+ POPPLER_STRUCTURE_BLOCK_ALIGN_MIDDLE,
+ POPPLER_STRUCTURE_BLOCK_ALIGN_AFTER,
+ POPPLER_STRUCTURE_BLOCK_ALIGN_JUSTIFY,
+} PopplerStructureBlockAlign;
+
+/**
+ * PopplerStructureInlineAlign:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_INLINE_ALIGN.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_INLINE_ALIGN_START,
+ POPPLER_STRUCTURE_INLINE_ALIGN_CENTER,
+ POPPLER_STRUCTURE_INLINE_ALIGN_END,
+} PopplerStructureInlineAlign;
+
+/**
+ * PopplerStructureTextDecoration:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_TEXT_DECORATION_NONE,
+ POPPLER_STRUCTURE_TEXT_DECORATION_UNDERLINE,
+ POPPLER_STRUCTURE_TEXT_DECORATION_OVERLINE,
+ POPPLER_STRUCTURE_TEXT_DECORATION_LINETHROUGH,
+} PopplerStructureTextDecoration;
+
+/**
+ * PopplerStructureRubyAlign:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_RUBY_ALIGN.
+ */
+typedef enum
+{
+ POPPLER_STRUCTURE_RUBY_ALIGN_START,
+ POPPLER_STRUCTURE_RUBY_ALIGN_CENTER,
+ POPPLER_STRUCTURE_RUBY_ALIGN_END,
+ POPPLER_STRUCTURE_RUBY_ALIGN_JUSTIFY,
+ POPPLER_STRUCTURE_RUBY_ALIGN_DISTRIBUTE,
+} PopplerStructureRubyAlign;
+
+/**
+ * PopplerStructureRubyPosition:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_RUBY_POSITION.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_RUBY_POSITION_BEFORE,
+ POPPLER_STRUCTURE_RUBY_POSITION_AFTER,
+ POPPLER_STRUCTURE_RUBY_POSITION_WARICHU,
+ POPPLER_STRUCTURE_RUBY_POSITION_INLINE,
+} PopplerStructureRubyPosition;
+
+/**
+ * PopplerStructureGlyphOrientation:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_GLYPH_ORIENTATION.
+ *
+ * %POPPLER_STRUCTURE_GLYPH_ORIENTATION_0 is an alias with the same
+ * numeric value as %POPPLER_STRUCTURE_GLYPH_ORIENTATION_AUTO, so the
+ * two cannot be told apart.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_GLYPH_ORIENTATION_AUTO,
+ POPPLER_STRUCTURE_GLYPH_ORIENTATION_0 = POPPLER_STRUCTURE_GLYPH_ORIENTATION_AUTO,
+ POPPLER_STRUCTURE_GLYPH_ORIENTATION_90,
+ POPPLER_STRUCTURE_GLYPH_ORIENTATION_180,
+ POPPLER_STRUCTURE_GLYPH_ORIENTATION_270,
+} PopplerStructureGlyphOrientation;
+
+/**
+ * PopplerStructureListNumbering:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_LIST_NUMBERING.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_LIST_NUMBERING_NONE,
+ POPPLER_STRUCTURE_LIST_NUMBERING_DISC,
+ POPPLER_STRUCTURE_LIST_NUMBERING_CIRCLE,
+ POPPLER_STRUCTURE_LIST_NUMBERING_SQUARE,
+ POPPLER_STRUCTURE_LIST_NUMBERING_DECIMAL,
+ POPPLER_STRUCTURE_LIST_NUMBERING_UPPER_ROMAN,
+ POPPLER_STRUCTURE_LIST_NUMBERING_LOWER_ROMAN,
+ POPPLER_STRUCTURE_LIST_NUMBERING_UPPER_ALPHA,
+ POPPLER_STRUCTURE_LIST_NUMBERING_LOWER_ALPHA,
+} PopplerStructureListNumbering;
+
+/**
+ * PopplerStructureRole:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_ROLE.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_ROLE_RADIO_BUTTON,
+ POPPLER_STRUCTURE_ROLE_PUSH_BUTTON,
+ POPPLER_STRUCTURE_ROLE_TEXT_VALUE,
+ POPPLER_STRUCTURE_ROLE_CHECKBOX,
+} PopplerStructureRole;
+
+/**
+ * PopplerStructureChecked:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_CHECKED.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_CHECKED_ON,
+ POPPLER_STRUCTURE_CHECKED_OFF,
+ POPPLER_STRUCTURE_CHECKED_NEUTRAL,
+} PopplerStructureChecked;
+
+/**
+ * PopplerStructureScope:
+ *
+ * Enumeration used by poppler_structure_element_get_attribute() when
+ * converting the value of %POPPLER_STRUCTURE_ATTRIBUTE_SCOPE.
+ */
+typedef enum {
+ POPPLER_STRUCTURE_SCOPE_ROW,
+ POPPLER_STRUCTURE_SCOPE_COLUMN,
+ POPPLER_STRUCTURE_SCOPE_BOTH,
+} PopplerStructureScope;
+
+/**
+ * PopplerStructureReference:
+ * @POPPLER_STRUCTURE_REFERENCE_UNKNOWN: The element is not an object
+ *    reference, or the referenced object is not an "Annot" dictionary.
+ * @POPPLER_STRUCTURE_REFERENCE_ANNOT: The referenced object is an
+ *    "Annot" dictionary whose "Subtype" is not "Link".
+ * @POPPLER_STRUCTURE_REFERENCE_LINK: The referenced object is an
+ *    "Annot" dictionary whose "Subtype" is "Link".
+ *
+ * Type of object pointed to by an object-reference element, as returned
+ * by poppler_structure_element_get_reference_type().
+ */
+typedef enum {
+ POPPLER_STRUCTURE_REFERENCE_UNKNOWN,
+ POPPLER_STRUCTURE_REFERENCE_ANNOT,
+ POPPLER_STRUCTURE_REFERENCE_LINK,
+} PopplerStructureReference;
+
+
+/* A run of text together with its presentation details.
+ *
+ * flags is a bitmask of POPPLER_TEXT_SPAN_* values; the
+ * poppler_text_span_is_*() helpers below test individual bits.
+ * NOTE(review): presumably the element type of the list returned by
+ * poppler_structure_element_get_text_spans(), with font_name,
+ * link_target and color only meaningful when the matching flag bit is
+ * set -- confirm against the implementation. */
+typedef struct _PopplerTextSpan PopplerTextSpan;
+struct _PopplerTextSpan {
+ gchar *text;
+ gchar *font_name;
+ gchar *link_target;
+ guint flags;
+ guint color; /* 0x00RRGGBB */
+};
+
+/* Bit values for the flags member of PopplerTextSpan. Each constant
+ * occupies a distinct bit so they can be combined. FIXED_WIDTH,
+ * SERIF_FONT, BOLD and LINK have poppler_text_span_is_*() helpers in
+ * this header; ITALIC, COLOR and FONT have none and must be tested
+ * against flags directly. */
+enum {
+ POPPLER_TEXT_SPAN_FIXED_WIDTH = (1 << 0),
+ POPPLER_TEXT_SPAN_SERIF_FONT = (1 << 1),
+ POPPLER_TEXT_SPAN_ITALIC = (1 << 2),
+ POPPLER_TEXT_SPAN_BOLD = (1 << 3),
+ POPPLER_TEXT_SPAN_LINK = (1 << 4),
+ POPPLER_TEXT_SPAN_COLOR = (1 << 5),
+ POPPLER_TEXT_SPAN_FONT = (1 << 6),
+};
+
+
+/* Returns TRUE when the POPPLER_TEXT_SPAN_FIXED_WIDTH bit is set in the
+ * flags of poppler_text_span, FALSE otherwise. */
+static inline gboolean
+poppler_text_span_is_fixed_width (PopplerTextSpan *poppler_text_span)
+{
+  const guint fixed_width_bit = poppler_text_span->flags & POPPLER_TEXT_SPAN_FIXED_WIDTH;
+
+  return fixed_width_bit ? TRUE : FALSE;
+}
+
+/* Returns TRUE when the POPPLER_TEXT_SPAN_SERIF_FONT bit is set in the
+ * flags of poppler_text_span, FALSE otherwise. The masked value is
+ * normalized so that the result is a proper gboolean and not the raw
+ * bit, which would compare unequal to TRUE. */
+static inline gboolean
+poppler_text_span_is_serif_font (PopplerTextSpan *poppler_text_span)
+{
+  return (poppler_text_span->flags & POPPLER_TEXT_SPAN_SERIF_FONT) != 0;
+}
+
+/* Returns TRUE when the POPPLER_TEXT_SPAN_BOLD bit is set in the flags
+ * of poppler_text_span, FALSE otherwise. The masked value is
+ * normalized so that the result is a proper gboolean and not the raw
+ * bit, which would compare unequal to TRUE. */
+static inline gboolean
+poppler_text_span_is_bold (PopplerTextSpan *poppler_text_span)
+{
+  return (poppler_text_span->flags & POPPLER_TEXT_SPAN_BOLD) != 0;
+}
+
+/* Returns TRUE when the POPPLER_TEXT_SPAN_LINK bit is set in the flags
+ * of poppler_text_span, FALSE otherwise. The masked value is
+ * normalized so that the result is a proper gboolean and not the raw
+ * bit, which would compare unequal to TRUE. */
+static inline gboolean
+poppler_text_span_is_link (PopplerTextSpan *poppler_text_span)
+{
+  return (poppler_text_span->flags & POPPLER_TEXT_SPAN_LINK) != 0;
+}
+
+
+/* Type registration, element kind, page and tree navigation. */
+GType poppler_structure_element_get_type (void) G_GNUC_CONST;
+PopplerStructureElementKind poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element);
+gint poppler_structure_element_get_page (PopplerStructureElement *poppler_structure_element);
+gboolean poppler_structure_element_is_content (PopplerStructureElement *poppler_structure_element);
+gboolean poppler_structure_element_is_inline (PopplerStructureElement *poppler_structure_element);
+gboolean poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_element);
+guint poppler_structure_element_get_n_children (PopplerStructureElement *poppler_structure_element);
+PopplerStructureElement *poppler_structure_element_get_child (PopplerStructureElement *poppler_structure_element,
+ guint index);
+/* String properties and text content. */
+const gchar *poppler_structure_element_get_id (PopplerStructureElement *poppler_structure_element);
+const gchar *poppler_structure_element_get_title (PopplerStructureElement *poppler_structure_element);
+const gchar *poppler_structure_element_get_abbreviation (PopplerStructureElement *poppler_structure_element);
+const gchar *poppler_structure_element_get_language (PopplerStructureElement *poppler_structure_element);
+const gchar *poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_element,
+ gboolean recursive);
+GList *poppler_structure_element_get_text_spans (PopplerStructureElement *poppler_structure_element,
+ gboolean recursive);
+const gchar *poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element);
+const gchar *poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element);
+/* Attribute lookup; the result is a GVariant owned by the caller, or NULL. */
+GVariant *poppler_structure_element_get_attribute (PopplerStructureElement *poppler_structure_element,
+ PopplerStructureAttribute attribute,
+ gboolean inherit);
+/* Object references and links. */
+gboolean poppler_structure_element_is_reference (PopplerStructureElement *poppler_structure_element);
+PopplerStructureReference poppler_structure_element_get_reference_type (PopplerStructureElement *poppler_structure_element);
+PopplerLinkMapping *poppler_structure_element_get_reference_link (PopplerStructureElement *poppler_structure_element);
+PopplerLinkMapping *poppler_structure_element_find_link (PopplerStructureElement *poppler_structure_element);
+
+/* Form fields. */
+PopplerFormField *poppler_structure_element_get_form_field (PopplerStructureElement *poppler_structure_element);
+PopplerFormFieldMapping *poppler_structure_element_get_form_field_mapping (PopplerStructureElement *poppler_structure_element);
+
+G_END_DECLS
+
+#endif /* !__POPPLER_STRUCTURE_ELEMENT_H__ */
diff --git a/glib/poppler-structure.cc b/glib/poppler-structure.cc
new file mode 100644
index 0000000..f9671a6
--- /dev/null
+++ b/glib/poppler-structure.cc
@@ -0,0 +1,362 @@
+/* poppler-structure.cc: glib interface to poppler
+ *
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+
+#ifndef __GI_SCANNER__
+#include <StructTreeRoot.h>
+#include <StructElement.h>
+#endif /* !__GI_SCANNER__ */
+
+#include "poppler.h"
+#include "poppler-private.h"
+#include "poppler-structure.h"
+
+
+/*
+ * Maps a public #PopplerStructureElementKind value onto the matching
+ * StructElement::Type used by the poppler core.
+ *
+ * An unrecognised @kind trips g_assert_not_reached(). When GLib assertions
+ * are compiled out that macro expands to nothing, so an explicit fallback
+ * return is provided to avoid flowing off the end of a non-void function;
+ * the fallback is StructElement::Unknown.
+ */
+static inline StructElement::Type
+_poppler_structure_element_kind_to_structelement_type (PopplerStructureElementKind kind)
+{
+  switch (kind)
+    {
+      /* Special kinds */
+      case POPPLER_STRUCTURE_ELEMENT_UNKNOWN:          return StructElement::Unknown;
+      case POPPLER_STRUCTURE_ELEMENT_CONTENT:          return StructElement::MCID;
+      case POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE: return StructElement::OBJR;
+
+      /* Grouping and inline elements */
+      case POPPLER_STRUCTURE_ELEMENT_DOCUMENT:         return StructElement::Document;
+      case POPPLER_STRUCTURE_ELEMENT_PART:             return StructElement::Part;
+      case POPPLER_STRUCTURE_ELEMENT_SECTION:          return StructElement::Sect;
+      case POPPLER_STRUCTURE_ELEMENT_DIV:              return StructElement::Div;
+      case POPPLER_STRUCTURE_ELEMENT_SPAN:             return StructElement::Span;
+      case POPPLER_STRUCTURE_ELEMENT_QUOTE:            return StructElement::Quote;
+      case POPPLER_STRUCTURE_ELEMENT_NOTE:             return StructElement::Note;
+      case POPPLER_STRUCTURE_ELEMENT_REFERENCE:        return StructElement::Reference;
+      case POPPLER_STRUCTURE_ELEMENT_BIBENTRY:         return StructElement::BibEntry;
+      case POPPLER_STRUCTURE_ELEMENT_CODE:             return StructElement::Code;
+      case POPPLER_STRUCTURE_ELEMENT_LINK:             return StructElement::Link;
+      case POPPLER_STRUCTURE_ELEMENT_ANNOT:            return StructElement::Annot;
+      case POPPLER_STRUCTURE_ELEMENT_RUBY:             return StructElement::Ruby;
+      case POPPLER_STRUCTURE_ELEMENT_WARICHU:          return StructElement::Warichu;
+      case POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE:       return StructElement::BlockQuote;
+      case POPPLER_STRUCTURE_ELEMENT_CAPTION:          return StructElement::Caption;
+      case POPPLER_STRUCTURE_ELEMENT_NONSTRUCT:        return StructElement::NonStruct;
+      case POPPLER_STRUCTURE_ELEMENT_TOC:              return StructElement::TOC;
+      case POPPLER_STRUCTURE_ELEMENT_TOC_ITEM:         return StructElement::TOCI;
+      case POPPLER_STRUCTURE_ELEMENT_INDEX:            return StructElement::Index;
+      case POPPLER_STRUCTURE_ELEMENT_PRIVATE:          return StructElement::Private;
+
+      /* Paragraphs and headings */
+      case POPPLER_STRUCTURE_ELEMENT_PARAGRAPH:        return StructElement::P;
+      case POPPLER_STRUCTURE_ELEMENT_HEADING:          return StructElement::H;
+      case POPPLER_STRUCTURE_ELEMENT_HEADING_1:        return StructElement::H1;
+      case POPPLER_STRUCTURE_ELEMENT_HEADING_2:        return StructElement::H2;
+      case POPPLER_STRUCTURE_ELEMENT_HEADING_3:        return StructElement::H3;
+      case POPPLER_STRUCTURE_ELEMENT_HEADING_4:        return StructElement::H4;
+      case POPPLER_STRUCTURE_ELEMENT_HEADING_5:        return StructElement::H5;
+      case POPPLER_STRUCTURE_ELEMENT_HEADING_6:        return StructElement::H6;
+
+      /* Lists */
+      case POPPLER_STRUCTURE_ELEMENT_LIST:             return StructElement::L;
+      case POPPLER_STRUCTURE_ELEMENT_LIST_ITEM:        return StructElement::LI;
+      case POPPLER_STRUCTURE_ELEMENT_LIST_LABEL:       return StructElement::Lbl;
+
+      /* Tables */
+      case POPPLER_STRUCTURE_ELEMENT_TABLE:            return StructElement::Table;
+      case POPPLER_STRUCTURE_ELEMENT_TABLE_ROW:        return StructElement::TR;
+      case POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING:    return StructElement::TH;
+      case POPPLER_STRUCTURE_ELEMENT_TABLE_DATA:       return StructElement::TD;
+      case POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER:     return StructElement::THead;
+      case POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER:     return StructElement::TFoot;
+      case POPPLER_STRUCTURE_ELEMENT_TABLE_BODY:       return StructElement::TBody;
+
+      /* Illustrations and forms */
+      case POPPLER_STRUCTURE_ELEMENT_FIGURE:           return StructElement::Figure;
+      case POPPLER_STRUCTURE_ELEMENT_FORMULA:          return StructElement::Formula;
+      case POPPLER_STRUCTURE_ELEMENT_FORM:             return StructElement::Form;
+
+      default:
+        g_assert_not_reached ();
+    }
+
+  /* Only reachable when assertions are disabled and @kind is invalid. */
+  return StructElement::Unknown;
+}
+
+
+
+/**
+ * SECTION:poppler-structure
+ * @short_description: Document structure
+ * @title: PopplerStructure
+ *
+ * #PopplerStructure is used to represent the structure of a #PopplerDocument.
+ * If a structure is defined, poppler_document_get_structure() will return a
+ * valid pointer to its #PopplerStructure, which represents the document
+ * tree. Elements can be obtained using poppler_structure_get_n_children()
+ * and poppler_structure_get_child(). Searching for elements of a given
+ * #PopplerStructureElementKind and/or page can be done using
+ * poppler_structure_find_elements().
+ *
+ * The document structure tree is formed by #PopplerStructureElement objects,
+ * each of which describes a relevant element of the document. The
+ * logical order of the document is that of a depth-first traversal
+ * of the tree. Elements may recursively contain other child elements, which
+ * can be obtained using poppler_structure_element_get_n_children() and
+ * poppler_structure_element_get_child().
+ *
+ * Elements may have attached attributes describing additional information
+ * about them. The standard attributes (as defined in the PDF specification,
+ * see #PopplerStructureAttribute for a complete list) can be obtained using
+ * poppler_structure_element_get_attribute(). Elements may also contain
+ * non-standard attributes with arbitrary names called “user properties”,
+ * see poppler_structure_element_get_user_property() for details.
+ */
+
+/* Class structure for PopplerStructure; it adds nothing to GObjectClass. */
+typedef struct _PopplerStructureClass PopplerStructureClass;
+struct _PopplerStructureClass
+{
+ GObjectClass parent_class;
+};
+
+/* Registers PopplerStructure as a GObject subclass. */
+G_DEFINE_TYPE (PopplerStructure, poppler_structure, G_TYPE_OBJECT);
+
+
+/*
+ * Instance initializer. Intentionally empty: the document, root and
+ * children fields are assigned by _poppler_structure_new().
+ */
+static void
+poppler_structure_init (PopplerStructure *poppler_structure)
+{
+}
+
+
+/*
+ * GObject finalizer for #PopplerStructure.
+ *
+ * Releases the cached child wrappers first and drops the reference on the
+ * owning document last. The order matters: poppler_structure->root is owned
+ * by the catalog, which is reached through the document, so the root must
+ * not be touched once the document reference has been released.
+ */
+static void
+poppler_structure_finalize (GObject *object)
+{
+  PopplerStructure *poppler_structure = POPPLER_STRUCTURE (object);
+
+  if (poppler_structure->children)
+    {
+      /* Read the element count while the document (and so the root) is alive. */
+      const unsigned n_children = poppler_structure->root->getNumElements ();
+
+      for (unsigned i = 0; i < n_children; i++)
+        {
+          /*
+           * Slots are filled lazily by poppler_structure_get_child(), so
+           * entries that were never requested are still NULL and must be
+           * skipped: g_object_unref() rejects NULL with a critical warning.
+           */
+          if (poppler_structure->children[i])
+            g_object_unref (poppler_structure->children[i]);
+        }
+
+      g_free (poppler_structure->children);
+      poppler_structure->children = NULL;
+    }
+
+  /* poppler_structure->root is owned by the catalog; only the document is ours. */
+  g_object_unref (poppler_structure->document);
+
+  G_OBJECT_CLASS (poppler_structure_parent_class)->finalize (object);
+}
+
+
+/* Class initializer: hooks up the finalizer that releases instance data. */
+static void
+poppler_structure_class_init (PopplerStructureClass *klass)
+{
+  G_OBJECT_CLASS (klass)->finalize = poppler_structure_finalize;
+}
+
+
+/*
+ * Creates the #PopplerStructure wrapper for @root.
+ *
+ * The new object takes its own reference on @poppler_document and stores
+ * @root without taking ownership of it. When the root has at least one
+ * element, a zero-filled array with one slot per root element is allocated
+ * to cache child wrappers; otherwise the cache pointer stays NULL.
+ *
+ * Returns NULL if @poppler_document is not a #PopplerDocument.
+ */
+PopplerStructure*
+_poppler_structure_new (PopplerDocument *poppler_document,
+                        StructTreeRoot  *root)
+{
+  g_return_val_if_fail (POPPLER_IS_DOCUMENT (poppler_document), NULL);
+  g_assert (root);
+
+  PopplerStructure *structure = (PopplerStructure*) g_object_new (POPPLER_TYPE_STRUCTURE, NULL, NULL);
+
+  structure->root = root;
+  structure->document = (PopplerDocument*) g_object_ref (poppler_document);
+  structure->children = root->getNumElements ()
+    ? (PopplerStructureElement**) g_new0 (PopplerStructureElement*, root->getNumElements ())
+    : NULL;
+
+  return structure;
+}
+
+
+/**
+ * poppler_structure_get_n_children:
+ * @poppler_structure: A #PopplerStructure
+ *
+ * Obtains how many structure elements hang directly from the root of
+ * the structure tree.
+ *
+ * Return value: Number of structure elements in the root of the
+ *    structure tree, or 0 if @poppler_structure is not a #PopplerStructure.
+ */
+guint
+poppler_structure_get_n_children (PopplerStructure *poppler_structure)
+{
+  guint n_children;
+
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE (poppler_structure), 0);
+  g_assert (poppler_structure->root);
+
+  n_children = poppler_structure->root->getNumElements ();
+  return n_children;
+}
+
+/**
+ * poppler_structure_get_child:
+ * @poppler_structure: A #PopplerStructure
+ * @index: Index of the root structure element to obtain.
+ *
+ * Obtains the wrapper for one of the elements at the root of the structure
+ * tree. The wrapper is created the first time a given @index is requested
+ * and cached afterwards, so repeated calls return the same object.
+ *
+ * Return value: (transfer none): A #PopplerStructureElement, or %NULL if
+ *    @index is out of range.
+ */
+PopplerStructureElement*
+poppler_structure_get_child (PopplerStructure *poppler_structure,
+                             guint             index)
+{
+  PopplerStructureElement **slot;
+
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE (poppler_structure), NULL);
+  g_assert (poppler_structure->root);
+  g_assert (poppler_structure->root->getNumElements () >= 0);
+  g_return_val_if_fail (index < (guint) poppler_structure->root->getNumElements (), NULL);
+
+  slot = &poppler_structure->children[index];
+  if (*slot == NULL)
+    {
+      /* First request for this index: build the wrapper and keep it cached. */
+      *slot = _poppler_structure_element_new (poppler_structure,
+                                              poppler_structure->root->getElement (index));
+      /*
+       * NOTE(review): g_object_ref_sink() only avoids adding an extra
+       * reference when the new object is floating -- confirm how
+       * PopplerStructureElement is created.
+       */
+      g_object_ref_sink (*slot);
+    }
+
+  return *slot;
+}
+
+
+/*
+ * Recursive worker for poppler_structure_find_elements().
+ *
+ * Walks the subtree rooted at @element and appends to @result a newly
+ * created wrapper for every element that passes the filters:
+ *
+ *  - @type: only elements of this type match, unless it is
+ *    StructElement::Unknown, which disables the type filter.
+ *  - @pageRef: when not NULL, only elements whose page reference has the
+ *    same object and generation numbers match.
+ *
+ * The children of an element are visited before the element itself is
+ * tested, so matching descendants are placed in the list ahead of their
+ * matching ancestor.
+ *
+ * XXX PopplerStructure and PopplerStructureElement cache their children
+ *     PopplerStructureElement wrapper objects. This function could walk
+ *     that cache instead of building a list of new objects, but it is
+ *     unclear whether that would be faster -- it mostly depends on the
+ *     use-case.
+ */
+static void
+_poppler_structure_find_elements_helper (PopplerStructure    *structure,
+                                         StructElement       *element,
+                                         StructElement::Type  type,
+                                         Ref                 *pageRef,
+                                         GList              **result)
+{
+  g_assert (element);
+  g_assert (result);
+
+  /* Descend first, so that the whole subtree is handled before this node. */
+  unsigned child = 0;
+  while (child < element->getNumElements ())
+    {
+      _poppler_structure_find_elements_helper (structure, element->getElement (child),
+                                               type, pageRef, result);
+      child++;
+    }
+
+  /* Type filter: StructElement::Unknown accepts every element. */
+  const bool type_matches = (type == StructElement::Unknown) || (type == element->getType ());
+  if (!type_matches)
+    return;
+
+  /* Page filter: only applied when a page reference was supplied. */
+  if (pageRef)
+    {
+      Ref ref = element->getPageRef ();
+      if (pageRef->num != ref.num || pageRef->gen != ref.gen)
+        return;
+    }
+
+  /* Every filter passed: wrap the element and add it to the result list. */
+  *result = g_list_append (*result, _poppler_structure_element_new (structure, element));
+}
+
+/**
+ * poppler_structure_find_elements:
+ * @poppler_structure: A #PopplerStructure.
+ * @kind: A #PopplerStructureElementKind value. Use
+ *    #POPPLER_STRUCTURE_ELEMENT_UNKNOWN to return all the structure
+ *    elements, or any other value to obtain only the elements of the
+ *    given kind.
+ * @page: Limit the elements returned to those whose content is to
+ *    be displayed in a certain page. Use <code>-1</code> to obtain
+ *    elements from all the pages.
+ *
+ * Searches the whole structure tree for elements matching @kind and @page.
+ * The search is a depth-first traversal of the structure tree.
+ *
+ * If @page is not negative but the document has no reference for that
+ * page, no element can match and an empty list (%NULL) is returned.
+ *
+ * Return value: (element-type PopplerStructureElement) (transfer full):
+ *    A #GList of #PopplerStructureElement objects.
+ */
+GList*
+poppler_structure_find_elements (PopplerStructure            *poppler_structure,
+                                 PopplerStructureElementKind  kind,
+                                 gint                         page)
+{
+  GList *result = NULL;
+  Ref *pageRef = NULL;
+
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE (poppler_structure), NULL);
+
+  if (page >= 0)
+    {
+      /*
+       * NOTE(review): @page is forwarded unchanged to the catalog; confirm
+       * whether Catalog::getPageRef() expects the same index base as the
+       * rest of the glib API.
+       */
+      pageRef = poppler_structure->document->doc->getCatalog ()->getPageRef(page);
+
+      /*
+       * A NULL reference would be read by the helper as "do not filter by
+       * page" and every element would be returned. A page that cannot be
+       * resolved matches nothing instead.
+       */
+      if (!pageRef)
+        return NULL;
+    }
+
+  /* The requested type is the same for every root element: convert it once. */
+  const StructElement::Type type = _poppler_structure_element_kind_to_structelement_type (kind);
+
+  for (unsigned i = 0; i < poppler_structure->root->getNumElements (); i++)
+    {
+      _poppler_structure_find_elements_helper (poppler_structure,
+                                               poppler_structure->root->getElement (i),
+                                               type,
+                                               pageRef,
+                                               &result);
+    }
+
+  return result;
+}
diff --git a/glib/poppler-structure.h b/glib/poppler-structure.h
new file mode 100644
index 0000000..d01d551
--- /dev/null
+++ b/glib/poppler-structure.h
@@ -0,0 +1,43 @@
+/* poppler-structure.h: glib interface to poppler
+ *
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __POPPLER_STRUCTURE_H__
+#define __POPPLER_STRUCTURE_H__
+
+#include <glib-object.h>
+#include "poppler.h"
+#include "poppler-structure-element.h"
+
+G_BEGIN_DECLS
+
+/* Type identifier, checked cast and instance type check for PopplerStructure. */
+#define POPPLER_TYPE_STRUCTURE (poppler_structure_get_type ())
+#define POPPLER_STRUCTURE(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), POPPLER_TYPE_STRUCTURE, PopplerStructure))
+#define POPPLER_IS_STRUCTURE(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), POPPLER_TYPE_STRUCTURE))
+
+/* Public API; each function is documented next to its definition in poppler-structure.cc. */
+GType poppler_structure_get_type (void) G_GNUC_CONST;
+guint poppler_structure_get_n_children (PopplerStructure *poppler_structure);
+PopplerStructureElement *poppler_structure_get_child (PopplerStructure *poppler_structure,
+ guint index);
+GList *poppler_structure_find_elements (PopplerStructure *poppler_structure,
+ PopplerStructureElementKind kind,
+ gint page);
+
+G_END_DECLS
+
+#endif /* !__POPPLER_STRUCTURE_H__ */
diff --git a/glib/poppler.h b/glib/poppler.h
index 2d190f3..3cdc077 100644
--- a/glib/poppler.h
+++ b/glib/poppler.h
@@ -202,6 +202,8 @@ typedef struct _PopplerAnnotFileAttachment PopplerAnnotFileAttachment;
typedef struct _PopplerAnnotMovie PopplerAnnotMovie;
typedef struct _PopplerAnnotScreen PopplerAnnotScreen;
typedef struct _PopplerAnnotCalloutLine PopplerAnnotCalloutLine;
+typedef struct _PopplerStructure PopplerStructure;
+typedef struct _PopplerStructureElement PopplerStructureElement;
typedef enum
{
@@ -227,5 +229,6 @@ G_END_DECLS
#include "poppler-date.h"
#include "poppler-movie.h"
#include "poppler-media.h"
+#include "poppler-structure.h"
#endif /* __POPPLER_GLIB_H__ */
diff --git a/glib/reference/poppler-docs.sgml b/glib/reference/poppler-docs.sgml
index a9d5158..9617ed7 100644
--- a/glib/reference/poppler-docs.sgml
+++ b/glib/reference/poppler-docs.sgml
@@ -23,6 +23,8 @@
<xi:include href="xml/poppler-layer.xml"/>
<xi:include href="xml/poppler-media.xml"/>
<xi:include href="xml/poppler-movie.xml"/>
+ <xi:include href="xml/poppler-structure.xml"/>
+ <xi:include href="xml/poppler-structure-element.xml"/>
<xi:include href="xml/poppler-features.xml"/>
</chapter>
diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt
index 6fb14bc..6c4ebc9 100644
--- a/glib/reference/poppler-sections.txt
+++ b/glib/reference/poppler-sections.txt
@@ -145,6 +145,7 @@ poppler_document_get_n_attachments
poppler_document_has_attachments
poppler_document_get_attachments
poppler_document_get_form_field
+poppler_document_get_structure
poppler_index_iter_new
poppler_index_iter_copy
poppler_index_iter_free
@@ -535,6 +536,91 @@ poppler_movie_get_type
</SECTION>
<SECTION>
+<FILE>poppler-structure</FILE>
+<TITLE>PopplerStructure</TITLE>
+PopplerStructure
+poppler_structure_get_n_children
+poppler_structure_get_child
+poppler_structure_find_elements
+
+<SUBSECTION Standard>
+POPPLER_STRUCTURE
+POPPLER_IS_STRUCTURE
+POPPLER_TYPE_STRUCTURE
+
+<SUBSECTION Private>
+poppler_structure_get_type
+</SECTION>
+
+<SECTION>
+<FILE>poppler-structure-element</FILE>
+<TITLE>PopplerStructureElement</TITLE>
+PopplerStructureElement
+poppler_structure_element_get_element_type
+poppler_structure_element_get_n_children
+poppler_structure_element_get_child
+poppler_structure_element_get_attribute
+poppler_structure_element_get_user_property
+PopplerStructureElementType
+PopplerStructureAttribute
+PopplerStructurePlacement
+PopplerStructureWritingMode
+PopplerStructureBorderStyle
+PopplerStructureTextAlign
+PopplerStructureBlockAlign
+PopplerStructureInlineAlign
+PopplerStructureTextDecoration
+PopplerStructureRubyAlign
+PopplerStructureRubyPosition
+PopplerStructureGlyphOrientation
+PopplerStructureListNumbering
+PopplerStructureRole
+PopplerStructureChecked
+PopplerStructureScope
+
+<SUBSECTION Standard>
+POPPLER_STRUCTURE_ELEMENT
+POPPLER_IS_STRUCTURE_ELEMENT
+POPPLER_STRUCTURE_ELEMENT_TYPE
+POPPLER_TYPE_STRUCTURE_ATTRIBUTE
+POPPLER_TYPE_STRUCTURE_BLOCK_ALIGN
+POPPLER_TYPE_STRUCTURE_BORDER_STYLE
+POPPLER_TYPE_STRUCTURE_CHECKED
+POPPLER_TYPE_STRUCTURE_ELEMENT
+POPPLER_TYPE_STRUCTURE_ELEMENT_TYPE
+POPPLER_TYPE_STRUCTURE_GLYPH_ORIENTATION
+POPPLER_TYPE_STRUCTURE_INLINE_ALIGN
+POPPLER_TYPE_STRUCTURE_LIST_NUMBERING
+POPPLER_TYPE_STRUCTURE_PLACEMENT
+POPPLER_TYPE_STRUCTURE_ROLE
+POPPLER_TYPE_STRUCTURE_RUBY_ALIGN
+POPPLER_TYPE_STRUCTURE_RUBY_POSITION
+POPPLER_TYPE_STRUCTURE_SCOPE
+POPPLER_TYPE_STRUCTURE_TEXT_ALIGN
+POPPLER_TYPE_STRUCTURE_TEXT_DECORATION
+POPPLER_TYPE_STRUCTURE_WRITING_MODE
+
+<SUBSECTION Private>
+poppler_structure_element_get_type
+poppler_structure_attribute_get_type
+poppler_structure_block_align_get_type
+poppler_structure_border_style_get_type
+poppler_structure_checked_get_type
+poppler_structure_element_type_get_type
+poppler_structure_glyph_orientation_get_type
+poppler_structure_inline_align_get_type
+poppler_structure_list_numbering_get_type
+poppler_structure_placement_get_type
+poppler_structure_role_get_type
+poppler_structure_ruby_align_get_type
+poppler_structure_ruby_position_get_type
+poppler_structure_scope_get_type
+poppler_structure_text_align_get_type
+poppler_structure_text_decoration_get_type
+poppler_structure_writing_mode_get_type
+</SECTION>
+
+<SECTION>
<FILE>poppler-features</FILE>
POPPLER_HAS_CAIRO
POPPLER_MAJOR_VERSION
diff --git a/glib/reference/poppler.types b/glib/reference/poppler.types
index eed9849..6d40ac3 100644
--- a/glib/reference/poppler.types
+++ b/glib/reference/poppler.types
@@ -8,3 +8,5 @@ poppler_annot_get_type
poppler_layer_get_type
poppler_media_get_type
poppler_movie_get_type
+poppler_structure_get_type
+poppler_structure_element_get_type
More information about the poppler
mailing list