[poppler] 3 commits - glib/demo glib/Makefile.am glib/poppler.h glib/poppler-private.h glib/poppler-structure-element.cc glib/poppler-structure-element.h glib/reference

Carlos Garcia Campos carlosgc at kemper.freedesktop.org
Sun Feb 9 08:28:26 PST 2014


 glib/Makefile.am                    |    2 
 glib/demo/Makefile.am               |    2 
 glib/demo/main.c                    |    2 
 glib/demo/taggedstruct.c            |  232 ++++++++
 glib/demo/taggedstruct.h            |   31 +
 glib/poppler-private.h              |   10 
 glib/poppler-structure-element.cc   |  932 ++++++++++++++++++++++++++++++++++++
 glib/poppler-structure-element.h    |  126 ++++
 glib/poppler.h                      |    4 
 glib/reference/poppler-docs.sgml    |    2 
 glib/reference/poppler-sections.txt |   49 +
 glib/reference/poppler.types        |    2 
 12 files changed, 1394 insertions(+)

New commits:
commit d6fde0fac0120b1622942d8344d5153d9abf3e1e
Author: Adrian Perez de Castro <aperez at igalia.com>
Date:   Wed May 29 23:44:03 2013 +0300

    glib-demo: Pane showing the document structure
    
    Adds a new pane in poppler-glib-demo showing the structure for Tagged-PDF
    documents. It also serves as an example on how to use the API for
    PopplerStructure and PopplerStructureElement.

diff --git a/glib/demo/Makefile.am b/glib/demo/Makefile.am
index 8120bc9..e5df9d0 100644
--- a/glib/demo/Makefile.am
+++ b/glib/demo/Makefile.am
@@ -42,6 +42,8 @@ poppler_glib_demo_SOURCES = 			\
 	selections.h				\
 	selections.c				\
 	text.h					\
+	taggedstruct.h				\
+	taggedstruct.c				\
 	text.c					\
 	transitions.h				\
 	transitions.c				\
diff --git a/glib/demo/main.c b/glib/demo/main.c
index 3ba9b59..5bb13c9 100644
--- a/glib/demo/main.c
+++ b/glib/demo/main.c
@@ -34,6 +34,7 @@
 #include "attachments.h"
 #include "layers.h"
 #include "text.h"
+#include "taggedstruct.h"
 #include "find.h"
 #include "print.h"
 #include "selections.h"
@@ -65,6 +66,7 @@ static const PopplerGlibDemo demo_list[] = {
 	{ "Attachments",      pgd_attachments_create_widget },
 	{ "Layers",           pgd_layers_create_widget },
 	{ "Text",             pgd_text_create_widget },
+        { "Tagged Structure", pgd_taggedstruct_create_widget },
 	{ "Find",             pgd_find_create_widget },
 	{ "Print",            pgd_print_create_widget }
 };
diff --git a/glib/demo/taggedstruct.c b/glib/demo/taggedstruct.c
new file mode 100644
index 0000000..f5870f5
--- /dev/null
+++ b/glib/demo/taggedstruct.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <string.h>
+
+#include "text.h"
+#include "utils.h"
+
+typedef struct {
+  GtkWidget     *view;
+  GtkTreeStore  *store;
+  GtkWidget     *type_value;
+  GtkWidget     *lang_value;
+  GtkWidget     *abbr_value;
+  GtkWidget     *id_value;
+  GtkWidget     *title_value;
+  GtkTextBuffer *text_buffer;
+} PgdTaggedStructDemo;
+
+
+static void
+pgd_taggedstruct_free (PgdTaggedStructDemo *demo)
+{
+  if (!demo)
+    return;
+
+  if (demo->store)
+    {
+      g_object_unref (demo->store);
+      demo->store = NULL;
+    }
+
+  g_free (demo);
+}
+
+
+static void
+populate_store_aux (GtkTreeStore *store, GtkTreeIter *parent, PopplerStructureElementIter *iter)
+{
+  do
+    {
+      PopplerStructureElementIter *child = poppler_structure_element_iter_get_child (iter);
+      PopplerStructureElement *element = poppler_structure_element_iter_get_element (iter);
+      GEnumClass *enum_class = G_ENUM_CLASS (g_type_class_ref (POPPLER_TYPE_STRUCTURE_ELEMENT_KIND));
+      GEnumValue *enum_value = g_enum_get_value (enum_class, poppler_structure_element_get_kind (element));
+      GtkTreeIter pos;
+
+      gtk_tree_store_append (store, &pos, parent);
+      gtk_tree_store_set (store, &pos, 0, enum_value->value_nick, 1, element, -1);
+
+      if (child)
+        {
+          populate_store_aux (store, &pos, child);
+          poppler_structure_element_iter_free (child);
+        }
+    }
+  while (poppler_structure_element_iter_next (iter));
+}
+
+
+static GtkTreeStore *
+populate_store (PopplerStructureElementIter *iter)
+{
+  GtkTreeStore *store = gtk_tree_store_new (2, G_TYPE_STRING, G_TYPE_POINTER);
+
+  if (iter)
+    {
+      populate_store_aux (store, NULL, iter);
+    }
+  else
+    {
+      GtkTreeIter pos;
+
+      gtk_tree_store_append (store, &pos, NULL);
+      gtk_tree_store_set (store, &pos, 0, "<b>Not a Tagged-PDF</b>", 1, NULL, -1);
+    }
+
+  return store;
+}
+
+
+/*static void
+pgd_row_activated (GtkTreeView *tree_view, GtkTreePath *path, GtkTreeViewColumn *column, PgdTaggedStructDemo *demo)
+{*/
+static void
+pgd_selection_changed (GtkTreeSelection *selection, PgdTaggedStructDemo *demo)
+{
+  GtkTreeModel *model;
+  PopplerStructureElement *element;
+  GtkTreeIter iter;
+  gpointer elementptr;
+
+  if (!gtk_tree_selection_get_selected (selection, &model, &iter))
+    return;
+
+  gtk_tree_model_get (model, &iter, 1, &elementptr, -1);
+  element = POPPLER_STRUCTURE_ELEMENT (elementptr);
+
+  gtk_label_set_text (GTK_LABEL (demo->id_value),
+                      poppler_structure_element_get_id (element));
+  gtk_label_set_text (GTK_LABEL (demo->title_value),
+                      poppler_structure_element_get_title (element));
+  gtk_label_set_text (GTK_LABEL (demo->lang_value),
+                      poppler_structure_element_get_language (element));
+  gtk_label_set_text (GTK_LABEL (demo->abbr_value),
+                      poppler_structure_element_get_abbreviation (element));
+  gtk_text_buffer_set_text (demo->text_buffer, "", -1);
+
+  if (poppler_structure_element_is_content (element))
+    {
+      const gchar *text = poppler_structure_element_get_text (element, FALSE);
+
+      if (text)
+        gtk_text_buffer_set_text (demo->text_buffer, text, -1);
+      gtk_label_set_text (GTK_LABEL (demo->type_value), "Content");
+    }
+  else
+    {
+      if (poppler_structure_element_is_inline (element))
+        gtk_label_set_text (GTK_LABEL (demo->type_value), "Inline");
+      else if (poppler_structure_element_is_block (element))
+        gtk_label_set_text (GTK_LABEL (demo->type_value), "Block");
+      else
+        gtk_label_set_text (GTK_LABEL (demo->type_value), "Structure");
+    }
+}
+
+
+GtkWidget *
+pgd_taggedstruct_create_widget (PopplerDocument *document)
+{
+  PopplerStructureElementIter *iter;
+  PgdTaggedStructDemo *demo;
+  GtkCellRenderer *renderer;
+  GtkTreeSelection *selection;
+  GtkWidget *hbox;
+  GtkWidget *vbox;
+  GtkWidget *grid;
+  GtkWidget *scroll;
+  GtkWidget *w;
+  gint row;
+
+  demo = g_new0 (PgdTaggedStructDemo, 1);
+
+  iter = poppler_structure_element_iter_new (document);
+  demo->store = populate_store (iter);
+  poppler_structure_element_iter_free (iter);
+
+  demo->view = gtk_tree_view_new_with_model (GTK_TREE_MODEL (demo->store));
+
+  renderer = gtk_cell_renderer_text_new ();
+  gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (demo->view),
+                                               0, "Type",
+                                               renderer,
+                                               "markup", 0,
+                                               NULL);
+  g_object_set (G_OBJECT (gtk_tree_view_get_column (GTK_TREE_VIEW (demo->view), 0)),
+                "expand", TRUE, NULL);
+
+  gtk_tree_view_expand_all (GTK_TREE_VIEW (demo->view));
+  gtk_tree_view_set_show_expanders (GTK_TREE_VIEW (demo->view), TRUE);
+  gtk_tree_view_set_headers_visible (GTK_TREE_VIEW (demo->view), TRUE);
+  gtk_tree_view_set_headers_clickable (GTK_TREE_VIEW (demo->view), FALSE);
+  gtk_tree_view_set_activate_on_single_click (GTK_TREE_VIEW (demo->view), TRUE);
+
+  hbox = gtk_box_new (GTK_ORIENTATION_HORIZONTAL, 6);
+  scroll = gtk_scrolled_window_new (NULL, NULL);
+  gtk_container_add (GTK_CONTAINER (scroll), demo->view);
+  gtk_widget_show (demo->view);
+  gtk_box_pack_start (GTK_BOX (hbox), scroll, TRUE, TRUE, 0);
+  gtk_widget_show (scroll);
+
+  row = 0;
+  grid = gtk_grid_new ();
+  gtk_container_set_border_width (GTK_CONTAINER (grid), 12);
+  gtk_grid_set_row_homogeneous (GTK_GRID (grid), FALSE);
+  gtk_grid_set_column_spacing (GTK_GRID (grid), 6);
+  gtk_grid_set_row_spacing (GTK_GRID (grid), 6);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Type:</b>", &demo->type_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>ID:</b>", &demo->id_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Title:</b>", &demo->title_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Language:</b>", &demo->lang_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Abbreviation:</b>", &demo->abbr_value, NULL, &row);
+
+  vbox = gtk_box_new (GTK_ORIENTATION_VERTICAL, 6);
+  gtk_box_pack_start (GTK_BOX (vbox), grid, FALSE, FALSE, 0);
+  gtk_widget_show (grid);
+
+  scroll = gtk_scrolled_window_new (NULL, NULL);
+  gtk_container_set_border_width (GTK_CONTAINER (scroll), 12);
+  gtk_box_pack_end (GTK_BOX (vbox), scroll, TRUE, TRUE, 0);
+  gtk_widget_show (scroll);
+
+  gtk_container_add (GTK_CONTAINER (scroll), (w = gtk_text_view_new ()));
+  gtk_widget_show (w);
+
+  demo->text_buffer = gtk_text_view_get_buffer (GTK_TEXT_VIEW (w));
+  gtk_text_view_set_wrap_mode (GTK_TEXT_VIEW (w), GTK_WRAP_WORD_CHAR);
+  gtk_text_view_set_editable (GTK_TEXT_VIEW (w), FALSE);
+  gtk_text_buffer_set_text (demo->text_buffer, "", -1);
+  gtk_widget_show (w);
+
+  selection = gtk_tree_view_get_selection (GTK_TREE_VIEW (demo->view));
+  g_signal_connect (selection, "changed",
+                    G_CALLBACK (pgd_selection_changed),
+                    demo);
+
+  gtk_box_pack_end (GTK_BOX (hbox), vbox, TRUE, TRUE, 0);
+  gtk_widget_show (vbox);
+
+  g_object_weak_ref (G_OBJECT (hbox),
+                     (GWeakNotify) pgd_taggedstruct_free,
+                     demo);
+
+  gtk_widget_show (hbox);
+  return hbox;
+}
diff --git a/glib/demo/taggedstruct.h b/glib/demo/taggedstruct.h
new file mode 100644
index 0000000..3a38727
--- /dev/null
+++ b/glib/demo/taggedstruct.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <gtk/gtk.h>
+#include <poppler.h>
+
+#ifndef _TAGGEDSTRUCT_H_
+#define _TAGGEDSTRUCT_H_
+
+G_BEGIN_DECLS
+
+GtkWidget *pgd_taggedstruct_create_widget (PopplerDocument *document);
+
+G_END_DECLS
+
+#endif /* _TAGGEDSTRUCT_H_ */
commit 0f9fa775c469c03d1613b955ee7b06b823e6e080
Author: Adrian Perez de Castro <aperez at igalia.com>
Date:   Thu Sep 26 17:50:51 2013 +0300

    glib: Expose inline attributes of structure elements
    
    Allows obtaining inline text attributes from structure elements. The text
    is divided into "spans", which are groups of consecutive glyphs that share
    their attributes. Each one of those is represented by a PopplerTextSpan,
    which gives information about the text font and color, and the link target
    for links. The list of PopplerTextSpans is created lazily when first used.
    
    https://bugs.freedesktop.org/show_bug.cgi?id=64821

diff --git a/glib/poppler-structure-element.cc b/glib/poppler-structure-element.cc
index 6b8778c..39dfd51 100644
--- a/glib/poppler-structure-element.cc
+++ b/glib/poppler-structure-element.cc
@@ -661,3 +661,272 @@ poppler_structure_element_iter_get_child (PopplerStructureElementIter *parent)
 
   return NULL;
 }
+
+
+struct _PopplerTextSpan {
+  gchar *text;
+  gchar *font_name;
+  guint  flags;
+  PopplerColor color;
+};
+
+POPPLER_DEFINE_BOXED_TYPE (PopplerTextSpan,
+                           poppler_text_span,
+                           poppler_text_span_copy,
+                           poppler_text_span_free)
+
+enum {
+  POPPLER_TEXT_SPAN_FIXED_WIDTH = (1 << 0),
+  POPPLER_TEXT_SPAN_SERIF       = (1 << 1),
+  POPPLER_TEXT_SPAN_ITALIC      = (1 << 2),
+  POPPLER_TEXT_SPAN_BOLD        = (1 << 3),
+};
+
+static PopplerTextSpan *
+text_span_poppler_text_span (const TextSpan& span)
+{
+    PopplerTextSpan *new_span = g_slice_new0 (PopplerTextSpan);
+    if (GooString *text = span.getText ())
+      new_span->text = _poppler_goo_string_to_utf8 (text);
+
+    new_span->color.red = colToDbl (span.getColor ().r) * 65535;
+    new_span->color.green = colToDbl (span.getColor ().g) * 65535;
+    new_span->color.blue = colToDbl (span.getColor ().b) * 65535;
+
+    if (span.getFont ())
+      {
+        // GfxFont sometimes does not have a family name but there
+        // is always a font name that can be used as fallback.
+        GooString *font_name = span.getFont ()->getFamily ();
+        if (font_name == NULL)
+          font_name = span.getFont ()->getName ();
+
+        new_span->font_name = _poppler_goo_string_to_utf8 (font_name);
+        if (span.getFont ()->isFixedWidth ())
+          new_span->flags |= POPPLER_TEXT_SPAN_FIXED_WIDTH;
+        if (span.getFont ()->isSerif ())
+            new_span->flags |= POPPLER_TEXT_SPAN_SERIF;
+        if (span.getFont ()->isItalic ())
+            new_span->flags |= POPPLER_TEXT_SPAN_ITALIC;
+        if (span.getFont ()->isBold ())
+            new_span->flags |= POPPLER_TEXT_SPAN_BOLD;
+
+        /* isBold() can return false for some fonts whose weight is heavy */
+        switch (span.getFont ()->getWeight ())
+          {
+          case GfxFont::W500:
+          case GfxFont::W600:
+          case GfxFont::W700:
+          case GfxFont::W800:
+          case GfxFont::W900:
+            new_span->flags |= POPPLER_TEXT_SPAN_BOLD;
+          default:
+            break;
+          }
+      }
+
+    return new_span;
+}
+
+/**
+ * poppler_text_span_copy:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Makes a copy of a text span.
+ *
+ * Return value: (transfer full): A new #PopplerTextSpan
+ *
+ * Since: 0.26
+ */
+PopplerTextSpan *
+poppler_text_span_copy (PopplerTextSpan *poppler_text_span)
+{
+  PopplerTextSpan *new_span;
+
+  g_return_val_if_fail (poppler_text_span != NULL, NULL);
+
+  new_span = g_slice_dup (PopplerTextSpan, poppler_text_span);
+  new_span->text = g_strdup (poppler_text_span->text);
+  if (poppler_text_span->font_name)
+    new_span->font_name = g_strdup (poppler_text_span->font_name);
+  return new_span;
+}
+
+/**
+ * poppler_text_span_free:
+ * @poppler_text_span: A #PopplerTextSpan
+ *
+ * Frees a text span.
+ *
+ * Since: 0.26
+ */
+void
+poppler_text_span_free (PopplerTextSpan *poppler_text_span)
+{
+  if (G_UNLIKELY (poppler_text_span == NULL))
+    return;
+
+  g_free (poppler_text_span->text);
+  g_free (poppler_text_span->font_name);
+  g_slice_free (PopplerTextSpan, poppler_text_span);
+}
+
+/**
+ * poppler_text_span_is_fixed_width_font:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Check whether a text span is meant to be rendered using a fixed-width font.
+ *
+ * Return value: Whether the span uses a fixed-width font.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_text_span_is_fixed_width_font (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, FALSE);
+
+  return (poppler_text_span->flags & POPPLER_TEXT_SPAN_FIXED_WIDTH);
+}
+
+/**
+ * poppler_text_span_is_serif_font:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Check whether a text span is meant to be rendered using a serif font.
+ *
+ * Return value: Whether the span uses a serif font.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_text_span_is_serif_font (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, FALSE);
+
+  return (poppler_text_span->flags & POPPLER_TEXT_SPAN_SERIF);
+}
+
+/**
+ * poppler_text_span_is_bold_font:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Check whether a text span is meant to be rendered using a bold font.
+ *
+ * Return value: Whether the span uses bold font.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_text_span_is_bold_font (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, FALSE);
+
+  return (poppler_text_span->flags & POPPLER_TEXT_SPAN_BOLD);
+}
+
+/**
+ * poppler_text_span_get_color:
+ * @poppler_text_span: a #PopplerTextSpan
+ * @color: (out): a return location for a #PopplerColor
+ *
+ * Obtains the color in which the text is to be rendered.
+ *
+ * Since: 0.26
+ */
+void
+poppler_text_span_get_color (PopplerTextSpan *poppler_text_span,
+                             PopplerColor *color)
+{
+  g_return_if_fail (poppler_text_span != NULL);
+  g_return_if_fail (color != NULL);
+
+  *color = poppler_text_span->color;
+}
+
+/**
+ * poppler_text_span_get_text:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Obtains the text contained in the span.
+ *
+ * Return value: (transfer none): A string.
+ *
+ * Since: 0.26
+ */
+const gchar *
+poppler_text_span_get_text (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, NULL);
+
+  return poppler_text_span->text;
+}
+
+/**
+ * poppler_text_span_get_font_name:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Obtains the name of the font in which the span is to be rendered.
+ *
+ * Return value: (transfer none): A string containing the font name, or
+ *   %NULL if a font is not defined.
+ *
+ * Since: 0.26
+ */
+const gchar *
+poppler_text_span_get_font_name (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, NULL);
+
+  return poppler_text_span->font_name;
+}
+
+
+/**
+ * poppler_structure_element_get_text_spans:
+ * @poppler_structure_element: A #PopplerStructureElement
+ * @n_text_spans: (out): A pointer to the location where the number of elements in
+ *    the returned array will be stored.
+ *
+ * Obtains the text enclosed by an element, as an array of #PopplerTextSpan
+ * structures. Each item in the array is a piece of text that shares the same
+ * attributes, plus those attributes. The following example shows how to
+ * obtain and free the text spans of an element:
+ *
+ * <informalexample><programlisting>
+ * guint i, n_spans;
+ * PopplerTextSpan **text_spans =
+ *    poppler_structure_element_get_text_spans (element, &n_spans);
+ * /<!-- -->* Use the text spans *<!-- -->/
+ * for (i = 0; i < n_spans; i++)
+ *    poppler_text_span_free (text_spans[i]);
+ * g_free (text_spans);
+ * </programlisting></informalexample>
+ *
+ * Return value: (transfer full) (array length=n_text_spans) (element-type PopplerTextSpan):
+ *    An array of #PopplerTextSpan elements.
+ *
+ * Since: 0.26
+ */
+PopplerTextSpan **
+poppler_structure_element_get_text_spans (PopplerStructureElement *poppler_structure_element,
+                                          guint                   *n_text_spans)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (n_text_spans != NULL, NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  if (!poppler_structure_element->elem->isContent ())
+    return NULL;
+
+  const TextSpanArray spans(poppler_structure_element->elem->getTextSpans ());
+  PopplerTextSpan **text_spans = g_new0 (PopplerTextSpan*, spans.size ());
+
+  size_t i = 0;
+  for (TextSpanArray::const_iterator s = spans.begin (); s != spans.end (); ++s)
+    text_spans[i++] = text_span_poppler_text_span (*s);
+
+  *n_text_spans = spans.size ();
+
+  return text_spans;
+}
diff --git a/glib/poppler-structure-element.h b/glib/poppler-structure-element.h
index a5ac04b..7ba5d56 100644
--- a/glib/poppler-structure-element.h
+++ b/glib/poppler-structure-element.h
@@ -82,7 +82,6 @@ typedef enum {
   POPPLER_STRUCTURE_ELEMENT_FORM,
 } PopplerStructureElementKind;
 
-
 GType                        poppler_structure_element_get_type                   (void) G_GNUC_CONST;
 PopplerStructureElementKind  poppler_structure_element_get_kind                   (PopplerStructureElement  *poppler_structure_element);
 gint                         poppler_structure_element_get_page                   (PopplerStructureElement  *poppler_structure_element);
@@ -97,6 +96,8 @@ gchar                       *poppler_structure_element_get_text
                                                                                    gboolean                  recursive);
 gchar                       *poppler_structure_element_get_alt_text               (PopplerStructureElement  *poppler_structure_element);
 gchar                       *poppler_structure_element_get_actual_text            (PopplerStructureElement  *poppler_structure_element);
+PopplerTextSpan            **poppler_structure_element_get_text_spans             (PopplerStructureElement  *poppler_structure_element,
+                                                                                   guint                    *n_text_spans);
 
 #define POPPLER_TYPE_STRUCTURE_ELEMENT_ITER                                       (poppler_structure_element_iter_get_type ())
 GType                        poppler_structure_element_iter_get_type              (void) G_GNUC_CONST;
@@ -107,6 +108,19 @@ PopplerStructureElement     *poppler_structure_element_iter_get_element
 gboolean                     poppler_structure_element_iter_next                  (PopplerStructureElementIter *iter);
 void                         poppler_structure_element_iter_free                  (PopplerStructureElementIter *iter);
 
+#define POPPLER_TYPE_TEXT_SPAN                                                    (poppler_text_span_get_type ())
+GType                        poppler_text_span_get_type                           (void) G_GNUC_CONST;
+PopplerTextSpan             *poppler_text_span_copy                               (PopplerTextSpan *poppler_text_span);
+void                         poppler_text_span_free                               (PopplerTextSpan *poppler_text_span);
+gboolean                     poppler_text_span_is_fixed_width_font                (PopplerTextSpan *poppler_text_span);
+gboolean                     poppler_text_span_is_serif_font                      (PopplerTextSpan *poppler_text_span);
+gboolean                     poppler_text_span_is_bold_font                       (PopplerTextSpan *poppler_text_span);
+gboolean                     poppler_text_span_is_link                            (PopplerTextSpan *poppler_text_span);
+void                         poppler_text_span_get_color                          (PopplerTextSpan *poppler_text_span,
+                                                                                   PopplerColor    *color);
+const gchar                 *poppler_text_span_get_text                           (PopplerTextSpan *poppler_text_span);
+const gchar                 *poppler_text_span_get_font_name                      (PopplerTextSpan *poppler_text_span);
+
 G_END_DECLS
 
 #endif /* !__POPPLER_STRUCTURE_ELEMENT_H__ */
diff --git a/glib/poppler.h b/glib/poppler.h
index 1da2af1..f5c9563 100644
--- a/glib/poppler.h
+++ b/glib/poppler.h
@@ -210,6 +210,7 @@ typedef struct _PopplerAnnotSquare         PopplerAnnotSquare;
 typedef struct _PopplerQuadrilateral       PopplerQuadrilateral;
 typedef struct _PopplerStructureElement    PopplerStructureElement;
 typedef struct _PopplerStructureElementIter PopplerStructureElementIter;
+typedef struct _PopplerTextSpan            PopplerTextSpan;
 
 typedef enum
 {
diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt
index fff370b..719b708 100644
--- a/glib/reference/poppler-sections.txt
+++ b/glib/reference/poppler-sections.txt
@@ -590,6 +590,7 @@ poppler_movie_get_type
 PopplerStructureElement
 PopplerStructureElementKind
 PopplerStructureElementIter
+PopplerTextSpan
 poppler_structure_element_iter_new
 poppler_structure_element_iter_next
 poppler_structure_element_iter_copy
@@ -608,6 +609,15 @@ poppler_structure_element_get_language
 poppler_structure_element_get_text
 poppler_structure_element_get_alt_text
 poppler_structure_element_get_actual_text
+poppler_structure_element_get_text_spans
+poppler_text_span_copy
+poppler_text_span_free
+poppler_text_span_is_fixed_width_font
+poppler_text_span_is_serif_font
+poppler_text_span_is_bold_font
+poppler_text_span_get_color
+poppler_text_span_get_text
+poppler_text_span_get_font_name
 
 <SUBSECTION Standard>
 POPPLER_STRUCTURE_ELEMENT
@@ -618,7 +628,9 @@ POPPLER_TYPE_STRUCTURE_ELEMENT_KIND
 
 <SUBSECTION Private>
 poppler_structure_element_get_type
+poppler_structure_element_kind_get_type
 poppler_structure_element_iter_get_type
+poppler_text_span_get_type
 </SECTION>
 
 <SECTION>
commit 8072d4b0e3ea10b4308f8172891f769f30466133
Author: Adrian Perez de Castro <aperez at igalia.com>
Date:   Thu May 9 12:01:59 2013 +0300

    glib: Expose the document structure tree
    
    Implements a new PopplerStructureElement class, which builds upon
    StructTreeRoot and StructElement to expose the document structure of
    tagged PDFs in the GLib binding.
    
    Navigation of the structure tree is done by an iterator-based interface,
    using PopplerStructureElementIter.
    
    https://bugs.freedesktop.org/show_bug.cgi?id=64821

diff --git a/glib/Makefile.am b/glib/Makefile.am
index a38e052..040996a 100644
--- a/glib/Makefile.am
+++ b/glib/Makefile.am
@@ -41,6 +41,7 @@ poppler_glib_public_headers =			\
 	poppler-layer.h				\
 	poppler-media.h				\
 	poppler-movie.h				\
+	poppler-structure-element.h		\
 	poppler.h
 
 poppler_glib_includedir = $(includedir)/poppler/glib
@@ -67,6 +68,7 @@ libpoppler_glib_la_SOURCES =			\
 	poppler-cached-file-loader.h		\
 	poppler-input-stream.cc			\
 	poppler-input-stream.h			\
+	poppler-structure-element.cc		\
 	poppler.cc				\
 	poppler-private.h
 
diff --git a/glib/poppler-private.h b/glib/poppler-private.h
index 93d0f23..874cfdb 100644
--- a/glib/poppler-private.h
+++ b/glib/poppler-private.h
@@ -17,6 +17,7 @@
 #include <OptionalContent.h>
 #include <CairoOutputDev.h>
 #include <FileSpec.h>
+#include <StructElement.h>
 #endif
 
 struct _PopplerDocument
@@ -95,6 +96,15 @@ struct _PopplerLayer
   gchar *title;
 };
 
+
+struct _PopplerStructureElement
+{
+  /*< private >*/
+  GObject parent_instance;
+  PopplerDocument *document;
+  StructElement *elem;
+};
+
 GList         *_poppler_document_get_layers (PopplerDocument *document);
 GList         *_poppler_document_get_layer_rbgroup (PopplerDocument *document,
 						    Layer           *layer);
diff --git a/glib/poppler-structure-element.cc b/glib/poppler-structure-element.cc
new file mode 100644
index 0000000..6b8778c
--- /dev/null
+++ b/glib/poppler-structure-element.cc
@@ -0,0 +1,663 @@
+/* poppler-structure-element.cc: glib interface to poppler
+ *
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+
+#ifndef __GI_SCANNER__
+#include <StructTreeRoot.h>
+#include <StructElement.h>
+#include <GlobalParams.h>
+#include <UnicodeMap.h>
+#endif /* !__GI_SCANNER__ */
+
+#include "poppler.h"
+#include "poppler-private.h"
+#include "poppler-structure-element.h"
+
+
+/**
+ * SECTION:poppler-structure-element
+ * @short_description: Document structure element.
+ * @title: PopplerStructureElement
+ * @see_also: #PopplerStructure
+ *
+ * Instances of #PopplerStructureElement are used to describe the structure
+ * of a #PopplerDocument. To access the elements in the structure of the
+ * document, first use poppler_structure_element_iter_new() to obtain an
+ * iterator over the top level elements, and then use
+ * poppler_structure_element_iter_get_element() to retrieve each of them.
+ */
+
+typedef struct _PopplerStructureElementClass
+{
+  GObjectClass parent_class;
+} PopplerStructureElementClass;
+
+G_DEFINE_TYPE (PopplerStructureElement, poppler_structure_element, G_TYPE_OBJECT);
+
+/*
+ * Creates a PopplerStructureElement wrapping @element.
+ *
+ * A reference on @document is taken here and released again in
+ * poppler_structure_element_finalize(). The @element pointer is stored
+ * as-is: it is neither copied nor freed by the wrapper.
+ */
+static PopplerStructureElement *
+_poppler_structure_element_new (PopplerDocument *document, StructElement *element)
+{
+  PopplerStructureElement *poppler_structure_element;
+
+  g_assert (POPPLER_IS_DOCUMENT (document));
+  g_assert (element);
+
+  /* No construct properties: a single NULL terminates the (empty) list. */
+  poppler_structure_element = (PopplerStructureElement *) g_object_new (POPPLER_TYPE_STRUCTURE_ELEMENT, NULL);
+  poppler_structure_element->document = (PopplerDocument *) g_object_ref (document);
+  poppler_structure_element->elem = element;
+
+  return poppler_structure_element;
+}
+
+
+static void
+poppler_structure_element_init (PopplerStructureElement *poppler_structure_element)
+{
+}
+
+
+static void
+poppler_structure_element_finalize (GObject *object)
+{
+  PopplerStructureElement *poppler_structure_element = POPPLER_STRUCTURE_ELEMENT (object);
+
+  /* poppler_structure_element->elem is owned by the StructTreeRoot */
+  g_object_unref (poppler_structure_element->document);
+
+  G_OBJECT_CLASS (poppler_structure_element_parent_class)->finalize (object);
+}
+
+
+static void
+poppler_structure_element_class_init (PopplerStructureElementClass *klass)
+{
+  GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+  gobject_class->finalize = poppler_structure_element_finalize;
+}
+
+
+/**
+ * poppler_structure_element_get_kind:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the kind of a structure element, by mapping the type of the
+ * wrapped element to the corresponding #PopplerStructureElementKind.
+ *
+ * Return value: A #PopplerStructureElementKind value, or
+ *    #POPPLER_STRUCTURE_ELEMENT_UNKNOWN if @poppler_structure_element
+ *    is not valid.
+ *
+ * Since: 0.26
+ */
+PopplerStructureElementKind
+poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), POPPLER_STRUCTURE_ELEMENT_UNKNOWN);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, POPPLER_STRUCTURE_ELEMENT_UNKNOWN);
+
+  switch (poppler_structure_element->elem->getType ())
+    {
+      case StructElement::Unknown:
+        return POPPLER_STRUCTURE_ELEMENT_UNKNOWN;
+      case StructElement::MCID:
+        return POPPLER_STRUCTURE_ELEMENT_CONTENT;
+      case StructElement::OBJR:
+        return POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE;
+      case StructElement::Document:
+        return POPPLER_STRUCTURE_ELEMENT_DOCUMENT;
+      case StructElement::Part:
+        return POPPLER_STRUCTURE_ELEMENT_PART;
+      /* Article elements map to the ARTICLE kind declared in the public enum. */
+      case StructElement::Art:
+        return POPPLER_STRUCTURE_ELEMENT_ARTICLE;
+      case StructElement::Sect:
+        return POPPLER_STRUCTURE_ELEMENT_SECTION;
+      case StructElement::Div:
+        return POPPLER_STRUCTURE_ELEMENT_DIV;
+      case StructElement::Span:
+        return POPPLER_STRUCTURE_ELEMENT_SPAN;
+      case StructElement::Quote:
+        return POPPLER_STRUCTURE_ELEMENT_QUOTE;
+      case StructElement::Note:
+        return POPPLER_STRUCTURE_ELEMENT_NOTE;
+      case StructElement::Reference:
+        return POPPLER_STRUCTURE_ELEMENT_REFERENCE;
+      case StructElement::BibEntry:
+        return POPPLER_STRUCTURE_ELEMENT_BIBENTRY;
+      case StructElement::Code:
+        return POPPLER_STRUCTURE_ELEMENT_CODE;
+      case StructElement::Link:
+        return POPPLER_STRUCTURE_ELEMENT_LINK;
+      case StructElement::Annot:
+        return POPPLER_STRUCTURE_ELEMENT_ANNOT;
+      case StructElement::Ruby:
+        return POPPLER_STRUCTURE_ELEMENT_RUBY;
+      case StructElement::Warichu:
+        return POPPLER_STRUCTURE_ELEMENT_WARICHU;
+      case StructElement::BlockQuote:
+        return POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE;
+      case StructElement::Caption:
+        return POPPLER_STRUCTURE_ELEMENT_CAPTION;
+      case StructElement::NonStruct:
+        return POPPLER_STRUCTURE_ELEMENT_NONSTRUCT;
+      case StructElement::TOC:
+        return POPPLER_STRUCTURE_ELEMENT_TOC;
+      case StructElement::TOCI:
+        return POPPLER_STRUCTURE_ELEMENT_TOC_ITEM;
+      case StructElement::Index:
+        return POPPLER_STRUCTURE_ELEMENT_INDEX;
+      case StructElement::Private:
+        return POPPLER_STRUCTURE_ELEMENT_PRIVATE;
+      case StructElement::P:
+        return POPPLER_STRUCTURE_ELEMENT_PARAGRAPH;
+      case StructElement::H:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING;
+      case StructElement::H1:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_1;
+      case StructElement::H2:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_2;
+      case StructElement::H3:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_3;
+      case StructElement::H4:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_4;
+      case StructElement::H5:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_5;
+      case StructElement::H6:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_6;
+      case StructElement::L:
+        return POPPLER_STRUCTURE_ELEMENT_LIST;
+      case StructElement::LI:
+        return POPPLER_STRUCTURE_ELEMENT_LIST_ITEM;
+      case StructElement::Lbl:
+        return POPPLER_STRUCTURE_ELEMENT_LIST_LABEL;
+      case StructElement::LBody:
+        return POPPLER_STRUCTURE_ELEMENT_LIST_BODY;
+      case StructElement::Table:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE;
+      case StructElement::TR:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_ROW;
+      case StructElement::TH:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING;
+      case StructElement::TD:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_DATA;
+      case StructElement::THead:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER;
+      case StructElement::TFoot:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER;
+      case StructElement::TBody:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_BODY;
+      case StructElement::Figure:
+        return POPPLER_STRUCTURE_ELEMENT_FIGURE;
+      case StructElement::Formula:
+        return POPPLER_STRUCTURE_ELEMENT_FORMULA;
+      case StructElement::Form:
+        return POPPLER_STRUCTURE_ELEMENT_FORM;
+      default:
+        g_assert_not_reached ();
+        /* Keep a defined result when assertions are compiled out. */
+        return POPPLER_STRUCTURE_ELEMENT_UNKNOWN;
+    }
+}
+
+/**
+ * poppler_structure_element_get_page:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the page number in which the element is contained.
+ *
+ * Return value: Number of the page that contains the element, or
+ *    <code>-1</code> if not defined.
+ *
+ * Since: 0.26
+ */
+gint
+poppler_structure_element_get_page (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), -1);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, -1);
+
+  Ref ref;
+  if (poppler_structure_element->elem->getPageRef (ref))
+    {
+      return poppler_structure_element->document->doc->findPage(ref.num, ref.gen) - 1;
+    }
+
+  return -1;
+}
+
+/**
+ * poppler_structure_element_is_content:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Checks whether an element is actual document content.
+ *
+ * Return value: %TRUE if the element is content, or %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_structure_element_is_content (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, FALSE);
+
+  return poppler_structure_element->elem->isContent ();
+}
+
+/**
+ * poppler_structure_element_is_inline:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Checks whether an element is an inline element.
+ *
+ * Return value: %TRUE if the element is an inline element, or %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_structure_element_is_inline (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, FALSE);
+
+  return poppler_structure_element->elem->isInline ();
+}
+
+/**
+ * poppler_structure_element_is_block:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Checks whether an element is a block element.
+ *
+ * Return value: %TRUE if the element is a block element, or %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, FALSE);
+
+  return poppler_structure_element->elem->isBlock ();
+}
+
+/**
+ * poppler_structure_element_get_id:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the identifier of an element.
+ *
+ * Return value: (transfer full): The identifier of the element (if
+ *    defined), or %NULL.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_id (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  GooString *string = poppler_structure_element->elem->getID ();
+  return string ? _poppler_goo_string_to_utf8 (string) : NULL;
+}
+
+/**
+ * poppler_structure_element_get_title:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the title of an element.
+ *
+ * Return value: (transfer full): The title of the element, or %NULL.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_title (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  GooString *string = poppler_structure_element->elem->getTitle ();
+  return string ? _poppler_goo_string_to_utf8 (string) : NULL;
+}
+
+/**
+ * poppler_structure_element_get_abbreviation:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Acronyms and abbreviations contained in elements of type
+ * #POPPLER_STRUCTURE_ELEMENT_SPAN may have an associated expanded
+ * text form, which can be retrieved using this function.
+ *
+ * Return value: (transfer full): Text of the expanded abbreviation if the
+ *    element text is an abbreviation or acronym, %NULL if not.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_abbreviation (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  if (poppler_structure_element->elem->getType () != StructElement::Span)
+    return NULL;
+
+  GooString *string = poppler_structure_element->elem->getExpandedAbbr ();
+  return string ? _poppler_goo_string_to_utf8 (string) : NULL;
+}
+
+/**
+ * poppler_structure_element_get_language:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the language and country code for the content in an element,
+ * in two-letter ISO format, e.g. <code>en_ES</code>, or %NULL if not
+ * defined.
+ *
+ * Return value: (transfer full): language and country code, or %NULL.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_language (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  GooString *string = poppler_structure_element->elem->getLanguage ();
+  return string ? _poppler_goo_string_to_utf8 (string) : NULL;
+}
+
+/**
+ * poppler_structure_element_get_alt_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the “alternate” text representation of the element (and its child
+ * elements). This is mostly used for non-text elements like images and
+ * figures, to specify a textual description of the element.
+ *
+ * Note that for elements containing proper text, the function
+ * poppler_structure_element_get_text() must be used instead.
+ *
+ * Return value: (transfer full): The alternate text representation for the
+ *    element, or %NULL if not defined.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  GooString *string = poppler_structure_element->elem->getAltText ();
+  return string ? _poppler_goo_string_to_utf8 (string) : NULL;
+}
+
+/**
+ * poppler_structure_element_get_actual_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the actual text enclosed by the element (and its child elements).
+ * The actual text is mostly used for non-text elements like images and
+ * figures which <em>do</em> have the graphical appearance of text, like
+ * a logo. For those the actual text is the equivalent text to those
+ * graphical elements which look like text when rendered.
+ *
+ * Note that for elements containing proper text, the function
+ * poppler_structure_element_get_text() must be used instead.
+ *
+ * Return value: (transfer full): The actual text for the element, or %NULL
+ *    if not defined.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  GooString *string = poppler_structure_element->elem->getActualText ();
+  return string ? _poppler_goo_string_to_utf8 (string) : NULL;
+}
+
+/**
+ * poppler_structure_element_get_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ * @recursive: If %TRUE, the text of child elements is gathered recursively
+ *   in logical order and returned as part of the result.
+ *
+ * Obtains the text enclosed by an element, or the text enclosed by the
+ * elements in the subtree (including the element itself).
+ *
+ * Return value: (transfer full): A string.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_element,
+                                    gboolean                 recursive)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  GooString *string = poppler_structure_element->elem->getText (recursive);
+  gchar *result = string ? _poppler_goo_string_to_utf8 (string) : NULL;
+  delete string;
+  return result;
+}
+
+struct _PopplerStructureElementIter
+{
+  PopplerDocument *document;
+  union {
+    StructElement  *elem;
+    StructTreeRoot *root;
+  };
+  gboolean is_root;
+  unsigned index;
+};
+
+POPPLER_DEFINE_BOXED_TYPE (PopplerStructureElementIter,
+                           poppler_structure_element_iter,
+                           poppler_structure_element_iter_copy,
+                           poppler_structure_element_iter_free)
+
+/**
+ * poppler_structure_element_iter_copy:
+ * @iter: a #PopplerStructureElementIter
+ *
+ * Creates a new #PopplerStructureElementIter as a copy of @iter. The
+ * returned value must be freed with poppler_structure_element_iter_free().
+ *
+ * Return value: (transfer full): a new #PopplerStructureElementIter
+ *
+ * Since: 0.26
+ */
+PopplerStructureElementIter *
+poppler_structure_element_iter_copy (PopplerStructureElementIter *iter)
+{
+  PopplerStructureElementIter *new_iter;
+
+  g_return_val_if_fail (iter != NULL, NULL);
+
+  new_iter = g_slice_dup (PopplerStructureElementIter, iter);
+  new_iter->document = (PopplerDocument *) g_object_ref (new_iter->document);
+
+  return new_iter;
+}
+
+/**
+ * poppler_structure_element_iter_free:
+ * @iter: a #PopplerStructureElementIter
+ *
+ * Frees @iter.
+ *
+ * Since: 0.26
+ */
+void
+poppler_structure_element_iter_free (PopplerStructureElementIter *iter)
+{
+  if (G_UNLIKELY (iter == NULL))
+    return;
+
+  g_object_unref (iter->document);
+  g_slice_free (PopplerStructureElementIter, iter);
+}
+
+/**
+ * poppler_structure_element_iter_new:
+ * @poppler_document: a #PopplerDocument.
+ *
+ * Returns the root #PopplerStructureElementIter for @poppler_document, or %NULL. The
+ * returned value must be freed with poppler_structure_element_iter_free().
+ *
+ * Documents may have an associated structure tree —mostly, Tagged-PDF
+ * compliant documents— which can be used to obtain information about
+ * the document structure and its contents. Each node in the tree contains
+ * a #PopplerStructureElement.
+ *
+ * Here is a simple example that walks the whole tree:
+ *
+ * <informalexample><programlisting>
+ * static void
+ * walk_structure (PopplerStructureElementIter *iter)
+ * {
+ *   do {
+ *     /<!-- -->* Get the element and do something with it *<!-- -->/
+ *     PopplerStructureElementIter *child = poppler_structure_element_iter_get_child (iter);
+ *     if (child)
+ *       walk_structure (child);
+ *     poppler_structure_element_iter_free (child);
+ *   } while (poppler_structure_element_iter_next (iter));
+ * }
+ * ...
+ * {
+ *   iter = poppler_structure_element_iter_new (document);
+ *   walk_structure (iter);
+ *   poppler_structure_element_iter_free (iter);
+ * }
+ * </programlisting></informalexample>
+ *
+ * Return value: (transfer full): a new #PopplerStructureElementIter, or %NULL if document
+ *    doesn't have structure tree.
+ *
+ * Since: 0.26
+ */
+PopplerStructureElementIter *
+poppler_structure_element_iter_new (PopplerDocument *poppler_document)
+{
+  PopplerStructureElementIter *iter;
+  StructTreeRoot *root;
+
+  g_return_val_if_fail (POPPLER_IS_DOCUMENT (poppler_document), NULL);
+
+  root = poppler_document->doc->getStructTreeRoot ();
+  if (root == NULL)
+    return NULL;
+
+  if (root->getNumElements () == 0)
+    return NULL;
+
+  iter = g_slice_new0 (PopplerStructureElementIter);
+  iter->document = (PopplerDocument *) g_object_ref (poppler_document);
+  iter->is_root = TRUE;
+  iter->root = root;
+
+  return iter;
+}
+
+/**
+ * poppler_structure_element_iter_next:
+ * @iter: a #PopplerStructureElementIter
+ *
+ * Sets @iter to point to the next structure element at the current level
+ * of the tree, if valid. See poppler_structure_element_iter_new() for more
+ * information.
+ *
+ * Return value: %TRUE, if @iter was set to the next structure element
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_structure_element_iter_next (PopplerStructureElementIter *iter)
+{
+  unsigned elements;
+
+  g_return_val_if_fail (iter != NULL, FALSE);
+
+  elements = iter->is_root
+    ? iter->root->getNumElements ()
+    : iter->elem->getNumElements ();
+
+  return ++iter->index < elements;
+}
+
+/**
+ * poppler_structure_element_iter_get_element:
+ * @iter: a #PopplerStructureElementIter
+ *
+ * Returns the #PopplerStructureElement associated with @iter.
+ *
+ * Return value: (transfer full): a new #PopplerStructureElement
+ *
+ * Since: 0.26
+ */
+PopplerStructureElement *
+poppler_structure_element_iter_get_element (PopplerStructureElementIter *iter)
+{
+  StructElement *elem;
+
+  g_return_val_if_fail (iter != NULL, NULL);
+
+  elem = iter->is_root
+    ? iter->root->getElement (iter->index)
+    : iter->elem->getElement (iter->index);
+
+  return _poppler_structure_element_new (iter->document, elem);
+}
+
+/**
+ * poppler_structure_element_iter_get_child:
+ * @parent: a #PopplerStructureElementIter
+ *
+ * Returns a new iterator to the children elements of the
+ * #PopplerStructureElement associated with @parent. The returned value must
+ * be freed with poppler_structure_element_iter_free().
+ *
+ * Return value: (transfer full): a new #PopplerStructureElementIter, or
+ *    %NULL if the element has no children
+ *
+ * Since: 0.26
+ */
+PopplerStructureElementIter *
+poppler_structure_element_iter_get_child (PopplerStructureElementIter *parent)
+{
+  StructElement *elem;
+
+  g_return_val_if_fail (parent != NULL, NULL);
+
+  elem = parent->is_root
+    ? parent->root->getElement (parent->index)
+    : parent->elem->getElement (parent->index);
+
+  if (elem->getNumElements () > 0)
+    {
+      PopplerStructureElementIter *child = g_slice_new0 (PopplerStructureElementIter);
+      child->document = (PopplerDocument *) g_object_ref (parent->document);
+      child->elem = elem;
+      return child;
+    }
+
+  return NULL;
+}
diff --git a/glib/poppler-structure-element.h b/glib/poppler-structure-element.h
new file mode 100644
index 0000000..a5ac04b
--- /dev/null
+++ b/glib/poppler-structure-element.h
@@ -0,0 +1,112 @@
+/* poppler-structure-element.h: glib interface to poppler
+ *
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __POPPLER_STRUCTURE_ELEMENT_H__
+#define __POPPLER_STRUCTURE_ELEMENT_H__
+
+#include <glib-object.h>
+#include "poppler.h"
+
+G_BEGIN_DECLS
+
+#define POPPLER_TYPE_STRUCTURE_ELEMENT    (poppler_structure_element_get_type ())
+#define POPPLER_STRUCTURE_ELEMENT(obj)    (G_TYPE_CHECK_INSTANCE_CAST ((obj), POPPLER_TYPE_STRUCTURE_ELEMENT, PopplerStructureElement))
+#define POPPLER_IS_STRUCTURE_ELEMENT(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), POPPLER_TYPE_STRUCTURE_ELEMENT))
+
+/**
+ * PopplerStructureElementKind:
+ */
+typedef enum {
+  POPPLER_STRUCTURE_ELEMENT_UNKNOWN,
+  POPPLER_STRUCTURE_ELEMENT_CONTENT,
+  POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE,
+  POPPLER_STRUCTURE_ELEMENT_DOCUMENT,
+  POPPLER_STRUCTURE_ELEMENT_PART,
+  POPPLER_STRUCTURE_ELEMENT_ARTICLE,
+  POPPLER_STRUCTURE_ELEMENT_SECTION,
+  POPPLER_STRUCTURE_ELEMENT_DIV,
+  POPPLER_STRUCTURE_ELEMENT_SPAN,
+  POPPLER_STRUCTURE_ELEMENT_QUOTE,
+  POPPLER_STRUCTURE_ELEMENT_NOTE,
+  POPPLER_STRUCTURE_ELEMENT_REFERENCE,
+  POPPLER_STRUCTURE_ELEMENT_BIBENTRY,
+  POPPLER_STRUCTURE_ELEMENT_CODE,
+  POPPLER_STRUCTURE_ELEMENT_LINK,
+  POPPLER_STRUCTURE_ELEMENT_ANNOT,
+  POPPLER_STRUCTURE_ELEMENT_RUBY,
+  POPPLER_STRUCTURE_ELEMENT_WARICHU,
+  POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE,
+  POPPLER_STRUCTURE_ELEMENT_CAPTION,
+  POPPLER_STRUCTURE_ELEMENT_NONSTRUCT,
+  POPPLER_STRUCTURE_ELEMENT_TOC,
+  POPPLER_STRUCTURE_ELEMENT_TOC_ITEM,
+  POPPLER_STRUCTURE_ELEMENT_INDEX,
+  POPPLER_STRUCTURE_ELEMENT_PRIVATE,
+  POPPLER_STRUCTURE_ELEMENT_PARAGRAPH,
+  POPPLER_STRUCTURE_ELEMENT_HEADING,
+  POPPLER_STRUCTURE_ELEMENT_HEADING_1,
+  POPPLER_STRUCTURE_ELEMENT_HEADING_2,
+  POPPLER_STRUCTURE_ELEMENT_HEADING_3,
+  POPPLER_STRUCTURE_ELEMENT_HEADING_4,
+  POPPLER_STRUCTURE_ELEMENT_HEADING_5,
+  POPPLER_STRUCTURE_ELEMENT_HEADING_6,
+  POPPLER_STRUCTURE_ELEMENT_LIST,
+  POPPLER_STRUCTURE_ELEMENT_LIST_ITEM,
+  POPPLER_STRUCTURE_ELEMENT_LIST_LABEL,
+  POPPLER_STRUCTURE_ELEMENT_LIST_BODY,
+  POPPLER_STRUCTURE_ELEMENT_TABLE,
+  POPPLER_STRUCTURE_ELEMENT_TABLE_ROW,
+  POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING,
+  POPPLER_STRUCTURE_ELEMENT_TABLE_DATA,
+  POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER,
+  POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER,
+  POPPLER_STRUCTURE_ELEMENT_TABLE_BODY,
+  POPPLER_STRUCTURE_ELEMENT_FIGURE,
+  POPPLER_STRUCTURE_ELEMENT_FORMULA,
+  POPPLER_STRUCTURE_ELEMENT_FORM,
+} PopplerStructureElementKind;
+
+
+GType                        poppler_structure_element_get_type                   (void) G_GNUC_CONST;
+PopplerStructureElementKind  poppler_structure_element_get_kind                   (PopplerStructureElement  *poppler_structure_element);
+gint                         poppler_structure_element_get_page                   (PopplerStructureElement  *poppler_structure_element);
+gboolean                     poppler_structure_element_is_content                 (PopplerStructureElement  *poppler_structure_element);
+gboolean                     poppler_structure_element_is_inline                  (PopplerStructureElement  *poppler_structure_element);
+gboolean                     poppler_structure_element_is_block                   (PopplerStructureElement  *poppler_structure_element);
+gchar                       *poppler_structure_element_get_id                     (PopplerStructureElement  *poppler_structure_element);
+gchar                       *poppler_structure_element_get_title                  (PopplerStructureElement  *poppler_structure_element);
+gchar                       *poppler_structure_element_get_abbreviation           (PopplerStructureElement  *poppler_structure_element);
+gchar                       *poppler_structure_element_get_language               (PopplerStructureElement  *poppler_structure_element);
+gchar                       *poppler_structure_element_get_text                   (PopplerStructureElement  *poppler_structure_element,
+                                                                                   gboolean                  recursive);
+gchar                       *poppler_structure_element_get_alt_text               (PopplerStructureElement  *poppler_structure_element);
+gchar                       *poppler_structure_element_get_actual_text            (PopplerStructureElement  *poppler_structure_element);
+
+#define POPPLER_TYPE_STRUCTURE_ELEMENT_ITER                                       (poppler_structure_element_iter_get_type ())
+GType                        poppler_structure_element_iter_get_type              (void) G_GNUC_CONST;
+PopplerStructureElementIter *poppler_structure_element_iter_new                   (PopplerDocument             *poppler_document);
+PopplerStructureElementIter *poppler_structure_element_iter_get_child             (PopplerStructureElementIter *parent);
+PopplerStructureElementIter *poppler_structure_element_iter_copy                  (PopplerStructureElementIter *iter);
+PopplerStructureElement     *poppler_structure_element_iter_get_element           (PopplerStructureElementIter *iter);
+gboolean                     poppler_structure_element_iter_next                  (PopplerStructureElementIter *iter);
+void                         poppler_structure_element_iter_free                  (PopplerStructureElementIter *iter);
+
+G_END_DECLS
+
+#endif /* !__POPPLER_STRUCTURE_ELEMENT_H__ */
diff --git a/glib/poppler.h b/glib/poppler.h
index 0db97d0..1da2af1 100644
--- a/glib/poppler.h
+++ b/glib/poppler.h
@@ -208,6 +208,8 @@ typedef struct _PopplerAnnotLine           PopplerAnnotLine;
 typedef struct _PopplerAnnotCircle         PopplerAnnotCircle;
 typedef struct _PopplerAnnotSquare         PopplerAnnotSquare;
 typedef struct _PopplerQuadrilateral       PopplerQuadrilateral;
+typedef struct _PopplerStructureElement    PopplerStructureElement;
+typedef struct _PopplerStructureElementIter PopplerStructureElementIter;
 
 typedef enum
 {
@@ -233,5 +235,6 @@ G_END_DECLS
 #include "poppler-date.h"
 #include "poppler-movie.h"
 #include "poppler-media.h"
+#include "poppler-structure-element.h"
 
 #endif /* __POPPLER_GLIB_H__ */
diff --git a/glib/reference/poppler-docs.sgml b/glib/reference/poppler-docs.sgml
index a9d5158..b817a0d 100644
--- a/glib/reference/poppler-docs.sgml
+++ b/glib/reference/poppler-docs.sgml
@@ -23,6 +23,8 @@
     <xi:include href="xml/poppler-layer.xml"/>
     <xi:include href="xml/poppler-media.xml"/>
     <xi:include href="xml/poppler-movie.xml"/>
+    <xi:include href="xml/poppler-structure.xml"/>
+    <xi:include href="xml/poppler-structure-element.xml"/>
     <xi:include href="xml/poppler-features.xml"/>
   </chapter>
 
diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt
index a954f64..fff370b 100644
--- a/glib/reference/poppler-sections.txt
+++ b/glib/reference/poppler-sections.txt
@@ -585,6 +585,43 @@ poppler_movie_get_type
 </SECTION>
 
 <SECTION>
+<FILE>poppler-structure-element</FILE>
+<TITLE>PopplerStructureElement</TITLE>
+PopplerStructureElement
+PopplerStructureElementKind
+PopplerStructureElementIter
+poppler_structure_element_iter_new
+poppler_structure_element_iter_next
+poppler_structure_element_iter_copy
+poppler_structure_element_iter_free
+poppler_structure_element_iter_get_child
+poppler_structure_element_iter_get_element
+poppler_structure_element_get_kind
+poppler_structure_element_get_page
+poppler_structure_element_is_content
+poppler_structure_element_is_inline
+poppler_structure_element_is_block
+poppler_structure_element_get_id
+poppler_structure_element_get_title
+poppler_structure_element_get_abbreviation
+poppler_structure_element_get_language
+poppler_structure_element_get_text
+poppler_structure_element_get_alt_text
+poppler_structure_element_get_actual_text
+
+<SUBSECTION Standard>
+POPPLER_STRUCTURE_ELEMENT
+POPPLER_IS_STRUCTURE_ELEMENT
+POPPLER_TYPE_STRUCTURE_ELEMENT
+POPPLER_TYPE_STRUCTURE_ELEMENT_ITER
+POPPLER_TYPE_STRUCTURE_ELEMENT_KIND
+
+<SUBSECTION Private>
+poppler_structure_element_get_type
+poppler_structure_element_iter_get_type
+</SECTION>
+
+<SECTION>
 <FILE>poppler-features</FILE>
 POPPLER_HAS_CAIRO
 POPPLER_MAJOR_VERSION
diff --git a/glib/reference/poppler.types b/glib/reference/poppler.types
index eed9849..388852a 100644
--- a/glib/reference/poppler.types
+++ b/glib/reference/poppler.types
@@ -8,3 +8,5 @@ poppler_annot_get_type
 poppler_layer_get_type
 poppler_media_get_type
 poppler_movie_get_type
+poppler_structure_element_get_type
+poppler_structure_element_iter_get_type


More information about the poppler mailing list