[poppler] 3 commits - glib/demo glib/Makefile.am glib/poppler.h glib/poppler-private.h glib/poppler-structure-element.cc glib/poppler-structure-element.h glib/reference
Carlos Garcia Campos
carlosgc at kemper.freedesktop.org
Sun Feb 9 08:28:26 PST 2014
glib/Makefile.am | 2
glib/demo/Makefile.am | 2
glib/demo/main.c | 2
glib/demo/taggedstruct.c | 232 ++++++++
glib/demo/taggedstruct.h | 31 +
glib/poppler-private.h | 10
glib/poppler-structure-element.cc | 932 ++++++++++++++++++++++++++++++++++++
glib/poppler-structure-element.h | 126 ++++
glib/poppler.h | 4
glib/reference/poppler-docs.sgml | 2
glib/reference/poppler-sections.txt | 49 +
glib/reference/poppler.types | 2
12 files changed, 1394 insertions(+)
New commits:
commit d6fde0fac0120b1622942d8344d5153d9abf3e1e
Author: Adrian Perez de Castro <aperez at igalia.com>
Date: Wed May 29 23:44:03 2013 +0300
glib-demo: Pane showing the document structure
Adds a new pane in poppler-glib-demo showing the structure for Tagged-PDF
documents. It also serves as an example on how to use the API for
PopplerStructure and PopplerStructureElement.
diff --git a/glib/demo/Makefile.am b/glib/demo/Makefile.am
index 8120bc9..e5df9d0 100644
--- a/glib/demo/Makefile.am
+++ b/glib/demo/Makefile.am
@@ -42,6 +42,8 @@ poppler_glib_demo_SOURCES = \
selections.h \
selections.c \
text.h \
+ taggedstruct.h \
+ taggedstruct.c \
text.c \
transitions.h \
transitions.c \
diff --git a/glib/demo/main.c b/glib/demo/main.c
index 3ba9b59..5bb13c9 100644
--- a/glib/demo/main.c
+++ b/glib/demo/main.c
@@ -34,6 +34,7 @@
#include "attachments.h"
#include "layers.h"
#include "text.h"
+#include "taggedstruct.h"
#include "find.h"
#include "print.h"
#include "selections.h"
@@ -65,6 +66,7 @@ static const PopplerGlibDemo demo_list[] = {
{ "Attachments", pgd_attachments_create_widget },
{ "Layers", pgd_layers_create_widget },
{ "Text", pgd_text_create_widget },
+ { "Tagged Structure", pgd_taggedstruct_create_widget },
{ "Find", pgd_find_create_widget },
{ "Print", pgd_print_create_widget }
};
diff --git a/glib/demo/taggedstruct.c b/glib/demo/taggedstruct.c
new file mode 100644
index 0000000..f5870f5
--- /dev/null
+++ b/glib/demo/taggedstruct.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <string.h>
+
+#include "text.h"
+#include "utils.h"
+
+typedef struct {
+ GtkWidget *view;
+ GtkTreeStore *store;
+ GtkWidget *type_value;
+ GtkWidget *lang_value;
+ GtkWidget *abbr_value;
+ GtkWidget *id_value;
+ GtkWidget *title_value;
+ GtkTextBuffer *text_buffer;
+} PgdTaggedStructDemo;
+
+
+/*
+ * Releases the demo state: drops the reference held on the tree store (if
+ * any) and frees the structure itself. A NULL @demo is ignored.
+ */
+static void
+pgd_taggedstruct_free (PgdTaggedStructDemo *demo)
+{
+  if (demo != NULL)
+    {
+      if (demo->store != NULL)
+        {
+          g_object_unref (demo->store);
+          demo->store = NULL;
+        }
+
+      g_free (demo);
+    }
+}
+
+
+/*
+ * Appends one row to @store for the element @iter points to and for each of
+ * its following siblings, recursing into children. Rows are created under
+ * @parent (top level when @parent is NULL). Column 0 receives the nick of
+ * the element kind and column 1 the element pointer.
+ */
+static void
+populate_store_aux (GtkTreeStore *store, GtkTreeIter *parent, PopplerStructureElementIter *iter)
+{
+  /* Reference the enum class once per call and release it before returning,
+   * instead of taking a new, never released reference for every element. */
+  GEnumClass *enum_class = G_ENUM_CLASS (g_type_class_ref (POPPLER_TYPE_STRUCTURE_ELEMENT_KIND));
+
+  do
+    {
+      PopplerStructureElementIter *child = poppler_structure_element_iter_get_child (iter);
+      PopplerStructureElement *element = poppler_structure_element_iter_get_element (iter);
+      GEnumValue *enum_value = g_enum_get_value (enum_class, poppler_structure_element_get_kind (element));
+      GtkTreeIter pos;
+
+      /* NOTE(review): the element pointer is handed to the store as a plain
+       * pointer; confirm who is expected to release the element. */
+      gtk_tree_store_append (store, &pos, parent);
+      gtk_tree_store_set (store, &pos,
+                          /* g_enum_get_value() yields NULL for a value the enum does not define */
+                          0, enum_value ? enum_value->value_nick : "unknown",
+                          1, element,
+                          -1);
+
+      if (child)
+        {
+          populate_store_aux (store, &pos, child);
+          poppler_structure_element_iter_free (child);
+        }
+    }
+  while (poppler_structure_element_iter_next (iter));
+
+  g_type_class_unref (enum_class);
+}
+
+
+/*
+ * Creates the two-column tree store (kind label, element pointer) used by
+ * the demo. With a non-NULL @iter the store is filled from the structure
+ * tree; otherwise it gets a single placeholder row with a NULL element.
+ */
+static GtkTreeStore *
+populate_store (PopplerStructureElementIter *iter)
+{
+  GtkTreeStore *store = gtk_tree_store_new (2, G_TYPE_STRING, G_TYPE_POINTER);
+  GtkTreeIter placeholder;
+
+  if (iter != NULL)
+    {
+      populate_store_aux (store, NULL, iter);
+      return store;
+    }
+
+  gtk_tree_store_append (store, &placeholder, NULL);
+  gtk_tree_store_set (store, &placeholder, 0, "<b>Not a Tagged-PDF</b>", 1, NULL, -1);
+
+  return store;
+}
+
+
+/*
+ * Handler for the "changed" signal of the tree selection. Shows the ID,
+ * title, language, abbreviation, type and (for content elements) the text
+ * of the selected structure element in the widgets referenced by @demo.
+ */
+static void
+pgd_selection_changed (GtkTreeSelection *selection, PgdTaggedStructDemo *demo)
+{
+  GtkTreeModel *model;
+  PopplerStructureElement *element;
+  GtkTreeIter iter;
+  gpointer elementptr = NULL;
+
+  if (!gtk_tree_selection_get_selected (selection, &model, &iter))
+    return;
+
+  gtk_tree_model_get (model, &iter, 1, &elementptr, -1);
+  gtk_text_buffer_set_text (demo->text_buffer, "", -1);
+
+  /* The placeholder row used for documents that are not Tagged-PDF stores a
+   * NULL element: clear the panel instead of querying a NULL object. */
+  if (elementptr == NULL)
+    {
+      gtk_label_set_text (GTK_LABEL (demo->id_value), "");
+      gtk_label_set_text (GTK_LABEL (demo->title_value), "");
+      gtk_label_set_text (GTK_LABEL (demo->lang_value), "");
+      gtk_label_set_text (GTK_LABEL (demo->abbr_value), "");
+      gtk_label_set_text (GTK_LABEL (demo->type_value), "");
+      return;
+    }
+
+  element = POPPLER_STRUCTURE_ELEMENT (elementptr);
+
+  /* NOTE(review): if these getters return newly allocated strings, the
+   * results leak here; confirm against their declarations. */
+  gtk_label_set_text (GTK_LABEL (demo->id_value),
+                      poppler_structure_element_get_id (element));
+  gtk_label_set_text (GTK_LABEL (demo->title_value),
+                      poppler_structure_element_get_title (element));
+  gtk_label_set_text (GTK_LABEL (demo->lang_value),
+                      poppler_structure_element_get_language (element));
+  gtk_label_set_text (GTK_LABEL (demo->abbr_value),
+                      poppler_structure_element_get_abbreviation (element));
+
+  if (poppler_structure_element_is_content (element))
+    {
+      /* poppler_structure_element_get_text() is declared as returning a
+       * non-const gchar *, so the string is released once it has been
+       * copied into the buffer. */
+      gchar *text = poppler_structure_element_get_text (element, FALSE);
+
+      if (text)
+        gtk_text_buffer_set_text (demo->text_buffer, text, -1);
+      g_free (text);
+
+      gtk_label_set_text (GTK_LABEL (demo->type_value), "Content");
+    }
+  else if (poppler_structure_element_is_inline (element))
+    gtk_label_set_text (GTK_LABEL (demo->type_value), "Inline");
+  else if (poppler_structure_element_is_block (element))
+    gtk_label_set_text (GTK_LABEL (demo->type_value), "Block");
+  else
+    gtk_label_set_text (GTK_LABEL (demo->type_value), "Structure");
+}
+
+
+/*
+ * Builds the "Tagged Structure" demo pane for @document: a tree view with
+ * the structure elements on one side and, on the other, a grid of property
+ * labels plus a read-only text view, both refreshed whenever the tree
+ * selection changes. The demo state is freed through a weak reference on
+ * the returned box.
+ */
+GtkWidget *
+pgd_taggedstruct_create_widget (PopplerDocument *document)
+{
+  PopplerStructureElementIter *iter;
+  PgdTaggedStructDemo *demo;
+  GtkCellRenderer *renderer;
+  GtkTreeSelection *selection;
+  GtkWidget *hbox;
+  GtkWidget *vbox;
+  GtkWidget *grid;
+  GtkWidget *scroll;
+  GtkWidget *w;
+  gint row;
+
+  demo = g_new0 (PgdTaggedStructDemo, 1);
+
+  iter = poppler_structure_element_iter_new (document);
+  demo->store = populate_store (iter);
+  poppler_structure_element_iter_free (iter);
+
+  demo->view = gtk_tree_view_new_with_model (GTK_TREE_MODEL (demo->store));
+
+  /* Column 0 is rendered as markup so the placeholder row can be bold. */
+  renderer = gtk_cell_renderer_text_new ();
+  gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (demo->view),
+                                               0, "Type",
+                                               renderer,
+                                               "markup", 0,
+                                               NULL);
+  g_object_set (G_OBJECT (gtk_tree_view_get_column (GTK_TREE_VIEW (demo->view), 0)),
+                "expand", TRUE, NULL);
+
+  gtk_tree_view_expand_all (GTK_TREE_VIEW (demo->view));
+  gtk_tree_view_set_show_expanders (GTK_TREE_VIEW (demo->view), TRUE);
+  gtk_tree_view_set_headers_visible (GTK_TREE_VIEW (demo->view), TRUE);
+  gtk_tree_view_set_headers_clickable (GTK_TREE_VIEW (demo->view), FALSE);
+  gtk_tree_view_set_activate_on_single_click (GTK_TREE_VIEW (demo->view), TRUE);
+
+  hbox = gtk_box_new (GTK_ORIENTATION_HORIZONTAL, 6);
+  scroll = gtk_scrolled_window_new (NULL, NULL);
+  gtk_container_add (GTK_CONTAINER (scroll), demo->view);
+  gtk_widget_show (demo->view);
+  gtk_box_pack_start (GTK_BOX (hbox), scroll, TRUE, TRUE, 0);
+  gtk_widget_show (scroll);
+
+  row = 0;
+  grid = gtk_grid_new ();
+  gtk_container_set_border_width (GTK_CONTAINER (grid), 12);
+  gtk_grid_set_row_homogeneous (GTK_GRID (grid), FALSE);
+  gtk_grid_set_column_spacing (GTK_GRID (grid), 6);
+  gtk_grid_set_row_spacing (GTK_GRID (grid), 6);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Type:</b>", &demo->type_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>ID:</b>", &demo->id_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Title:</b>", &demo->title_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Language:</b>", &demo->lang_value, NULL, &row);
+  pgd_table_add_property_with_value_widget (GTK_GRID (grid), "<b>Abbreviation:</b>", &demo->abbr_value, NULL, &row);
+
+  vbox = gtk_box_new (GTK_ORIENTATION_VERTICAL, 6);
+  gtk_box_pack_start (GTK_BOX (vbox), grid, FALSE, FALSE, 0);
+  gtk_widget_show (grid);
+
+  scroll = gtk_scrolled_window_new (NULL, NULL);
+  gtk_container_set_border_width (GTK_CONTAINER (scroll), 12);
+  gtk_box_pack_end (GTK_BOX (vbox), scroll, TRUE, TRUE, 0);
+  gtk_widget_show (scroll);
+
+  /* Read-only text view for the text of the selected element; it is shown
+   * once here (the original called gtk_widget_show() on it twice). */
+  gtk_container_add (GTK_CONTAINER (scroll), (w = gtk_text_view_new ()));
+  gtk_widget_show (w);
+
+  demo->text_buffer = gtk_text_view_get_buffer (GTK_TEXT_VIEW (w));
+  gtk_text_view_set_wrap_mode (GTK_TEXT_VIEW (w), GTK_WRAP_WORD_CHAR);
+  gtk_text_view_set_editable (GTK_TEXT_VIEW (w), FALSE);
+  gtk_text_buffer_set_text (demo->text_buffer, "", -1);
+
+  selection = gtk_tree_view_get_selection (GTK_TREE_VIEW (demo->view));
+  g_signal_connect (selection, "changed",
+                    G_CALLBACK (pgd_selection_changed),
+                    demo);
+
+  gtk_box_pack_end (GTK_BOX (hbox), vbox, TRUE, TRUE, 0);
+  gtk_widget_show (vbox);
+
+  /* Free the demo state when the returned box goes away. */
+  g_object_weak_ref (G_OBJECT (hbox),
+                     (GWeakNotify) pgd_taggedstruct_free,
+                     demo);
+
+  gtk_widget_show (hbox);
+  return hbox;
+}
diff --git a/glib/demo/taggedstruct.h b/glib/demo/taggedstruct.h
new file mode 100644
index 0000000..3a38727
--- /dev/null
+++ b/glib/demo/taggedstruct.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <gtk/gtk.h>
+#include <poppler.h>
+
+#ifndef _TAGGEDSTRUCT_H_
+#define _TAGGEDSTRUCT_H_
+
+G_BEGIN_DECLS
+
+GtkWidget *pgd_taggedstruct_create_widget (PopplerDocument *document);
+
+G_END_DECLS
+
+#endif /* _TAGGEDSTRUCT_H_ */
commit 0f9fa775c469c03d1613b955ee7b06b823e6e080
Author: Adrian Perez de Castro <aperez at igalia.com>
Date: Thu Sep 26 17:50:51 2013 +0300
glib: Expose inline attributes of structure elements
Allows obtaining inline text attributes from structure elements. The text
is divided into "spans", which are groups of consecutive glyphs that share
their attributes. Each one of those is represented by a PopplerTextSpan,
which gives information about the text font and color, and the link target
for links. The list of PopplerTextSpans is created lazily when first used.
https://bugs.freedesktop.org/show_bug.cgi?id=64821
diff --git a/glib/poppler-structure-element.cc b/glib/poppler-structure-element.cc
index 6b8778c..39dfd51 100644
--- a/glib/poppler-structure-element.cc
+++ b/glib/poppler-structure-element.cc
@@ -661,3 +661,272 @@ poppler_structure_element_iter_get_child (PopplerStructureElementIter *parent)
return NULL;
}
+
+
+struct _PopplerTextSpan {
+ gchar *text;
+ gchar *font_name;
+ guint flags;
+ PopplerColor color;
+};
+
+POPPLER_DEFINE_BOXED_TYPE (PopplerTextSpan,
+ poppler_text_span,
+ poppler_text_span_copy,
+ poppler_text_span_free)
+
+enum {
+ POPPLER_TEXT_SPAN_FIXED_WIDTH = (1 << 0),
+ POPPLER_TEXT_SPAN_SERIF = (1 << 1),
+ POPPLER_TEXT_SPAN_ITALIC = (1 << 2),
+ POPPLER_TEXT_SPAN_BOLD = (1 << 3),
+};
+
+/* Builds a newly allocated #PopplerTextSpan from a core TextSpan, copying
+ * its text, color, font name and font style flags. */
+static PopplerTextSpan *
+text_span_poppler_text_span (const TextSpan& span)
+{
+  PopplerTextSpan *new_span = g_slice_new0 (PopplerTextSpan);
+  if (GooString *text = span.getText ())
+    new_span->text = _poppler_goo_string_to_utf8 (text);
+
+  new_span->color.red = colToDbl (span.getColor ().r) * 65535;
+  new_span->color.green = colToDbl (span.getColor ().g) * 65535;
+  new_span->color.blue = colToDbl (span.getColor ().b) * 65535;
+
+  if (span.getFont ())
+    {
+      // Prefer the family name and fall back to the font name when the
+      // font has no family name.
+      GooString *font_name = span.getFont ()->getFamily ();
+      if (font_name == NULL)
+        font_name = span.getFont ()->getName ();
+
+      // Leave font_name unset when neither name is available rather than
+      // handing a NULL string to the converter.
+      if (font_name != NULL)
+        new_span->font_name = _poppler_goo_string_to_utf8 (font_name);
+
+      if (span.getFont ()->isFixedWidth ())
+        new_span->flags |= POPPLER_TEXT_SPAN_FIXED_WIDTH;
+      if (span.getFont ()->isSerif ())
+        new_span->flags |= POPPLER_TEXT_SPAN_SERIF;
+      if (span.getFont ()->isItalic ())
+        new_span->flags |= POPPLER_TEXT_SPAN_ITALIC;
+      if (span.getFont ()->isBold ())
+        new_span->flags |= POPPLER_TEXT_SPAN_BOLD;
+
+      /* isBold() can return false for some fonts whose weight is heavy */
+      switch (span.getFont ()->getWeight ())
+        {
+        case GfxFont::W500:
+        case GfxFont::W600:
+        case GfxFont::W700:
+        case GfxFont::W800:
+        case GfxFont::W900:
+          new_span->flags |= POPPLER_TEXT_SPAN_BOLD;
+          break;
+        default:
+          break;
+        }
+    }
+
+  return new_span;
+}
+
+/**
+ * poppler_text_span_copy:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Creates a deep copy of a text span, including its strings.
+ *
+ * Return value: (transfer full): A new #PopplerTextSpan
+ *
+ * Since: 0.26
+ */
+PopplerTextSpan *
+poppler_text_span_copy (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, NULL);
+
+  /* The shallow duplicate still points at the source strings; give the
+   * copy its own. g_strdup() maps a NULL string to NULL. */
+  PopplerTextSpan *duplicate = g_slice_dup (PopplerTextSpan, poppler_text_span);
+
+  duplicate->text = g_strdup (poppler_text_span->text);
+  duplicate->font_name = g_strdup (poppler_text_span->font_name);
+
+  return duplicate;
+}
+
+/**
+ * poppler_text_span_free:
+ * @poppler_text_span: A #PopplerTextSpan
+ *
+ * Frees a text span.
+ *
+ * Since: 0.26
+ */
+void
+poppler_text_span_free (PopplerTextSpan *poppler_text_span)
+{
+ if (G_UNLIKELY (poppler_text_span == NULL))
+ return;
+
+ g_free (poppler_text_span->text);
+ g_free (poppler_text_span->font_name);
+ g_slice_free (PopplerTextSpan, poppler_text_span);
+}
+
+/**
+ * poppler_text_span_is_fixed_width_font:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Check whether a text span is meant to be rendered using a fixed-width font.
+ *
+ * Return value: %TRUE if the span uses a fixed-width font, %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_text_span_is_fixed_width_font (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, FALSE);
+
+  /* Compare against zero so the result is always exactly TRUE or FALSE. */
+  return (poppler_text_span->flags & POPPLER_TEXT_SPAN_FIXED_WIDTH) != 0;
+}
+
+/**
+ * poppler_text_span_is_serif_font:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Check whether a text span is meant to be rendered using a serif font.
+ *
+ * Return value: %TRUE if the span uses a serif font, %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_text_span_is_serif_font (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, FALSE);
+
+  /* The raw mask result would be 2, not TRUE; normalize it. */
+  return (poppler_text_span->flags & POPPLER_TEXT_SPAN_SERIF) != 0;
+}
+
+/**
+ * poppler_text_span_is_bold_font:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Check whether a text span is meant to be rendered using a bold font.
+ *
+ * Return value: %TRUE if the span uses a bold font, %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_text_span_is_bold_font (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, FALSE);
+
+  /* The raw mask result would be 8, not TRUE; normalize it. */
+  return (poppler_text_span->flags & POPPLER_TEXT_SPAN_BOLD) != 0;
+}
+
+/**
+ * poppler_text_span_get_color:
+ * @poppler_text_span: a #PopplerTextSpan
+ * @color: (out): a return location for a #PopplerColor
+ *
+ * Obtains the color in which the text is to be rendered.
+ *
+ * Since: 0.26
+ */
+void
+poppler_text_span_get_color (PopplerTextSpan *poppler_text_span,
+ PopplerColor *color)
+{
+ g_return_if_fail (poppler_text_span != NULL);
+ g_return_if_fail (color != NULL);
+
+ *color = poppler_text_span->color;
+}
+
+/**
+ * poppler_text_span_get_text:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Obtains the text contained in the span.
+ *
+ * Return value: (transfer none): A string.
+ *
+ * Since: 0.26
+ */
+const gchar *
+poppler_text_span_get_text (PopplerTextSpan *poppler_text_span)
+{
+ g_return_val_if_fail (poppler_text_span != NULL, NULL);
+
+ return poppler_text_span->text;
+}
+
+/**
+ * poppler_text_span_get_font_name:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Obtains the name of the font in which the span is to be rendered.
+ *
+ * Return value: (transfer none): A string containing the font name, or
+ * %NULL if a font is not defined.
+ *
+ * Since: 0.26
+ */
+const gchar *
+poppler_text_span_get_font_name (PopplerTextSpan *poppler_text_span)
+{
+ g_return_val_if_fail (poppler_text_span != NULL, NULL);
+
+ return poppler_text_span->font_name;
+}
+
+
+/**
+ * poppler_structure_element_get_text_spans:
+ * @poppler_structure_element: A #PopplerStructureElement
+ * @n_text_spans: (out): A pointer to the location where the number of elements in
+ *    the returned array will be stored.
+ *
+ * Obtains the text enclosed by an element, as an array of #PopplerTextSpan
+ * structures. Each item in the list is a piece of text which shares the same
+ * attributes, plus its attributes. The following example shows how to
+ * obtain and free the text spans of an element:
+ *
+ * <informalexample><programlisting>
+ * guint i, n_spans;
+ * PopplerTextSpan **text_spans =
+ *    poppler_structure_element_get_text_spans (element, &n_spans);
+ * /<!-- -->* Use the text spans *<!-- -->/
+ * for (i = 0; i < n_spans; i++)
+ *    poppler_text_span_free (text_spans[i]);
+ * g_free (text_spans);
+ * </programlisting></informalexample>
+ *
+ * Return value: (transfer full) (array length=n_text_spans) (element-type PopplerTextSpan):
+ *    An array of #PopplerTextSpan elements, or %NULL (with @n_text_spans
+ *    set to 0) if the element is not a content element.
+ *
+ * Since: 0.26
+ */
+PopplerTextSpan **
+poppler_structure_element_get_text_spans (PopplerStructureElement *poppler_structure_element,
+                                          guint                   *n_text_spans)
+{
+  g_return_val_if_fail (n_text_spans != NULL, NULL);
+
+  /* Give the out parameter a defined value on every return path below, so
+   * callers that loop on it as in the example above never read garbage. */
+  *n_text_spans = 0;
+
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  if (!poppler_structure_element->elem->isContent ())
+    return NULL;
+
+  const TextSpanArray spans(poppler_structure_element->elem->getTextSpans ());
+  PopplerTextSpan **text_spans = g_new0 (PopplerTextSpan*, spans.size ());
+
+  size_t i = 0;
+  for (TextSpanArray::const_iterator s = spans.begin (); s != spans.end (); ++s)
+    text_spans[i++] = text_span_poppler_text_span (*s);
+
+  *n_text_spans = spans.size ();
+
+  return text_spans;
+}
diff --git a/glib/poppler-structure-element.h b/glib/poppler-structure-element.h
index a5ac04b..7ba5d56 100644
--- a/glib/poppler-structure-element.h
+++ b/glib/poppler-structure-element.h
@@ -82,7 +82,6 @@ typedef enum {
POPPLER_STRUCTURE_ELEMENT_FORM,
} PopplerStructureElementKind;
-
GType poppler_structure_element_get_type (void) G_GNUC_CONST;
PopplerStructureElementKind poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element);
gint poppler_structure_element_get_page (PopplerStructureElement *poppler_structure_element);
@@ -97,6 +96,8 @@ gchar *poppler_structure_element_get_text
gboolean recursive);
gchar *poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element);
gchar *poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element);
+PopplerTextSpan **poppler_structure_element_get_text_spans (PopplerStructureElement *poppler_structure_element,
+ guint *n_text_spans);
#define POPPLER_TYPE_STRUCTURE_ELEMENT_ITER (poppler_structure_element_iter_get_type ())
GType poppler_structure_element_iter_get_type (void) G_GNUC_CONST;
@@ -107,6 +108,19 @@ PopplerStructureElement *poppler_structure_element_iter_get_element
gboolean poppler_structure_element_iter_next (PopplerStructureElementIter *iter);
void poppler_structure_element_iter_free (PopplerStructureElementIter *iter);
+/* PopplerTextSpan: boxed type describing a run of text and its attributes.
+ * Only functions that have a definition are declared here. */
+#define POPPLER_TYPE_TEXT_SPAN (poppler_text_span_get_type ())
+GType            poppler_text_span_get_type            (void) G_GNUC_CONST;
+PopplerTextSpan *poppler_text_span_copy                (PopplerTextSpan *poppler_text_span);
+void             poppler_text_span_free                (PopplerTextSpan *poppler_text_span);
+gboolean         poppler_text_span_is_fixed_width_font (PopplerTextSpan *poppler_text_span);
+gboolean         poppler_text_span_is_serif_font       (PopplerTextSpan *poppler_text_span);
+gboolean         poppler_text_span_is_bold_font        (PopplerTextSpan *poppler_text_span);
+void             poppler_text_span_get_color           (PopplerTextSpan *poppler_text_span,
+                                                        PopplerColor    *color);
+const gchar     *poppler_text_span_get_text            (PopplerTextSpan *poppler_text_span);
+const gchar     *poppler_text_span_get_font_name       (PopplerTextSpan *poppler_text_span);
+
G_END_DECLS
#endif /* !__POPPLER_STRUCTURE_ELEMENT_H__ */
diff --git a/glib/poppler.h b/glib/poppler.h
index 1da2af1..f5c9563 100644
--- a/glib/poppler.h
+++ b/glib/poppler.h
@@ -210,6 +210,7 @@ typedef struct _PopplerAnnotSquare PopplerAnnotSquare;
typedef struct _PopplerQuadrilateral PopplerQuadrilateral;
typedef struct _PopplerStructureElement PopplerStructureElement;
typedef struct _PopplerStructureElementIter PopplerStructureElementIter;
+typedef struct _PopplerTextSpan PopplerTextSpan;
typedef enum
{
diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt
index fff370b..719b708 100644
--- a/glib/reference/poppler-sections.txt
+++ b/glib/reference/poppler-sections.txt
@@ -590,6 +590,7 @@ poppler_movie_get_type
PopplerStructureElement
PopplerStructureElementKind
PopplerStructureElementIter
+PopplerTextSpan
poppler_structure_element_iter_new
poppler_structure_element_iter_next
poppler_structure_element_iter_copy
@@ -608,6 +609,15 @@ poppler_structure_element_get_language
poppler_structure_element_get_text
poppler_structure_element_get_alt_text
poppler_structure_element_get_actual_text
+poppler_structure_element_get_text_spans
+poppler_text_span_copy
+poppler_text_span_free
+poppler_text_span_is_fixed_width_font
+poppler_text_span_is_serif_font
+poppler_text_span_is_bold_font
+poppler_text_span_get_color
+poppler_text_span_get_text
+poppler_text_span_get_font_name
<SUBSECTION Standard>
POPPLER_STRUCTURE_ELEMENT
@@ -618,7 +628,9 @@ POPPLER_TYPE_STRUCTURE_ELEMENT_KIND
<SUBSECTION Private>
poppler_structure_element_get_type
+poppler_structure_element_kind_get_type
poppler_structure_element_iter_get_type
+poppler_text_span_get_type
</SECTION>
<SECTION>
commit 8072d4b0e3ea10b4308f8172891f769f30466133
Author: Adrian Perez de Castro <aperez at igalia.com>
Date: Thu May 9 12:01:59 2013 +0300
glib: Expose the document structure tree
Implements a new PopplerStructureElement class, which builds upon
StructTreeRoot and StructElement to expose the document structure of
tagged PDFs in the GLib binding.
Navigation of the structure tree is done by an iterator-based interface,
using PopplerStructureElementIter.
https://bugs.freedesktop.org/show_bug.cgi?id=64821
diff --git a/glib/Makefile.am b/glib/Makefile.am
index a38e052..040996a 100644
--- a/glib/Makefile.am
+++ b/glib/Makefile.am
@@ -41,6 +41,7 @@ poppler_glib_public_headers = \
poppler-layer.h \
poppler-media.h \
poppler-movie.h \
+ poppler-structure-element.h \
poppler.h
poppler_glib_includedir = $(includedir)/poppler/glib
@@ -67,6 +68,7 @@ libpoppler_glib_la_SOURCES = \
poppler-cached-file-loader.h \
poppler-input-stream.cc \
poppler-input-stream.h \
+ poppler-structure-element.cc \
poppler.cc \
poppler-private.h
diff --git a/glib/poppler-private.h b/glib/poppler-private.h
index 93d0f23..874cfdb 100644
--- a/glib/poppler-private.h
+++ b/glib/poppler-private.h
@@ -17,6 +17,7 @@
#include <OptionalContent.h>
#include <CairoOutputDev.h>
#include <FileSpec.h>
+#include <StructElement.h>
#endif
struct _PopplerDocument
@@ -95,6 +96,15 @@ struct _PopplerLayer
gchar *title;
};
+
+struct _PopplerStructureElement
+{
+ /*< private >*/
+ GObject parent_instance;
+ PopplerDocument *document;
+ StructElement *elem;
+};
+
GList *_poppler_document_get_layers (PopplerDocument *document);
GList *_poppler_document_get_layer_rbgroup (PopplerDocument *document,
Layer *layer);
diff --git a/glib/poppler-structure-element.cc b/glib/poppler-structure-element.cc
new file mode 100644
index 0000000..6b8778c
--- /dev/null
+++ b/glib/poppler-structure-element.cc
@@ -0,0 +1,663 @@
+/* poppler-structure-element.cc: glib interface to poppler
+ *
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+
+#ifndef __GI_SCANNER__
+#include <StructTreeRoot.h>
+#include <StructElement.h>
+#include <GlobalParams.h>
+#include <UnicodeMap.h>
+#endif /* !__GI_SCANNER__ */
+
+#include "poppler.h"
+#include "poppler-private.h"
+#include "poppler-structure-element.h"
+
+
+/**
+ * SECTION:poppler-structure-element
+ * @short_description: Document structure element.
+ * @title: PopplerStructureElement
+ * @see_also: #PopplerStructureElementIter
+ *
+ * Instances of #PopplerStructureElement are used to describe the structure
+ * of a #PopplerDocument. To access the elements in the structure of the
+ * document, use poppler_structure_element_iter_new() to obtain an iterator
+ * for the top level elements, and then use
+ * poppler_structure_element_iter_get_element(),
+ * poppler_structure_element_iter_get_child() and
+ * poppler_structure_element_iter_next() to walk the tree.
+ */
+
+typedef struct _PopplerStructureElementClass
+{
+ GObjectClass parent_class;
+} PopplerStructureElementClass;
+
+G_DEFINE_TYPE (PopplerStructureElement, poppler_structure_element, G_TYPE_OBJECT);
+
+/* Creates a #PopplerStructureElement wrapping @element. The new object takes
+ * a reference on @document; @element is stored as a plain pointer. */
+static PopplerStructureElement *
+_poppler_structure_element_new (PopplerDocument *document, StructElement *element)
+{
+  PopplerStructureElement *poppler_structure_element;
+
+  g_assert (POPPLER_IS_DOCUMENT (document));
+  g_assert (element);
+
+  /* A single NULL terminates the (empty) property list. */
+  poppler_structure_element = (PopplerStructureElement *) g_object_new (POPPLER_TYPE_STRUCTURE_ELEMENT, NULL);
+  poppler_structure_element->document = (PopplerDocument *) g_object_ref (document);
+  poppler_structure_element->elem = element;
+
+  return poppler_structure_element;
+}
+
+
+static void
+poppler_structure_element_init (PopplerStructureElement *poppler_structure_element)
+{
+}
+
+
+static void
+poppler_structure_element_finalize (GObject *object)
+{
+ PopplerStructureElement *poppler_structure_element = POPPLER_STRUCTURE_ELEMENT (object);
+
+ /* poppler_structure_element->elem is owned by the StructTreeRoot */
+ g_object_unref (poppler_structure_element->document);
+
+ G_OBJECT_CLASS (poppler_structure_element_parent_class)->finalize (object);
+}
+
+
+static void
+poppler_structure_element_class_init (PopplerStructureElementClass *klass)
+{
+ GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+ gobject_class->finalize = poppler_structure_element_finalize;
+}
+
+
+/**
+ * poppler_structure_element_get_kind:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the kind of a structure element, mapped from the type of the
+ * underlying element.
+ *
+ * Return value: A #PopplerStructureElementKind value.
+ *
+ * Since: 0.26
+ */
+PopplerStructureElementKind
+poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), POPPLER_STRUCTURE_ELEMENT_UNKNOWN);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, POPPLER_STRUCTURE_ELEMENT_UNKNOWN);
+
+  switch (poppler_structure_element->elem->getType ())
+    {
+      case StructElement::Unknown:
+        return POPPLER_STRUCTURE_ELEMENT_UNKNOWN;
+      case StructElement::MCID:
+        return POPPLER_STRUCTURE_ELEMENT_CONTENT;
+      case StructElement::OBJR:
+        return POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE;
+      case StructElement::Document:
+        return POPPLER_STRUCTURE_ELEMENT_DOCUMENT;
+      case StructElement::Part:
+        return POPPLER_STRUCTURE_ELEMENT_PART;
+      case StructElement::Sect:
+        return POPPLER_STRUCTURE_ELEMENT_SECTION;
+      case StructElement::Div:
+        return POPPLER_STRUCTURE_ELEMENT_DIV;
+      case StructElement::Span:
+        return POPPLER_STRUCTURE_ELEMENT_SPAN;
+      case StructElement::Quote:
+        return POPPLER_STRUCTURE_ELEMENT_QUOTE;
+      case StructElement::Note:
+        return POPPLER_STRUCTURE_ELEMENT_NOTE;
+      case StructElement::Reference:
+        return POPPLER_STRUCTURE_ELEMENT_REFERENCE;
+      case StructElement::BibEntry:
+        return POPPLER_STRUCTURE_ELEMENT_BIBENTRY;
+      case StructElement::Code:
+        return POPPLER_STRUCTURE_ELEMENT_CODE;
+      case StructElement::Link:
+        return POPPLER_STRUCTURE_ELEMENT_LINK;
+      case StructElement::Annot:
+        return POPPLER_STRUCTURE_ELEMENT_ANNOT;
+      case StructElement::Ruby:
+        return POPPLER_STRUCTURE_ELEMENT_RUBY;
+      case StructElement::Warichu:
+        return POPPLER_STRUCTURE_ELEMENT_WARICHU;
+      case StructElement::BlockQuote:
+        return POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE;
+      case StructElement::Caption:
+        return POPPLER_STRUCTURE_ELEMENT_CAPTION;
+      case StructElement::NonStruct:
+        return POPPLER_STRUCTURE_ELEMENT_NONSTRUCT;
+      case StructElement::TOC:
+        return POPPLER_STRUCTURE_ELEMENT_TOC;
+      case StructElement::TOCI:
+        return POPPLER_STRUCTURE_ELEMENT_TOC_ITEM;
+      case StructElement::Index:
+        return POPPLER_STRUCTURE_ELEMENT_INDEX;
+      case StructElement::Private:
+        return POPPLER_STRUCTURE_ELEMENT_PRIVATE;
+      case StructElement::P:
+        return POPPLER_STRUCTURE_ELEMENT_PARAGRAPH;
+      case StructElement::H:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING;
+      case StructElement::H1:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_1;
+      case StructElement::H2:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_2;
+      case StructElement::H3:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_3;
+      case StructElement::H4:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_4;
+      case StructElement::H5:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_5;
+      case StructElement::H6:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_6;
+      case StructElement::L:
+        return POPPLER_STRUCTURE_ELEMENT_LIST;
+      case StructElement::LI:
+        return POPPLER_STRUCTURE_ELEMENT_LIST_ITEM;
+      case StructElement::Lbl:
+        return POPPLER_STRUCTURE_ELEMENT_LIST_LABEL;
+      case StructElement::LBody:
+        return POPPLER_STRUCTURE_ELEMENT_LIST_BODY;
+      case StructElement::Table:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE;
+      case StructElement::TR:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_ROW;
+      case StructElement::TH:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING;
+      case StructElement::TD:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_DATA;
+      case StructElement::THead:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER;
+      case StructElement::TFoot:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER;
+      case StructElement::TBody:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_BODY;
+      case StructElement::Figure:
+        return POPPLER_STRUCTURE_ELEMENT_FIGURE;
+      case StructElement::Formula:
+        return POPPLER_STRUCTURE_ELEMENT_FORMULA;
+      case StructElement::Form:
+        return POPPLER_STRUCTURE_ELEMENT_FORM;
+      default:
+        g_assert_not_reached ();
+    }
+
+  /* Only reached when assertions are compiled out: still return a defined
+   * value instead of falling off the end of a non-void function. */
+  return POPPLER_STRUCTURE_ELEMENT_UNKNOWN;
+}
+
+/**
+ * poppler_structure_element_get_page:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the page number in which the element is contained.
+ *
+ * Return value: Number of the page that contains the element, or
+ * <code>-1</code> if not defined.
+ *
+ * Since: 0.26
+ */
+gint
+poppler_structure_element_get_page (PopplerStructureElement *poppler_structure_element)
+{
+ g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), -1);
+ g_return_val_if_fail (poppler_structure_element->elem != NULL, -1);
+
+ Ref ref; /* filled in by getPageRef () when it succeeds */
+ if (poppler_structure_element->elem->getPageRef (ref))
+ {
+ return poppler_structure_element->document->doc->findPage(ref.num, ref.gen) - 1; /* NOTE(review): the "- 1" presumably turns findPage ()'s result into a 0-based index -- confirm */
+ }
+
+ return -1; /* the element carries no page reference */
+}
+
+/**
+ * poppler_structure_element_is_content:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Checks whether an element is actual document content.
+ *
+ * Return value: %TRUE if the element is content, or %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_structure_element_is_content (PopplerStructureElement *poppler_structure_element)
+{
+ g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+ g_return_val_if_fail (poppler_structure_element->elem != NULL, FALSE); /* nothing to query without a backing elem */
+
+ return poppler_structure_element->elem->isContent (); /* the answer comes straight from the wrapped elem */
+}
+
+/**
+ * poppler_structure_element_is_inline:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Checks whether an element is an inline element.
+ *
+ * Return value: %TRUE if the element is an inline element, or %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_structure_element_is_inline (PopplerStructureElement *poppler_structure_element)
+{
+ g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+ g_return_val_if_fail (poppler_structure_element->elem != NULL, FALSE); /* nothing to query without a backing elem */
+
+ return poppler_structure_element->elem->isInline (); /* the answer comes straight from the wrapped elem */
+}
+
+/**
+ * poppler_structure_element_is_block:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Checks whether an element is a block element.
+ *
+ * Return value: %TRUE if the element is a block element, or %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_element)
+{
+ g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+ g_return_val_if_fail (poppler_structure_element->elem != NULL, FALSE); /* nothing to query without a backing elem */
+
+ return poppler_structure_element->elem->isBlock (); /* the answer comes straight from the wrapped elem */
+}
+
+/**
+ * poppler_structure_element_get_id:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the identifier of an element.
+ *
+ * Return value: (transfer full): The identifier of the element (if
+ * defined), or %NULL.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_id (PopplerStructureElement *poppler_structure_element)
+{
+ g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+ g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+ GooString *string = poppler_structure_element->elem->getID (); /* not deleted here, unlike the getText () result in poppler_structure_element_get_text () */
+ return string ? _poppler_goo_string_to_utf8 (string) : NULL; /* a missing ID is reported as NULL, not as an empty string */
+}
+
+/**
+ * poppler_structure_element_get_title:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the title of an element.
+ *
+ * Return value: (transfer full): The title of the element (if defined), or %NULL.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_title (PopplerStructureElement *poppler_structure_element)
+{
+ g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+ g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+ GooString *string = poppler_structure_element->elem->getTitle (); /* not deleted here, unlike the getText () result in poppler_structure_element_get_text () */
+ return string ? _poppler_goo_string_to_utf8 (string) : NULL; /* a missing title is reported as NULL, not as an empty string */
+}
+
+/**
+ * poppler_structure_element_get_abbreviation:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Acronyms and abbreviations contained in elements of type
+ * #POPPLER_STRUCTURE_ELEMENT_SPAN may have an associated expanded
+ * text form, which can be retrieved using this function.
+ *
+ * Return value: (transfer full): Text of the expanded abbreviation if the
+ * element text is an abbreviation or acronym, %NULL if not.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_abbreviation (PopplerStructureElement *poppler_structure_element)
+{
+ g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+ g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+ if (poppler_structure_element->elem->getType () != StructElement::Span) /* only Span elements are queried */
+ return NULL; /* any other element type yields NULL without looking further */
+
+ GooString *string = poppler_structure_element->elem->getExpandedAbbr ();
+ return string ? _poppler_goo_string_to_utf8 (string) : NULL; /* a Span without expansion is reported as NULL */
+}
+
+/**
+ * poppler_structure_element_get_language:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the language and country code for the content in an element,
+ * in two-letter ISO format, e.g. <code>en_ES</code>, or %NULL if not
+ * defined.
+ *
+ * Return value: (transfer full): language and country code, or %NULL.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_language (PopplerStructureElement *poppler_structure_element)
+{
+ g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+ g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+ GooString *string = poppler_structure_element->elem->getLanguage (); /* not deleted here, unlike the getText () result in poppler_structure_element_get_text () */
+ return string ? _poppler_goo_string_to_utf8 (string) : NULL; /* a missing language is reported as NULL, not as an empty string */
+}
+
+/**
+ * poppler_structure_element_get_alt_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the “alternate” text representation of the element (and its child
+ * elements). This is mostly used for non-text elements like images and
+ * figures, to specify a textual description of the element.
+ *
+ * Note that for elements containing proper text, the function
+ * poppler_structure_element_get_text() must be used instead.
+ *
+ * Return value: (transfer full): The alternate text representation for the
+ * element, or %NULL if not defined.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element)
+{
+ g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+ g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+ GooString *string = poppler_structure_element->elem->getAltText (); /* not deleted here, unlike the getText () result in poppler_structure_element_get_text () */
+ return string ? _poppler_goo_string_to_utf8 (string) : NULL; /* missing alternate text is reported as NULL, not as an empty string */
+}
+
+/**
+ * poppler_structure_element_get_actual_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the actual text enclosed by the element (and its child elements).
+ * The actual text is mostly used for non-text elements like images and
+ * figures which <em>do</em> have the graphical appearance of text, like
+ * a logo. For those the actual text is the equivalent text to those
+ * graphical elements which look like text when rendered.
+ *
+ * Note that for elements containing proper text, the function
+ * poppler_structure_element_get_text() must be used instead.
+ *
+ * Return value: (transfer full): The actual text for the element, or %NULL
+ * if not defined.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element)
+{
+ g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+ g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+ GooString *string = poppler_structure_element->elem->getActualText (); /* not deleted here, unlike the getText () result in poppler_structure_element_get_text () */
+ return string ? _poppler_goo_string_to_utf8 (string) : NULL; /* missing actual text is reported as NULL, not as an empty string */
+}
+
+/**
+ * poppler_structure_element_get_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ * @recursive: If %TRUE, the text of child elements is gathered recursively
+ * in logical order and returned as part of the result.
+ *
+ * Obtains the text enclosed by an element, or the text enclosed by the
+ * elements in the subtree (including the element itself).
+ *
+ * Return value: (transfer full): A string, or %NULL if no text was obtained.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_element,
+ gboolean recursive)
+{
+ g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+ g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+ GooString *string = poppler_structure_element->elem->getText (recursive);
+ gchar *result = string ? _poppler_goo_string_to_utf8 (string) : NULL; /* convert first: the GooString is destroyed on the next line */
+ delete string; /* unlike the other getters, the getText () result is released here; deleting NULL is harmless */
+ return result;
+}
+
+struct _PopplerStructureElementIter /* cursor over the children of one node of the structure tree */
+{
+ PopplerDocument *document; /* each iterator holds its own reference; dropped in poppler_structure_element_iter_free () */
+ union {
+ StructElement *elem; /* node whose children are walked when is_root is FALSE */
+ StructTreeRoot *root; /* tree root whose children are walked when is_root is TRUE */
+ };
+ gboolean is_root; /* tells which member of the union above is the valid one */
+ unsigned index; /* position of the current child, passed to getElement () */
+};
+
+POPPLER_DEFINE_BOXED_TYPE (PopplerStructureElementIter,
+ poppler_structure_element_iter,
+ poppler_structure_element_iter_copy,
+ poppler_structure_element_iter_free)
+
+/**
+ * poppler_structure_element_iter_copy:
+ * @iter: a #PopplerStructureElementIter
+ *
+ * Creates a new #PopplerStructureElementIter as a copy of @iter. The
+ * returned value must be freed with poppler_structure_element_iter_free().
+ *
+ * Return value: (transfer full): a new #PopplerStructureElementIter
+ *
+ * Since: 0.26
+ */
+PopplerStructureElementIter *
+poppler_structure_element_iter_copy (PopplerStructureElementIter *iter)
+{
+ PopplerStructureElementIter *new_iter;
+
+ g_return_val_if_fail (iter != NULL, NULL);
+
+ new_iter = g_slice_dup (PopplerStructureElementIter, iter); /* field-by-field copy: same node, same index */
+ new_iter->document = (PopplerDocument *) g_object_ref (new_iter->document); /* the copy takes its own document reference */
+
+ return new_iter;
+}
+
+/**
+ * poppler_structure_element_iter_free:
+ * @iter: a #PopplerStructureElementIter, or %NULL
+ *
+ * Frees @iter. Passing %NULL is allowed and does nothing.
+ *
+ * Since: 0.26
+ */
+void
+poppler_structure_element_iter_free (PopplerStructureElementIter *iter)
+{
+ if (G_UNLIKELY (iter == NULL)) /* tolerate NULL so callers can free unconditionally */
+ return;
+
+ g_object_unref (iter->document); /* drop the reference this iterator held on the document */
+ g_slice_free (PopplerStructureElementIter, iter);
+}
+
+/**
+ * poppler_structure_element_iter_new:
+ * @poppler_document: a #PopplerDocument.
+ *
+ * Returns the root #PopplerStructureElementIter for @poppler_document, or %NULL. The
+ * returned value must be freed with poppler_structure_element_iter_free().
+ *
+ * Documents may have an associated structure tree &mdash;mostly, Tagged-PDF
+ * compliant documents&mdash; which can be used to obtain information about
+ * the document structure and its contents. Each node in the tree contains
+ * a #PopplerStructureElement.
+ *
+ * Here is a simple example that walks the whole tree:
+ *
+ * <informalexample><programlisting>
+ * static void
+ * walk_structure (PopplerStructureElementIter *iter)
+ * {
+ * do {
+ * /<!-- -->* Get the element and do something with it *<!-- -->/
+ * PopplerStructureElementIter *child = poppler_structure_element_iter_get_child (iter);
+ * if (child)
+ * walk_structure (child);
+ * poppler_structure_element_iter_free (child);
+ * } while (poppler_structure_element_iter_next (iter));
+ * }
+ * ...
+ * {
+ * PopplerStructureElementIter *iter = poppler_structure_element_iter_new (document);
+ * if (iter) walk_structure (iter);
+ * poppler_structure_element_iter_free (iter);
+ * }
+ * </programlisting></informalexample>
+ *
+ * Return value: (transfer full): a new #PopplerStructureElementIter, or %NULL if the
+ * document doesn't have a structure tree or the tree has no elements.
+ *
+ * Since: 0.26
+ */
+PopplerStructureElementIter *
+poppler_structure_element_iter_new (PopplerDocument *poppler_document)
+{
+ PopplerStructureElementIter *iter;
+ StructTreeRoot *root;
+
+ g_return_val_if_fail (POPPLER_IS_DOCUMENT (poppler_document), NULL);
+
+ root = poppler_document->doc->getStructTreeRoot ();
+ if (root == NULL) /* the document has no structure tree */
+ return NULL;
+
+ if (root->getNumElements () == 0) /* an empty tree is reported the same way as a missing one */
+ return NULL;
+
+ iter = g_slice_new0 (PopplerStructureElementIter); /* zero-filled, so index starts at 0 */
+ iter->document = (PopplerDocument *) g_object_ref (poppler_document);
+ iter->is_root = TRUE; /* the union holds a StructTreeRoot, not a StructElement */
+ iter->root = root;
+
+ return iter;
+}
+
+/**
+ * poppler_structure_element_iter_next:
+ * @iter: a #PopplerStructureElementIter
+ *
+ * Sets @iter to point to the next structure element at the current level
+ * of the tree, if valid. See poppler_structure_element_iter_new() for more
+ * information.
+ *
+ * Return value: %TRUE, if @iter was set to the next structure element; %FALSE if none is left
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_structure_element_iter_next (PopplerStructureElementIter *iter)
+{
+ unsigned elements; /* number of children at the level @iter walks */
+
+ g_return_val_if_fail (iter != NULL, FALSE);
+
+ elements = iter->is_root
+ ? iter->root->getNumElements ()
+ : iter->elem->getNumElements ();
+
+ return ++iter->index < elements; /* index is advanced even when this returns FALSE */
+}
+
+/**
+ * poppler_structure_element_iter_get_element:
+ * @iter: a #PopplerStructureElementIter
+ *
+ * Returns the #PopplerStructureElement associated with @iter.
+ *
+ * Return value: (transfer full): a new #PopplerStructureElement
+ *
+ * Since: 0.26
+ */
+PopplerStructureElement *
+poppler_structure_element_iter_get_element (PopplerStructureElementIter *iter)
+{
+ StructElement *elem; /* child that @iter currently points at */
+
+ g_return_val_if_fail (iter != NULL, NULL);
+
+ elem = iter->is_root
+ ? iter->root->getElement (iter->index)
+ : iter->elem->getElement (iter->index);
+
+ return _poppler_structure_element_new (iter->document, elem); /* a fresh wrapper is built on every call */
+}
+
+/**
+ * poppler_structure_element_iter_get_child:
+ * @parent: a #PopplerStructureElementIter
+ *
+ * Returns a new iterator to the children elements of the
+ * #PopplerStructureElement associated with @parent. The returned value must
+ * be freed with poppler_structure_element_iter_free().
+ *
+ * Return value: (transfer full): a new #PopplerStructureElementIter, or %NULL if the element has no children
+ *
+ * Since: 0.26
+ */
+PopplerStructureElementIter *
+poppler_structure_element_iter_get_child (PopplerStructureElementIter *parent)
+{
+ StructElement *elem; /* element that @parent currently points at */
+
+ g_return_val_if_fail (parent != NULL, NULL);
+
+ elem = parent->is_root
+ ? parent->root->getElement (parent->index)
+ : parent->elem->getElement (parent->index);
+
+ if (elem->getNumElements () > 0) /* an iterator is only created when there is something to walk */
+ {
+ PopplerStructureElementIter *child = g_slice_new0 (PopplerStructureElementIter); /* zero-filled: is_root is FALSE and index is 0 */
+ child->document = (PopplerDocument *) g_object_ref (parent->document);
+ child->elem = elem;
+ return child;
+ }
+
+ return NULL; /* leaf element */
+}
diff --git a/glib/poppler-structure-element.h b/glib/poppler-structure-element.h
new file mode 100644
index 0000000..a5ac04b
--- /dev/null
+++ b/glib/poppler-structure-element.h
@@ -0,0 +1,112 @@
+/* poppler-structure-element.h: glib interface to poppler
+ *
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __POPPLER_STRUCTURE_ELEMENT_H__
+#define __POPPLER_STRUCTURE_ELEMENT_H__
+
+#include <glib-object.h>
+#include "poppler.h"
+
+G_BEGIN_DECLS
+
+#define POPPLER_TYPE_STRUCTURE_ELEMENT (poppler_structure_element_get_type ())
+#define POPPLER_STRUCTURE_ELEMENT(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), POPPLER_TYPE_STRUCTURE_ELEMENT, PopplerStructureElement))
+#define POPPLER_IS_STRUCTURE_ELEMENT(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), POPPLER_TYPE_STRUCTURE_ELEMENT))
+
+/**
+ * PopplerStructureElementKind:
+ */
+typedef enum {
+ POPPLER_STRUCTURE_ELEMENT_UNKNOWN,
+ POPPLER_STRUCTURE_ELEMENT_CONTENT,
+ POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE,
+ POPPLER_STRUCTURE_ELEMENT_DOCUMENT,
+ POPPLER_STRUCTURE_ELEMENT_PART,
+ POPPLER_STRUCTURE_ELEMENT_ARTICLE,
+ POPPLER_STRUCTURE_ELEMENT_SECTION,
+ POPPLER_STRUCTURE_ELEMENT_DIV,
+ POPPLER_STRUCTURE_ELEMENT_SPAN,
+ POPPLER_STRUCTURE_ELEMENT_QUOTE,
+ POPPLER_STRUCTURE_ELEMENT_NOTE,
+ POPPLER_STRUCTURE_ELEMENT_REFERENCE,
+ POPPLER_STRUCTURE_ELEMENT_BIBENTRY,
+ POPPLER_STRUCTURE_ELEMENT_CODE,
+ POPPLER_STRUCTURE_ELEMENT_LINK,
+ POPPLER_STRUCTURE_ELEMENT_ANNOT,
+ POPPLER_STRUCTURE_ELEMENT_RUBY,
+ POPPLER_STRUCTURE_ELEMENT_WARICHU,
+ POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE,
+ POPPLER_STRUCTURE_ELEMENT_CAPTION,
+ POPPLER_STRUCTURE_ELEMENT_NONSTRUCT,
+ POPPLER_STRUCTURE_ELEMENT_TOC,
+ POPPLER_STRUCTURE_ELEMENT_TOC_ITEM, /* StructElement::TOCI */
+ POPPLER_STRUCTURE_ELEMENT_INDEX,
+ POPPLER_STRUCTURE_ELEMENT_PRIVATE,
+ POPPLER_STRUCTURE_ELEMENT_PARAGRAPH, /* StructElement::P */
+ POPPLER_STRUCTURE_ELEMENT_HEADING, /* StructElement::H */
+ POPPLER_STRUCTURE_ELEMENT_HEADING_1, /* StructElement::H1 */
+ POPPLER_STRUCTURE_ELEMENT_HEADING_2, /* StructElement::H2 */
+ POPPLER_STRUCTURE_ELEMENT_HEADING_3, /* StructElement::H3 */
+ POPPLER_STRUCTURE_ELEMENT_HEADING_4, /* StructElement::H4 */
+ POPPLER_STRUCTURE_ELEMENT_HEADING_5, /* StructElement::H5 */
+ POPPLER_STRUCTURE_ELEMENT_HEADING_6, /* StructElement::H6 */
+ POPPLER_STRUCTURE_ELEMENT_LIST, /* StructElement::L */
+ POPPLER_STRUCTURE_ELEMENT_LIST_ITEM, /* StructElement::LI */
+ POPPLER_STRUCTURE_ELEMENT_LIST_LABEL, /* StructElement::Lbl */
+ POPPLER_STRUCTURE_ELEMENT_LIST_BODY, /* StructElement::LBody */
+ POPPLER_STRUCTURE_ELEMENT_TABLE,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_ROW, /* StructElement::TR */
+ POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING, /* StructElement::TH */
+ POPPLER_STRUCTURE_ELEMENT_TABLE_DATA, /* StructElement::TD */
+ POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER, /* StructElement::THead */
+ POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER, /* StructElement::TFoot */
+ POPPLER_STRUCTURE_ELEMENT_TABLE_BODY, /* StructElement::TBody */
+ POPPLER_STRUCTURE_ELEMENT_FIGURE,
+ POPPLER_STRUCTURE_ELEMENT_FORMULA,
+ POPPLER_STRUCTURE_ELEMENT_FORM,
+} PopplerStructureElementKind;
+
+
+GType poppler_structure_element_get_type (void) G_GNUC_CONST;
+PopplerStructureElementKind poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element);
+gint poppler_structure_element_get_page (PopplerStructureElement *poppler_structure_element);
+gboolean poppler_structure_element_is_content (PopplerStructureElement *poppler_structure_element);
+gboolean poppler_structure_element_is_inline (PopplerStructureElement *poppler_structure_element);
+gboolean poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_element);
+gchar *poppler_structure_element_get_id (PopplerStructureElement *poppler_structure_element);
+gchar *poppler_structure_element_get_title (PopplerStructureElement *poppler_structure_element);
+gchar *poppler_structure_element_get_abbreviation (PopplerStructureElement *poppler_structure_element);
+gchar *poppler_structure_element_get_language (PopplerStructureElement *poppler_structure_element);
+gchar *poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_element,
+ gboolean recursive);
+gchar *poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element);
+gchar *poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element);
+
+#define POPPLER_TYPE_STRUCTURE_ELEMENT_ITER (poppler_structure_element_iter_get_type ())
+GType poppler_structure_element_iter_get_type (void) G_GNUC_CONST;
+PopplerStructureElementIter *poppler_structure_element_iter_new (PopplerDocument *poppler_document);
+PopplerStructureElementIter *poppler_structure_element_iter_get_child (PopplerStructureElementIter *parent);
+PopplerStructureElementIter *poppler_structure_element_iter_copy (PopplerStructureElementIter *iter);
+PopplerStructureElement *poppler_structure_element_iter_get_element (PopplerStructureElementIter *iter);
+gboolean poppler_structure_element_iter_next (PopplerStructureElementIter *iter);
+void poppler_structure_element_iter_free (PopplerStructureElementIter *iter);
+
+G_END_DECLS
+
+#endif /* !__POPPLER_STRUCTURE_ELEMENT_H__ */
diff --git a/glib/poppler.h b/glib/poppler.h
index 0db97d0..1da2af1 100644
--- a/glib/poppler.h
+++ b/glib/poppler.h
@@ -208,6 +208,8 @@ typedef struct _PopplerAnnotLine PopplerAnnotLine;
typedef struct _PopplerAnnotCircle PopplerAnnotCircle;
typedef struct _PopplerAnnotSquare PopplerAnnotSquare;
typedef struct _PopplerQuadrilateral PopplerQuadrilateral;
+typedef struct _PopplerStructureElement PopplerStructureElement;
+typedef struct _PopplerStructureElementIter PopplerStructureElementIter;
typedef enum
{
@@ -233,5 +235,6 @@ G_END_DECLS
#include "poppler-date.h"
#include "poppler-movie.h"
#include "poppler-media.h"
+#include "poppler-structure-element.h"
#endif /* __POPPLER_GLIB_H__ */
diff --git a/glib/reference/poppler-docs.sgml b/glib/reference/poppler-docs.sgml
index a9d5158..b817a0d 100644
--- a/glib/reference/poppler-docs.sgml
+++ b/glib/reference/poppler-docs.sgml
@@ -23,6 +23,8 @@
<xi:include href="xml/poppler-layer.xml"/>
<xi:include href="xml/poppler-media.xml"/>
<xi:include href="xml/poppler-movie.xml"/>
+ <xi:include href="xml/poppler-structure.xml"/>
+ <xi:include href="xml/poppler-structure-element.xml"/>
<xi:include href="xml/poppler-features.xml"/>
</chapter>
diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt
index a954f64..fff370b 100644
--- a/glib/reference/poppler-sections.txt
+++ b/glib/reference/poppler-sections.txt
@@ -585,6 +585,43 @@ poppler_movie_get_type
</SECTION>
<SECTION>
+<FILE>poppler-structure-element</FILE>
+<TITLE>PopplerStructureElement</TITLE>
+PopplerStructureElement
+PopplerStructureElementKind
+PopplerStructureElementIter
+poppler_structure_element_iter_new
+poppler_structure_element_iter_next
+poppler_structure_element_iter_copy
+poppler_structure_element_iter_free
+poppler_structure_element_iter_get_child
+poppler_structure_element_iter_get_element
+poppler_structure_element_get_kind
+poppler_structure_element_get_page
+poppler_structure_element_is_content
+poppler_structure_element_is_inline
+poppler_structure_element_is_block
+poppler_structure_element_get_id
+poppler_structure_element_get_title
+poppler_structure_element_get_abbreviation
+poppler_structure_element_get_language
+poppler_structure_element_get_text
+poppler_structure_element_get_alt_text
+poppler_structure_element_get_actual_text
+
+<SUBSECTION Standard>
+POPPLER_STRUCTURE_ELEMENT
+POPPLER_IS_STRUCTURE_ELEMENT
+POPPLER_TYPE_STRUCTURE_ELEMENT
+POPPLER_TYPE_STRUCTURE_ELEMENT_ITER
+POPPLER_TYPE_STRUCTURE_ELEMENT_KIND
+
+<SUBSECTION Private>
+poppler_structure_element_get_type
+poppler_structure_element_iter_get_type
+</SECTION>
+
+<SECTION>
<FILE>poppler-features</FILE>
POPPLER_HAS_CAIRO
POPPLER_MAJOR_VERSION
diff --git a/glib/reference/poppler.types b/glib/reference/poppler.types
index eed9849..388852a 100644
--- a/glib/reference/poppler.types
+++ b/glib/reference/poppler.types
@@ -8,3 +8,5 @@ poppler_annot_get_type
poppler_layer_get_type
poppler_media_get_type
poppler_movie_get_type
+poppler_structure_element_get_type
+poppler_structure_element_iter_get_type
More information about the poppler
mailing list