[poppler] 2 commits - glib/demo glib/poppler-page.cc glib/poppler-page.h
Carlos Garcia Campos
carlosgc at kemper.freedesktop.org
Wed Jun 16 02:56:15 PDT 2010
glib/demo/text.c | 182 +++++++++++++++++++++++++++++++++++++++++++++------
glib/poppler-page.cc | 91 +++++++++++++++++++++++++
glib/poppler-page.h | 4 -
3 files changed, 258 insertions(+), 19 deletions(-)
New commits:
commit 35e87d2062b1d82db0d765de5a6187122a0fa99c
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date: Wed Jun 16 11:52:25 2010 +0200
[glib-demo] Add demo for poppler_page_get_text_layout()
diff --git a/glib/demo/text.c b/glib/demo/text.c
index e119082..b7a5c91 100644
--- a/glib/demo/text.c
+++ b/glib/demo/text.c
@@ -20,11 +20,22 @@
#include "text.h"
+enum {
+ TEXT_X1_COLUMN, /* rectangle x1, stored as a formatted string */
+ TEXT_Y1_COLUMN, /* rectangle y1, stored as a formatted string */
+ TEXT_X2_COLUMN, /* rectangle x2, stored as a formatted string */
+ TEXT_Y2_COLUMN, /* rectangle y2, stored as a formatted string */
+ TEXT_OFFSET_COLUMN, /* array index of the rectangle, as a string for display */
+ TEXT_OFFPTR_COLUMN, /* same index packed with GINT_TO_POINTER, read back on selection */
+ N_COLUMNS
+};
+
typedef struct {
PopplerDocument *doc;
GtkWidget *timer_label;
GtkTextBuffer *buffer;
+ GtkListStore *model; /* one row per text-layout rectangle; backs the tree view */
gint page;
} PgdTextDemo;
@@ -45,6 +56,11 @@ pgd_text_free (PgdTextDemo *demo)
demo->buffer = NULL;
}
+ if (demo->model) {
+ g_object_unref (demo->model);
+ demo->model = NULL;
+ }
+
g_free (demo);
}
@@ -52,16 +68,21 @@ static void
pgd_text_get_text (GtkWidget *button,
PgdTextDemo *demo)
{
- PopplerPage *page;
- PopplerRectangle rect;
- gdouble width, height;
- gchar *text;
- GTimer *timer;
+ PopplerPage *page;
+ PopplerRectangle rect;
+ PopplerRectangle *recs = NULL;
+ guint n_recs;
+ gdouble width, height;
+ gchar *text;
+ GTimer *timer;
+ gint i;
page = poppler_document_get_page (demo->doc, demo->page);
if (!page)
return;
+ gtk_list_store_clear (demo->model);
+
poppler_page_get_size (page, &width, &height);
rect.x1 = rect.y1 = 0;
rect.x2 = width;
@@ -72,10 +93,17 @@ pgd_text_get_text (GtkWidget *button,
g_timer_stop (timer);
if (text) {
- gchar *str;
+ gchar *str;
+ gdouble text_elapsed;
- str = g_strdup_printf ("<i>got text in %.4f seconds</i>",
- g_timer_elapsed (timer, NULL));
+ text_elapsed = g_timer_elapsed (timer, NULL);
+
+ g_timer_start (timer);
+ poppler_page_get_text_layout (page, &recs, &n_recs);
+ g_timer_stop (timer);
+
+ str = g_strdup_printf ("<i>got text in %.4f seconds, text layout in %.4f seconds</i>",
+ text_elapsed, g_timer_elapsed (timer, NULL));
gtk_label_set_markup (GTK_LABEL (demo->timer_label), str);
g_free (str);
} else {
@@ -89,8 +117,62 @@ pgd_text_get_text (GtkWidget *button,
gtk_text_buffer_set_text (demo->buffer, text, strlen (text));
g_free (text);
}
+
+ for (i = 0; i < n_recs; i++) {
+ GtkTreeIter iter;
+ gchar *x1, *y1, *x2, *y2;
+ gchar *offset;
+
+ x1 = g_strdup_printf ("%.2f", recs[i].x1);
+ y1 = g_strdup_printf ("%.2f", recs[i].y1);
+ x2 = g_strdup_printf ("%.2f", recs[i].x2);
+ y2 = g_strdup_printf ("%.2f", recs[i].y2);
+
+ offset = g_strdup_printf ("%d", i);
+
+ gtk_list_store_append (demo->model, &iter);
+ gtk_list_store_set (demo->model, &iter,
+ TEXT_X1_COLUMN, x1,
+ TEXT_Y1_COLUMN, y1,
+ TEXT_X2_COLUMN, x2,
+ TEXT_Y2_COLUMN, y2,
+ TEXT_OFFSET_COLUMN, offset,
+ TEXT_OFFPTR_COLUMN, GINT_TO_POINTER (i),
+ -1);
+
+ g_free (x1);
+ g_free (y1);
+ g_free (x2);
+ g_free (y2);
+ g_free (offset);
+ }
+
+ g_free (recs);
+}
+
+/* Tree-selection "changed" handler: selects, in the text buffer, the single
+ * character whose offset is stored in the newly selected row. */
+static void
+pgd_text_selection_changed (GtkTreeSelection *treeselection,
+                            PgdTextDemo      *demo)
+{
+        GtkTreeModel *store;
+        GtkTreeIter   row;
+        GtkTextIter   start, end;
+        gpointer      char_offset;
+
+        if (!gtk_tree_selection_get_selected (treeselection, &store, &row))
+                return;
+
+        gtk_tree_model_get (store, &row, TEXT_OFFPTR_COLUMN, &char_offset, -1);
+
+        gtk_text_buffer_get_iter_at_offset (demo->buffer, &start, GPOINTER_TO_INT (char_offset));
+        end = start;
+        gtk_text_iter_forward_char (&end);
+        gtk_text_buffer_select_range (demo->buffer, &start, &end);
}
+
static void
pgd_text_page_selector_value_changed (GtkSpinButton *spinbutton,
PgdTextDemo *demo)
@@ -101,14 +183,17 @@ pgd_text_page_selector_value_changed (GtkSpinButton *spinbutton,
GtkWidget *
pgd_text_create_widget (PopplerDocument *document)
{
- PgdTextDemo *demo;
- GtkWidget *label;
- GtkWidget *vbox;
- GtkWidget *hbox, *page_selector;
- GtkWidget *button;
- GtkWidget *swindow, *textview;
- gchar *str;
- gint n_pages;
+ PgdTextDemo *demo;
+ GtkWidget *label;
+ GtkWidget *vbox;
+ GtkWidget *hbox, *page_selector;
+ GtkWidget *button;
+ GtkWidget *swindow, *textview, *treeview;
+ GtkTreeSelection *selection;
+ GtkWidget *hpaned;
+ GtkCellRenderer *renderer;
+ gchar *str;
+ gint n_pages;
demo = g_new0 (PgdTextDemo, 1);
@@ -153,20 +238,81 @@ pgd_text_create_widget (PopplerDocument *document)
gtk_box_pack_start (GTK_BOX (vbox), demo->timer_label, FALSE, TRUE, 0);
gtk_widget_show (demo->timer_label);
+ hpaned = gtk_hpaned_new ();
+ gtk_paned_set_position (GTK_PANED (hpaned), 300);
+
+ swindow = gtk_scrolled_window_new (NULL, NULL);
+ gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow),
+ GTK_POLICY_AUTOMATIC,
+ GTK_POLICY_AUTOMATIC);
+
+ demo->model = gtk_list_store_new (N_COLUMNS,
+ G_TYPE_STRING,
+ G_TYPE_STRING, G_TYPE_STRING,
+ G_TYPE_STRING, G_TYPE_STRING,
+ G_TYPE_POINTER);
+ treeview = gtk_tree_view_new_with_model (GTK_TREE_MODEL (demo->model));
+
+ renderer = gtk_cell_renderer_text_new ();
+ gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+ TEXT_X1_COLUMN, "X1",
+ renderer,
+ "text", TEXT_X1_COLUMN,
+ NULL);
+ renderer = gtk_cell_renderer_text_new ();
+ gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+ TEXT_Y1_COLUMN, "Y1",
+ renderer,
+ "text", TEXT_Y1_COLUMN,
+ NULL);
+ renderer = gtk_cell_renderer_text_new ();
+ gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+ TEXT_X2_COLUMN, "X2",
+ renderer,
+ "text", TEXT_X2_COLUMN,
+ NULL);
+ renderer = gtk_cell_renderer_text_new ();
+ gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+ TEXT_Y2_COLUMN, "Y2",
+ renderer,
+ "text", TEXT_Y2_COLUMN,
+ NULL);
+
+ renderer = gtk_cell_renderer_text_new ();
+ gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+ TEXT_OFFSET_COLUMN, "Offset",
+ renderer,
+ "text", TEXT_OFFSET_COLUMN,
+ NULL);
+
+ selection = gtk_tree_view_get_selection (GTK_TREE_VIEW (treeview));
+ g_signal_connect (selection, "changed",
+ G_CALLBACK (pgd_text_selection_changed),
+ (gpointer) demo);
+
+ gtk_container_add (GTK_CONTAINER (swindow), treeview);
+ gtk_widget_show (treeview);
+
+ gtk_paned_add1 (GTK_PANED (hpaned), swindow);
+ gtk_widget_show (swindow);
+
swindow = gtk_scrolled_window_new (NULL, NULL);
gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow),
GTK_POLICY_AUTOMATIC,
GTK_POLICY_AUTOMATIC);
-
+
demo->buffer = gtk_text_buffer_new (NULL);
textview = gtk_text_view_new_with_buffer (demo->buffer);
gtk_container_add (GTK_CONTAINER (swindow), textview);
gtk_widget_show (textview);
- gtk_box_pack_start (GTK_BOX (vbox), swindow, TRUE, TRUE, 0);
+ gtk_paned_add2 (GTK_PANED (hpaned), swindow);
gtk_widget_show (swindow);
+ gtk_box_pack_start (GTK_BOX (vbox), hpaned, TRUE, TRUE, 0);
+ gtk_widget_show (hpaned);
+
g_object_weak_ref (G_OBJECT (vbox),
(GWeakNotify)pgd_text_free,
demo);
commit ddcea568b3a7334e062d6214f43d0a2c2ec95be4
Author: Daniel Garcia <danigm at yaco.es>
Date: Tue Jun 15 16:57:32 2010 +0200
[glib] Add poppler_page_get_text_layout()
Returns an array of PopplerRectangle items, where each rectangle is the
position of one text character.
The position in this array represents the offset in the text returned by
poppler_page_get_text().
diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index 19ea941..01d5540 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -1736,3 +1736,94 @@ poppler_page_get_crop_box (PopplerPage *page, PopplerRectangle *rect)
rect->y2 = cropBox->y2;
}
+/**
+ * poppler_page_get_text_layout:
+ * @page: A #PopplerPage
+ * @rectangles: return location for an array of #PopplerRectangle
+ * @n_rectangles: length of returned array
+ *
+ * Obtains the layout of the text as an array of #PopplerRectangle. The array
+ * must be freed with g_free () when done; it is %NULL if the page has no text.
+ *
+ * The position in the array represents an offset in the text returned by
+ * poppler_page_get_text ().
+ *
+ * Return value: %TRUE if the page contains text, %FALSE otherwise
+ **/
+gboolean
+poppler_page_get_text_layout (PopplerPage       *page,
+                              PopplerRectangle **rectangles,
+                              guint             *n_rectangles)
+{
+  TextPage *text;
+  TextWordList *wordlist;
+  TextWord *word, *nextword;
+  PopplerRectangle *rect;
+  int i, j, offset = 0;
+  gdouble x1, y1, x2, y2;
+  gdouble x3, y3, x4, y4;
+
+  g_return_val_if_fail (POPPLER_IS_PAGE (page), FALSE);
+
+  *rectangles = NULL;
+  *n_rectangles = 0;
+
+  text = poppler_page_get_text_page (page);
+  wordlist = text->makeWordList (gFalse);
+
+  if (wordlist->getLength () <= 0)
+    {
+      // the word list must be deleted on every path, even when it is empty
+      delete wordlist;
+      return FALSE;
+    }
+
+  // Getting the array size: one slot per character plus one separator
+  // slot (space or line break) after each word.
+  for (i = 0; i < wordlist->getLength (); i++)
+    {
+      word = wordlist->get (i);
+      *n_rectangles += word->getLength () + 1;
+    }
+
+  *rectangles = g_new (PopplerRectangle, *n_rectangles);
+
+  // Calculating each char position
+  for (i = 0; i < wordlist->getLength (); i++)
+    {
+      word = wordlist->get (i);
+      for (j = 0; j < word->getLength (); j++)
+        {
+          rect = *rectangles + offset;
+          word->getCharBBox (j, &rect->x1, &rect->y1, &rect->x2, &rect->y2);
+          offset++;
+        }
+
+      // adding spaces and break lines
+      rect = *rectangles + offset;
+      word->getBBox (&x1, &y1, &x2, &y2);
+
+      nextword = word->getNext ();
+      if (nextword)
+        {
+          nextword->getBBox (&x3, &y3, &x4, &y4);
+          // space is from one word to other and with the same height as
+          // first word.
+          rect->x1 = x2;
+          rect->y1 = y1;
+          rect->x2 = x3;
+          rect->y2 = y2;
+        }
+      else
+        {
+          // end of line: degenerate rectangle at the word's (x2, y2) corner
+          rect->x1 = rect->x2 = x2;
+          rect->y1 = rect->y2 = y2;
+        }
+      offset++;
+    }
+
+  delete wordlist;
+
+  return TRUE;
+}
diff --git a/glib/poppler-page.h b/glib/poppler-page.h
index 20dc20f..3a31acd 100644
--- a/glib/poppler-page.h
+++ b/glib/poppler-page.h
@@ -114,7 +114,9 @@ GList *poppler_page_get_annot_mapping (PopplerPage *pa
void poppler_page_free_annot_mapping (GList *list);
void poppler_page_get_crop_box (PopplerPage *page,
PopplerRectangle *rect);
-
+gboolean poppler_page_get_text_layout (PopplerPage *page,
+ PopplerRectangle **rectangles,
+ guint *n_rectangles);
/* A rectangle on a page, with coordinates in PDF points. */
#define POPPLER_TYPE_RECTANGLE (poppler_rectangle_get_type ())
More information about the poppler
mailing list