[poppler] 2 commits - glib/demo glib/poppler-page.cc glib/poppler-page.h

Carlos Garcia Campos carlosgc at kemper.freedesktop.org
Wed Jun 16 02:56:15 PDT 2010


 glib/demo/text.c     |  182 +++++++++++++++++++++++++++++++++++++++++++++------
 glib/poppler-page.cc |   91 +++++++++++++++++++++++++
 glib/poppler-page.h  |    4 -
 3 files changed, 258 insertions(+), 19 deletions(-)

New commits:
commit 35e87d2062b1d82db0d765de5a6187122a0fa99c
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date:   Wed Jun 16 11:52:25 2010 +0200

    [glib-demo] Add demo for poppler_page_get_text_layout()

diff --git a/glib/demo/text.c b/glib/demo/text.c
index e119082..b7a5c91 100644
--- a/glib/demo/text.c
+++ b/glib/demo/text.c
@@ -20,11 +20,22 @@
 
 #include "text.h"
 
+enum {	/* Columns of the list store that holds one row per layout rectangle */
+	TEXT_X1_COLUMN,		/* string: rectangle x1 formatted with "%.2f" */
+	TEXT_Y1_COLUMN,		/* string: rectangle y1 formatted with "%.2f" */
+	TEXT_X2_COLUMN,		/* string: rectangle x2 formatted with "%.2f" */
+	TEXT_Y2_COLUMN,		/* string: rectangle y2 formatted with "%.2f" */
+	TEXT_OFFSET_COLUMN,	/* string: index of the rectangle in the array */
+	TEXT_OFFPTR_COLUMN,	/* pointer: same index packed with GINT_TO_POINTER */
+	N_COLUMNS		/* column count passed to gtk_list_store_new () */
+};
+
 typedef struct {
 	PopplerDocument *doc;
 
 	GtkWidget       *timer_label;
 	GtkTextBuffer   *buffer;
+	GtkListStore    *model;
 
 	gint             page;
 } PgdTextDemo;
@@ -45,6 +56,11 @@ pgd_text_free (PgdTextDemo *demo)
 		demo->buffer = NULL;
 	}
 
+	if (demo->model) {
+		g_object_unref (demo->model);
+		demo->model = NULL;
+	}
+
 	g_free (demo);
 }
 
@@ -52,16 +68,21 @@ static void
 pgd_text_get_text (GtkWidget   *button,
 		   PgdTextDemo *demo)
 {
-	PopplerPage     *page;
-	PopplerRectangle rect;
-	gdouble          width, height;
-	gchar           *text;
-	GTimer          *timer;
+	PopplerPage      *page;
+	PopplerRectangle  rect;
+	PopplerRectangle *recs = NULL;
+	guint             n_recs;
+	gdouble           width, height;
+	gchar            *text;
+	GTimer           *timer;
+	gint              i;
 
 	page = poppler_document_get_page (demo->doc, demo->page);
 	if (!page)
 		return;
 
+	gtk_list_store_clear (demo->model);
+
 	poppler_page_get_size (page, &width, &height);
 	rect.x1 = rect.y1 = 0;
 	rect.x2 = width;
@@ -72,10 +93,17 @@ pgd_text_get_text (GtkWidget   *button,
 	g_timer_stop (timer);
 
 	if (text) {
-		gchar *str;
+		gchar  *str;
+		gdouble text_elapsed;
 
-		str = g_strdup_printf ("<i>got text in %.4f seconds</i>",
-				       g_timer_elapsed (timer, NULL));
+		text_elapsed = g_timer_elapsed (timer, NULL);
+
+		g_timer_start (timer);
+		poppler_page_get_text_layout (page, &recs, &n_recs);
+		g_timer_stop (timer);
+
+		str = g_strdup_printf ("<i>got text in %.4f seconds, text layout in %.4f seconds</i>",
+				       text_elapsed, g_timer_elapsed (timer, NULL));
 		gtk_label_set_markup (GTK_LABEL (demo->timer_label), str);
 		g_free (str);
 	} else {
@@ -89,8 +117,62 @@ pgd_text_get_text (GtkWidget   *button,
 		gtk_text_buffer_set_text (demo->buffer, text, strlen (text));
 		g_free (text);
 	}
+
+	for (i = 0; i < n_recs; i++) {
+		GtkTreeIter iter;
+		gchar      *x1, *y1, *x2, *y2;
+		gchar      *offset;
+
+		x1 = g_strdup_printf ("%.2f", recs[i].x1);
+		y1 = g_strdup_printf ("%.2f", recs[i].y1);
+		x2 = g_strdup_printf ("%.2f", recs[i].x2);
+		y2 = g_strdup_printf ("%.2f", recs[i].y2);
+
+		offset = g_strdup_printf ("%d", i);
+
+		gtk_list_store_append (demo->model, &iter);
+		gtk_list_store_set (demo->model, &iter,
+				    TEXT_X1_COLUMN, x1,
+				    TEXT_Y1_COLUMN, y1,
+				    TEXT_X2_COLUMN, x2,
+				    TEXT_Y2_COLUMN, y2,
+				    TEXT_OFFSET_COLUMN, offset,
+				    TEXT_OFFPTR_COLUMN, GINT_TO_POINTER (i),
+				    -1);
+
+		g_free (x1);
+		g_free (y1);
+		g_free (x2);
+		g_free (y2);
+		g_free (offset);
+	}
+
+	g_free (recs);
+}
+
+/* Select in the text buffer the character that the chosen layout row describes. */
+static void
+pgd_text_selection_changed (GtkTreeSelection *treeselection,
+			    PgdTextDemo      *demo)
+{
+	GtkTreeModel *store;
+	GtkTreeIter   row;
+	GtkTextIter   start, end;
+	gpointer      packed_offset;
+
+	/* Nothing to highlight when no row is selected. */
+	if (!gtk_tree_selection_get_selected (treeselection, &store, &row))
+		return;
+
+	/* The row stores its character offset packed into a pointer. */
+	gtk_tree_model_get (store, &row, TEXT_OFFPTR_COLUMN, &packed_offset, -1);
+	gtk_text_buffer_get_iter_at_offset (demo->buffer, &start, GPOINTER_TO_INT (packed_offset));
+	end = start;
+	gtk_text_iter_forward_char (&end);
+	gtk_text_buffer_select_range (demo->buffer, &start, &end);
+}
 
+
 static void
 pgd_text_page_selector_value_changed (GtkSpinButton *spinbutton,
 				      PgdTextDemo   *demo)
@@ -101,14 +183,17 @@ pgd_text_page_selector_value_changed (GtkSpinButton *spinbutton,
 GtkWidget *
 pgd_text_create_widget (PopplerDocument *document)
 {
-	PgdTextDemo *demo;
-	GtkWidget   *label;
-	GtkWidget   *vbox;
-	GtkWidget   *hbox, *page_selector;
-	GtkWidget   *button;
-	GtkWidget   *swindow, *textview;
-	gchar       *str;
-	gint         n_pages;
+	PgdTextDemo      *demo;
+	GtkWidget        *label;
+	GtkWidget        *vbox;
+	GtkWidget        *hbox, *page_selector;
+	GtkWidget        *button;
+	GtkWidget        *swindow, *textview, *treeview;
+	GtkTreeSelection *selection;
+	GtkWidget        *hpaned;
+	GtkCellRenderer  *renderer;
+	gchar            *str;
+	gint              n_pages;
 
 	demo = g_new0 (PgdTextDemo, 1);
 
@@ -153,20 +238,81 @@ pgd_text_create_widget (PopplerDocument *document)
 	gtk_box_pack_start (GTK_BOX (vbox), demo->timer_label, FALSE, TRUE, 0);
 	gtk_widget_show (demo->timer_label);
 
+	hpaned = gtk_hpaned_new ();
+	gtk_paned_set_position (GTK_PANED (hpaned), 300);
+
+	swindow = gtk_scrolled_window_new (NULL, NULL);
+	gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow),
+					GTK_POLICY_AUTOMATIC,
+					GTK_POLICY_AUTOMATIC);
+
+	demo->model = gtk_list_store_new (N_COLUMNS,
+					  G_TYPE_STRING,
+					  G_TYPE_STRING, G_TYPE_STRING,
+					  G_TYPE_STRING, G_TYPE_STRING,
+					  G_TYPE_POINTER);
+	treeview = gtk_tree_view_new_with_model (GTK_TREE_MODEL (demo->model));
+
+	renderer = gtk_cell_renderer_text_new ();
+	gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+						     TEXT_X1_COLUMN, "X1",
+						     renderer,
+						     "text", TEXT_X1_COLUMN,
+						     NULL);
+	renderer = gtk_cell_renderer_text_new ();
+	gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+						     TEXT_Y1_COLUMN, "Y1",
+						     renderer,
+						     "text", TEXT_Y1_COLUMN,
+						     NULL);
+	renderer = gtk_cell_renderer_text_new ();
+	gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+						     TEXT_X2_COLUMN, "X2",
+						     renderer,
+						     "text", TEXT_X2_COLUMN,
+						     NULL);
+	renderer = gtk_cell_renderer_text_new ();
+	gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+						     TEXT_Y2_COLUMN, "Y2",
+						     renderer,
+						     "text", TEXT_Y2_COLUMN,
+						     NULL);
+
+	renderer = gtk_cell_renderer_text_new ();
+	gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (treeview),
+						     TEXT_OFFSET_COLUMN, "Offset",
+						     renderer,
+						     "text", TEXT_OFFSET_COLUMN,
+						     NULL);
+
+	selection = gtk_tree_view_get_selection (GTK_TREE_VIEW (treeview));
+	g_signal_connect (selection, "changed",
+			  G_CALLBACK (pgd_text_selection_changed),
+			  (gpointer) demo);
+
+	gtk_container_add (GTK_CONTAINER (swindow), treeview);
+	gtk_widget_show (treeview);
+
+	gtk_paned_add1 (GTK_PANED (hpaned), swindow);
+	gtk_widget_show (swindow);
+
 	swindow = gtk_scrolled_window_new (NULL, NULL);
 	gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow),
 					GTK_POLICY_AUTOMATIC,
 					GTK_POLICY_AUTOMATIC);
-	
+
 	demo->buffer = gtk_text_buffer_new (NULL);
 	textview = gtk_text_view_new_with_buffer (demo->buffer);
 
 	gtk_container_add (GTK_CONTAINER (swindow), textview);
 	gtk_widget_show (textview);
 
-	gtk_box_pack_start (GTK_BOX (vbox), swindow, TRUE, TRUE, 0);
+	gtk_paned_add2 (GTK_PANED (hpaned), swindow);
 	gtk_widget_show (swindow);
 
+	gtk_box_pack_start (GTK_BOX (vbox), hpaned, TRUE, TRUE, 0);
+	gtk_widget_show (hpaned);
+
 	g_object_weak_ref (G_OBJECT (vbox),
 			   (GWeakNotify)pgd_text_free,
 			   demo);
commit ddcea568b3a7334e062d6214f43d0a2c2ec95be4
Author: Daniel Garcia <danigm at yaco.es>
Date:   Tue Jun 15 16:57:32 2010 +0200

    [glib] Add poppler_page_get_text_layout()
    
    Returns an array of PopplerRectangle items, where each rectangle is the
    position of one text character.
    
    The position in this array represents the offset in the text returned by
    poppler_page_get_text().

diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index 19ea941..01d5540 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -1736,3 +1736,94 @@ poppler_page_get_crop_box (PopplerPage *page, PopplerRectangle *rect)
   rect->y2 = cropBox->y2;
 }
 
+/**
+ * poppler_page_get_text_layout:
+ * @page: A #PopplerPage
+ * @rectangles: return location for an array of #PopplerRectangle
+ * @n_rectangles: length of returned array
+ *
+ * Obtains the layout of the text as an array of #PopplerRectangle: one per
+ * character, plus one per word for the space or line end that follows it.
+ * Free the array with g_free () when done. The position in the array
+ * represents an offset in the text returned by poppler_page_get_text ().
+ *
+ * Return value: %TRUE if the page contains text; %FALSE if it has none, in
+ * which case @rectangles is set to %NULL and @n_rectangles to 0
+ **/
+gboolean
+poppler_page_get_text_layout (PopplerPage       *page,
+                              PopplerRectangle **rectangles,
+                              guint             *n_rectangles)
+{
+  TextPage *text;
+  TextWordList *wordlist;
+  TextWord *word, *nextword;
+  PopplerRectangle *rect;
+  int i, j, offset = 0;
+  gdouble x1, y1, x2, y2;
+  gdouble x3, y3, x4, y4;
+
+  g_return_val_if_fail (POPPLER_IS_PAGE (page), FALSE);
+
+  *rectangles = NULL; // lets callers g_free () the result even without text
+  *n_rectangles = 0;
+
+  text = poppler_page_get_text_page (page);
+  wordlist = text->makeWordList (gFalse);
+
+  if (wordlist->getLength () <= 0)
+    {
+      delete wordlist; // the list is owned here; free it on early exit too
+      return FALSE;
+    }
+
+  // Getting the array size: one slot per character plus one per word
+  for (i = 0; i < wordlist->getLength (); i++)
+    {
+      word = wordlist->get (i);
+      *n_rectangles += word->getLength () + 1;
+    }
+
+  *rectangles = g_new (PopplerRectangle, *n_rectangles);
+
+  // Calculating each char position
+  for (i = 0; i < wordlist->getLength (); i++)
+    {
+      word = wordlist->get (i);
+      for (j = 0; j < word->getLength (); j++)
+        {
+          rect = *rectangles + offset;
+          word->getCharBBox (j, &rect->x1, &rect->y1,
+                             &rect->x2, &rect->y2);
+          offset++;
+        }
+
+      // adding spaces and break lines
+      rect = *rectangles + offset;
+      word->getBBox (&x1, &y1, &x2, &y2);
+
+      nextword = word->getNext ();
+      if (nextword)
+        {
+          nextword->getBBox (&x3, &y3, &x4, &y4);
+          // space spans the gap to the next word, at the first word's height
+          rect->x1 = x2;
+          rect->y1 = y1;
+          rect->x2 = x3;
+          rect->y2 = y2;
+        }
+      else
+        {
+          // end of line: zero-size rectangle at the word's (x2, y2) corner
+          rect->x1 = x2;
+          rect->y1 = y2;
+          rect->x2 = x2;
+          rect->y2 = y2;
+        }
+      offset++;
+    }
+
+  delete wordlist;
+
+  return TRUE;
+}
diff --git a/glib/poppler-page.h b/glib/poppler-page.h
index 20dc20f..3a31acd 100644
--- a/glib/poppler-page.h
+++ b/glib/poppler-page.h
@@ -114,7 +114,9 @@ GList                 *poppler_page_get_annot_mapping    (PopplerPage        *pa
 void                   poppler_page_free_annot_mapping   (GList              *list);
 void 		      poppler_page_get_crop_box 	 (PopplerPage        *page,
 							  PopplerRectangle   *rect);
-
+gboolean               poppler_page_get_text_layout      (PopplerPage        *page,
+                                                          PopplerRectangle  **rectangles,
+                                                          guint              *n_rectangles);
 
 /* A rectangle on a page, with coordinates in PDF points. */
 #define POPPLER_TYPE_RECTANGLE             (poppler_rectangle_get_type ())


More information about the poppler mailing list