[poppler] 3 commits - glib/demo glib/poppler-page.cc glib/poppler-page.h glib/reference poppler/TextOutputDev.cc poppler/TextOutputDev.h
Carlos Garcia Campos
carlosgc at kemper.freedesktop.org
Mon Nov 25 00:10:31 PST 2013
glib/demo/text.c | 114 ++++++++++++++++++++++++++++++-
glib/poppler-page.cc | 129 ++++++++++++++++++++++++++++++++----
glib/poppler-page.h | 8 ++
glib/reference/poppler-sections.txt | 3
poppler/TextOutputDev.cc | 40 ++---------
poppler/TextOutputDev.h | 20 +++++
6 files changed, 265 insertions(+), 49 deletions(-)
New commits:
commit f20fe89d4f5a8f768e2019f25cecf40cd0e4f5f8
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date: Mon Nov 25 09:05:41 2013 +0100
glib-demo: Add an area selector to text demo
And use the for_area variants of the API to get the text, text layout
and text attributes.
diff --git a/glib/demo/text.c b/glib/demo/text.c
index af8428e..82293d5 100644
--- a/glib/demo/text.c
+++ b/glib/demo/text.c
@@ -38,6 +38,10 @@ typedef struct {
GtkTextBuffer *buffer;
GtkWidget *treeview;
GtkListStore *model;
+ GtkWidget *area_x1;
+ GtkWidget *area_y1;
+ GtkWidget *area_x2;
+ GtkWidget *area_y2;
/* Text attributes */
GList *text_attrs;
@@ -47,6 +51,7 @@ typedef struct {
GtkWidget *text_color;
gint page;
+ PopplerRectangle area;
} PgdTextDemo;
static void
@@ -99,7 +104,7 @@ pgd_text_get_text (GtkWidget *button,
demo->text_attrs = NULL;
timer = g_timer_new ();
- text = poppler_page_get_text (page);
+ text = poppler_page_get_text_for_area (page, &demo->area);
g_timer_stop (timer);
if (text) {
@@ -109,13 +114,13 @@ pgd_text_get_text (GtkWidget *button,
text_elapsed = g_timer_elapsed (timer, NULL);
g_timer_start (timer);
- poppler_page_get_text_layout (page, &recs, &n_recs);
+ poppler_page_get_text_layout_for_area (page, &demo->area, &recs, &n_recs);
g_timer_stop (timer);
layout_elapsed = g_timer_elapsed (timer, NULL);
g_timer_start (timer);
- demo->text_attrs = poppler_page_get_text_attributes (page);
+ demo->text_attrs = poppler_page_get_text_attributes_for_area (page, &demo->area);
g_timer_stop (timer);
str = g_strdup_printf ("<i>got text in %.4f seconds, text layout in %.4f seconds, text attrs in %.4f seconds</i>",
@@ -273,6 +278,40 @@ pgd_text_view_query_tooltip (GtkTextView *textview,
}
}
+static void
+pgd_text_area_selector_setup (PgdTextDemo *demo)
+{
+ PopplerPage *page;
+ gdouble width, height;
+ /* Reset the four area spin buttons so that they describe the whole current page. */
+ page = poppler_document_get_page (demo->doc, demo->page);
+ if (!page)
+ return;
+ /* The page size determines both the allowed ranges and the initial values. */
+ poppler_page_get_size (page, &width, &height);
+ /* x1/y1 may run from -10 to size - 10, x2/y2 from 0 to size + 10. */
+ gtk_spin_button_set_range (GTK_SPIN_BUTTON (demo->area_x1), -10, width - 10);
+ gtk_spin_button_set_range (GTK_SPIN_BUTTON (demo->area_y1), -10, height - 10);
+ gtk_spin_button_set_range (GTK_SPIN_BUTTON (demo->area_x2), 0, width + 10);
+ gtk_spin_button_set_range (GTK_SPIN_BUTTON (demo->area_y2), 0, height + 10);
+ /* Start from (0, 0) - (width, height), i.e. the full page. */
+ gtk_spin_button_set_value (GTK_SPIN_BUTTON (demo->area_x1), 0);
+ gtk_spin_button_set_value (GTK_SPIN_BUTTON (demo->area_y1), 0);
+ gtk_spin_button_set_value (GTK_SPIN_BUTTON (demo->area_x2), width);
+ gtk_spin_button_set_value (GTK_SPIN_BUTTON (demo->area_y2), height);
+ /* Done with the page obtained above. */
+ g_object_unref (page);
+}
+/* "value-changed" handler for the area spin buttons: copies their values into demo->area. */
+static void
+pgd_text_area_selector_value_changed (GtkSpinButton *spinbutton,
+ PgdTextDemo *demo)
+{ /* spinbutton is not consulted: all four coordinates are re-read on every change. */
+ demo->area.x1 = gtk_spin_button_get_value (GTK_SPIN_BUTTON (demo->area_x1));
+ demo->area.y1 = gtk_spin_button_get_value (GTK_SPIN_BUTTON (demo->area_y1));
+ demo->area.x2 = gtk_spin_button_get_value (GTK_SPIN_BUTTON (demo->area_x2));
+ demo->area.y2 = gtk_spin_button_get_value (GTK_SPIN_BUTTON (demo->area_y2));
+}
static void
pgd_text_page_selector_value_changed (GtkSpinButton *spinbutton,
@@ -287,7 +326,7 @@ pgd_text_create_widget (PopplerDocument *document)
PgdTextDemo *demo;
GtkWidget *label;
GtkWidget *vbox, *vbox2;
- GtkWidget *hbox, *page_selector;
+ GtkWidget *hbox, *page_selector, *area_hbox;
GtkWidget *button;
GtkWidget *swindow, *textview, *treeview;
GtkTreeSelection *selection;
@@ -326,6 +365,73 @@ pgd_text_create_widget (PopplerDocument *document)
gtk_widget_show (label);
g_free (str);
+ gtk_box_pack_start (GTK_BOX (vbox), hbox, FALSE, TRUE, 0);
+ gtk_widget_show (hbox);
+
+ hbox = gtk_box_new (GTK_ORIENTATION_HORIZONTAL, 12);
+
+ area_hbox = gtk_box_new (GTK_ORIENTATION_HORIZONTAL, 6);
+ label = gtk_label_new ("X1:");
+ gtk_box_pack_start (GTK_BOX (area_hbox), label, TRUE, TRUE, 0);
+ gtk_widget_show (label);
+
+ demo->area_x1 = gtk_spin_button_new_with_range (0, 0, 0.01);
+ g_signal_connect (demo->area_x1, "value-changed",
+ G_CALLBACK (pgd_text_area_selector_value_changed),
+ demo);
+ gtk_box_pack_start (GTK_BOX (area_hbox), demo->area_x1, TRUE, TRUE, 0);
+ gtk_widget_show (demo->area_x1);
+
+ gtk_box_pack_start (GTK_BOX (hbox), area_hbox, FALSE, TRUE, 0);
+ gtk_widget_show (area_hbox);
+
+ area_hbox = gtk_box_new (GTK_ORIENTATION_HORIZONTAL, 6);
+ label = gtk_label_new ("Y1:");
+ gtk_box_pack_start (GTK_BOX (area_hbox), label, TRUE, TRUE, 0);
+ gtk_widget_show (label);
+
+ demo->area_y1 = gtk_spin_button_new_with_range (0, 0, 0.01);
+ g_signal_connect (demo->area_y1, "value-changed",
+ G_CALLBACK (pgd_text_area_selector_value_changed),
+ demo);
+ gtk_box_pack_start (GTK_BOX (area_hbox), demo->area_y1, TRUE, TRUE, 0);
+ gtk_widget_show (demo->area_y1);
+
+ gtk_box_pack_start (GTK_BOX (hbox), area_hbox, FALSE, TRUE, 0);
+ gtk_widget_show (area_hbox);
+
+ area_hbox = gtk_box_new (GTK_ORIENTATION_HORIZONTAL, 6);
+ label = gtk_label_new ("X2:");
+ gtk_box_pack_start (GTK_BOX (area_hbox), label, TRUE, TRUE, 0);
+ gtk_widget_show (label);
+
+ demo->area_x2 = gtk_spin_button_new_with_range (0, 0, 0.01);
+ g_signal_connect (demo->area_x2, "value-changed",
+ G_CALLBACK (pgd_text_area_selector_value_changed),
+ demo);
+ gtk_box_pack_start (GTK_BOX (area_hbox), demo->area_x2, TRUE, TRUE, 0);
+ gtk_widget_show (demo->area_x2);
+
+ gtk_box_pack_start (GTK_BOX (hbox), area_hbox, FALSE, TRUE, 0);
+ gtk_widget_show (area_hbox);
+
+ area_hbox = gtk_box_new (GTK_ORIENTATION_HORIZONTAL, 6);
+ label = gtk_label_new ("Y2:");
+ gtk_box_pack_start (GTK_BOX (area_hbox), label, TRUE, TRUE, 0);
+ gtk_widget_show (label);
+
+ demo->area_y2 = gtk_spin_button_new_with_range (0, 0, 0.01);
+ g_signal_connect (demo->area_y2, "value-changed",
+ G_CALLBACK (pgd_text_area_selector_value_changed),
+ demo);
+ gtk_box_pack_start (GTK_BOX (area_hbox), demo->area_y2, TRUE, TRUE, 0);
+ gtk_widget_show (demo->area_y2);
+
+ gtk_box_pack_start (GTK_BOX (hbox), area_hbox, FALSE, TRUE, 0);
+ gtk_widget_show (area_hbox);
+
+ pgd_text_area_selector_setup (demo);
+
button = gtk_button_new_with_label ("Get Text");
g_signal_connect (G_OBJECT (button), "clicked",
G_CALLBACK (pgd_text_get_text),
commit bb2b7fb491fb72f0ea024d80df89680ede3457b4
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date: Mon Nov 25 09:04:30 2013 +0100
glib: Add API to get text, text layout and text attributes for a given area
diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index 248649d..bb32473 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -851,6 +851,28 @@ poppler_page_get_text (PopplerPage *page)
}
/**
+ * poppler_page_get_text_for_area:
+ * @page: a #PopplerPage
+ * @area: a #PopplerRectangle
+ *
+ * Retrieves the text of @page contained in @area.
+ *
+ * Return value: a pointer to the text as a string
+ *
+ * Since: 0.26
+ **/
+char *
+poppler_page_get_text_for_area (PopplerPage *page,
+ PopplerRectangle *area)
+{
+ g_return_val_if_fail (POPPLER_IS_PAGE (page), NULL);
+ g_return_val_if_fail (area != NULL, NULL);
+ /* The text of an area is the glyph-style selection over that rectangle. */
+ return poppler_page_get_selected_text (page, POPPLER_SELECTION_GLYPH, area);
+}
+
+
+/**
* poppler_page_find_text_with_options:
* @page: a #PopplerPage
* @text: the text to search for (UTF-8 encoded)
@@ -2018,14 +2040,16 @@ poppler_page_get_crop_box (PopplerPage *page, PopplerRectangle *rect)
* poppler_page_get_text_layout:
* @page: A #PopplerPage
* @rectangles: (out) (array length=n_rectangles) (transfer container): return location for an array of #PopplerRectangle
- * @n_rectangles: (out) length of returned array
+ * @n_rectangles: (out): length of returned array
*
* Obtains the layout of the text as a list of #PopplerRectangle
- * This array must be freed with g_free () when done.
+ * This array must be freed with g_free() when done.
*
* The position in the array represents an offset in the text returned by
* poppler_page_get_text()
*
+ * See also poppler_page_get_text_layout_for_area().
+ *
* Return value: %TRUE if the page contains text, %FALSE otherwise
*
* Since: 0.16
@@ -2035,6 +2059,38 @@ poppler_page_get_text_layout (PopplerPage *page,
PopplerRectangle **rectangles,
guint *n_rectangles)
{
+ PopplerRectangle selection = {0, 0, 0, 0};
+
+ g_return_val_if_fail (POPPLER_IS_PAGE (page), FALSE);
+
+ poppler_page_get_size (page, &selection.x2, &selection.y2);
+
+ return poppler_page_get_text_layout_for_area (page, &selection, rectangles, n_rectangles);
+}
+
+/**
+ * poppler_page_get_text_layout_for_area:
+ * @page: A #PopplerPage
+ * @area: a #PopplerRectangle
+ * @rectangles: (out) (array length=n_rectangles) (transfer container): return location for an array of #PopplerRectangle
+ * @n_rectangles: (out): length of returned array
+ *
+ * Obtains the layout of the text contained in @area as a list of #PopplerRectangle.
+ * This array must be freed with g_free() when done.
+ *
+ * The position in the array represents an offset in the text returned by
+ * poppler_page_get_text_for_area().
+ *
+ * Return value: %TRUE if the page contains text, %FALSE otherwise
+ *
+ * Since: 0.26
+ **/
+gboolean
+poppler_page_get_text_layout_for_area (PopplerPage *page,
+ PopplerRectangle *area,
+ PopplerRectangle **rectangles,
+ guint *n_rectangles)
+{
TextPage *text;
PopplerRectangle *rect;
PDFRectangle selection;
@@ -2047,10 +2103,15 @@ poppler_page_get_text_layout (PopplerPage *page,
int n_lines;
g_return_val_if_fail (POPPLER_IS_PAGE (page), FALSE);
+ g_return_val_if_fail (area != NULL, FALSE);
*n_rectangles = 0;
- poppler_page_get_size (page, &selection.x2, &selection.y2);
+ selection.x1 = area->x1;
+ selection.y1 = area->y1;
+ selection.x2 = area->x2;
+ selection.y2 = area->y2;
+
text = poppler_page_get_text_page (page);
word_list = text->getSelectionWords (&selection, selectionStyleGlyph, &n_lines);
if (!word_list)
@@ -2170,13 +2231,15 @@ word_text_attributes_equal (TextWord *a, gint ai, TextWord *b, gint bi)
* poppler_page_get_text_attributes:
* @page: A #PopplerPage
*
- * Obtains the attributes of the text as a GList of #PopplerTextAttributes.
+ * Obtains the attributes of the text as a #GList of #PopplerTextAttributes.
* This list must be freed with poppler_page_free_text_attributes() when done.
*
* Each list element is a #PopplerTextAttributes struct where start_index and
* end_index indicates the range of text (as returned by poppler_page_get_text())
* to which text attributes apply.
*
+ * See also poppler_page_get_text_attributes_for_area().
+ *
* Return value: (element-type PopplerTextAttributes) (transfer full): A #GList of #PopplerTextAttributes
*
* Since: 0.18
@@ -2184,6 +2247,35 @@ word_text_attributes_equal (TextWord *a, gint ai, TextWord *b, gint bi)
GList *
poppler_page_get_text_attributes (PopplerPage *page)
{
+ PopplerRectangle selection = {0, 0, 0, 0};
+ /* Whole-page variant: build a rectangle covering the page and defer to the for_area function. */
+ g_return_val_if_fail (POPPLER_IS_PAGE (page), NULL);
+ /* The origin stays at (0, 0); the far corner is set to the page size. */
+ poppler_page_get_size (page, &selection.x2, &selection.y2);
+
+ return poppler_page_get_text_attributes_for_area (page, &selection);
+}
+
+/**
+ * poppler_page_get_text_attributes_for_area:
+ * @page: A #PopplerPage
+ * @area: a #PopplerRectangle
+ *
+ * Obtains the attributes of the text in @area as a #GList of #PopplerTextAttributes.
+ * This list must be freed with poppler_page_free_text_attributes() when done.
+ *
+ * Each list element is a #PopplerTextAttributes struct where start_index and
+ * end_index indicate the range of text (as returned by poppler_page_get_text_for_area())
+ * to which text attributes apply.
+ *
+ * Return value: (element-type PopplerTextAttributes) (transfer full): A #GList of #PopplerTextAttributes
+ *
+ * Since: 0.26
+ **/
+GList *
+poppler_page_get_text_attributes_for_area (PopplerPage *page,
+ PopplerRectangle *area)
+{
TextPage *text;
PDFRectangle selection;
GooList **word_list;
@@ -2196,8 +2288,13 @@ poppler_page_get_text_attributes (PopplerPage *page)
GList *attributes = NULL;
g_return_val_if_fail (POPPLER_IS_PAGE (page), NULL);
+ g_return_val_if_fail (area != NULL, NULL);
+ /* Copy the caller's area into the PDFRectangle handed to getSelectionWords() below. */
+ selection.x1 = area->x1;
+ selection.y1 = area->y1;
+ selection.x2 = area->x2;
+ selection.y2 = area->y2;
- poppler_page_get_size (page, &selection.x2, &selection.y2);
text = poppler_page_get_text_page (page);
word_list = text->getSelectionWords (&selection, selectionStyleGlyph, &n_lines);
if (!word_list)
diff --git a/glib/poppler-page.h b/glib/poppler-page.h
index c54eb9c..68e2a1d 100644
--- a/glib/poppler-page.h
+++ b/glib/poppler-page.h
@@ -68,6 +68,8 @@ GList *poppler_page_find_text (PopplerPage *page,
void poppler_page_render_to_ps (PopplerPage *page,
PopplerPSFile *ps_file);
char *poppler_page_get_text (PopplerPage *page);
+char *poppler_page_get_text_for_area (PopplerPage *page,
+ PopplerRectangle *area);
char *poppler_page_get_selected_text (PopplerPage *page,
PopplerSelectionStyle style,
PopplerRectangle *selection);
@@ -99,8 +101,14 @@ void poppler_page_get_crop_box (PopplerPage *page,
gboolean poppler_page_get_text_layout (PopplerPage *page,
PopplerRectangle **rectangles,
guint *n_rectangles);
+gboolean poppler_page_get_text_layout_for_area (PopplerPage *page,
+ PopplerRectangle *area,
+ PopplerRectangle **rectangles,
+ guint *n_rectangles);
GList *poppler_page_get_text_attributes (PopplerPage *page);
void poppler_page_free_text_attributes (GList *list);
+GList * poppler_page_get_text_attributes_for_area (PopplerPage *page,
+ PopplerRectangle *area);
/* A rectangle on a page, with coordinates in PDF points. */
#define POPPLER_TYPE_RECTANGLE (poppler_rectangle_get_type ())
diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt
index ce829e3..62e0f77 100644
--- a/glib/reference/poppler-sections.txt
+++ b/glib/reference/poppler-sections.txt
@@ -35,8 +35,11 @@ poppler_page_get_selected_text
poppler_page_find_text
poppler_page_find_text_with_options
poppler_page_get_text
+poppler_page_get_text_for_area
poppler_page_get_text_layout
+poppler_page_get_text_layout_for_area
poppler_page_get_text_attributes
+poppler_page_get_text_attributes_for_area
poppler_page_free_text_attributes
poppler_page_get_link_mapping
poppler_page_free_link_mapping
commit f662973b0da52da84acc3668a0e037ee72498193
Author: Carlos Garcia Campos <carlosgc at gnome.org>
Date: Sat Nov 2 14:07:07 2013 +0100
TextOutputDev: Honor the selection rectangle passed to TextPage::getSelectionWords()
Make TextPage::getSelectionWords() return a list of TextWordSelection
instead of a list of TextWord so that it's possible to know which
characters of the word are inside the given selection rectangle.
Adapt the glib frontend to the new API and use the selection bounds
instead of the whole word to build the list of characters in
poppler_page_get_text_layout() and poppler_page_get_text_attributes(),
which ensures the number of glyphs returned is in sync with the number
of characters returned by poppler_page_get_text().
https://bugs.freedesktop.org/show_bug.cgi?id=71160
diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index fbab9b4..248649d 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -2063,8 +2063,8 @@ poppler_page_get_text_layout (PopplerPage *page,
n_rects += line_words->getLength() - 1;
for (j = 0; j < line_words->getLength(); j++)
{
- TextWord *word = (TextWord *)line_words->get(j);
- n_rects += word->getLength();
+ TextWordSelection *word_sel = (TextWordSelection *)line_words->get(j);
+ n_rects += word_sel->getEnd() - word_sel->getBegin();
}
}
@@ -2076,8 +2076,11 @@ poppler_page_get_text_layout (PopplerPage *page,
GooList *line_words = word_list[i];
for (j = 0; j < line_words->getLength(); j++)
{
- TextWord *word = (TextWord *)line_words->get(j);
- for (k = 0; k < word->getLength(); k++)
+ TextWordSelection *word_sel = (TextWordSelection *)line_words->get(j);
+ TextWord *word = word_sel->getWord();
+ int end = word_sel->getEnd();
+
+ for (k = word_sel->getBegin(); k < end; k++)
{
rect = *rectangles + offset;
word->getCharBBox (k,
@@ -2093,9 +2096,9 @@ poppler_page_get_text_layout (PopplerPage *page,
if (j < line_words->getLength() - 1)
{
- TextWord *next_word = (TextWord *)line_words->get(j + 1);
+ TextWordSelection *word_sel = (TextWordSelection *)line_words->get(j + 1);
- next_word->getBBox(&x3, &y3, &x4, &y4);
+ word_sel->getWord()->getBBox(&x3, &y3, &x4, &y4);
// space is from one word to other and with the same height as
// first word.
rect->x1 = x2;
@@ -2205,9 +2208,12 @@ poppler_page_get_text_attributes (PopplerPage *page)
GooList *line_words = word_list[i];
for (j = 0; j < line_words->getLength(); j++)
{
- word = (TextWord *)line_words->get(j);
+ TextWordSelection *word_sel = (TextWordSelection *)line_words->get(j);
+ int end = word_sel->getEnd();
+
+ word = word_sel->getWord();
- for (word_i = 0; word_i < word->getLength (); word_i++)
+ for (word_i = word_sel->getBegin(); word_i < end; word_i++)
{
if (!prev_word || !word_text_attributes_equal (word, word_i, prev_word, prev_word_i))
{
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 4adb3c2..7c2ca78 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -4000,21 +4000,6 @@ public:
PDFRectangle *selection) = 0;
protected:
-
- class TextWordSelection {
- public:
- TextWordSelection(TextWord *word, int begin, int end)
- : word(word),
- begin(begin),
- end(end)
- {
- }
-
- TextWord *word;
- int begin;
- int end;
- };
-
TextPage *page;
};
@@ -4044,7 +4029,7 @@ public:
void endPage();
GooString *getText(void);
- GooList **getWordList(int *nLines);
+ GooList **takeWordList(int *nLines);
private:
@@ -4179,27 +4164,18 @@ GooString *TextSelectionDumper::getText (void)
return text;
}
-GooList **TextSelectionDumper::getWordList(int *nLinesOut)
+GooList **TextSelectionDumper::takeWordList(int *nLinesOut)
{
- int i, j;
+ GooList **returnValue = lines;
+ *nLinesOut = nLines;
if (nLines == 0)
return NULL;
- GooList **wordList = (GooList **)gmallocn(nLines, sizeof(GooList *));
-
- for (i = 0; i < nLines; i++) {
- GooList *lineWords = lines[i];
- wordList[i] = new GooList();
- for (j = 0; j < lineWords->getLength(); j++) {
- TextWordSelection *sel = (TextWordSelection *)lineWords->get(j);
- wordList[i]->append(sel->word);
- }
- }
-
- *nLinesOut = nLines;
+ nLines = 0;
+ lines = NULL;
- return wordList;
+ return returnValue;
}
class TextSelectionSizer : public TextSelectionVisitor {
@@ -4793,7 +4769,7 @@ GooList **TextPage::getSelectionWords(PDFRectangle *selection,
visitSelection(&dumper, selection, style);
dumper.endPage();
- return dumper.getWordList(nLines);
+ return dumper.takeWordList(nLines);
}
GBool TextPage::findCharRange(int pos, int length,
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index 56736b3..23fb3b7 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -478,6 +478,26 @@ private:
#endif // TEXTOUT_WORD_LIST
+class TextWordSelection { // A TextWord plus the [begin, end) range of its characters covered by a selection.
+public:
+ TextWordSelection(TextWord *word, int begin, int end)
+ : word(word), begin(begin), end(end)
+ {
+ }
+ // Read-only accessors, used e.g. by the glib frontend to walk only the selected characters.
+ TextWord * getWord() const { return word; }
+ int getBegin() const { return begin; } // index of the first selected character
+ int getEnd() const { return end; } // one past the last selected character
+
+private:
+ TextWord *word;
+ int begin;
+ int end;
+ // Friends get direct access to the private fields.
+ friend class TextSelectionPainter;
+ friend class TextSelectionDumper;
+};
+
//------------------------------------------------------------------------
// TextPage
//------------------------------------------------------------------------
More information about the poppler
mailing list