[poppler] 3 commits - poppler/Annot.cc poppler/Annot.h poppler/TextOutputDev.cc qt4/src qt4/tests

Albert Astals Cid aacid at kemper.freedesktop.org
Wed Sep 1 12:21:11 PDT 2010


 poppler/Annot.cc         |    2 +-
 poppler/Annot.h          |    4 ++--
 poppler/TextOutputDev.cc |   23 ++++++++++++++++++++---
 qt4/src/poppler-page.cc  |   11 +++++++++--
 qt4/src/poppler-qt4.h    |   21 +++++++++++++++++++++
 qt4/tests/CMakeLists.txt |    1 +
 qt4/tests/Makefile.am    |    7 ++++++-
 7 files changed, 60 insertions(+), 9 deletions(-)

New commits:
commit a86f9d90be99a36c41c6932fb4d9a202c4ff6d05
Author: Albert Astals Cid <aacid at kde.org>
Date:   Wed Sep 1 20:20:48 2010 +0100

    Clarify the ownership

diff --git a/qt4/src/poppler-qt4.h b/qt4/src/poppler-qt4.h
index cb4ec39..5ed7218 100644
--- a/qt4/src/poppler-qt4.h
+++ b/qt4/src/poppler-qt4.h
@@ -799,6 +799,8 @@ delete it;
      
 	   Note that this follows the PDF standard of being zero based - if you
 	   want the first page, then you need an index of zero.
+	
+	   The caller gets the ownership of the returned object.
 
 	   \param index the page number index
 	*/
commit 33ad3a17ac26879fcd6a7fad2023dd219bc5919f
Author: Suzuki Toshiya <mpsuzuki at hiroshima-u.ac.jp>
Date:   Wed Sep 1 20:19:54 2010 +0100

    Add a way to access the raw text

diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index c840aef..576bcc9 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -23,6 +23,7 @@
 // Copyright (C) 2009 Ross Moore <ross at maths.mq.edu.au>
 // Copyright (C) 2009 Kovid Goyal <kovid at kovidgoyal.net>
 // Copyright (C) 2010 Brian Ewins <brian.ewins at gmail.com>
+// Copyright (C) 2010 Suzuki Toshiya <mpsuzuki at hiroshima-u.ac.jp>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -3605,14 +3606,30 @@ GooString *TextPage::getText(double xMin, double yMin,
 
   s = new GooString();
 
-  if (rawOrder) {
+  // get the output encoding
+  if (!(uMap = globalParams->getTextEncoding())) {
     return s;
   }
 
-  // get the output encoding
-  if (!(uMap = globalParams->getTextEncoding())) {
+  if (rawOrder) {
+    TextWord*  word;
+    char mbc[16];
+    int  mbc_len;
+
+    for (word = rawWords; word && word <= rawLastWord; word = word->next) {
+      for (j = 0; j < word->getLength(); ++j) {
+        double gXMin, gXMax, gYMin, gYMax;
+        word->getCharBBox(j, &gXMin, &gYMin, &gXMax, &gYMax);
+        if (xMin <= gXMin && gXMax <= xMax && yMin <= gYMin && gYMax <= yMax)
+        {
+          mbc_len = uMap->mapUnicode( *(word->getChar(j)), mbc, sizeof(mbc) );
+          s->append(mbc, mbc_len);
+        }
+      }
+    }
     return s;
   }
+
   isUnicode = uMap->isUnicode();
   spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
   eolLen = 0; // make gcc happy
diff --git a/qt4/src/poppler-page.cc b/qt4/src/poppler-page.cc
index ae67b11..49a0a77 100644
--- a/qt4/src/poppler-page.cc
+++ b/qt4/src/poppler-page.cc
@@ -7,6 +7,7 @@
  * Copyright (C) 2008 Carlos Garcia Campos <carlosgc at gnome.org>
  * Copyright (C) 2009 Shawn Rutledge <shawn.t.rutledge at gmail.com>
  * Copyright (C) 2010, Guillermo Amaral <gamaral at kdab.com>
+ * Copyright (C) 2010 Suzuki Toshiya <mpsuzuki at hiroshima-u.ac.jp>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -295,14 +296,15 @@ QImage Page::thumbnail() const
   return ret;
 }
 
-QString Page::text(const QRectF &r) const
+QString Page::text(const QRectF &r, TextLayout textLayout) const
 {
   TextOutputDev *output_dev;
   GooString *s;
   PDFRectangle *rect;
   QString result;
   
-  output_dev = new TextOutputDev(0, gFalse, gFalse, gFalse);
+  const GBool rawOrder = textLayout == RawOrder;
+  output_dev = new TextOutputDev(0, gFalse, rawOrder, gFalse);
   m_page->parentDoc->doc->displayPageSlice(output_dev, m_page->index + 1, 72, 72,
       0, false, true, false, -1, -1, -1, -1);
   if (r.isNull())
@@ -322,6 +324,11 @@ QString Page::text(const QRectF &r) const
   return result;
 }
 
+QString Page::text(const QRectF &r) const
+{
+  return text(r, PhysicalLayout);
+}
+
 bool Page::search(const QString &text, double &sLeft, double &sTop, double &sRight, double &sBottom, SearchDirection direction, SearchMode caseSensitive, Rotation rotate) const
 {
   const QChar * str = text.unicode();
diff --git a/qt4/src/poppler-qt4.h b/qt4/src/poppler-qt4.h
index 117dc43..cb4ec39 100644
--- a/qt4/src/poppler-qt4.h
+++ b/qt4/src/poppler-qt4.h
@@ -5,6 +5,7 @@
  * Copyright (C) 2005, Stefan Kebekus <stefan.kebekus at math.uni-koeln.de>
  * Copyright (C) 2006-2009, Pino Toscano <pino at kde.org>
  * Copyright (C) 2009 Shawn Rutledge <shawn.t.rutledge at gmail.com>
+ * Copyright (C) 2010 Suzuki Toshiya <mpsuzuki at hiroshima-u.ac.jp>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -387,6 +388,14 @@ delete it;
 	    Opening,   ///< The action when a page is "opened"
 	    Closing    ///< The action when a page is "closed"
 	};
+	
+	/**
+	   How the text is going to be returned
+	*/
+	enum TextLayout {
+	    PhysicalLayout,   ///< The text is laid out to resemble the real page layout
+	    RawOrder          ///< The text is returned without any type of processing
+	};
 
 	/** 
 	   Render the page to a QImage using the current
@@ -445,6 +454,16 @@ delete it;
 	   with coordinates given in points, i.e., 1/72th of an inch.
 	   If rect is null, all text on the page is given
 	**/
+	QString text(const QRectF &rect, TextLayout textLayout) const;
+
+	/**
+	   Returns the text that is inside a specified rectangle.
+	   The text is returned using the physical layout of the page
+
+	   \param rect the rectangle specifying the area of interest,
+	   with coordinates given in points, i.e., 1/72th of an inch.
+	   If rect is null, all text on the page is given
+	**/
 	QString text(const QRectF &rect) const;
 	
 	/**
diff --git a/qt4/tests/CMakeLists.txt b/qt4/tests/CMakeLists.txt
index 892ec66..3a67614 100644
--- a/qt4/tests/CMakeLists.txt
+++ b/qt4/tests/CMakeLists.txt
@@ -41,6 +41,7 @@ qt4_add_simpletest(poppler-fonts poppler-fonts.cpp)
 qt4_add_simpletest(poppler_attachments poppler-attachments.cpp)
 qt4_add_simpletest(stress-poppler-qt4 stress-poppler-qt4.cpp)
 qt4_add_simpletest(stress-poppler-dir stress-poppler-dir.cpp)
+qt4_add_simpletest(poppler-texts poppler-texts.cpp)
 
 qt4_add_qtest(check_attachments check_attachments.cpp)
 qt4_add_qtest(check_dateConversion check_dateConversion.cpp)
diff --git a/qt4/tests/Makefile.am b/qt4/tests/Makefile.am
index 7bc16d7..244097c 100644
--- a/qt4/tests/Makefile.am
+++ b/qt4/tests/Makefile.am
@@ -21,7 +21,7 @@ SUFFIXES: .moc
 
 noinst_PROGRAMS = test-poppler-qt4 stress-poppler-qt4 \
 	poppler-fonts test-password-qt4 stress-poppler-dir \
-	poppler-attachments
+	poppler-attachments poppler-texts
 
 
 test_poppler_qt4_SOURCES =			\
@@ -46,6 +46,11 @@ poppler_attachments_SOURCES =			\
 
 poppler_attachments_LDADD = $(LDADDS)
 
+poppler_texts_SOURCES =			\
+       poppler-texts.cpp
+
+poppler_texts_LDADD = $(LDADDS)
+
 
 stress_poppler_qt4_SOURCES =			\
        stress-poppler-qt4.cpp
commit 46e89248b3c5b1789baa3bd9bfa012570720ddb5
Author: Albert Astals Cid <aacid at kde.org>
Date:   Wed Sep 1 19:54:02 2010 +0100

    quadding is not a GBool but an int

diff --git a/poppler/Annot.cc b/poppler/Annot.cc
index 6a18d7f..0cb6516 100644
--- a/poppler/Annot.cc
+++ b/poppler/Annot.cc
@@ -3400,7 +3400,7 @@ void AnnotWidget::drawText(GooString *text, GooString *da, GfxFontDict *fontDict
 // Draw the variable text or caption for a field.
 void AnnotWidget::drawListBox(GooString **text, GBool *selection,
 			      int nOptions, int topIdx,
-			      GooString *da, GfxFontDict *fontDict, GBool quadding) {
+			      GooString *da, GfxFontDict *fontDict, int quadding) {
   GooList *daToks;
   GooString *tok, *convertedText;
   GfxFont *font;
diff --git a/poppler/Annot.h b/poppler/Annot.h
index a21b55e..a392267 100644
--- a/poppler/Annot.h
+++ b/poppler/Annot.h
@@ -21,7 +21,7 @@
 // Copyright (C) 2008 Hugo Mercier <hmercier31 at gmail.com>
 // Copyright (C) 2008 Pino Toscano <pino at kde.org>
 // Copyright (C) 2008 Tomas Are Haavet <tomasare at gmail.com>
-// Copyright (C) 2009 Albert Astals Cid <aacid at kde.org>
+// Copyright (C) 2009, 2010 Albert Astals Cid <aacid at kde.org>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -1175,7 +1175,7 @@ private:
 		GBool password=false);
   void drawListBox(GooString **text, GBool *selection,
 		   int nOptions, int topIdx,
-		   GooString *da, GfxFontDict *fontDict, GBool quadding);
+		   GooString *da, GfxFontDict *fontDict, int quadding);
   void layoutText(GooString *text, GooString *outBuf, int *i, GfxFont *font,
 		  double *width, double widthLimit, int *charCount,
 		  GBool noReencode);


More information about the poppler mailing list