[PATCH 2/2] TextPage::getText in rawOrder now counts chars

danigm dani at danigm.net
Mon May 10 01:14:57 PDT 2010


Previously, getText with rawOrder only checked whether whole words were
within the limits. This commit also adds the individual chars of a
partially contained word that fall within the limits.
---
 poppler/TextOutputDev.cc |   43 ++++++++++++++++++++++++++++++++-----------
 test/gettext-test.cc     |    1 +
 2 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 28b864b..4c42b30 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -3626,22 +3626,43 @@ GooString *TextPage::getText(double xMin, double yMin,
     TextWordList *wordlist;
     wordlist = makeWordList(gFalse);
     int word_length = wordlist->getLength ();
-    TextWord *word;
+    TextWord *word=NULL, *prev_word=NULL;
+    const Unicode *word_char;
+    char buf[8];
+    bool outOfBound = false;
     double xMinA, yMinA, xMaxA, yMaxA;
 
-    for (int i=0; i < word_length; i++)
-    {
+    for (int i=0; i < word_length; i++) {
       word = wordlist->get (i);
+
+      if (prev_word && word->primaryDelta (prev_word) <= 0) {
+        if (!outOfBound)
+            s->append(space, spaceLen);
+      } else {
+        s->append(eol, eolLen);
+      }
+
       word->getBBox (&xMinA, &yMinA, &xMaxA, &yMaxA);
-      if (xMinA > xMin && yMinA > yMin && xMaxA < xMax && yMaxA < yMax)
+      if (xMinA > xMin && yMinA > yMin && xMaxA < xMax && yMaxA < yMax) {
         s->append (word->getText ());
-      else
-        continue;
-      if (word->getNext() && word->getNext()->primaryDelta (word) <= 0)
-      {
-	s->append(space, spaceLen);
-      } else {
-	s->append(eol, eolLen);
+        prev_word = word;
+        outOfBound = false;
+      }
+      else if (xMinA < xMax && yMinA < yMax) {
+        for (int i=0; i < word->getLength(); i++) {
+          int n;
+          word->getCharBBox(i, &xMinA, &yMinA, &xMaxA, &yMaxA);
+          if (xMinA > xMin && yMinA > yMin && xMaxA < xMax && yMaxA < yMax) {
+            word_char = word->getChar(i);
+            n = uMap->mapUnicode(*word_char, buf, sizeof(buf));
+            s->append(buf, n);
+          }
+        }
+        prev_word = word;
+        outOfBound = true;
+      }
+      else {
+        outOfBound = true;
       }
     }
     return s;
diff --git a/test/gettext-test.cc b/test/gettext-test.cc
index 0c32a9e..58f07a9 100644
--- a/test/gettext-test.cc
+++ b/test/gettext-test.cc
@@ -50,6 +50,7 @@ int main (int argc, char *argv[])
 
   rect = page->getCropBox();
   s = textOut->getText(rect->x1, rect->y1, rect->x2, rect->y2);
+  //s = textOut->getText(0, 0, 200, 1000);
 
   result = s->getCString ();
   printf ("%s\n", result);
-- 
1.7.1


--=-XyuwjjX5VE+xex9UV8dk--



More information about the poppler mailing list