[PATCH 2/2] TextPage::getText in rawOrder now counts chars
danigm
dani at danigm.net
Mon May 10 01:14:57 PDT 2010
The previous getText with rawOrder only checked whether whole words lie
within the limits. This commit also adds the individual chars of a
partially contained word that lie within the limits.
---
poppler/TextOutputDev.cc | 43 ++++++++++++++++++++++++++++++++-----------
test/gettext-test.cc | 1 +
2 files changed, 33 insertions(+), 11 deletions(-)
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 28b864b..4c42b30 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -3626,22 +3626,43 @@ GooString *TextPage::getText(double xMin, double yMin,
TextWordList *wordlist;
wordlist = makeWordList(gFalse);
int word_length = wordlist->getLength ();
- TextWord *word;
+ TextWord *word=NULL, *prev_word=NULL;
+ const Unicode *word_char;
+ char buf[8];
+ bool outOfBound = false;
double xMinA, yMinA, xMaxA, yMaxA;
- for (int i=0; i < word_length; i++)
- {
+ for (int i=0; i < word_length; i++) {
word = wordlist->get (i);
+
+ if (prev_word && word->primaryDelta (prev_word) <= 0) {
+ if (!outOfBound)
+ s->append(space, spaceLen);
+ } else {
+ s->append(eol, eolLen);
+ }
+
word->getBBox (&xMinA, &yMinA, &xMaxA, &yMaxA);
- if (xMinA > xMin && yMinA > yMin && xMaxA < xMax && yMaxA < yMax)
+ if (xMinA > xMin && yMinA > yMin && xMaxA < xMax && yMaxA < yMax) {
s->append (word->getText ());
- else
- continue;
- if (word->getNext() && word->getNext()->primaryDelta (word) <= 0)
- {
- s->append(space, spaceLen);
- } else {
- s->append(eol, eolLen);
+ prev_word = word;
+ outOfBound = false;
+ }
+ else if (xMinA < xMax && yMinA < yMax) {
+ for (int i=0; i < word->getLength(); i++) {
+ int n;
+ word->getCharBBox(i, &xMinA, &yMinA, &xMaxA, &yMaxA);
+ if (xMinA > xMin && yMinA > yMin && xMaxA < xMax && yMaxA < yMax) {
+ word_char = word->getChar(i);
+ n = uMap->mapUnicode(*word_char, buf, sizeof(buf));
+ s->append(buf, n);
+ }
+ }
+ prev_word = word;
+ outOfBound = true;
+ }
+ else {
+ outOfBound = true;
}
}
return s;
diff --git a/test/gettext-test.cc b/test/gettext-test.cc
index 0c32a9e..58f07a9 100644
--- a/test/gettext-test.cc
+++ b/test/gettext-test.cc
@@ -50,6 +50,7 @@ int main (int argc, char *argv[])
rect = page->getCropBox();
s = textOut->getText(rect->x1, rect->y1, rect->x2, rect->y2);
+ //s = textOut->getText(0, 0, 200, 1000);
result = s->getCString ();
printf ("%s\n", result);
--
1.7.1
--=-XyuwjjX5VE+xex9UV8dk--
More information about the poppler mailing list