[poppler] Branch 'xpdf303merge' - poppler/TextOutputDev.cc poppler/TextOutputDev.h
Albert Astals Cid
aacid at kemper.freedesktop.org
Wed Dec 14 13:50:23 PST 2011
poppler/TextOutputDev.cc | 268 +++++++++++++++++++++++++++++------------------
poppler/TextOutputDev.h | 19 +--
2 files changed, 180 insertions(+), 107 deletions(-)
New commits:
commit c5ce12993a4d2bcd3b3e95b1f08d00dc8960678c
Author: Albert Astals Cid <aacid at kde.org>
Date: Wed Dec 14 22:49:33 2011 +0100
[xpdf303] Merge some stuff from TextOutputDev
Yes, this is the best commit log i could think of
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 570d53d..0c451ec 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -234,74 +234,103 @@ GBool TextFontInfo::matches(TextFontInfo *fontInfo) {
//------------------------------------------------------------------------
TextWord::TextWord(GfxState *state, int rotA, double x0, double y0,
- int charPosA, TextFontInfo *fontA, double fontSizeA) {
+ TextFontInfo *fontA, double fontSizeA) {
GfxFont *gfxFont;
double x, y, ascent, descent;
-
+ int wMode;
+
rot = rotA;
- charPos = charPosA;
- charLen = 0;
font = fontA;
fontSize = fontSizeA;
state->transform(x0, y0, &x, &y);
if ((gfxFont = font->gfxFont)) {
ascent = gfxFont->getAscent() * fontSize;
descent = gfxFont->getDescent() * fontSize;
+ wMode = gfxFont->getWMode();
} else {
// this means that the PDF file draws text without a current font,
// which should never happen
ascent = 0.95 * fontSize;
descent = -0.35 * fontSize;
+ wMode = 0;
}
- switch (rot) {
- case 0:
- yMin = y - ascent;
- yMax = y - descent;
- if (yMin == yMax) {
- // this is a sanity check for a case that shouldn't happen -- but
- // if it does happen, we want to avoid dividing by zero later
- yMin = y;
- yMax = y + 1;
- }
- base = y;
- break;
- case 1:
- xMin = x + descent;
- xMax = x + ascent;
- if (xMin == xMax) {
- // this is a sanity check for a case that shouldn't happen -- but
- // if it does happen, we want to avoid dividing by zero later
+ if (wMode) { // vertical writing mode
+ // NB: the rotation value has been incremented by 1 (in
+ // TextPage::beginWord()) for vertical writing mode
+ switch (rot) {
+ case 0:
+ yMin = y - fontSize;
+ yMax = y;
+ base = y;
+ break;
+ case 1:
xMin = x;
- xMax = x + 1;
- }
- base = x;
- break;
- case 2:
- yMin = y + descent;
- yMax = y + ascent;
- if (yMin == yMax) {
- // this is a sanity check for a case that shouldn't happen -- but
- // if it does happen, we want to avoid dividing by zero later
+ xMax = x + fontSize;
+ base = x;
+ break;
+ case 2:
yMin = y;
- yMax = y + 1;
+ yMax = y + fontSize;
+ base = y;
+ break;
+ case 3:
+ xMin = x - fontSize;
+ xMax = x;
+ base = x;
+ break;
}
- base = y;
- break;
- case 3:
- xMin = x - ascent;
- xMax = x - descent;
- if (xMin == xMax) {
- // this is a sanity check for a case that shouldn't happen -- but
- // if it does happen, we want to avoid dividing by zero later
- xMin = x;
- xMax = x + 1;
+ } else { // horizontal writing mode
+ switch (rot) {
+ case 0:
+ yMin = y - ascent;
+ yMax = y - descent;
+ if (yMin == yMax) {
+ // this is a sanity check for a case that shouldn't happen -- but
+ // if it does happen, we want to avoid dividing by zero later
+ yMin = y;
+ yMax = y + 1;
+ }
+ base = y;
+ break;
+ case 1:
+ xMin = x + descent;
+ xMax = x + ascent;
+ if (xMin == xMax) {
+ // this is a sanity check for a case that shouldn't happen -- but
+ // if it does happen, we want to avoid dividing by zero later
+ xMin = x;
+ xMax = x + 1;
+ }
+ base = x;
+ break;
+ case 2:
+ yMin = y + descent;
+ yMax = y + ascent;
+ if (yMin == yMax) {
+ // this is a sanity check for a case that shouldn't happen -- but
+ // if it does happen, we want to avoid dividing by zero later
+ yMin = y;
+ yMax = y + 1;
+ }
+ base = y;
+ break;
+ case 3:
+ xMin = x - ascent;
+ xMax = x - descent;
+ if (xMin == xMax) {
+ // this is a sanity check for a case that shouldn't happen -- but
+ // if it does happen, we want to avoid dividing by zero later
+ xMin = x;
+ xMax = x + 1;
+ }
+ base = x;
+ break;
}
- base = x;
- break;
}
text = NULL;
charcode = NULL;
edge = NULL;
+ charPos = NULL;
len = size = 0;
spaceAfter = gFalse;
next = NULL;
@@ -327,47 +356,90 @@ TextWord::~TextWord() {
gfree(text);
gfree(charcode);
gfree(edge);
+ gfree(charPos);
}
void TextWord::addChar(GfxState *state, double x, double y,
- double dx, double dy, CharCode c, Unicode u) {
+ double dx, double dy, int charPosA, int charLen,
+ CharCode c, Unicode u) {
+ int wMode;
+
if (len == size) {
size += 16;
text = (Unicode *)greallocn(text, size, sizeof(Unicode));
charcode = (Unicode *)greallocn(charcode, size, sizeof(CharCode));
edge = (double *)greallocn(edge, (size + 1), sizeof(double));
+ charPos = (int *)greallocn(charPos, size + 1, sizeof(int));
}
text[len] = u;
charcode[len] = c;
- switch (rot) {
- case 0:
- if (len == 0) {
- xMin = x;
- }
- edge[len] = x;
- xMax = edge[len+1] = x + dx;
- break;
- case 1:
- if (len == 0) {
- yMin = y;
- }
- edge[len] = y;
- yMax = edge[len+1] = y + dy;
- break;
- case 2:
- if (len == 0) {
- xMax = x;
- }
- edge[len] = x;
- xMin = edge[len+1] = x + dx;
- break;
- case 3:
- if (len == 0) {
- yMax = y;
+ charPos[len] = charPosA;
+ charPos[len + 1] = charPosA + charLen;
+ wMode = font->gfxFont ? font->gfxFont->getWMode() : 0;
+ if (wMode) { // vertical writing mode
+ // NB: the rotation value has been incremented by 1 (in
+ // TextPage::beginWord()) for vertical writing mode
+ switch (rot) {
+ case 0:
+ if (len == 0) {
+ xMin = x - fontSize;
+ }
+ edge[len] = x - fontSize;
+ xMax = edge[len+1] = x;
+ break;
+ case 1:
+ if (len == 0) {
+ yMin = y - fontSize;
+ }
+ edge[len] = y - fontSize;
+ yMax = edge[len+1] = y;
+ break;
+ case 2:
+ if (len == 0) {
+ xMax = x + fontSize;
+ }
+ edge[len] = x + fontSize;
+ xMin = edge[len+1] = x;
+ break;
+ case 3:
+ if (len == 0) {
+ yMax = y + fontSize;
+ }
+ edge[len] = y + fontSize;
+ yMin = edge[len+1] = y;
+ break;
}
- edge[len] = y;
- yMin = edge[len+1] = y + dy;
- break;
+ } else { // horizontal writing mode
+ switch (rot) {
+ case 0:
+ if (len == 0) {
+ xMin = x;
+ }
+ edge[len] = x;
+ xMax = edge[len+1] = x + dx;
+ break;
+ case 1:
+ if (len == 0) {
+ yMin = y;
+ }
+ edge[len] = y;
+ yMax = edge[len+1] = y + dy;
+ break;
+ case 2:
+ if (len == 0) {
+ xMax = x;
+ }
+ edge[len] = x;
+ xMin = edge[len+1] = x + dx;
+ break;
+ case 3:
+ if (len == 0) {
+ yMax = y;
+ }
+ edge[len] = y;
+ yMin = edge[len+1] = y + dy;
+ break;
+ }
}
++len;
}
@@ -392,15 +464,17 @@ void TextWord::merge(TextWord *word) {
text = (Unicode *)greallocn(text, size, sizeof(Unicode));
charcode = (CharCode *)greallocn(charcode, (size + 1), sizeof(CharCode));
edge = (double *)greallocn(edge, (size + 1), sizeof(double));
+ charPos = (int *)greallocn(charPos, size + 1, sizeof(int));
}
for (i = 0; i < word->len; ++i) {
text[len + i] = word->text[i];
charcode[len + i] = word->charcode[i];
edge[len + i] = word->edge[i];
+ charPos[len + i] = word->charPos[i];
}
edge[len + word->len] = word->edge[word->len];
+ charPos[len + word->len] = word->charPos[word->len];
len += word->len;
- charLen += word->charLen;
}
inline int TextWord::primaryCmp(TextWord *word) {
@@ -792,7 +866,7 @@ void TextLine::coalesce(UnicodeMap *uMap) {
word0->underlined == word1->underlined &&
fabs(word0->fontSize - word1->fontSize) <
maxWordFontSizeDelta * words->fontSize &&
- word1->charPos == word0->charPos + word0->charLen) {
+ word1->charPos[0] == word0->charPos[word0->len]) {
word0->merge(word1);
word0->next = word1->next;
delete word1;
@@ -2153,12 +2227,18 @@ void TextPage::beginWord(GfxState *state, double x0, double y0) {
m[3] = m2[3];
}
if (fabs(m[0] * m[3]) > fabs(m[1] * m[2])) {
- rot = (m[3] < 0) ? 0 : 2;
+ rot = (m[0] > 0 || m[3] < 0) ? 0 : 2;
} else {
rot = (m[2] > 0) ? 1 : 3;
}
- curWord = new TextWord(state, rot, x0, y0, charPos, curFont, curFontSize);
+ // for vertical writing mode, the lines are effectively rotated 90
+ // degrees
+ if (state->getFont()->getWMode()) {
+ rot = (rot + 1) & 3;
+ }
+
+ curWord = new TextWord(state, rot, x0, y0, curFont, curFontSize);
}
void TextPage::addChar(GfxState *state, double x, double y,
@@ -2199,9 +2279,6 @@ void TextPage::addChar(GfxState *state, double x, double y,
// break words at space character
if (uLen == 1 && u[0] == (Unicode)0x20) {
- if (curWord) {
- ++curWord->charLen;
- }
charPos += nBytes;
endWord();
return;
@@ -2284,24 +2361,21 @@ void TextPage::addChar(GfxState *state, double x, double y,
/* next code is a low surrogate */
Unicode uu = (((u[i] & 0x3ff) << 10) | (u[i+1] & 0x3ff)) + 0x10000;
i++;
- curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, c, uu);
+ curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, charPos, nBytes, c, uu);
} else {
/* missing low surrogate
replace it with REPLACEMENT CHARACTER (U+FFFD) */
- curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, c, 0xfffd);
+ curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, charPos, nBytes, c, 0xfffd);
}
} else if (u[i] >= 0xdc00 && u[i] < 0xe000) {
/* invalid low surrogate
replace it with REPLACEMENT CHARACTER (U+FFFD) */
- curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, c, 0xfffd);
+ curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, charPos, nBytes, c, 0xfffd);
} else {
- curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, c, u[i]);
+ curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, charPos, nBytes, c, u[i]);
}
}
}
- if (curWord) {
- curWord->charLen += nBytes;
- }
charPos += nBytes;
}
@@ -4606,16 +4680,14 @@ GBool TextPage::findCharRange(int pos, int length,
blk = blocks[i];
for (line = blk->lines; line; line = line->next) {
for (word = line->words; word; word = word->next) {
- if (pos < word->charPos + word->charLen &&
- word->charPos < pos + length) {
- j0 = pos - word->charPos;
- if (j0 < 0) {
- j0 = 0;
- }
- j1 = pos + length - 1 - word->charPos;
- if (j1 >= word->len) {
- j1 = word->len - 1;
- }
+ if (pos < word->charPos[word->len] &&
+ pos + length > word->charPos[0]) {
+ for (j0 = 0;
+ j0 < word->len && pos >= word->charPos[j0 + 1];
+ ++j0) ;
+ for (j1 = word->len - 1;
+ j1 > j0 && pos + length <= word->charPos[j1];
+ --j1) ;
switch (line->rot) {
case 0:
xMin1 = word->edge[j0];
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index dff3921..2dd78cd 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -114,14 +114,15 @@ public:
// Constructor.
TextWord(GfxState *state, int rotA, double x0, double y0,
- int charPosA, TextFontInfo *fontA, double fontSize);
+ TextFontInfo *fontA, double fontSize);
// Destructor.
~TextWord();
// Add a character to the word.
void addChar(GfxState *state, double x, double y,
- double dx, double dy, CharCode c, Unicode u);
+ double dx, double dy, int charPosA, int charLen,
+ CharCode c, Unicode u);
// Merge <word> onto the end of <this>.
void merge(TextWord *word);
@@ -159,8 +160,8 @@ public:
double *xMaxA, double *yMaxA);
double getFontSize() { return fontSize; }
int getRotation() { return rot; }
- int getCharPos() { return charPos; }
- int getCharLen() { return charLen; }
+ int getCharPos() { return charPos[0]; }
+ int getCharLen() { return charPos[len] - charPos[0]; }
GBool getSpaceAfter() { return spaceAfter; }
#endif
GBool isUnderlined() { return underlined; }
@@ -180,11 +181,11 @@ private:
CharCode *charcode; // glyph indices
double *edge; // "near" edge x or y coord of each char
// (plus one extra entry for the last char)
- int len; // length of text and edge arrays
- int size; // size of text and edge arrays
- int charPos; // character position (within content stream)
- int charLen; // number of content stream characters in
- // this word
+ int *charPos; // character position (within content stream)
+ // of each char (plus one extra entry for
+ // the last char)
+ int len; // length of text/edge/charPos arrays
+ int size; // size of text/edge/charPos arrays
TextFontInfo *font; // font information
double fontSize; // font size
GBool spaceAfter; // set if there is a space between this
More information about the poppler
mailing list