[poppler] Branch 'xpdf303merge' - poppler/TextOutputDev.cc poppler/TextOutputDev.h

Albert Astals Cid aacid at kemper.freedesktop.org
Wed Dec 14 13:50:23 PST 2011


 poppler/TextOutputDev.cc |  268 +++++++++++++++++++++++++++++------------------
 poppler/TextOutputDev.h  |   19 +--
 2 files changed, 180 insertions(+), 107 deletions(-)

New commits:
commit c5ce12993a4d2bcd3b3e95b1f08d00dc8960678c
Author: Albert Astals Cid <aacid at kde.org>
Date:   Wed Dec 14 22:49:33 2011 +0100

    [xpdf303] Merge some stuff from TextOutputDev
    
    Yes, this is the best commit log I could think of

diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 570d53d..0c451ec 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -234,74 +234,103 @@ GBool TextFontInfo::matches(TextFontInfo *fontInfo) {
 //------------------------------------------------------------------------
 
 TextWord::TextWord(GfxState *state, int rotA, double x0, double y0,
-		   int charPosA, TextFontInfo *fontA, double fontSizeA) {
+		   TextFontInfo *fontA, double fontSizeA) {
   GfxFont *gfxFont;
   double x, y, ascent, descent;
-
+  int wMode;
+  
   rot = rotA;
-  charPos = charPosA;
-  charLen = 0;
   font = fontA;
   fontSize = fontSizeA;
   state->transform(x0, y0, &x, &y);
   if ((gfxFont = font->gfxFont)) {
     ascent = gfxFont->getAscent() * fontSize;
     descent = gfxFont->getDescent() * fontSize;
+    wMode = gfxFont->getWMode();
   } else {
     // this means that the PDF file draws text without a current font,
     // which should never happen
     ascent = 0.95 * fontSize;
     descent = -0.35 * fontSize;
+    wMode = 0;
   }
-  switch (rot) {
-  case 0:
-    yMin = y - ascent;
-    yMax = y - descent;
-    if (yMin == yMax) {
-      // this is a sanity check for a case that shouldn't happen -- but
-      // if it does happen, we want to avoid dividing by zero later
-      yMin = y;
-      yMax = y + 1;
-    }
-    base = y;
-    break;
-  case 1:
-    xMin = x + descent;
-    xMax = x + ascent;
-    if (xMin == xMax) {
-      // this is a sanity check for a case that shouldn't happen -- but
-      // if it does happen, we want to avoid dividing by zero later
+  if (wMode) { // vertical writing mode
+    // NB: the rotation value has been incremented by 1 (in
+    // TextPage::beginWord()) for vertical writing mode
+    switch (rot) {
+    case 0:
+      yMin = y - fontSize;
+      yMax = y;
+      base = y;
+      break;
+    case 1:
       xMin = x;
-      xMax = x + 1;
-    }
-    base = x;
-    break;
-  case 2:
-    yMin = y + descent;
-    yMax = y + ascent;
-    if (yMin == yMax) {
-      // this is a sanity check for a case that shouldn't happen -- but
-      // if it does happen, we want to avoid dividing by zero later
+      xMax = x + fontSize;
+      base = x;
+      break;
+    case 2:
       yMin = y;
-      yMax = y + 1;
+      yMax = y + fontSize;
+      base = y;
+      break;
+    case 3:
+      xMin = x - fontSize;
+      xMax = x;
+      base = x;
+      break;
     }
-    base = y;
-    break;
-  case 3:
-    xMin = x - ascent;
-    xMax = x - descent;
-    if (xMin == xMax) {
-      // this is a sanity check for a case that shouldn't happen -- but
-      // if it does happen, we want to avoid dividing by zero later
-      xMin = x;
-      xMax = x + 1;
+  } else { // horizontal writing mode
+    switch (rot) {
+    case 0:
+      yMin = y - ascent;
+      yMax = y - descent;
+      if (yMin == yMax) {
+	// this is a sanity check for a case that shouldn't happen -- but
+	// if it does happen, we want to avoid dividing by zero later
+	yMin = y;
+	yMax = y + 1;
+      }
+      base = y;
+      break;
+    case 1:
+      xMin = x + descent;
+      xMax = x + ascent;
+      if (xMin == xMax) {
+	// this is a sanity check for a case that shouldn't happen -- but
+	// if it does happen, we want to avoid dividing by zero later
+	xMin = x;
+	xMax = x + 1;
+      }
+      base = x;
+      break;
+    case 2:
+      yMin = y + descent;
+      yMax = y + ascent;
+      if (yMin == yMax) {
+	// this is a sanity check for a case that shouldn't happen -- but
+	// if it does happen, we want to avoid dividing by zero later
+	yMin = y;
+	yMax = y + 1;
+      }
+      base = y;
+      break;
+    case 3:
+      xMin = x - ascent;
+      xMax = x - descent;
+      if (xMin == xMax) {
+	// this is a sanity check for a case that shouldn't happen -- but
+	// if it does happen, we want to avoid dividing by zero later
+	xMin = x;
+	xMax = x + 1;
+      }
+      base = x;
+      break;
     }
-    base = x;
-    break;
   }
   text = NULL;
   charcode = NULL;
   edge = NULL;
+  charPos = NULL;
   len = size = 0;
   spaceAfter = gFalse;
   next = NULL;
@@ -327,47 +356,90 @@ TextWord::~TextWord() {
   gfree(text);
   gfree(charcode);
   gfree(edge);
+  gfree(charPos);
 }
 
 void TextWord::addChar(GfxState *state, double x, double y,
-		       double dx, double dy, CharCode c, Unicode u) {
+		       double dx, double dy, int charPosA, int charLen,
+		       CharCode c, Unicode u) {
+  int wMode;
+
   if (len == size) {
     size += 16;
     text = (Unicode *)greallocn(text, size, sizeof(Unicode));
     charcode = (Unicode *)greallocn(charcode, size, sizeof(CharCode));
     edge = (double *)greallocn(edge, (size + 1), sizeof(double));
+    charPos = (int *)greallocn(charPos, size + 1, sizeof(int));
   }
   text[len] = u;
   charcode[len] = c;
-  switch (rot) {
-  case 0:
-    if (len == 0) {
-      xMin = x;
-    }
-    edge[len] = x;
-    xMax = edge[len+1] = x + dx;
-    break;
-  case 1:
-    if (len == 0) {
-      yMin = y;
-    }
-    edge[len] = y;
-    yMax = edge[len+1] = y + dy;
-    break;
-  case 2:
-    if (len == 0) {
-      xMax = x;
-    }
-    edge[len] = x;
-    xMin = edge[len+1] = x + dx;
-    break;
-  case 3:
-    if (len == 0) {
-      yMax = y;
+  charPos[len] = charPosA;
+  charPos[len + 1] = charPosA + charLen;
+  wMode = font->gfxFont ? font->gfxFont->getWMode() : 0;
+  if (wMode) { // vertical writing mode
+    // NB: the rotation value has been incremented by 1 (in
+    // TextPage::beginWord()) for vertical writing mode
+    switch (rot) {
+    case 0:
+      if (len == 0) {
+	xMin = x - fontSize;
+      }
+      edge[len] = x - fontSize;
+      xMax = edge[len+1] = x;
+      break;
+    case 1:
+      if (len == 0) {
+	yMin = y - fontSize;
+      }
+      edge[len] = y - fontSize;
+      yMax = edge[len+1] = y;
+      break;
+    case 2:
+      if (len == 0) {
+	xMax = x + fontSize;
+      }
+      edge[len] = x + fontSize;
+      xMin = edge[len+1] = x;
+      break;
+    case 3:
+      if (len == 0) {
+	yMax = y + fontSize;
+      }
+      edge[len] = y + fontSize;
+      yMin = edge[len+1] = y;
+      break;
     }
-    edge[len] = y;
-    yMin = edge[len+1] = y + dy;
-    break;
+  } else { // horizontal writing mode
+    switch (rot) {
+    case 0:
+      if (len == 0) {
+	xMin = x;
+      }
+      edge[len] = x;
+      xMax = edge[len+1] = x + dx;
+      break;
+    case 1:
+      if (len == 0) {
+	yMin = y;
+      }
+      edge[len] = y;
+      yMax = edge[len+1] = y + dy;
+      break;
+    case 2:
+      if (len == 0) {
+	xMax = x;
+      }
+      edge[len] = x;
+      xMin = edge[len+1] = x + dx;
+      break;
+    case 3:
+      if (len == 0) {
+	yMax = y;
+      }
+      edge[len] = y;
+      yMin = edge[len+1] = y + dy;
+      break;
+   }
   }
   ++len;
 }
@@ -392,15 +464,17 @@ void TextWord::merge(TextWord *word) {
     text = (Unicode *)greallocn(text, size, sizeof(Unicode));
     charcode = (CharCode *)greallocn(charcode, (size + 1), sizeof(CharCode));
     edge = (double *)greallocn(edge, (size + 1), sizeof(double));
+    charPos = (int *)greallocn(charPos, size + 1, sizeof(int));
   }
   for (i = 0; i < word->len; ++i) {
     text[len + i] = word->text[i];
     charcode[len + i] = word->charcode[i];
     edge[len + i] = word->edge[i];
+    charPos[len + i] = word->charPos[i];
   }
   edge[len + word->len] = word->edge[word->len];
+  charPos[len + word->len] = word->charPos[word->len];
   len += word->len;
-  charLen += word->charLen;
 }
 
 inline int TextWord::primaryCmp(TextWord *word) {
@@ -792,7 +866,7 @@ void TextLine::coalesce(UnicodeMap *uMap) {
 		 word0->underlined == word1->underlined &&
 		 fabs(word0->fontSize - word1->fontSize) <
 		   maxWordFontSizeDelta * words->fontSize &&
-		 word1->charPos == word0->charPos + word0->charLen) {
+		 word1->charPos[0] == word0->charPos[word0->len]) {
 	word0->merge(word1);
 	word0->next = word1->next;
 	delete word1;
@@ -2153,12 +2227,18 @@ void TextPage::beginWord(GfxState *state, double x0, double y0) {
     m[3] = m2[3];
   }
   if (fabs(m[0] * m[3]) > fabs(m[1] * m[2])) {
-    rot = (m[3] < 0) ? 0 : 2;
+    rot = (m[0] > 0 || m[3] < 0) ? 0 : 2;
   } else {
     rot = (m[2] > 0) ? 1 : 3;
   }
 
-  curWord = new TextWord(state, rot, x0, y0, charPos, curFont, curFontSize);
+  // for vertical writing mode, the lines are effectively rotated 90
+  // degrees
+  if (state->getFont()->getWMode()) {
+    rot = (rot + 1) & 3;
+  }
+
+  curWord = new TextWord(state, rot, x0, y0, curFont, curFontSize);
 }
 
 void TextPage::addChar(GfxState *state, double x, double y,
@@ -2199,9 +2279,6 @@ void TextPage::addChar(GfxState *state, double x, double y,
 
   // break words at space character
   if (uLen == 1 && u[0] == (Unicode)0x20) {
-    if (curWord) {
-      ++curWord->charLen;
-    }
     charPos += nBytes;
     endWord();
     return;
@@ -2284,24 +2361,21 @@ void TextPage::addChar(GfxState *state, double x, double y,
 	  /* next code is a low surrogate */
 	  Unicode uu = (((u[i] & 0x3ff) << 10) | (u[i+1] & 0x3ff)) + 0x10000;
 	  i++;
-	  curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, c, uu);
+	  curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, charPos, nBytes, c, uu);
 	} else {
 	    /* missing low surrogate
 	     replace it with REPLACEMENT CHARACTER (U+FFFD) */
-	  curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, c, 0xfffd);
+	  curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, charPos, nBytes, c, 0xfffd);
 	}
       } else if (u[i] >= 0xdc00 && u[i] < 0xe000) {
 	  /* invalid low surrogate
 	   replace it with REPLACEMENT CHARACTER (U+FFFD) */
-	curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, c, 0xfffd);
+	curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, charPos, nBytes, c, 0xfffd);
       } else {
-	curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, c, u[i]);
+	curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, charPos, nBytes, c, u[i]);
       }
     }
   }
-  if (curWord) {
-    curWord->charLen += nBytes;
-  }
   charPos += nBytes;
 }
 
@@ -4606,16 +4680,14 @@ GBool TextPage::findCharRange(int pos, int length,
     blk = blocks[i];
     for (line = blk->lines; line; line = line->next) {
       for (word = line->words; word; word = word->next) {
-	if (pos < word->charPos + word->charLen &&
-	    word->charPos < pos + length) {
-	  j0 = pos - word->charPos;
-	  if (j0 < 0) {
-	    j0 = 0;
-	  }
-	  j1 = pos + length - 1 - word->charPos;
-	  if (j1 >= word->len) {
-	    j1 = word->len - 1;
-	  }
+	if (pos < word->charPos[word->len] &&
+	    pos + length > word->charPos[0]) {
+	  for (j0 = 0;
+	       j0 < word->len && pos >= word->charPos[j0 + 1];
+	       ++j0) ;
+	  for (j1 = word->len - 1;
+	       j1 > j0 && pos + length <= word->charPos[j1];
+	       --j1) ;
 	  switch (line->rot) {
 	  case 0:
 	    xMin1 = word->edge[j0];
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index dff3921..2dd78cd 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -114,14 +114,15 @@ public:
 
   // Constructor.
   TextWord(GfxState *state, int rotA, double x0, double y0,
-	   int charPosA, TextFontInfo *fontA, double fontSize);
+	   TextFontInfo *fontA, double fontSize);
 
   // Destructor.
   ~TextWord();
 
   // Add a character to the word.
   void addChar(GfxState *state, double x, double y,
-	       double dx, double dy, CharCode c, Unicode u);
+	       double dx, double dy, int charPosA, int charLen,
+	       CharCode c, Unicode u);
 
   // Merge <word> onto the end of <this>.
   void merge(TextWord *word);
@@ -159,8 +160,8 @@ public:
 		   double *xMaxA, double *yMaxA);
   double getFontSize() { return fontSize; }
   int getRotation() { return rot; }
-  int getCharPos() { return charPos; }
-  int getCharLen() { return charLen; }
+  int getCharPos() { return charPos[0]; }
+  int getCharLen() { return charPos[len] - charPos[0]; }
   GBool getSpaceAfter() { return spaceAfter; }
 #endif
   GBool isUnderlined() { return underlined; }
@@ -180,11 +181,11 @@ private:
   CharCode *charcode;		// glyph indices
   double *edge;			// "near" edge x or y coord of each char
 				//   (plus one extra entry for the last char)
-  int len;			// length of text and edge arrays
-  int size;			// size of text and edge arrays
-  int charPos;                  // character position (within content stream)
-  int charLen;                  // number of content stream characters in
-                                //   this word
+  int *charPos;			// character position (within content stream)
+				//   of each char (plus one extra entry for
+				//   the last char)
+  int len;			// length of text/edge/charPos arrays
+  int size;			// size of text/edge/charPos arrays
   TextFontInfo *font;		// font information
   double fontSize;		// font size
   GBool spaceAfter;		// set if there is a space between this


More information about the poppler mailing list