[poppler] 2 commits - poppler/Form.cc poppler/Gfx.cc poppler/OutputDev.cc poppler/OutputDev.h poppler/PDFDocEncoding.h poppler/TextOutputDev.cc poppler/TextOutputDev.h

Albert Astals Cid aacid at kemper.freedesktop.org
Sun Dec 9 09:07:45 PST 2007


 poppler/Form.cc          |    2 
 poppler/Gfx.cc           |    2 
 poppler/OutputDev.cc     |    2 
 poppler/OutputDev.h      |    2 
 poppler/PDFDocEncoding.h |    4 +
 poppler/TextOutputDev.cc |   98 ++++++++++++++++++++++++++++++++++++++++++++++-
 poppler/TextOutputDev.h  |   11 +++++
 7 files changed, 116 insertions(+), 5 deletions(-)

New commits:
commit e807f9c72c7f0c5cc0655918f676f4af54739442
Merge: bf57117... e2ea743...
Author: Albert Astals Cid <aacid at kde.org>
Date:   Sun Dec 9 18:07:30 2007 +0100

    Merge branch 'master' of ssh://aacid@git.freedesktop.org/git/poppler/poppler

commit bf57117df8786778faf31e5d843533004f867ff3
Author: Adrian Johnson <ajohnson at redneon.com>
Date:   Sun Dec 9 18:07:00 2007 +0100

    Add support for ActualText entries
    
    Patch by Adrian Johnson with two minor changes by me (one forward declaration and a leak fix)

diff --git a/poppler/Form.cc b/poppler/Form.cc
index 334e45c..5cb4b87 100644
--- a/poppler/Form.cc
+++ b/poppler/Form.cc
@@ -27,7 +27,7 @@
 #include "Catalog.h"
 
 //return a newly allocated char* containing an UTF16BE string of size length
-static char* pdfDocEncodingToUTF16 (GooString* orig, int* length)
+char* pdfDocEncodingToUTF16 (GooString* orig, int* length)
 {
   //double size, a unicode char takes 2 char, add 2 for the unicode marker
   *length = 2+2*orig->getLength();
diff --git a/poppler/Gfx.cc b/poppler/Gfx.cc
index 163b340..d2b3cb8 100644
--- a/poppler/Gfx.cc
+++ b/poppler/Gfx.cc
@@ -4036,7 +4036,7 @@ void Gfx::opBeginMarkedContent(Object args[], int numArgs) {
 }
 
 void Gfx::opEndMarkedContent(Object args[], int numArgs) {
-  out->endMarkedContent();
+  out->endMarkedContent(state);
 }
 
 void Gfx::opMarkPoint(Object args[], int numArgs) {
diff --git a/poppler/OutputDev.cc b/poppler/OutputDev.cc
index 59184a9..dedffd3 100644
--- a/poppler/OutputDev.cc
+++ b/poppler/OutputDev.cc
@@ -123,7 +123,7 @@ void OutputDev::drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
   drawImage(state, ref, str, width, height, colorMap, NULL, gFalse);
 }
 
-void OutputDev::endMarkedContent() {
+void OutputDev::endMarkedContent(GfxState *state) {
 }
 
 void OutputDev::beginMarkedContent(char *name) {
diff --git a/poppler/OutputDev.h b/poppler/OutputDev.h
index 1e92b16..af042c6 100644
--- a/poppler/OutputDev.h
+++ b/poppler/OutputDev.h
@@ -210,7 +210,7 @@ public:
 
   //----- grouping operators
 
-  virtual void endMarkedContent();
+  virtual void endMarkedContent(GfxState *state);
   virtual void beginMarkedContent(char *name);
   virtual void beginMarkedContent(char *name, Dict *properties);
   virtual void markPoint(char *name);
diff --git a/poppler/PDFDocEncoding.h b/poppler/PDFDocEncoding.h
index 3259d3e..7e5f17d 100644
--- a/poppler/PDFDocEncoding.h
+++ b/poppler/PDFDocEncoding.h
@@ -11,6 +11,10 @@
 
 #include "CharTypes.h"
 
+class GooString;
+
 extern Unicode pdfDocEncoding[256];
 
+char* pdfDocEncodingToUTF16 (GooString* orig, int* length);
+
 #endif
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index e2aaa43..75a0ac0 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -32,6 +32,7 @@
 #include "Link.h"
 #include "TextOutputDev.h"
 #include "Page.h"
+#include "PDFDocEncoding.h"
 
 #ifdef MACOS
 // needed for setting type/creator of MacOS files
@@ -4484,6 +4485,7 @@ TextOutputDev::TextOutputDev(char *fileName, GBool physLayoutA,
 
   // set up text object
   text = new TextPage(rawOrderA);
+  actualTextBMCLevel = 0;
 }
 
 TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream,
@@ -4496,6 +4498,7 @@ TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream,
   doHTML = gFalse;
   text = new TextPage(rawOrderA);
   ok = gTrue;
+  actualTextBMCLevel = 0;
 }
 
 TextOutputDev::~TextOutputDev() {
@@ -4536,7 +4539,100 @@ void TextOutputDev::drawChar(GfxState *state, double x, double y,
 			     double dx, double dy,
 			     double originX, double originY,
 			     CharCode c, int nBytes, Unicode *u, int uLen) {
-  text->addChar(state, x, y, dx, dy, c, nBytes, u, uLen);
+  if (actualTextBMCLevel == 0) {
+    text->addChar(state, x, y, dx, dy, c, nBytes, u, uLen);
+  } else {
+    // Inside ActualText span.
+    if (newActualTextSpan) {
+      actualText_x = x;
+      actualText_y = y;
+      actualText_dx = dx;
+      actualText_dy = dy;
+      newActualTextSpan = gFalse;
+    } else {
+      if (x < actualText_x)
+	actualText_x = x;
+      if (y < actualText_y)
+	actualText_y = y;
+      if (x + dx > actualText_x + actualText_dx)
+	actualText_dx = x + dx - actualText_x;
+      if (y + dy > actualText_y + actualText_dy)
+	actualText_dy = y + dy - actualText_y;
+    }
+  }
+}
+
+// Open an ActualText span, or count nesting if one is already open.
+void TextOutputDev::beginMarkedContent(char *name, Dict *properties)
+{
+  Object obj;
+  if (actualTextBMCLevel > 0) {
+    actualTextBMCLevel++;	// nested sequence inside the span
+    return;
+  }
+  if (properties->lookup("ActualText", &obj) && obj.isString()) {
+    // actualText takes over the string; endMarkedContent() deletes it.
+    actualText = obj.getString();
+    actualTextBMCLevel = 1;
+    newActualTextSpan = gTrue;
+  } else {
+    // Non-string entry: no span is opened and nothing else releases obj.
+    obj.free();
+  }
+}
+
+// Close one marked-content level; when the ActualText span itself ends,
+// emit its replacement text as a single addChar() over the span extent.
+void TextOutputDev::endMarkedContent(GfxState *state)
+{
+  char *uniString = NULL;
+  Unicode *uni;
+  int length, i;
+
+  if (actualTextBMCLevel > 0) {
+    actualTextBMCLevel--;
+    if (actualTextBMCLevel == 0) {
+      if (newActualTextSpan) {
+	// No content inside span: zero-size extent at the current point.
+	actualText_x = state->getCurX();
+	actualText_y = state->getCurY();
+	actualText_dx = 0;
+	actualText_dy = 0;
+      }
+
+      if (!actualText->hasUnicodeMarker()) {
+	if (actualText->getLength() > 0) {
+	  //non-unicode string -- assume pdfDocEncoding and
+	  //try to convert to UTF16BE
+	  uniString = pdfDocEncodingToUTF16(actualText, &length);
+	} else {
+	  length = 0;
+	}
+      } else {
+	uniString = actualText->getCString();
+	length = actualText->getLength();
+      }
+
+      // Drop the 2-byte marker; every remaining byte pair is one value.
+      length = (length < 2) ? 0 : length/2 - 1;
+      uni = new Unicode[length];
+      // Mask each byte: plain char may be signed, and a sign-extended
+      // byte would corrupt the combined big-endian value.
+      for (i = 0 ; i < length; i++)
+	uni[i] = ((uniString[2 + i*2] & 0xff) << 8) | (uniString[2 + i*2+1] & 0xff);
+
+      text->addChar(state,
+		    actualText_x, actualText_y,
+		    actualText_dx, actualText_dy,
+		    0, 1, uni, length);
+
+      // uniString was allocated for us only on the conversion path.
+      delete [] uni;
+      if (!actualText->hasUnicodeMarker())
+	delete [] uniString;
+      delete actualText;
+    }
+  }
 }
 
 void TextOutputDev::stroke(GfxState *state) {
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index 2808a9d..db40a44 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -651,6 +651,10 @@ public:
 			double originX, double originY,
 			CharCode c, int nBytes, Unicode *u, int uLen);
 
+  //----- grouping operators
+  virtual void beginMarkedContent(char *name, Dict *properties);
+  virtual void endMarkedContent(GfxState *state);
+
   //----- path painting
   virtual void stroke(GfxState *state);
   virtual void fill(GfxState *state);
@@ -725,6 +729,13 @@ private:
   GBool rawOrder;		// keep text in content stream order
   GBool doHTML;			// extra processing for HTML conversion
   GBool ok;			// set up ok?
+
+  int actualTextBMCLevel;       // > 0 when inside ActualText span. Incremented
+                                // for each nested BMC inside the span.
+  GooString *actualText;        // replacement text for the span
+  GBool newActualTextSpan;      // true at start of span. used to init the extent
+  double actualText_x, actualText_y; // extent of the text inside the span
+  double actualText_dx, actualText_dy;
 };
 
 #endif


More information about the poppler mailing list