[poppler] 2 commits - poppler/Form.cc poppler/Gfx.cc poppler/OutputDev.cc poppler/OutputDev.h poppler/PDFDocEncoding.h poppler/TextOutputDev.cc poppler/TextOutputDev.h
Albert Astals Cid
aacid at kemper.freedesktop.org
Sun Dec 9 09:07:45 PST 2007
poppler/Form.cc | 2
poppler/Gfx.cc | 2
poppler/OutputDev.cc | 2
poppler/OutputDev.h | 2
poppler/PDFDocEncoding.h | 4 +
poppler/TextOutputDev.cc | 98 ++++++++++++++++++++++++++++++++++++++++++++++-
poppler/TextOutputDev.h | 11 +++++
7 files changed, 116 insertions(+), 5 deletions(-)
New commits:
commit e807f9c72c7f0c5cc0655918f676f4af54739442
Merge: bf57117... e2ea743...
Author: Albert Astals Cid <aacid at kde.org>
Date: Sun Dec 9 18:07:30 2007 +0100
Merge branch 'master' of ssh://aacid@git.freedesktop.org/git/poppler/poppler
commit bf57117df8786778faf31e5d843533004f867ff3
Author: Adrian Johnson <ajohnson at redneon.com>
Date: Sun Dec 9 18:07:00 2007 +0100
Add support for ActualText entries
Patch by Adrian Johnson with two minor changes by me (one forward declaration and a leak fix)
diff --git a/poppler/Form.cc b/poppler/Form.cc
index 334e45c..5cb4b87 100644
--- a/poppler/Form.cc
+++ b/poppler/Form.cc
@@ -27,7 +27,7 @@
#include "Catalog.h"
//return a newly allocated char* containing an UTF16BE string of size length
-static char* pdfDocEncodingToUTF16 (GooString* orig, int* length)
+char* pdfDocEncodingToUTF16 (GooString* orig, int* length)
{
//double size, a unicode char takes 2 char, add 2 for the unicode marker
*length = 2+2*orig->getLength();
diff --git a/poppler/Gfx.cc b/poppler/Gfx.cc
index 163b340..d2b3cb8 100644
--- a/poppler/Gfx.cc
+++ b/poppler/Gfx.cc
@@ -4036,7 +4036,7 @@ void Gfx::opBeginMarkedContent(Object args[], int numArgs) {
}
void Gfx::opEndMarkedContent(Object args[], int numArgs) {
- out->endMarkedContent();
+ out->endMarkedContent(state);
}
void Gfx::opMarkPoint(Object args[], int numArgs) {
diff --git a/poppler/OutputDev.cc b/poppler/OutputDev.cc
index 59184a9..dedffd3 100644
--- a/poppler/OutputDev.cc
+++ b/poppler/OutputDev.cc
@@ -123,7 +123,7 @@ void OutputDev::drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
drawImage(state, ref, str, width, height, colorMap, NULL, gFalse);
}
-void OutputDev::endMarkedContent() {
+void OutputDev::endMarkedContent(GfxState *state) {
}
void OutputDev::beginMarkedContent(char *name) {
diff --git a/poppler/OutputDev.h b/poppler/OutputDev.h
index 1e92b16..af042c6 100644
--- a/poppler/OutputDev.h
+++ b/poppler/OutputDev.h
@@ -210,7 +210,7 @@ public:
//----- grouping operators
- virtual void endMarkedContent();
+ virtual void endMarkedContent(GfxState *state);
virtual void beginMarkedContent(char *name);
virtual void beginMarkedContent(char *name, Dict *properties);
virtual void markPoint(char *name);
diff --git a/poppler/PDFDocEncoding.h b/poppler/PDFDocEncoding.h
index 3259d3e..7e5f17d 100644
--- a/poppler/PDFDocEncoding.h
+++ b/poppler/PDFDocEncoding.h
@@ -11,6 +11,10 @@
#include "CharTypes.h"
+class GooString;
+
extern Unicode pdfDocEncoding[256];
+char* pdfDocEncodingToUTF16 (GooString* orig, int* length);
+
#endif
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index e2aaa43..75a0ac0 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -32,6 +32,7 @@
#include "Link.h"
#include "TextOutputDev.h"
#include "Page.h"
+#include "PDFDocEncoding.h"
#ifdef MACOS
// needed for setting type/creator of MacOS files
@@ -4484,6 +4485,7 @@ TextOutputDev::TextOutputDev(char *fileName, GBool physLayoutA,
// set up text object
text = new TextPage(rawOrderA);
+ actualTextBMCLevel = 0;
}
TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream,
@@ -4496,6 +4498,7 @@ TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream,
doHTML = gFalse;
text = new TextPage(rawOrderA);
ok = gTrue;
+ actualTextBMCLevel = 0;
}
TextOutputDev::~TextOutputDev() {
@@ -4536,7 +4539,100 @@ void TextOutputDev::drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode c, int nBytes, Unicode *u, int uLen) {
- text->addChar(state, x, y, dx, dy, c, nBytes, u, uLen);
+ if (actualTextBMCLevel == 0) {
+ text->addChar(state, x, y, dx, dy, c, nBytes, u, uLen);
+ } else {
+ // Inside ActualText span.
+ if (newActualTextSpan) {
+ actualText_x = x;
+ actualText_y = y;
+ actualText_dx = dx;
+ actualText_dy = dy;
+ newActualTextSpan = gFalse;
+ } else {
+ if (x < actualText_x)
+ actualText_x = x;
+ if (y < actualText_y)
+ actualText_y = y;
+ if (x + dx > actualText_x + actualText_dx)
+ actualText_dx = x + dx - actualText_x;
+ if (y + dy > actualText_y + actualText_dy)
+ actualText_dy = y + dy - actualText_y;
+ }
+ }
+}
+
+void TextOutputDev::beginMarkedContent(char *name, Dict *properties)
+{
+ Object obj;
+
+ if (actualTextBMCLevel > 0) {
+ // Already inside a ActualText span.
+ actualTextBMCLevel++;
+ return;
+ }
+
+ if (properties->lookup("ActualText", &obj)) {
+ if (obj.isString()) {
+ actualText = obj.getString();
+ actualTextBMCLevel = 1;
+ newActualTextSpan = gTrue;
+ }
+ }
+}
+
+void TextOutputDev::endMarkedContent(GfxState *state)
+{
+ char *uniString = NULL;
+ Unicode *uni;
+ int length, i;
+
+ if (actualTextBMCLevel > 0) {
+ actualTextBMCLevel--;
+ if (actualTextBMCLevel == 0) {
+ // ActualText span closed. Output the span text and the
+ // extents of all the glyphs inside the span
+
+ if (newActualTextSpan) {
+ // No content inside span.
+ actualText_x = state->getCurX();
+ actualText_y = state->getCurY();
+ actualText_dx = 0;
+ actualText_dy = 0;
+ }
+
+ if (!actualText->hasUnicodeMarker()) {
+ if (actualText->getLength() > 0) {
+ //non-unicode string -- assume pdfDocEncoding and
+ //try to convert to UTF16BE
+ uniString = pdfDocEncodingToUTF16(actualText, &length);
+ } else {
+ length = 0;
+ }
+ } else {
+ uniString = actualText->getCString();
+ length = actualText->getLength();
+ }
+
+ if (length < 2)
+ length = 0;
+ else
+ length = length/2 - 1;
+ uni = new Unicode[length];
+ for (i = 0 ; i < length; i++)
+ uni[i] = (uniString[2 + i*2]<<8) + uniString[2 + i*2+1];
+
+ text->addChar(state,
+ actualText_x, actualText_y,
+ actualText_dx, actualText_dy,
+ 0, 1, uni, length);
+
+ delete [] uni;
+ if (!actualText->hasUnicodeMarker())
+ delete [] uniString;
+ delete actualText;
+ }
+ }
}
void TextOutputDev::stroke(GfxState *state) {
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index 2808a9d..db40a44 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -651,6 +651,10 @@ public:
double originX, double originY,
CharCode c, int nBytes, Unicode *u, int uLen);
+ //----- grouping operators
+ virtual void beginMarkedContent(char *name, Dict *properties);
+ virtual void endMarkedContent(GfxState *state);
+
//----- path painting
virtual void stroke(GfxState *state);
virtual void fill(GfxState *state);
@@ -725,6 +729,13 @@ private:
GBool rawOrder; // keep text in content stream order
GBool doHTML; // extra processing for HTML conversion
GBool ok; // set up ok?
+
+ int actualTextBMCLevel; // > 0 when inside ActualText span. Incremented
+ // for each nested BMC inside the span.
+ GooString *actualText; // replacement text for the span
+ GBool newActualTextSpan; // true at start of span. used to init the extent
+ double actualText_x, actualText_y; // extent of the text inside the span
+ double actualText_dx, actualText_dy;
};
#endif
More information about the poppler
mailing list