[poppler] poppler/GfxFont.cc poppler/GfxFont.h
Albert Astals Cid
aacid at kemper.freedesktop.org
Fri Jan 28 11:19:34 PST 2011
poppler/GfxFont.cc | 14 +++++++++++++-
poppler/GfxFont.h | 2 ++
2 files changed, 15 insertions(+), 1 deletion(-)
New commits:
commit e6fb20d7b3bf8ea8aedc1bcd910e035059835b5f
Author: Axel Strübing <axel.struebing at freenet.de>
Date: Fri Jan 28 19:20:15 2011 +0000
Extract text of a PDF correctly
See "[poppler] text extraction does not work" on the mailing list for more information.
diff --git a/poppler/GfxFont.cc b/poppler/GfxFont.cc
index 2d7180a..0a165a6 100644
--- a/poppler/GfxFont.cc
+++ b/poppler/GfxFont.cc
@@ -22,6 +22,7 @@
// Copyright (C) 2008, 2010 Hib Eris <hib at hiberis.nl>
// Copyright (C) 2009 Peter Kerzum <kerzum at yandex-team.ru>
// Copyright (C) 2009, 2010 David Benjamin <davidben at mit.edu>
+// Copyright (C) 2011 Axel Strübing <axel.struebing at freenet.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -176,6 +177,7 @@ GfxFont::GfxFont(char *tagA, Ref idA, GooString *nameA) {
weight = WeightNotDefined;
refCnt = 1;
dfp = NULL;
+ hasToUnicode = gFalse;
}
GfxFont::~GfxFont() {
@@ -436,6 +438,7 @@ CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits,
} else {
ctu = CharCodeToUnicode::parseCMap(buf, nBits);
}
+ hasToUnicode = gTrue;
delete buf;
return ctu;
}
@@ -1697,7 +1700,16 @@ int GfxCIDFont::getNextChar(char *s, int len, CharCode *code,
*code = (CharCode)(cid = cMap->getCID(s, len, &n));
if (ctu) {
- *uLen = ctu->mapToUnicode(cid, u);
+ if (hasToUnicode) {
+ int i = 0, c = 0;
+ while (i < n) {
+ c = (c << 8 ) + (s[i] & 0xff);
+ ++i;
+ }
+ *uLen = ctu->mapToUnicode(c, u);
+ } else {
+ *uLen = ctu->mapToUnicode(cid, u);
+ }
} else {
*uLen = 0;
}
diff --git a/poppler/GfxFont.h b/poppler/GfxFont.h
index 55f8692..09af062 100644
--- a/poppler/GfxFont.h
+++ b/poppler/GfxFont.h
@@ -19,6 +19,7 @@
// Copyright (C) 2007 Julien Rebetez <julienr at svn.gnome.org>
// Copyright (C) 2007 Jeff Muizelaar <jeff at infidigm.net>
// Copyright (C) 2007 Koji Otani <sho at bbr.jp>
+// Copyright (C) 2011 Axel Strübing <axel.struebing at freenet.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -253,6 +254,7 @@ protected:
double descent; // max depth below baseline
int refCnt;
GBool ok;
+ GBool hasToUnicode;
};
//------------------------------------------------------------------------
More information about the poppler mailing list