[poppler] Branch 'poppler-0.16' - 2 commits - NEWS poppler/GfxFont.cc poppler/GfxFont.h

Albert Astals Cid aacid at kemper.freedesktop.org
Fri Jan 28 11:19:53 PST 2011


 NEWS               |    1 +
 poppler/GfxFont.cc |   14 +++++++++++++-
 poppler/GfxFont.h  |    2 ++
 3 files changed, 16 insertions(+), 1 deletion(-)

New commits:
commit 301a07c4c303e16c537090563cefb7d6f867977b
Author: Axel Strübing <axel.struebing at freenet.de>
Date:   Fri Jan 28 19:20:15 2011 +0000

    Extract text of a pdf correctly
    
    See "[poppler] text extraction does not work" in the mailing list for more info

diff --git a/poppler/GfxFont.cc b/poppler/GfxFont.cc
index 2d7180a..0a165a6 100644
--- a/poppler/GfxFont.cc
+++ b/poppler/GfxFont.cc
@@ -22,6 +22,7 @@
 // Copyright (C) 2008, 2010 Hib Eris <hib at hiberis.nl>
 // Copyright (C) 2009 Peter Kerzum <kerzum at yandex-team.ru>
 // Copyright (C) 2009, 2010 David Benjamin <davidben at mit.edu>
+// Copyright (C) 2011 Axel Strübing <axel.struebing at freenet.de>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -176,6 +177,7 @@ GfxFont::GfxFont(char *tagA, Ref idA, GooString *nameA) {
   weight = WeightNotDefined;
   refCnt = 1;
   dfp = NULL;
+  hasToUnicode = gFalse;
 }
 
 GfxFont::~GfxFont() {
@@ -436,6 +438,7 @@ CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits,
   } else {
     ctu = CharCodeToUnicode::parseCMap(buf, nBits);
   }
+  hasToUnicode = gTrue;
   delete buf;
   return ctu;
 }
@@ -1697,7 +1700,16 @@ int GfxCIDFont::getNextChar(char *s, int len, CharCode *code,
 
   *code = (CharCode)(cid = cMap->getCID(s, len, &n));
   if (ctu) {
-    *uLen = ctu->mapToUnicode(cid, u);
+    if (hasToUnicode) {
+      int i = 0, c = 0;
+      while (i < n) {
+	c = (c << 8 ) + (s[i] & 0xff);
+	++i;
+      }
+      *uLen = ctu->mapToUnicode(c, u);
+    } else {
+      *uLen = ctu->mapToUnicode(cid, u);
+    }
   } else {
     *uLen = 0;
   }
diff --git a/poppler/GfxFont.h b/poppler/GfxFont.h
index 55f8692..09af062 100644
--- a/poppler/GfxFont.h
+++ b/poppler/GfxFont.h
@@ -19,6 +19,7 @@
 // Copyright (C) 2007 Julien Rebetez <julienr at svn.gnome.org>
 // Copyright (C) 2007 Jeff Muizelaar <jeff at infidigm.net>
 // Copyright (C) 2007 Koji Otani <sho at bbr.jp>
+// Copyright (C) 2011 Axel Strübing <axel.struebing at freenet.de>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -253,6 +254,7 @@ protected:
   double descent;		// max depth below baseline
   int refCnt;
   GBool ok;
+  GBool hasToUnicode;
 };
 
 //------------------------------------------------------------------------
commit f424374611d68f45f9bd467a395f65db1606597d
Author: Albert Astals Cid <aacid at kde.org>
Date:   Thu Jan 27 20:01:01 2011 +0000

    forgot this is part of 0.16.1 too
    
    Let's see if I can retag...

diff --git a/NEWS b/NEWS
index 6d3132b..2e3e46c 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,7 @@ Release 0.16.1
 	 * Fix line selection, dont check y for Line selections
 	 * Include zlib header in PNGWriter.cc
 	 * Fix leak in Splash backend when doing axial shaded fills
+	 * Fix label to index conversion on multiple prefixes
 
 	glib:
 	 * Use NULL instead of FALSE for functions returning a pointer


More information about the poppler mailing list