[poppler] utils/pdfinfo.cc

Albert Astals Cid aacid at kemper.freedesktop.org
Tue Feb 21 15:06:42 PST 2012


 utils/pdfinfo.cc |   14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

New commits:
commit fde3bed0f400a50f31f1f6bcee44ac1b2c17ddc6
Author: Albert Astals Cid <aacid at kde.org>
Date:   Wed Feb 22 00:03:37 2012 +0100

    pdfinfo: decode utf-16 surrogate pairs
    
    Based on a patch by Adrian Johnson
    Bug 23075

diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index e0a6f7a..cdc5375 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -14,9 +14,10 @@
 // under GPL version 2 or later
 //
 // Copyright (C) 2006 Dom Lachowicz <cinamod at hotmail.com>
-// Copyright (C) 2007-2010 Albert Astals Cid <aacid at kde.org>
+// Copyright (C) 2007-2010, 2012 Albert Astals Cid <aacid at kde.org>
 // Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
 // Copyright (C) 2011 Vittal Aithal <vittal.aithal at cognidox.com>
+// Copyright (C) 2012 Adrian Johnson <ajohnson at redneon.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -379,7 +380,7 @@ static void printInfoString(Dict *infoDict, const char *key, const char *text,
   Object obj;
   GooString *s1;
   GBool isUnicode;
-  Unicode u;
+  Unicode u, u2;
   char buf[8];
   int i, n;
 
@@ -399,6 +400,15 @@ static void printInfoString(Dict *infoDict, const char *key, const char *text,
 	u = ((s1->getChar(i) & 0xff) << 8) |
 	    (s1->getChar(i+1) & 0xff);
 	i += 2;
+	if (u >= 0xd800 && u <= 0xdbff && i < obj.getString()->getLength()) {
+	  // surrogate pair
+	  u2 = ((s1->getChar(i) & 0xff) << 8) |
+	    (s1->getChar(i+1) & 0xff);
+	  i += 2;
+	  if (u2 >= 0xdc00 && u2 <= 0xdfff) {
+	    u = 0x10000 + ((u - 0xd800) << 10) + (u2 - 0xdc00);
+	  }
+	}
       } else {
 	u = pdfDocEncoding[s1->getChar(i) & 0xff];
 	++i;


More information about the poppler mailing list