[poppler] utils/pdfinfo.cc
Albert Astals Cid
aacid at kemper.freedesktop.org
Tue Feb 21 15:06:42 PST 2012
utils/pdfinfo.cc | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
New commits:
commit fde3bed0f400a50f31f1f6bcee44ac1b2c17ddc6
Author: Albert Astals Cid <aacid at kde.org>
Date: Wed Feb 22 00:03:37 2012 +0100
pdfinfo: decode UTF-16 surrogate pairs
Based on a patch by Adrian Johnson.
Bug 23075
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index e0a6f7a..cdc5375 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -14,9 +14,10 @@
// under GPL version 2 or later
//
// Copyright (C) 2006 Dom Lachowicz <cinamod at hotmail.com>
-// Copyright (C) 2007-2010 Albert Astals Cid <aacid at kde.org>
+// Copyright (C) 2007-2010, 2012 Albert Astals Cid <aacid at kde.org>
// Copyright (C) 2010 Hib Eris <hib at hiberis.nl>
// Copyright (C) 2011 Vittal Aithal <vittal.aithal at cognidox.com>
+// Copyright (C) 2012 Adrian Johnson <ajohnson at redneon.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -379,7 +380,7 @@ static void printInfoString(Dict *infoDict, const char *key, const char *text,
Object obj;
GooString *s1;
GBool isUnicode;
- Unicode u;
+ Unicode u, u2;
char buf[8];
int i, n;
@@ -399,6 +400,15 @@ static void printInfoString(Dict *infoDict, const char *key, const char *text,
u = ((s1->getChar(i) & 0xff) << 8) |
(s1->getChar(i+1) & 0xff);
i += 2;
+ if (u >= 0xd800 && u <= 0xdbff && i < obj.getString()->getLength()) {
+ // surrogate pair
+ u2 = ((s1->getChar(i) & 0xff) << 8) |
+ (s1->getChar(i+1) & 0xff);
+ i += 2;
+ if (u2 >= 0xdc00 && u2 <= 0xdfff) {
+ u = 0x10000 + ((u - 0xd800) << 10) + (u2 - 0xdc00);
+ }
+ }
} else {
u = pdfDocEncoding[s1->getChar(i) & 0xff];
++i;
More information about the poppler mailing list