[poppler] poppler/CharCodeToUnicode.cc poppler/UTF.cc poppler/UTF.h
Adrian Johnson
ajohnson at kemper.freedesktop.org
Fri Nov 2 17:17:05 PDT 2012
poppler/CharCodeToUnicode.cc | 11 +++++++++--
poppler/UTF.cc | 11 +++++++++++
poppler/UTF.h | 4 ++++
3 files changed, 24 insertions(+), 2 deletions(-)
New commits:
commit da08ebeee241198907378c6461721fddb5106875
Author: Adrian Johnson <ajohnson at redneon.com>
Date: Sun Oct 28 12:13:22 2012 +1030
Don't allow invalid Unicode to be passed to backends
diff --git a/poppler/CharCodeToUnicode.cc b/poppler/CharCodeToUnicode.cc
index ed59395..16ba6d4 100644
--- a/poppler/CharCodeToUnicode.cc
+++ b/poppler/CharCodeToUnicode.cc
@@ -439,7 +439,7 @@ void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
for (i = oldLen; i < mapLen; ++i) {
map[i] = 0;
}
- }
+ }
}
if (n <= 4) {
if (!parseHex(uStr, n, &u)) {
@@ -447,6 +447,9 @@ void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
return;
}
map[code] = u + offset;
+ if (!UnicodeIsValid(map[code])) {
+ map[code] = 0xfffd;
+ }
} else {
if (sMapLen >= sMapSize) {
sMapSize = sMapSize + 16;
@@ -595,7 +598,11 @@ void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
sMap[i].len = len;
sMap[i].u = (Unicode*)gmallocn(len, sizeof(Unicode));
for (j = 0; j < len; ++j) {
- sMap[i].u[j] = u[j];
+ if (UnicodeIsValid(u[j])) {
+ sMap[i].u[j] = u[j];
+ } else {
+ sMap[i].u[j] = 0xfffd;
+ }
}
}
}
diff --git a/poppler/UTF.cc b/poppler/UTF.cc
index 675ac68..42c7836 100644
--- a/poppler/UTF.cc
+++ b/poppler/UTF.cc
@@ -26,6 +26,14 @@
#include "PDFDocEncoding.h"
#include "UTF.h"
+bool UnicodeIsValid(Unicode ucs4) // returns true only if ucs4 passes all four checks below
+{
+ return (ucs4 < 0x110000) && // inside the Unicode code space (at most U+10FFFF)
+ ((ucs4 & 0xfffff800) != 0xd800) && // not a surrogate: masking the low 11 bits matches U+D800..U+DFFF
+ (ucs4 < 0xfdd0 || ucs4 > 0xfdef) && // not in the noncharacter range U+FDD0..U+FDEF
+ ((ucs4 & 0xfffe) != 0xfffe); // low 16 bits are not FFFE/FFFF (the last two code points of every plane)
+}
+
int UTF16toUCS4(const Unicode *utf16, int utf16Len, Unicode **ucs4)
{
int i, n, len;
@@ -64,6 +72,9 @@ int UTF16toUCS4(const Unicode *utf16, int utf16Len, Unicode **ucs4)
} else {
u[n] = utf16[i];
}
+ if (!UnicodeIsValid(u[n])) {
+ u[n] = 0xfffd;
+ }
n++;
}
*ucs4 = u;
diff --git a/poppler/UTF.h b/poppler/UTF.h
index 1111c37..248c168 100644
--- a/poppler/UTF.h
+++ b/poppler/UTF.h
@@ -32,4 +32,8 @@ int UTF16toUCS4(const Unicode *utf16, int utf16_len, Unicode **ucs4_out);
// returns number of UCS-4 characters
int TextStringToUCS4(GooString *textStr, Unicode **ucs4);
+// returns true if ucs4 is a valid character: at most U+10FFFF, not a surrogate, not a noncharacter
+bool UnicodeIsValid(Unicode ucs4);
+
+
#endif
More information about the poppler
mailing list