[poppler] poppler/CharCodeToUnicode.cc poppler/UTF.cc poppler/UTF.h

Adrian Johnson ajohnson at kemper.freedesktop.org
Fri Nov 2 17:17:05 PDT 2012


 poppler/CharCodeToUnicode.cc |   11 +++++++++--
 poppler/UTF.cc               |   11 +++++++++++
 poppler/UTF.h                |    4 ++++
 3 files changed, 24 insertions(+), 2 deletions(-)

New commits:
commit da08ebeee241198907378c6461721fddb5106875
Author: Adrian Johnson <ajohnson at redneon.com>
Date:   Sun Oct 28 12:13:22 2012 +1030

    Don't allow invalid unicode to be passed to backends

diff --git a/poppler/CharCodeToUnicode.cc b/poppler/CharCodeToUnicode.cc
index ed59395..16ba6d4 100644
--- a/poppler/CharCodeToUnicode.cc
+++ b/poppler/CharCodeToUnicode.cc
@@ -439,7 +439,7 @@ void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
       for (i = oldLen; i < mapLen; ++i) {
         map[i] = 0;
       }
-	}
+    }
   }
   if (n <= 4) {
     if (!parseHex(uStr, n, &u)) {
@@ -447,6 +447,9 @@ void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
       return;
     }
     map[code] = u + offset;
+    if (!UnicodeIsValid(map[code])) {
+      map[code] = 0xfffd;
+    }
   } else {
     if (sMapLen >= sMapSize) {
       sMapSize = sMapSize + 16;
@@ -595,7 +598,11 @@ void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
     sMap[i].len = len;
     sMap[i].u = (Unicode*)gmallocn(len, sizeof(Unicode));
     for (j = 0; j < len; ++j) {
-      sMap[i].u[j] = u[j];
+      if (UnicodeIsValid(u[j])) {
+        sMap[i].u[j] = u[j];
+      } else {
+        sMap[i].u[j] = 0xfffd;
+      }
     }
   }
 }
diff --git a/poppler/UTF.cc b/poppler/UTF.cc
index 675ac68..42c7836 100644
--- a/poppler/UTF.cc
+++ b/poppler/UTF.cc
@@ -26,6 +26,14 @@
 #include "PDFDocEncoding.h"
 #include "UTF.h"
 
+bool UnicodeIsValid(Unicode ucs4)
+{
+  return (ucs4 < 0x110000) &&
+    ((ucs4 & 0xfffff800) != 0xd800) &&
+    (ucs4 < 0xfdd0 || ucs4 > 0xfdef) &&
+    ((ucs4 & 0xfffe) != 0xfffe);
+}
+
 int UTF16toUCS4(const Unicode *utf16, int utf16Len, Unicode **ucs4)
 {
   int i, n, len;
@@ -64,6 +72,9 @@ int UTF16toUCS4(const Unicode *utf16, int utf16Len, Unicode **ucs4)
     } else {
       u[n] = utf16[i];
     }
+    if (!UnicodeIsValid(u[n])) {
+      u[n] = 0xfffd;
+    }
     n++;
   }
   *ucs4 = u;
diff --git a/poppler/UTF.h b/poppler/UTF.h
index 1111c37..248c168 100644
--- a/poppler/UTF.h
+++ b/poppler/UTF.h
@@ -32,4 +32,8 @@ int UTF16toUCS4(const Unicode *utf16, int utf16_len, Unicode **ucs4_out);
 //   returns number of UCS-4 characters
 int TextStringToUCS4(GooString *textStr, Unicode **ucs4);
 
+// check if UCS-4 character is valid
+bool UnicodeIsValid(Unicode ucs4);
+
+
 #endif


More information about the poppler mailing list