[poppler] poppler/GfxFont.cc poppler/GlobalParams.cc

Albert Astals Cid aacid at kemper.freedesktop.org
Tue Mar 11 16:35:26 PDT 2014


 poppler/GfxFont.cc      |  139 ++++++++++++++++++++++++++++++++++--------------
 poppler/GlobalParams.cc |    4 -
 2 files changed, 102 insertions(+), 41 deletions(-)

New commits:
commit 1ea2eb412d12d97eaf49d1e51d7fda7abd8fbf9d
Author: Jason Crain <jason at aquaticape.us>
Date:   Wed Mar 12 00:34:13 2014 +0100

    Limit numeric parsing of character names
    
    Bug #38456

diff --git a/poppler/GfxFont.cc b/poppler/GfxFont.cc
index c54181b..9eadcc7 100644
--- a/poppler/GfxFont.cc
+++ b/poppler/GfxFont.cc
@@ -30,7 +30,7 @@
 // Copyright (C) 2012 Yi Yang <ahyangyi at gmail.com>
 // Copyright (C) 2012 Suzuki Toshiya <mpsuzuki at hiroshima-u.ac.jp>
 // Copyright (C) 2012 Thomas Freitag <Thomas.Freitag at alfa.de>
-// Copyright (C) 2013 Jason Crain <jason at aquaticape.us>
+// Copyright (C) 2013, 2014 Jason Crain <jason at aquaticape.us>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -916,6 +916,88 @@ char *GfxFont::readEmbFontFile(XRef *xref, int *len) {
 // Gfx8BitFont
 //------------------------------------------------------------------------
 
+// Parse names of the form 'Axx'/'xx' (hex set) or 'Ann'/'ABnn'/'nn' (hex
+// unset): 'A', 'B' are any letters, 'xx' two hex digits, 'nn' decimal
+// digits.  On success returns gTrue and, if u is non-NULL, sets *u.
+static GBool parseNumericName(char *s, GBool hex, unsigned int *u) {
+  char *endptr;
+
+  // Skip the optional leading letters (rules differ for hex and decimal).
+  if (hex) {
+    int n = 0;
+
+    // Length of the leading alphanumeric run; junk after it is not counted.
+    while (isalnum(s[n]))
+      ++n;
+
+    // Only 2 hex digits, optionally preceded by one letter, are allowed.
+    if (n == 3 && isalpha(*s)) {
+      ++s;
+    } else if (n != 2) {
+      return gFalse;
+    }
+  } else {
+    // Strip up to two alpha characters.
+    for (int i = 0; i < 2 && isalpha(*s); ++i)
+      ++s;
+  }
+
+  int v = strtol(s, &endptr, hex ? 16 : 10); // NOTE(review): strtol also accepts leading space/sign
+
+  if (endptr == s)
+    return gFalse;
+
+  // Skip trailing junk; a leftover alphanumeric character means failure.
+  while (*endptr != '\0' && !isalnum(*endptr))
+    ++endptr;
+
+  if (*endptr == '\0') {
+    if (u)
+      *u = v;
+    return gTrue;
+  }
+  return gFalse;
+}
+
+// Returns gTrue if every entry of the font's Encoding/Differences array is
+// an int <= 5 or a name parseNumericName accepts, i.e. names like xx or Axx.
+static GBool testForNumericNames(Dict *fontDict, GBool hex) {
+  Object enc, diff, obj;
+  GBool numeric = gTrue; // stays gTrue for an empty Differences array
+
+  fontDict->lookup("Encoding", &enc);
+  if (!enc.isDict()) {
+    enc.free();
+    return gFalse;
+  }
+
+  enc.dictLookup("Differences", &diff);
+  enc.free();
+  if (!diff.isArray()) {
+    diff.free();
+    return gFalse;
+  }
+
+  for (int i = 0; i < diff.arrayGetLength() && numeric; ++i) {
+    diff.arrayGet(i, &obj);
+    if (obj.isInt()) {
+      // Every sequence must start at character code 5 or lower.
+      if (obj.getInt() > 5)
+	numeric = gFalse;
+    } else if (obj.isName()) {
+      // All character names must successfully parse.
+      if (!parseNumericName(obj.getName(), hex, NULL))
+	numeric = gFalse;
+    } else {
+      numeric = gFalse; // neither an int nor a name
+    }
+    obj.free();
+  }
+
+  diff.free();
+  return numeric;
+}
+
 Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA,
 			 GfxFontType typeA, Ref embFontIDA, Dict *fontDict):
   GfxFont(tagA, idA, nameA, typeA, embFontIDA) {
@@ -930,6 +1012,7 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA
   int code;
   char *charName;
   GBool missing, hex;
+  GBool numeric;
   Unicode toUnicode[256];
   CharCodeToUnicode *utu, *ctu2;
   Unicode uBuf[8];
@@ -1244,9 +1327,9 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA
 	      (charName[2] >= 'A' && charName[2] <= 'F'))) ||
 	    (strlen(charName) == 2 &&
 	     isxdigit(charName[0]) && isxdigit(charName[1]) &&
-	     ((charName[0] >= 'a' && charName[0] <= 'f') ||
-	      (charName[0] >= 'A' && charName[0] <= 'F') ||
-	      (charName[1] >= 'a' && charName[1] <= 'f') ||
+	     // Only check idx 1 to avoid misidentifying a decimal
+	     // number like a0
+	     ((charName[1] >= 'a' && charName[1] <= 'f') ||
 	      (charName[1] >= 'A' && charName[1] <= 'F')))) {
 	  hex = gTrue;
 	}
@@ -1257,6 +1340,8 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA
     }
   }
 
+  numeric = testForNumericNames(fontDict, hex);
+
   // construct the char code -> Unicode mapping object
   ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
 
@@ -1280,22 +1365,18 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA
 	    && (n = parseCharName(charName, uBuf, sizeof(uBuf)/sizeof(*uBuf), 
 				  gFalse, // don't check simple names (pass 1)
 				  gTrue, // do check ligatures
-				  globalParams->getMapNumericCharNames(),
+				  numeric,
 				  hex,
 				  gTrue))) { // do check variants
 	  ctu->setMapping((CharCode)code, uBuf, n);
-	} else if (globalParams->getMapUnknownCharNames()) {
-	  // if the 'mapUnknownCharNames' flag is set, do a simple pass-through
-	  // mapping for unknown character names
-	  if (charName && charName[0]) {
-	    for (n = 0; n < (int)(sizeof(uBuf)/sizeof(*uBuf)); ++n)
-	      if (!(uBuf[n] = charName[n]))
-		break;
-	    ctu->setMapping((CharCode)code, uBuf, n);
-	  } else {
+	  continue;
+	}
+
+	// if the 'mapUnknownCharNames' flag is set, do a simple pass-through
+	// mapping for unknown character names
+	if (globalParams->getMapUnknownCharNames()) {
 	    uBuf[0] = code;
 	    ctu->setMapping((CharCode)code, uBuf, 1);
-	  }
 	}
       }
     }
@@ -1495,7 +1576,7 @@ static int parseCharName(char *charName, Unicode *uBuf, int uLen,
   if (names && (uBuf[0] = globalParams->mapNameToUnicodeText(charName))) {
     return 1;
   }
-  if (numeric) {
+  if (globalParams->getMapNumericCharNames()) {
     unsigned int n = strlen(charName);
     // 3.3. otherwise, if the component is of the form "uni" (U+0075 U+006E
     // U+0069) followed by a sequence of uppercase hexadecimal digits (0 .. 9,
@@ -1538,30 +1619,10 @@ static int parseCharName(char *charName, Unicode *uBuf, int uLen,
 	return 1;
       }
     }
-    // Not in Adobe Glyph Mapping convention: look for names of the form 'Axx',
-    // 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B' are any letters, 'xx' is
-    // two hex digits, and 'nn' is 2-4 decimal digits
-    if (hex && n == 3 && isalpha(charName[0]) &&
-	isxdigit(charName[1]) && isxdigit(charName[2])) {
-      sscanf(charName+1, "%x", (unsigned int *)uBuf);
-      return 1;
-    } else if (hex && n == 2 &&
-	       isxdigit(charName[0]) && isxdigit(charName[1])) {
-      sscanf(charName, "%x", (unsigned int *)uBuf);
-      return 1;
-    } else if (!hex && n >= 2 && n <= 4 &&
-	       isdigit(charName[0]) && isdigit(charName[1])) {
-      uBuf[0] = (Unicode)atoi(charName);
+    // Not in Adobe Glyph Mapping convention: look for names like xx
+    // or Axx and parse for hex or decimal values.
+    if (numeric && parseNumericName(charName, hex, uBuf))
       return 1;
-    } else if (n >= 3 && n <= 5 &&
-	       isdigit(charName[1]) && isdigit(charName[2])) {
-      uBuf[0] = (Unicode)atoi(charName+1);
-      return 1;
-    } else if (n >= 4 && n <= 6 &&
-	       isdigit(charName[2]) && isdigit(charName[3])) {
-      uBuf[0] = (Unicode)atoi(charName+2);
-      return 1;
-    }
   }
   // 3.5. otherwise, map the component to the empty string
   return 0;
diff --git a/poppler/GlobalParams.cc b/poppler/GlobalParams.cc
index 01a3157..3d88646 100644
--- a/poppler/GlobalParams.cc
+++ b/poppler/GlobalParams.cc
@@ -34,7 +34,7 @@
 // Copyright (C) 2012 Adrian Johnson <ajohnson at redneon.com>
 // Copyright (C) 2012 Thomas Freitag <Thomas.Freitag at alfa.de>
 // Copyright (C) 2012 Peter Breitenlohner <peb at mppmu.mpg.de>
-// Copyright (C) 2013 Jason Crain <jason at aquaticape.us>
+// Copyright (C) 2013, 2014 Jason Crain <jason at aquaticape.us>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -633,7 +633,7 @@ GlobalParams::GlobalParams(const char *customPopplerDataDir)
   minLineWidth = 0.0;
   overprintPreview = gFalse;
   mapNumericCharNames = gTrue;
-  mapUnknownCharNames = gFalse;
+  mapUnknownCharNames = gTrue;
   printCommands = gFalse;
   profileCommands = gFalse;
   errQuiet = gFalse;


More information about the poppler mailing list