[poppler] poppler/GfxFont.cc poppler/GlobalParams.cc
Albert Astals Cid
aacid at kemper.freedesktop.org
Tue Mar 11 16:35:26 PDT 2014
poppler/GfxFont.cc | 139 ++++++++++++++++++++++++++++++++++--------------
poppler/GlobalParams.cc | 4 -
2 files changed, 102 insertions(+), 41 deletions(-)
New commits:
commit 1ea2eb412d12d97eaf49d1e51d7fda7abd8fbf9d
Author: Jason Crain <jason at aquaticape.us>
Date: Wed Mar 12 00:34:13 2014 +0100
Limit numeric parsing of character names
Bug #38456
diff --git a/poppler/GfxFont.cc b/poppler/GfxFont.cc
index c54181b..9eadcc7 100644
--- a/poppler/GfxFont.cc
+++ b/poppler/GfxFont.cc
@@ -30,7 +30,7 @@
// Copyright (C) 2012 Yi Yang <ahyangyi at gmail.com>
// Copyright (C) 2012 Suzuki Toshiya <mpsuzuki at hiroshima-u.ac.jp>
// Copyright (C) 2012 Thomas Freitag <Thomas.Freitag at alfa.de>
-// Copyright (C) 2013 Jason Crain <jason at aquaticape.us>
+// Copyright (C) 2013, 2014 Jason Crain <jason at aquaticape.us>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -916,6 +916,88 @@ char *GfxFont::readEmbFontFile(XRef *xref, int *len) {
// Gfx8BitFont
//------------------------------------------------------------------------
+// Parse character names of the form 'Axx', 'xx', 'Ann', 'ABnn', or
+// 'nn', where 'A' and 'B' are any letters, 'xx' is two hex digits,
+// and 'nn' is decimal digits.
+static GBool parseNumericName(char *s, GBool hex, unsigned int *u) {
+ char *endptr;
+
+ // Strip leading alpha characters.
+ if (hex) {
+ int n = 0;
+
+ // Get string length while ignoring junk at end.
+ while (isalnum(s[n]))
+ ++n;
+
+ // Only 2 hex characters with optional leading alpha is allowed.
+ if (n == 3 && isalpha(*s)) {
+ ++s;
+ } else if (n != 2) {
+ return gFalse;
+ }
+ } else {
+ // Strip up to two alpha characters.
+ for (int i = 0; i < 2 && isalpha(*s); ++i)
+ ++s;
+ }
+
+ int v = strtol(s, &endptr, hex ? 16 : 10);
+
+ if (endptr == s)
+ return gFalse;
+
+ // Skip trailing junk characters.
+ while (*endptr != '\0' && !isalnum(*endptr))
+ ++endptr;
+
+ if (*endptr == '\0') {
+ if (u)
+ *u = v;
+ return gTrue;
+ }
+ return gFalse;
+}
+
+// Returns gTrue if the font has character names like xx or Axx which
+// should be parsed for hex or decimal values.
+static GBool testForNumericNames(Dict *fontDict, GBool hex) {
+ Object enc, diff, obj;
+ GBool numeric = gTrue;
+
+ fontDict->lookup("Encoding", &enc);
+ if (!enc.isDict()) {
+ enc.free();
+ return gFalse;
+ }
+
+ enc.dictLookup("Differences", &diff);
+ enc.free();
+ if (!diff.isArray()) {
+ diff.free();
+ return gFalse;
+ }
+
+ for (int i = 0; i < diff.arrayGetLength() && numeric; ++i) {
+ diff.arrayGet(i, &obj);
+ if (obj.isInt()) {
+ // All sequences must start between character codes 0 and 5.
+ if (obj.getInt() > 5)
+ numeric = gFalse;
+ } else if (obj.isName()) {
+ // All character names must successfully parse.
+ if (!parseNumericName(obj.getName(), hex, NULL))
+ numeric = gFalse;
+ } else {
+ numeric = gFalse;
+ }
+ obj.free();
+ }
+
+ diff.free();
+ return numeric;
+}
+
Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA,
GfxFontType typeA, Ref embFontIDA, Dict *fontDict):
GfxFont(tagA, idA, nameA, typeA, embFontIDA) {
@@ -930,6 +1012,7 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA
int code;
char *charName;
GBool missing, hex;
+ GBool numeric;
Unicode toUnicode[256];
CharCodeToUnicode *utu, *ctu2;
Unicode uBuf[8];
@@ -1244,9 +1327,9 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA
(charName[2] >= 'A' && charName[2] <= 'F'))) ||
(strlen(charName) == 2 &&
isxdigit(charName[0]) && isxdigit(charName[1]) &&
- ((charName[0] >= 'a' && charName[0] <= 'f') ||
- (charName[0] >= 'A' && charName[0] <= 'F') ||
- (charName[1] >= 'a' && charName[1] <= 'f') ||
+ // Only check idx 1 to avoid misidentifying a decimal
+ // number like a0
+ ((charName[1] >= 'a' && charName[1] <= 'f') ||
(charName[1] >= 'A' && charName[1] <= 'F')))) {
hex = gTrue;
}
@@ -1257,6 +1340,8 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA
}
}
+ numeric = testForNumericNames(fontDict, hex);
+
// construct the char code -> Unicode mapping object
ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
@@ -1280,22 +1365,18 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA
&& (n = parseCharName(charName, uBuf, sizeof(uBuf)/sizeof(*uBuf),
gFalse, // don't check simple names (pass 1)
gTrue, // do check ligatures
- globalParams->getMapNumericCharNames(),
+ numeric,
hex,
gTrue))) { // do check variants
ctu->setMapping((CharCode)code, uBuf, n);
- } else if (globalParams->getMapUnknownCharNames()) {
- // if the 'mapUnknownCharNames' flag is set, do a simple pass-through
- // mapping for unknown character names
- if (charName && charName[0]) {
- for (n = 0; n < (int)(sizeof(uBuf)/sizeof(*uBuf)); ++n)
- if (!(uBuf[n] = charName[n]))
- break;
- ctu->setMapping((CharCode)code, uBuf, n);
- } else {
+ continue;
+ }
+
+ // if the 'mapUnknownCharNames' flag is set, do a simple pass-through
+ // mapping for unknown character names
+ if (globalParams->getMapUnknownCharNames()) {
uBuf[0] = code;
ctu->setMapping((CharCode)code, uBuf, 1);
- }
}
}
}
@@ -1495,7 +1576,7 @@ static int parseCharName(char *charName, Unicode *uBuf, int uLen,
if (names && (uBuf[0] = globalParams->mapNameToUnicodeText(charName))) {
return 1;
}
- if (numeric) {
+ if (globalParams->getMapNumericCharNames()) {
unsigned int n = strlen(charName);
// 3.3. otherwise, if the component is of the form "uni" (U+0075 U+006E
// U+0069) followed by a sequence of uppercase hexadecimal digits (0 .. 9,
@@ -1538,30 +1619,10 @@ static int parseCharName(char *charName, Unicode *uBuf, int uLen,
return 1;
}
}
- // Not in Adobe Glyph Mapping convention: look for names of the form 'Axx',
- // 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B' are any letters, 'xx' is
- // two hex digits, and 'nn' is 2-4 decimal digits
- if (hex && n == 3 && isalpha(charName[0]) &&
- isxdigit(charName[1]) && isxdigit(charName[2])) {
- sscanf(charName+1, "%x", (unsigned int *)uBuf);
- return 1;
- } else if (hex && n == 2 &&
- isxdigit(charName[0]) && isxdigit(charName[1])) {
- sscanf(charName, "%x", (unsigned int *)uBuf);
- return 1;
- } else if (!hex && n >= 2 && n <= 4 &&
- isdigit(charName[0]) && isdigit(charName[1])) {
- uBuf[0] = (Unicode)atoi(charName);
+ // Not in Adobe Glyph Mapping convention: look for names like xx
+ // or Axx and parse for hex or decimal values.
+ if (numeric && parseNumericName(charName, hex, uBuf))
return 1;
- } else if (n >= 3 && n <= 5 &&
- isdigit(charName[1]) && isdigit(charName[2])) {
- uBuf[0] = (Unicode)atoi(charName+1);
- return 1;
- } else if (n >= 4 && n <= 6 &&
- isdigit(charName[2]) && isdigit(charName[3])) {
- uBuf[0] = (Unicode)atoi(charName+2);
- return 1;
- }
}
// 3.5. otherwise, map the component to the empty string
return 0;
diff --git a/poppler/GlobalParams.cc b/poppler/GlobalParams.cc
index 01a3157..3d88646 100644
--- a/poppler/GlobalParams.cc
+++ b/poppler/GlobalParams.cc
@@ -34,7 +34,7 @@
// Copyright (C) 2012 Adrian Johnson <ajohnson at redneon.com>
// Copyright (C) 2012 Thomas Freitag <Thomas.Freitag at alfa.de>
// Copyright (C) 2012 Peter Breitenlohner <peb at mppmu.mpg.de>
-// Copyright (C) 2013 Jason Crain <jason at aquaticape.us>
+// Copyright (C) 2013, 2014 Jason Crain <jason at aquaticape.us>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -633,7 +633,7 @@ GlobalParams::GlobalParams(const char *customPopplerDataDir)
minLineWidth = 0.0;
overprintPreview = gFalse;
mapNumericCharNames = gTrue;
- mapUnknownCharNames = gFalse;
+ mapUnknownCharNames = gTrue;
printCommands = gFalse;
profileCommands = gFalse;
errQuiet = gFalse;
More information about the poppler
mailing list