[poppler] poppler/Lexer.cc poppler/Lexer.h poppler/Parser.cc poppler/Parser.h
Albert Astals Cid
aacid at kemper.freedesktop.org
Sat Apr 6 14:23:00 PDT 2013
poppler/Lexer.cc | 45 +++++++++++++++++++++++++++++++++++++++++++++
poppler/Lexer.h | 2 ++
poppler/Parser.cc | 28 ++++++++++++++++++++++++++--
poppler/Parser.h | 2 ++
4 files changed, 75 insertions(+), 2 deletions(-)
New commits:
commit e1ffa9100cf6b4a444be7ed76b11698a5c5bb441
Author: Thomas Freitag <Thomas.Freitag at alfa.de>
Date: Sat Apr 6 23:21:58 2013 +0200
Fix endstream detection
Part 1 of bug #62985
The endstream search does not work correctly, at least with bug-poppler16579.pdf: the shift(-1) with the token mechanism used in Lexer isn't correct for a binary data stream. If there is, e.g., a "(" without a corresponding ")" in the binary data, which of course can happen and does happen in that PDF, shift(-1) skips the endstream being searched for and can therefore, in the worst case, reach the end of the file. Therefore I implemented a shift("endstream") in Java, which I now port back to C++, or in other words "There and Back Again" :-)
You can test it with bug-poppler16579.pdf if you just temporarily change
if (longNumber <= INT_MAX && longNumber >= INT_MIN && *end_ptr == '\0') {
in XRef.cc to
if (gFalse && longNumber <= INT_MAX && longNumber >= INT_MIN && *end_ptr == '\0') {
diff --git a/poppler/Lexer.cc b/poppler/Lexer.cc
index a0bb35e..4e9ea12 100644
--- a/poppler/Lexer.cc
+++ b/poppler/Lexer.cc
@@ -17,6 +17,7 @@
// Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk at gmail.com>
// Copyright (C) 2010 Carlos Garcia Campos <carlosgc at gnome.org>
// Copyright (C) 2012, 2013 Adrian Johnson <ajohnson at redneon.com>
+// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag at alfa.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -580,6 +581,50 @@ Object *Lexer::getObj(Object *obj, int objNum) {
return obj;
}
+Object *Lexer::getObj(Object *obj, const char *cmdA) { // read tokens until one equals cmdA
+ char *p;
+ int c, c2; // NOTE(review): c2 is never used in this function
+ GBool comment, done; // NOTE(review): done is never used in this function
+ int numParen; // NOTE(review): never used in this function
+ GooString *s; // NOTE(review): never used in this function
+ int n, m; // NOTE(review): m is never used in this function
+ // On a match obj is set via initCmd(tokBuf); if the input ends first, the result of obj->initEOF() is returned.
+ // each pass of the outer loop: skip whitespace and '%' comments, then collect one token into tokBuf
+ comment = gFalse;
+ const char *cmd1 = tokBuf; // cmd1 aliases tokBuf, so the strcmp below always sees the latest token
+ *tokBuf = 0; // start from an empty token
+ while (strcmp(cmdA, cmd1)) { // repeat until the collected token is exactly cmdA
+ while (1) {
+ if ((c = getChar()) == EOF) {
+ return obj->initEOF(); // input exhausted before cmdA was found
+ }
+ if (comment) {
+ if (c == '\r' || c == '\n') {
+ comment = gFalse; // a comment runs to the end of the line
+ }
+ } else if (c == '%') {
+ comment = gTrue;
+ } else if (specialChars[c] != 1) { // presumably 1 marks whitespace in specialChars -- table not shown here
+ break; // c is the first character of the next token
+ }
+ }
+ p = tokBuf;
+ *p++ = c;
+ n = 1; // number of characters consumed for this token so far
+ while ((c = lookChar()) != EOF && specialChars[c] == 0) { // presumably 0 marks an ordinary character -- TODO confirm
+ getChar();
+ if (++n == tokBufSize) { // buffer limit reached: this character is consumed but not stored
+ break;
+ }
+ *p++ = c;
+ }
+ *p = '\0'; // terminate the token so strcmp can compare it
+ }
+ obj->initCmd(tokBuf); // loop ended without EOF, so tokBuf compares equal to cmdA
+
+ return obj;
+}
+
void Lexer::skipToNextLine() {
int c;
diff --git a/poppler/Lexer.h b/poppler/Lexer.h
index 227508f..d9c23dc 100644
--- a/poppler/Lexer.h
+++ b/poppler/Lexer.h
@@ -16,6 +16,7 @@
// Copyright (C) 2006, 2007, 2010 Albert Astals Cid <aacid at kde.org>
// Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk at gmail.com>
// Copyright (C) 2013 Adrian Johnson <ajohnson at redneon.com>
+// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag at alfa.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -56,6 +57,7 @@ public:
// Get the next object from the input stream.
Object *getObj(Object *obj, int objNum = -1);
+ Object *getObj(Object *obj, const char *cmdA);
// Skip to the beginning of the next line in the input stream.
void skipToNextLine();
diff --git a/poppler/Parser.cc b/poppler/Parser.cc
index b66203f..0370564 100644
--- a/poppler/Parser.cc
+++ b/poppler/Parser.cc
@@ -242,7 +242,7 @@ Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
// refill token buffers and check for 'endstream'
shift(); // kill '>>'
- shift(); // kill 'stream'
+ shift("endstream"); // kill 'stream'
if (buf1.isCmd("endstream")) {
shift();
} else {
@@ -251,7 +251,7 @@ Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
if (xref) {
// shift until we find the proper endstream or we change to another object or reach eof
while (!buf1.isCmd("endstream") && xref->getNumEntry(lexer->getPos()) == objNum && !buf1.isEOF()) {
- shift();
+ shift("endstream");
}
length = lexer->getPos() - pos;
if (buf1.isCmd("endstream")) {
@@ -302,3 +302,27 @@ void Parser::shift(int objNum) {
else
lexer->getObj(&buf2, objNum);
}
+
+void Parser::shift(const char *cmdA) { // advance buf1/buf2 by one; buf2 is refilled by searching the lexer for cmdA
+ if (inlineImg > 0) {
+ if (inlineImg < 2) {
+ ++inlineImg;
+ } else {
+ // in a damaged content stream, if 'ID' shows up in the middle
+ // of a dictionary, we need to reset
+ inlineImg = 0;
+ }
+ } else if (buf2.isCmd("ID")) {
+ lexer->skipChar(); // skip char after 'ID' command
+ inlineImg = 1;
+ }
+ buf1.free(); // release the old buf1 before it is overwritten
+ buf2.shallowCopy(&buf1); // presumably copies buf2 into buf1 (buf1 is tested right below) -- confirm in Object.h
+ if (inlineImg > 0) {
+ buf2.initNull(); // nothing is read from the lexer while inlineImg is set
+ } else if (buf1.isCmd(cmdA)) {
+ lexer->getObj(&buf2, -1); // cmdA is now in buf1: refill buf2 through the getObj(obj, objNum) overload
+ } else {
+ lexer->getObj(&buf2, cmdA); // otherwise let the lexer read ahead until it meets cmdA or EOF
+ }
+}
diff --git a/poppler/Parser.h b/poppler/Parser.h
index adaf913..9702716 100644
--- a/poppler/Parser.h
+++ b/poppler/Parser.h
@@ -16,6 +16,7 @@
// Copyright (C) 2006, 2010 Albert Astals Cid <aacid at kde.org>
// Copyright (C) 2012 Hib Eris <hib at hiberis.nl>
// Copyright (C) 2013 Adrian Johnson <ajohnson at redneon.com>
+// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag at alfa.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@@ -74,6 +75,7 @@ private:
int objNum, int objGen, int recursion,
GBool strict);
void shift(int objNum = -1);
+ void shift(const char *cmdA);
};
#endif
More information about the poppler
mailing list