[poppler] poppler/Lexer.cc poppler/Lexer.h poppler/Parser.cc poppler/Parser.h

Albert Astals Cid aacid at kemper.freedesktop.org
Sat Apr 6 14:23:00 PDT 2013


 poppler/Lexer.cc  |   45 +++++++++++++++++++++++++++++++++++++++++++++
 poppler/Lexer.h   |    2 ++
 poppler/Parser.cc |   28 ++++++++++++++++++++++++++--
 poppler/Parser.h  |    2 ++
 4 files changed, 75 insertions(+), 2 deletions(-)

New commits:
commit e1ffa9100cf6b4a444be7ed76b11698a5c5bb441
Author: Thomas Freitag <Thomas.Freitag at alfa.de>
Date:   Sat Apr 6 23:21:58 2013 +0200

    Fix endstream detection
    
    Part 1 of bug #62985
    
    The endstream search does not work correctly, at least with bug-poppler16579.pdf: the shift(-1), which uses the token mechanism in Lexer, isn't correct for a binary data stream. If there is, e.g., a "(" without a corresponding ")" in the binary data (which of course can happen, and does happen in that PDF), shift(-1) skips the endstream being searched for and can therefore, in the worst case, reach the end of the file. Therefore I implemented a shift("endstream") in Java, which I am now porting back to C++, or in other words "There and Back Again" :-)
    
    You can test it with bug-poppler16579.pdf if you temporarily change
    
              if (longNumber <= INT_MAX && longNumber >= INT_MIN && *end_ptr == '\0') {
    
    in XRef.cc to
    
              if (gFalse && longNumber <= INT_MAX && longNumber >= INT_MIN && *end_ptr == '\0') {

diff --git a/poppler/Lexer.cc b/poppler/Lexer.cc
index a0bb35e..4e9ea12 100644
--- a/poppler/Lexer.cc
+++ b/poppler/Lexer.cc
@@ -17,6 +17,7 @@
 // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk at gmail.com>
 // Copyright (C) 2010 Carlos Garcia Campos <carlosgc at gnome.org>
 // Copyright (C) 2012, 2013 Adrian Johnson <ajohnson at redneon.com>
+// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag at alfa.de>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -580,6 +581,50 @@ Object *Lexer::getObj(Object *obj, int objNum) {
   return obj;
 }
 
+Object *Lexer::getObj(Object *obj, const char *cmdA) {
+  char *p;
+  int c, c2;
+  GBool comment, done;
+  int numParen;
+  GooString *s;
+  int n, m;  // NOTE(review): c2, done, numParen, s and m appear unused in this function
+
+  // read tokens, skipping whitespace and comments, until one equals cmdA
+  comment = gFalse;
+  const char *cmd1 = tokBuf;
+  *tokBuf = 0;  // start from an empty token
+  while (strcmp(cmdA, cmd1)) {
+    while (1) {
+      if ((c = getChar()) == EOF) {
+        return obj->initEOF();  // input ended before cmdA was found
+      }
+      if (comment) {
+        if (c == '\r' || c == '\n') {
+          comment = gFalse;
+        }
+      } else if (c == '%') {
+        comment = gTrue;
+      } else if (specialChars[c] != 1) {
+        break;
+      }
+    }
+    p = tokBuf;
+    *p++ = c;
+    n = 1;
+    while ((c = lookChar()) != EOF && specialChars[c] == 0) {
+      getChar();
+      if (++n == tokBufSize) {
+        break;  // token reached tokBufSize: stop; the char just read is not stored
+      }
+      *p++ = c;
+    }
+    *p = '\0';
+  }
+  obj->initCmd(tokBuf);  // loop exited, so tokBuf compares equal to cmdA
+  
+  return obj;
+}
+
 void Lexer::skipToNextLine() {
   int c;
 
diff --git a/poppler/Lexer.h b/poppler/Lexer.h
index 227508f..d9c23dc 100644
--- a/poppler/Lexer.h
+++ b/poppler/Lexer.h
@@ -16,6 +16,7 @@
 // Copyright (C) 2006, 2007, 2010 Albert Astals Cid <aacid at kde.org>
 // Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk at gmail.com>
 // Copyright (C) 2013 Adrian Johnson <ajohnson at redneon.com>
+// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag at alfa.de>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -56,6 +57,7 @@ public:
 
   // Get the next object from the input stream.
   Object *getObj(Object *obj, int objNum = -1);
+  Object *getObj(Object *obj, const char *cmdA);
 
   // Skip to the beginning of the next line in the input stream.
   void skipToNextLine();
diff --git a/poppler/Parser.cc b/poppler/Parser.cc
index b66203f..0370564 100644
--- a/poppler/Parser.cc
+++ b/poppler/Parser.cc
@@ -242,7 +242,7 @@ Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
 
   // refill token buffers and check for 'endstream'
   shift();  // kill '>>'
-  shift();  // kill 'stream'
+  shift("endstream");  // kill 'stream'
   if (buf1.isCmd("endstream")) {
     shift();
   } else {
@@ -251,7 +251,7 @@ Stream *Parser::makeStream(Object *dict, Guchar *fileKey,
     if (xref) {
       // shift until we find the proper endstream or we change to another object or reach eof
       while (!buf1.isCmd("endstream") && xref->getNumEntry(lexer->getPos()) == objNum && !buf1.isEOF()) {
-        shift();
+        shift("endstream");
       }
       length = lexer->getPos() - pos;
       if (buf1.isCmd("endstream")) {
@@ -302,3 +302,27 @@ void Parser::shift(int objNum) {
   else
     lexer->getObj(&buf2, objNum);
 }
+
+void Parser::shift(const char *cmdA) {  // like shift(int), but refills buf2 by searching for cmdA
+  if (inlineImg > 0) {
+    if (inlineImg < 2) {
+      ++inlineImg;
+    } else {
+      // in a damaged content stream, if 'ID' shows up in the middle
+      // of a dictionary, we need to reset
+      inlineImg = 0;
+    }
+  } else if (buf2.isCmd("ID")) {
+    lexer->skipChar();		// skip char after 'ID' command
+    inlineImg = 1;
+  }
+  buf1.free();
+  buf2.shallowCopy(&buf1);  // presumably copies buf2 into buf1 -- confirm against Object::shallowCopy
+  if (inlineImg > 0) {
+    buf2.initNull();
+  } else if (buf1.isCmd(cmdA)) {
+    lexer->getObj(&buf2, -1);  // buf1 already holds cmdA: fetch the next object via the int overload
+  } else {
+    lexer->getObj(&buf2, cmdA);  // otherwise let the lexer scan ahead for a cmdA token
+  }
+}
diff --git a/poppler/Parser.h b/poppler/Parser.h
index adaf913..9702716 100644
--- a/poppler/Parser.h
+++ b/poppler/Parser.h
@@ -16,6 +16,7 @@
 // Copyright (C) 2006, 2010 Albert Astals Cid <aacid at kde.org>
 // Copyright (C) 2012 Hib Eris <hib at hiberis.nl>
 // Copyright (C) 2013 Adrian Johnson <ajohnson at redneon.com>
+// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag at alfa.de>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -74,6 +75,7 @@ private:
 		     int objNum, int objGen, int recursion,
 		     GBool strict);
   void shift(int objNum = -1);
+  void shift(const char *cmdA);
 };
 
 #endif


More information about the poppler mailing list