[poppler] poppler/PDFDoc.cc

Mon Apr 2 18:16:51 UTC 2018

poppler/PDFDoc.cc |   23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

New commits:
commit e491e935ea355d48519cf0a14e4b060655850675
Author: Evangelos Foutras <evangelos at foutrelis.com>
Date:   Mon Apr 2 16:09:34 2018 +0300

    Fix PDFDoc::checkHeader() for PDFs smaller than 1 KiB
    
    The fix for bug 104502 made it so PDFDoc::checkHeader() would print a
    warning and return immediately if it encountered an EOF while reading
    the first 1024 bytes.
    
    Some PDF files can be smaller than 1024 bytes, for example those used
    by pdf2djvu's test suite. The latter would fail due to the unexpected
    warnings.
    
    Change the behavior of PDFDoc::checkHeader() when encountering an EOF
    so it processes the data it has read so far instead of aborting early.
    
    https://bugs.freedesktop.org/show_bug.cgi?id=105674

diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 6789c39f..1b3c48e6 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -38,6 +38,7 @@
 // Copyright (C) 2017 Jean Ghali <jghali at libertysurf.fr>
 // Copyright (C) 2017 Fredrik Fornwall <fredrik at fornwall.net>
 // Copyright (C) 2018 Ben Timby <btimby at gmail.com>
+// Copyright (C) 2018 Evangelos Foutras <evangelos at foutrelis.com>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -409,24 +410,30 @@ void PDFDoc::checkHeader() {
   char *p;
   char *tokptr;
   int i;
-  int c;
+  int bytesRead;
 
   pdfMajorVersion = 0;
   pdfMinorVersion = 0;
+
+  // read up to headerSearchSize bytes from the beginning of the document
   for (i = 0; i < headerSearchSize; ++i) {
-    if ((c = str->getChar()) == EOF) {
-      error(errSyntaxWarning, -1, "EOF while reading header (continuing anyway)");
-      return;
-    }
+    const int c = str->getChar();
+    if (c == EOF)
+      break;
     hdrBuf[i] = c;
   }
-  hdrBuf[headerSearchSize] = '\0';
-  for (i = 0; i < headerSearchSize - 5; ++i) {
+  bytesRead = i;
+  hdrBuf[bytesRead] = '\0';
+
+  // find the start of the PDF header if it exists and parse the version
+  bool headerFound = false;
+  for (i = 0; i < bytesRead - 5; ++i) {
     if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
+      headerFound = true;
       break;
     }
   }
-  if (i >= headerSearchSize - 5) {
+  if (!headerFound) {
     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
     return;
   }