[Libreoffice-commits] core.git: Branch 'libreoffice-7-1' - vcl/qa vcl/source

Fri May 14 10:20:23 UTC 2021

vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf |   69 ++++++++++++++++++++++++
 vcl/qa/cppunit/filter/ipdf/ipdf.cxx             |   19 ++++++
 vcl/source/filter/ipdf/pdfdocument.cxx          |    7 ++
 3 files changed, 94 insertions(+), 1 deletion(-)

New commits:
commit be73b5974284cd7094ae2bf31cd57e70e1e08dd2
Author:     Miklos Vajna <vmiklos at collabora.com>
AuthorDate: Wed May 12 10:51:09 2021 +0200
Commit:     Michael Stahl <michael.stahl at allotropia.de>
CommitDate: Fri May 14 12:19:51 2021 +0200

    vcl PDF tokenizer: fix EOF position when \r is not followed by \n
    
    Otherwise this would break partial tokenize when we only read a trailer
    in the middle of the file: m_aEOFs.back() is one byte larger than
    rStream.Tell(), so we read past the end of the trailer, resulting in a
    tokenize failure.
    
    What's special about the bugdoc:
    
    - it has 2 xrefs, the first is incomplete, and refers to a second which
    is later in the file
    - the object length is an indirect object, triggering an xref lookup
    - the first EOF is followed by a \r, which is then not followed by a \n
    
    This results in reading past the end of the first trailer and then
    triggering a lookup failure.
    
    FWIW, pdfium does the same in
    <https://pdfium.googlesource.com/pdfium/+/59d107323f6727bbd5f8a4d0843081790638a1dd/core/fpdfapi/parser/cpdf_syntax_parser.cpp#446>,
    so we're now in sync with it.
    
    Change-Id: Ia556a25e333b5e4f1418d92a98d74358862120e2
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/115466
    Reviewed-by: Miklos Vajna <vmiklos at collabora.com>
    Tested-by: Jenkins
    (cherry picked from commit 6b1d5bafdc722d07d3dc4980764275a6caa707ba)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/115516
    Reviewed-by: Michael Stahl <michael.stahl at allotropia.de>

diff --git a/vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf b/vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf
new file mode 100644
index 000000000000..6f1ad86f5c99
--- /dev/null
+++ b/vcl/qa/cppunit/filter/ipdf/data/comment-end.pdf
@@ -0,0 +1,69 @@
+%PDF-1.7
+% ò¤ô
+1 0 obj <<
+  /Type /Catalog
+  /Pages 2 0 R
+>>
+endobj
+2 0 obj <<
+  /Type /Pages
+  /MediaBox [0 0 200 300]
+  /Count 1
+  /Kids [3 0 R]
+>>
+endobj
+3 0 obj <<
+  /Type /Page
+  /Parent 2 0 R
+  /Contents 4 0 R
+>>
+endobj
+4 0 obj <<
+  /Length 4
+>>
+stream
+q
+Q
+endstream
+endobj
+xref
+0 5
+0000000000 65535 f 
+0000000015 00000 n 
+0000000068 00000 n 
+0000000157 00000 n 
+0000000226 00000 n 
+trailer <<
+  /Root 1 0 R
+  /Size 5
+  /Prev 541
+>>
+startxref
+280
+%%EOF
%%TEST
+4 0 obj <<
+  /Length 5 0 R
+>>
+stream
+q
+Q
+endstream
+endobj
+5 0 obj
+4
+endobj
+xref
+0 6
+0000000000 65535 f 
+0000000015 00000 n 
+0000000068 00000 n 
+0000000157 00000 n 
+0000000466 00000 n 
+0000000524 00000 n 
+trailer <<
+  /Root 1 0 R
+  /Size 6
+>>
+startxref
+280
+%%EOF
diff --git a/vcl/qa/cppunit/filter/ipdf/ipdf.cxx b/vcl/qa/cppunit/filter/ipdf/ipdf.cxx
index d94eb76aa5b3..93cc22360b56 100644
--- a/vcl/qa/cppunit/filter/ipdf/ipdf.cxx
+++ b/vcl/qa/cppunit/filter/ipdf/ipdf.cxx
@@ -178,6 +178,25 @@ CPPUNIT_TEST_FIXTURE(VclFilterIpdfTest, testRealNumbers)
     CPPUNIT_ASSERT(!aPages.empty());
 }
 
+CPPUNIT_TEST_FIXTURE(VclFilterIpdfTest, testCommentEnd)
+{
+    // Load the test document:
+    // - it has two xrefs
+    // - second xref has an updated page content object with an indirect length
+    // - last startxref refers to the first xref
+    // - first xref has a /Prev to the second xref
+    // - first xref is terminated by a \r, which is not followed by a newline
+    // this means that if reading doesn't stop at the end of the first xref, then we'll try to look
+    // up the offset of the length object, which we don't yet have
+    OUString aSourceURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "comment-end.pdf";
+    SvFileStream aFile(aSourceURL, StreamMode::READ);
+    vcl::filter::PDFDocument aDocument;
+
+    // Without the accompanying fix in place, this test would have failed, because Tokenize() didn't
+    // stop at the end of the first xref.
+    CPPUNIT_ASSERT(aDocument.Read(aFile));
+}
+
 CPPUNIT_PLUGIN_IMPLEMENT();
 
 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index 804713abaf10..a0164c0c4ce6 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -2194,9 +2194,14 @@ bool PDFCommentElement::Read(SvStream& rStream)
                 sal_uInt64 nPos = rStream.Tell();
                 if (ch == '\r')
                 {
+                    rStream.ReadChar(ch);
+                    rStream.SeekRel(-1);
                     // If the comment ends with a \r\n, count the \n as well to match Adobe Acrobat
                     // behavior.
-                    nPos += 1;
+                    if (ch == '\n')
+                    {
+                        nPos += 1;
+                    }
                 }
                 m_rDoc.PushBackEOF(nPos);
             }