[Libreoffice-commits] core.git: include/vcl vcl/source xmlsecurity/qa

Miklos Vajna vmiklos at collabora.co.uk
Thu Apr 13 22:11:30 UTC 2017


 include/vcl/filter/pdfdocument.hxx                |    4 
 vcl/source/filter/ipdf/pdfdocument.cxx            |   37 +++++++-
 xmlsecurity/qa/unit/pdfsigning/data/tdf107149.pdf |   97 ++++++++++++++++++++++
 xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx     |    6 +
 4 files changed, 141 insertions(+), 3 deletions(-)

New commits:
commit f15a69bd57e578ca607f14cb62f29a16986b96e6
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Thu Apr 13 18:09:32 2017 +0200

    vcl PDF import: don't assume larger offset -> newer trailer
    
    Usually, when a PDF file contains incremental updates, the updates are
    appended at the end of the document. But this is not required: the
    various trailers can be in any order. Make sure that we look at the last
    trailer (logically last, not the one with the largest file offset) when
    looking for pages.
    
    Change-Id: Idcd85a7c6bbf08c9436dd73933d79cdb683f482c
    Reviewed-on: https://gerrit.libreoffice.org/36527
    Reviewed-by: Miklos Vajna <vmiklos at collabora.co.uk>
    Tested-by: Jenkins <ci at libreoffice.org>

diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx
index d83cb8308f11..5011504f13df 100644
--- a/include/vcl/filter/pdfdocument.hxx
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -308,6 +308,10 @@ class VCL_DLLPUBLIC PDFDocument
     std::map<size_t, PDFObjectElement*> m_aIDObjects;
     /// List of xref offsets we know.
     std::vector<size_t> m_aStartXRefs;
+    /// Offsets of trailers, from latest to oldest.
+    std::vector<size_t> m_aTrailerOffsets;
+    /// Trailer offset <-> Trailer pointer map.
+    std::map<size_t, PDFTrailerElement*> m_aOffsetTrailers;
     /// List of EOF offsets we know.
     std::vector<size_t> m_aEOFs;
     PDFTrailerElement* m_pTrailer;
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index b0bb8be6c93e..105800974532 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -112,11 +112,14 @@ class PDFTrailerElement : public PDFElement
 {
     PDFDocument& m_rDoc;
     std::map<OString, PDFElement*> m_aDictionary;
+    /// Location of the end of the trailer token.
+    sal_uInt64 m_nOffset = 0;
 
 public:
     explicit PDFTrailerElement(PDFDocument& rDoc);
     bool Read(SvStream& rStream) override;
     PDFElement* Lookup(const OString& rDictionaryKey);
+    sal_uInt64 GetLocation() const;
 };
 
 XRefEntry::XRefEntry()
@@ -1176,6 +1179,11 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
                 else if (aKeyword == "trailer")
                 {
                     auto pTrailer = new PDFTrailerElement(*this);
+
+                    // Make it possible to find this trailer later by offset.
+                    pTrailer->Read(rStream);
+                    m_aOffsetTrailers[pTrailer->GetLocation()] = pTrailer;
+
                     // When reading till the first EOF token only, remember
                     // just the first trailer token.
                     if (eMode != TokenizeMode::EOF_TOKEN || !m_pTrailer)
@@ -1261,7 +1269,13 @@ bool PDFDocument::Read(SvStream& rStream)
 
         PDFNumberElement* pPrev = nullptr;
         if (m_pTrailer)
+        {
             pPrev = dynamic_cast<PDFNumberElement*>(m_pTrailer->Lookup("Prev"));
+
+            // Remember the offset of this trailer in the correct order. It's
+            // possible that newer trailers don't have a larger offset.
+            m_aTrailerOffsets.push_back(m_pTrailer->GetLocation());
+        }
         else if (m_pXRefStream)
             pPrev = dynamic_cast<PDFNumberElement*>(m_pXRefStream->Lookup("Prev"));
         if (pPrev)
@@ -1788,8 +1802,20 @@ std::vector<PDFObjectElement*> PDFDocument::GetPages()
     std::vector<PDFObjectElement*> aRet;
 
     PDFReferenceElement* pRoot = nullptr;
-    if (m_pTrailer)
-        pRoot = dynamic_cast<PDFReferenceElement*>(m_pTrailer->Lookup("Root"));
+
+
+    PDFTrailerElement* pTrailer = nullptr;
+    if (!m_aTrailerOffsets.empty())
+    {
+        // Get access to the latest trailer, and work with the keys of that
+        // one.
+        auto it = m_aOffsetTrailers.find(m_aTrailerOffsets[0]);
+        if (it != m_aOffsetTrailers.end())
+            pTrailer = it->second;
+    }
+
+    if (pTrailer)
+        pRoot = dynamic_cast<PDFReferenceElement*>(pTrailer->Lookup("Root"));
     else if (m_pXRefStream)
         pRoot = dynamic_cast<PDFReferenceElement*>(m_pXRefStream->Lookup("Root"));
 
@@ -2085,8 +2111,9 @@ PDFTrailerElement::PDFTrailerElement(PDFDocument& rDoc)
 {
 }
 
-bool PDFTrailerElement::Read(SvStream& /*rStream*/)
+bool PDFTrailerElement::Read(SvStream& rStream)
 {
+    m_nOffset = rStream.Tell();
     return true;
 }
 
@@ -2098,6 +2125,10 @@ PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
     return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
 }
 
+sal_uInt64 PDFTrailerElement::GetLocation() const
+{
+    return m_nOffset;
+}
 
 double PDFNumberElement::GetValue() const
 {
diff --git a/xmlsecurity/qa/unit/pdfsigning/data/tdf107149.pdf b/xmlsecurity/qa/unit/pdfsigning/data/tdf107149.pdf
new file mode 100644
index 000000000000..db063f366a93
--- /dev/null
+++ b/xmlsecurity/qa/unit/pdfsigning/data/tdf107149.pdf
@@ -0,0 +1,97 @@
+%PDF-1.2
+%âãÏÓ
+3 0 obj 
+<<
+/Length 61
+>>
+stream
+  BT
+  /F1 24 Tf
+  1 0 0 1 260 254 Tm
+  (Hello Worl2)Tj
+  ET
+
+endstream 
+endobj xref
+0 7
+0000000000 65535 f 
+0000000312 00000 n 
+0000000532 00000 n 
+0000000015 00000 n 
+0000000660 00000 n 
+0000000578 00000 n 
+0000000743 00000 n 
+trailer
+
+<<
+/Root 6 0 R
+/Size 7
+>>
+1 0 obj 
+<<
+/Resources 2 0 R
+/Contents 3 0 R
+/Parent 4 0 R
+/Type /Page
+/MediaBox [0 0 612 446]
+>>
+endobj 
+3 0 obj 
+<<
+/Length 61
+>>
+stream
+  BT
+  /F1 24 Tf
+  1 0 0 1 260 254 Tm
+  (Hello World)Tj
+  ET
+
+endstream 
+endobj 
+2 0 obj 
+<<
+/Font 
+<<
+/F1 5 0 R
+>>
+>>
+endobj 
+5 0 obj 
+<<
+/Subtype /Type1
+/Name /F1
+/Type /Font
+/BaseFont /Helvetica
+>>
+endobj 
+4 0 obj 
+<<
+/Kids [1 0 R]
+/Type /Pages
+/MediaBox [0 0 612 446]
+/Count 1
+>>
+endobj 
+6 0 obj 
+<<
+/Type /Catalog
+/Pages 4 0 R
+>>
+endobj xref
+0 7
+0000000000 65535 f 
+0000000015 00000 n 
+0000000235 00000 n 
+0000000121 00000 n 
+0000000363 00000 n 
+0000000281 00000 n 
+0000000446 00000 n 
+trailer
+
+<<
+/Size 7
+>>
+startxref
+128
+%%EOF
diff --git a/xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx b/xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx
index 05f8517ffec3..7462be32882a 100644
--- a/xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx
+++ b/xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx
@@ -395,6 +395,7 @@ void PDFSigningTest::testTokenize()
         "noeol.pdf",
         // File that's intentionally smaller than 1024 bytes.
         "small.pdf",
+        "tdf107149.pdf",
     };
 
     for (const auto& rName : aNames)
@@ -403,6 +404,11 @@ void PDFSigningTest::testTokenize()
         vcl::filter::PDFDocument aDocument;
         // Just make sure the tokenizer finishes without an error, don't look at the signature.
         CPPUNIT_ASSERT(aDocument.Read(aStream));
+
+        OUString aNoPages("tdf107149.pdf");
+        if (aNoPages == rName)
+            // This failed, page list was empty.
+            CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aDocument.GetPages().size());
     }
 }
 


More information about the Libreoffice-commits mailing list