[Libreoffice-commits] core.git: include/vcl vcl/qa vcl/source

Miklos Vajna vmiklos at collabora.co.uk
Fri Apr 7 21:08:20 UTC 2017


 include/vcl/filter/pdfdocument.hxx          |    7 ++-
 vcl/qa/cppunit/pdfexport/data/tdf107018.odt |binary
 vcl/qa/cppunit/pdfexport/pdfexport.cxx      |   50 ++++++++++++++++++++++++++++
 vcl/source/filter/ipdf/pdfdocument.cxx      |   27 ++++++---------
 vcl/source/gdi/pdfwriter_impl.cxx           |   10 ++---
 5 files changed, 70 insertions(+), 24 deletions(-)

New commits:
commit ee73747ab58fbbd5039823767693431223c347d3
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Fri Apr 7 18:19:41 2017 +0200

    tdf#107018 PDF export of PDF images: handle references in nested dictionaries
    
    Also get rid of the GetKeyOffset() and GetKeyValueLength() calls when
    copying dictionaries: each reference already knows its own offset and
    length, so there is no need to look them up via the dictionary. This
    makes the dictionary and the array handling more similar.
    
    Change-Id: I65936acfaf857636a8d83da3a4cec69289eb89d8
    Reviewed-on: https://gerrit.libreoffice.org/36282
    Reviewed-by: Miklos Vajna <vmiklos at collabora.co.uk>
    Tested-by: Jenkins <ci at libreoffice.org>

diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx
index 595b4f0fdfd3..d83cb8308f11 100644
--- a/include/vcl/filter/pdfdocument.hxx
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -71,6 +71,9 @@ class VCL_DLLPUBLIC PDFObjectElement : public PDFElement
     std::vector< std::unique_ptr<PDFElement> > m_aElements;
     /// Uncompressed buffer of an object in an object stream.
     std::unique_ptr<SvMemoryStream> m_pStreamBuffer;
+    /// List of all reference elements inside this object's dictionary and
+    /// nested dictionaries.
+    std::vector<PDFReferenceElement*> m_aDictionaryReferences;
 
 public:
     PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue);
@@ -88,8 +91,8 @@ public:
     PDFNumberElement* GetNumberElement() const;
     /// Get access to the parsed key-value items from the object dictionary.
     const std::map<OString, PDFElement*>& GetDictionaryItems();
-    /// Same as GetDictionaryItems(), but entries are sorted by file offset.
-    std::vector< std::pair<OString, PDFElement*> > GetDictionaryItemsByOffset();
+    const std::vector<PDFReferenceElement*>& GetDictionaryReferences() const;
+    void AddDictionaryReference(PDFReferenceElement* pReference);
     void SetArray(PDFArrayElement* pArrayElement);
     void SetStream(PDFStreamElement* pStreamElement);
     /// Access to the stream of the object, if it has any.
diff --git a/vcl/qa/cppunit/pdfexport/data/tdf107018.odt b/vcl/qa/cppunit/pdfexport/data/tdf107018.odt
new file mode 100644
index 000000000000..3bfc7b2d73cb
Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/tdf107018.odt differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index 31d0dfb384f2..aacf36b2796b 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -53,6 +53,7 @@ public:
     void testTdf106972();
     void testTdf106972Pdf17();
     void testTdf107013();
+    void testTdf107018();
 #endif
 
     CPPUNIT_TEST_SUITE(PdfExportTest);
@@ -65,6 +66,7 @@ public:
     CPPUNIT_TEST(testTdf106972);
     CPPUNIT_TEST(testTdf106972Pdf17);
     CPPUNIT_TEST(testTdf107013);
+    CPPUNIT_TEST(testTdf107018);
 #endif
     CPPUNIT_TEST_SUITE_END();
 };
@@ -402,6 +404,54 @@ void PdfExportTest::testTdf107013()
     // This failed, the reference to the image was created, but not the image.
     CPPUNIT_ASSERT(pXObject);
 }
+
+void PdfExportTest::testTdf107018()
+{
+    vcl::filter::PDFDocument aDocument;
+    load("tdf107018.odt", aDocument);
+
+    // Get access to the only image on the only page.
+    std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages();
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size());
+    vcl::filter::PDFObjectElement* pResources = aPages[0]->LookupObject("Resources");
+    CPPUNIT_ASSERT(pResources);
+    auto pXObjects = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pResources->Lookup("XObject"));
+    CPPUNIT_ASSERT(pXObjects);
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pXObjects->GetItems().size());
+    vcl::filter::PDFObjectElement* pXObject = pXObjects->LookupObject(pXObjects->GetItems().begin()->first);
+    CPPUNIT_ASSERT(pXObject);
+
+    // Get access to the form object inside the image.
+    auto pXObjectResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pXObject->Lookup("Resources"));
+    CPPUNIT_ASSERT(pXObjectResources);
+    auto pXObjectForms = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pXObjectResources->LookupElement("XObject"));
+    CPPUNIT_ASSERT(pXObjectForms);
+    vcl::filter::PDFObjectElement* pForm = pXObjectForms->LookupObject(pXObjectForms->GetItems().begin()->first);
+    CPPUNIT_ASSERT(pForm);
+
+    // Get access to Resources -> Font -> F1 of the form.
+    auto pFormResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pForm->Lookup("Resources"));
+    CPPUNIT_ASSERT(pFormResources);
+    auto pFonts = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pFormResources->LookupElement("Font"));
+    CPPUNIT_ASSERT(pFonts);
+    auto pF1Ref = dynamic_cast<vcl::filter::PDFReferenceElement*>(pFonts->LookupElement("F1"));
+    CPPUNIT_ASSERT(pF1Ref);
+    vcl::filter::PDFObjectElement* pF1 = pF1Ref->LookupObject();
+    CPPUNIT_ASSERT(pF1);
+
+    // Check that Foo -> Bar of the font is of type Pages.
+    auto pFontFoo = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pF1->Lookup("Foo"));
+    CPPUNIT_ASSERT(pFontFoo);
+    auto pBar = dynamic_cast<vcl::filter::PDFReferenceElement*>(pFontFoo->LookupElement("Bar"));
+    CPPUNIT_ASSERT(pBar);
+    vcl::filter::PDFObjectElement* pObject = pBar->LookupObject();
+    CPPUNIT_ASSERT(pObject);
+    auto pName = dynamic_cast<vcl::filter::PDFNameElement*>(pObject->Lookup("Type"));
+    CPPUNIT_ASSERT(pName);
+    // This was "XObject", reference in a nested dictionary wasn't updated when
+    // copying the page stream of a PDF image.
+    CPPUNIT_ASSERT_EQUAL(OString("Pages"), pName->GetValue());
+}
 #endif
 
 CPPUNIT_TEST_SUITE_REGISTRATION(PdfExportTest);
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index 43d4248cc8ad..b0bb8be6c93e 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -1071,10 +1071,14 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
                     }
                     else
                     {
-                        rElements.push_back(std::unique_ptr<PDFElement>(new PDFReferenceElement(*this, *pObjectNumber, *pGenerationNumber)));
+                        auto pReference = new PDFReferenceElement(*this, *pObjectNumber, *pGenerationNumber);
+                        rElements.push_back(std::unique_ptr<PDFElement>(pReference));
                         if (pArray)
                             // Reference is part of a direct (non-dictionary) array, inform the array.
                             pArray->PushBack(rElements.back().get());
+                        if (bInObject && nDictionaryDepth > 0 && pObject)
+                            // Inform the object about a new in-dictionary reference.
+                            pObject->AddDictionaryReference(pReference);
                     }
                     if (!rElements.back()->Read(rStream))
                     {
@@ -2512,23 +2516,14 @@ PDFNumberElement* PDFObjectElement::GetNumberElement() const
     return m_pNumberElement;
 }
 
-std::vector< std::pair<OString, PDFElement*> > PDFObjectElement::GetDictionaryItemsByOffset()
+const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
 {
-    std::vector< std::pair<OString, PDFElement*> > aRet;
-
-    for (const auto& rItem : m_aDictionary)
-        aRet.push_back(rItem);
-
-    PDFDictionaryElement* pDictionary = GetDictionary();
-    if (!pDictionary)
-        return aRet;
-
-    std::sort(aRet.begin(), aRet.end(), [pDictionary](const std::pair<OString, PDFElement*>& a, const std::pair<OString, PDFElement*>& b) -> bool
-    {
-        return pDictionary->GetKeyOffset(a.first) < pDictionary->GetKeyOffset(b.first);
-    });
+    return m_aDictionaryReferences;
+}
 
-    return aRet;
+void PDFObjectElement::AddDictionaryReference(PDFReferenceElement* pReference)
+{
+    m_aDictionaryReferences.push_back(pReference);
 }
 
 const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index 8445377a1a5a..d5c1f6e8e7ee 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -10899,17 +10899,15 @@ sal_Int32 PDFWriterImpl::copyExternalResource(SvMemoryStream& rDocBuffer, filter
     OStringBuffer aLine;
     aLine.append(nObject);
     aLine.append(" 0 obj\n");
-    if (filter::PDFDictionaryElement* pDictionary = rObject.GetDictionary())
+    if (rObject.GetDictionary())
     {
         aLine.append("<<");
 
         // Complex case: can't copy the dictionary byte array as is, as it may contain references.
         bool bDone = false;
-        std::vector< std::pair<OString, filter::PDFElement*> > aItems = rObject.GetDictionaryItemsByOffset();
         sal_uInt64 nCopyStart = 0;
-        for (const auto& rItem : aItems)
+        for (auto pReference : rObject.GetDictionaryReferences())
         {
-            auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second);
             if (pReference)
             {
                 filter::PDFObjectElement* pReferenced = pReference->LookupObject();
@@ -10918,8 +10916,8 @@ sal_Int32 PDFWriterImpl::copyExternalResource(SvMemoryStream& rDocBuffer, filter
                     // Copy the referenced object.
                     sal_Int32 nRef = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
 
-                    sal_uInt64 nReferenceStart = pDictionary->GetKeyOffset(rItem.first) + rItem.first.getLength();
-                    sal_uInt64 nReferenceEnd = pDictionary->GetKeyOffset(rItem.first) + pDictionary->GetKeyValueLength(rItem.first);
+                    sal_uInt64 nReferenceStart = pReference->GetObjectElement().GetLocation();
+                    sal_uInt64 nReferenceEnd = pReference->GetOffset();
                     sal_uInt64 nOffset = 0;
                     if (nCopyStart == 0)
                         // Dict start -> reference start.


More information about the Libreoffice-commits mailing list