[Libreoffice-commits] core.git: include/vcl vcl/qa vcl/source

Miklos Vajna vmiklos at collabora.co.uk
Wed Apr 5 17:08:15 UTC 2017


 include/vcl/filter/pdfdocument.hxx          |    6 ++-
 vcl/qa/cppunit/pdfexport/data/tdf106972.odt |binary
 vcl/qa/cppunit/pdfexport/pdfexport.cxx      |   48 ++++++++++++++++++++++++++++
 vcl/source/filter/ipdf/pdfdocument.cxx      |   24 +++++++++++++-
 vcl/source/gdi/pdfwriter_impl.cxx           |   36 ++++++++++++++++-----
 5 files changed, 104 insertions(+), 10 deletions(-)

New commits:
commit 242a9b634213acf03cabc373928555dc81afc672
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Wed Apr 5 15:35:13 2017 +0200

    tdf#106972 vcl PDF export, PDF images: handle indirect font references
    
    There were a number of problems here:
    
    - the /Resources key of a page object may be an indirect object
    - the /Font key of a resource object may be an indirect object
    - the /Length key of an object may be an indirect object
    
    So in all these cases handle not only a direct dictionary / number, but
    also an indirect reference that resolves to a dictionary / number.
    
    Change-Id: Ie74371f0ba43a133a1299843ef20cbfc75fe26d7

diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx
index 4bed3c32737a..595b4f0fdfd3 100644
--- a/include/vcl/filter/pdfdocument.hxx
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -50,6 +50,8 @@ class VCL_DLLPUBLIC PDFObjectElement : public PDFElement
     double m_fObjectValue;
     double m_fGenerationValue;
     std::map<OString, PDFElement*> m_aDictionary;
+    /// If set, the object contains this number element (outside any dictionary/array).
+    PDFNumberElement* m_pNumberElement;
     /// Position after the '<<' token.
     sal_uInt64 m_nDictionaryOffset;
     /// Length of the dictionary buffer till (before) the '>>' token.
@@ -82,8 +84,10 @@ public:
     sal_uInt64 GetDictionaryLength();
     PDFDictionaryElement* GetDictionary();
     void SetDictionary(PDFDictionaryElement* pDictionaryElement);
+    void SetNumberElement(PDFNumberElement* pNumberElement);
+    PDFNumberElement* GetNumberElement() const;
     /// Get access to the parsed key-value items from the object dictionary.
-    const std::map<OString, PDFElement*>& GetDictionaryItems() const;
+    const std::map<OString, PDFElement*>& GetDictionaryItems();
     /// Same as GetDictionaryItems(), but entries are sorted by file offset.
     std::vector< std::pair<OString, PDFElement*> > GetDictionaryItemsByOffset();
     void SetArray(PDFArrayElement* pArrayElement);
diff --git a/vcl/qa/cppunit/pdfexport/data/tdf106972.odt b/vcl/qa/cppunit/pdfexport/data/tdf106972.odt
new file mode 100644
index 000000000000..3fa76c49fab9
Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/tdf106972.odt differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index ede50077b62c..2bbb9f3c9f01 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -49,6 +49,7 @@ public:
     void testTdf106206();
     /// Tests export of PDF images without reference XObjects.
     void testTdf106693();
+    void testTdf106972();
 #endif
 
     CPPUNIT_TEST_SUITE(PdfExportTest);
@@ -58,6 +59,7 @@ public:
     CPPUNIT_TEST(testTdf105093);
     CPPUNIT_TEST(testTdf106206);
     CPPUNIT_TEST(testTdf106693);
+    CPPUNIT_TEST(testTdf106972);
 #endif
     CPPUNIT_TEST_SUITE_END();
 };
@@ -317,6 +319,52 @@ void PdfExportTest::testTdf106206()
     // This failed, object #0 was referenced.
     CPPUNIT_ASSERT(bool(it == pEnd));
 }
+
+void PdfExportTest::testTdf106972()
+{
+    // Import the bugdoc and export as PDF.
+    OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "tdf106972.odt";
+    mxComponent = loadFromDesktop(aURL);
+    CPPUNIT_ASSERT(mxComponent.is());
+
+    uno::Reference<frame::XStorable> xStorable(mxComponent, uno::UNO_QUERY);
+    utl::TempFile aTempFile;
+    aTempFile.EnableKillingFile();
+    utl::MediaDescriptor aMediaDescriptor;
+    aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export");
+    xStorable->storeToURL(aTempFile.GetURL(), aMediaDescriptor.getAsConstPropertyValueList());
+
+    // Parse the export result.
+    vcl::filter::PDFDocument aDocument;
+    SvFileStream aStream(aTempFile.GetURL(), StreamMode::READ);
+    CPPUNIT_ASSERT(aDocument.Read(aStream));
+
+    // Get access to the only form object on the only page.
+    std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages();
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size());
+    vcl::filter::PDFObjectElement* pResources = aPages[0]->LookupObject("Resources");
+    CPPUNIT_ASSERT(pResources);
+    auto pXObjects = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pResources->Lookup("XObject"));
+    CPPUNIT_ASSERT(pXObjects);
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pXObjects->GetItems().size());
+    vcl::filter::PDFObjectElement* pXObject = pXObjects->LookupObject(pXObjects->GetItems().begin()->first);
+    CPPUNIT_ASSERT(pXObject);
+
+    // Get access to the only image inside the form object.
+    auto pFormResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pXObject->Lookup("Resources"));
+    CPPUNIT_ASSERT(pFormResources);
+    auto pImages = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pFormResources->LookupElement("XObject"));
+    CPPUNIT_ASSERT(pImages);
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pImages->GetItems().size());
+    vcl::filter::PDFObjectElement* pImage = pImages->LookupObject(pImages->GetItems().begin()->first);
+    CPPUNIT_ASSERT(pImage);
+
+    // Assert resources of the image.
+    auto pImageResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pImage->Lookup("Resources"));
+    CPPUNIT_ASSERT(pImageResources);
+    // This failed: the PDF image had no Font resource.
+    CPPUNIT_ASSERT(pImageResources->LookupElement("Font"));
+}
 #endif
 
 CPPUNIT_TEST_SUITE_REGISTRATION(PdfExportTest);
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index 86aeade27137..444ec9239d20 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -880,6 +880,8 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
     int nArrayDepth = 0;
     // Last seen array token that's outside any dictionaries.
     PDFArrayElement* pArray = nullptr;
+    // If we're inside an obj/endobj pair.
+    bool bInObject = false;
     while (true)
     {
         char ch;
@@ -1030,6 +1032,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
                     if (it != m_aOffsetObjects.end())
                         m_pXRefStream = it->second;
                 }
+                else if (bInObject && !nDictionaryDepth && !nArrayDepth && pObject)
+                    // Number element inside an object, but outside a
+                    // dictionary / array: remember it.
+                    pObject->SetNumberElement(pNumberElement);
             }
             else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
             {
@@ -1061,6 +1067,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
                         rElements.push_back(std::unique_ptr<PDFElement>(pObject));
                         m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
                         m_aIDObjects[pObjectNumber->GetValue()] = pObject;
+                        bInObject = true;
                     }
                     else
                     {
@@ -1150,6 +1157,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
                         pObjectStream = nullptr;
                         pObjectKey = nullptr;
                     }
+                    bInObject = false;
                 }
                 else if (aKeyword == "true" || aKeyword == "false")
                     rElements.push_back(std::unique_ptr<PDFElement>(new PDFBooleanElement(aKeyword.toBoolean())));
@@ -2080,6 +2088,7 @@ PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, doubl
     : m_rDoc(rDoc),
       m_fObjectValue(fObjectValue),
       m_fGenerationValue(fGenerationValue),
+      m_pNumberElement(nullptr),
       m_nDictionaryOffset(0),
       m_nDictionaryLength(0),
       m_pDictionaryElement(nullptr),
@@ -2477,6 +2486,16 @@ void PDFObjectElement::SetDictionary(PDFDictionaryElement* pDictionaryElement)
     m_pDictionaryElement = pDictionaryElement;
 }
 
+void PDFObjectElement::SetNumberElement(PDFNumberElement* pNumberElement)
+{
+    m_pNumberElement = pNumberElement;
+}
+
+PDFNumberElement* PDFObjectElement::GetNumberElement() const
+{
+    return m_pNumberElement;
+}
+
 std::vector< std::pair<OString, PDFElement*> > PDFObjectElement::GetDictionaryItemsByOffset()
 {
     std::vector< std::pair<OString, PDFElement*> > aRet;
@@ -2496,8 +2515,11 @@ std::vector< std::pair<OString, PDFElement*> > PDFObjectElement::GetDictionaryIt
     return aRet;
 }
 
-const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems() const
+const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
 {
+    if (m_aDictionary.empty())
+        PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
+
     return m_aDictionary;
 }
 
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index f11ef8591f59..698fdab61d6b 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -10990,6 +10990,14 @@ sal_Int32 PDFWriterImpl::copyExternalResource(SvMemoryStream& rDocBuffer, filter
         aLine.append("]\n");
     }
 
+    // If the object has a number element outside a dictionary or array, copy that.
+    if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement())
+    {
+        aLine.append(static_cast<const sal_Char*>(rDocBuffer.GetData()) + pNumber->GetLocation(), pNumber->GetLength());
+        aLine.append("\n");
+    }
+
+
     aLine.append("endobj\n\n");
 
     // We have the whole object, now write it to the output.
@@ -11008,18 +11016,30 @@ OString PDFWriterImpl::copyExternalResources(filter::PDFObjectElement& rPage, co
     std::map<OString, sal_Int32> aRet;
 
     // Get the rKind subset of the resource dictionary.
-    auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources"));
-    if (!pResources)
-        return OString();
-
-    auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pResources->LookupElement(rKind));
-    if (!pDictionary)
+    std::map<OString, filter::PDFElement*> aItems;
+    if (auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources")))
+    {
+        // Resources is a direct dictionary.
+        if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pResources->LookupElement(rKind)))
+            aItems = pDictionary->GetItems();
+    }
+    else if (filter::PDFObjectElement* pPageResources = rPage.LookupObject("Resources"))
+    {
+        // Resources is an indirect object.
+        filter::PDFElement* pValue = pPageResources->Lookup(rKind);
+        if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pValue))
+            // Kind is a direct dictionary.
+            aItems = pDictionary->GetItems();
+        else if (filter::PDFObjectElement* pObject = pPageResources->LookupObject(rKind))
+            // Kind is an indirect object.
+            aItems = pObject->GetDictionaryItems();
+    }
+    if (aItems.empty())
         return OString();
 
     SvMemoryStream& rDocBuffer = rPage.GetDocument().GetEditBuffer();
 
-    const std::map<OString, filter::PDFElement*>& rItems = pDictionary->GetItems();
-    for (const auto& rItem : rItems)
+    for (const auto& rItem : aItems)
     {
         // For each item copy it over to our output then insert it into aRet.
         auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second);


More information about the Libreoffice-commits mailing list