[Libreoffice-commits] core.git: Branch 'distro/collabora/cp-5.3' - 2 commits - include/vcl vcl/qa vcl/source

Thu Apr 6 08:12:44 UTC 2017

include/vcl/filter/pdfdocument.hxx                |    6 +
 vcl/qa/cppunit/pdfexport/data/tdf106972-pdf17.odt |binary
 vcl/qa/cppunit/pdfexport/data/tdf106972.odt       |binary
 vcl/qa/cppunit/pdfexport/pdfexport.cxx            |   86 ++++++++++++++++++++++
 vcl/source/filter/ipdf/pdfdocument.cxx            |   24 +++++-
 vcl/source/gdi/pdfwriter_impl.cxx                 |   66 ++++++++++++++--
 6 files changed, 170 insertions(+), 12 deletions(-)

New commits:
commit 6cdc0514f92a24532b3be581b9295324ea02fdf5
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Wed Apr 5 17:31:47 2017 +0200

    Related: tdf#106972 vcl PDF export, PDF images: ignore PDF >= 1.5
    
    When copying their page stream into ours, we need to make sure their
    syntax is <= 1.4; so when checking if the image has PDF data, ignore the
    case when it has, but it's >= 1.5 (at least in the default case when not
    using reference XObjects).
    
    Change-Id: I6bd77803b92fe16bdd327e5e7c3d2b42adeacca4
    Reviewed-on: https://gerrit.libreoffice.org/36161
    Reviewed-by: Miklos Vajna <vmiklos at collabora.co.uk>
    Tested-by: Jenkins <ci at libreoffice.org>
    (cherry picked from commit 4443d7be61a9ae45630183d856a566cecd06ad95)

diff --git a/vcl/qa/cppunit/pdfexport/data/tdf106972-pdf17.odt b/vcl/qa/cppunit/pdfexport/data/tdf106972-pdf17.odt
new file mode 100644
index 000000000000..d46c93dffb5f
Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/tdf106972-pdf17.odt differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index 2bbb9f3c9f01..566495f38edf 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -50,6 +50,7 @@ public:
     /// Tests export of PDF images without reference XObjects.
     void testTdf106693();
     void testTdf106972();
+    void testTdf106972Pdf17();
 #endif
 
     CPPUNIT_TEST_SUITE(PdfExportTest);
@@ -60,6 +61,7 @@ public:
     CPPUNIT_TEST(testTdf106206);
     CPPUNIT_TEST(testTdf106693);
     CPPUNIT_TEST(testTdf106972);
+    CPPUNIT_TEST(testTdf106972Pdf17);
 #endif
     CPPUNIT_TEST_SUITE_END();
 };
@@ -365,6 +367,42 @@ void PdfExportTest::testTdf106972()
     // This failed: the PDF image had no Font resource.
     CPPUNIT_ASSERT(pImageResources->LookupElement("Font"));
 }
+
+void PdfExportTest::testTdf106972Pdf17()
+{
+    // Import the bugdoc and export as PDF.
+    OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "tdf106972-pdf17.odt";
+    mxComponent = loadFromDesktop(aURL);
+    CPPUNIT_ASSERT(mxComponent.is());
+
+    uno::Reference<frame::XStorable> xStorable(mxComponent, uno::UNO_QUERY);
+    utl::TempFile aTempFile;
+    aTempFile.EnableKillingFile();
+    utl::MediaDescriptor aMediaDescriptor;
+    aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export");
+    xStorable->storeToURL(aTempFile.GetURL(), aMediaDescriptor.getAsConstPropertyValueList());
+
+    // Parse the export result.
+    vcl::filter::PDFDocument aDocument;
+    SvFileStream aStream(aTempFile.GetURL(), StreamMode::READ);
+    CPPUNIT_ASSERT(aDocument.Read(aStream));
+
+    // Get access to the only image on the only page.
+    std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages();
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size());
+    vcl::filter::PDFObjectElement* pResources = aPages[0]->LookupObject("Resources");
+    CPPUNIT_ASSERT(pResources);
+    auto pXObjects = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pResources->Lookup("XObject"));
+    CPPUNIT_ASSERT(pXObjects);
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pXObjects->GetItems().size());
+    vcl::filter::PDFObjectElement* pXObject = pXObjects->LookupObject(pXObjects->GetItems().begin()->first);
+    CPPUNIT_ASSERT(pXObject);
+
+    // This failed, the "image" had resources; that typically means we tried to
+    // preserve the original PDF markup here; which is not OK as long as our
+    // default output is PDF 1.4, and this bugdoc has PDF 1.7 data.
+    CPPUNIT_ASSERT(!pXObject->Lookup("Resources"));
+}
 #endif
 
 CPPUNIT_TEST_SUITE_REGISTRATION(PdfExportTest);
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index c58f22fbc85a..2fd12cd0e5fb 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -665,6 +665,32 @@ static void appendDestinationName( const OUString& rString, OStringBuffer& rBuff
 }
 //<--- i56629
 
+/// Decide if rGraphic has PDF data that is possible to embed in our output.
+static bool hasPdfData(const Graphic& rGraphic, bool bUseReferenceXObject)
+{
+    const css::uno::Sequence<sal_Int8>& rData = rGraphic.getPdfData();
+
+    if (rData.getLength() < 8)
+        return false;
+
+    if (rData[0] != '%' || rData[1] != 'P' || rData[2] != 'D' || rData[3] != 'F' || rData[4] != '-')
+        // Unexpected header.
+        return false;
+
+    if (bUseReferenceXObject)
+        // This is possible for all versions.
+        return true;
+
+    sal_Int32 nMajor = OString(rData[5]).toInt32();
+    sal_Int32 nMinor = OString(rData[7]).toInt32();
+
+    if (nMajor > 1 || (nMajor == 1 && nMinor > 4))
+        // This is PDF-1.5 or newer, can't embed into PDF-1.4.
+        return false;
+
+    return true;
+}
+
 void PDFWriter::AppendUnicodeTextString(const OUString& rString, OStringBuffer& rBuffer)
 {
     rBuffer.append( "FEFF" );
@@ -12413,7 +12439,7 @@ void PDFWriterImpl::createEmbeddedFile(const Graphic& rGraphic, ReferenceXObject
     // no pdf data.
     rEmit.m_nBitmapObject = nBitmapObject;
 
-    if (!rGraphic.getPdfData().hasElements())
+    if (!hasPdfData(rGraphic, m_aContext.UseReferenceXObject))
         return;
 
     if (m_aContext.UseReferenceXObject)
@@ -12593,7 +12619,7 @@ const PDFWriterImpl::BitmapEmit& PDFWriterImpl::createBitmapEmit( const BitmapEx
         m_aBitmaps.push_front( BitmapEmit() );
         m_aBitmaps.front().m_aID        = aID;
         m_aBitmaps.front().m_aBitmap    = aBitmap;
-        if (!rGraphic.getPdfData().hasElements() || m_aContext.UseReferenceXObject)
+        if (!hasPdfData(rGraphic, m_aContext.UseReferenceXObject) || m_aContext.UseReferenceXObject)
             m_aBitmaps.front().m_nObject = createObject();
         createEmbeddedFile(rGraphic, m_aBitmaps.front().m_aReferenceXObject, m_aBitmaps.front().m_nObject);
         it = m_aBitmaps.begin();
commit 3d1e7a288a0121f9a6fadf3a0631a739dab6a5ab
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Wed Apr 5 15:35:13 2017 +0200

    tdf#106972 vcl PDF export, PDF images: handle indirect font references
    
    There were a number of problems here:
    
    - the /Resources key of a page object may be an indirect object
    - the /Font key of a resource object may be an indirect object
    - the /Length key of an object may be an indirect object
    
    So in all these cases handle not only a direct dictionary / number but
    also when we have a reference-to-dictionary/number.
    
    Change-Id: Ie74371f0ba43a133a1299843ef20cbfc75fe26d7
    (cherry picked from commit 242a9b634213acf03cabc373928555dc81afc672)

diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx
index 4bed3c32737a..595b4f0fdfd3 100644
--- a/include/vcl/filter/pdfdocument.hxx
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -50,6 +50,8 @@ class VCL_DLLPUBLIC PDFObjectElement : public PDFElement
     double m_fObjectValue;
     double m_fGenerationValue;
     std::map<OString, PDFElement*> m_aDictionary;
+    /// If set, the object contains this number element (outside any dictionary/array).
+    PDFNumberElement* m_pNumberElement;
     /// Position after the '<<' token.
     sal_uInt64 m_nDictionaryOffset;
     /// Length of the dictionary buffer till (before) the '>>' token.
@@ -82,8 +84,10 @@ public:
     sal_uInt64 GetDictionaryLength();
     PDFDictionaryElement* GetDictionary();
     void SetDictionary(PDFDictionaryElement* pDictionaryElement);
+    void SetNumberElement(PDFNumberElement* pNumberElement);
+    PDFNumberElement* GetNumberElement() const;
     /// Get access to the parsed key-value items from the object dictionary.
-    const std::map<OString, PDFElement*>& GetDictionaryItems() const;
+    const std::map<OString, PDFElement*>& GetDictionaryItems();
     /// Same as GetDictionaryItems(), but entries are sorted by file offset.
     std::vector< std::pair<OString, PDFElement*> > GetDictionaryItemsByOffset();
     void SetArray(PDFArrayElement* pArrayElement);
diff --git a/vcl/qa/cppunit/pdfexport/data/tdf106972.odt b/vcl/qa/cppunit/pdfexport/data/tdf106972.odt
new file mode 100644
index 000000000000..3fa76c49fab9
Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/tdf106972.odt differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index ede50077b62c..2bbb9f3c9f01 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -49,6 +49,7 @@ public:
     void testTdf106206();
     /// Tests export of PDF images without reference XObjects.
     void testTdf106693();
+    void testTdf106972();
 #endif
 
     CPPUNIT_TEST_SUITE(PdfExportTest);
@@ -58,6 +59,7 @@ public:
     CPPUNIT_TEST(testTdf105093);
     CPPUNIT_TEST(testTdf106206);
     CPPUNIT_TEST(testTdf106693);
+    CPPUNIT_TEST(testTdf106972);
 #endif
     CPPUNIT_TEST_SUITE_END();
 };
@@ -317,6 +319,52 @@ void PdfExportTest::testTdf106206()
     // This failed, object #0 was referenced.
     CPPUNIT_ASSERT(bool(it == pEnd));
 }
+
+void PdfExportTest::testTdf106972()
+{
+    // Import the bugdoc and export as PDF.
+    OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "tdf106972.odt";
+    mxComponent = loadFromDesktop(aURL);
+    CPPUNIT_ASSERT(mxComponent.is());
+
+    uno::Reference<frame::XStorable> xStorable(mxComponent, uno::UNO_QUERY);
+    utl::TempFile aTempFile;
+    aTempFile.EnableKillingFile();
+    utl::MediaDescriptor aMediaDescriptor;
+    aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export");
+    xStorable->storeToURL(aTempFile.GetURL(), aMediaDescriptor.getAsConstPropertyValueList());
+
+    // Parse the export result.
+    vcl::filter::PDFDocument aDocument;
+    SvFileStream aStream(aTempFile.GetURL(), StreamMode::READ);
+    CPPUNIT_ASSERT(aDocument.Read(aStream));
+
+    // Get access to the only form object on the only page.
+    std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages();
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size());
+    vcl::filter::PDFObjectElement* pResources = aPages[0]->LookupObject("Resources");
+    CPPUNIT_ASSERT(pResources);
+    auto pXObjects = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pResources->Lookup("XObject"));
+    CPPUNIT_ASSERT(pXObjects);
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pXObjects->GetItems().size());
+    vcl::filter::PDFObjectElement* pXObject = pXObjects->LookupObject(pXObjects->GetItems().begin()->first);
+    CPPUNIT_ASSERT(pXObject);
+
+    // Get access to the only image inside the form object.
+    auto pFormResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pXObject->Lookup("Resources"));
+    CPPUNIT_ASSERT(pFormResources);
+    auto pImages = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pFormResources->LookupElement("XObject"));
+    CPPUNIT_ASSERT(pImages);
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pImages->GetItems().size());
+    vcl::filter::PDFObjectElement* pImage = pImages->LookupObject(pImages->GetItems().begin()->first);
+    CPPUNIT_ASSERT(pImage);
+
+    // Assert resources of the image.
+    auto pImageResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pImage->Lookup("Resources"));
+    CPPUNIT_ASSERT(pImageResources);
+    // This failed: the PDF image had no Font resource.
+    CPPUNIT_ASSERT(pImageResources->LookupElement("Font"));
+}
 #endif
 
 CPPUNIT_TEST_SUITE_REGISTRATION(PdfExportTest);
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index b50105565e3c..f6ae66f28041 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -879,6 +879,8 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
     int nArrayDepth = 0;
     // Last seen array token that's outside any dictionaries.
     PDFArrayElement* pArray = nullptr;
+    // If we're inside an obj/endobj pair.
+    bool bInObject = false;
     while (true)
     {
         char ch;
@@ -1029,6 +1031,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
                     if (it != m_aOffsetObjects.end())
                         m_pXRefStream = it->second;
                 }
+                else if (bInObject && !nDictionaryDepth && !nArrayDepth && pObject)
+                    // Number element inside an object, but outside a
+                    // dictionary / array: remember it.
+                    pObject->SetNumberElement(pNumberElement);
             }
             else if (isalpha(ch))
             {
@@ -1060,6 +1066,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
                         rElements.push_back(std::unique_ptr<PDFElement>(pObject));
                         m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
                         m_aIDObjects[pObjectNumber->GetValue()] = pObject;
+                        bInObject = true;
                     }
                     else
                     {
@@ -1149,6 +1156,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
                         pObjectStream = nullptr;
                         pObjectKey = nullptr;
                     }
+                    bInObject = false;
                 }
                 else if (aKeyword == "true" || aKeyword == "false")
                     rElements.push_back(std::unique_ptr<PDFElement>(new PDFBooleanElement(aKeyword.toBoolean())));
@@ -2077,6 +2085,7 @@ PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, doubl
     : m_rDoc(rDoc),
       m_fObjectValue(fObjectValue),
       m_fGenerationValue(fGenerationValue),
+      m_pNumberElement(nullptr),
       m_nDictionaryOffset(0),
       m_nDictionaryLength(0),
       m_pDictionaryElement(nullptr),
@@ -2474,6 +2483,16 @@ void PDFObjectElement::SetDictionary(PDFDictionaryElement* pDictionaryElement)
     m_pDictionaryElement = pDictionaryElement;
 }
 
+void PDFObjectElement::SetNumberElement(PDFNumberElement* pNumberElement)
+{
+    m_pNumberElement = pNumberElement;
+}
+
+PDFNumberElement* PDFObjectElement::GetNumberElement() const
+{
+    return m_pNumberElement;
+}
+
 std::vector< std::pair<OString, PDFElement*> > PDFObjectElement::GetDictionaryItemsByOffset()
 {
     std::vector< std::pair<OString, PDFElement*> > aRet;
@@ -2493,8 +2512,11 @@ std::vector< std::pair<OString, PDFElement*> > PDFObjectElement::GetDictionaryIt
     return aRet;
 }
 
-const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems() const
+const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
 {
+    if (m_aDictionary.empty())
+        PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
+
     return m_aDictionary;
 }
 
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index 51eed7262bbe..c58f22fbc85a 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -11829,6 +11829,14 @@ sal_Int32 PDFWriterImpl::copyExternalResource(SvMemoryStream& rDocBuffer, filter
         aLine.append("]\n");
     }
 
+    // If the object has a number element outside a dictionary or array, copy that.
+    if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement())
+    {
+        aLine.append(static_cast<const sal_Char*>(rDocBuffer.GetData()) + pNumber->GetLocation(), pNumber->GetLength());
+        aLine.append("\n");
+    }
+
+
     aLine.append("endobj\n\n");
 
     // We have the whole object, now write it to the output.
@@ -11847,18 +11855,30 @@ OString PDFWriterImpl::copyExternalResources(filter::PDFObjectElement& rPage, co
     std::map<OString, sal_Int32> aRet;
 
     // Get the rKind subset of the resource dictionary.
-    auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources"));
-    if (!pResources)
-        return OString();
-
-    auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pResources->LookupElement(rKind));
-    if (!pDictionary)
+    std::map<OString, filter::PDFElement*> aItems;
+    if (auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources")))
+    {
+        // Resources is a direct dictionary.
+        if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pResources->LookupElement(rKind)))
+            aItems = pDictionary->GetItems();
+    }
+    else if (filter::PDFObjectElement* pPageResources = rPage.LookupObject("Resources"))
+    {
+        // Resources is an indirect object.
+        filter::PDFElement* pValue = pPageResources->Lookup(rKind);
+        if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pValue))
+            // Kind is a direct dictionary.
+            aItems = pDictionary->GetItems();
+        else if (filter::PDFObjectElement* pObject = pPageResources->LookupObject(rKind))
+            // Kind is an indirect object.
+            aItems = pObject->GetDictionaryItems();
+    }
+    if (aItems.empty())
         return OString();
 
     SvMemoryStream& rDocBuffer = rPage.GetDocument().GetEditBuffer();
 
-    const std::map<OString, filter::PDFElement*>& rItems = pDictionary->GetItems();
-    for (const auto& rItem : rItems)
+    for (const auto& rItem : aItems)
     {
         // For each item copy it over to our output then insert it into aRet.
         auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second);