[Libreoffice-commits] core.git: Branch 'distro/collabora/cp-5.3' - 2 commits - include/vcl vcl/qa vcl/source
Miklos Vajna
vmiklos at collabora.co.uk
Thu Apr 6 08:12:44 UTC 2017
include/vcl/filter/pdfdocument.hxx | 6 +
vcl/qa/cppunit/pdfexport/data/tdf106972-pdf17.odt |binary
vcl/qa/cppunit/pdfexport/data/tdf106972.odt |binary
vcl/qa/cppunit/pdfexport/pdfexport.cxx | 86 ++++++++++++++++++++++
vcl/source/filter/ipdf/pdfdocument.cxx | 24 +++++-
vcl/source/gdi/pdfwriter_impl.cxx | 66 ++++++++++++++--
6 files changed, 170 insertions(+), 12 deletions(-)
New commits:
commit 6cdc0514f92a24532b3be581b9295324ea02fdf5
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Wed Apr 5 17:31:47 2017 +0200
Related: tdf#106972 vcl PDF export, PDF images: ignore PDF >= 1.5
When copying their page stream into ours, we need to make sure their
syntax is <= 1.4; so when checking if the image has PDF data, ignore the
case when it has such data but its version is >= 1.5 (at least in the
default case, when not using reference XObjects).
Change-Id: I6bd77803b92fe16bdd327e5e7c3d2b42adeacca4
Reviewed-on: https://gerrit.libreoffice.org/36161
Reviewed-by: Miklos Vajna <vmiklos at collabora.co.uk>
Tested-by: Jenkins <ci at libreoffice.org>
(cherry picked from commit 4443d7be61a9ae45630183d856a566cecd06ad95)
diff --git a/vcl/qa/cppunit/pdfexport/data/tdf106972-pdf17.odt b/vcl/qa/cppunit/pdfexport/data/tdf106972-pdf17.odt
new file mode 100644
index 000000000000..d46c93dffb5f
Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/tdf106972-pdf17.odt differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index 2bbb9f3c9f01..566495f38edf 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -50,6 +50,7 @@ public:
/// Tests export of PDF images without reference XObjects.
void testTdf106693();
void testTdf106972();
+ void testTdf106972Pdf17();
#endif
CPPUNIT_TEST_SUITE(PdfExportTest);
@@ -60,6 +61,7 @@ public:
CPPUNIT_TEST(testTdf106206);
CPPUNIT_TEST(testTdf106693);
CPPUNIT_TEST(testTdf106972);
+ CPPUNIT_TEST(testTdf106972Pdf17);
#endif
CPPUNIT_TEST_SUITE_END();
};
@@ -365,6 +367,42 @@ void PdfExportTest::testTdf106972()
// This failed: the PDF image had no Font resource.
CPPUNIT_ASSERT(pImageResources->LookupElement("Font"));
}
+
+void PdfExportTest::testTdf106972Pdf17()
+{
+ // Regression test: an image carrying PDF 1.7 data must not be exported as preserved PDF markup. Import the bugdoc and export as PDF.
+ OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "tdf106972-pdf17.odt";
+ mxComponent = loadFromDesktop(aURL);
+ CPPUNIT_ASSERT(mxComponent.is());
+
+ uno::Reference<frame::XStorable> xStorable(mxComponent, uno::UNO_QUERY);
+ utl::TempFile aTempFile;
+ aTempFile.EnableKillingFile();
+ utl::MediaDescriptor aMediaDescriptor;
+ aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export"); // Export through the "writer_pdf_Export" filter.
+ xStorable->storeToURL(aTempFile.GetURL(), aMediaDescriptor.getAsConstPropertyValueList());
+
+ // Parse the export result back with our own PDF tokenizer.
+ vcl::filter::PDFDocument aDocument;
+ SvFileStream aStream(aTempFile.GetURL(), StreamMode::READ);
+ CPPUNIT_ASSERT(aDocument.Read(aStream));
+
+ // Get access to the only image on the only page: exactly one page and exactly one XObject entry are expected.
+ std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages();
+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size());
+ vcl::filter::PDFObjectElement* pResources = aPages[0]->LookupObject("Resources");
+ CPPUNIT_ASSERT(pResources);
+ auto pXObjects = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pResources->Lookup("XObject"));
+ CPPUNIT_ASSERT(pXObjects);
+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pXObjects->GetItems().size());
+ vcl::filter::PDFObjectElement* pXObject = pXObjects->LookupObject(pXObjects->GetItems().begin()->first);
+ CPPUNIT_ASSERT(pXObject);
+
+ // This failed: the "image" had resources, which typically means we tried to
+ // preserve the original PDF markup here. That is not OK as long as our default
+ // output is PDF 1.4, because this bugdoc has PDF 1.7 data.
+ CPPUNIT_ASSERT(!pXObject->Lookup("Resources"));
+}
#endif
CPPUNIT_TEST_SUITE_REGISTRATION(PdfExportTest);
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index c58f22fbc85a..2fd12cd0e5fb 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -665,6 +665,32 @@ static void appendDestinationName( const OUString& rString, OStringBuffer& rBuff
}
//<--- i56629
+/// Decide if rGraphic has PDF data that is possible to embed in our output: it must start with "%PDF-" and, unless bUseReferenceXObject is set, must not be newer than PDF 1.4.
+static bool hasPdfData(const Graphic& rGraphic, bool bUseReferenceXObject)
+{
+ const css::uno::Sequence<sal_Int8>& rData = rGraphic.getPdfData();
+
+ if (rData.getLength() < 8) // Too short to hold "%PDF-" plus the bytes at index 5 and 7 that are read below.
+ return false;
+
+ if (rData[0] != '%' || rData[1] != 'P' || rData[2] != 'D' || rData[3] != 'F' || rData[4] != '-')
+ // Unexpected header: the data does not start with "%PDF-".
+ return false;
+
+ if (bUseReferenceXObject)
+ // This is possible for all versions, so the version bytes are not inspected.
+ return true;
+
+ sal_Int32 nMajor = OString(rData[5]).toInt32(); // Byte 5 is taken as the major version digit.
+ sal_Int32 nMinor = OString(rData[7]).toInt32(); // Byte 7 is taken as the minor version digit; byte 6 is not checked. NOTE(review): a non-digit presumably parses as 0 and would be accepted -- confirm.
+
+ if (nMajor > 1 || (nMajor == 1 && nMinor > 4))
+ // This is PDF-1.5 or newer, can't embed into PDF-1.4.
+ return false;
+
+ return true;
+}
+
void PDFWriter::AppendUnicodeTextString(const OUString& rString, OStringBuffer& rBuffer)
{
rBuffer.append( "FEFF" );
@@ -12413,7 +12439,7 @@ void PDFWriterImpl::createEmbeddedFile(const Graphic& rGraphic, ReferenceXObject
// no pdf data.
rEmit.m_nBitmapObject = nBitmapObject;
- if (!rGraphic.getPdfData().hasElements())
+ if (!hasPdfData(rGraphic, m_aContext.UseReferenceXObject))
return;
if (m_aContext.UseReferenceXObject)
@@ -12593,7 +12619,7 @@ const PDFWriterImpl::BitmapEmit& PDFWriterImpl::createBitmapEmit( const BitmapEx
m_aBitmaps.push_front( BitmapEmit() );
m_aBitmaps.front().m_aID = aID;
m_aBitmaps.front().m_aBitmap = aBitmap;
- if (!rGraphic.getPdfData().hasElements() || m_aContext.UseReferenceXObject)
+ if (!hasPdfData(rGraphic, m_aContext.UseReferenceXObject) || m_aContext.UseReferenceXObject)
m_aBitmaps.front().m_nObject = createObject();
createEmbeddedFile(rGraphic, m_aBitmaps.front().m_aReferenceXObject, m_aBitmaps.front().m_nObject);
it = m_aBitmaps.begin();
commit 3d1e7a288a0121f9a6fadf3a0631a739dab6a5ab
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Wed Apr 5 15:35:13 2017 +0200
tdf#106972 vcl PDF export, PDF images: handle indirect font references
There were a number of problems here:
- the /Resources key of a page object may be an indirect object
- the /Font key of a resource object may be an indirect object
- the /Length key of an object may be an indirect object
So in all these cases handle not only a direct dictionary / number but
also when we have a reference-to-dictionary/number.
Change-Id: Ie74371f0ba43a133a1299843ef20cbfc75fe26d7
(cherry picked from commit 242a9b634213acf03cabc373928555dc81afc672)
diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx
index 4bed3c32737a..595b4f0fdfd3 100644
--- a/include/vcl/filter/pdfdocument.hxx
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -50,6 +50,8 @@ class VCL_DLLPUBLIC PDFObjectElement : public PDFElement
double m_fObjectValue;
double m_fGenerationValue;
std::map<OString, PDFElement*> m_aDictionary;
+ /// If set, the object contains this number element (outside any dictionary/array).
+ PDFNumberElement* m_pNumberElement;
/// Position after the '<<' token.
sal_uInt64 m_nDictionaryOffset;
/// Length of the dictionary buffer till (before) the '>>' token.
@@ -82,8 +84,10 @@ public:
sal_uInt64 GetDictionaryLength();
PDFDictionaryElement* GetDictionary();
void SetDictionary(PDFDictionaryElement* pDictionaryElement);
+ void SetNumberElement(PDFNumberElement* pNumberElement);
+ PDFNumberElement* GetNumberElement() const;
/// Get access to the parsed key-value items from the object dictionary.
- const std::map<OString, PDFElement*>& GetDictionaryItems() const;
+ const std::map<OString, PDFElement*>& GetDictionaryItems();
/// Same as GetDictionaryItems(), but entries are sorted by file offset.
std::vector< std::pair<OString, PDFElement*> > GetDictionaryItemsByOffset();
void SetArray(PDFArrayElement* pArrayElement);
diff --git a/vcl/qa/cppunit/pdfexport/data/tdf106972.odt b/vcl/qa/cppunit/pdfexport/data/tdf106972.odt
new file mode 100644
index 000000000000..3fa76c49fab9
Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/tdf106972.odt differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index ede50077b62c..2bbb9f3c9f01 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -49,6 +49,7 @@ public:
void testTdf106206();
/// Tests export of PDF images without reference XObjects.
void testTdf106693();
+ void testTdf106972();
#endif
CPPUNIT_TEST_SUITE(PdfExportTest);
@@ -58,6 +59,7 @@ public:
CPPUNIT_TEST(testTdf105093);
CPPUNIT_TEST(testTdf106206);
CPPUNIT_TEST(testTdf106693);
+ CPPUNIT_TEST(testTdf106972);
#endif
CPPUNIT_TEST_SUITE_END();
};
@@ -317,6 +319,52 @@ void PdfExportTest::testTdf106206()
// This failed, object #0 was referenced.
CPPUNIT_ASSERT(bool(it == pEnd));
}
+
+void PdfExportTest::testTdf106972()
+{
+ // Regression test for tdf#106972: the image inside the exported form object must keep its Font resource. Import the bugdoc and export as PDF.
+ OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "tdf106972.odt";
+ mxComponent = loadFromDesktop(aURL);
+ CPPUNIT_ASSERT(mxComponent.is());
+
+ uno::Reference<frame::XStorable> xStorable(mxComponent, uno::UNO_QUERY);
+ utl::TempFile aTempFile;
+ aTempFile.EnableKillingFile();
+ utl::MediaDescriptor aMediaDescriptor;
+ aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export"); // Export through the "writer_pdf_Export" filter.
+ xStorable->storeToURL(aTempFile.GetURL(), aMediaDescriptor.getAsConstPropertyValueList());
+
+ // Parse the export result back with our own PDF tokenizer.
+ vcl::filter::PDFDocument aDocument;
+ SvFileStream aStream(aTempFile.GetURL(), StreamMode::READ);
+ CPPUNIT_ASSERT(aDocument.Read(aStream));
+
+ // Get access to the only form object on the only page: exactly one page and exactly one XObject entry are expected.
+ std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages();
+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size());
+ vcl::filter::PDFObjectElement* pResources = aPages[0]->LookupObject("Resources");
+ CPPUNIT_ASSERT(pResources);
+ auto pXObjects = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pResources->Lookup("XObject"));
+ CPPUNIT_ASSERT(pXObjects);
+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pXObjects->GetItems().size());
+ vcl::filter::PDFObjectElement* pXObject = pXObjects->LookupObject(pXObjects->GetItems().begin()->first);
+ CPPUNIT_ASSERT(pXObject);
+
+ // Get access to the only image inside the form object, via the form's own Resources -> XObject dictionary.
+ auto pFormResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pXObject->Lookup("Resources"));
+ CPPUNIT_ASSERT(pFormResources);
+ auto pImages = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pFormResources->LookupElement("XObject"));
+ CPPUNIT_ASSERT(pImages);
+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pImages->GetItems().size());
+ vcl::filter::PDFObjectElement* pImage = pImages->LookupObject(pImages->GetItems().begin()->first);
+ CPPUNIT_ASSERT(pImage);
+
+ // Assert resources of the image: a Resources dictionary has to exist on the image object.
+ auto pImageResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pImage->Lookup("Resources"));
+ CPPUNIT_ASSERT(pImageResources);
+ // This failed: the PDF image had no Font resource.
+ CPPUNIT_ASSERT(pImageResources->LookupElement("Font"));
+}
#endif
CPPUNIT_TEST_SUITE_REGISTRATION(PdfExportTest);
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index b50105565e3c..f6ae66f28041 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -879,6 +879,8 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
int nArrayDepth = 0;
// Last seen array token that's outside any dictionaries.
PDFArrayElement* pArray = nullptr;
+ // If we're inside an obj/endobj pair.
+ bool bInObject = false;
while (true)
{
char ch;
@@ -1029,6 +1031,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
if (it != m_aOffsetObjects.end())
m_pXRefStream = it->second;
}
+ else if (bInObject && !nDictionaryDepth && !nArrayDepth && pObject)
+ // Number element inside an object, but outside a
+ // dictionary / array: remember it.
+ pObject->SetNumberElement(pNumberElement);
}
else if (isalpha(ch))
{
@@ -1060,6 +1066,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
rElements.push_back(std::unique_ptr<PDFElement>(pObject));
m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
m_aIDObjects[pObjectNumber->GetValue()] = pObject;
+ bInObject = true;
}
else
{
@@ -1149,6 +1156,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
pObjectStream = nullptr;
pObjectKey = nullptr;
}
+ bInObject = false;
}
else if (aKeyword == "true" || aKeyword == "false")
rElements.push_back(std::unique_ptr<PDFElement>(new PDFBooleanElement(aKeyword.toBoolean())));
@@ -2077,6 +2085,7 @@ PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, doubl
: m_rDoc(rDoc),
m_fObjectValue(fObjectValue),
m_fGenerationValue(fGenerationValue),
+ m_pNumberElement(nullptr),
m_nDictionaryOffset(0),
m_nDictionaryLength(0),
m_pDictionaryElement(nullptr),
@@ -2474,6 +2483,16 @@ void PDFObjectElement::SetDictionary(PDFDictionaryElement* pDictionaryElement)
m_pDictionaryElement = pDictionaryElement;
}
+void PDFObjectElement::SetNumberElement(PDFNumberElement* pNumberElement)
+{
+ m_pNumberElement = pNumberElement; // Remember the pointer only; nothing is copied here.
+}
+
+PDFNumberElement* PDFObjectElement::GetNumberElement() const
+{
+ return m_pNumberElement; // Whatever SetNumberElement() stored last; the constructor initializes it to nullptr.
+}
+
std::vector< std::pair<OString, PDFElement*> > PDFObjectElement::GetDictionaryItemsByOffset()
{
std::vector< std::pair<OString, PDFElement*> > aRet;
@@ -2493,8 +2512,11 @@ std::vector< std::pair<OString, PDFElement*> > PDFObjectElement::GetDictionaryIt
return aRet;
}
-const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems() const
+const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
{
+ if (m_aDictionary.empty()) // Nothing cached yet: hand m_aDictionary to Parse() on demand (presumably to be filled), which is why this getter is no longer const.
+ PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
+
return m_aDictionary;
}
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index 51eed7262bbe..c58f22fbc85a 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -11829,6 +11829,14 @@ sal_Int32 PDFWriterImpl::copyExternalResource(SvMemoryStream& rDocBuffer, filter
aLine.append("]\n");
}
+ // If the object has a number element outside a dictionary or array, copy that.
+ if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement())
+ {
+ aLine.append(static_cast<const sal_Char*>(rDocBuffer.GetData()) + pNumber->GetLocation(), pNumber->GetLength());
+ aLine.append("\n");
+ }
+
+
aLine.append("endobj\n\n");
// We have the whole object, now write it to the output.
@@ -11847,18 +11855,30 @@ OString PDFWriterImpl::copyExternalResources(filter::PDFObjectElement& rPage, co
std::map<OString, sal_Int32> aRet;
// Get the rKind subset of the resource dictionary.
- auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources"));
- if (!pResources)
- return OString();
-
- auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pResources->LookupElement(rKind));
- if (!pDictionary)
+ std::map<OString, filter::PDFElement*> aItems;
+ if (auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources")))
+ {
+ // Resources is a direct dictionary.
+ if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pResources->LookupElement(rKind)))
+ aItems = pDictionary->GetItems();
+ }
+ else if (filter::PDFObjectElement* pPageResources = rPage.LookupObject("Resources"))
+ {
+ // Resources is an indirect object.
+ filter::PDFElement* pValue = pPageResources->Lookup(rKind);
+ if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pValue))
+ // Kind is a direct dictionary.
+ aItems = pDictionary->GetItems();
+ else if (filter::PDFObjectElement* pObject = pPageResources->LookupObject(rKind))
+ // Kind is an indirect object.
+ aItems = pObject->GetDictionaryItems();
+ }
+ if (aItems.empty())
return OString();
SvMemoryStream& rDocBuffer = rPage.GetDocument().GetEditBuffer();
- const std::map<OString, filter::PDFElement*>& rItems = pDictionary->GetItems();
- for (const auto& rItem : rItems)
+ for (const auto& rItem : aItems)
{
// For each item copy it over to our output then insert it into aRet.
auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second);
More information about the Libreoffice-commits
mailing list