[Libreoffice-commits] core.git: include/vcl vcl/qa vcl/source
Miklos Vajna
vmiklos at collabora.co.uk
Wed Apr 5 17:08:15 UTC 2017
include/vcl/filter/pdfdocument.hxx | 6 ++-
vcl/qa/cppunit/pdfexport/data/tdf106972.odt |binary
vcl/qa/cppunit/pdfexport/pdfexport.cxx | 48 ++++++++++++++++++++++++++++
vcl/source/filter/ipdf/pdfdocument.cxx | 24 +++++++++++++-
vcl/source/gdi/pdfwriter_impl.cxx | 36 ++++++++++++++++-----
5 files changed, 104 insertions(+), 10 deletions(-)
New commits:
commit 242a9b634213acf03cabc373928555dc81afc672
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Wed Apr 5 15:35:13 2017 +0200
tdf#106972 vcl PDF export, PDF images: handle indirect font references
There were a number of problems here:
- the /Resources key of a page object may be an indirect object
- the /Font key of a resource object may be an indirect object
- the /Length key of an object may be an indirect object
So in all these cases, handle not only a direct dictionary / number, but
also an indirect reference to a dictionary / number.
Change-Id: Ie74371f0ba43a133a1299843ef20cbfc75fe26d7
diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx
index 4bed3c32737a..595b4f0fdfd3 100644
--- a/include/vcl/filter/pdfdocument.hxx
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -50,6 +50,8 @@ class VCL_DLLPUBLIC PDFObjectElement : public PDFElement
double m_fObjectValue;
double m_fGenerationValue;
std::map<OString, PDFElement*> m_aDictionary;
+ /// If set, the object contains this number element (outside any dictionary/array).
+ PDFNumberElement* m_pNumberElement;
/// Position after the '<<' token.
sal_uInt64 m_nDictionaryOffset;
/// Length of the dictionary buffer till (before) the '>>' token.
@@ -82,8 +84,10 @@ public:
sal_uInt64 GetDictionaryLength();
PDFDictionaryElement* GetDictionary();
void SetDictionary(PDFDictionaryElement* pDictionaryElement);
+ void SetNumberElement(PDFNumberElement* pNumberElement);
+ PDFNumberElement* GetNumberElement() const;
/// Get access to the parsed key-value items from the object dictionary.
- const std::map<OString, PDFElement*>& GetDictionaryItems() const;
+ const std::map<OString, PDFElement*>& GetDictionaryItems();
/// Same as GetDictionaryItems(), but entries are sorted by file offset.
std::vector< std::pair<OString, PDFElement*> > GetDictionaryItemsByOffset();
void SetArray(PDFArrayElement* pArrayElement);
diff --git a/vcl/qa/cppunit/pdfexport/data/tdf106972.odt b/vcl/qa/cppunit/pdfexport/data/tdf106972.odt
new file mode 100644
index 000000000000..3fa76c49fab9
Binary files /dev/null and b/vcl/qa/cppunit/pdfexport/data/tdf106972.odt differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index ede50077b62c..2bbb9f3c9f01 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -49,6 +49,7 @@ public:
void testTdf106206();
/// Tests export of PDF images without reference XObjects.
void testTdf106693();
+ void testTdf106972();
#endif
CPPUNIT_TEST_SUITE(PdfExportTest);
@@ -58,6 +59,7 @@ public:
CPPUNIT_TEST(testTdf105093);
CPPUNIT_TEST(testTdf106206);
CPPUNIT_TEST(testTdf106693);
+ CPPUNIT_TEST(testTdf106972);
#endif
CPPUNIT_TEST_SUITE_END();
};
@@ -317,6 +319,52 @@ void PdfExportTest::testTdf106206()
// This failed, object #0 was referenced.
CPPUNIT_ASSERT(bool(it == pEnd));
}
+
+void PdfExportTest::testTdf106972()
+{
+ // Import the bugdoc and export as PDF.
+ OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "tdf106972.odt";
+ mxComponent = loadFromDesktop(aURL);
+ CPPUNIT_ASSERT(mxComponent.is());
+
+ uno::Reference<frame::XStorable> xStorable(mxComponent, uno::UNO_QUERY);
+ utl::TempFile aTempFile;
+ aTempFile.EnableKillingFile();
+ utl::MediaDescriptor aMediaDescriptor;
+ aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export");
+ xStorable->storeToURL(aTempFile.GetURL(), aMediaDescriptor.getAsConstPropertyValueList());
+
+ // Parse the export result.
+ vcl::filter::PDFDocument aDocument;
+ SvFileStream aStream(aTempFile.GetURL(), StreamMode::READ);
+ CPPUNIT_ASSERT(aDocument.Read(aStream));
+
+ // Get access to the only form object on the only page.
+ std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages();
+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size());
+ vcl::filter::PDFObjectElement* pResources = aPages[0]->LookupObject("Resources");
+ CPPUNIT_ASSERT(pResources);
+ auto pXObjects = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pResources->Lookup("XObject"));
+ CPPUNIT_ASSERT(pXObjects);
+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pXObjects->GetItems().size());
+ vcl::filter::PDFObjectElement* pXObject = pXObjects->LookupObject(pXObjects->GetItems().begin()->first);
+ CPPUNIT_ASSERT(pXObject);
+
+ // Get access to the only image inside the form object.
+ auto pFormResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pXObject->Lookup("Resources"));
+ CPPUNIT_ASSERT(pFormResources);
+ auto pImages = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pFormResources->LookupElement("XObject"));
+ CPPUNIT_ASSERT(pImages);
+ CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pImages->GetItems().size());
+ vcl::filter::PDFObjectElement* pImage = pImages->LookupObject(pImages->GetItems().begin()->first);
+ CPPUNIT_ASSERT(pImage);
+
+ // Assert resources of the image.
+ auto pImageResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pImage->Lookup("Resources"));
+ CPPUNIT_ASSERT(pImageResources);
+ // This failed: the PDF image had no Font resource.
+ CPPUNIT_ASSERT(pImageResources->LookupElement("Font"));
+}
#endif
CPPUNIT_TEST_SUITE_REGISTRATION(PdfExportTest);
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index 86aeade27137..444ec9239d20 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -880,6 +880,8 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
int nArrayDepth = 0;
// Last seen array token that's outside any dictionaries.
PDFArrayElement* pArray = nullptr;
+ // If we're inside an obj/endobj pair.
+ bool bInObject = false;
while (true)
{
char ch;
@@ -1030,6 +1032,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
if (it != m_aOffsetObjects.end())
m_pXRefStream = it->second;
}
+ else if (bInObject && !nDictionaryDepth && !nArrayDepth && pObject)
+ // Number element inside an object, but outside a
+ // dictionary / array: remember it.
+ pObject->SetNumberElement(pNumberElement);
}
else if (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
{
@@ -1061,6 +1067,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
rElements.push_back(std::unique_ptr<PDFElement>(pObject));
m_aOffsetObjects[pObjectNumber->GetLocation()] = pObject;
m_aIDObjects[pObjectNumber->GetValue()] = pObject;
+ bInObject = true;
}
else
{
@@ -1150,6 +1157,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
pObjectStream = nullptr;
pObjectKey = nullptr;
}
+ bInObject = false;
}
else if (aKeyword == "true" || aKeyword == "false")
rElements.push_back(std::unique_ptr<PDFElement>(new PDFBooleanElement(aKeyword.toBoolean())));
@@ -2080,6 +2088,7 @@ PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, doubl
: m_rDoc(rDoc),
m_fObjectValue(fObjectValue),
m_fGenerationValue(fGenerationValue),
+ m_pNumberElement(nullptr),
m_nDictionaryOffset(0),
m_nDictionaryLength(0),
m_pDictionaryElement(nullptr),
@@ -2477,6 +2486,16 @@ void PDFObjectElement::SetDictionary(PDFDictionaryElement* pDictionaryElement)
m_pDictionaryElement = pDictionaryElement;
}
+void PDFObjectElement::SetNumberElement(PDFNumberElement* pNumberElement)
+{
+ m_pNumberElement = pNumberElement;
+}
+
+PDFNumberElement* PDFObjectElement::GetNumberElement() const
+{
+ return m_pNumberElement;
+}
+
std::vector< std::pair<OString, PDFElement*> > PDFObjectElement::GetDictionaryItemsByOffset()
{
std::vector< std::pair<OString, PDFElement*> > aRet;
@@ -2496,8 +2515,11 @@ std::vector< std::pair<OString, PDFElement*> > PDFObjectElement::GetDictionaryIt
return aRet;
}
-const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems() const
+const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
{
+ if (m_aDictionary.empty())
+ PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
+
return m_aDictionary;
}
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index f11ef8591f59..698fdab61d6b 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -10990,6 +10990,14 @@ sal_Int32 PDFWriterImpl::copyExternalResource(SvMemoryStream& rDocBuffer, filter
aLine.append("]\n");
}
+ // If the object has a number element outside a dictionary or array, copy that.
+ if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement())
+ {
+ aLine.append(static_cast<const sal_Char*>(rDocBuffer.GetData()) + pNumber->GetLocation(), pNumber->GetLength());
+ aLine.append("\n");
+ }
+
+
aLine.append("endobj\n\n");
// We have the whole object, now write it to the output.
@@ -11008,18 +11016,30 @@ OString PDFWriterImpl::copyExternalResources(filter::PDFObjectElement& rPage, co
std::map<OString, sal_Int32> aRet;
// Get the rKind subset of the resource dictionary.
- auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources"));
- if (!pResources)
- return OString();
-
- auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pResources->LookupElement(rKind));
- if (!pDictionary)
+ std::map<OString, filter::PDFElement*> aItems;
+ if (auto pResources = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources")))
+ {
+ // Resources is a direct dictionary.
+ if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pResources->LookupElement(rKind)))
+ aItems = pDictionary->GetItems();
+ }
+ else if (filter::PDFObjectElement* pPageResources = rPage.LookupObject("Resources"))
+ {
+ // Resources is an indirect object.
+ filter::PDFElement* pValue = pPageResources->Lookup(rKind);
+ if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pValue))
+ // Kind is a direct dictionary.
+ aItems = pDictionary->GetItems();
+ else if (filter::PDFObjectElement* pObject = pPageResources->LookupObject(rKind))
+ // Kind is an indirect object.
+ aItems = pObject->GetDictionaryItems();
+ }
+ if (aItems.empty())
return OString();
SvMemoryStream& rDocBuffer = rPage.GetDocument().GetEditBuffer();
- const std::map<OString, filter::PDFElement*>& rItems = pDictionary->GetItems();
- for (const auto& rItem : rItems)
+ for (const auto& rItem : aItems)
{
// For each item copy it over to our output then insert it into aRet.
auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second);
More information about the Libreoffice-commits
mailing list