[Libreoffice-commits] core.git: Branch 'distro/collabora/co-2021' - 4 commits - include/vcl vcl/CppunitTest_vcl_pdfium_library_test.mk vcl/inc vcl/qa vcl/source
Tomaž Vajngerl (via logerrit)
logerrit at kemper.freedesktop.org
Thu Apr 8 06:34:22 UTC 2021
include/vcl/filter/pdfdocument.hxx | 141 ++++
vcl/CppunitTest_vcl_pdfium_library_test.mk | 1
vcl/inc/pdf/objectcopier.hxx | 5
vcl/qa/cppunit/PDFDocumentTest.cxx | 588 ++++++++++++++++++++
vcl/qa/cppunit/data/basic.pdf | 71 ++
vcl/qa/cppunit/data/basicSource.pdf | 60 ++
vcl/source/filter/ipdf/pdfdocument.cxx | 841 +++++++++++++++--------------
vcl/source/gdi/pdfobjectcopier.cxx | 175 ++----
8 files changed, 1381 insertions(+), 501 deletions(-)
New commits:
commit 10c66240f5f0a8023d67fd7427d2551cd53a38de
Author: Tomaž Vajngerl <tomaz.vajngerl at collabora.co.uk>
AuthorDate: Thu Nov 12 22:24:02 2020 +0100
Commit: Andras Timar <andras.timar at collabora.com>
CommitDate: Thu Apr 8 08:33:36 2021 +0200
pdf: Improve dict and array format when copying with PDFObjectCopier
Change-Id: I4fcc4d912d4ce9d7800782b69811f877b85d9857
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/105811
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee at gmail.com>
diff --git a/vcl/source/gdi/pdfobjectcopier.cxx b/vcl/source/gdi/pdfobjectcopier.cxx
index d6323a17e91c..596ed3a2587d 100644
--- a/vcl/source/gdi/pdfobjectcopier.cxx
+++ b/vcl/source/gdi/pdfobjectcopier.cxx
@@ -102,17 +102,21 @@ sal_Int32 PDFObjectCopier::copyExternalResource(SvMemoryStream& rDocBuffer,
if (rObject.GetDictionary())
{
aLine.append("<< ");
-
+ bool bFirst = true;
for (auto const& rPair : rObject.GetDictionaryItems())
{
+ if (bFirst)
+ bFirst = false;
+ else
+ aLine.append(" ");
+
aLine.append("/");
aLine.append(rPair.first);
aLine.append(" ");
copyRecursively(aLine, rPair.second, rDocBuffer, rCopiedResources);
- aLine.append(" ");
}
- aLine.append(">>\n");
+ aLine.append(" >>\n");
}
if (filter::PDFStreamElement* pStream = rObject.GetStream())
@@ -129,10 +133,14 @@ sal_Int32 PDFObjectCopier::copyExternalResource(SvMemoryStream& rDocBuffer,
const std::vector<filter::PDFElement*>& rElements = pArray->GetElements();
+ bool bFirst = true;
for (auto const& pElement : rElements)
{
+ if (bFirst)
+ bFirst = false;
+ else
+ aLine.append(" ");
copyRecursively(aLine, pElement, rDocBuffer, rCopiedResources);
- aLine.append(" ");
}
aLine.append("]\n");
}
@@ -140,8 +148,7 @@ sal_Int32 PDFObjectCopier::copyExternalResource(SvMemoryStream& rDocBuffer,
// If the object has a number element outside a dictionary or array, copy that.
if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement())
{
- aLine.append(static_cast<const char*>(pObjectStream->GetData()) + pNumber->GetLocation(),
- pNumber->GetLength());
+ pNumber->writeString(aLine);
aLine.append("\n");
}
commit a7db32bacbc9ae350274420832b4729efc934258
Author: Tomaž Vajngerl <tomaz.vajngerl at collabora.co.uk>
AuthorDate: Mon Nov 9 19:19:19 2020 +0100
Commit: Andras Timar <andras.timar at collabora.com>
CommitDate: Thu Apr 8 08:33:21 2021 +0200
pdf: test PDFDocument parsing
basic.pdf is custom created so it covers all different parsing
use-cases.
Change-Id: I6eefa55b1cec5bf7eb91518d6a2df2cb48746dcc
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/105494
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee at gmail.com>
diff --git a/vcl/CppunitTest_vcl_pdfium_library_test.mk b/vcl/CppunitTest_vcl_pdfium_library_test.mk
index 0f4a480c8254..37acb1125506 100644
--- a/vcl/CppunitTest_vcl_pdfium_library_test.mk
+++ b/vcl/CppunitTest_vcl_pdfium_library_test.mk
@@ -11,6 +11,7 @@ $(eval $(call gb_CppunitTest_CppunitTest,vcl_pdfium_library_test))
$(eval $(call gb_CppunitTest_add_exception_objects,vcl_pdfium_library_test, \
vcl/qa/cppunit/PDFiumLibraryTest \
+ vcl/qa/cppunit/PDFDocumentTest \
))
$(eval $(call gb_CppunitTest_use_sdk_api,vcl_pdfium_library_test))
diff --git a/vcl/qa/cppunit/PDFDocumentTest.cxx b/vcl/qa/cppunit/PDFDocumentTest.cxx
new file mode 100644
index 000000000000..66de7dfc77d4
--- /dev/null
+++ b/vcl/qa/cppunit/PDFDocumentTest.cxx
@@ -0,0 +1,588 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <sal/config.h>
+
+#include <memory>
+
+#include <test/bootstrapfixture.hxx>
+#include <unotest/macros_test.hxx>
+
+#include <vcl/filter/pdfdocument.hxx>
+
+class PDFDocumentTest : public test::BootstrapFixture, public unotest::MacrosTest
+{
+public:
+ PDFDocumentTest() = default;
+};
+
+char const DATA_DIRECTORY[] = "/vcl/qa/cppunit/data/";
+
+CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseBasicPDF)
+{
+ OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "basic.pdf";
+ vcl::filter::PDFDocument aDocument;
+ SvFileStream aStream(aURL, StreamMode::READ);
+ CPPUNIT_ASSERT(aDocument.Read(aStream));
+ // basic.pdf holds a single page whose /Resources reference the /Test object.
+ std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages();
+ CPPUNIT_ASSERT_EQUAL(size_t(1), aPages.size());
+
+ vcl::filter::PDFObjectElement* pResources = aPages[0]->LookupObject("Resources");
+ CPPUNIT_ASSERT(pResources);
+
+ vcl::filter::PDFObjectElement* pTest = pResources->LookupObject("Test");
+ CPPUNIT_ASSERT(pTest);
+
+ vcl::filter::PDFObjectElement* pTestArray1 = pTest->LookupObject("TestArray1");
+ CPPUNIT_ASSERT(pTestArray1);
+ {
+ CPPUNIT_ASSERT_EQUAL(size_t(5), pTestArray1->GetArray()->GetElements().size());
+ }
+
+ vcl::filter::PDFObjectElement* pTestArray2 = pTest->LookupObject("TestArray2");
+ CPPUNIT_ASSERT(pTestArray2);
+ {
+ CPPUNIT_ASSERT_EQUAL(size_t(2), pTestArray2->GetArray()->GetElements().size());
+ }
+ // The dictionary object is checked both as raw bytes and as parsed items.
+ vcl::filter::PDFObjectElement* pTestDictionary = pTest->LookupObject("TestDictionary");
+ CPPUNIT_ASSERT(pTestDictionary); // checked before the first dereference, so a failed lookup fails cleanly
+ {
+ sal_uInt64 nOffset = pTestDictionary->GetDictionaryOffset();
+ sal_uInt64 nLength = pTestDictionary->GetDictionaryLength();
+
+ aStream.Seek(nOffset);
+ std::vector<char> aBuffer(nLength + 1, 0);
+ aStream.ReadBytes(aBuffer.data(), nLength);
+ OString aString(aBuffer.data());
+
+ CPPUNIT_ASSERT_EQUAL(
+ OString("/TestReference 7 0 R/TestNumber "
+ "123/TestName/SomeName/TestDictionary<</Key/Value>>/TestArray[1 2 3]"),
+ aString);
+ }
+
+ {
+ auto const& rItems = pTestDictionary->GetDictionaryItems();
+ CPPUNIT_ASSERT_EQUAL(size_t(5), rItems.size());
+ auto* pReference = dynamic_cast<vcl::filter::PDFReferenceElement*>(
+ pTestDictionary->Lookup("TestReference"));
+ CPPUNIT_ASSERT(pReference);
+ CPPUNIT_ASSERT_EQUAL(7, pReference->GetObjectValue());
+
+ auto* pNumber
+ = dynamic_cast<vcl::filter::PDFNumberElement*>(pTestDictionary->Lookup("TestNumber"));
+ CPPUNIT_ASSERT(pNumber);
+ CPPUNIT_ASSERT_EQUAL(123.0, pNumber->GetValue());
+
+ auto* pName
+ = dynamic_cast<vcl::filter::PDFNameElement*>(pTestDictionary->Lookup("TestName"));
+ CPPUNIT_ASSERT(pName);
+ CPPUNIT_ASSERT_EQUAL(OString("SomeName"), pName->GetValue());
+
+ auto* pDictionary = dynamic_cast<vcl::filter::PDFDictionaryElement*>(
+ pTestDictionary->Lookup("TestDictionary"));
+ CPPUNIT_ASSERT(pDictionary);
+
+ auto* pArray
+ = dynamic_cast<vcl::filter::PDFArrayElement*>(pTestDictionary->Lookup("TestArray"));
+ CPPUNIT_ASSERT(pArray);
+
+ // Check offsets and lengths
+ {
+ sal_uInt64 nOffset = pTestDictionary->GetDictionary()->GetKeyOffset("TestReference");
+ sal_uInt64 nLength
+ = pTestDictionary->GetDictionary()->GetKeyValueLength("TestReference");
+
+ aStream.Seek(nOffset);
+ std::vector<char> aBuffer(nLength + 1, 0);
+ aStream.ReadBytes(aBuffer.data(), nLength);
+ OString aString(aBuffer.data());
+
+ CPPUNIT_ASSERT_EQUAL(OString("TestReference 7 0 R"), aString);
+ }
+ {
+ sal_uInt64 nOffset = pTestDictionary->GetDictionary()->GetKeyOffset("TestNumber");
+ sal_uInt64 nLength = pTestDictionary->GetDictionary()->GetKeyValueLength("TestNumber");
+
+ aStream.Seek(nOffset);
+ std::vector<char> aBuffer(nLength + 1, 0);
+ aStream.ReadBytes(aBuffer.data(), nLength);
+ OString aString(aBuffer.data());
+
+ CPPUNIT_ASSERT_EQUAL(OString("TestNumber 123"), aString);
+ }
+ {
+ sal_uInt64 nOffset = pTestDictionary->GetDictionary()->GetKeyOffset("TestName");
+ sal_uInt64 nLength = pTestDictionary->GetDictionary()->GetKeyValueLength("TestName");
+
+ aStream.Seek(nOffset);
+ std::vector<char> aBuffer(nLength + 1, 0);
+ aStream.ReadBytes(aBuffer.data(), nLength);
+ OString aString(aBuffer.data());
+
+ CPPUNIT_ASSERT_EQUAL(OString("TestName/SomeName"), aString);
+ }
+ {
+ sal_uInt64 nOffset = pTestDictionary->GetDictionary()->GetKeyOffset("TestDictionary");
+ sal_uInt64 nLength
+ = pTestDictionary->GetDictionary()->GetKeyValueLength("TestDictionary");
+
+ aStream.Seek(nOffset);
+ std::vector<char> aBuffer(nLength + 1, 0);
+ aStream.ReadBytes(aBuffer.data(), nLength);
+ OString aString(aBuffer.data());
+
+ CPPUNIT_ASSERT_EQUAL(OString("TestDictionary<</Key/Value>>"), aString);
+ }
+ {
+ sal_uInt64 nOffset = pTestDictionary->GetDictionary()->GetKeyOffset("TestArray");
+ sal_uInt64 nLength = pTestDictionary->GetDictionary()->GetKeyValueLength("TestArray");
+
+ aStream.Seek(nOffset);
+ std::vector<char> aBuffer(nLength + 1, 0);
+ aStream.ReadBytes(aBuffer.data(), nLength);
+ OString aString(aBuffer.data());
+
+ CPPUNIT_ASSERT_EQUAL(OString("TestArray[1 2 3]"), aString);
+ }
+ }
+}
+
+namespace
+{
+vcl::filter::PDFObjectElement*
+addObjectElement(std::vector<std::unique_ptr<vcl::filter::PDFElement>>& rElements,
+ vcl::filter::PDFDocument& rDocument, int nObjectNumber, int nGenerationNumber)
+{
+ auto pObject = std::make_unique<vcl::filter::PDFObjectElement>(rDocument, nObjectNumber,
+ nGenerationNumber);
+ auto pObjectPtr = pObject.get();
+ rElements.push_back(std::move(pObject));
+ return pObjectPtr;
+}
+
+vcl::filter::PDFTrailerElement*
+addTrailerObjectElement(std::vector<std::unique_ptr<vcl::filter::PDFElement>>& rElements,
+ vcl::filter::PDFDocument& rDocument)
+{
+ auto pTrailer = std::make_unique<vcl::filter::PDFTrailerElement>(rDocument);
+ auto pTrailerPtr = pTrailer.get();
+ rElements.push_back(std::move(pTrailer));
+ return pTrailerPtr;
+}
+void addEndObjectElement(std::vector<std::unique_ptr<vcl::filter::PDFElement>>& rElements)
+{
+ rElements.push_back(std::make_unique<vcl::filter::PDFEndObjectElement>());
+}
+
+void addDictionaryElement(std::vector<std::unique_ptr<vcl::filter::PDFElement>>& rElements)
+{
+ rElements.push_back(std::make_unique<vcl::filter::PDFDictionaryElement>());
+}
+
+void addEndDictionaryElement(std::vector<std::unique_ptr<vcl::filter::PDFElement>>& rElements)
+{
+ rElements.push_back(std::make_unique<vcl::filter::PDFEndDictionaryElement>());
+}
+
+void addNameElement(std::vector<std::unique_ptr<vcl::filter::PDFElement>>& rElements,
+ OString const& rName)
+{
+ auto pNameElement = std::make_unique<vcl::filter::PDFNameElement>();
+ pNameElement->SetValue(rName);
+ rElements.push_back(std::move(pNameElement));
+}
+
+vcl::filter::PDFNumberElement*
+addNumberElement(std::vector<std::unique_ptr<vcl::filter::PDFElement>>& rElements, double fNumber)
+{
+ auto pNumberElement = std::make_unique<vcl::filter::PDFNumberElement>();
+ auto pNumberElementPtr = pNumberElement.get();
+ pNumberElement->SetValue(fNumber);
+ rElements.push_back(std::move(pNumberElement));
+ return pNumberElementPtr;
+}
+
+void addReferenceElement(std::vector<std::unique_ptr<vcl::filter::PDFElement>>& rElements,
+ vcl::filter::PDFDocument& rDocument,
+ vcl::filter::PDFNumberElement* pNumber1,
+ vcl::filter::PDFNumberElement* pNumber2)
+{
+ auto pReferenceElement
+ = std::make_unique<vcl::filter::PDFReferenceElement>(rDocument, *pNumber1, *pNumber2);
+ rElements.push_back(std::move(pReferenceElement));
+}
+
+void addArrayElement(std::vector<std::unique_ptr<vcl::filter::PDFElement>>& rElements,
+ vcl::filter::PDFObjectElement* pObjectPointer)
+{
+ auto pArray = std::make_unique<vcl::filter::PDFArrayElement>(pObjectPointer);
+ rElements.push_back(std::move(pArray));
+}
+
+void addEndArrayElement(std::vector<std::unique_ptr<vcl::filter::PDFElement>>& rElements)
+{
+ rElements.push_back(std::make_unique<vcl::filter::PDFEndArrayElement>());
+}
+
+} // end anonymous namespace
+
+CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseEmptyDictionary)
+{
+ std::vector<std::unique_ptr<vcl::filter::PDFElement>> aElements;
+ vcl::filter::PDFDocument aDocument;
+ addObjectElement(aElements, aDocument, 1, 0);
+ addDictionaryElement(aElements);
+ addEndDictionaryElement(aElements);
+ addEndObjectElement(aElements);
+
+ auto pObject = dynamic_cast<vcl::filter::PDFObjectElement*>(aElements[0].get());
+ CPPUNIT_ASSERT(pObject);
+
+ vcl::filter::PDFObjectParser aParser(aElements);
+ aParser.parse(pObject);
+
+ CPPUNIT_ASSERT(pObject->GetDictionary());
+ CPPUNIT_ASSERT_EQUAL(size_t(0), pObject->GetDictionary()->GetItems().size());
+}
+
+CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseDictionaryWithName)
+{
+ std::vector<std::unique_ptr<vcl::filter::PDFElement>> aElements;
+ vcl::filter::PDFDocument aDocument;
+ {
+ addObjectElement(aElements, aDocument, 1, 0);
+ addDictionaryElement(aElements);
+ addNameElement(aElements, "Test");
+ addNumberElement(aElements, 30.0);
+ addEndDictionaryElement(aElements);
+ addEndObjectElement(aElements);
+ }
+
+ auto pObject = dynamic_cast<vcl::filter::PDFObjectElement*>(aElements[0].get());
+ CPPUNIT_ASSERT(pObject);
+
+ vcl::filter::PDFObjectParser aParser(aElements);
+ aParser.parse(pObject);
+
+ CPPUNIT_ASSERT(pObject->GetDictionary());
+ CPPUNIT_ASSERT_EQUAL(size_t(1), pObject->GetDictionary()->GetItems().size());
+ auto& rItems = pObject->GetDictionary()->GetItems();
+ auto pNumberElement = dynamic_cast<vcl::filter::PDFNumberElement*>(rItems.at("Test"));
+ CPPUNIT_ASSERT(pNumberElement);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(30.0, pNumberElement->GetValue(), 1e-4);
+}
+
+CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseDictionaryNested)
+{
+ std::vector<std::unique_ptr<vcl::filter::PDFElement>> aElements;
+ vcl::filter::PDFDocument aDocument;
+ {
+ addObjectElement(aElements, aDocument, 1, 0);
+ addDictionaryElement(aElements);
+
+ addNameElement(aElements, "Nested1");
+ addDictionaryElement(aElements);
+ {
+ addNameElement(aElements, "Nested2");
+ addDictionaryElement(aElements);
+ {
+ addNameElement(aElements, "SomeOtherKey");
+ addNameElement(aElements, "SomeOtherValue");
+ }
+ addEndDictionaryElement(aElements);
+ }
+ addEndDictionaryElement(aElements);
+
+ addNameElement(aElements, "SomeOtherKey");
+ addNameElement(aElements, "SomeOtherValue");
+ addEndDictionaryElement(aElements); // closes the outermost dictionary so the token stream is balanced
+ addEndObjectElement(aElements);
+ }
+
+ auto pObject = dynamic_cast<vcl::filter::PDFObjectElement*>(aElements[0].get());
+ CPPUNIT_ASSERT(pObject);
+
+ vcl::filter::PDFObjectParser aParser(aElements);
+ aParser.parse(pObject);
+
+ CPPUNIT_ASSERT(pObject->GetDictionary());
+ CPPUNIT_ASSERT_EQUAL(size_t(2), pObject->GetDictionary()->GetItems().size());
+ CPPUNIT_ASSERT(pObject->Lookup("Nested1"));
+ CPPUNIT_ASSERT(pObject->Lookup("SomeOtherKey"));
+}
+
+CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseEmptyArray)
+{
+ std::vector<std::unique_ptr<vcl::filter::PDFElement>> aElements;
+ vcl::filter::PDFDocument aDocument;
+ {
+ auto pObjectPtr = addObjectElement(aElements, aDocument, 1, 0);
+ addArrayElement(aElements, pObjectPtr);
+ addEndArrayElement(aElements);
+ addEndObjectElement(aElements);
+ }
+
+ auto pObject = dynamic_cast<vcl::filter::PDFObjectElement*>(aElements[0].get());
+ CPPUNIT_ASSERT(pObject);
+
+ vcl::filter::PDFObjectParser aParser(aElements);
+ aParser.parse(pObject);
+
+ CPPUNIT_ASSERT(pObject->GetArray());
+ CPPUNIT_ASSERT_EQUAL(size_t(0), pObject->GetArray()->GetElements().size());
+}
+
+CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseArrayWithSimpleElements)
+{
+ std::vector<std::unique_ptr<vcl::filter::PDFElement>> aElements;
+ vcl::filter::PDFDocument aDocument;
+
+ {
+ auto pObjectPtr = addObjectElement(aElements, aDocument, 1, 0);
+ addArrayElement(aElements, pObjectPtr);
+ addNameElement(aElements, "Test");
+ addNumberElement(aElements, 30.0);
+ addEndArrayElement(aElements);
+ addEndObjectElement(aElements);
+ }
+
+ auto pObject = dynamic_cast<vcl::filter::PDFObjectElement*>(aElements[0].get());
+ CPPUNIT_ASSERT(pObject);
+
+ vcl::filter::PDFObjectParser aParser(aElements);
+ aParser.parse(pObject);
+
+ CPPUNIT_ASSERT(pObject->GetArray());
+ CPPUNIT_ASSERT_EQUAL(size_t(2), pObject->GetArray()->GetElements().size());
+}
+
+CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseArrayNestedWithNumbers)
+{
+ std::vector<std::unique_ptr<vcl::filter::PDFElement>> aElements;
+ vcl::filter::PDFDocument aDocument;
+
+ // Token stream built below: [ 1 [ 10 ] 2 ]
+ {
+ auto pObjectPtr = addObjectElement(aElements, aDocument, 1, 0);
+ addArrayElement(aElements, pObjectPtr);
+ {
+ addNumberElement(aElements, 1.0);
+ addArrayElement(aElements, pObjectPtr);
+ addNumberElement(aElements, 10.0);
+ addEndArrayElement(aElements);
+ addNumberElement(aElements, 2.0);
+ }
+ addEndArrayElement(aElements);
+ addEndObjectElement(aElements);
+ }
+
+ // Assert
+ {
+ auto pObject = dynamic_cast<vcl::filter::PDFObjectElement*>(aElements[0].get());
+ CPPUNIT_ASSERT(pObject);
+
+ vcl::filter::PDFObjectParser aParser(aElements);
+ aParser.parse(pObject);
+
+ CPPUNIT_ASSERT(pObject->GetArray());
+ CPPUNIT_ASSERT_EQUAL(size_t(3), pObject->GetArray()->GetElements().size());
+ auto pRootArray = pObject->GetArray();
+
+ auto pNumber1 = dynamic_cast<vcl::filter::PDFNumberElement*>(pRootArray->GetElement(0));
+ CPPUNIT_ASSERT(pNumber1);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, pNumber1->GetValue(), 1e-4);
+
+ auto pArray3 = dynamic_cast<vcl::filter::PDFArrayElement*>(pRootArray->GetElement(1));
+ CPPUNIT_ASSERT(pArray3);
+ CPPUNIT_ASSERT_EQUAL(size_t(1), pArray3->GetElements().size());
+
+ auto pNumber2 = dynamic_cast<vcl::filter::PDFNumberElement*>(pRootArray->GetElement(2));
+ CPPUNIT_ASSERT(pNumber2); // guards the dereference on the next line
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(2.0, pNumber2->GetValue(), 1e-4);
+ }
+}
+
+CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseArrayNestedWithNames)
+{
+ std::vector<std::unique_ptr<vcl::filter::PDFElement>> aElements;
+ vcl::filter::PDFDocument aDocument;
+
+ // [/Inner1/Inner2[/Inner31][/Inner41/Inner42[/Inner431/Inner432]][/Inner51[/Inner521]]]
+
+ {
+ auto pObjectPtr = addObjectElement(aElements, aDocument, 1, 0);
+ addArrayElement(aElements, pObjectPtr);
+ {
+ addNameElement(aElements, "Inner1");
+ addNameElement(aElements, "Inner2");
+
+ addArrayElement(aElements, pObjectPtr);
+ {
+ addNameElement(aElements, "Inner31");
+ }
+ addEndArrayElement(aElements);
+
+ addArrayElement(aElements, pObjectPtr);
+ {
+ addNameElement(aElements, "Inner41");
+ addNameElement(aElements, "Inner42");
+ addArrayElement(aElements, pObjectPtr);
+ {
+ addNameElement(aElements, "Inner431");
+ addNameElement(aElements, "Inner432");
+ }
+ addEndArrayElement(aElements);
+ }
+ addEndArrayElement(aElements);
+
+ addArrayElement(aElements, pObjectPtr);
+ {
+ addNameElement(aElements, "Inner51");
+ addArrayElement(aElements, pObjectPtr);
+ {
+ addNameElement(aElements, "Inner521");
+ }
+ addEndArrayElement(aElements);
+ }
+ addEndArrayElement(aElements);
+ }
+ addEndArrayElement(aElements);
+ addEndObjectElement(aElements);
+ }
+
+ // Assert
+ {
+ auto pObject = dynamic_cast<vcl::filter::PDFObjectElement*>(aElements[0].get());
+ CPPUNIT_ASSERT(pObject);
+
+ vcl::filter::PDFObjectParser aParser(aElements);
+ aParser.parse(pObject);
+
+ CPPUNIT_ASSERT(pObject->GetArray());
+ CPPUNIT_ASSERT_EQUAL(size_t(5), pObject->GetArray()->GetElements().size());
+ auto pRootArray = pObject->GetArray();
+
+ auto pName1 = dynamic_cast<vcl::filter::PDFNameElement*>(pRootArray->GetElement(0));
+ CPPUNIT_ASSERT(pName1);
+ CPPUNIT_ASSERT_EQUAL(OString("Inner1"), pName1->GetValue());
+
+ auto pName2 = dynamic_cast<vcl::filter::PDFNameElement*>(pRootArray->GetElement(1));
+ CPPUNIT_ASSERT(pName2);
+ CPPUNIT_ASSERT_EQUAL(OString("Inner2"), pName2->GetValue());
+
+ auto pArray3 = dynamic_cast<vcl::filter::PDFArrayElement*>(pRootArray->GetElement(2));
+ CPPUNIT_ASSERT(pArray3);
+ CPPUNIT_ASSERT_EQUAL(size_t(1), pArray3->GetElements().size());
+
+ auto pInner31 = dynamic_cast<vcl::filter::PDFNameElement*>(pArray3->GetElement(0));
+ CPPUNIT_ASSERT(pInner31);
+ CPPUNIT_ASSERT_EQUAL(OString("Inner31"), pInner31->GetValue());
+
+ auto pArray4 = dynamic_cast<vcl::filter::PDFArrayElement*>(pRootArray->GetElement(3));
+ CPPUNIT_ASSERT(pArray4);
+ CPPUNIT_ASSERT_EQUAL(size_t(3), pArray4->GetElements().size());
+
+ auto pInner41 = dynamic_cast<vcl::filter::PDFNameElement*>(pArray4->GetElement(0));
+ CPPUNIT_ASSERT(pInner41);
+ CPPUNIT_ASSERT_EQUAL(OString("Inner41"), pInner41->GetValue());
+
+ auto pInner42 = dynamic_cast<vcl::filter::PDFNameElement*>(pArray4->GetElement(1));
+ CPPUNIT_ASSERT(pInner42);
+ CPPUNIT_ASSERT_EQUAL(OString("Inner42"), pInner42->GetValue());
+
+ auto pArray43 = dynamic_cast<vcl::filter::PDFArrayElement*>(pArray4->GetElement(2));
+ CPPUNIT_ASSERT(pArray43);
+ CPPUNIT_ASSERT_EQUAL(size_t(2), pArray43->GetElements().size());
+
+ auto pInner431 = dynamic_cast<vcl::filter::PDFNameElement*>(pArray43->GetElement(0));
+ CPPUNIT_ASSERT(pInner431);
+ CPPUNIT_ASSERT_EQUAL(OString("Inner431"), pInner431->GetValue());
+
+ auto pInner432 = dynamic_cast<vcl::filter::PDFNameElement*>(pArray43->GetElement(1));
+ CPPUNIT_ASSERT(pInner432);
+ CPPUNIT_ASSERT_EQUAL(OString("Inner432"), pInner432->GetValue());
+
+ auto pArray5 = dynamic_cast<vcl::filter::PDFArrayElement*>(pRootArray->GetElement(4));
+ CPPUNIT_ASSERT(pArray5);
+ CPPUNIT_ASSERT_EQUAL(size_t(2), pArray5->GetElements().size());
+
+ auto pInner51 = dynamic_cast<vcl::filter::PDFNameElement*>(pArray5->GetElement(0));
+ CPPUNIT_ASSERT(pInner51);
+ CPPUNIT_ASSERT_EQUAL(OString("Inner51"), pInner51->GetValue());
+
+ auto pArray52 = dynamic_cast<vcl::filter::PDFArrayElement*>(pArray5->GetElement(1));
+ CPPUNIT_ASSERT(pArray52);
+ CPPUNIT_ASSERT_EQUAL(size_t(1), pArray52->GetElements().size());
+
+ auto pInner521 = dynamic_cast<vcl::filter::PDFNameElement*>(pArray52->GetElement(0));
+ CPPUNIT_ASSERT(pInner521);
+ CPPUNIT_ASSERT_EQUAL(OString("Inner521"), pInner521->GetValue());
+ }
+}
+
+CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseTrailer)
+{
+ std::vector<std::unique_ptr<vcl::filter::PDFElement>> aElements;
+ vcl::filter::PDFDocument aDocument;
+
+ {
+ addTrailerObjectElement(aElements, aDocument);
+ addDictionaryElement(aElements);
+ addNameElement(aElements, "Size");
+ addNumberElement(aElements, 11.0);
+ addEndDictionaryElement(aElements);
+ }
+ {
+ auto pTrailer = dynamic_cast<vcl::filter::PDFTrailerElement*>(aElements[0].get());
+ CPPUNIT_ASSERT(pTrailer);
+
+ vcl::filter::PDFObjectParser aParser(aElements);
+ aParser.parse(pTrailer);
+
+ CPPUNIT_ASSERT(pTrailer->GetDictionary());
+ CPPUNIT_ASSERT_EQUAL(size_t(1), pTrailer->GetDictionary()->GetItems().size());
+ }
+}
+
+CPPUNIT_TEST_FIXTURE(PDFDocumentTest, testParseTrailerWithReference)
+{
+ std::vector<std::unique_ptr<vcl::filter::PDFElement>> aElements;
+ vcl::filter::PDFDocument aDocument;
+
+ {
+ addTrailerObjectElement(aElements, aDocument);
+ addDictionaryElement(aElements);
+ addNameElement(aElements, "Reference");
+ auto pNumberElement1 = addNumberElement(aElements, 11.0);
+ auto pNumberElement2 = addNumberElement(aElements, 0.0);
+ addReferenceElement(aElements, aDocument, pNumberElement1, pNumberElement2);
+ addEndDictionaryElement(aElements);
+ }
+ {
+ auto pTrailer = dynamic_cast<vcl::filter::PDFTrailerElement*>(aElements[0].get());
+ CPPUNIT_ASSERT(pTrailer);
+
+ vcl::filter::PDFObjectParser aParser(aElements);
+ aParser.parse(pTrailer);
+
+ CPPUNIT_ASSERT(pTrailer->GetDictionary());
+ CPPUNIT_ASSERT_EQUAL(size_t(1), pTrailer->GetDictionary()->GetItems().size());
+ auto pElement = pTrailer->Lookup("Reference");
+ CPPUNIT_ASSERT(pElement);
+ auto pReference = dynamic_cast<vcl::filter::PDFReferenceElement*>(pElement);
+ CPPUNIT_ASSERT(pReference);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(11.0, pReference->GetObjectValue(), 1e-4);
+ CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, pReference->GetGenerationValue(), 1e-4);
+ }
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/qa/cppunit/data/basic.pdf b/vcl/qa/cppunit/data/basic.pdf
new file mode 100644
index 000000000000..0d68be4bf5c0
--- /dev/null
+++ b/vcl/qa/cppunit/data/basic.pdf
@@ -0,0 +1,71 @@
+%PDF-1.2
+%µ¶
+
+1 0 obj
+<</Type/Page/Parent 5 0 R/Resources 3 0 R/Contents 2 0 R>>
+endobj
+
+2 0 obj
+<</Length 57>>
+stream
+BT
+/F1 24 Tf
+1 0 0 1 260 254 Tm
+0.5 g
+(Hello World)Tj
+ET
+
+endstream
+endobj
+
+3 0 obj
+<</ProcSet[/PDF/Text]/Font<</F1 4 0 R>>/Test 7 0 R>>
+endobj
+
+4 0 obj
+<</Type/Font/Subtype/Type1/Name/F1/BaseFont/Helvetica>>
+endobj
+
+5 0 obj
+<</Type/Pages/Kids[1 0 R]/Count 1/MediaBox[0 0 612 446]>>
+endobj
+
+6 0 obj
+<</Type/Catalog/Pages 5 0 R>>
+endobj
+
+7 0 obj
+<</TestArray1 8 0 R/TestArray2 9 0 R/TestDictionary 10 0 R>>
+endobj
+
+8 0 obj
+[/Inner1/Inner2[/Inner31][/Inner41/Inner42[/Inner431/Inner432]][/Inner51[/Inner521]]]
+endobj
+
+9 0 obj
+[/TestReference 7 0 R]
+endobj
+
+10 0 obj
+<</TestReference 7 0 R/TestNumber 123/TestName/SomeName/TestDictionary<</Key/Value>>/TestArray[1 2 3]>>
+endobj
+
+xref
+0 11
+0000000000 65536 f
+0000000016 00000 n
+0000000091 00000 n
+0000000197 00000 n
+0000000266 00000 n
+0000000338 00000 n
+0000000412 00000 n
+0000000458 00000 n
+0000000535 00000 n
+0000000637 00000 n
+0000000676 00000 n
+
+trailer
+<</Size 11/Root 6 0 R>>
+startxref
+797
+%%EOF
diff --git a/vcl/qa/cppunit/data/basicSource.pdf b/vcl/qa/cppunit/data/basicSource.pdf
new file mode 100644
index 000000000000..2cde317fc4a8
--- /dev/null
+++ b/vcl/qa/cppunit/data/basicSource.pdf
@@ -0,0 +1,60 @@
+%PDF-1.2
+
+%Fix with "mutool clean vcl/qa/cppunit/data/basicSource.pdf vcl/qa/cppunit/data/basic.pdf"
+
+1 0 obj
+<< /Type /Page /Parent 5 0 R /Resources 3 0 R /Contents 2 0 R>>
+endobj
+
+2 0 obj
+<</Length 57>>
+stream
+BT
+/F1 24 Tf
+1 0 0 1 260 254 Tm
+0.5 g
+(Hello World)Tj
+ET
+endstream
+endobj
+
+3 0 obj
+<<
+/ProcSet [/PDF /Text ]
+/Font << /F1 4 0 R >>
+/Test 7 0 R
+>>
+endobj
+
+4 0 obj
+<< /Type /Font /Subtype /Type1 /Name /F1 /BaseFont /Helvetica >>
+endobj
+
+5 0 obj
+<< /Type /Pages /Kids [ 1 0 R ] /Count 1 /MediaBox [ 0 0 612 446 ]>>
+endobj
+
+6 0 obj
+<< /Type /Catalog /Pages 5 0 R>>
+endobj
+
+7 0 obj
+<< /TestArray1 8 0 R /TestArray2 9 0 R /TestDictionary 10 0 R >>
+endobj
+
+8 0 obj
+[ /Inner1 /Inner2 [/Inner31] [/Inner41 /Inner42 [/Inner431 /Inner432] ] [ /Inner51 [/Inner521] ] ]
+endobj
+
+9 0 obj
+[ /TestReference 7 0 R ]
+endobj
+
+10 0 obj
+<< /TestReference 7 0 R /TestNumber 123 /TestName /SomeName /TestDictionary << /Key /Value >> /TestArray [1 2 3] >>
+endobj
+
+trailer
+<</Root 6 0 R>>
+
+%%EOF
commit cba97ed5fb30e6edbff73a75de54281650c8e8d5
Author: Tomaž Vajngerl <tomaz.vajngerl at collabora.co.uk>
AuthorDate: Tue Nov 10 09:43:20 2020 +0100
Commit: Andras Timar <andras.timar at collabora.com>
CommitDate: Thu Apr 8 08:32:54 2021 +0200
pdf: move parsing into its own class, rewrite the parser
This moves the parser into its own class as it is not only
limited to dictionaries.
The parser has been rewritten to handle the array elements
correctly and properly. Previously, array elements were filled during
the tokenization step, which is wrong, and the arrays weren't
parsed recursively, making the parsing incomplete in some cases.
This rewrite handles arrays similar to dictionaries and thus
allows them to be parsed properly.
All the sub-classes of PDFElement have been moved into the
header file.
Another change is also to not have a separate input dictionary
for the root object and instead always look into the dictionary
element that should be available instead.
+ many other smaller changes
Change-Id: I7fcf94760967bbd1474a0b432ba3a4e3c9b7cabe
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/105517
Tested-by: Tomaž Vajngerl <quikee at gmail.com>
Reviewed-by: Tomaž Vajngerl <quikee at gmail.com>
diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx
index b938234b9370..7f7cc8dfb641 100644
--- a/include/vcl/filter/pdfdocument.hxx
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -71,7 +71,6 @@ class VCL_DLLPUBLIC PDFObjectElement final : public PDFElement
PDFDocument& m_rDoc;
double m_fObjectValue;
double m_fGenerationValue;
- std::map<OString, PDFElement*> m_aDictionary;
/// If set, the object contains this number element (outside any dictionary/array).
PDFNumberElement* m_pNumberElement;
/// Position after the '<<' token.
@@ -97,6 +96,8 @@ class VCL_DLLPUBLIC PDFObjectElement final : public PDFElement
/// nested dictionaries.
std::vector<PDFReferenceElement*> m_aDictionaryReferences;
+ bool m_bParsed;
+
void parseIfNecessary();
public:
@@ -125,7 +126,7 @@ public:
sal_uInt64 GetArrayOffset() const;
void SetArrayLength(sal_uInt64 nArrayLength);
sal_uInt64 GetArrayLength() const;
- PDFArrayElement* GetArray() const;
+ PDFArrayElement* GetArray();
/// Parse objects stored in this object stream.
void ParseStoredObjects();
std::vector<std::unique_ptr<PDFElement>>& GetStoredElements();
@@ -148,6 +149,7 @@ public:
bool Read(SvStream& rStream) override;
void PushBack(PDFElement* pElement);
const std::vector<PDFElement*>& GetElements() const;
+ PDFElement* GetElement(size_t nIndex) const { return m_aElements[nIndex]; }
void writeString(OStringBuffer& rBuffer) override
{
@@ -226,9 +228,10 @@ class VCL_DLLPUBLIC PDFNameElement final : public PDFElement
public:
PDFNameElement();
bool Read(SvStream& rStream) override;
+ void SetValue(const OString& rValue) { m_aValue = rValue; }
const OString& GetValue() const;
sal_uInt64 GetLocation() const;
- static sal_uInt64 GetLength() { return 0; }
+ sal_uInt64 GetLength() const { return m_aValue.getLength(); } // length of the stored name value
void writeString(OStringBuffer& rBuffer) override
{
@@ -253,8 +256,6 @@ public:
PDFDictionaryElement();
bool Read(SvStream& rStream) override;
- static size_t Parse(const std::vector<std::unique_ptr<PDFElement>>& rElements,
- PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary);
static PDFElement* Lookup(const std::map<OString, PDFElement*>& rDictionary,
const OString& rKey);
void SetKeyOffset(const OString& rKey, sal_uInt64 nOffset);
@@ -266,6 +267,11 @@ public:
PDFObjectElement* LookupObject(const OString& rDictionaryKey);
/// Looks up an element which is contained in this dictionary.
PDFElement* LookupElement(const OString& rDictionaryKey);
+ sal_uInt64 GetLocation() const { return m_nLocation; }
+ void insert(OString const& rKey, PDFElement* pPDFElement)
+ {
+ m_aItems.emplace(rKey, pPDFElement);
+ }
void writeString(OStringBuffer& rBuffer) override
{
@@ -382,12 +388,100 @@ public:
PDFNumberElement();
bool Read(SvStream& rStream) override;
double GetValue() const;
+ void SetValue(double fValue) { m_fValue = fValue; }
+
sal_uInt64 GetLocation() const;
sal_uInt64 GetLength() const;
void writeString(OStringBuffer& rBuffer) override { rBuffer.append(m_fValue); }
};
+/// A one-liner comment.
+class VCL_DLLPUBLIC PDFCommentElement : public PDFElement
+{
+ PDFDocument& m_rDoc;
+ OString m_aComment;
+
+public:
+ explicit PDFCommentElement(PDFDocument& rDoc);
+ bool Read(SvStream& rStream) override;
+ void writeString(OStringBuffer& /*rBuffer*/) override {}
+};
+
+/// End of a dictionary: '>>'.
+class VCL_DLLPUBLIC PDFEndDictionaryElement : public PDFElement
+{
+ /// Offset before the '>>' token.
+ sal_uInt64 m_nLocation = 0;
+
+public:
+ PDFEndDictionaryElement();
+ bool Read(SvStream& rStream) override;
+ sal_uInt64 GetLocation() const;
+
+ void writeString(OStringBuffer& /*rBuffer*/) override {}
+};
+
+/// End of a stream: 'endstream' keyword.
+class VCL_DLLPUBLIC PDFEndStreamElement : public PDFElement
+{
+public:
+ bool Read(SvStream& rStream) override;
+
+ void writeString(OStringBuffer& /*rBuffer*/) override {}
+};
+
+/// End of an object: 'endobj' keyword.
+class VCL_DLLPUBLIC PDFEndObjectElement : public PDFElement
+{
+public:
+ bool Read(SvStream& rStream) override;
+
+ void writeString(OStringBuffer& /*rBuffer*/) override {}
+};
+
+/// End of an array: ']'.
+class VCL_DLLPUBLIC PDFEndArrayElement : public PDFElement
+{
+ /// Location before the ']' token.
+ sal_uInt64 m_nOffset = 0;
+
+public:
+ PDFEndArrayElement();
+ bool Read(SvStream& rStream) override;
+ sal_uInt64 GetOffset() const;
+
+ void writeString(OStringBuffer& /*rBuffer*/) override {}
+};
+
+/// Boolean object: a 'true' or a 'false'.
+class VCL_DLLPUBLIC PDFBooleanElement : public PDFElement
+{
+ bool m_aValue;
+
+public:
+ explicit PDFBooleanElement(bool bValue)
+ : m_aValue(bValue)
+ {
+ }
+
+ bool Read(SvStream& rStream) override;
+
+ void writeString(OStringBuffer& rBuffer) override
+ {
+ rBuffer.append(m_aValue ? "true" : "false");
+ }
+};
+
+/// Null object: the 'null' singleton.
+class VCL_DLLPUBLIC PDFNullElement : public PDFElement
+{
+public:
+ bool Read(SvStream& rStream) override;
+
+ void writeString(OStringBuffer& rBuffer) override { rBuffer.append("null"); }
+};
+
/**
* In-memory representation of an on-disk PDF document.
*
@@ -502,6 +596,43 @@ public:
bool writeBuffer(const void* pBuffer, sal_uInt64 nBytes) override;
};
+/// The trailer singleton is at the end of the doc.
+class VCL_DLLPUBLIC PDFTrailerElement : public PDFElement
+{
+ PDFDocument& m_rDoc;
+ PDFDictionaryElement* m_pDictionaryElement;
+ /// Location of the end of the trailer token.
+ sal_uInt64 m_nOffset = 0;
+
+public:
+ explicit PDFTrailerElement(PDFDocument& rDoc);
+ bool Read(SvStream& rStream) override;
+ PDFElement* Lookup(const OString& rDictionaryKey);
+ sal_uInt64 GetLocation() const;
+
+ void SetDictionary(PDFDictionaryElement* pDictionaryElement)
+ {
+ m_pDictionaryElement = pDictionaryElement;
+ }
+
+ PDFDictionaryElement* GetDictionary() { return m_pDictionaryElement; }
+
+ void writeString(OStringBuffer& /*rBuffer*/) override { assert(false && "not implemented"); }
+};
+
+class VCL_DLLPUBLIC PDFObjectParser final
+{
+ const std::vector<std::unique_ptr<PDFElement>>& mrElements;
+
+public:
+ PDFObjectParser(std::vector<std::unique_ptr<PDFElement>> const& rElements)
+ : mrElements(rElements)
+ {
+ }
+
+ size_t parse(PDFElement* pParsingElement, size_t nStartIndex = 0, int nCurrentDepth = 0);
+};
+
} // namespace vcl::filter
#endif // INCLUDED_VCL_FILTER_PDFDOCUMENT_HXX
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index 804713abaf10..9c677e85e0f5 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -36,119 +36,6 @@ namespace vcl::filter
{
const int MAX_SIGNATURE_CONTENT_LENGTH = 50000;
-class PDFTrailerElement;
-
-namespace
-{
-/// A one-liner comment.
-class PDFCommentElement : public PDFElement
-{
- PDFDocument& m_rDoc;
- OString m_aComment;
-
-public:
- explicit PDFCommentElement(PDFDocument& rDoc);
- bool Read(SvStream& rStream) override;
- void writeString(OStringBuffer& /*rBuffer*/) override {}
-};
-}
-
-class PDFReferenceElement;
-
-namespace
-{
-/// End of a dictionary: '>>'.
-class PDFEndDictionaryElement : public PDFElement
-{
- /// Offset before the '>>' token.
- sal_uInt64 m_nLocation = 0;
-
-public:
- PDFEndDictionaryElement();
- bool Read(SvStream& rStream) override;
- sal_uInt64 GetLocation() const;
-
- void writeString(OStringBuffer& /*rBuffer*/) override {}
-};
-
-/// End of a stream: 'endstream' keyword.
-class PDFEndStreamElement : public PDFElement
-{
-public:
- bool Read(SvStream& rStream) override;
-
- void writeString(OStringBuffer& /*rBuffer*/) override {}
-};
-
-/// End of an object: 'endobj' keyword.
-class PDFEndObjectElement : public PDFElement
-{
-public:
- bool Read(SvStream& rStream) override;
-
- void writeString(OStringBuffer& /*rBuffer*/) override {}
-};
-
-/// End of an array: ']'.
-class PDFEndArrayElement : public PDFElement
-{
- /// Location before the ']' token.
- sal_uInt64 m_nOffset = 0;
-
-public:
- PDFEndArrayElement();
- bool Read(SvStream& rStream) override;
- sal_uInt64 GetOffset() const;
-
- void writeString(OStringBuffer& /*rBuffer*/) override {}
-};
-
-/// Boolean object: a 'true' or a 'false'.
-class PDFBooleanElement : public PDFElement
-{
- bool m_aValue;
-
-public:
- explicit PDFBooleanElement(bool bValue)
- : m_aValue(bValue)
- {
- }
-
- bool Read(SvStream& rStream) override;
-
- void writeString(OStringBuffer& rBuffer) override
- {
- rBuffer.append(m_aValue ? "true" : "false");
- }
-};
-
-/// Null object: the 'null' singleton.
-class PDFNullElement : public PDFElement
-{
-public:
- bool Read(SvStream& rStream) override;
-
- void writeString(OStringBuffer& rBuffer) override { rBuffer.append("null"); }
-};
-}
-
-/// The trailer singleton is at the end of the doc.
-class PDFTrailerElement : public PDFElement
-{
- PDFDocument& m_rDoc;
- std::map<OString, PDFElement*> m_aDictionary;
- /// Location of the end of the trailer token.
- sal_uInt64 m_nOffset = 0;
-
-public:
- explicit PDFTrailerElement(PDFDocument& rDoc);
- bool Read(SvStream& rStream) override;
- PDFElement* Lookup(const OString& rDictionaryKey);
- sal_uInt64 GetLocation() const;
-
- void writeString(OStringBuffer& /*rBuffer*/) override {}
-};
-
XRefEntry::XRefEntry() = default;
PDFDocument::PDFDocument() = default;
@@ -641,6 +528,7 @@ bool PDFDocument::WriteCatalogObject(sal_Int32 nAnnotId, PDFReferenceElement*& p
sal_uInt64 nFieldsEndOffset = pAcroFormDictionary->GetKeyOffset("Fields")
+ pAcroFormDictionary->GetKeyValueLength("Fields")
- strlen("]");
+
// Length of beginning of the object dictionary -> Fields end.
sal_uInt64 nFieldsBeforeEndLength = nFieldsEndOffset;
if (pStreamBuffer)
@@ -1093,13 +981,12 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
// The next number will be an xref offset.
bool bInStartXRef = false;
// Dictionary depth, so we know when we're outside any dictionaries.
- int nDictionaryDepth = 0;
- // Array depth, only the offset/length of the toplevel array is tracked.
- int nArrayDepth = 0;
+ int nDepth = 0;
// Last seen array token that's outside any dictionaries.
PDFArrayElement* pArray = nullptr;
// If we're inside an obj/endobj pair.
bool bInObject = false;
+
while (true)
{
char ch;
@@ -1136,7 +1023,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
if (ch == '<')
{
rElements.push_back(std::unique_ptr<PDFElement>(new PDFDictionaryElement()));
- ++nDictionaryDepth;
+ ++nDepth;
}
else
rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement));
@@ -1151,7 +1038,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
case '>':
{
rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndDictionaryElement()));
- --nDictionaryDepth;
+ --nDepth;
rStream.SeekRel(-1);
if (!rElements.back()->Read(rStream))
{
@@ -1165,7 +1052,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
{
auto pArr = new PDFArrayElement(pObject);
rElements.push_back(std::unique_ptr<PDFElement>(pArr));
- if (nDictionaryDepth == 0 && nArrayDepth == 0)
+ if (nDepth == 0)
{
// The array is attached directly, inform the object.
pArray = pArr;
@@ -1175,7 +1062,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
pObject->SetArrayOffset(rStream.Tell());
}
}
- ++nArrayDepth;
+ ++nDepth;
rStream.SeekRel(-1);
if (!rElements.back()->Read(rStream))
{
@@ -1187,11 +1074,9 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
case ']':
{
rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndArrayElement()));
- --nArrayDepth;
- if (nArrayDepth == 0)
- pArray = nullptr;
+ --nDepth;
rStream.SeekRel(-1);
- if (nDictionaryDepth == 0 && nArrayDepth == 0)
+ if (nDepth == 0)
{
if (pObject)
{
@@ -1216,6 +1101,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
SAL_WARN("vcl.filter", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
return false;
}
+
if (pObject && pObjectKey && pObjectKey->GetValue() == "Type"
&& pNameElement->GetValue() == "ObjStm")
pObjectStream = pObject;
@@ -1259,7 +1145,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
if (it != m_aOffsetObjects.end())
m_pXRefStream = it->second;
}
- else if (bInObject && !nDictionaryDepth && !nArrayDepth && pObject)
+ else if (bInObject && !nDepth && pObject)
// Number element inside an object, but outside a
// dictionary / array: remember it.
pObject->SetNumberElement(pNumberElement);
@@ -1306,10 +1192,7 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode,
auto pReference = new PDFReferenceElement(*this, *pObjectNumber,
*pGenerationNumber);
rElements.push_back(std::unique_ptr<PDFElement>(pReference));
- if (pArray)
- // Reference is part of a direct (non-dictionary) array, inform the array.
- pArray->PushBack(rElements.back().get());
- if (bInObject && nDictionaryDepth > 0 && pObject)
+ if (bInObject && nDepth > 0 && pObject)
// Inform the object about a new in-dictionary reference.
pObject->AddDictionaryReference(pReference);
}
@@ -2328,6 +2211,7 @@ const OString& PDFLiteralStringElement::GetValue() const { return m_aValue; }
PDFTrailerElement::PDFTrailerElement(PDFDocument& rDoc)
: m_rDoc(rDoc)
+ , m_pDictionaryElement(nullptr)
{
}
@@ -2339,10 +2223,14 @@ bool PDFTrailerElement::Read(SvStream& rStream)
PDFElement* PDFTrailerElement::Lookup(const OString& rDictionaryKey)
{
- if (m_aDictionary.empty())
- PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
-
- return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
+ if (!m_pDictionaryElement)
+ {
+ PDFObjectParser aParser(m_rDoc.GetElements());
+ aParser.parse(this);
+ }
+ if (!m_pDictionaryElement)
+ return nullptr;
+ return m_pDictionaryElement->LookupElement(rDictionaryKey);
}
sal_uInt64 PDFTrailerElement::GetLocation() const { return m_nOffset; }
@@ -2361,6 +2249,7 @@ PDFObjectElement::PDFObjectElement(PDFDocument& rDoc, double fObjectValue, doubl
, m_nArrayLength(0)
, m_pArrayElement(nullptr)
, m_pStreamElement(nullptr)
+ , m_bParsed(false)
{
}
@@ -2373,257 +2262,6 @@ bool PDFObjectElement::Read(SvStream& /*rStream*/)
PDFDictionaryElement::PDFDictionaryElement() = default;
-size_t PDFDictionaryElement::Parse(const std::vector<std::unique_ptr<PDFElement>>& rElements,
- PDFElement* pThis, std::map<OString, PDFElement*>& rDictionary)
-{
- // The index of last parsed element, in case of nested dictionaries.
- size_t nRet = 0;
-
- if (!rDictionary.empty())
- return nRet;
-
- pThis->setParsing(true);
-
- auto pThisObject = dynamic_cast<PDFObjectElement*>(pThis);
- // This is set to non-nullptr here for nested dictionaries only.
- auto pThisDictionary = dynamic_cast<PDFDictionaryElement*>(pThis);
-
- // Find out where the dictionary for this object starts.
- size_t nIndex = 0;
- for (size_t i = 0; i < rElements.size(); ++i)
- {
- if (rElements[i].get() == pThis)
- {
- nIndex = i;
- break;
- }
- }
-
- OString aName;
- sal_uInt64 nNameOffset = 0;
- std::vector<PDFNumberElement*> aNumbers;
- // The array value we're in -- if any.
- PDFArrayElement* pArray = nullptr;
- sal_uInt64 nDictionaryOffset = 0;
- int nDictionaryDepth = 0;
- // Toplevel dictionary found (not inside an array).
- bool bDictionaryFound = false;
- // Toplevel array found (not inside a dictionary).
- bool bArrayFound = false;
- for (size_t i = nIndex; i < rElements.size(); ++i)
- {
- // Dictionary tokens can be nested, track enter/leave.
- if (auto pDictionary = dynamic_cast<PDFDictionaryElement*>(rElements[i].get()))
- {
- bDictionaryFound = true;
- if (++nDictionaryDepth == 1)
- {
- // First dictionary start, track start offset.
- nDictionaryOffset = pDictionary->m_nLocation;
- if (pThisObject)
- {
- if (!bArrayFound)
- // Then the toplevel dictionary of the object.
- pThisObject->SetDictionary(pDictionary);
- pThisDictionary = pDictionary;
- pThisObject->SetDictionaryOffset(nDictionaryOffset);
- }
- }
- else if (!pDictionary->alreadyParsing())
- {
- // Nested dictionary.
- const size_t nexti
- = PDFDictionaryElement::Parse(rElements, pDictionary, pDictionary->m_aItems);
- if (nexti >= i) // ensure we go forwards and not endlessly loop
- {
- i = nexti;
- if (pArray)
- {
- // Dictionary value inside an array.
- pArray->PushBack(pDictionary);
- }
- else
- {
- // Dictionary toplevel value.
- rDictionary[aName] = pDictionary;
- aName.clear();
- }
- }
- }
- }
-
- if (auto pEndDictionary = dynamic_cast<PDFEndDictionaryElement*>(rElements[i].get()))
- {
- if (--nDictionaryDepth == 0)
- {
- // Last dictionary end, track length and stop parsing.
- if (pThisObject)
- pThisObject->SetDictionaryLength(pEndDictionary->GetLocation()
- - nDictionaryOffset);
- nRet = i;
- break;
- }
- }
-
- auto pName = dynamic_cast<PDFNameElement*>(rElements[i].get());
- if (pName)
- {
- if (!aNumbers.empty())
- {
- PDFNumberElement* pNumber = aNumbers.back();
- rDictionary[aName] = pNumber;
- if (pThisDictionary)
- {
- pThisDictionary->SetKeyOffset(aName, nNameOffset);
- pThisDictionary->SetKeyValueLength(
- aName, pNumber->GetLocation() + pNumber->GetLength() - nNameOffset);
- }
- aName.clear();
- aNumbers.clear();
- }
-
- if (aName.isEmpty())
- {
- // Remember key.
- aName = pName->GetValue();
- nNameOffset = pName->GetLocation();
- }
- else
- {
- if (pArray)
- {
- if (bDictionaryFound)
- // Array inside dictionary.
- pArray->PushBack(pName);
- }
- else
- {
- // Name-name key-value.
- rDictionary[aName] = pName;
- if (pThisDictionary)
- {
- pThisDictionary->SetKeyOffset(aName, nNameOffset);
- pThisDictionary->SetKeyValueLength(aName, pName->GetLocation()
- + PDFNameElement::GetLength()
- - nNameOffset);
- }
- aName.clear();
- }
- }
- continue;
- }
-
- auto pArr = dynamic_cast<PDFArrayElement*>(rElements[i].get());
- if (pArr)
- {
- bArrayFound = true;
- pArray = pArr;
- continue;
- }
-
- auto pEndArr = dynamic_cast<PDFEndArrayElement*>(rElements[i].get());
- if (pArray && pEndArr)
- {
- for (auto& pNumber : aNumbers)
- pArray->PushBack(pNumber);
- aNumbers.clear();
- rDictionary[aName] = pArray;
- if (pThisDictionary)
- {
- pThisDictionary->SetKeyOffset(aName, nNameOffset);
- // Include the ending ']' in the length of the key - (array)value pair length.
- pThisDictionary->SetKeyValueLength(aName, pEndArr->GetOffset() - nNameOffset + 1);
- }
- aName.clear();
- pArray = nullptr;
- continue;
- }
-
- auto pReference = dynamic_cast<PDFReferenceElement*>(rElements[i].get());
- if (pReference)
- {
- if (!pArray)
- {
- rDictionary[aName] = pReference;
- if (pThisDictionary)
- {
- pThisDictionary->SetKeyOffset(aName, nNameOffset);
- pThisDictionary->SetKeyValueLength(aName,
- pReference->GetOffset() - nNameOffset);
- }
- aName.clear();
- }
- else
- {
- if (bDictionaryFound)
- // Array inside dictionary.
- pArray->PushBack(pReference);
- }
- aNumbers.clear();
- continue;
- }
-
- auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(rElements[i].get());
- if (pLiteralString)
- {
- rDictionary[aName] = pLiteralString;
- if (pThisDictionary)
- pThisDictionary->SetKeyOffset(aName, nNameOffset);
- aName.clear();
- continue;
- }
-
- auto pBoolean = dynamic_cast<PDFBooleanElement*>(rElements[i].get());
- if (pBoolean)
- {
- rDictionary[aName] = pBoolean;
- if (pThisDictionary)
- pThisDictionary->SetKeyOffset(aName, nNameOffset);
- aName.clear();
- continue;
- }
-
- auto pHexString = dynamic_cast<PDFHexStringElement*>(rElements[i].get());
- if (pHexString)
- {
- if (!pArray)
- {
- rDictionary[aName] = pHexString;
- if (pThisDictionary)
- pThisDictionary->SetKeyOffset(aName, nNameOffset);
- aName.clear();
- }
- else
- {
- pArray->PushBack(pHexString);
- }
- continue;
- }
-
- if (dynamic_cast<PDFEndObjectElement*>(rElements[i].get()))
- break;
-
- // Just remember this, so that in case it's not a reference parameter,
- // we can handle it later.
- auto pNumber = dynamic_cast<PDFNumberElement*>(rElements[i].get());
- if (pNumber)
- aNumbers.push_back(pNumber);
- }
-
- if (!aNumbers.empty())
- {
- rDictionary[aName] = aNumbers.back();
- if (pThisDictionary)
- pThisDictionary->SetKeyOffset(aName, nNameOffset);
- aName.clear();
- aNumbers.clear();
- }
-
- pThis->setParsing(false);
-
- return nRet;
-}
-
PDFElement* PDFDictionaryElement::Lookup(const std::map<OString, PDFElement*>& rDictionary,
const OString& rKey)
{
@@ -2656,21 +2294,30 @@ PDFElement* PDFDictionaryElement::LookupElement(const OString& rDictionaryKey)
void PDFObjectElement::parseIfNecessary()
{
- if (m_aDictionary.empty())
+ if (!m_bParsed)
{
if (!m_aElements.empty())
+ {
// This is a stored object in an object stream.
- PDFDictionaryElement::Parse(m_aElements, this, m_aDictionary);
+ PDFObjectParser aParser(m_aElements);
+ aParser.parse(this);
+ }
else
+ {
// Normal object: elements are stored as members of the document itself.
- PDFDictionaryElement::Parse(m_rDoc.GetElements(), this, m_aDictionary);
+ PDFObjectParser aParser(m_rDoc.GetElements());
+ aParser.parse(this);
+ }
+ m_bParsed = true;
}
}
PDFElement* PDFObjectElement::Lookup(const OString& rDictionaryKey)
{
parseIfNecessary();
- return PDFDictionaryElement::Lookup(m_aDictionary, rDictionaryKey);
+ if (!m_pDictionaryElement)
+ return nullptr;
+ return PDFDictionaryElement::Lookup(GetDictionaryItems(), rDictionaryKey);
}
PDFObjectElement* PDFObjectElement::LookupObject(const OString& rDictionaryKey)
@@ -2779,7 +2426,7 @@ void PDFObjectElement::AddDictionaryReference(PDFReferenceElement* pReference)
const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
{
parseIfNecessary();
- return m_aDictionary;
+ return m_pDictionaryElement->GetItems();
}
void PDFObjectElement::SetArray(PDFArrayElement* pArrayElement) { m_pArrayElement = pArrayElement; }
@@ -2791,7 +2438,11 @@ void PDFObjectElement::SetStream(PDFStreamElement* pStreamElement)
PDFStreamElement* PDFObjectElement::GetStream() const { return m_pStreamElement; }
-PDFArrayElement* PDFObjectElement::GetArray() const { return m_pArrayElement; }
+PDFArrayElement* PDFObjectElement::GetArray()
+{
+ parseIfNecessary();
+ return m_pArrayElement;
+}
void PDFObjectElement::ParseStoredObjects()
{
@@ -3226,6 +2877,416 @@ bool PDFEndArrayElement::Read(SvStream& rStream)
sal_uInt64 PDFEndArrayElement::GetOffset() const { return m_nOffset; }
+// PDFObjectParser
+
+size_t PDFObjectParser::parse(PDFElement* pParsingElement, size_t nStartIndex, int nCurrentDepth)
+{
+ // The index of last parsed element
+ size_t nReturnIndex = 0;
+
+ pParsingElement->setParsing(true);
+
+ comphelper::ScopeGuard aGuard([pParsingElement]() { pParsingElement->setParsing(false); });
+
+ // Current object, if root is an object, else nullptr
+ auto pParsingObject = dynamic_cast<PDFObjectElement*>(pParsingElement);
+ auto pParsingTrailer = dynamic_cast<PDFTrailerElement*>(pParsingElement);
+
+ // Current dictionary, if root is a dictionary, else nullptr
+ auto pParsingDictionary = dynamic_cast<PDFDictionaryElement*>(pParsingElement);
+
+ // Current parsing array, if root is an array, else nullptr
+ auto pParsingArray = dynamic_cast<PDFArrayElement*>(pParsingElement);
+
+ // Find out where the dictionary for this object starts.
+ size_t nIndex = nStartIndex;
+ for (size_t i = nStartIndex; i < mrElements.size(); ++i)
+ {
+ if (mrElements[i].get() == pParsingElement)
+ {
+ nIndex = i;
+ break;
+ }
+ }
+
+ OString aName;
+ sal_uInt64 nNameOffset = 0;
+ std::vector<PDFNumberElement*> aNumbers;
+
+ sal_uInt64 nDictionaryOffset = 0;
+
+ // Nesting depth of dictionaries/arrays entered so far; 1 is the toplevel container of the element being parsed
+ int nDepth = 0;
+
+ for (size_t i = nIndex; i < mrElements.size(); ++i)
+ {
+ auto* pCurrentElement = mrElements[i].get();
+
+ // Dictionary tokens can be nested, track enter/leave.
+ if (auto pCurrentDictionary = dynamic_cast<PDFDictionaryElement*>(pCurrentElement))
+ {
+ // Handle previously stored number
+ if (!aNumbers.empty())
+ {
+ if (pParsingDictionary)
+ {
+ PDFNumberElement* pNumber = aNumbers.back();
+ sal_uInt64 nLength
+ = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
+
+ pParsingDictionary->insert(aName, pNumber);
+ pParsingDictionary->SetKeyOffset(aName, nNameOffset);
+ pParsingDictionary->SetKeyValueLength(aName, nLength);
+ }
+ else if (pParsingArray)
+ {
+ for (auto& pNumber : aNumbers)
+ pParsingArray->PushBack(pNumber);
+ }
+ else
+ {
+ SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
+ }
+ aName.clear();
+ aNumbers.clear();
+ }
+
+ nDepth++;
+
+ if (nDepth == 1) // pParsingDictionary is the current one
+ {
+ // First dictionary start, track start offset.
+ nDictionaryOffset = pCurrentDictionary->GetLocation();
+
+ if (pParsingObject)
+ {
+ // Then the toplevel dictionary of the object.
+ pParsingObject->SetDictionary(pCurrentDictionary);
+ pParsingObject->SetDictionaryOffset(nDictionaryOffset);
+ pParsingDictionary = pCurrentDictionary;
+ }
+ else if (pParsingTrailer)
+ {
+ pParsingTrailer->SetDictionary(pCurrentDictionary);
+ pParsingDictionary = pCurrentDictionary;
+ }
+ }
+ else if (!pCurrentDictionary->alreadyParsing())
+ {
+ if (pParsingArray)
+ {
+ pParsingArray->PushBack(pCurrentDictionary);
+ }
+ else if (pParsingDictionary)
+ {
+ // Dictionary toplevel value.
+ pParsingDictionary->insert(aName, pCurrentDictionary);
+ }
+ else
+ {
+ SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
+ }
+ // Nested dictionary.
+ const size_t nNextElementIndex = parse(pCurrentDictionary, i, nCurrentDepth + 1);
+ i = std::max(i, nNextElementIndex - 1);
+ }
+ }
+ else if (auto pCurrentEndDictionary
+ = dynamic_cast<PDFEndDictionaryElement*>(pCurrentElement))
+ {
+ // Handle previously stored number
+ if (!aNumbers.empty())
+ {
+ if (pParsingDictionary)
+ {
+ PDFNumberElement* pNumber = aNumbers.back();
+ sal_uInt64 nLength
+ = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
+
+ pParsingDictionary->insert(aName, pNumber);
+ pParsingDictionary->SetKeyOffset(aName, nNameOffset);
+ pParsingDictionary->SetKeyValueLength(aName, nLength);
+ }
+ else if (pParsingArray)
+ {
+ for (auto& pNumber : aNumbers)
+ pParsingArray->PushBack(pNumber);
+ }
+ else
+ {
+ SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
+ }
+ aName.clear();
+ aNumbers.clear();
+ }
+
+ if (pParsingDictionary)
+ {
+ pParsingDictionary->SetKeyOffset(aName, nNameOffset);
+ sal_uInt64 nLength = pCurrentEndDictionary->GetLocation() - nNameOffset + 2;
+ pParsingDictionary->SetKeyValueLength(aName, nLength);
+ aName.clear();
+ }
+
+ if (nDepth == 1) // has the parsing ended?
+ {
+ // Last dictionary end, track length and stop parsing.
+ if (pParsingObject)
+ {
+ sal_uInt64 nDictionaryLength
+ = pCurrentEndDictionary->GetLocation() - nDictionaryOffset;
+ pParsingObject->SetDictionaryLength(nDictionaryLength);
+ }
+ nReturnIndex = i;
+ break;
+ }
+
+ nDepth--;
+ }
+ else if (auto pCurrentArray = dynamic_cast<PDFArrayElement*>(pCurrentElement))
+ {
+ // Handle previously stored number
+ if (!aNumbers.empty())
+ {
+ if (pParsingDictionary)
+ {
+ PDFNumberElement* pNumber = aNumbers.back();
+
+ sal_uInt64 nLength
+ = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
+ pParsingDictionary->insert(aName, pNumber);
+ pParsingDictionary->SetKeyOffset(aName, nNameOffset);
+ pParsingDictionary->SetKeyValueLength(aName, nLength);
+ }
+ else if (pParsingArray)
+ {
+ for (auto& pNumber : aNumbers)
+ pParsingArray->PushBack(pNumber);
+ }
+ else
+ {
+ SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
+ }
+ aName.clear();
+ aNumbers.clear();
+ }
+
+ nDepth++;
+ if (nDepth == 1) // pParsingArray is the current one
+ {
+ if (pParsingObject)
+ {
+ pParsingObject->SetArray(pCurrentArray);
+ pParsingArray = pCurrentArray;
+ }
+ }
+ else if (!pCurrentArray->alreadyParsing())
+ {
+ if (pParsingArray)
+ {
+ // Nested array: element of the array being parsed.
+ pParsingArray->PushBack(pCurrentArray);
+ }
+ else if (pParsingDictionary)
+ {
+ // Dictionary toplevel value.
+ pParsingDictionary->insert(aName, pCurrentArray);
+ }
+
+ const size_t nNextElementIndex = parse(pCurrentArray, i, nCurrentDepth + 1);
+
+ // ensure we go forwards and not endlessly loop
+ i = std::max(i, nNextElementIndex - 1);
+ }
+ }
+ else if (auto pCurrentEndArray = dynamic_cast<PDFEndArrayElement*>(pCurrentElement))
+ {
+ // Handle previously stored number
+ if (!aNumbers.empty())
+ {
+ if (pParsingDictionary)
+ {
+ PDFNumberElement* pNumber = aNumbers.back();
+
+ sal_uInt64 nLength
+ = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
+ pParsingDictionary->insert(aName, pNumber);
+ pParsingDictionary->SetKeyOffset(aName, nNameOffset);
+ pParsingDictionary->SetKeyValueLength(aName, nLength);
+ }
+ else if (pParsingArray)
+ {
+ for (auto& pNumber : aNumbers)
+ pParsingArray->PushBack(pNumber);
+ }
+ else
+ {
+ SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
+ }
+ aName.clear();
+ aNumbers.clear();
+ }
+
+ if (nDepth == 1) // has the parsing ended?
+ {
+ // Last array end, remember its index and stop parsing.
+ nReturnIndex = i;
+ break;
+ }
+ else
+ {
+ if (pParsingDictionary)
+ {
+ pParsingDictionary->SetKeyOffset(aName, nNameOffset);
+ // Include the ending ']' in the length of the key - (array)value pair length.
+ sal_uInt64 nLength = pCurrentEndArray->GetOffset() - nNameOffset + 1;
+ pParsingDictionary->SetKeyValueLength(aName, nLength);
+ aName.clear();
+ }
+ }
+ nDepth--;
+ }
+ else if (auto pCurrentName = dynamic_cast<PDFNameElement*>(pCurrentElement))
+ {
+ // Handle previously stored number
+ if (!aNumbers.empty())
+ {
+ if (pParsingDictionary)
+ {
+ PDFNumberElement* pNumber = aNumbers.back();
+
+ sal_uInt64 nLength
+ = pNumber->GetLocation() + pNumber->GetLength() - nNameOffset;
+ pParsingDictionary->insert(aName, pNumber);
+ pParsingDictionary->SetKeyOffset(aName, nNameOffset);
+ pParsingDictionary->SetKeyValueLength(aName, nLength);
+ }
+ else if (pParsingArray)
+ {
+ for (auto& pNumber : aNumbers)
+ pParsingArray->PushBack(pNumber);
+ }
+ aName.clear();
+ aNumbers.clear();
+ }
+
+ // Now handle name
+ if (pParsingArray)
+ {
+ // if we are in an array, just push the name to array
+ pParsingArray->PushBack(pCurrentName);
+ }
+ else if (pParsingDictionary)
+ {
+ // if we are in a dictionary, we need to store the name as a possible key
+ if (aName.isEmpty())
+ {
+ aName = pCurrentName->GetValue();
+ nNameOffset = pCurrentName->GetLocation();
+ }
+ else
+ {
+ sal_uInt64 nKeyLength
+ = pCurrentName->GetLocation() + pCurrentName->GetLength() - nNameOffset;
+ pParsingDictionary->insert(aName, pCurrentName);
+ pParsingDictionary->SetKeyOffset(aName, nNameOffset);
+ pParsingDictionary->SetKeyValueLength(aName, nKeyLength);
+ aName.clear();
+ }
+ }
+ }
+ else if (auto pReference = dynamic_cast<PDFReferenceElement*>(pCurrentElement))
+ {
+ if (pParsingArray)
+ {
+ pParsingArray->PushBack(pReference);
+ }
+ else if (pParsingDictionary)
+ {
+ sal_uInt64 nLength = pReference->GetOffset() - nNameOffset;
+ pParsingDictionary->insert(aName, pReference);
+ pParsingDictionary->SetKeyOffset(aName, nNameOffset);
+ pParsingDictionary->SetKeyValueLength(aName, nLength);
+ aName.clear();
+ }
+ else
+ {
+ SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
+ }
+ aNumbers.clear();
+ }
+ else if (auto pLiteralString = dynamic_cast<PDFLiteralStringElement*>(pCurrentElement))
+ {
+ if (pParsingArray)
+ {
+ pParsingArray->PushBack(pLiteralString);
+ }
+ else if (pParsingDictionary)
+ {
+ pParsingDictionary->insert(aName, pLiteralString);
+ pParsingDictionary->SetKeyOffset(aName, nNameOffset);
+ aName.clear();
+ }
+ else
+ {
+ SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
+ }
+ }
+ else if (auto pBoolean = dynamic_cast<PDFBooleanElement*>(pCurrentElement))
+ {
+ if (pParsingArray)
+ {
+ pParsingArray->PushBack(pBoolean);
+ }
+ else if (pParsingDictionary)
+ {
+ pParsingDictionary->insert(aName, pBoolean);
+ pParsingDictionary->SetKeyOffset(aName, nNameOffset);
+ aName.clear();
+ }
+ else
+ {
+ SAL_INFO("vcl.filter", "neither Dictionary nor Array available");
+ }
+ }
+ else if (auto pHexString = dynamic_cast<PDFHexStringElement*>(pCurrentElement))
+ {
+ if (pParsingArray)
+ {
+ pParsingArray->PushBack(pHexString);
+ }
+ else if (pParsingDictionary)
+ {
+ pParsingDictionary->insert(aName, pHexString);
+ pParsingDictionary->SetKeyOffset(aName, nNameOffset);
+ aName.clear();
+ }
+ }
+ else if (auto pNumberElement = dynamic_cast<PDFNumberElement*>(pCurrentElement))
+ {
+ // Just remember this, so that in case it's not a reference parameter,
+ // we can handle it later.
+ aNumbers.push_back(pNumberElement);
+ }
+ else if (dynamic_cast<PDFEndObjectElement*>(pCurrentElement))
+ {
+ // parsing of the object is finished
+ break;
+ }
+ else if (dynamic_cast<PDFObjectElement*>(pCurrentElement)
+ || dynamic_cast<PDFTrailerElement*>(pCurrentElement))
+ {
+ continue;
+ }
+ else
+ {
+ SAL_INFO("vcl.filter", "Unhandeled element while parsing.");
+ }
+ }
+
+ return nReturnIndex;
+}
+
} // namespace vcl
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
commit f9887a1f08e50f3ef7dd7afeefabd1b48c09ca55
Author: Tomaž Vajngerl <tomaz.vajngerl at collabora.co.uk>
AuthorDate: Mon Nov 9 19:16:27 2020 +0100
Commit: Andras Timar <andras.timar at collabora.com>
CommitDate: Thu Apr 8 08:32:01 2021 +0200
pdf: improve PDFObjectCopier, copy arrays/dicts recursively
Change-Id: Ia2f8d86ae012b530f3e9c39842bb75ef8ca27718
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/105493
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee at gmail.com>
diff --git a/vcl/inc/pdf/objectcopier.hxx b/vcl/inc/pdf/objectcopier.hxx
index 65dbbb49aef4..3880d739bb10 100644
--- a/vcl/inc/pdf/objectcopier.hxx
+++ b/vcl/inc/pdf/objectcopier.hxx
@@ -25,6 +25,7 @@ class PDFObjectContainer;
namespace filter
{
class PDFObjectElement;
+class PDFElement;
}
/// Copies objects from one PDF file into another one.
@@ -32,6 +33,10 @@ class PDFObjectCopier
{
PDFObjectContainer& m_rContainer;
+ void copyRecursively(OStringBuffer& rLine, filter::PDFElement* pInputElement,
+ SvMemoryStream& rDocBuffer,
+ std::map<sal_Int32, sal_Int32>& rCopiedResources);
+
public:
PDFObjectCopier(PDFObjectContainer& rContainer);
diff --git a/vcl/source/gdi/pdfobjectcopier.cxx b/vcl/source/gdi/pdfobjectcopier.cxx
index 129a4c8bda35..d6323a17e91c 100644
--- a/vcl/source/gdi/pdfobjectcopier.cxx
+++ b/vcl/source/gdi/pdfobjectcopier.cxx
@@ -26,14 +26,62 @@ PDFObjectCopier::PDFObjectCopier(PDFObjectContainer& rContainer)
{
}
+void PDFObjectCopier::copyRecursively(OStringBuffer& rLine, filter::PDFElement* pInputElement,
+ SvMemoryStream& rDocBuffer,
+ std::map<sal_Int32, sal_Int32>& rCopiedResources)
+{
+ if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pInputElement))
+ {
+ filter::PDFObjectElement* pReferenced = pReference->LookupObject();
+ if (pReferenced)
+ {
+ // Copy the referenced object.
+ sal_Int32 nRef = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
+
+ // Write the updated reference.
+ rLine.append(nRef);
+ rLine.append(" 0 R");
+ }
+ }
+ else if (auto pInputArray = dynamic_cast<filter::PDFArrayElement*>(pInputElement))
+ {
+ rLine.append("[ ");
+ for (auto const& pElement : pInputArray->GetElements())
+ {
+ copyRecursively(rLine, pElement, rDocBuffer, rCopiedResources);
+ rLine.append(" ");
+ }
+ rLine.append("] ");
+ }
+ else if (auto pInputDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pInputElement))
+ {
+ rLine.append("<< ");
+ for (auto const& pPair : pInputDictionary->GetItems())
+ {
+ rLine.append("/");
+ rLine.append(pPair.first);
+ rLine.append(" ");
+ copyRecursively(rLine, pPair.second, rDocBuffer, rCopiedResources);
+ rLine.append(" ");
+ }
+ rLine.append(">> ");
+ }
+ else
+ {
+ pInputElement->writeString(rLine);
+ }
+}
+
sal_Int32 PDFObjectCopier::copyExternalResource(SvMemoryStream& rDocBuffer,
filter::PDFObjectElement& rObject,
std::map<sal_Int32, sal_Int32>& rCopiedResources)
{
auto it = rCopiedResources.find(rObject.GetObjectValue());
if (it != rCopiedResources.end())
+ {
// This resource was already copied once, nothing to do.
return it->second;
+ }
sal_Int32 nObject = m_rContainer.createObject();
// Remember what is the ID of this object in our output.
@@ -50,62 +98,19 @@ sal_Int32 PDFObjectCopier::copyExternalResource(SvMemoryStream& rDocBuffer,
OStringBuffer aLine;
aLine.append(nObject);
aLine.append(" 0 obj\n");
+
if (rObject.GetDictionary())
{
- aLine.append("<<");
-
- // Complex case: can't copy the dictionary byte array as is, as it may contain references.
- bool bDone = false;
- sal_uInt64 nCopyStart = 0;
- for (auto pReference : rObject.GetDictionaryReferences())
- {
- if (pReference)
- {
- filter::PDFObjectElement* pReferenced = pReference->LookupObject();
- if (pReferenced)
- {
- // Copy the referenced object.
- sal_Int32 nRef
- = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
-
- sal_uInt64 nReferenceStart = pReference->GetObjectElement().GetLocation();
- sal_uInt64 nReferenceEnd = pReference->GetOffset();
- sal_uInt64 nOffset = 0;
- if (nCopyStart == 0)
- // Dict start -> reference start.
- nOffset = rObject.GetDictionaryOffset();
- else
- // Previous reference end -> reference start.
- nOffset = nCopyStart;
- aLine.append(static_cast<const char*>(pObjectStream->GetData()) + nOffset,
- nReferenceStart - nOffset);
- // Write the updated reference.
- aLine.append(" ");
- aLine.append(nRef);
- aLine.append(" 0 R");
- // Start copying here next time.
- nCopyStart = nReferenceEnd;
-
- bDone = true;
- }
- }
- }
+ aLine.append("<< ");
- if (bDone)
+ for (auto const& rPair : rObject.GetDictionaryItems())
{
- // Copy the last part here, in the complex case.
- sal_uInt64 nDictEnd = rObject.GetDictionaryOffset() + rObject.GetDictionaryLength();
- const sal_Int32 nLen = nDictEnd - nCopyStart;
- if (nLen < 0)
- SAL_WARN("vcl.pdfwriter", "copyExternalResource() failed");
- else
- aLine.append(static_cast<const char*>(pObjectStream->GetData()) + nCopyStart, nLen);
+ aLine.append("/");
+ aLine.append(rPair.first);
+ aLine.append(" ");
+ copyRecursively(aLine, rPair.second, rDocBuffer, rCopiedResources);
+ aLine.append(" ");
}
- else
- // Can copy it as-is.
- aLine.append(static_cast<const char*>(pObjectStream->GetData())
- + rObject.GetDictionaryOffset(),
- rObject.GetDictionaryLength());
aLine.append(">>\n");
}
@@ -120,64 +125,15 @@ sal_Int32 PDFObjectCopier::copyExternalResource(SvMemoryStream& rDocBuffer,
if (filter::PDFArrayElement* pArray = rObject.GetArray())
{
- aLine.append("[");
+ aLine.append("[ ");
const std::vector<filter::PDFElement*>& rElements = pArray->GetElements();
- bool bDone = false;
- // Complex case: can't copy the array byte array as is, as it may contain references.
- sal_uInt64 nCopyStart = 0;
- for (const auto pElement : rElements)
- {
- auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pElement);
- if (pReference)
- {
- filter::PDFObjectElement* pReferenced = pReference->LookupObject();
- if (pReferenced)
- {
- // Copy the referenced object.
- sal_Int32 nRef
- = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
-
- sal_uInt64 nReferenceStart = pReference->GetObjectElement().GetLocation();
- sal_uInt64 nReferenceEnd = pReference->GetOffset();
- sal_uInt64 nOffset = 0;
- if (nCopyStart == 0)
- // Array start -> reference start.
- nOffset = rObject.GetArrayOffset();
- else
- // Previous reference end -> reference start.
- nOffset = nCopyStart;
- aLine.append(static_cast<const char*>(pObjectStream->GetData()) + nOffset,
- nReferenceStart - nOffset);
-
- // Write the updated reference.
- aLine.append(" ");
- aLine.append(nRef);
- aLine.append(" 0 R");
- // Start copying here next time.
- nCopyStart = nReferenceEnd;
-
- bDone = true;
- }
- }
- }
- if (bDone)
+ for (auto const& pElement : rElements)
{
- // Copy the last part here, in the complex case.
- sal_uInt64 nArrEnd = rObject.GetArrayOffset() + rObject.GetArrayLength();
- const sal_Int32 nLen = nArrEnd - nCopyStart;
- if (nLen < 0)
- SAL_WARN("vcl.pdfwriter", "copyExternalResource() failed");
- else
- aLine.append(static_cast<const char*>(pObjectStream->GetData()) + nCopyStart, nLen);
+ copyRecursively(aLine, pElement, rDocBuffer, rCopiedResources);
+ aLine.append(" ");
}
- else
- // Can copy it as-is.
- aLine.append(static_cast<const char*>(pObjectStream->GetData())
- + rObject.GetArrayOffset(),
- rObject.GetArrayLength());
-
aLine.append("]\n");
}
More information about the Libreoffice-commits
mailing list