[Libreoffice-commits] core.git: Branch 'distro/collabora/cp-6.4' - include/vcl svx/source vcl/qa vcl/source

Tomaž Vajngerl (via logerrit) logerrit at kemper.freedesktop.org
Mon Jun 29 20:34:55 UTC 2020


 include/vcl/filter/PDFiumLibrary.hxx |   40 ++++++++++++++++++++++
 svx/source/svdraw/svdpdf.cxx         |   11 ++----
 vcl/qa/cppunit/PDFiumLibraryTest.cxx |   37 ++++++++++++++++++++
 vcl/source/pdf/PDFiumLibrary.cxx     |   62 +++++++++++++++++++++++++++++++++++
 4 files changed, 142 insertions(+), 8 deletions(-)

New commits:
commit 64b7feb7f0e0d3f29625c73e9790b2f152e34c09
Author:     Tomaž Vajngerl <tomaz.vajngerl at collabora.co.uk>
AuthorDate: Sun Jun 28 10:12:17 2020 +0200
Commit:     Tomaž Vajngerl <quikee at gmail.com>
CommitDate: Mon Jun 29 22:34:23 2020 +0200

    pdf: add PDFiumTextPage and PDFiumPageObject + test
    
    Also use it in ImpSdrPdfImport.
    
    Change-Id: I6d353ef60d036c3516448e64a50b25a9befd5db8
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/97364
    Tested-by: Jenkins
    Reviewed-by: Tomaž Vajngerl <quikee at gmail.com>
    (cherry picked from commit 440cb3fb80d9fd356871eac410b9797f23433722)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/97449
    Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice at gmail.com>

diff --git a/include/vcl/filter/PDFiumLibrary.hxx b/include/vcl/filter/PDFiumLibrary.hxx
index 35826097e45e..501f964f395d 100644
--- a/include/vcl/filter/PDFiumLibrary.hxx
+++ b/include/vcl/filter/PDFiumLibrary.hxx
@@ -69,6 +69,41 @@ public:
     std::unique_ptr<PDFiumAnnotation> getLinked(OString const& rKey);
 };
 
+class PDFiumTextPage;
+
+/// Wrapper around a PDFium page object handle; destroying it releases nothing.
+class VCL_DLLPUBLIC PDFiumPageObject final
+{
+private:
+    FPDF_PAGEOBJECT mpPageObject;
+    PDFiumPageObject(const PDFiumPageObject&) = delete;
+    PDFiumPageObject& operator=(const PDFiumPageObject&) = delete;
+
+public:
+    explicit PDFiumPageObject(FPDF_PAGEOBJECT pPageObject);
+    ~PDFiumPageObject();
+    FPDF_PAGEOBJECT getPointer() { return mpPageObject; }
+    /// Type code PDFium reports for the wrapped object (FPDFPageObj_GetType).
+    int getType();
+    /// Text of the object, read through pTextPage (FPDFTextObj_GetText).
+    OUString getText(std::unique_ptr<PDFiumTextPage> const& pTextPage);
+};
+
+/// Owns a PDFium text page handle and closes it (FPDFText_ClosePage) on destruction.
+class VCL_DLLPUBLIC PDFiumTextPage final
+{
+private:
+    FPDF_TEXTPAGE mpTextPage;
+    PDFiumTextPage(const PDFiumTextPage&) = delete;
+    PDFiumTextPage& operator=(const PDFiumTextPage&) = delete;
+
+public:
+    explicit PDFiumTextPage(FPDF_TEXTPAGE pTextPage);
+    ~PDFiumTextPage();
+
+    FPDF_TEXTPAGE getPointer() { return mpTextPage; }
+};
+
 class VCL_DLLPUBLIC PDFiumPage final
 {
 private:
@@ -92,10 +127,15 @@ public:
 
     FPDF_PAGE getPointer() { return mpPage; }
 
+    int getObjectCount();
+    std::unique_ptr<PDFiumPageObject> getObject(int nIndex);
+
     int getAnnotationCount();
     int getAnnotationIndex(std::unique_ptr<PDFiumAnnotation> const& rAnnotation);
 
     std::unique_ptr<PDFiumAnnotation> getAnnotation(int nIndex);
+
+    std::unique_ptr<PDFiumTextPage> getTextPage();
 };
 
 class VCL_DLLPUBLIC PDFiumDocument final
diff --git a/svx/source/svdraw/svdpdf.cxx b/svx/source/svdraw/svdpdf.cxx
index ca8226b68ce5..50e94a6f983b 100644
--- a/svx/source/svdraw/svdpdf.cxx
+++ b/svx/source/svdraw/svdpdf.cxx
@@ -176,17 +176,16 @@ void ImpSdrPdfImport::DoObjects(SvdProgressInfo* pProgrInfo, sal_uInt32* pAction
         SetupPageScale(dPageWidth, dPageHeight);
 
         // Load the page text to extract it when we get text elements.
-        FPDF_TEXTPAGE pTextPage = FPDFText_LoadPage(pPdfPage->getPointer());
+        auto pTextPage = pPdfPage->getTextPage();
 
-        const int nPageObjectCount = FPDFPage_CountObjects(pPdfPage->getPointer());
+        const int nPageObjectCount = pPdfPage->getObjectCount();
         if (pProgrInfo)
             pProgrInfo->SetActionCount(nPageObjectCount);
 
         for (int nPageObjectIndex = 0; nPageObjectIndex < nPageObjectCount; ++nPageObjectIndex)
         {
-            FPDF_PAGEOBJECT pPageObject
-                = FPDFPage_GetObject(pPdfPage->getPointer(), nPageObjectIndex);
-            ImportPdfObject(pPageObject, pTextPage, nPageObjectIndex);
+            auto pPageObject = pPdfPage->getObject(nPageObjectIndex);
+            ImportPdfObject(pPageObject->getPointer(), pTextPage->getPointer(), nPageObjectIndex);
             if (pProgrInfo && pActionsToReport)
             {
                 (*pActionsToReport)++;
@@ -200,8 +199,6 @@ void ImpSdrPdfImport::DoObjects(SvdProgressInfo* pProgrInfo, sal_uInt32* pAction
                 }
             }
         }
-
-        FPDFText_ClosePage(pTextPage);
     }
 }
 
diff --git a/vcl/qa/cppunit/PDFiumLibraryTest.cxx b/vcl/qa/cppunit/PDFiumLibraryTest.cxx
index 61b3981731f6..9c0c92607b14 100644
--- a/vcl/qa/cppunit/PDFiumLibraryTest.cxx
+++ b/vcl/qa/cppunit/PDFiumLibraryTest.cxx
@@ -35,6 +35,7 @@ class PDFiumLibraryTest : public test::BootstrapFixtureBase
 
     void testDocument();
     void testPages();
+    void testPageObjects();
     void testAnnotationsMadeInEvince();
     void testAnnotationsMadeInAcrobat();
     void testTools();
@@ -42,6 +43,7 @@ class PDFiumLibraryTest : public test::BootstrapFixtureBase
     CPPUNIT_TEST_SUITE(PDFiumLibraryTest);
     CPPUNIT_TEST(testDocument);
     CPPUNIT_TEST(testPages);
+    CPPUNIT_TEST(testPageObjects);
     CPPUNIT_TEST(testAnnotationsMadeInEvince);
     CPPUNIT_TEST(testAnnotationsMadeInAcrobat);
     CPPUNIT_TEST(testTools);
@@ -74,12 +76,35 @@ void PDFiumLibraryTest::testDocument()
     auto aSize = pDocument->getPageSize(0);
     CPPUNIT_ASSERT_EQUAL(612.0, aSize.getX());
     CPPUNIT_ASSERT_EQUAL(792.0, aSize.getY());
+}
+
+void PDFiumLibraryTest::testPages()
+{
+    OUString aURL = getFullUrl("Pangram.pdf");
+    SvFileStream aStream(aURL, StreamMode::READ);
+    GraphicFilter& rGraphicFilter = GraphicFilter::GetGraphicFilter();
+    Graphic aGraphic = rGraphicFilter.ImportUnloadedGraphic(aStream);
+    aGraphic.makeAvailable();
+    // The import must produce vector graphic data that is tagged as PDF.
+    auto pVectorGraphicData = aGraphic.getVectorGraphicData();
+    CPPUNIT_ASSERT(pVectorGraphicData);
+    CPPUNIT_ASSERT_EQUAL(VectorGraphicDataType::Pdf,
+                         pVectorGraphicData->getVectorGraphicDataType());
+    // Hand the raw bytes of that vector data to PDFium.
+    const void* pData = pVectorGraphicData->getVectorGraphicDataArray().getConstArray();
+    int nLength = pVectorGraphicData->getVectorGraphicDataArrayLength();
+
+    auto pPdfium = vcl::pdf::PDFiumLibrary::get();
+    auto pDocument = pPdfium->openDocument(pData, nLength);
+    CPPUNIT_ASSERT(pDocument);
+    // Pangram.pdf is expected to consist of exactly one page.
+    CPPUNIT_ASSERT_EQUAL(1, pDocument->getPageCount());
 
     auto pPage = pDocument->openPage(0);
     CPPUNIT_ASSERT(pPage);
 }
 
-void PDFiumLibraryTest::testPages()
+void PDFiumLibraryTest::testPageObjects()
 {
     OUString aURL = getFullUrl("Pangram.pdf");
     SvFileStream aStream(aURL, StreamMode::READ);
@@ -103,6 +128,16 @@ void PDFiumLibraryTest::testPages()
 
     auto pPage = pDocument->openPage(0);
     CPPUNIT_ASSERT(pPage);
+
+    CPPUNIT_ASSERT_EQUAL(12, pPage->getObjectCount());
+
+    auto pPageObject = pPage->getObject(0);
+    auto pTextPage = pPage->getTextPage();
+
+    CPPUNIT_ASSERT_EQUAL(1, pPageObject->getType());
+    CPPUNIT_ASSERT_EQUAL(OUString("The quick, brown fox jumps over a lazy dog. DJs flock by when "
+                                  "MTV ax quiz prog. Junk MTV quiz "),
+                         pPageObject->getText(pTextPage));
 }
 
 void PDFiumLibraryTest::testAnnotationsMadeInEvince()
diff --git a/vcl/source/pdf/PDFiumLibrary.cxx b/vcl/source/pdf/PDFiumLibrary.cxx
index cad2296eeea9..af13f8ec8fbe 100644
--- a/vcl/source/pdf/PDFiumLibrary.cxx
+++ b/vcl/source/pdf/PDFiumLibrary.cxx
@@ -15,6 +15,7 @@
 #include <vcl/filter/PDFiumLibrary.hxx>
 #include <fpdf_annot.h>
 #include <fpdf_edit.h>
+#include <fpdf_text.h>
 
 namespace vcl::pdf
 {
@@ -167,6 +168,19 @@ basegfx::B2DSize PDFiumDocument::getPageSize(int nIndex)
 
 int PDFiumDocument::getPageCount() { return FPDF_GetPageCount(mpPdfDocument); }
 
+int PDFiumPage::getObjectCount() { return FPDFPage_CountObjects(mpPage); }
+
+/// Wraps the page object found at nIndex; the result is an empty pointer
+/// when FPDFPage_GetObject returns no handle for that index.
+std::unique_ptr<PDFiumPageObject> PDFiumPage::getObject(int nIndex)
+{
+    FPDF_PAGEOBJECT pHandle = FPDFPage_GetObject(mpPage, nIndex);
+    if (!pHandle)
+        return nullptr;
+
+    return std::make_unique<PDFiumPageObject>(pHandle);
+}
+
 int PDFiumPage::getAnnotationCount() { return FPDFPage_GetAnnotCount(mpPage); }
 
 int PDFiumPage::getAnnotationIndex(std::unique_ptr<PDFiumAnnotation> const& rAnnotation)
@@ -185,6 +199,42 @@ std::unique_ptr<PDFiumAnnotation> PDFiumPage::getAnnotation(int nIndex)
     return pPDFiumAnnotation;
 }
 
+/// Loads the text of this page through FPDFText_LoadPage; the result is an
+/// empty pointer when PDFium hands back no text page handle.
+std::unique_ptr<PDFiumTextPage> PDFiumPage::getTextPage()
+{
+    FPDF_TEXTPAGE pHandle = FPDFText_LoadPage(mpPage);
+    if (!pHandle)
+        return nullptr;
+
+    return std::make_unique<PDFiumTextPage>(pHandle);
+}
+
+PDFiumPageObject::PDFiumPageObject(FPDF_PAGEOBJECT pPageObject)
+    : mpPageObject(pPageObject)
+{
+}
+
+PDFiumPageObject::~PDFiumPageObject() {}
+
+// Text of this object as UTF-16, or an empty string when there is no text page or no text.
+OUString PDFiumPageObject::getText(std::unique_ptr<PDFiumTextPage> const& pTextPage)
+{
+    if (!pTextPage) // getTextPage() yields null when the page text cannot be loaded
+        return OUString();
+    // Sizing call: PDFium reports the UTF-16LE length in bytes, terminator included.
+    const int nBytes = FPDFTextObj_GetText(mpPageObject, pTextPage->getPointer(), nullptr, 0);
+    if (nBytes <= 2)
+        return OUString();
+    // Allocate code units (two bytes each), zeroed, with one spare unit as terminator.
+    std::unique_ptr<sal_Unicode[]> pText(new sal_Unicode[nBytes / 2 + 1]());
+    const int nActualBytes
+        = FPDFTextObj_GetText(mpPageObject, pTextPage->getPointer(), pText.get(), nBytes);
+    return nActualBytes > 2 ? OUString(pText.get()) : OUString();
+}
+
+int PDFiumPageObject::getType() { return FPDFPageObj_GetType(mpPageObject); }
+
 PDFiumAnnotation::PDFiumAnnotation(FPDF_ANNOTATION pAnnotation)
     : mpAnnotation(pAnnotation)
 {
@@ -239,6 +289,18 @@ std::unique_ptr<PDFiumAnnotation> PDFiumAnnotation::getLinked(OString const& rKe
     }
     return pPDFiumAnnotation;
 }
+
+PDFiumTextPage::PDFiumTextPage(FPDF_TEXTPAGE pTextPage)
+    : mpTextPage(pTextPage)
+{
+}
+
+PDFiumTextPage::~PDFiumTextPage()
+{
+    if (mpTextPage)
+        FPDFText_ClosePage(mpTextPage);
+}
+
 } // end vcl::pdf
 
 #endif // HAVE_FEATURE_PDFIUM


More information about the Libreoffice-commits mailing list