[Libreoffice-commits] core.git: include/vcl vcl/CppunitTest_vcl_graphic_test.mk vcl/Library_vcl.mk vcl/qa vcl/source

Tomaž Vajngerl (via logerrit) logerrit at kemper.freedesktop.org
Mon Jun 1 08:44:29 UTC 2020


 include/vcl/VectorGraphicSearch.hxx        |   39 ++++++
 vcl/CppunitTest_vcl_graphic_test.mk        |    7 -
 vcl/Library_vcl.mk                         |    1 
 vcl/qa/cppunit/VectorGraphicSearchTest.cxx |   50 ++++++++
 vcl/qa/cppunit/data/Pangram.pdf            |binary
 vcl/source/graphic/VectorGraphicSearch.cxx |  168 +++++++++++++++++++++++++++++
 6 files changed, 262 insertions(+), 3 deletions(-)

New commits:
commit efba780d6155317b592b6f5f73945a7851ec4d3b
Author:     Tomaž Vajngerl <tomaz.vajngerl at collabora.co.uk>
AuthorDate: Thu May 7 22:01:22 2020 +0200
Commit:     Tomaž Vajngerl <quikee at gmail.com>
CommitDate: Mon Jun 1 10:43:46 2020 +0200

    vcl: VectorGraphicSearch - for searching text inside PDF
    
    Change-Id: Iee940a3927330c8739774ff3c1af15998f89193b
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/95254
    Tested-by: Tomaž Vajngerl <quikee at gmail.com>
    Reviewed-by: Tomaž Vajngerl <quikee at gmail.com>

diff --git a/include/vcl/VectorGraphicSearch.hxx b/include/vcl/VectorGraphicSearch.hxx
new file mode 100644
index 000000000000..3411d0a931e6
--- /dev/null
+++ b/include/vcl/VectorGraphicSearch.hxx
@@ -0,0 +1,39 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#pragma once
+
+#include <vcl/graph.hxx>
+#include <vcl/vectorgraphicdata.hxx>
+#include <vcl/dllapi.h>
+
+#include <fpdf_doc.h>
+
+#include <memory>
+
+class SearchContext;
+
+class VCL_DLLPUBLIC VectorGraphicSearch final // text search in a PDF-backed Graphic
+{
+private:
+    Graphic maGraphic; // copy of the graphic passed to the constructor
+    FPDF_DOCUMENT mpPdfDocument; // null until searchPDF() loads it; closed in the destructor
+    std::unique_ptr<SearchContext> mpSearchContext; // state of the search started last
+    bool searchPDF(std::shared_ptr<VectorGraphicData> const& rData, OUString const& rSearchString);
+    // search() returns false unless the graphic holds PDF data and a search could be started.
+public:
+    VectorGraphicSearch(Graphic const& rGraphic); // also initializes the PDFium library
+    ~VectorGraphicSearch(); // also destroys the PDFium library
+    bool search(OUString const& rSearchString);
+    bool next(); // false if no search is active; otherwise the search context's result
+    int index(); // -1 if no search is active; otherwise the search context's result index
+};
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/CppunitTest_vcl_graphic_test.mk b/vcl/CppunitTest_vcl_graphic_test.mk
index 353d054e1ba7..2f2c61735ef8 100644
--- a/vcl/CppunitTest_vcl_graphic_test.mk
+++ b/vcl/CppunitTest_vcl_graphic_test.mk
@@ -14,11 +14,12 @@ $(eval $(call gb_CppunitTest_add_exception_objects,vcl_graphic_test, \
     vcl/qa/cppunit/GraphicDescriptorTest \
     vcl/qa/cppunit/GraphicFormatDetectorTest \
     vcl/qa/cppunit/GraphicNativeMetadataTest \
+    vcl/qa/cppunit/VectorGraphicSearchTest \
 ))
 
-$(eval $(call gb_CppunitTest_use_externals,vcl_graphic_test,\
-	boost_headers \
-	glm_headers \
+$(eval $(call gb_CppunitTest_use_externals,vcl_graphic_test, \
+    boost_headers \
+    $(if $(filter PDFIUM,$(BUILD_TYPE)),pdfium) \
 ))
 ifeq ($(TLS),NSS)
 $(eval $(call gb_CppunitTest_use_externals,vcl_graphic_test,\
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index 0e861442218f..ccbe52b2902c 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -330,6 +330,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
     vcl/source/graphic/UnoGraphicObject \
     vcl/source/graphic/UnoGraphicProvider \
     vcl/source/graphic/UnoGraphicTransformer \
+    vcl/source/graphic/VectorGraphicSearch \
     vcl/source/bitmap/bitmap \
     vcl/source/bitmap/bitmapfilter \
     vcl/source/bitmap/BitmapAlphaClampFilter \
diff --git a/vcl/qa/cppunit/VectorGraphicSearchTest.cxx b/vcl/qa/cppunit/VectorGraphicSearchTest.cxx
new file mode 100644
index 000000000000..0ed21ccf9e26
--- /dev/null
+++ b/vcl/qa/cppunit/VectorGraphicSearchTest.cxx
@@ -0,0 +1,50 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <cppunit/TestAssert.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <unotest/bootstrapfixturebase.hxx>
+#include <unotest/directories.hxx>
+
+#include <vcl/VectorGraphicSearch.hxx>
+#include <vcl/graph.hxx>
+#include <vcl/graphicfilter.hxx>
+#include <tools/stream.hxx>
+
+class VectorGraphicSearchTest : public test::BootstrapFixtureBase // VectorGraphicSearch tests
+{
+    OUString getFullUrl(const OUString& sFileName) // URL of sFileName in vcl/qa/cppunit/data/
+    {
+        return m_directories.getURLFromSrc("/vcl/qa/cppunit/data/") + sFileName;
+    }
+    // The single test case; it is defined below the class.
+    void test();
+    // CppUnit suite declaration listing test() as the only case of this fixture.
+    CPPUNIT_TEST_SUITE(VectorGraphicSearchTest);
+    CPPUNIT_TEST(test);
+    CPPUNIT_TEST_SUITE_END();
+};
+
+// Imports Pangram.pdf, searches it for "lazy" and checks the index of the first match.
+void VectorGraphicSearchTest::test()
+{
+    SvFileStream aPdfStream(getFullUrl("Pangram.pdf"), StreamMode::READ);
+    Graphic aPdfGraphic = GraphicFilter::GetGraphicFilter().ImportUnloadedGraphic(aPdfStream);
+    aPdfGraphic.makeAvailable();
+
+    VectorGraphicSearch aSearch(aPdfGraphic);
+    CPPUNIT_ASSERT_EQUAL(true, aSearch.search("lazy"));
+    CPPUNIT_ASSERT_EQUAL(true, aSearch.next());
+    // Expected result index of the first match in the test document.
+    CPPUNIT_ASSERT_EQUAL(34, aSearch.index());
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(VectorGraphicSearchTest);
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/qa/cppunit/data/Pangram.pdf b/vcl/qa/cppunit/data/Pangram.pdf
new file mode 100644
index 000000000000..0714fda4e4dd
Binary files /dev/null and b/vcl/qa/cppunit/data/Pangram.pdf differ
diff --git a/vcl/source/graphic/VectorGraphicSearch.cxx b/vcl/source/graphic/VectorGraphicSearch.cxx
new file mode 100644
index 000000000000..864c65f2dda2
--- /dev/null
+++ b/vcl/source/graphic/VectorGraphicSearch.cxx
@@ -0,0 +1,168 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include <sal/config.h>
+#include <vcl/VectorGraphicSearch.hxx>
+
+#include <fpdf_text.h>
+
+// Owns the PDFium page, text-page and find handles for one search on one page of a document.
+// The document itself is borrowed: it is never closed here and must outlive this object.
+class SearchContext
+{
+public:
+    bool bInitialized = false; // not read or written by any code in this file
+
+    FPDF_DOCUMENT mpPdfDocument; // borrowed
+    sal_Int32 mnPageIndex;
+    FPDF_PAGE mpPage = nullptr; // owned; the three handles stay null until initialize()
+    FPDF_TEXTPAGE mpTextPage = nullptr; // owned
+    OUString maSearchString;
+    FPDF_SCHHANDLE mpSearchHandle = nullptr; // owned
+
+    // Only records its arguments; PDFium is not touched until initialize().
+    SearchContext(FPDF_DOCUMENT pPdfDocument, sal_Int32 nPageIndex, OUString const& rSearchString)
+        : mpPdfDocument(pPdfDocument)
+        , mnPageIndex(nPageIndex)
+        , maSearchString(rSearchString)
+    {
+    }
+
+    // The destructor closes raw handles, so a copy would close each of them a second time.
+    SearchContext(SearchContext const&) = delete;
+    SearchContext& operator=(SearchContext const&) = delete;
+
+    // Closes whatever initialize() managed to open, in reverse order of acquisition.
+    ~SearchContext()
+    {
+        if (mpSearchHandle)
+            FPDFText_FindClose(mpSearchHandle);
+        if (mpTextPage)
+            FPDFText_ClosePage(mpTextPage);
+        if (mpPage)
+            FPDF_ClosePage(mpPage);
+    }
+
+    // Loads the page and its text page, then starts the find. Returns false at the first
+    // step that fails; handles opened before that point are released by the destructor.
+    bool initialize()
+    {
+        if (!mpPdfDocument)
+            return false;
+        mpPage = FPDF_LoadPage(mpPdfDocument, mnPageIndex);
+        if (!mpPage)
+            return false;
+        mpTextPage = FPDFText_LoadPage(mpPage);
+        if (!mpTextPage)
+            return false;
+
+        // getStr() is reinterpreted as PDFium's wide string; flags = 0, start index = 0.
+        FPDF_WIDESTRING pString = reinterpret_cast<FPDF_WIDESTRING>(maSearchString.getStr());
+        mpSearchHandle = FPDFText_FindStart(mpTextPage, pString, 0, 0);
+
+        return mpSearchHandle != nullptr;
+    }
+
+    // Forwards to FPDFText_FindNext(); false when no find was started.
+    bool next() { return mpSearchHandle && FPDFText_FindNext(mpSearchHandle); }
+
+    // Forwards to FPDFText_GetSchResultIndex(); -1 when no find was started.
+    int index() { return mpSearchHandle ? FPDFText_GetSchResultIndex(mpSearchHandle) : -1; }
+};
+
+// Keeps a copy of rGraphic and initializes the PDFium library (undone in the destructor).
+VectorGraphicSearch::VectorGraphicSearch(Graphic const& rGraphic)
+    : maGraphic(rGraphic), mpPdfDocument(nullptr)
+{
+    FPDF_LIBRARY_CONFIG aLibraryConfig;
+    aLibraryConfig.version = 2; // interface version of FPDF_LIBRARY_CONFIG being filled in
+    aLibraryConfig.m_v8EmbedderSlot = 0;
+    aLibraryConfig.m_pIsolate = nullptr;
+    aLibraryConfig.m_pUserFontPaths = nullptr;
+    FPDF_InitLibraryWithConfig(&aLibraryConfig); // NOTE(review): global init per instance?
+}
+
+VectorGraphicSearch::~VectorGraphicSearch() // releases handles, then document, then library
+{
+    mpSearchContext.reset(); // first: closes the find/text-page/page handles it owns
+    // next: the document those handles were opened from (still null if none was ever loaded)
+    if (mpPdfDocument)
+        FPDF_CloseDocument(mpPdfDocument);
+    FPDF_DestroyLibrary(); // last: pairs with FPDF_InitLibraryWithConfig() in the constructor
+}
+
+// Starts a search for rSearchString; only graphics carrying PDF vector data are supported.
+// Returns the result of searchPDF(), or false when the graphic has no PDF data.
+bool VectorGraphicSearch::search(OUString const& rSearchString)
+{
+    auto pVectorData = maGraphic.getVectorGraphicData();
+    // Guard: nothing to search without vector data, or when the data is not a PDF.
+    if (!pVectorData || pVectorData->getVectorGraphicDataType() != VectorGraphicDataType::Pdf)
+        return false;
+    return searchPDF(pVectorData, rSearchString);
+}
+
+// Loads the PDF bytes held by rData and starts a search for rSearchString on the page
+// given by rData->getPageIndex() (a negative index is clamped to page 0).
+//
+// Returns true when PDFium handed out a search handle. Returns false when rSearchString is
+// empty, the document cannot be loaded, or SearchContext::initialize() fails.
+//
+// Any search started earlier on this object is discarded, except when rSearchString is
+// empty: that case returns before touching the existing state.
+bool VectorGraphicSearch::searchPDF(std::shared_ptr<VectorGraphicData> const& rData,
+                                    OUString const& rSearchString)
+{
+    if (rSearchString.isEmpty())
+        return false;
+
+    // An earlier search may have left a context and a document behind. Release both before
+    // loading again: the context's handles were opened from the old document, so it goes
+    // first, and overwriting mpPdfDocument without closing it would leak the document.
+    mpSearchContext.reset();
+    if (mpPdfDocument)
+        FPDF_CloseDocument(mpPdfDocument);
+
+    // NOTE(review): PDFium keeps reading from this buffer while the document is open; this
+    // assumes the array stays alive through maGraphic - confirm against VectorGraphicData.
+    mpPdfDocument
+        = FPDF_LoadMemDocument(rData->getVectorGraphicDataArray().getConstArray(),
+                               rData->getVectorGraphicDataArrayLength(), /*password=*/nullptr);
+
+    if (!mpPdfDocument)
+    {
+        // TODO: Handle failure to load. FPDF_GetLastError() distinguishes the causes
+        // (FPDF_ERR_FILE, FPDF_ERR_FORMAT, FPDF_ERR_PASSWORD, FPDF_ERR_SECURITY, ...);
+        // until they are acted upon, every failure is reported the same way.
+        return false;
+    }
+
+    sal_Int32 nPageIndex = std::max(rData->getPageIndex(), 0);
+
+    mpSearchContext.reset(new SearchContext(mpPdfDocument, nPageIndex, rSearchString));
+
+    return mpSearchContext->initialize();
+}
+
+// Steps the active search forward; returns SearchContext::next(), or false without a search.
+bool VectorGraphicSearch::next()
+{
+    // No context means search() was never called or bailed out before creating one.
+    return mpSearchContext ? mpSearchContext->next() : false;
+}
+
+// Result index reported by the active search context, or -1 when there is no context.
+int VectorGraphicSearch::index()
+{
+    // -1 is the same "nothing to report" value SearchContext::index() uses.
+    return mpSearchContext ? mpSearchContext->index() : -1;
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */


More information about the Libreoffice-commits mailing list