[Libreoffice-commits] core.git: vcl/qa vcl/source

Tomaž Vajngerl (via logerrit) logerrit at kemper.freedesktop.org
Sat Apr 4 14:27:44 UTC 2020


 vcl/qa/cppunit/BitmapFilterTest.cxx         |    5 
 vcl/source/bitmap/BitmapFilterStackBlur.cxx |  278 ++++++++++++++++++++--------
 2 files changed, 208 insertions(+), 75 deletions(-)

New commits:
commit 3c1e746e479b89dd5c566c0a950dba23e0a2074e
Author:     Tomaž Vajngerl <tomaz.vajngerl at collabora.co.uk>
AuthorDate: Sat Apr 4 13:52:24 2020 +0200
Commit:     Tomaž Vajngerl <quikee at gmail.com>
CommitDate: Sat Apr 4 16:27:10 2020 +0200

    vcl: Parallelize BitmapFilterStackBlur
    
    Run time reduced from ~3100ms to ~1400ms on a 4c/8t CPU.
    
    Change-Id: Ic057c3fafc3cf6f0f90430ca431db569be08a133
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/91684
    Tested-by: Jenkins
    Reviewed-by: Tomaž Vajngerl <quikee at gmail.com>

diff --git a/vcl/qa/cppunit/BitmapFilterTest.cxx b/vcl/qa/cppunit/BitmapFilterTest.cxx
index 732c0c2dfb0b..a28057a4bf57 100644
--- a/vcl/qa/cppunit/BitmapFilterTest.cxx
+++ b/vcl/qa/cppunit/BitmapFilterTest.cxx
@@ -129,10 +129,11 @@ void BitmapFilterTest::testPerformance()
 
     int nIterations = 10;
     auto start = std::chrono::high_resolution_clock::now();
+    Bitmap aResult;
     for (int i = 0; i < nIterations; i++)
     {
         BitmapFilterStackBlur aBlurFilter(250, false); // don't extend the image
-        aBlurFilter.filter(aBigBitmap);
+        aResult = aBlurFilter.filter(aBigBitmap);
     }
     auto end = std::chrono::high_resolution_clock::now();
     auto elapsed = (end - start) / nIterations;
@@ -142,7 +143,7 @@ void BitmapFilterTest::testPerformance()
         std::unique_ptr<SvFileStream> pStream(
             new SvFileStream("~/BlurBigPerformance.png", StreamMode::WRITE | StreamMode::TRUNC));
         GraphicFilter& rFilter = GraphicFilter::GetGraphicFilter();
-        rFilter.compressAsPNG(aBigBitmap, *pStream);
+        rFilter.compressAsPNG(aResult, *pStream);
 
         pStream.reset(new SvFileStream("~/BlurBigPerformance.txt", StreamMode::WRITE));
         pStream->WriteOString("Blur average time: ");
diff --git a/vcl/source/bitmap/BitmapFilterStackBlur.cxx b/vcl/source/bitmap/BitmapFilterStackBlur.cxx
index 9629cc079c00..adac14248753 100644
--- a/vcl/source/bitmap/BitmapFilterStackBlur.cxx
+++ b/vcl/source/bitmap/BitmapFilterStackBlur.cxx
@@ -11,6 +11,9 @@
 #include <vcl/BitmapFilterStackBlur.hxx>
 #include <vcl/bitmapaccess.hxx>
 #include <bitmapwriteaccess.hxx>
+#include <sal/log.hxx>
+
+#include <comphelper/threadpool.hxx>
 
 namespace
 {
@@ -48,9 +51,30 @@ static const sal_Int16 constShiftTable[255]
 class BlurSharedData
 {
 public:
+    BitmapReadAccess* mpReadAccess;
+    BitmapWriteAccess* mpWriteAccess;
     long mnRadius;
     long mnComponentWidth;
     long mnDiv;
+    long mnColorChannels;
+
+    BlurSharedData(BitmapReadAccess* pReadAccess, BitmapWriteAccess* pWriteAccess, long aRadius,
+                   long nComponentWidth, long nColorChannels)
+        : mpReadAccess(pReadAccess)
+        , mpWriteAccess(pWriteAccess)
+        , mnRadius(aRadius)
+        , mnComponentWidth(nComponentWidth)
+        , mnDiv(aRadius + aRadius + 1)
+        , mnColorChannels(nColorChannels)
+    {
+    }
+};
+
+class BlurArrays
+{
+public:
+    BlurSharedData maShared;
+
     std::vector<sal_uInt8> maStackBuffer;
     std::vector<long> maPositionTable;
     std::vector<long> maWeightTable;
@@ -59,31 +83,54 @@ public:
     std::vector<long> mnInSumVector;
     std::vector<long> mnOutSumVector;
 
-    BlurSharedData(long aRadius, long nComponentWidth, long nColorChannels)
-        : mnRadius(aRadius)
-        , mnComponentWidth(nComponentWidth)
-        , mnDiv(aRadius + aRadius + 1)
-        , maStackBuffer(mnDiv * mnComponentWidth)
-        , maPositionTable(mnDiv)
-        , maWeightTable(mnDiv)
-        , mnSumVector(nColorChannels)
-        , mnInSumVector(nColorChannels)
-        , mnOutSumVector(nColorChannels)
+    BlurArrays(BlurSharedData const& rShared)
+        : maShared(rShared)
+        , maStackBuffer(maShared.mnDiv * maShared.mnComponentWidth)
+        , maPositionTable(maShared.mnDiv)
+        , maWeightTable(maShared.mnDiv)
+        , mnSumVector(maShared.mnColorChannels)
+        , mnInSumVector(maShared.mnColorChannels)
+        , mnOutSumVector(maShared.mnColorChannels)
     {
     }
 
-    void calculateWeightAndPositions(long nLastIndex)
+    void initializeWeightAndPositions(long nLastIndex)
     {
-        for (long i = 0; i < mnDiv; i++)
+        for (long i = 0; i < maShared.mnDiv; i++)
         {
-            maPositionTable[i] = std::min(nLastIndex, std::max(0L, i - mnRadius));
-            maWeightTable[i] = mnRadius + 1 - std::abs(i - mnRadius);
+            maPositionTable[i] = std::min(nLastIndex, std::max(0L, i - maShared.mnRadius));
+            maWeightTable[i] = maShared.mnRadius + 1 - std::abs(i - maShared.mnRadius);
         }
     }
 
-    long getMultiplyValue() { return static_cast<long>(constMultiplyTable[mnRadius]); }
+    long getMultiplyValue() { return static_cast<long>(constMultiplyTable[maShared.mnRadius]); }
+
+    long getShiftValue() { return static_cast<long>(constShiftTable[maShared.mnRadius]); }
+};
+
+typedef void (*BlurRangeFn)(BlurSharedData const& rShared, long nStartY, long nEndY);
+
+constexpr long constBlurThreadStrip = 16;
+
+class BlurTask : public comphelper::ThreadTask
+{
+    BlurRangeFn mpBlurFunction;
+    BlurSharedData& mrShared;
+    long mnStartY;
+    long mnEndY;
+
+public:
+    explicit BlurTask(const std::shared_ptr<comphelper::ThreadTaskTag>& pTag,
+                      BlurRangeFn pBlurFunction, BlurSharedData& rShared, long nStartY, long nEndY)
+        : comphelper::ThreadTask(pTag)
+        , mpBlurFunction(pBlurFunction)
+        , mrShared(rShared)
+        , mnStartY(nStartY)
+        , mnEndY(nEndY)
+    {
+    }
 
-    long getShiftValue() { return static_cast<long>(constShiftTable[mnRadius]); }
+    virtual void doWork() override { mpBlurFunction(mrShared, mnStartY, mnEndY); }
 };
 
 struct SumFunction24
@@ -171,19 +218,21 @@ struct SumFunction8
 };
 
 template <typename SumFunction>
-void stackBlurHorizontal(BitmapReadAccess* pReadAccess, BitmapWriteAccess* pWriteAccess,
-                         BlurSharedData& rShared)
+void stackBlurHorizontal(BlurSharedData const& rShared, long nStart, long nEnd)
 {
-    long nWidth = pReadAccess->Width();
-    long nHeight = pReadAccess->Height();
+    BitmapReadAccess* pReadAccess = rShared.mpReadAccess;
+    BitmapWriteAccess* pWriteAccess = rShared.mpWriteAccess;
 
-    sal_uInt8* pStack = rShared.maStackBuffer.data();
+    BlurArrays aArrays(rShared);
+
+    sal_uInt8* pStack = aArrays.maStackBuffer.data();
     sal_uInt8* pStackPtr;
 
+    long nWidth = pReadAccess->Width();
     long nLastIndexX = nWidth - 1;
 
-    long nMultiplyValue = rShared.getMultiplyValue();
-    long nShiftValue = rShared.getShiftValue();
+    long nMultiplyValue = aArrays.getMultiplyValue();
+    long nShiftValue = aArrays.getShiftValue();
 
     long nRadius = rShared.mnRadius;
     long nComponentWidth = rShared.mnComponentWidth;
@@ -197,15 +246,16 @@ void stackBlurHorizontal(BitmapReadAccess* pReadAccess, BitmapWriteAccess* pWrit
     long nStackIndexStart;
     long nWeight;
 
-    long* nSum = rShared.mnSumVector.data();
-    long* nInSum = rShared.mnInSumVector.data();
-    long* nOutSum = rShared.mnOutSumVector.data();
+    aArrays.initializeWeightAndPositions(nLastIndexX);
+
+    long* nSum = aArrays.mnSumVector.data();
+    long* nInSum = aArrays.mnInSumVector.data();
+    long* nOutSum = aArrays.mnOutSumVector.data();
 
-    rShared.calculateWeightAndPositions(nLastIndexX);
-    long* pPositionPointer = rShared.maPositionTable.data();
-    long* pWeightPointer = rShared.maWeightTable.data();
+    long* pPositionPointer = aArrays.maPositionTable.data();
+    long* pWeightPointer = aArrays.maWeightTable.data();
 
-    for (long y = 0; y < nHeight; y++)
+    for (long y = nStart; y <= nEnd; y++)
     {
         SumFunction::set(nSum, 0L);
         SumFunction::set(nInSum, 0L);
@@ -282,19 +332,21 @@ void stackBlurHorizontal(BitmapReadAccess* pReadAccess, BitmapWriteAccess* pWrit
 }
 
 template <typename SumFunction>
-void stackBlurVertical(BitmapReadAccess* pReadAccess, BitmapWriteAccess* pWriteAccess,
-                       BlurSharedData& rShared)
+void stackBlurVertical(BlurSharedData const& rShared, long nStart, long nEnd)
 {
-    long nWidth = pReadAccess->Width();
-    long nHeight = pReadAccess->Height();
+    BitmapReadAccess* pReadAccess = rShared.mpReadAccess;
+    BitmapWriteAccess* pWriteAccess = rShared.mpWriteAccess;
+
+    BlurArrays aArrays(rShared);
 
-    sal_uInt8* pStack = rShared.maStackBuffer.data();
+    sal_uInt8* pStack = aArrays.maStackBuffer.data();
     sal_uInt8* pStackPtr;
 
+    long nHeight = pReadAccess->Height();
     long nLastIndexY = nHeight - 1;
 
-    long nMultiplyValue = rShared.getMultiplyValue();
-    long nShiftValue = rShared.getShiftValue();
+    long nMultiplyValue = aArrays.getMultiplyValue();
+    long nShiftValue = aArrays.getShiftValue();
 
     long nRadius = rShared.mnRadius;
     long nComponentWidth = rShared.mnComponentWidth;
@@ -308,15 +360,15 @@ void stackBlurVertical(BitmapReadAccess* pReadAccess, BitmapWriteAccess* pWriteA
     long nStackIndexStart;
     long nWeight;
 
-    long* nSum = rShared.mnSumVector.data();
-    long* nInSum = rShared.mnInSumVector.data();
-    long* nOutSum = rShared.mnOutSumVector.data();
+    aArrays.initializeWeightAndPositions(nLastIndexY);
 
-    rShared.calculateWeightAndPositions(nLastIndexY);
-    long* pPositionPointer = rShared.maPositionTable.data();
-    long* pWeightPointer = rShared.maWeightTable.data();
+    long* nSum = aArrays.mnSumVector.data();
+    long* nInSum = aArrays.mnInSumVector.data();
+    long* nOutSum = aArrays.mnOutSumVector.data();
+    long* pPositionPointer = aArrays.maPositionTable.data();
+    long* pWeightPointer = aArrays.maWeightTable.data();
 
-    for (long x = 0; x < nWidth; x++)
+    for (long x = nStart; x <= nEnd; x++)
     {
         SumFunction::set(nSum, 0L);
         SumFunction::set(nInSum, 0L);
@@ -373,9 +425,7 @@ void stackBlurVertical(BitmapReadAccess* pReadAccess, BitmapWriteAccess* pWriteA
             }
 
             SumFunction::assignPtr(pStackPtr, pSourcePointer);
-
             SumFunction::add(nInSum, pSourcePointer);
-
             SumFunction::add(nSum, nInSum);
 
             nStackIndex++;
@@ -387,54 +437,136 @@ void stackBlurVertical(BitmapReadAccess* pReadAccess, BitmapWriteAccess* pWriteA
             pStackPtr = &pStack[nStackIndex * nComponentWidth];
 
             SumFunction::add(nOutSum, pStackPtr);
-
             SumFunction::sub(nInSum, pStackPtr);
         }
     }
 }
 
+void runStackBlur(Bitmap& rBitmap, const long nRadius, const long nComponentWidth,
+                  const long nColorChannels, BlurRangeFn pBlurHorizontalFn,
+                  BlurRangeFn pBlurVerticalFn, const bool bParallel)
+{
+    if (bParallel)
+    {
+        try
+        {
+            comphelper::ThreadPool& rShared = comphelper::ThreadPool::getSharedOptimalPool();
+            std::shared_ptr<comphelper::ThreadTaskTag> pTag
+                = comphelper::ThreadPool::createThreadTaskTag();
+
+            {
+                Bitmap::ScopedReadAccess pReadAccess(rBitmap);
+                BitmapScopedWriteAccess pWriteAccess(rBitmap);
+
+                BlurSharedData aSharedData(pReadAccess.get(), pWriteAccess.get(), nRadius,
+                                           nComponentWidth, nColorChannels);
+
+                const Size aSize = rBitmap.GetSizePixel();
+                long nEnd = aSize.Height() - 1;
+
+                long nStripStart = 0;
+                long nStripEnd = nStripStart + constBlurThreadStrip - 1;
+
+                while (nStripEnd < nEnd)
+                {
+                    std::unique_ptr<BlurTask> pTask(
+                        new BlurTask(pTag, pBlurHorizontalFn, aSharedData, nStripStart, nStripEnd));
+                    rShared.pushTask(std::move(pTask));
+                    nStripStart += constBlurThreadStrip;
+                    nStripEnd += constBlurThreadStrip;
+                }
+                if (nStripStart <= nEnd)
+                {
+                    std::unique_ptr<BlurTask> pTask(
+                        new BlurTask(pTag, pBlurHorizontalFn, aSharedData, nStripStart, nEnd));
+                    rShared.pushTask(std::move(pTask));
+                }
+                rShared.waitUntilDone(pTag);
+            }
+            {
+                Bitmap::ScopedReadAccess pReadAccess(rBitmap);
+                BitmapScopedWriteAccess pWriteAccess(rBitmap);
+
+                BlurSharedData aSharedData(pReadAccess.get(), pWriteAccess.get(), nRadius,
+                                           nComponentWidth, nColorChannels);
+
+                const Size aSize = rBitmap.GetSizePixel();
+                long nEnd = aSize.Width() - 1;
+
+                long nStripStart = 0;
+                long nStripEnd = nStripStart + constBlurThreadStrip - 1;
+
+                while (nStripEnd < nEnd)
+                {
+                    std::unique_ptr<BlurTask> pTask(
+                        new BlurTask(pTag, pBlurVerticalFn, aSharedData, nStripStart, nStripEnd));
+                    rShared.pushTask(std::move(pTask));
+                    nStripStart += constBlurThreadStrip;
+                    nStripEnd += constBlurThreadStrip;
+                }
+                if (nStripStart <= nEnd)
+                {
+                    std::unique_ptr<BlurTask> pTask(
+                        new BlurTask(pTag, pBlurVerticalFn, aSharedData, nStripStart, nEnd));
+                    rShared.pushTask(std::move(pTask));
+                }
+                rShared.waitUntilDone(pTag);
+            }
+        }
+        catch (...)
+        {
+            SAL_WARN("vcl.gdi", "threaded bitmap blurring failed");
+        }
+    }
+    else
+    {
+        {
+            Bitmap::ScopedReadAccess pReadAccess(rBitmap);
+            BitmapScopedWriteAccess pWriteAccess(rBitmap);
+            BlurSharedData aSharedData(pReadAccess.get(), pWriteAccess.get(), nRadius,
+                                       nComponentWidth, nColorChannels);
+            long nFirstIndex = 0;
+            long nLastIndex = pReadAccess->Height() - 1;
+            pBlurHorizontalFn(aSharedData, nFirstIndex, nLastIndex);
+        }
+        {
+            Bitmap::ScopedReadAccess pReadAccess(rBitmap);
+            BitmapScopedWriteAccess pWriteAccess(rBitmap);
+            BlurSharedData aSharedData(pReadAccess.get(), pWriteAccess.get(), nRadius,
+                                       nComponentWidth, nColorChannels);
+            long nFirstIndex = 0;
+            long nLastIndex = pReadAccess->Width() - 1;
+            pBlurVerticalFn(aSharedData, nFirstIndex, nLastIndex);
+        }
+    }
+}
+
 void stackBlur24(Bitmap& rBitmap, sal_Int32 nRadius, sal_Int32 nComponentWidth)
 {
+    const bool bParallel = true;
     // Limit radius
     nRadius = std::clamp<sal_Int32>(nRadius, 2, 254);
     const long nColorChannels = 3; // 3 color channel
-    BlurSharedData aData(nRadius, nComponentWidth, nColorChannels);
 
-    {
-        Bitmap::ScopedReadAccess pReadAccess(rBitmap);
-        BitmapScopedWriteAccess pWriteAccess(rBitmap);
-
-        stackBlurHorizontal<SumFunction24>(pReadAccess.get(), pWriteAccess.get(), aData);
-    }
+    BlurRangeFn pBlurHorizontalFn = stackBlurHorizontal<SumFunction24>;
+    BlurRangeFn pBlurVerticalFn = stackBlurVertical<SumFunction24>;
 
-    {
-        Bitmap::ScopedReadAccess pReadAccess(rBitmap);
-        BitmapScopedWriteAccess pWriteAccess(rBitmap);
-
-        stackBlurVertical<SumFunction24>(pReadAccess.get(), pWriteAccess.get(), aData);
-    }
+    runStackBlur(rBitmap, nRadius, nComponentWidth, nColorChannels, pBlurHorizontalFn,
+                 pBlurVerticalFn, bParallel);
 }
 
 void stackBlur8(Bitmap& rBitmap, sal_Int32 nRadius, sal_Int32 nComponentWidth)
 {
+    const bool bParallel = true;
     // Limit radius
     nRadius = std::clamp<sal_Int32>(nRadius, 2, 254);
     const long nColorChannels = 1; // 1 color channel
-    BlurSharedData aData(nRadius, nComponentWidth, nColorChannels);
-
-    {
-        Bitmap::ScopedReadAccess pReadAccess(rBitmap);
-        BitmapScopedWriteAccess pWriteAccess(rBitmap);
 
-        stackBlurHorizontal<SumFunction8>(pReadAccess.get(), pWriteAccess.get(), aData);
-    }
+    BlurRangeFn pBlurHorizontalFn = stackBlurHorizontal<SumFunction8>;
+    BlurRangeFn pBlurVerticalFn = stackBlurVertical<SumFunction8>;
 
-    {
-        Bitmap::ScopedReadAccess pReadAccess(rBitmap);
-        BitmapScopedWriteAccess pWriteAccess(rBitmap);
-
-        stackBlurVertical<SumFunction8>(pReadAccess.get(), pWriteAccess.get(), aData);
-    }
+    runStackBlur(rBitmap, nRadius, nComponentWidth, nColorChannels, pBlurHorizontalFn,
+                 pBlurVerticalFn, bParallel);
 }
 
 void centerExtendBitmap(Bitmap& rBitmap, sal_Int32 nExtendSize, Color aColor)


More information about the Libreoffice-commits mailing list