[Libreoffice-commits] core.git: comphelper/CppunitTest_comphelper_parallelsort_test.mk comphelper/Module_comphelper.mk comphelper/qa include/comphelper sc/source

Dennis Francis (via logerrit) logerrit at kemper.freedesktop.org
Tue Oct 1 16:12:30 UTC 2019


 comphelper/CppunitTest_comphelper_parallelsort_test.mk |   30 +
 comphelper/Module_comphelper.mk                        |    1 
 comphelper/qa/unit/parallelsorttest.cxx                |  101 ++++
 include/comphelper/parallelsort.hxx                    |  373 +++++++++++++++++
 sc/source/core/data/dpcache.cxx                        |    8 
 5 files changed, 510 insertions(+), 3 deletions(-)

New commits:
commit 46d0afba738d8ee7c9b63384fef513f42ee587f3
Author:     Dennis Francis <dennis.francis at collabora.com>
AuthorDate: Tue Jul 9 23:07:23 2019 +0530
Commit:     Dennis Francis <dennis.francis at collabora.com>
CommitDate: Tue Oct 1 18:11:36 2019 +0200

    Implement parallel version of super-scalar-sample-sort...
    
    and use it for the pivot table construction routine processBuckets().
    
    The implementation uses ideas from the non-parallel sample sort discussed in the below paper,
    but parallelizes the "binning"/"classification" operations and the sorting of the bins
    themselves.
    
    Sanders, Peter, and Sebastian Winkel. "Super scalar sample sort."
    European Symposium on Algorithms. Springer, Berlin, Heidelberg, 2004.
    
    which can be accessed at :
    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.366&rep=rep1&type=pdf
    
    Change-Id: I3723b87e2feb8d7d9ee03f71f6025e26add914ce
    Reviewed-on: https://gerrit.libreoffice.org/79486
    Tested-by: Jenkins
    Reviewed-by: Luboš Luňák <l.lunak at collabora.com>

diff --git a/comphelper/CppunitTest_comphelper_parallelsort_test.mk b/comphelper/CppunitTest_comphelper_parallelsort_test.mk
new file mode 100644
index 000000000000..e1d2eab9ec36
--- /dev/null
+++ b/comphelper/CppunitTest_comphelper_parallelsort_test.mk
@@ -0,0 +1,30 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_CppunitTest_CppunitTest,comphelper_parallelsort_test))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,comphelper_parallelsort_test, \
+    comphelper/qa/unit/parallelsorttest \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,comphelper_parallelsort_test,\
+	boost_headers \
+))
+
+$(eval $(call gb_CppunitTest_use_sdk_api,comphelper_parallelsort_test))
+
+$(eval $(call gb_CppunitTest_use_libraries,comphelper_parallelsort_test, \
+    comphelper \
+    cppuhelper \
+    cppu \
+    sal \
+    tl \
+))
+
+# vim: set noet sw=4 ts=4:
\ No newline at end of file
diff --git a/comphelper/Module_comphelper.mk b/comphelper/Module_comphelper.mk
index 30ac708a927d..7541a59f1641 100644
--- a/comphelper/Module_comphelper.mk
+++ b/comphelper/Module_comphelper.mk
@@ -30,6 +30,7 @@ $(eval $(call gb_Module_add_subsequentcheck_targets,comphelper,\
 ))
 
 $(eval $(call gb_Module_add_check_targets,comphelper,\
+    CppunitTest_comphelper_parallelsort_test \
     CppunitTest_comphelper_threadpool_test \
     CppunitTest_comphelper_syntaxhighlight_test \
     CppunitTest_comphelper_variadictemplates_test \
diff --git a/comphelper/qa/unit/parallelsorttest.cxx b/comphelper/qa/unit/parallelsorttest.cxx
new file mode 100644
index 000000000000..90dcb3c235ba
--- /dev/null
+++ b/comphelper/qa/unit/parallelsorttest.cxx
@@ -0,0 +1,101 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <comphelper/parallelsort.hxx>
+#include <comphelper/threadpool.hxx>
+#include <rtl/string.hxx>
+#include <cppunit/TestAssert.h>
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/plugin/TestPlugIn.h>
+
+#include <cstdlib>
+#include <vector>
+#include <algorithm>
+#include <random>
+
+class ParallelSortTest : public CppUnit::TestFixture
+{
+public:
+    void testSortTiny();
+    void testSortMedium();
+    void testSortBig();
+
+    virtual void setUp() override;
+    virtual void tearDown() override;
+
+    CPPUNIT_TEST_SUITE(ParallelSortTest);
+    CPPUNIT_TEST(testSortTiny);
+    CPPUNIT_TEST(testSortMedium);
+    CPPUNIT_TEST(testSortBig);
+    CPPUNIT_TEST_SUITE_END();
+
+private:
+    void sortTest(size_t nLen);
+    void fillRandomUptoN(std::vector<size_t>& rVector, size_t N);
+
+    comphelper::ThreadPool* pThreadPool;
+    size_t mnThreads;
+};
+
+void ParallelSortTest::setUp()
+{
+    pThreadPool = &comphelper::ThreadPool::getSharedOptimalPool();
+    mnThreads = pThreadPool->getWorkerCount();
+}
+
+void ParallelSortTest::tearDown()
+{
+    if (pThreadPool)
+        pThreadPool->joinAll();
+}
+
+void ParallelSortTest::fillRandomUptoN(std::vector<size_t>& rVector, size_t N)
+{
+    rVector.resize(N);
+    for (size_t nIdx = 0; nIdx < N; ++nIdx)
+        rVector[nIdx] = nIdx;
+    std::shuffle(rVector.begin(), rVector.end(), std::default_random_engine(42));
+}
+
+void ParallelSortTest::sortTest(size_t nLen)
+{
+    std::vector<size_t> aVector(nLen);
+    fillRandomUptoN(aVector, nLen);
+    comphelper::parallelSort(aVector.begin(), aVector.end());
+    for (size_t nIdx = 0; nIdx < nLen; ++nIdx)
+    {
+        OString aMsg = "Wrong aVector[" + OString::number(nIdx) + "]";
+        CPPUNIT_ASSERT_EQUAL_MESSAGE(aMsg.getStr(), nIdx, aVector[nIdx]);
+    }
+}
+
+void ParallelSortTest::testSortTiny()
+{
+    sortTest(5);
+    sortTest(15);
+    sortTest(16);
+    sortTest(17);
+}
+
+void ParallelSortTest::testSortMedium()
+{
+    sortTest(1025);
+    sortTest(1029);
+    sortTest(1024 * 2 + 1);
+    sortTest(1024 * 2 + 9);
+}
+
+void ParallelSortTest::testSortBig() { sortTest(1024 * 16 + 3); }
+
+CPPUNIT_TEST_SUITE_REGISTRATION(ParallelSortTest);
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/comphelper/parallelsort.hxx b/include/comphelper/parallelsort.hxx
new file mode 100644
index 000000000000..fc77bdeea669
--- /dev/null
+++ b/include/comphelper/parallelsort.hxx
@@ -0,0 +1,373 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_COMPHELPER_PARALLELSORT_HXX
+#define INCLUDED_COMPHELPER_PARALLELSORT_HXX
+
+#include <comphelper/comphelperdllapi.h>
+#include <comphelper/threadpool.hxx>
+#include <tools/cpuid.hxx>
+
+#include <memory>
+#include <iterator>
+#include <thread>
+#include <algorithm>
+#include <cmath>
+#include <random>
+#include <functional>
+#include <iostream>
+#include <chrono>
+
+namespace comphelper
+{
+static const size_t nThreadCountGlobal = std::thread::hardware_concurrency();
+const static bool bHyperThreadingActive = cpuid::hasHyperThreading();
+static comphelper::ThreadPool& rTPool(comphelper::ThreadPool::getSharedOptimalPool());
+
+static thread_local std::mt19937 aGenerator{ std::random_device{}() };
+
+#define PARALLELSORT_ENABLEPZ 0
+
+namespace
+{
+class ProfileZone
+{
+public:
+#if PARALLELSORT_ENABLEPZ
+    ProfileZone(const char* pTag)
+        : maTag(pTag)
+        , maStart(std::chrono::steady_clock::now())
+        , mbFinished(false)
+    {
+    }
+
+    ~ProfileZone()
+    {
+        if (!mbFinished)
+            showTimeElapsed();
+    }
+
+    void stop()
+    {
+        showTimeElapsed();
+        mbFinished = true;
+    }
+#else
+    ProfileZone(const char* /*pTag*/)
+        : mbDummy(true)
+    {
+    }
+
+    void stop()
+    {
+        // Avoid loplugin:staticmethods, loplugin:staticaccess errors
+        (void)mbDummy;
+    }
+#endif
+
+private:
+#if PARALLELSORT_ENABLEPZ
+
+    void showTimeElapsed()
+    {
+        auto end = std::chrono::steady_clock::now();
+        size_t elapsed
+            = std::chrono::duration_cast<std::chrono::milliseconds>(end - maStart).count();
+        std::cout << maTag << " : " << elapsed << " ms" << std::endl << std::flush;
+    }
+
+    std::string maTag;
+    std::chrono::steady_clock::time_point maStart;
+    bool mbFinished;
+#else
+    bool mbDummy;
+
+#endif
+};
+
+class ParallelRunner
+{
+    class Executor : public comphelper::ThreadTask
+    {
+    public:
+        Executor(const std::shared_ptr<comphelper::ThreadTaskTag>& rTag,
+                 std::function<void()> aFunc)
+            : comphelper::ThreadTask(rTag)
+            , maFunc(std::move(aFunc))
+        {
+        }
+
+        virtual void doWork() override { maFunc(); }
+
+    private:
+        const std::function<void()> maFunc;
+    };
+
+public:
+    ParallelRunner() { maTag = comphelper::ThreadPool::createThreadTaskTag(); }
+
+    void enqueue(std::function<void()> aFunc)
+    {
+        rTPool.pushTask(std::make_unique<Executor>(maTag, aFunc));
+    }
+
+    void wait() { rTPool.waitUntilDone(maTag, false); }
+
+private:
+    std::shared_ptr<comphelper::ThreadTaskTag> maTag;
+};
+
+constexpr size_t nMaxTreeArraySize = 64;
+
+size_t lcl_round_down_pow2(size_t nNum)
+{
+    size_t nPow2;
+    for (nPow2 = 1; nPow2 <= nNum; nPow2 <<= 1)
+        ;
+    return std::min((nPow2 >> 1), nMaxTreeArraySize);
+}
+
+template <class RandItr> struct Sampler
+{
+    using ValueType = typename std::iterator_traits<RandItr>::value_type;
+
+    static void sample(RandItr aBegin, RandItr aEnd, ValueType* pSamples, size_t nSamples,
+                       size_t /*nParallelism*/)
+    {
+        ProfileZone aZone("\tsample()");
+        assert(aBegin <= aEnd);
+        size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
+        assert(std::mt19937::max() >= nLen);
+
+        for (size_t nIdx = 0; nIdx < nSamples; ++nIdx)
+        {
+            size_t nSel = aGenerator() % nLen--;
+            std::swap(*(aBegin + nSel), *(aBegin + nLen));
+            pSamples[nIdx] = *(aBegin + nLen);
+        }
+    }
+};
+
+template <class RandItr, class Compare> class Binner
+{
+    using ValueType = typename std::iterator_traits<RandItr>::value_type;
+
+    const size_t mnTreeArraySize;
+    const size_t mnDividers;
+    constexpr static size_t mnMaxStaticSize = 1024 * 50;
+    uint8_t maLabels[mnMaxStaticSize];
+    ValueType maDividers[nMaxTreeArraySize];
+    std::unique_ptr<uint8_t[]> pLabels;
+    size_t maSepBinEnds[nMaxTreeArraySize * nMaxTreeArraySize];
+    bool mbThreaded;
+
+public:
+    size_t maBinEnds[nMaxTreeArraySize];
+
+    Binner(const ValueType* pSamples, size_t nSamples, size_t nBins, bool bThreaded)
+        : mnTreeArraySize(lcl_round_down_pow2(nBins))
+        , mnDividers(mnTreeArraySize - 1)
+        , mbThreaded(bThreaded)
+    {
+        assert((nSamples % mnTreeArraySize) == 0);
+        assert(mnTreeArraySize <= nMaxTreeArraySize);
+        std::fill(maBinEnds, maBinEnds + mnTreeArraySize, 0);
+        std::fill(maSepBinEnds, maSepBinEnds + mnTreeArraySize * mnTreeArraySize, 0);
+        fillTreeArray(1, pSamples, pSamples + nSamples);
+    }
+
+    void fillTreeArray(size_t nPos, const ValueType* pLow, const ValueType* pHigh)
+    {
+        assert(pLow <= pHigh);
+        const ValueType* pMid = pLow + (pHigh - pLow) / 2;
+        maDividers[nPos] = *pMid;
+
+        if (2 * nPos < mnDividers) // So that 2*nPos < mnTreeArraySize
+        {
+            fillTreeArray(2 * nPos, pLow, pMid);
+            fillTreeArray(2 * nPos + 1, pMid + 1, pHigh);
+        }
+    }
+
+    constexpr inline size_t findBin(const ValueType& rVal, Compare& aComp)
+    {
+        size_t nIdx = 1;
+        while (nIdx <= mnDividers)
+            nIdx = ((nIdx << 1) + aComp(maDividers[nIdx], rVal));
+        return (nIdx - mnTreeArraySize);
+    }
+
+    void label(const RandItr aBegin, const RandItr aEnd, Compare& aComp)
+    {
+        ProfileZone aZoneSetup("\tlabel():setup");
+        size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
+        if (nLen > mnMaxStaticSize)
+            pLabels = std::make_unique<uint8_t[]>(nLen);
+        uint8_t* pLabelsRaw = (nLen > mnMaxStaticSize) ? pLabels.get() : maLabels;
+        aZoneSetup.stop();
+        ProfileZone aZoneFindBins("\tFindBins()");
+        if (mbThreaded)
+        {
+            ParallelRunner aPRunner;
+            const size_t nBins = mnTreeArraySize;
+            for (size_t nTIdx = 0; nTIdx < nBins; ++nTIdx)
+            {
+                aPRunner.enqueue([this, nTIdx, nBins, nLen, aBegin, pLabelsRaw, &aComp] {
+                    ProfileZone aZoneIn("\t\tFindBinsThreaded()");
+                    size_t nBinEndsStartIdx = nTIdx * mnTreeArraySize;
+                    size_t* pBinEnds = maSepBinEnds + nBinEndsStartIdx;
+                    size_t aBinEndsF[nMaxTreeArraySize] = { 0 };
+                    for (size_t nIdx = nTIdx; nIdx < nLen; nIdx += nBins)
+                    {
+                        size_t nBinIdx = findBin(*(aBegin + nIdx), aComp);
+                        pLabelsRaw[nIdx] = static_cast<uint8_t>(nBinIdx);
+                        ++aBinEndsF[nBinIdx];
+                    }
+
+                    for (size_t nIdx = 0; nIdx < mnTreeArraySize; ++nIdx)
+                        pBinEnds[nIdx] = aBinEndsF[nIdx];
+                });
+            }
+
+            aPRunner.wait();
+
+            // Populate maBinEnds from maSepBinEnds
+            for (size_t nTIdx = 0; nTIdx < mnTreeArraySize; ++nTIdx)
+            {
+                for (size_t nSepIdx = 0; nSepIdx < mnTreeArraySize; ++nSepIdx)
+                    maBinEnds[nTIdx] += maSepBinEnds[nSepIdx * mnTreeArraySize + nTIdx];
+            }
+        }
+        else
+        {
+            uint8_t* pLabel = pLabelsRaw;
+            for (RandItr aItr = aBegin; aItr != aEnd; ++aItr)
+            {
+                size_t nBinIdx = findBin(*aItr, aComp);
+                *pLabel++ = nBinIdx;
+                ++maBinEnds[nBinIdx];
+            }
+        }
+
+        aZoneFindBins.stop();
+
+        size_t nSum = 0;
+        // Store each bin's starting position in maBinEnds array for now.
+        for (size_t nIdx = 0; nIdx < mnTreeArraySize; ++nIdx)
+        {
+            size_t nSize = maBinEnds[nIdx];
+            maBinEnds[nIdx] = nSum;
+            nSum += nSize;
+        }
+
+        // Now maBinEnds has end positions of each bin.
+    }
+
+    void bin(const RandItr aBegin, const RandItr aEnd, ValueType* pOut)
+    {
+        ProfileZone aZone("\tbin()");
+        const size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
+        uint8_t* pLabelsRaw = (nLen > mnMaxStaticSize) ? pLabels.get() : maLabels;
+        size_t nIdx;
+        for (nIdx = 0; nIdx < nLen; ++nIdx)
+        {
+            pOut[maBinEnds[pLabelsRaw[nIdx]]++] = *(aBegin + nIdx);
+        }
+    }
+};
+
+template <class RandItr, class Compare = std::less<>>
+void s3sort(const RandItr aBegin, const RandItr aEnd, Compare aComp = Compare(),
+            bool bThreaded = true)
+{
+    static size_t nThreadCount = nThreadCountGlobal;
+
+    constexpr size_t nBaseCaseSize = 1024;
+    const std::size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
+    if (nLen < nBaseCaseSize)
+    {
+        std::sort(aBegin, aEnd, aComp);
+        return;
+    }
+
+    using ValueType = typename std::iterator_traits<RandItr>::value_type;
+    auto pOut = std::make_unique<ValueType[]>(nLen);
+
+    const size_t nBins = lcl_round_down_pow2(nThreadCount);
+    const size_t nOverSamplingFactor = std::max(1.0, std::sqrt(static_cast<double>(nLen) / 64));
+    const size_t nSamples = nOverSamplingFactor * nBins;
+    auto aSamples = std::make_unique<ValueType[]>(nSamples);
+    ProfileZone aZoneSampleAnsSort("SampleAndSort");
+    // Select samples and sort them
+    Sampler<RandItr>::sample(aBegin, aEnd, aSamples.get(), nSamples, nBins);
+    std::sort(aSamples.get(), aSamples.get() + nSamples, aComp);
+    aZoneSampleAnsSort.stop();
+
+    if (!aComp(aSamples[0], aSamples[nSamples - 1]))
+    {
+        // All samples are equal, fallback to standard sort.
+        std::sort(aBegin, aEnd, aComp);
+        return;
+    }
+
+    ProfileZone aZoneBinner("Binner");
+    // Create and populate bins using pOut from input iterators.
+    Binner<RandItr, Compare> aBinner(aSamples.get(), nSamples, nBins, bThreaded);
+    aBinner.label(aBegin, aEnd, aComp);
+    aBinner.bin(aBegin, aEnd, pOut.get());
+    aZoneBinner.stop();
+
+    ProfileZone aZoneSortBins("SortBins");
+    ValueType* pOutRaw = pOut.get();
+    if (bThreaded)
+    {
+        ParallelRunner aPRunner;
+        // Sort the bins separately.
+        for (size_t nBinIdx = 0, nBinStart = 0; nBinIdx < nBins; ++nBinIdx)
+        {
+            size_t nBinEnd = aBinner.maBinEnds[nBinIdx];
+            aPRunner.enqueue([pOutRaw, nBinStart, nBinEnd, &aComp] {
+                std::sort(pOutRaw + nBinStart, pOutRaw + nBinEnd, aComp);
+            });
+
+            nBinStart = nBinEnd;
+        }
+
+        aPRunner.wait();
+    }
+    else
+    {
+        for (size_t nBinIdx = 0, nBinStart = 0; nBinIdx < nBins; ++nBinIdx)
+        {
+            auto nBinEnd = aBinner.maBinEnds[nBinIdx];
+            std::sort(pOutRaw + nBinStart, pOutRaw + nBinEnd, aComp);
+            nBinStart = nBinEnd;
+        }
+    }
+
+    aZoneSortBins.stop();
+
+    // Move the sorted array to the array specified by input iterators.
+    std::move(pOutRaw, pOutRaw + nLen, aBegin);
+}
+
+} // anonymous namespace
+
+template <class RandItr, class Compare = std::less<>>
+void parallelSort(const RandItr aBegin, const RandItr aEnd, Compare aComp = Compare())
+{
+    assert(aBegin <= aEnd);
+    s3sort(aBegin, aEnd, aComp);
+}
+
+} // namespace comphelper
+
+#endif // INCLUDED_COMPHELPER_PARALLELSORT_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/data/dpcache.cxx b/sc/source/core/data/dpcache.cxx
index cf7eaff0d53d..8a834614f94e 100644
--- a/sc/source/core/data/dpcache.cxx
+++ b/sc/source/core/data/dpcache.cxx
@@ -32,6 +32,7 @@
 #include <columniterator.hxx>
 #include <cellvalue.hxx>
 
+#include <comphelper/parallelsort.hxx>
 #include <rtl/math.hxx>
 #include <unotools/charclass.hxx>
 #include <unotools/textsearch.hxx>
@@ -171,6 +172,7 @@ struct Bucket
     ScDPItemData maValue;
     SCROW mnOrderIndex;
     SCROW mnDataIndex;
+    Bucket() {}
     Bucket(const ScDPItemData& rValue, SCROW nData) :
         maValue(rValue), mnOrderIndex(0), mnDataIndex(nData) {}
 };
@@ -250,7 +252,7 @@ void processBuckets(std::vector<Bucket>& aBuckets, ScDPCache::Field& rField)
         return;
 
     // Sort by the value.
-    std::sort(aBuckets.begin(), aBuckets.end(), LessByValue());
+    comphelper::parallelSort(aBuckets.begin(), aBuckets.end(), LessByValue());
 
     {
         // Set order index such that unique values have identical index value.
@@ -269,14 +271,14 @@ void processBuckets(std::vector<Bucket>& aBuckets, ScDPCache::Field& rField)
     }
 
     // Re-sort the bucket this time by the data index.
-    std::sort(aBuckets.begin(), aBuckets.end(), LessByDataIndex());
+    comphelper::parallelSort(aBuckets.begin(), aBuckets.end(), LessByDataIndex());
 
     // Copy the order index series into the field object.
     rField.maData.reserve(aBuckets.size());
     std::for_each(aBuckets.begin(), aBuckets.end(), PushBackOrderIndex(rField.maData));
 
     // Sort by the value again.
-    std::sort(aBuckets.begin(), aBuckets.end(), LessByOrderIndex());
+    comphelper::parallelSort(aBuckets.begin(), aBuckets.end(), LessByOrderIndex());
 
     // Unique by value.
     std::vector<Bucket>::iterator itUniqueEnd =


More information about the Libreoffice-commits mailing list