[Libreoffice-commits] core.git: sc/CppunitTest_sc_ucalc.mk sc/inc sc/Library_sc.mk sc/source

Tue Jun 6 03:00:54 UTC 2017

sc/CppunitTest_sc_ucalc.mk       |    6 
 sc/Library_sc.mk                 |    6 
 sc/inc/dpcache.hxx               |   10 -
 sc/source/core/data/dpcache.cxx  |  376 ++++++++++++++++++++++++++++++---------
 sc/source/core/data/dpobject.cxx |    4 
 5 files changed, 317 insertions(+), 85 deletions(-)

New commits:
commit a078328e91ab9cbd78a92608c1abdc2c57ff9ac5
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date:   Tue May 23 22:09:40 2017 -0400

    tdf#107945: prepare for future multi-threading of pivot cache...
    
    The threaded code path is disabled for now. We first need to make the edit
    engine and a few other places thread-safe before we can parallelize this code.
    
    Change-Id: Ie09536964ece42d43f505afc5e2611d469cc5c95
    Reviewed-on: https://gerrit.libreoffice.org/38424
    Tested-by: Jenkins <ci at libreoffice.org>
    Reviewed-by: Kohei Yoshida <libreoffice at kohei.us>

diff --git a/sc/CppunitTest_sc_ucalc.mk b/sc/CppunitTest_sc_ucalc.mk
index 5e91862f416c..d918182a0c4f 100644
--- a/sc/CppunitTest_sc_ucalc.mk
+++ b/sc/CppunitTest_sc_ucalc.mk
@@ -112,6 +112,12 @@ $(eval $(call gb_CppunitTest_add_libs,sc_ucalc,\
 ))
 endif
 
+ifeq ($(OS), $(filter LINUX %BSD SOLARIS, $(OS)))
+$(eval $(call gb_CppunitTest_add_libs,sc_ucalc,\
+    -lpthread \
+))
+endif
+
 $(eval $(call gb_CppunitTest_use_configuration,sc_ucalc))
 
 # vim: set noet sw=4 ts=4:
diff --git a/sc/Library_sc.mk b/sc/Library_sc.mk
index e72c61546f05..5b7d2fcf9275 100644
--- a/sc/Library_sc.mk
+++ b/sc/Library_sc.mk
@@ -696,6 +696,12 @@ $(eval $(call gb_Library_add_libs,sc,\
 ))
 endif
 
+ifeq ($(OS), $(filter LINUX %BSD SOLARIS, $(OS)))
+$(eval $(call gb_Library_add_libs,sc,\
+    -lpthread \
+))
+endif
+
 $(eval $(call gb_SdiTarget_SdiTarget,sc/sdi/scslots,sc/sdi/scalc))
 
 $(eval $(call gb_SdiTarget_set_include,sc/sdi/scslots,\
diff --git a/sc/inc/dpcache.hxx b/sc/inc/dpcache.hxx
index 515a3fae6a19..3bac2751600d 100644
--- a/sc/inc/dpcache.hxx
+++ b/sc/inc/dpcache.hxx
@@ -42,9 +42,9 @@ struct ScDPNumGroupInfo;
  */
 class SC_DLLPUBLIC ScDPCache
 {
-    typedef std::unordered_set<OUString, OUStringHash> StringSetType;
-
 public:
+    typedef std::unordered_set<OUString, OUStringHash> StringSetType;
+    typedef mdds::flat_segment_tree<SCROW, bool> EmptyRowsType;
     typedef std::vector<ScDPItemData> ScDPItemDataVec;
     typedef std::set<ScDPObject*> ScDPObjectSet;
     typedef std::vector<SCROW> IndexArrayType;
@@ -117,17 +117,17 @@ private:
 
     FieldsType maFields;
     GroupFieldsType maGroupFields;
-    StringSetType maStringPool;
+    std::vector<StringSetType> maStringPools; // one for each field.
 
     std::vector<OUString> maLabelNames; // Stores dimension names and the data layout dimension name at position 0.
-    mdds::flat_segment_tree<SCROW, bool> maEmptyRows;
+    EmptyRowsType maEmptyRows;
     SCROW mnDataSize;
     SCROW mnRowCount;
 
     bool mbDisposing;
 
 public:
-    rtl_uString* InternString( const OUString& rStr );
+    rtl_uString* InternString( size_t nDim, const OUString& rStr );
     void AddReference(ScDPObject* pObj) const;
     void RemoveReference(ScDPObject* pObj) const;
     const ScDPObjectSet& GetAllReferences() const;
diff --git a/sc/source/core/data/dpcache.cxx b/sc/source/core/data/dpcache.cxx
index 0383108bf81b..2064d840496d 100644
--- a/sc/source/core/data/dpcache.cxx
+++ b/sc/source/core/data/dpcache.cxx
@@ -43,6 +43,17 @@
 #include <com/sun/star/sheet/DataPilotFieldGroupBy.hpp>
 #endif
 
+// TODO : Threaded pivot cache operation is disabled until we can figure out
+// ways to make the edit engine and number formatter code thread-safe in a
+// proper fashion.
+#define ENABLE_THREADED_PIVOT_CACHE 0
+
+#if ENABLE_THREADED_PIVOT_CACHE
+#include <thread>
+#include <future>
+#include <queue>
+#endif
+
 using namespace ::com::sun::star;
 
 using ::com::sun::star::uno::Exception;
@@ -106,6 +117,17 @@ private:
     ScDocument* mpDoc;
 };
 
+rtl_uString* internString( ScDPCache::StringSetType& rPool, const OUString& rStr )
+{
+    ScDPCache::StringSetType::iterator it = rPool.find(rStr);
+    if (it != rPool.end())
+        // In the pool.
+        return (*it).pData;
+
+    std::pair<ScDPCache::StringSetType::iterator, bool> r = rPool.insert(rStr);
+    return r.second ? (*r.first).pData : nullptr;
+}
+
 OUString createLabelString( const ScDocument* pDoc, SCCOL nCol, const ScRefCellValue& rCell )
 {
     OUString aDocStr = rCell.getRawString(pDoc);
@@ -125,16 +147,15 @@ OUString createLabelString( const ScDocument* pDoc, SCCOL nCol, const ScRefCellV
 }
 
 void initFromCell(
-    ScDPCache& rCache, ScDocument* pDoc, const ScAddress& rPos,
-    const ScRefCellValue& rCell,
-    ScDPItemData& rData, sal_uInt32& rNumFormat)
+    ScDPCache::StringSetType& rStrPool, ScDocument* pDoc, const ScAddress& rPos,
+    const ScRefCellValue& rCell, ScDPItemData& rData, sal_uInt32& rNumFormat)
 {
     OUString aDocStr = rCell.getRawString(pDoc);
     rNumFormat = 0;
 
     if (rCell.hasError())
     {
-        rData.SetErrorStringInterned(rCache.InternString(aDocStr));
+        rData.SetErrorStringInterned(internString(rStrPool, aDocStr));
     }
     else if (rCell.hasNumeric())
     {
@@ -144,7 +165,7 @@ void initFromCell(
     }
     else if (!rCell.isEmpty())
     {
-        rData.SetStringInterned(rCache.InternString(aDocStr));
+        rData.SetStringInterned(internString(rStrPool, aDocStr));
     }
     else
         rData.SetEmpty();
@@ -273,42 +294,249 @@ void processBuckets(std::vector<Bucket>& aBuckets, ScDPCache::Field& rField)
     std::for_each(itBeg, itUniqueEnd, PushBackValue(rField.maItems));
 }
 
+struct InitColumnData
+{
+    ScDPCache::EmptyRowsType maEmptyRows;
+    OUString maLabel;
+
+    ScDPCache::StringSetType* mpStrPool;
+    ScDPCache::Field* mpField;
+
+    SCCOL mnCol;
+
+    InitColumnData() :
+        maEmptyRows(0, MAXROWCOUNT, true),
+        mpStrPool(nullptr),
+        mpField(nullptr),
+        mnCol(-1) {}
+
+    void init( SCCOL nCol, ScDPCache::StringSetType* pStrPool, ScDPCache::Field* pField )
+    {
+        mpStrPool = pStrPool;
+        mpField = pField;
+        mnCol = nCol;
+    }
+};
+
+struct InitDocData
+{
+    ScDocument* mpDoc;
+    SCTAB mnDocTab;
+    SCROW mnStartRow;
+    SCROW mnEndRow;
+    bool mbTailEmptyRows;
+
+    InitDocData() :
+        mpDoc(nullptr),
+        mnDocTab(-1),
+        mnStartRow(-1),
+        mnEndRow(-1),
+        mbTailEmptyRows(false) {}
+};
+
+typedef std::unordered_set<OUString, OUStringHash> LabelSet;
+
+class InsertLabel : public std::unary_function<OUString, void>
+{
+    LabelSet& mrNames;
+public:
+    explicit InsertLabel(LabelSet& rNames) : mrNames(rNames) {}
+    void operator() (const OUString& r)
+    {
+        mrNames.insert(r);
+    }
+};
+
+std::vector<OUString> normalizeLabels( const std::vector<InitColumnData>& rColData )
+{
+    std::vector<OUString> aLabels(1u, ScGlobal::GetRscString(STR_PIVOT_DATA));
+
+    LabelSet aExistingNames;
+
+    for (const InitColumnData& rCol : rColData)
+    {
+        const OUString& rLabel = rCol.maLabel;
+        sal_Int32 nSuffix = 1;
+        OUString aNewLabel = rLabel;
+        while (true)
+        {
+            if (!aExistingNames.count(aNewLabel))
+            {
+                // this is a unique label.
+                aLabels.push_back(aNewLabel);
+                aExistingNames.insert(aNewLabel);
+                break;
+            }
+
+            // This name already exists.
+            OUStringBuffer aBuf(rLabel);
+            aBuf.append(++nSuffix);
+            aNewLabel = aBuf.makeStringAndClear();
+        }
+    }
+
+    return aLabels;
+}
+
+void initColumnFromDoc( InitDocData& rDocData, InitColumnData &rColData )
+{
+    ScDPCache::Field& rField = *rColData.mpField;
+    ScDocument* pDoc = rDocData.mpDoc;
+    SCTAB nDocTab = rDocData.mnDocTab;
+    SCCOL nCol = rColData.mnCol;
+    SCROW nStartRow = rDocData.mnStartRow;
+    SCROW nEndRow = rDocData.mnEndRow;
+    bool bTailEmptyRows = rDocData.mbTailEmptyRows;
+
+    std::unique_ptr<sc::ColumnIterator> pIter =
+        pDoc->GetColumnIterator(nDocTab, nCol, nStartRow, nEndRow);
+    assert(pIter);
+    assert(pIter->hasCell());
+
+    ScDPItemData aData;
+
+    rColData.maLabel = createLabelString(pDoc, nCol, pIter->getCell());
+    pIter->next();
+
+    std::vector<Bucket> aBuckets;
+    aBuckets.reserve(nEndRow-nStartRow); // skip the topmost label cell.
+
+    // Push back all original values.
+    for (SCROW i = 0, n = nEndRow-nStartRow; i < n; ++i, pIter->next())
+    {
+        assert(pIter->hasCell());
+
+        sal_uInt32 nNumFormat = 0;
+        ScAddress aPos(nCol, pIter->getRow(), nDocTab);
+        initFromCell(*rColData.mpStrPool, pDoc, aPos, pIter->getCell(), aData, nNumFormat);
+
+        aBuckets.emplace_back(aData, i);
+
+        if (!aData.IsEmpty())
+        {
+            rColData.maEmptyRows.insert_back(i, i+1, false);
+            if (nNumFormat)
+                // Only take non-default number format.
+                rField.mnNumFormat = nNumFormat;
+        }
+    }
+
+    processBuckets(aBuckets, rField);
+
+    if (bTailEmptyRows)
+    {
+        // If the last item is not empty, append one. Note that the items
+        // are sorted, and empty item should come last when sorted.
+        if (rField.maItems.empty() || !rField.maItems.back().IsEmpty())
+        {
+            aData.SetEmpty();
+            rField.maItems.push_back(aData);
+        }
+    }
+}
+
+#if ENABLE_THREADED_PIVOT_CACHE
+
+class ThreadQueue
+{
+    using FutureType = std::future<void>;
+    std::queue<FutureType> maQueue;
+    std::mutex maMutex;
+    std::condition_variable maCond;
+
+    size_t mnMaxQueue;
+
+public:
+    ThreadQueue( size_t nMaxQueue ) : mnMaxQueue(nMaxQueue) {}
+
+    void push( std::function<void()> aFunc )
+    {
+        std::unique_lock<std::mutex> lock(maMutex);
+
+        while (maQueue.size() >= mnMaxQueue)
+            maCond.wait(lock);
+
+        FutureType f = std::async(std::launch::async, aFunc);
+        maQueue.push(std::move(f));
+        lock.unlock();
+
+        maCond.notify_one();
+    }
+
+    void waitForOne()
+    {
+        std::unique_lock<std::mutex> lock(maMutex);
+
+        while (maQueue.empty())
+            maCond.wait(lock);
+
+        FutureType ret = std::move(maQueue.front());
+        maQueue.pop();
+        lock.unlock();
+
+        ret.get(); // This may throw if an exception was thrown on the async thread.
+
+        maCond.notify_one();
+    }
+};
+
+class ThreadScopedGuard
+{
+    std::thread maThread;
+public:
+    ThreadScopedGuard(std::thread thread) : maThread(std::move(thread)) {}
+    ThreadScopedGuard(ThreadScopedGuard&& other) : maThread(std::move(other.maThread)) {}
+
+    ThreadScopedGuard(const ThreadScopedGuard&) = delete;
+    ThreadScopedGuard& operator= (const ThreadScopedGuard&) = delete;
+
+    ~ThreadScopedGuard()
+    {
+        maThread.join();
+    }
+};
+
+#endif
+
 }
 
 void ScDPCache::InitFromDoc(ScDocument* pDoc, const ScRange& rRange)
 {
     Clear();
 
+    InitDocData aDocData;
+    aDocData.mpDoc = pDoc;
+
     // Make sure the formula cells within the data range are interpreted
     // during this call, for this method may be called from the interpretation
     // of GETPIVOTDATA, which disables nested formula interpretation without
     // increasing the macro level.
     MacroInterpretIncrementer aMacroInc(pDoc);
 
-    SCROW nStartRow = rRange.aStart.Row();  // start of data
-    SCROW nEndRow = rRange.aEnd.Row();
+    aDocData.mnStartRow = rRange.aStart.Row();  // start of data
+    aDocData.mnEndRow = rRange.aEnd.Row();
 
     // Sanity check
-    if (!ValidRow(nStartRow) || !ValidRow(nEndRow) || nEndRow <= nStartRow)
+    if (!ValidRow(aDocData.mnStartRow) || !ValidRow(aDocData.mnEndRow) || aDocData.mnEndRow <= aDocData.mnStartRow)
         return;
 
-    sal_uInt16 nStartCol = rRange.aStart.Col();
-    sal_uInt16 nEndCol = rRange.aEnd.Col();
-    sal_uInt16 nDocTab = rRange.aStart.Tab();
+    SCCOL nStartCol = rRange.aStart.Col();
+    SCCOL nEndCol = rRange.aEnd.Col();
+    aDocData.mnDocTab = rRange.aStart.Tab();
 
     mnColumnCount = nEndCol - nStartCol + 1;
 
     // this row count must include the trailing empty rows.
-    mnRowCount = nEndRow - nStartRow; // skip the topmost label row.
+    mnRowCount = aDocData.mnEndRow - aDocData.mnStartRow; // skip the topmost label row.
 
     // Skip trailing empty rows if exists.
     SCCOL nCol1 = nStartCol, nCol2 = nEndCol;
-    SCROW nRow1 = nStartRow, nRow2 = nEndRow;
-    pDoc->ShrinkToDataArea(nDocTab, nCol1, nRow1, nCol2, nRow2);
-    bool bTailEmptyRows = nEndRow > nRow2; // Trailing empty rows exist.
-    nEndRow = nRow2;
+    SCROW nRow1 = aDocData.mnStartRow, nRow2 = aDocData.mnEndRow;
+    pDoc->ShrinkToDataArea(aDocData.mnDocTab, nCol1, nRow1, nCol2, nRow2);
+    aDocData.mbTailEmptyRows = aDocData.mnEndRow > nRow2; // Trailing empty rows exist.
+    aDocData.mnEndRow = nRow2;
 
-    if (nEndRow <= nStartRow)
+    if (aDocData.mnEndRow <= aDocData.mnStartRow)
     {
         // Check this again since the end row position has changed. It's
         // possible that the new end row becomes lower than the start row
@@ -317,6 +545,8 @@ void ScDPCache::InitFromDoc(ScDocument* pDoc, const ScRange& rRange)
         return;
     }
 
+    maStringPools.resize(mnColumnCount);
+    std::vector<InitColumnData> aColData(mnColumnCount);
     maFields.reserve(mnColumnCount);
     for (size_t i = 0; i < static_cast<size_t>(mnColumnCount); ++i)
         maFields.push_back(o3tl::make_unique<Field>());
@@ -326,52 +556,61 @@ void ScDPCache::InitFromDoc(ScDocument* pDoc, const ScRange& rRange)
     // Ensure that none of the formula cells in the data range are dirty.
     pDoc->EnsureFormulaCellResults(rRange);
 
-    ScDPItemData aData;
-    for (sal_uInt16 nCol = nStartCol; nCol <= nEndCol; ++nCol)
+#if ENABLE_THREADED_PIVOT_CACHE
+    ThreadQueue aQueue(std::thread::hardware_concurrency());
+
+    auto aFuncLaunchFieldThreads = [&]()
     {
-        std::unique_ptr<sc::ColumnIterator> pIter =
-            pDoc->GetColumnIterator(nDocTab, nCol, nStartRow, nEndRow);
-        assert(pIter);
-        assert(pIter->hasCell());
+        for (sal_uInt16 nCol = nStartCol; nCol <= nEndCol; ++nCol)
+        {
+            size_t nDim = nCol - nStartCol;
+            InitColumnData& rColData = aColData[nDim];
+            rColData.init(nCol, &maStringPools[nDim], maFields[nDim].get());
 
-        AddLabel(createLabelString(pDoc, nCol, pIter->getCell()));
-        pIter->next();
+            auto func = [&aDocData,&rColData]()
+            {
+                initColumnFromDoc(aDocData, rColData);
+            };
 
-        Field& rField = *maFields[nCol-nStartCol].get();
-        std::vector<Bucket> aBuckets;
-        aBuckets.reserve(nEndRow-nStartRow); // skip the topmost label cell.
+            aQueue.push(std::move(func));
+        }
+    };
 
-        // Push back all original values.
-        for (SCROW i = 0, n = nEndRow-nStartRow; i < n; ++i, pIter->next())
-        {
-            assert(pIter->hasCell());
+    {
+        // Launch a separate thread that in turn spawns async threads to populate the fields.
+        std::thread t(aFuncLaunchFieldThreads);
+        ThreadScopedGuard sg(std::move(t));
 
-            sal_uInt32 nNumFormat = 0;
-            ScAddress aPos(nCol, pIter->getRow(), nDocTab);
-            initFromCell(*this, pDoc, aPos, pIter->getCell(), aData, nNumFormat);
+        // Wait for all the async threads to complete on the main thread.
+        for (SCCOL i = 0; i < mnColumnCount; ++i)
+            aQueue.waitForOne();
+    }
 
-            aBuckets.push_back(Bucket(aData, i));
+#else
+    for (sal_uInt16 nCol = nStartCol; nCol <= nEndCol; ++nCol)
+    {
+        size_t nDim = nCol - nStartCol;
+        InitColumnData& rColData = aColData[nDim];
+        rColData.init(nCol, &maStringPools[nDim], maFields[nDim].get());
 
-            if (!aData.IsEmpty())
-            {
-                maEmptyRows.insert_back(i, i+1, false);
-                if (nNumFormat)
-                    // Only take non-default number format.
-                    rField.mnNumFormat = nNumFormat;
-            }
-        }
+        initColumnFromDoc(aDocData, rColData);
+    }
+#endif
+
+    maLabelNames = normalizeLabels(aColData);
 
-        processBuckets(aBuckets, rField);
+    // Merge all non-empty rows data.
+    for (const InitColumnData& rCol : aColData)
+    {
+        EmptyRowsType::const_segment_iterator it = rCol.maEmptyRows.begin_segment();
+        EmptyRowsType::const_segment_iterator ite = rCol.maEmptyRows.end_segment();
+        EmptyRowsType::const_iterator pos = maEmptyRows.begin();
 
-        if (bTailEmptyRows)
+        for (; it != ite; ++it)
         {
-            // If the last item is not empty, append one. Note that the items
-            // are sorted, and empty item should come last when sorted.
-            if (rField.maItems.empty() || !rField.maItems.back().IsEmpty())
-            {
-                aData.SetEmpty();
-                rField.maItems.push_back(aData);
-            }
+            if (!it->value)
+                // Non-empty segment found.  Record it.
+                pos = maEmptyRows.insert(pos, it->start, it->end, false).first;
         }
     }
 
@@ -385,6 +624,7 @@ bool ScDPCache::InitFromDataBase(DBConnector& rDB)
     try
     {
         mnColumnCount = rDB.getColumnCount();
+        maStringPools.resize(mnColumnCount);
         maFields.clear();
         maFields.reserve(mnColumnCount);
         for (size_t i = 0; i < static_cast<size_t>(mnColumnCount); ++i)
@@ -690,23 +930,6 @@ OUString ScDPCache::GetDimensionName(std::vector<OUString>::size_type nDim) cons
         return OUString();
 }
 
-namespace {
-
-typedef std::unordered_set<OUString, OUStringHash> LabelSet;
-
-class InsertLabel : public std::unary_function<OUString, void>
-{
-    LabelSet& mrNames;
-public:
-    explicit InsertLabel(LabelSet& rNames) : mrNames(rNames) {}
-    void operator() (const OUString& r)
-    {
-        mrNames.insert(r);
-    }
-};
-
-}
-
 void ScDPCache::PostInit()
 {
     OSL_ENSURE(!maFields.empty(), "Cache not initialized!");
@@ -734,7 +957,7 @@ void ScDPCache::Clear()
     maLabelNames.clear();
     maGroupFields.clear();
     maEmptyRows.clear();
-    maStringPool.clear();
+    maStringPools.clear();
 }
 
 void ScDPCache::AddLabel(const OUString& rLabel)
@@ -901,15 +1124,10 @@ SCCOL ScDPCache::GetDimensionIndex(const OUString& sName) const
     return -1;
 }
 
-rtl_uString* ScDPCache::InternString( const OUString& rStr )
+rtl_uString* ScDPCache::InternString( size_t nDim, const OUString& rStr )
 {
-    StringSetType::iterator it = maStringPool.find(rStr);
-    if (it != maStringPool.end())
-        // In the pool.
-        return (*it).pData;
-
-    std::pair<StringSetType::iterator, bool> r = maStringPool.insert(rStr);
-    return r.second ? (*r.first).pData : nullptr;
+    assert(nDim < maStringPools.size());
+    return internString(maStringPools[nDim], rStr);
 }
 
 void ScDPCache::AddReference(ScDPObject* pObj) const
diff --git a/sc/source/core/data/dpobject.cxx b/sc/source/core/data/dpobject.cxx
index 19dc7e846d6d..810742838814 100644
--- a/sc/source/core/data/dpobject.cxx
+++ b/sc/source/core/data/dpobject.cxx
@@ -247,7 +247,9 @@ void DBConnector::getValue(long nCol, ScDPItemData &rData, short& rNumType) cons
             case sdbc::DataType::VARBINARY:
             case sdbc::DataType::LONGVARBINARY:
             default:
-                rData.SetStringInterned(mrCache.InternString(mxRow->getString(nCol+1)));
+                // nCol is 0-based, and the left-most column always has nCol == 0.
+                rData.SetStringInterned(
+                    mrCache.InternString(nCol, mxRow->getString(nCol+1)));
         }
     }
     catch (uno::Exception&)