[Libreoffice-commits] core.git: sc/CppunitTest_sc_ucalc.mk sc/inc sc/Library_sc.mk sc/source
Kohei Yoshida
kohei.yoshida at collabora.com
Tue Jun 6 03:00:54 UTC 2017
sc/CppunitTest_sc_ucalc.mk | 6
sc/Library_sc.mk | 6
sc/inc/dpcache.hxx | 10 -
sc/source/core/data/dpcache.cxx | 376 ++++++++++++++++++++++++++++++---------
sc/source/core/data/dpobject.cxx | 4
5 files changed, 317 insertions(+), 85 deletions(-)
New commits:
commit a078328e91ab9cbd78a92608c1abdc2c57ff9ac5
Author: Kohei Yoshida <kohei.yoshida at collabora.com>
Date: Tue May 23 22:09:40 2017 -0400
tdf#107945: prepare for future multi-threading of pivot cache...
It's disabled for now. We need to first make the edit engine and
a few other places thread-safe before we can parallelize this code.
Change-Id: Ie09536964ece42d43f505afc5e2611d469cc5c95
Reviewed-on: https://gerrit.libreoffice.org/38424
Tested-by: Jenkins <ci at libreoffice.org>
Reviewed-by: Kohei Yoshida <libreoffice at kohei.us>
diff --git a/sc/CppunitTest_sc_ucalc.mk b/sc/CppunitTest_sc_ucalc.mk
index 5e91862f416c..d918182a0c4f 100644
--- a/sc/CppunitTest_sc_ucalc.mk
+++ b/sc/CppunitTest_sc_ucalc.mk
@@ -112,6 +112,12 @@ $(eval $(call gb_CppunitTest_add_libs,sc_ucalc,\
))
endif
+ifeq ($(OS), $(filter LINUX %BSD SOLARIS, $(OS)))
+$(eval $(call gb_CppunitTest_add_libs,sc_ucalc,\
+ -lpthread \
+))
+endif
+
$(eval $(call gb_CppunitTest_use_configuration,sc_ucalc))
# vim: set noet sw=4 ts=4:
diff --git a/sc/Library_sc.mk b/sc/Library_sc.mk
index e72c61546f05..5b7d2fcf9275 100644
--- a/sc/Library_sc.mk
+++ b/sc/Library_sc.mk
@@ -696,6 +696,12 @@ $(eval $(call gb_Library_add_libs,sc,\
))
endif
+ifeq ($(OS), $(filter LINUX %BSD SOLARIS, $(OS)))
+$(eval $(call gb_Library_add_libs,sc,\
+ -lpthread \
+))
+endif
+
$(eval $(call gb_SdiTarget_SdiTarget,sc/sdi/scslots,sc/sdi/scalc))
$(eval $(call gb_SdiTarget_set_include,sc/sdi/scslots,\
diff --git a/sc/inc/dpcache.hxx b/sc/inc/dpcache.hxx
index 515a3fae6a19..3bac2751600d 100644
--- a/sc/inc/dpcache.hxx
+++ b/sc/inc/dpcache.hxx
@@ -42,9 +42,9 @@ struct ScDPNumGroupInfo;
*/
class SC_DLLPUBLIC ScDPCache
{
- typedef std::unordered_set<OUString, OUStringHash> StringSetType;
-
public:
+ typedef std::unordered_set<OUString, OUStringHash> StringSetType;
+ typedef mdds::flat_segment_tree<SCROW, bool> EmptyRowsType;
typedef std::vector<ScDPItemData> ScDPItemDataVec;
typedef std::set<ScDPObject*> ScDPObjectSet;
typedef std::vector<SCROW> IndexArrayType;
@@ -117,17 +117,17 @@ private:
FieldsType maFields;
GroupFieldsType maGroupFields;
- StringSetType maStringPool;
+ std::vector<StringSetType> maStringPools; // one for each field.
std::vector<OUString> maLabelNames; // Stores dimension names and the data layout dimension name at position 0.
- mdds::flat_segment_tree<SCROW, bool> maEmptyRows;
+ EmptyRowsType maEmptyRows;
SCROW mnDataSize;
SCROW mnRowCount;
bool mbDisposing;
public:
- rtl_uString* InternString( const OUString& rStr );
+ rtl_uString* InternString( size_t nDim, const OUString& rStr );
void AddReference(ScDPObject* pObj) const;
void RemoveReference(ScDPObject* pObj) const;
const ScDPObjectSet& GetAllReferences() const;
diff --git a/sc/source/core/data/dpcache.cxx b/sc/source/core/data/dpcache.cxx
index 0383108bf81b..2064d840496d 100644
--- a/sc/source/core/data/dpcache.cxx
+++ b/sc/source/core/data/dpcache.cxx
@@ -43,6 +43,17 @@
#include <com/sun/star/sheet/DataPilotFieldGroupBy.hpp>
#endif
+// TODO : Threaded pivot cache operation is disabled until we can figure out
+// ways to make the edit engine and number formatter codes thread-safe in a
+// proper fashion.
+#define ENABLE_THREADED_PIVOT_CACHE 0
+
+#if ENABLE_THREADED_PIVOT_CACHE
+#include <thread>
+#include <future>
+#include <queue>
+#endif
+
using namespace ::com::sun::star;
using ::com::sun::star::uno::Exception;
@@ -106,6 +117,17 @@ private:
ScDocument* mpDoc;
};
+rtl_uString* internString( ScDPCache::StringSetType& rPool, const OUString& rStr )
+{
+ ScDPCache::StringSetType::iterator it = rPool.find(rStr);
+ if (it != rPool.end())
+ // In the pool.
+ return (*it).pData;
+
+ std::pair<ScDPCache::StringSetType::iterator, bool> r = rPool.insert(rStr);
+ return r.second ? (*r.first).pData : nullptr;
+}
+
OUString createLabelString( const ScDocument* pDoc, SCCOL nCol, const ScRefCellValue& rCell )
{
OUString aDocStr = rCell.getRawString(pDoc);
@@ -125,16 +147,15 @@ OUString createLabelString( const ScDocument* pDoc, SCCOL nCol, const ScRefCellV
}
void initFromCell(
- ScDPCache& rCache, ScDocument* pDoc, const ScAddress& rPos,
- const ScRefCellValue& rCell,
- ScDPItemData& rData, sal_uInt32& rNumFormat)
+ ScDPCache::StringSetType& rStrPool, ScDocument* pDoc, const ScAddress& rPos,
+ const ScRefCellValue& rCell, ScDPItemData& rData, sal_uInt32& rNumFormat)
{
OUString aDocStr = rCell.getRawString(pDoc);
rNumFormat = 0;
if (rCell.hasError())
{
- rData.SetErrorStringInterned(rCache.InternString(aDocStr));
+ rData.SetErrorStringInterned(internString(rStrPool, aDocStr));
}
else if (rCell.hasNumeric())
{
@@ -144,7 +165,7 @@ void initFromCell(
}
else if (!rCell.isEmpty())
{
- rData.SetStringInterned(rCache.InternString(aDocStr));
+ rData.SetStringInterned(internString(rStrPool, aDocStr));
}
else
rData.SetEmpty();
@@ -273,42 +294,249 @@ void processBuckets(std::vector<Bucket>& aBuckets, ScDPCache::Field& rField)
std::for_each(itBeg, itUniqueEnd, PushBackValue(rField.maItems));
}
+struct InitColumnData
+{
+ ScDPCache::EmptyRowsType maEmptyRows;
+ OUString maLabel;
+
+ ScDPCache::StringSetType* mpStrPool;
+ ScDPCache::Field* mpField;
+
+ SCCOL mnCol;
+
+ InitColumnData() :
+ maEmptyRows(0, MAXROWCOUNT, true),
+ mpStrPool(nullptr),
+ mpField(nullptr),
+ mnCol(-1) {}
+
+ void init( SCCOL nCol, ScDPCache::StringSetType* pStrPool, ScDPCache::Field* pField )
+ {
+ mpStrPool = pStrPool;
+ mpField = pField;
+ mnCol = nCol;
+ }
+};
+
+struct InitDocData
+{
+ ScDocument* mpDoc;
+ SCTAB mnDocTab;
+ SCROW mnStartRow;
+ SCROW mnEndRow;
+ bool mbTailEmptyRows;
+
+ InitDocData() :
+ mpDoc(nullptr),
+ mnDocTab(-1),
+ mnStartRow(-1),
+ mnEndRow(-1),
+ mbTailEmptyRows(false) {}
+};
+
+typedef std::unordered_set<OUString, OUStringHash> LabelSet;
+
+class InsertLabel : public std::unary_function<OUString, void>
+{
+ LabelSet& mrNames;
+public:
+ explicit InsertLabel(LabelSet& rNames) : mrNames(rNames) {}
+ void operator() (const OUString& r)
+ {
+ mrNames.insert(r);
+ }
+};
+
+std::vector<OUString> normalizeLabels( const std::vector<InitColumnData>& rColData )
+{
+ std::vector<OUString> aLabels(1u, ScGlobal::GetRscString(STR_PIVOT_DATA));
+
+ LabelSet aExistingNames;
+
+ for (const InitColumnData& rCol : rColData)
+ {
+ const OUString& rLabel = rCol.maLabel;
+ sal_Int32 nSuffix = 1;
+ OUString aNewLabel = rLabel;
+ while (true)
+ {
+ if (!aExistingNames.count(aNewLabel))
+ {
+ // this is a unique label.
+ aLabels.push_back(aNewLabel);
+ aExistingNames.insert(aNewLabel);
+ break;
+ }
+
+ // This name already exists.
+ OUStringBuffer aBuf(rLabel);
+ aBuf.append(++nSuffix);
+ aNewLabel = aBuf.makeStringAndClear();
+ }
+ }
+
+ return aLabels;
+}
+
+void initColumnFromDoc( InitDocData& rDocData, InitColumnData &rColData )
+{
+ ScDPCache::Field& rField = *rColData.mpField;
+ ScDocument* pDoc = rDocData.mpDoc;
+ SCTAB nDocTab = rDocData.mnDocTab;
+ SCCOL nCol = rColData.mnCol;
+ SCROW nStartRow = rDocData.mnStartRow;
+ SCROW nEndRow = rDocData.mnEndRow;
+ bool bTailEmptyRows = rDocData.mbTailEmptyRows;
+
+ std::unique_ptr<sc::ColumnIterator> pIter =
+ pDoc->GetColumnIterator(nDocTab, nCol, nStartRow, nEndRow);
+ assert(pIter);
+ assert(pIter->hasCell());
+
+ ScDPItemData aData;
+
+ rColData.maLabel = createLabelString(pDoc, nCol, pIter->getCell());
+ pIter->next();
+
+ std::vector<Bucket> aBuckets;
+ aBuckets.reserve(nEndRow-nStartRow); // skip the topmost label cell.
+
+ // Push back all original values.
+ for (SCROW i = 0, n = nEndRow-nStartRow; i < n; ++i, pIter->next())
+ {
+ assert(pIter->hasCell());
+
+ sal_uInt32 nNumFormat = 0;
+ ScAddress aPos(nCol, pIter->getRow(), nDocTab);
+ initFromCell(*rColData.mpStrPool, pDoc, aPos, pIter->getCell(), aData, nNumFormat);
+
+ aBuckets.emplace_back(aData, i);
+
+ if (!aData.IsEmpty())
+ {
+ rColData.maEmptyRows.insert_back(i, i+1, false);
+ if (nNumFormat)
+ // Only take non-default number format.
+ rField.mnNumFormat = nNumFormat;
+ }
+ }
+
+ processBuckets(aBuckets, rField);
+
+ if (bTailEmptyRows)
+ {
+ // If the last item is not empty, append one. Note that the items
+ // are sorted, and empty item should come last when sorted.
+ if (rField.maItems.empty() || !rField.maItems.back().IsEmpty())
+ {
+ aData.SetEmpty();
+ rField.maItems.push_back(aData);
+ }
+ }
+}
+
+#if ENABLE_THREADED_PIVOT_CACHE
+
+class ThreadQueue
+{
+ using FutureType = std::future<void>;
+ std::queue<FutureType> maQueue;
+ std::mutex maMutex;
+ std::condition_variable maCond;
+
+ size_t mnMaxQueue;
+
+public:
+ ThreadQueue( size_t nMaxQueue ) : mnMaxQueue(nMaxQueue) {}
+
+ void push( std::function<void()> aFunc )
+ {
+ std::unique_lock<std::mutex> lock(maMutex);
+
+ while (maQueue.size() >= mnMaxQueue)
+ maCond.wait(lock);
+
+ FutureType f = std::async(std::launch::async, aFunc);
+ maQueue.push(std::move(f));
+ lock.unlock();
+
+ maCond.notify_one();
+ }
+
+ void waitForOne()
+ {
+ std::unique_lock<std::mutex> lock(maMutex);
+
+ while (maQueue.empty())
+ maCond.wait(lock);
+
+ FutureType ret = std::move(maQueue.front());
+ maQueue.pop();
+ lock.unlock();
+
+ ret.get(); // This may throw if an exception was thrown on the async thread.
+
+ maCond.notify_one();
+ }
+};
+
+class ThreadScopedGuard
+{
+ std::thread maThread;
+public:
+ ThreadScopedGuard(std::thread thread) : maThread(std::move(thread)) {}
+ ThreadScopedGuard(ThreadScopedGuard&& other) : maThread(std::move(other.maThread)) {}
+
+ ThreadScopedGuard(const ThreadScopedGuard&) = delete;
+ ThreadScopedGuard& operator= (const ThreadScopedGuard&) = delete;
+
+ ~ThreadScopedGuard()
+ {
+ maThread.join();
+ }
+};
+
+#endif
+
}
void ScDPCache::InitFromDoc(ScDocument* pDoc, const ScRange& rRange)
{
Clear();
+ InitDocData aDocData;
+ aDocData.mpDoc = pDoc;
+
// Make sure the formula cells within the data range are interpreted
// during this call, for this method may be called from the interpretation
// of GETPIVOTDATA, which disables nested formula interpretation without
// increasing the macro level.
MacroInterpretIncrementer aMacroInc(pDoc);
- SCROW nStartRow = rRange.aStart.Row(); // start of data
- SCROW nEndRow = rRange.aEnd.Row();
+ aDocData.mnStartRow = rRange.aStart.Row(); // start of data
+ aDocData.mnEndRow = rRange.aEnd.Row();
// Sanity check
- if (!ValidRow(nStartRow) || !ValidRow(nEndRow) || nEndRow <= nStartRow)
+ if (!ValidRow(aDocData.mnStartRow) || !ValidRow(aDocData.mnEndRow) || aDocData.mnEndRow <= aDocData.mnStartRow)
return;
- sal_uInt16 nStartCol = rRange.aStart.Col();
- sal_uInt16 nEndCol = rRange.aEnd.Col();
- sal_uInt16 nDocTab = rRange.aStart.Tab();
+ SCCOL nStartCol = rRange.aStart.Col();
+ SCCOL nEndCol = rRange.aEnd.Col();
+ aDocData.mnDocTab = rRange.aStart.Tab();
mnColumnCount = nEndCol - nStartCol + 1;
// this row count must include the trailing empty rows.
- mnRowCount = nEndRow - nStartRow; // skip the topmost label row.
+ mnRowCount = aDocData.mnEndRow - aDocData.mnStartRow; // skip the topmost label row.
// Skip trailing empty rows if exists.
SCCOL nCol1 = nStartCol, nCol2 = nEndCol;
- SCROW nRow1 = nStartRow, nRow2 = nEndRow;
- pDoc->ShrinkToDataArea(nDocTab, nCol1, nRow1, nCol2, nRow2);
- bool bTailEmptyRows = nEndRow > nRow2; // Trailing empty rows exist.
- nEndRow = nRow2;
+ SCROW nRow1 = aDocData.mnStartRow, nRow2 = aDocData.mnEndRow;
+ pDoc->ShrinkToDataArea(aDocData.mnDocTab, nCol1, nRow1, nCol2, nRow2);
+ aDocData.mbTailEmptyRows = aDocData.mnEndRow > nRow2; // Trailing empty rows exist.
+ aDocData.mnEndRow = nRow2;
- if (nEndRow <= nStartRow)
+ if (aDocData.mnEndRow <= aDocData.mnStartRow)
{
// Check this again since the end row position has changed. It's
// possible that the new end row becomes lower than the start row
@@ -317,6 +545,8 @@ void ScDPCache::InitFromDoc(ScDocument* pDoc, const ScRange& rRange)
return;
}
+ maStringPools.resize(mnColumnCount);
+ std::vector<InitColumnData> aColData(mnColumnCount);
maFields.reserve(mnColumnCount);
for (size_t i = 0; i < static_cast<size_t>(mnColumnCount); ++i)
maFields.push_back(o3tl::make_unique<Field>());
@@ -326,52 +556,61 @@ void ScDPCache::InitFromDoc(ScDocument* pDoc, const ScRange& rRange)
// Ensure that none of the formula cells in the data range are dirty.
pDoc->EnsureFormulaCellResults(rRange);
- ScDPItemData aData;
- for (sal_uInt16 nCol = nStartCol; nCol <= nEndCol; ++nCol)
+#if ENABLE_THREADED_PIVOT_CACHE
+ ThreadQueue aQueue(std::thread::hardware_concurrency());
+
+ auto aFuncLaunchFieldThreads = [&]()
{
- std::unique_ptr<sc::ColumnIterator> pIter =
- pDoc->GetColumnIterator(nDocTab, nCol, nStartRow, nEndRow);
- assert(pIter);
- assert(pIter->hasCell());
+ for (sal_uInt16 nCol = nStartCol; nCol <= nEndCol; ++nCol)
+ {
+ size_t nDim = nCol - nStartCol;
+ InitColumnData& rColData = aColData[nDim];
+ rColData.init(nCol, &maStringPools[nDim], maFields[nDim].get());
- AddLabel(createLabelString(pDoc, nCol, pIter->getCell()));
- pIter->next();
+ auto func = [&aDocData,&rColData]()
+ {
+ initColumnFromDoc(aDocData, rColData);
+ };
- Field& rField = *maFields[nCol-nStartCol].get();
- std::vector<Bucket> aBuckets;
- aBuckets.reserve(nEndRow-nStartRow); // skip the topmost label cell.
+ aQueue.push(std::move(func));
+ }
+ };
- // Push back all original values.
- for (SCROW i = 0, n = nEndRow-nStartRow; i < n; ++i, pIter->next())
- {
- assert(pIter->hasCell());
+ {
+ // Launch a separate thread that in turn spawns async threads to populate the fields.
+ std::thread t(aFuncLaunchFieldThreads);
+ ThreadScopedGuard sg(std::move(t));
- sal_uInt32 nNumFormat = 0;
- ScAddress aPos(nCol, pIter->getRow(), nDocTab);
- initFromCell(*this, pDoc, aPos, pIter->getCell(), aData, nNumFormat);
+ // Wait for all the async threads to complete on the main thread.
+ for (SCCOL i = 0; i < mnColumnCount; ++i)
+ aQueue.waitForOne();
+ }
- aBuckets.push_back(Bucket(aData, i));
+#else
+ for (sal_uInt16 nCol = nStartCol; nCol <= nEndCol; ++nCol)
+ {
+ size_t nDim = nCol - nStartCol;
+ InitColumnData& rColData = aColData[nDim];
+ rColData.init(nCol, &maStringPools[nDim], maFields[nDim].get());
- if (!aData.IsEmpty())
- {
- maEmptyRows.insert_back(i, i+1, false);
- if (nNumFormat)
- // Only take non-default number format.
- rField.mnNumFormat = nNumFormat;
- }
- }
+ initColumnFromDoc(aDocData, rColData);
+ }
+#endif
+
+ maLabelNames = normalizeLabels(aColData);
- processBuckets(aBuckets, rField);
+ // Merge all non-empty rows data.
+ for (const InitColumnData& rCol : aColData)
+ {
+ EmptyRowsType::const_segment_iterator it = rCol.maEmptyRows.begin_segment();
+ EmptyRowsType::const_segment_iterator ite = rCol.maEmptyRows.end_segment();
+ EmptyRowsType::const_iterator pos = maEmptyRows.begin();
- if (bTailEmptyRows)
+ for (; it != ite; ++it)
{
- // If the last item is not empty, append one. Note that the items
- // are sorted, and empty item should come last when sorted.
- if (rField.maItems.empty() || !rField.maItems.back().IsEmpty())
- {
- aData.SetEmpty();
- rField.maItems.push_back(aData);
- }
+ if (!it->value)
+ // Non-empty segment found. Record it.
+ pos = maEmptyRows.insert(pos, it->start, it->end, false).first;
}
}
@@ -385,6 +624,7 @@ bool ScDPCache::InitFromDataBase(DBConnector& rDB)
try
{
mnColumnCount = rDB.getColumnCount();
+ maStringPools.resize(mnColumnCount);
maFields.clear();
maFields.reserve(mnColumnCount);
for (size_t i = 0; i < static_cast<size_t>(mnColumnCount); ++i)
@@ -690,23 +930,6 @@ OUString ScDPCache::GetDimensionName(std::vector<OUString>::size_type nDim) cons
return OUString();
}
-namespace {
-
-typedef std::unordered_set<OUString, OUStringHash> LabelSet;
-
-class InsertLabel : public std::unary_function<OUString, void>
-{
- LabelSet& mrNames;
-public:
- explicit InsertLabel(LabelSet& rNames) : mrNames(rNames) {}
- void operator() (const OUString& r)
- {
- mrNames.insert(r);
- }
-};
-
-}
-
void ScDPCache::PostInit()
{
OSL_ENSURE(!maFields.empty(), "Cache not initialized!");
@@ -734,7 +957,7 @@ void ScDPCache::Clear()
maLabelNames.clear();
maGroupFields.clear();
maEmptyRows.clear();
- maStringPool.clear();
+ maStringPools.clear();
}
void ScDPCache::AddLabel(const OUString& rLabel)
@@ -901,15 +1124,10 @@ SCCOL ScDPCache::GetDimensionIndex(const OUString& sName) const
return -1;
}
-rtl_uString* ScDPCache::InternString( const OUString& rStr )
+rtl_uString* ScDPCache::InternString( size_t nDim, const OUString& rStr )
{
- StringSetType::iterator it = maStringPool.find(rStr);
- if (it != maStringPool.end())
- // In the pool.
- return (*it).pData;
-
- std::pair<StringSetType::iterator, bool> r = maStringPool.insert(rStr);
- return r.second ? (*r.first).pData : nullptr;
+ assert(nDim < maStringPools.size());
+ return internString(maStringPools[nDim], rStr);
}
void ScDPCache::AddReference(ScDPObject* pObj) const
diff --git a/sc/source/core/data/dpobject.cxx b/sc/source/core/data/dpobject.cxx
index 19dc7e846d6d..810742838814 100644
--- a/sc/source/core/data/dpobject.cxx
+++ b/sc/source/core/data/dpobject.cxx
@@ -247,7 +247,9 @@ void DBConnector::getValue(long nCol, ScDPItemData &rData, short& rNumType) cons
case sdbc::DataType::VARBINARY:
case sdbc::DataType::LONGVARBINARY:
default:
- rData.SetStringInterned(mrCache.InternString(mxRow->getString(nCol+1)));
+ // nCol is 0-based, and the left-most column always has nCol == 0.
+ rData.SetStringInterned(
+ mrCache.InternString(nCol, mxRow->getString(nCol+1)));
}
}
catch (uno::Exception&)
More information about the Libreoffice-commits
mailing list