[Libreoffice-commits] .: sc/inc sc/source

Kohei Yoshida kohei at kemper.freedesktop.org
Fri Mar 16 20:25:08 PDT 2012


 sc/inc/dpcache.hxx                 |    5 +++
 sc/inc/dpitemdata.hxx              |   10 +++++--
 sc/source/core/data/dpcache.cxx    |   28 ++++++++++++++++------
 sc/source/core/data/dpitemdata.cxx |   47 ++++++++++++++++++++++++++++++-------
 4 files changed, 72 insertions(+), 18 deletions(-)

New commits:
commit f81d15c3bab32938b5b475e16ae2a746a7a32ea9
Author: Kohei Yoshida <kohei.yoshida at gmail.com>
Date:   Fri Mar 16 21:44:55 2012 -0400

    Use shared string pool to share string instances among string item values.
    
    This brings down the reload time from 22 seconds to 4.3 seconds with
    my test document.  This is what I've been looking for!

diff --git a/sc/inc/dpcache.hxx b/sc/inc/dpcache.hxx
index 839f64b..2591b3d 100644
--- a/sc/inc/dpcache.hxx
+++ b/sc/inc/dpcache.hxx
@@ -36,6 +36,7 @@
 #include <boost/noncopyable.hpp>
 #include <boost/scoped_ptr.hpp>
 #include <boost/ptr_container/ptr_vector.hpp>
+#include <boost/unordered_set.hpp>
 #include <mdds/flat_segment_tree.hpp>
 
 #include <vector>
@@ -58,6 +59,8 @@ struct ScDPNumGroupInfo;
  */
 class SC_DLLPUBLIC ScDPCache : boost::noncopyable
 {
+    typedef boost::unordered_set<rtl::OUString, rtl::OUStringHash> StringSetType;
+
 public:
     typedef std::vector<ScDPItemData> ItemsType;
     typedef std::set<ScDPObject*> ObjectSetType;
@@ -112,6 +115,7 @@ private:
 
     FieldsType maFields;
     GroupFieldsType maGroupFields;
+    mutable StringSetType maStringPool;
 
     LabelsType maLabelNames;    // Stores dimension names.
     mdds::flat_segment_tree<SCROW, bool> maEmptyRows;
@@ -119,6 +123,7 @@ private:
     bool mbDisposing;
 
 public:
+    const rtl::OUString* InternString(const rtl::OUString& rStr) const;
     void AddReference(ScDPObject* pObj) const;
     void RemoveReference(ScDPObject* pObj) const;
     const ObjectSetType& GetAllReferences() const;
diff --git a/sc/inc/dpitemdata.hxx b/sc/inc/dpitemdata.hxx
index e30eae3..20535a8 100644
--- a/sc/inc/dpitemdata.hxx
+++ b/sc/inc/dpitemdata.hxx
@@ -49,7 +49,7 @@ class SC_DLLPUBLIC ScDPItemData
     friend class ScDPCache;
 
 public:
-    enum Type { GroupValue = 0, RangeStart, Value, String, Error, Empty };
+    enum Type { GroupValue = 0, RangeStart = 1, Value = 2, String = 3, Error = 4, Empty = 5 };
 
     static const sal_Int32 DateFirst;
     static const sal_Int32 DateLast;
@@ -63,12 +63,13 @@ public:
 private:
 
     union {
-        rtl::OUString* mpString;
+        const rtl::OUString* mpString;
         GroupValueAttr maGroupValue;
         double mfValue;
     };
 
-    Type meType;
+    sal_uInt8 meType:3;
+    bool mbStringInterned:1;
 
     void DisposeString();
 
@@ -79,16 +80,19 @@ public:
     ScDPItemData();
     ScDPItemData(const ScDPItemData& r);
     ScDPItemData(const rtl::OUString& rStr);
+    ScDPItemData(const rtl::OUString* pStr);
     ScDPItemData(sal_Int32 nGroupType, sal_Int32 nValue);
     ~ScDPItemData();
 
     Type GetType() const;
     void SetString(const rtl::OUString& rS);
+    void SetString(const rtl::OUString* pS);
     void SetValue(double fVal);
     void SetRangeStart(double fVal);
     void SetRangeFirst();
     void SetRangeLast();
     void SetErrorString(const rtl::OUString& rS);
+    void SetErrorString(const rtl::OUString* pS);
     bool IsCaseInsEqual(const ScDPItemData& r) const;
 
     // exact equality
diff --git a/sc/source/core/data/dpcache.cxx b/sc/source/core/data/dpcache.cxx
index 216c2ff..369c5b3 100644
--- a/sc/source/core/data/dpcache.cxx
+++ b/sc/source/core/data/dpcache.cxx
@@ -138,7 +138,9 @@ rtl::OUString createLabelString(ScDocument* pDoc, SCCOL nCol, SCROW nRow, SCTAB
     return aDocStr;
 }
 
-void initFromCell(ScDocument* pDoc, SCCOL nCol, SCROW nRow, SCTAB nTab, ScDPItemData& rData, sal_uLong& rNumFormat)
+void initFromCell(
+    ScDPCache& rCache, ScDocument* pDoc, SCCOL nCol, SCROW nRow, SCTAB nTab,
+    ScDPItemData& rData, sal_uLong& rNumFormat)
 {
     rtl::OUString aDocStr = pDoc->GetString(nCol, nRow, nTab);
     rNumFormat = 0;
@@ -147,7 +149,7 @@ void initFromCell(ScDocument* pDoc, SCCOL nCol, SCROW nRow, SCTAB nTab, ScDPItem
 
     if (pDoc->GetErrCode(aPos))
     {
-        rData.SetErrorString(aDocStr);
+        rData.SetErrorString(rCache.InternString(aDocStr));
     }
     else if (pDoc->HasValueData(nCol, nRow, nTab))
     {
@@ -157,12 +159,12 @@ void initFromCell(ScDocument* pDoc, SCCOL nCol, SCROW nRow, SCTAB nTab, ScDPItem
     }
     else if (pDoc->HasData(nCol, nRow, nTab))
     {
-        rData.SetString(aDocStr);
+        rData.SetString(rCache.InternString(aDocStr));
     }
 }
 
 void getItemValue(
-    ScDPItemData& rData, const Reference<sdbc::XRow>& xRow, sal_Int32 nType,
+    ScDPCache& rCache, ScDPItemData& rData, const Reference<sdbc::XRow>& xRow, sal_Int32 nType,
     long nCol, const Date& rNullDate, short& rNumType)
 {
     rNumType = NUMBERFORMAT_NUMBER;
@@ -232,7 +234,7 @@ void getItemValue(
             case sdbc::DataType::VARBINARY:
             case sdbc::DataType::LONGVARBINARY:
             default:
-                rData.SetString(xRow->getString(nCol));
+                rData.SetString(rCache.InternString(xRow->getString(nCol)));
         }
     }
     catch (uno::Exception&)
@@ -380,7 +382,7 @@ bool ScDPCache::InitFromDoc(ScDocument* pDoc, const ScRange& rRange)
         {
             SCROW nRow = i + nOffset;
             sal_uLong nNumFormat = 0;
-            initFromCell(pDoc, nCol, nRow, nDocTab, aData, nNumFormat);
+            initFromCell(*this, pDoc, nCol, nRow, nDocTab, aData, nNumFormat);
             aBuckets.push_back(Bucket(aData, 0, i));
 
             if (!aData.IsEmpty())
@@ -444,7 +446,7 @@ bool ScDPCache::InitFromDataBase (const Reference<sdbc::XRowSet>& xRowSet, const
             {
                 SCROW nRow = 0;
                 short nFormatType = NUMBERFORMAT_UNDEFINED;
-                getItemValue(aData, xRow, aColTypes[nCol], nCol+1, rNullDate, nFormatType);
+                getItemValue(*this, aData, xRow, aColTypes[nCol], nCol+1, rNullDate, nFormatType);
                 aBuckets.push_back(Bucket(aData, 0, nRow++));
                 if (!aData.IsEmpty())
                 {
@@ -730,6 +732,7 @@ void ScDPCache::Clear()
     maLabelNames.clear();
     maGroupFields.clear();
     maEmptyRows.clear();
+    maStringPool.clear();
 }
 
 void ScDPCache::AddLabel(const rtl::OUString& rLabel)
@@ -864,6 +867,17 @@ SCCOL ScDPCache::GetDimensionIndex(const rtl::OUString& sName) const
     return -1;
 }
 
+// Returns the address of the pooled string equal to rStr, pooling a copy of rStr first when none is stored yet.
+const rtl::OUString* ScDPCache::InternString(const rtl::OUString& rStr) const
+{
+    StringSetType::iterator itPooled = maStringPool.find(rStr);
+    if (itPooled != maStringPool.end())
+        return &(*itPooled);
+
+    std::pair<StringSetType::iterator, bool> aInserted = maStringPool.insert(rStr);
+    return aInserted.second ? &(*aInserted.first) : NULL;
+}
+
 void ScDPCache::AddReference(ScDPObject* pObj) const
 {
     maRefObjects.insert(pObj);
diff --git a/sc/source/core/data/dpitemdata.cxx b/sc/source/core/data/dpitemdata.cxx
index 7c5bae5..fe2c540 100644
--- a/sc/source/core/data/dpitemdata.cxx
+++ b/sc/source/core/data/dpitemdata.cxx
@@ -70,6 +70,10 @@ sal_Int32 ScDPItemData::Compare(const ScDPItemData& rA, const ScDPItemData& rB)
         }
         case String:
         case Error:
+            if (rA.mpString == rB.mpString)
+                // strings may be interned.
+                return 0;
+
             return ScGlobal::GetCollator()->compareString(rA.GetString(), rB.GetString());
         default:
             ;
@@ -78,16 +82,16 @@ sal_Int32 ScDPItemData::Compare(const ScDPItemData& rA, const ScDPItemData& rB)
 }
 
 ScDPItemData::ScDPItemData() :
-    mfValue(0.0), meType(Empty) {}
+    mfValue(0.0), meType(Empty), mbStringInterned(false) {}
 
 ScDPItemData::ScDPItemData(const ScDPItemData& r) :
-    meType(r.meType)
+    meType(r.meType), mbStringInterned(r.mbStringInterned)
 {
     switch (r.meType)
     {
         case String:
         case Error:
-            mpString = new rtl::OUString(*r.mpString);
+            mpString = mbStringInterned ? r.mpString :  new rtl::OUString(*r.mpString);
         break;
         case Value:
         case RangeStart:
@@ -105,12 +109,20 @@ ScDPItemData::ScDPItemData(const ScDPItemData& r) :
 
 void ScDPItemData::DisposeString()
 {
-    if (meType == String || meType == Error)
-        delete mpString;
+    // Only a string this item allocated itself is deleted here; an interned
+    // string is owned elsewhere and is left untouched.
+    if (!mbStringInterned && (meType == String || meType == Error))
+        delete mpString;
+
+    // Always clear the flag; the pointer-taking setters turn it back on.
+    mbStringInterned = false;
 }
 
 ScDPItemData::ScDPItemData(const rtl::OUString& rStr) :
-    mpString(new rtl::OUString(rStr)), meType(String) {}
+    mpString(new rtl::OUString(rStr)), meType(String), mbStringInterned(false) {}
+
+// Wraps a string owned elsewhere: the item is flagged as interned, so DisposeString() will not delete pStr.
+ScDPItemData::ScDPItemData(const rtl::OUString* pStr) : mpString(pStr), meType(String), mbStringInterned(true) {}
 
 ScDPItemData::ScDPItemData(sal_Int32 nGroupType, sal_Int32 nValue) :
     meType(GroupValue)
@@ -126,7 +138,7 @@ ScDPItemData::~ScDPItemData()
 
 ScDPItemData::Type ScDPItemData::GetType() const
 {
-    return meType;
+    return static_cast<Type>(meType);
 }
 
 void ScDPItemData::SetString(const rtl::OUString& rS)
@@ -136,6 +148,14 @@ void ScDPItemData::SetString(const rtl::OUString& rS)
     meType = String;
 }
 
+void ScDPItemData::SetString(const rtl::OUString* pS)
+{
+    DisposeString();         // release a previously owned string before the union is overwritten
+    meType = String;
+    mbStringInterned = true; // pS is not owned by this item, so DisposeString() must skip it
+    mpString = pS;
+}
+
 void ScDPItemData::SetValue(double fVal)
 {
     DisposeString();
@@ -170,6 +190,12 @@ void ScDPItemData::SetErrorString(const rtl::OUString& rS)
     meType = Error;
 }
 
+void ScDPItemData::SetErrorString(const rtl::OUString* pS)
+{
+    SetString(pS);   // stores pS as an interned string (disposes the old value, sets the interned flag)
+    meType = Error;  // then re-tags the item, since SetString() marks it as String
+}
+
 bool ScDPItemData::IsCaseInsEqual(const ScDPItemData& r) const
 {
     if (meType != r.meType)
@@ -187,6 +213,9 @@ bool ScDPItemData::IsCaseInsEqual(const ScDPItemData& r) const
             ;
     }
 
+    if (mbStringInterned && r.mbStringInterned && mpString == r.mpString)
+        return true; // same pooled instance; distinct instances may differ only in case, so compare below
+
     return ScGlobal::GetpTransliteration()->isEqual(GetString(), r.GetString());
 }
 
@@ -225,11 +254,13 @@ ScDPItemData& ScDPItemData::operator= (const ScDPItemData& r)
 {
     DisposeString();
     meType = r.meType;
+    mbStringInterned = false;
     switch (r.meType)
     {
         case String:
         case Error:
-            mpString = new rtl::OUString(*r.mpString);
+            mpString = r.mbStringInterned ? r.mpString : new rtl::OUString(*r.mpString);
+            mbStringInterned = r.mbStringInterned;
         break;
         case Value:
         case RangeStart:


More information about the Libreoffice-commits mailing list