[Libreoffice-commits] core.git: sc/qa svl/qa svl/source

Noel Grandin (via logerrit) logerrit at kemper.freedesktop.org
Fri Jun 12 06:51:03 UTC 2020


 sc/qa/unit/ucalc.cxx                 |    8 +--
 svl/qa/unit/svl.cxx                  |   25 +++++++++--
 svl/source/misc/sharedstringpool.cxx |   74 +++++++++++++++++------------------
 3 files changed, 62 insertions(+), 45 deletions(-)

New commits:
commit 5af636c5021ecf7fba8f5f34cc6af929f1e04b4c
Author:     Noel Grandin <noel.grandin at collabora.co.uk>
AuthorDate: Wed Jun 10 15:27:48 2020 +0200
Commit:     Noel Grandin <noel.grandin at collabora.co.uk>
CommitDate: Fri Jun 12 08:50:29 2020 +0200

    fix ASAN in SharedStringPool
    
    regression from
        commit 3581f1d71ae0d431ba28c0f3b7b263ff6212ce7b
        optimize SharedStringPool::purge() and fix tests
    
    which results in us potentially de-referencing an already de-allocated
    OUString object in the first loop in purge().
    
    So switch to a different strategy, which only needs one data structure,
    instead of two.
    
    Change-Id: Iaac6beda48459643afdb7b14ce7d39d68a93339c
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/95226
    Tested-by: Jenkins
    Reviewed-by: Noel Grandin <noel.grandin at collabora.co.uk>

diff --git a/sc/qa/unit/ucalc.cxx b/sc/qa/unit/ucalc.cxx
index 49cd337570a1..eb9636caaea1 100644
--- a/sc/qa/unit/ucalc.cxx
+++ b/sc/qa/unit/ucalc.cxx
@@ -207,25 +207,25 @@ void Test::testSharedStringPool()
     // Check the string counts after purging. Purging shouldn't remove any strings in this case.
     svl::SharedStringPool& rPool = m_pDoc->GetSharedStringPool();
     rPool.purge();
-    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), rPool.getCount());
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(5), rPool.getCount());
     CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), rPool.getCountIgnoreCase());
 
     // Clear A1 and purge again.
     clearRange(m_pDoc, ScAddress(0,0,0));
     rPool.purge();
-    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), rPool.getCount());
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(5), rPool.getCount());
     CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), rPool.getCountIgnoreCase());
 
     // Clear A2 and purge again.
     clearRange(m_pDoc, ScAddress(0,1,0));
     rPool.purge();
-    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), rPool.getCount());
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), rPool.getCount());
     CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), rPool.getCountIgnoreCase());
 
     // Clear A3 and purge again.
     clearRange(m_pDoc, ScAddress(0,2,0));
     rPool.purge();
-    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), rPool.getCount());
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), rPool.getCount());
     CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), rPool.getCountIgnoreCase());
 
     // Clear A4 and purge again.
diff --git a/svl/qa/unit/svl.cxx b/svl/qa/unit/svl.cxx
index 194dc550c278..c32017f7aea6 100644
--- a/svl/qa/unit/svl.cxx
+++ b/svl/qa/unit/svl.cxx
@@ -60,6 +60,7 @@ public:
     void testSharedString();
     void testSharedStringPool();
     void testSharedStringPoolPurge();
+    void testSharedStringPoolPurgeBug1();
     void testFdo60915();
     void testI116701();
     void testTdf103060();
@@ -77,6 +78,7 @@ public:
     CPPUNIT_TEST(testSharedString);
     CPPUNIT_TEST(testSharedStringPool);
     CPPUNIT_TEST(testSharedStringPoolPurge);
+    CPPUNIT_TEST(testSharedStringPoolPurgeBug1);
     CPPUNIT_TEST(testFdo60915);
     CPPUNIT_TEST(testI116701);
     CPPUNIT_TEST(testTdf103060);
@@ -376,30 +378,30 @@ void Test::testSharedStringPoolPurge()
     std::optional<svl::SharedString> pStr3 = aPool.intern("ANDY");
     std::optional<svl::SharedString> pStr4 = aPool.intern("Bruce");
 
-    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), aPool.getCount());
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(5), aPool.getCount());
     CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), aPool.getCountIgnoreCase());
 
     // This shouldn't purge anything.
     aPool.purge();
-    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), aPool.getCount());
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(5), aPool.getCount());
     CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), aPool.getCountIgnoreCase());
 
     // Delete one heap string object, and purge. That should purge one string.
     pStr1.reset();
     aPool.purge();
-    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), aPool.getCount());
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), aPool.getCount());
     CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), aPool.getCountIgnoreCase());
 
     // Nothing changes, because the upper-string is still in the map
     pStr3.reset();
     aPool.purge();
-    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(3), aPool.getCount());
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(4), aPool.getCount());
     CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), aPool.getCountIgnoreCase());
 
     // Again.
     pStr2.reset();
     aPool.purge();
-    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPool.getCount());
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(2), aPool.getCount());
     CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPool.getCountIgnoreCase());
 
     // Delete 'Bruce' and purge.
@@ -409,6 +411,19 @@ void Test::testSharedStringPoolPurge()
     CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), aPool.getCountIgnoreCase());
 }
 
+void Test::testSharedStringPoolPurgeBug1()
+{
+    // We had a bug where, if we had two strings that mapped to the same uppercase string,
+    // purge() would de-reference a dangling pointer and consequently cause an ASAN failure.
+    SvtSysLocale aSysLocale;
+    svl::SharedStringPool aPool(*aSysLocale.GetCharClassPtr());
+    aPool.intern("Andy");
+    aPool.intern("andy");
+    aPool.purge();
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), aPool.getCount());
+    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(0), aPool.getCountIgnoreCase());
+}
+
 void Test::checkPreviewString(SvNumberFormatter& aFormatter,
                               const OUString& sCode,
                               double fPreviewNumber,
diff --git a/svl/source/misc/sharedstringpool.cxx b/svl/source/misc/sharedstringpool.cxx
index d2d890004fbd..e4bc873e5f69 100644
--- a/svl/source/misc/sharedstringpool.cxx
+++ b/svl/source/misc/sharedstringpool.cxx
@@ -29,10 +29,10 @@ sal_Int32 getRefCount( const rtl_uString* p )
 struct SharedStringPool::Impl
 {
     mutable osl::Mutex maMutex;
-    // set of upper-case, so we can share these as the value in the maStrMap
-    std::unordered_set<OUString> maStrPoolUpper;
-    // map with rtl_uString* as value so we can avoid some ref-counting
-    std::unordered_map<OUString,rtl_uString*> maStrMap;
+    // We use this map for two purposes - to store lower->upper case mappings
+    // and to retrieve a shared uppercase object, so the management logic
+    // is quite complex.
+    std::unordered_map<OUString,OUString> maStrMap;
     const CharClass& mrCharClass;
 
     explicit Impl( const CharClass& rCharClass ) : mrCharClass(rCharClass) {}
@@ -49,31 +49,34 @@ SharedString SharedStringPool::intern( const OUString& rStr )
 {
     osl::MutexGuard aGuard(&mpImpl->maMutex);
 
-    auto [mapIt,bInserted] = mpImpl->maStrMap.emplace(rStr, rStr.pData);
-    if (bInserted)
+    auto [mapIt,bInserted] = mpImpl->maStrMap.emplace(rStr, rStr);
+    if (!bInserted)
+        // there is already a mapping
+        return SharedString(mapIt->first.pData, mapIt->second.pData);
+
+    // This is a new string insertion. Establish mapping to upper-case variant.
+    OUString aUpper = mpImpl->mrCharClass.uppercase(rStr);
+    if (aUpper == rStr)
+        // no need to do anything more, because we inserted an upper->upper mapping
+        return SharedString(mapIt->first.pData, mapIt->second.pData);
+
+    // We need to insert a lower->upper mapping, so also insert
+    // an upper->upper mapping, which we can use both for when an upper string
+    // is interned, and to look up a shared upper string.
+    auto mapIt2 = mpImpl->maStrMap.find(aUpper);
+    if (mapIt2 != mpImpl->maStrMap.end())
     {
-        // This is a new string insertion. Establish mapping to upper-case variant.
-        OUString aUpper = mpImpl->mrCharClass.uppercase(rStr);
-        if (aUpper == rStr)
-        {
-            auto insertResult = mpImpl->maStrPoolUpper.insert(rStr);
-            // need to use the same underlying rtl_uString object so the
-            // upper->upper detection in purge() works
-            auto pData = insertResult.first->pData;
-            // This is dodgy, but necessary. I don't want to do a delete/insert because
-            // this class is very performance sensitive. This does not violate the internals
-            // the map because the new key points to something with the same hash and equality
-            // as the old key.
-            const_cast<OUString&>(mapIt->first) = *insertResult.first;
-            mapIt->second = pData;
-        }
-        else
-        {
-            auto insertResult = mpImpl->maStrPoolUpper.insert(aUpper);
-            mapIt->second = insertResult.first->pData;
-        }
+        // there is an already existing upper string
+        mapIt->second = mapIt2->first;
+        return SharedString(mapIt->first.pData, mapIt->second.pData);
     }
-    return SharedString(mapIt->first.pData, mapIt->second);
+
+    // There is no already existing upper string.
+    // First, update using the iterator, can't do this later because
+    // the iterator will be invalid.
+    mapIt->second = aUpper;
+    mpImpl->maStrMap.emplace_hint(mapIt2, aUpper, aUpper);
+    return SharedString(rStr.pData, aUpper.pData);
 }
 
 void SharedStringPool::purge()
@@ -91,7 +94,7 @@ void SharedStringPool::purge()
     while (it != itEnd)
     {
         rtl_uString* p1 = it->first.pData;
-        rtl_uString* p2 = it->second;
+        rtl_uString* p2 = it->second.pData;
         if (p1 != p2)
         {
             // normal case - lowercase mapped to uppercase, which
@@ -100,10 +103,6 @@ void SharedStringPool::purge()
             if (getRefCount(p1) == 1)
             {
                 it = mpImpl->maStrMap.erase(it);
-                // except that the uppercase entry may be mapped to
-                // by other lower-case entries
-                if (getRefCount(p2) == 1)
-                    mpImpl->maStrPoolUpper.erase(OUString::unacquired(&p2));
                 continue;
             }
         }
@@ -115,16 +114,15 @@ void SharedStringPool::purge()
     while (it != itEnd)
     {
         rtl_uString* p1 = it->first.pData;
-        rtl_uString* p2 = it->second;
+        rtl_uString* p2 = it->second.pData;
         if (p1 == p2)
         {
             // uppercase which is mapped to itself, which means
             // one ref-counted entry as the key in the map, and
-            // one ref-counted entry in the set
+            // one ref-counted entry in the value in the map
             if (getRefCount(p1) == 2)
             {
                 it = mpImpl->maStrMap.erase(it);
-                mpImpl->maStrPoolUpper.erase(OUString::unacquired(&p1));
                 continue;
             }
         }
@@ -141,7 +139,11 @@ size_t SharedStringPool::getCount() const
 size_t SharedStringPool::getCountIgnoreCase() const
 {
     osl::MutexGuard aGuard(&mpImpl->maMutex);
-    return mpImpl->maStrPoolUpper.size();
+    // this is only called from unit tests, so no need to be efficient
+    std::unordered_set<OUString> aUpperSet;
+    for (auto const & pair : mpImpl->maStrMap)
+        aUpperSet.insert(pair.second);
+    return aUpperSet.size();
 }
 
 }


More information about the Libreoffice-commits mailing list