[Libreoffice] Interesting difference in thread-specific data function semantics for Windows vs. Unix

Tor Lillqvist tlillqvist at novell.com
Sun Jun 19 13:10:02 PDT 2011


I was investigating why the osl_Thread unit test (sal/qa/osl/process/osl_Thread.cxx) fails most of the times on Windows. If I understand correctly, the problem is a quite fundamental difference in the semantics of thread-specific data keys created with a destructor callback passed to osl_createrThreadKey() (or its wrapper, the constructor of the ThreadData class):

On Windows, if the value of a key is changed with osl_setThreadKeyData() (or its wrapper, ThreadData::setData()), the destructor callbac is called for the old value. (See the code in sal/osl/w32/thread.c. On Unix this does not seem to happen. There osl_setThreadKeyData() is just a thin wrapper for pthread_setspecific(), and the documentation for that does not say anything about the destructor being called. (Experimentation confirms this, at least on Linux.)

Now, this means that on Linux, if you set a thread-specific key's value to some pointer to dynamically allocated memory, and then set it to another value, the first value will indeed leak unless you otherwise free it yourself. This is why Caolán added some explicit delete calls in April. But at least some of those delete calls now then cause double deletes of the same data on Windows, causing to slight (non-fatal in this case) heap corruption, and in most runs of the test then (apparently a bit timing dependent) a failed assertion.

So, the interesting question now then is whether this difference in  osl_setThreadKeyData() semantics is intentional and known, and handled specifically in those places in the code where thread-specific data is used? Or whether the code assumes the semantics on Windows and just then leaks on Unix? Or what...

Here is a patch that adds some debugging output to osl_Thread.cxx:

diff --git a/sal/qa/osl/process/osl_Thread.cxx b/sal/qa/osl/process/osl_Thread.cxx
index aa6ad67..982a82e
--- a/sal/qa/osl/process/osl_Thread.cxx
+++ b/sal/qa/osl/process/osl_Thread.cxx
@@ -1896,7 +1896,8 @@ namespace osl_Thread
 // destroy function when the binding thread terminate
 void SAL_CALL destroyCallback(void * data)
 {
-    delete[] (char *) data;
+    fprintf (stderr, "del: %p (data in destroyCallback)\n", data); fflush (stderr);
+    //delete[] (char *) data;
 }
 
 static ThreadData myThreadData(destroyCallback);
@@ -1919,6 +1920,7 @@ private:
     void SAL_CALL run()
         {
             char * pc = new char[2];
+            fprintf (stderr, "new: %p (pc in myKeyThread::run)\n", pc); fflush (stderr);
 //      strcpy(pc, &m_nData);
             memcpy(pc, &m_nData, 1);
             pc[1] = '\0';
@@ -2032,6 +2034,7 @@ namespace osl_ThreadData
             {
                 // at first, set the data a value
                 char* pc = new char[2];
+                fprintf (stderr, "new: %p (pc in setData_002)\n", pc); fflush (stderr);
                 char m_nData = 'm';
 // LLA: this is a copy functions only and really only for \0 terminated strings
 //      m_nData is not a string, it's a character
@@ -2068,6 +2071,7 @@ namespace osl_ThreadData
             {
                 // at first, set the data a value
                 char* pc = new char[2];
+                fprintf (stderr, "new: %p (pc in setData_003)\n", pc); fflush (stderr);
                 char m_nData = 'm';
                 memcpy(pc, &m_nData, 1);
                 pc[1] = '\0';
@@ -2080,6 +2084,7 @@ namespace osl_ThreadData
                 // aThread1 and aThread2 should have not terminated yet
                 // setData the second time
                 char* pc2 = new char[2];
+                fprintf (stderr, "new: %p (pc2 in setData_003)\n", pc2); fflush (stderr);
                 m_nData = 'o';
                 memcpy(pc2, &m_nData, 1);
                 pc2[1] = '\0';
@@ -2100,8 +2105,10 @@ namespace osl_ThreadData
                     cData1 == 'a' && cData2 == 'b' && aChar == 'o'
                     );
 
-                delete [] pc2;
-                delete [] pc;
+                fprintf (stderr, "del: %p (pc2 in setData_003\n", pc2); fflush (stderr);
+                // delete [] pc2;
+                fprintf (stderr, "del: %p (pc in setData_003)\n", pc); fflush (stderr);
+                // delete [] pc;
             }
 
         CPPUNIT_TEST_SUITE(setData);
@@ -2127,6 +2134,7 @@ namespace osl_ThreadData
         void getData_001()
             {
                 char* pc = new char[2];
+                fprintf (stderr, "new: %p (pc in getData_001)\n", pc); fflush (stderr);
                 char m_nData[] = "i";
                 strcpy(pc, m_nData);
                 myThreadData.setData(pc);
@@ -2150,7 +2158,8 @@ namespace osl_ThreadData
                     cData1 == 'c' && cData2 == 'd' && aChar == 'i'
                     );
 
-                delete [] pc;
+                fprintf (stderr, "del: %p (pc in getData_001)\n", pc); fflush (stderr);
+                // delete [] pc;
             }
 
         // setData then change the value in the address data pointer points,
@@ -2158,6 +2167,7 @@ namespace osl_ThreadData
         void getData_002()
             {
                 char* pc = new char[2];
+                fprintf (stderr, "new: %p (pc in getData_002)\n", pc); fflush (stderr);
                 char m_nData = 'i';
                 memcpy(pc, &m_nData, 1);
                 pc[1] = '\0';

As you see I coment out the actual delete[] calls, so that the heap blocks are not reused and the history of each relevant heap block can be seen more uniquely in the output. When run on Windows, I get:

new: 004AED08 (pc in setData_002)
new: 004AED18 (pc in myKeyThread::run)
new: 004AED28 (pc in myKeyThread::run)
del: 004AED18 (data in destroyCallback)
del: 004AED28 (data in destroyCallback)
new: 004AED38 (pc in setData_003)
del: 004AED08 (data in destroyCallback)
new: 004AED48 (pc2 in setData_003)
del: 004AED38 (data in destroyCallback)  <<<===
new: 004AED58 (pc in myKeyThread::run)
new: 004AED68 (pc in myKeyThread::run)
del: 004AED68 (data in destroyCallback)
del: 004AED58 (data in destroyCallback)
del: 004AED48 (pc2 in setData_003        <<<===
del: 004AED38 (pc in setData_003)        <<<===
new: 004AED78 (pc in getData_001)
del: 004AED48 (data in destroyCallback)  <<<===
new: 004AED88 (pc in myKeyThread::run)
new: 004AED98 (pc in myKeyThread::run)
del: 004AED88 (data in destroyCallback)
del: 004AED98 (data in destroyCallback)
del: 004AED78 (pc in getData_001)        <<<===
new: 004AEDA8 (pc in getData_002)
del: 004AED78 (data in destroyCallback)  <<====
new: 004AEDB8 (pc in myKeyThread::run)
new: 004A3C38 (pc in myKeyThread::run)
del: 004A3C38 (data in destroyCallback)
del: 004AEDB8 (data in destroyCallback)

Note the indicated places where the same pointer gets deleted twice.

When run on Linux, I see no such problems.

--tml



More information about the LibreOffice mailing list