[PATCH] Hold on to connection lock when invoking timeout and watch functions.

Havoc Pennington hp at pobox.com
Tue Jan 1 09:50:42 PST 2008


2008-01-01  Havoc Pennington  <hp at redhat.com>

	Patch based on one from Olivier Hochreutiner <olivier.hochreutiner
	gmail.com>

	* dbus/dbus-connection.c (protected_change_timeout): remove the
	elaborate nonworking hack to try to drop locks and just keep the
	locks; this isn't right either, but at least is correct, though
	it puts restrictions on apps.

	* dbus/dbus-connection.c (protected_change_watch): make the same
	change as for timeouts

	* dbus/dbus-connection.c (dbus_connection_set_timeout_functions):
	don't drop the lock here; add documentation of the problem to API
	docs
	(dbus_connection_set_watch_functions): same

	* dbus/dbus-connection.c (dbus_connection_get_data)
	(dbus_connection_set_data): introduce a separate slot_mutex
	protecting connection->slot_list so these two functions can be
	called inside watch and timeout functions. Not sure this
	is going to be a good idea.

	* dbus/dbus-connection.c (dbus_connection_unref)
	(dbus_connection_ref): avoid using connection lock in ref/unref
	so these can also be used in watch and timeout functions
---
 ChangeLog              |   28 +++++++
 dbus/dbus-connection.c |  196 ++++++++++++++++++++++++++----------------------
 2 files changed, 134 insertions(+), 90 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 35e35a7..ec761f3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,31 @@
+2008-01-01  Havoc Pennington  <hp at redhat.com>
+
+	Patch based on one from Olivier Hochreutiner <olivier.hochreutiner
+	gmail.com>
+	
+	* dbus/dbus-connection.c (protected_change_timeout): remove the
+	elaborate nonworking hack to try to drop locks and just keep the
+	locks; this isn't right either, but at least is correct, though
+	it puts restrictions on apps.
+
+	* dbus/dbus-connection.c (protected_change_watch): make the same
+	change as for timeouts	
+
+	* dbus/dbus-connection.c (dbus_connection_set_timeout_functions):
+	don't drop the lock here; add documentation of the problem to API
+	docs
+	(dbus_connection_set_watch_functions): same
+
+	* dbus/dbus-connection.c (dbus_connection_get_data)
+	(dbus_connection_set_data): introduce a separate slot_mutex
+	protecting connection->slot_list so these two functions can be
+	called inside watch and timeout functions. Not sure this 
+	is going to be a good idea.
+
+	* dbus/dbus-connection.c (dbus_connection_unref)
+	(dbus_connection_ref): avoid using connection lock in ref/unref 
+	so these can also be used in watch and timeout functions
+	
 2007-12-18  Havoc Pennington  <hp at redhat.com>
 
 	* dbus/dbus-connection.c (_dbus_connection_block_pending_call):
diff --git a/dbus/dbus-connection.c b/dbus/dbus-connection.c
index 0de5f22..177fb2e 100644
--- a/dbus/dbus-connection.c
+++ b/dbus/dbus-connection.c
@@ -73,6 +73,14 @@
     _dbus_mutex_unlock ((connection)->mutex);                                            \
   } while (0)
 
+#define SLOTS_LOCK(connection) do {                     \
+    _dbus_mutex_lock ((connection)->slot_mutex);        \
+  } while (0)
+
+#define SLOTS_UNLOCK(connection) do {                   \
+    _dbus_mutex_unlock ((connection)->slot_mutex);      \
+  } while (0)
+
 #define DISPATCH_STATUS_NAME(s)                                            \
                      ((s) == DBUS_DISPATCH_COMPLETE ? "complete" :         \
                       (s) == DBUS_DISPATCH_DATA_REMAINS ? "data remains" : \
@@ -257,6 +265,7 @@ struct DBusConnection
   
   DBusList *filter_list;        /**< List of filters. */
 
+  DBusMutex *slot_mutex;        /**< Lock on slot_list so overall connection lock need not be taken */
   DBusDataSlotList slot_list;   /**< Data stored by allocated integer ID */
 
   DBusHashTable *pending_replies;  /**< Hash of message serials to #DBusPendingCall. */  
@@ -647,38 +656,42 @@ protected_change_watch (DBusConnection         *connection,
                         DBusWatchToggleFunction toggle_function,
                         dbus_bool_t             enabled)
 {
-  DBusWatchList *watches;
   dbus_bool_t retval;
   
   HAVE_LOCK_CHECK (connection);
 
-  /* This isn't really safe or reasonable; a better pattern is the "do everything, then
-   * drop lock and call out" one; but it has to be propagated up through all callers
+  /* The original purpose of protected_change_watch() was to hold a
+   * ref on the connection while dropping the connection lock, then
+   * calling out to the app.  This was a broken hack that did not
+   * work, since the connection was in a hosed state (no WatchList
+   * field) while calling out.
+   *
+   * So for now we'll just keep the lock while calling out. This means
+   * apps are not allowed to call DBusConnection methods inside a
+   * watch function or they will deadlock.
+   * 
+   * The "real fix" is to use the _and_unlock() pattern found
+   * elsewhere in the code, to defer calling out to the app until
+   * we're about to drop locks and return flow of control to the app
+   * anyway.
+   *
+   * See http://lists.freedesktop.org/archives/dbus/2007-July/thread.html#8144
    */
   
-  watches = connection->watches;
-  if (watches)
+  if (connection->watches)
     {
-      connection->watches = NULL;
-      _dbus_connection_ref_unlocked (connection);
-      CONNECTION_UNLOCK (connection);
-
       if (add_function)
-        retval = (* add_function) (watches, watch);
+        retval = (* add_function) (connection->watches, watch);
       else if (remove_function)
         {
           retval = TRUE;
-          (* remove_function) (watches, watch);
+          (* remove_function) (connection->watches, watch);
         }
       else
         {
           retval = TRUE;
-          (* toggle_function) (watches, watch, enabled);
+          (* toggle_function) (connection->watches, watch, enabled);
         }
-      
-      CONNECTION_LOCK (connection);
-      connection->watches = watches;
-      _dbus_connection_unref_unlocked (connection);
 
       return retval;
     }
@@ -768,39 +781,43 @@ protected_change_timeout (DBusConnection           *connection,
                           DBusTimeoutToggleFunction toggle_function,
                           dbus_bool_t               enabled)
 {
-  DBusTimeoutList *timeouts;
   dbus_bool_t retval;
   
   HAVE_LOCK_CHECK (connection);
 
-  /* This isn't really safe or reasonable; a better pattern is the "do everything, then
-   * drop lock and call out" one; but it has to be propagated up through all callers
+  /* The original purpose of protected_change_timeout() was to hold a
+   * ref on the connection while dropping the connection lock, then
+   * calling out to the app.  This was a broken hack that did not
+   * work, since the connection was in a hosed state (no TimeoutList
+   * field) while calling out.
+   *
+   * So for now we'll just keep the lock while calling out. This means
+   * apps are not allowed to call DBusConnection methods inside a
+   * timeout function or they will deadlock.
+   * 
+   * The "real fix" is to use the _and_unlock() pattern found
+   * elsewhere in the code, to defer calling out to the app until
+   * we're about to drop locks and return flow of control to the app
+   * anyway.
+   *
+   * See http://lists.freedesktop.org/archives/dbus/2007-July/thread.html#8144
    */
   
-  timeouts = connection->timeouts;
-  if (timeouts)
+  if (connection->timeouts)
     {
-      connection->timeouts = NULL;
-      _dbus_connection_ref_unlocked (connection);
-      CONNECTION_UNLOCK (connection);
-
       if (add_function)
-        retval = (* add_function) (timeouts, timeout);
+        retval = (* add_function) (connection->timeouts, timeout);
       else if (remove_function)
         {
           retval = TRUE;
-          (* remove_function) (timeouts, timeout);
+          (* remove_function) (connection->timeouts, timeout);
         }
       else
         {
           retval = TRUE;
-          (* toggle_function) (timeouts, timeout, enabled);
+          (* toggle_function) (connection->timeouts, timeout, enabled);
         }
       
-      CONNECTION_LOCK (connection);
-      connection->timeouts = timeouts;
-      _dbus_connection_unref_unlocked (connection);
-
       return retval;
     }
   else
@@ -1219,6 +1236,10 @@ _dbus_connection_new_for_transport (DBusTransport *transport)
   if (connection->io_path_cond == NULL)
     goto error;
 
+  _dbus_mutex_new_at_location (&connection->slot_mutex);
+  if (connection->slot_mutex == NULL)
+    goto error;
+  
   disconnect_message = dbus_message_new_signal (DBUS_PATH_LOCAL,
                                                 DBUS_INTERFACE_LOCAL,
                                                 "Disconnected");
@@ -1295,6 +1316,7 @@ _dbus_connection_new_for_transport (DBusTransport *transport)
       _dbus_mutex_free_at_location (&connection->mutex);
       _dbus_mutex_free_at_location (&connection->io_path_mutex);
       _dbus_mutex_free_at_location (&connection->dispatch_mutex);
+      _dbus_mutex_free_at_location (&connection->slot_mutex);
       dbus_free (connection);
     }
   if (pending_replies)
@@ -2467,9 +2489,15 @@ dbus_connection_ref (DBusConnection *connection)
   
   /* The connection lock is better than the global
    * lock in the atomic increment fallback
+   *
+   * (FIXME but for now we always use the atomic version,
+   * to avoid taking the connection lock, due to
+   * the mess with set_timeout_functions()/set_watch_functions()
+   * calling out to the app without dropping locks)
    */
   
-#ifdef DBUS_HAVE_ATOMIC_INT
+  /* #ifdef DBUS_HAVE_ATOMIC_INT */
+#if 1
   _dbus_atomic_inc (&connection->refcount);
 #else
   CONNECTION_LOCK (connection);
@@ -2581,6 +2609,8 @@ _dbus_connection_last_unref (DBusConnection *connection)
   _dbus_mutex_free_at_location (&connection->io_path_mutex);
   _dbus_mutex_free_at_location (&connection->dispatch_mutex);
 
+  _dbus_mutex_free_at_location (&connection->slot_mutex);
+  
   _dbus_mutex_free_at_location (&connection->mutex);
   
   dbus_free (connection);
@@ -2615,9 +2645,15 @@ dbus_connection_unref (DBusConnection *connection)
   
   /* The connection lock is better than the global
    * lock in the atomic increment fallback
+   * 
+   * (FIXME but for now we always use the atomic version,
+   * to avoid taking the connection lock, due to
+   * the mess with set_timeout_functions()/set_watch_functions()
+   * calling out to the app without dropping locks)   
    */
   
-#ifdef DBUS_HAVE_ATOMIC_INT
+  /* #ifdef DBUS_HAVE_ATOMIC_INT */
+#if 1
   last_unref = (_dbus_atomic_dec (&connection->refcount) == 1);
 #else
   CONNECTION_LOCK (connection);
@@ -4550,10 +4586,12 @@ dbus_connection_dispatch (DBusConnection *connection)
  * should be that dbus_connection_set_watch_functions() has no effect,
  * but the add_function and remove_function may have been called.
  *
- * @todo We need to drop the lock when we call the
- * add/remove/toggled functions which can be a side effect
- * of setting the watch functions.
- * 
+ * @note For now, the thread lock on DBusConnection is held while
+ * watch functions are invoked, so inside these functions you
+ * may not invoke any methods on DBusConnection or it will deadlock.
+ * See the comments in the code or http://lists.freedesktop.org/archives/dbus/2007-July/thread.html#8144
+ * if you encounter this issue and want to attempt writing a patch.
+ *
  * @param connection the connection.
  * @param add_function function to begin monitoring a new descriptor.
  * @param remove_function function to stop monitoring a descriptor.
@@ -4571,42 +4609,17 @@ dbus_connection_set_watch_functions (DBusConnection              *connection,
                                      DBusFreeFunction             free_data_function)
 {
   dbus_bool_t retval;
-  DBusWatchList *watches;
 
   _dbus_return_val_if_fail (connection != NULL, FALSE);
   
   CONNECTION_LOCK (connection);
 
-#ifndef DBUS_DISABLE_CHECKS
-  if (connection->watches == NULL)
-    {
-      _dbus_warn_check_failed ("Re-entrant call to %s is not allowed\n",
-                               _DBUS_FUNCTION_NAME);
-      return FALSE;
-    }
-#endif
-  
-  /* ref connection for slightly better reentrancy */
-  _dbus_connection_ref_unlocked (connection);
-
-  /* This can call back into user code, and we need to drop the
-   * connection lock when it does. This is kind of a lame
-   * way to do it.
-   */
-  watches = connection->watches;
-  connection->watches = NULL;
-  CONNECTION_UNLOCK (connection);
-
-  retval = _dbus_watch_list_set_functions (watches,
+  retval = _dbus_watch_list_set_functions (connection->watches,
                                            add_function, remove_function,
                                            toggled_function,
                                            data, free_data_function);
-  CONNECTION_LOCK (connection);
-  connection->watches = watches;
   
   CONNECTION_UNLOCK (connection);
-  /* drop our paranoid refcount */
-  dbus_connection_unref (connection);
   
   return retval;
 }
@@ -4636,6 +4649,12 @@ dbus_connection_set_watch_functions (DBusConnection              *connection,
  * given remove_function.  The timer interval may change whenever the
  * timeout is added, removed, or toggled.
  *
+ * @note For now, the thread lock on DBusConnection is held while
+ * timeout functions are invoked, so inside these functions you
+ * may not invoke any methods on DBusConnection or it will deadlock.
+ * See the comments in the code or http://lists.freedesktop.org/archives/dbus/2007-July/thread.html#8144
+ * if you encounter this issue and want to attempt writing a patch.
+ * 
  * @param connection the connection.
  * @param add_function function to add a timeout.
  * @param remove_function function to remove a timeout.
@@ -4653,38 +4672,17 @@ dbus_connection_set_timeout_functions   (DBusConnection            *connection,
 					 DBusFreeFunction           free_data_function)
 {
   dbus_bool_t retval;
-  DBusTimeoutList *timeouts;
 
   _dbus_return_val_if_fail (connection != NULL, FALSE);
   
   CONNECTION_LOCK (connection);
-
-#ifndef DBUS_DISABLE_CHECKS
-  if (connection->timeouts == NULL)
-    {
-      _dbus_warn_check_failed ("Re-entrant call to %s is not allowed\n",
-                               _DBUS_FUNCTION_NAME);
-      return FALSE;
-    }
-#endif
   
-  /* ref connection for slightly better reentrancy */
-  _dbus_connection_ref_unlocked (connection);
-
-  timeouts = connection->timeouts;
-  connection->timeouts = NULL;
-  CONNECTION_UNLOCK (connection);
-  
-  retval = _dbus_timeout_list_set_functions (timeouts,
+  retval = _dbus_timeout_list_set_functions (connection->timeouts,
                                              add_function, remove_function,
                                              toggled_function,
                                              data, free_data_function);
-  CONNECTION_LOCK (connection);
-  connection->timeouts = timeouts;
   
   CONNECTION_UNLOCK (connection);
-  /* drop our paranoid refcount */
-  dbus_connection_unref (connection);
 
   return retval;
 }
@@ -5612,6 +5610,15 @@ dbus_connection_free_data_slot (dbus_int32_t *slot_p)
  * the connection is finalized. The slot number
  * must have been allocated with dbus_connection_allocate_data_slot().
  *
+ * @note As a temporary hack, this function does not take the
+ * main thread lock on DBusConnection, which allows it to be
+ * used from inside watch and timeout functions. (See the
+ * note in docs for dbus_connection_set_watch_functions().)
+ * A side effect of this is that you need to know there's
+ * a reference held on the connection while invoking
+ * dbus_connection_set_data(), or the connection could be
+ * finalized during dbus_connection_set_data().
+ * 
  * @param connection the connection
  * @param slot the slot number
  * @param data the data to store
@@ -5631,14 +5638,14 @@ dbus_connection_set_data (DBusConnection   *connection,
   _dbus_return_val_if_fail (connection != NULL, FALSE);
   _dbus_return_val_if_fail (slot >= 0, FALSE);
   
-  CONNECTION_LOCK (connection);
+  SLOTS_LOCK (connection);
 
   retval = _dbus_data_slot_list_set (&slot_allocator,
                                      &connection->slot_list,
                                      slot, data, free_data_func,
                                      &old_free_func, &old_data);
   
-  CONNECTION_UNLOCK (connection);
+  SLOTS_UNLOCK (connection);
 
   if (retval)
     {
@@ -5654,6 +5661,15 @@ dbus_connection_set_data (DBusConnection   *connection,
  * Retrieves data previously set with dbus_connection_set_data().
  * The slot must still be allocated (must not have been freed).
  *
+ * @note As a temporary hack, this function does not take the
+ * main thread lock on DBusConnection, which allows it to be
+ * used from inside watch and timeout functions. (See the
+ * note in docs for dbus_connection_set_watch_functions().)
+ * A side effect of this is that you need to know there's
+ * a reference held on the connection while invoking
+ * dbus_connection_get_data(), or the connection could be
+ * finalized during dbus_connection_get_data().
+ * 
  * @param connection the connection
  * @param slot the slot to get data from
  * @returns the data, or #NULL if not found
@@ -5666,13 +5682,13 @@ dbus_connection_get_data (DBusConnection   *connection,
 
   _dbus_return_val_if_fail (connection != NULL, NULL);
   
-  CONNECTION_LOCK (connection);
+  SLOTS_LOCK (connection);
 
   res = _dbus_data_slot_list_get (&slot_allocator,
                                   &connection->slot_list,
                                   slot);
   
-  CONNECTION_UNLOCK (connection);
+  SLOTS_UNLOCK (connection);
 
   return res;
 }
-- 
1.5.3.6


--------------090406020901070607020802--


More information about the dbus mailing list