[Nouveau] [PATCH 6/9] drm/nouveau: Convert event handler list to RCU

Peter Hurley peter at hurleysoftware.com
Tue Aug 27 13:12:59 PDT 2013


Lockdep report [1] correctly identifies a potential deadlock between
nouveau_drm_vblank_enable() and nouveau_drm_vblank_handler() due
to inverted lock order of event->lock with drm core's
dev->vblank_time_lock.

Fix vblank event deadlock by converting event handler list to RCU.
List updates remain serialized by event->lock.
List traversal & handler execution is lockless which prevents
inverted lock order problems with the drm core.
Safe deletion of the event handler is accomplished with
synchronize_rcu() for those handlers stored in nouveau_object-based
containers (nouveau_drm & nv50_/nvc0_software_context); otherwise,
with kfree_rcu (for nouveau_connector & uevent temporary handlers).

[1]
======================================================
[ INFO: possible circular locking dependency detected ]
3.10.0-0+tip-xeon+lockdep #0+tip Not tainted
-------------------------------------------------------
swapper/7/0 is trying to acquire lock:
 (&(&dev->vblank_time_lock)->rlock){-.....}, at: [<ffffffffa0086269>] drm_handle_vblank+0x69/0x400 [drm]

but task is already holding lock:
 (&(&event->lock)->rlock){-.....}, at: [<ffffffffa0101dbd>] nouveau_event_trigger+0x4d/0xd0 [nouveau]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&(&event->lock)->rlock){-.....}:
       [<ffffffff810b6d62>] lock_acquire+0x92/0x1f0
       [<ffffffff8175e926>] _raw_spin_lock_irqsave+0x46/0x60
       [<ffffffffa0101cf7>] nouveau_event_get+0x27/0xa0 [nouveau]
       [<ffffffffa01807bd>] nouveau_drm_vblank_enable+0x8d/0xf0 [nouveau]
       [<ffffffffa00856d8>] drm_vblank_get+0xf8/0x2c0 [drm]
       [<ffffffffa00867f4>] drm_wait_vblank+0x84/0x6f0 [drm]
       [<ffffffffa0081719>] drm_ioctl+0x559/0x690 [drm]
       [<ffffffff811bed77>] do_vfs_ioctl+0x97/0x590
       [<ffffffff811bf301>] SyS_ioctl+0x91/0xb0
       [<ffffffff817677c2>] system_call_fastpath+0x16/0x1b

-> #0 (&(&dev->vblank_time_lock)->rlock){-.....}:
       [<ffffffff810b65c3>] __lock_acquire+0x1c43/0x1d30
       [<ffffffff810b6d62>] lock_acquire+0x92/0x1f0
       [<ffffffff8175e926>] _raw_spin_lock_irqsave+0x46/0x60
       [<ffffffffa0086269>] drm_handle_vblank+0x69/0x400 [drm]
       [<ffffffffa0180847>] nouveau_drm_vblank_handler+0x27/0x30 [nouveau]
       [<ffffffffa0101e00>] nouveau_event_trigger+0x90/0xd0 [nouveau]
       [<ffffffffa012dafd>] nv50_disp_intr+0xdd/0x230 [nouveau]
       [<ffffffffa0120451>] nouveau_mc_intr+0xa1/0x100 [nouveau]
       [<ffffffff810f3c7c>] handle_irq_event_percpu+0x6c/0x3d0
       [<ffffffff810f4028>] handle_irq_event+0x48/0x70
       [<ffffffff810f71ca>] handle_fasteoi_irq+0x5a/0x100
       [<ffffffff81004512>] handle_irq+0x22/0x40
       [<ffffffff817691fa>] do_IRQ+0x5a/0xd0
       [<ffffffff8175f76f>] ret_from_intr+0x0/0x13
       [<ffffffff8100b876>] arch_cpu_idle+0x26/0x30
       [<ffffffff810a5b4e>] cpu_startup_entry+0xce/0x410
       [<ffffffff81743d4e>] start_secondary+0x255/0x25c

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&(&event->lock)->rlock);
                               lock(&(&dev->vblank_time_lock)->rlock);
                               lock(&(&event->lock)->rlock);
  lock(&(&dev->vblank_time_lock)->rlock);

 *** DEADLOCK ***

1 lock held by swapper/7/0:
 #0:  (&(&event->lock)->rlock){-.....}, at: [<ffffffffa0101dbd>] nouveau_event_trigger+0x4d/0xd0 [nouveau]

stack backtrace:
CPU: 7 PID: 0 Comm: swapper/7 Not tainted 3.10.0-0+tip-xeon+lockdep #0+tip
Hardware name: Dell Inc. Precision WorkStation T5400  /0RW203, BIOS A11 04/30/2012
 ffffffff821ccf60 ffff8802bfdc3b08 ffffffff81755f39 ffff8802bfdc3b58
 ffffffff8174f526 0000000000000002 ffff8802bfdc3be8 ffff8802b330a7f8
 ffff8802b330a820 ffff8802b330a7f8 0000000000000000 0000000000000001
Call Trace:
 <IRQ>  [<ffffffff81755f39>] dump_stack+0x19/0x1b
 [<ffffffff8174f526>] print_circular_bug+0x1fb/0x20c
 [<ffffffff810b65c3>] __lock_acquire+0x1c43/0x1d30
 [<ffffffff810b1f8d>] ? trace_hardirqs_off+0xd/0x10
 [<ffffffff8102b1a8>] ? flat_send_IPI_mask+0x88/0xa0
 [<ffffffff810b6d62>] lock_acquire+0x92/0x1f0
 [<ffffffffa0086269>] ? drm_handle_vblank+0x69/0x400 [drm]
 [<ffffffff8175e926>] _raw_spin_lock_irqsave+0x46/0x60
 [<ffffffffa0086269>] ? drm_handle_vblank+0x69/0x400 [drm]
 [<ffffffffa0086269>] drm_handle_vblank+0x69/0x400 [drm]
 [<ffffffffa0180847>] nouveau_drm_vblank_handler+0x27/0x30 [nouveau]
 [<ffffffffa0101e00>] nouveau_event_trigger+0x90/0xd0 [nouveau]
 [<ffffffffa012dafd>] nv50_disp_intr+0xdd/0x230 [nouveau]
 [<ffffffff8175f182>] ? _raw_spin_unlock_irqrestore+0x42/0x80
 [<ffffffff8107800c>] ? __hrtimer_start_range_ns+0x16c/0x530
 [<ffffffffa0120451>] nouveau_mc_intr+0xa1/0x100 [nouveau]
 [<ffffffff810f3c7c>] handle_irq_event_percpu+0x6c/0x3d0
 [<ffffffff810f4028>] handle_irq_event+0x48/0x70
 [<ffffffff810f718e>] ? handle_fasteoi_irq+0x1e/0x100
 [<ffffffff810f71ca>] handle_fasteoi_irq+0x5a/0x100
 [<ffffffff81004512>] handle_irq+0x22/0x40
 [<ffffffff817691fa>] do_IRQ+0x5a/0xd0
 [<ffffffff8175f76f>] common_interrupt+0x6f/0x6f
 <EOI>  [<ffffffff8100ad3d>] ? default_idle+0x1d/0x290
 [<ffffffff8100ad3b>] ? default_idle+0x1b/0x290
 [<ffffffff8100b876>] arch_cpu_idle+0x26/0x30
 [<ffffffff810a5b4e>] cpu_startup_entry+0xce/0x410
 [<ffffffff810ae0dc>] ? clockevents_register_device+0xdc/0x140
 [<ffffffff81743d4e>] start_secondary+0x255/0x25c

Reported-by: Dave Jones <davej at redhat.com>
Signed-off-by: Peter Hurley <peter at hurleysoftware.com>
---
 drivers/gpu/drm/nouveau/core/core/event.c          | 22 +++++++++++-----------
 .../gpu/drm/nouveau/core/engine/software/nv50.c    |  1 +
 .../gpu/drm/nouveau/core/engine/software/nvc0.c    |  1 +
 drivers/gpu/drm/nouveau/core/include/core/event.h  |  1 +
 drivers/gpu/drm/nouveau/nouveau_connector.c        |  2 +-
 drivers/gpu/drm/nouveau/nouveau_drm.c              |  1 +
 6 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/core/core/event.c b/drivers/gpu/drm/nouveau/core/core/event.c
index 4cd1ebe..ce0a0ef 100644
--- a/drivers/gpu/drm/nouveau/core/core/event.c
+++ b/drivers/gpu/drm/nouveau/core/core/event.c
@@ -22,6 +22,7 @@
 
 #include <core/os.h>
 #include <core/event.h>
+#include <linux/rculist.h>
 
 void
 nouveau_event_handler_install(struct nouveau_event *event, int index,
@@ -37,7 +38,7 @@ nouveau_event_handler_install(struct nouveau_event *event, int index,
 	handler->priv = priv;
 
 	spin_lock_irqsave(&event->lock, flags);
-	list_add(&handler->head, &event->index[index].list);
+	list_add_rcu(&handler->head, &event->index[index].list);
 	spin_unlock_irqrestore(&event->lock, flags);
 }
 
@@ -51,7 +52,7 @@ nouveau_event_handler_remove(struct nouveau_event *event, int index,
 		return;
 
 	spin_lock_irqsave(&event->lock, flags);
-	list_del(&handler->head);
+	list_del_rcu(&handler->head);
 	spin_unlock_irqrestore(&event->lock, flags);
 }
 
@@ -71,7 +72,7 @@ nouveau_event_handler_create(struct nouveau_event *event, int index,
 	__set_bit(NVKM_EVENT_ENABLE, &handler->flags);
 
 	spin_lock_irqsave(&event->lock, flags);
-	list_add(&handler->head, &event->index[index].list);
+	list_add_rcu(&handler->head, &event->index[index].list);
 	if (!event->index[index].refs++) {
 		if (event->enable)
 			event->enable(event, index);
@@ -94,9 +95,9 @@ nouveau_event_handler_destroy(struct nouveau_event *event, int index,
 		if (event->disable)
 			event->disable(event, index);
 	}
-	list_del(&handler->head);
+	list_del_rcu(&handler->head);
 	spin_unlock_irqrestore(&event->lock, flags);
-	kfree(handler);
+	kfree_rcu(handler, rcu);
 }
 
 static void
@@ -147,20 +148,19 @@ nouveau_event_get(struct nouveau_event *event, int index,
 void
 nouveau_event_trigger(struct nouveau_event *event, int index)
 {
-	struct nouveau_eventh *handler, *temp;
-	unsigned long flags;
+	struct nouveau_eventh *handler;
 
 	if (index >= event->index_nr)
 		return;
 
-	spin_lock_irqsave(&event->lock, flags);
-	list_for_each_entry_safe(handler, temp, &event->index[index].list, head) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(handler, &event->index[index].list, head) {
 		if (test_bit(NVKM_EVENT_ENABLE, &handler->flags)) {
 			if (handler->func(handler, index) == NVKM_EVENT_DROP)
-				nouveau_event_put_locked(event, index, handler);
+				nouveau_event_put(event, index, handler);
 		}
 	}
-	spin_unlock_irqrestore(&event->lock, flags);
+	rcu_read_unlock();
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/core/engine/software/nv50.c b/drivers/gpu/drm/nouveau/core/engine/software/nv50.c
index dfce1f5..87aeee1 100644
--- a/drivers/gpu/drm/nouveau/core/engine/software/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/software/nv50.c
@@ -191,6 +191,7 @@ nv50_software_context_dtor(struct nouveau_object *object)
 		nouveau_event_handler_remove(disp->vblank, i,
 					     &chan->base.vblank.event[i]);
 	}
+	synchronize_rcu();
 	_nouveau_software_context_dtor(object);
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c
index d1f52c0..e87ba42 100644
--- a/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/software/nvc0.c
@@ -197,6 +197,7 @@ nvc0_software_context_dtor(struct nouveau_object *object)
 		nouveau_event_handler_remove(disp->vblank, i,
 					     &chan->base.vblank.event[i]);
 	}
+	synchronize_rcu();
 	_nouveau_software_context_dtor(object);
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/include/core/event.h b/drivers/gpu/drm/nouveau/core/include/core/event.h
index 45e8a0f..f01b173 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/event.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/event.h
@@ -13,6 +13,7 @@ struct nouveau_eventh {
 	unsigned long flags;
 	void *priv;
 	int (*func)(struct nouveau_eventh *, int index);
+	struct rcu_head rcu;
 };
 
 struct nouveau_event {
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 13b38ed..14fce8a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -106,7 +106,7 @@ nouveau_connector_destroy(struct drm_connector *connector)
 	kfree(nv_connector->edid);
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
-	kfree(connector);
+	kfree_rcu(nv_connector, hpd_func.rcu);
 }
 
 static struct nouveau_i2c_port *
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 4187cad..544ca19 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -432,6 +432,7 @@ nouveau_drm_unload(struct drm_device *dev)
 	for (i = 0; i < ARRAY_SIZE(drm->vblank); i++)
 		nouveau_event_handler_remove(disp->vblank, i,
 					     &drm->vblank[i]);
+	synchronize_rcu();
 
 	nouveau_cli_destroy(&drm->client);
 	return 0;
-- 
1.8.1.2



More information about the Nouveau mailing list