[Nouveau] [PATCH] nouveau: use an rwlock for the event lock.
Danilo Krummrich
me at dakr.org
Tue Nov 7 23:40:25 UTC 2023
On 11/7/23 06:32, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This allows breaking the following circular locking dependency.
>
> Aug 10 07:01:29 dg1test kernel: ======================================================
> Aug 10 07:01:29 dg1test kernel: WARNING: possible circular locking dependency detected
> Aug 10 07:01:29 dg1test kernel: 6.4.0-rc7+ #10 Not tainted
> Aug 10 07:01:29 dg1test kernel: ------------------------------------------------------
> Aug 10 07:01:29 dg1test kernel: wireplumber/2236 is trying to acquire lock:
> Aug 10 07:01:29 dg1test kernel: ffff8fca5320da18 (&fctx->lock){-...}-{2:2}, at: nouveau_fence_wait_uevent_handler+0x2b/0x100 [nouveau]
> Aug 10 07:01:29 dg1test kernel:
> but task is already holding lock:
> Aug 10 07:01:29 dg1test kernel: ffff8fca41208610 (&event->list_lock#2){-...}-{2:2}, at: nvkm_event_ntfy+0x50/0xf0 [nouveau]
> Aug 10 07:01:29 dg1test kernel:
> which lock already depends on the new lock.
> Aug 10 07:01:29 dg1test kernel:
> the existing dependency chain (in reverse order) is:
> Aug 10 07:01:29 dg1test kernel:
> -> #3 (&event->list_lock#2){-...}-{2:2}:
> Aug 10 07:01:29 dg1test kernel: _raw_spin_lock_irqsave+0x4b/0x70
> Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy+0x50/0xf0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: ga100_fifo_nonstall_intr+0x24/0x30 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_intr+0x12c/0x240 [nouveau]
> Aug 10 07:01:29 dg1test kernel: __handle_irq_event_percpu+0x88/0x240
> Aug 10 07:01:29 dg1test kernel: handle_irq_event+0x38/0x80
> Aug 10 07:01:29 dg1test kernel: handle_edge_irq+0xa3/0x240
> Aug 10 07:01:29 dg1test kernel: __common_interrupt+0x72/0x160
> Aug 10 07:01:29 dg1test kernel: common_interrupt+0x60/0xe0
> Aug 10 07:01:29 dg1test kernel: asm_common_interrupt+0x26/0x40
> Aug 10 07:01:29 dg1test kernel:
> -> #2 (&device->intr.lock){-...}-{2:2}:
> Aug 10 07:01:29 dg1test kernel: _raw_spin_lock_irqsave+0x4b/0x70
> Aug 10 07:01:29 dg1test kernel: nvkm_inth_allow+0x2c/0x80 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy_state+0x181/0x250 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy_allow+0x63/0xd0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_uevent_mthd+0x4d/0x70 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_ioctl+0x10b/0x250 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvif_object_mthd+0xa8/0x1f0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvif_event_allow+0x2a/0xa0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nouveau_fence_enable_signaling+0x78/0x80 [nouveau]
> Aug 10 07:01:29 dg1test kernel: __dma_fence_enable_signaling+0x5e/0x100
> Aug 10 07:01:29 dg1test kernel: dma_fence_add_callback+0x4b/0xd0
> Aug 10 07:01:29 dg1test kernel: nouveau_cli_work_queue+0xae/0x110 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nouveau_gem_object_close+0x1d1/0x2a0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: drm_gem_handle_delete+0x70/0xe0 [drm]
> Aug 10 07:01:29 dg1test kernel: drm_ioctl_kernel+0xa5/0x150 [drm]
> Aug 10 07:01:29 dg1test kernel: drm_ioctl+0x256/0x490 [drm]
> Aug 10 07:01:29 dg1test kernel: nouveau_drm_ioctl+0x5a/0xb0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: __x64_sys_ioctl+0x91/0xd0
> Aug 10 07:01:29 dg1test kernel: do_syscall_64+0x3c/0x90
> Aug 10 07:01:29 dg1test kernel: entry_SYSCALL_64_after_hwframe+0x72/0xdc
> Aug 10 07:01:29 dg1test kernel:
> -> #1 (&event->refs_lock#4){....}-{2:2}:
> Aug 10 07:01:29 dg1test kernel: _raw_spin_lock_irqsave+0x4b/0x70
> Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy_state+0x37/0x250 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy_allow+0x63/0xd0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_uevent_mthd+0x4d/0x70 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_ioctl+0x10b/0x250 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvif_object_mthd+0xa8/0x1f0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvif_event_allow+0x2a/0xa0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nouveau_fence_enable_signaling+0x78/0x80 [nouveau]
> Aug 10 07:01:29 dg1test kernel: __dma_fence_enable_signaling+0x5e/0x100
> Aug 10 07:01:29 dg1test kernel: dma_fence_add_callback+0x4b/0xd0
> Aug 10 07:01:29 dg1test kernel: nouveau_cli_work_queue+0xae/0x110 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nouveau_gem_object_close+0x1d1/0x2a0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: drm_gem_handle_delete+0x70/0xe0 [drm]
> Aug 10 07:01:29 dg1test kernel: drm_ioctl_kernel+0xa5/0x150 [drm]
> Aug 10 07:01:29 dg1test kernel: drm_ioctl+0x256/0x490 [drm]
> Aug 10 07:01:29 dg1test kernel: nouveau_drm_ioctl+0x5a/0xb0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: __x64_sys_ioctl+0x91/0xd0
> Aug 10 07:01:29 dg1test kernel: do_syscall_64+0x3c/0x90
> Aug 10 07:01:29 dg1test kernel: entry_SYSCALL_64_after_hwframe+0x72/0xdc
> Aug 10 07:01:29 dg1test kernel:
> -> #0 (&fctx->lock){-...}-{2:2}:
> Aug 10 07:01:29 dg1test kernel: __lock_acquire+0x14e3/0x2240
> Aug 10 07:01:29 dg1test kernel: lock_acquire+0xc8/0x2a0
> Aug 10 07:01:29 dg1test kernel: _raw_spin_lock_irqsave+0x4b/0x70
> Aug 10 07:01:29 dg1test kernel: nouveau_fence_wait_uevent_handler+0x2b/0x100 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_client_event+0xf/0x20 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy+0x9b/0xf0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: ga100_fifo_nonstall_intr+0x24/0x30 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_intr+0x12c/0x240 [nouveau]
> Aug 10 07:01:29 dg1test kernel: __handle_irq_event_percpu+0x88/0x240
> Aug 10 07:01:29 dg1test kernel: handle_irq_event+0x38/0x80
> Aug 10 07:01:29 dg1test kernel: handle_edge_irq+0xa3/0x240
> Aug 10 07:01:29 dg1test kernel: __common_interrupt+0x72/0x160
> Aug 10 07:01:29 dg1test kernel: common_interrupt+0x60/0xe0
> Aug 10 07:01:29 dg1test kernel: asm_common_interrupt+0x26/0x40
> Aug 10 07:01:29 dg1test kernel:
> other info that might help us debug this:
> Aug 10 07:01:29 dg1test kernel: Chain exists of:
> &fctx->lock --> &device->intr.lock --> &event->list_lock#2
> Aug 10 07:01:29 dg1test kernel: Possible unsafe locking scenario:
> Aug 10 07:01:29 dg1test kernel:        CPU0                    CPU1
> Aug 10 07:01:29 dg1test kernel:        ----                    ----
> Aug 10 07:01:29 dg1test kernel:   lock(&event->list_lock#2);
> Aug 10 07:01:29 dg1test kernel:                                lock(&device->intr.lock);
> Aug 10 07:01:29 dg1test kernel:                                lock(&event->list_lock#2);
> Aug 10 07:01:29 dg1test kernel:   lock(&fctx->lock);
> Aug 10 07:01:29 dg1test kernel:
> *** DEADLOCK ***
> Aug 10 07:01:29 dg1test kernel: 2 locks held by wireplumber/2236:
> Aug 10 07:01:29 dg1test kernel: #0: ffff8fca53177bf8 (&device->intr.lock){-...}-{2:2}, at: nvkm_intr+0x29/0x240 [nouveau]
> Aug 10 07:01:29 dg1test kernel: #1: ffff8fca41208610 (&event->list_lock#2){-...}-{2:2}, at: nvkm_event_ntfy+0x50/0xf0 [nouveau]
> Aug 10 07:01:29 dg1test kernel:
> stack backtrace:
> Aug 10 07:01:29 dg1test kernel: CPU: 6 PID: 2236 Comm: wireplumber Not tainted 6.4.0-rc7+ #10
> Aug 10 07:01:29 dg1test kernel: Hardware name: Gigabyte Technology Co., Ltd. Z390 I AORUS PRO WIFI/Z390 I AORUS PRO WIFI-CF, BIOS F8 11/05/2021
> Aug 10 07:01:29 dg1test kernel: Call Trace:
> Aug 10 07:01:29 dg1test kernel: <TASK>
> Aug 10 07:01:29 dg1test kernel: dump_stack_lvl+0x5b/0x90
> Aug 10 07:01:29 dg1test kernel: check_noncircular+0xe2/0x110
> Aug 10 07:01:29 dg1test kernel: __lock_acquire+0x14e3/0x2240
> Aug 10 07:01:29 dg1test kernel: lock_acquire+0xc8/0x2a0
> Aug 10 07:01:29 dg1test kernel: ? nouveau_fence_wait_uevent_handler+0x2b/0x100 [nouveau]
> Aug 10 07:01:29 dg1test kernel: ? lock_acquire+0xc8/0x2a0
> Aug 10 07:01:29 dg1test kernel: _raw_spin_lock_irqsave+0x4b/0x70
> Aug 10 07:01:29 dg1test kernel: ? nouveau_fence_wait_uevent_handler+0x2b/0x100 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nouveau_fence_wait_uevent_handler+0x2b/0x100 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_client_event+0xf/0x20 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_event_ntfy+0x9b/0xf0 [nouveau]
> Aug 10 07:01:29 dg1test kernel: ga100_fifo_nonstall_intr+0x24/0x30 [nouveau]
> Aug 10 07:01:29 dg1test kernel: nvkm_intr+0x12c/0x240 [nouveau]
> Aug 10 07:01:29 dg1test kernel: __handle_irq_event_percpu+0x88/0x240
> Aug 10 07:01:29 dg1test kernel: handle_irq_event+0x38/0x80
> Aug 10 07:01:29 dg1test kernel: handle_edge_irq+0xa3/0x240
> Aug 10 07:01:29 dg1test kernel: __common_interrupt+0x72/0x160
> Aug 10 07:01:29 dg1test kernel: common_interrupt+0x60/0xe0
> Aug 10 07:01:29 dg1test kernel: asm_common_interrupt+0x26/0x40
> Aug 10 07:01:29 dg1test kernel: RIP: 0033:0x7fb66174d700
> Aug 10 07:01:29 dg1test kernel: Code: c1 e2 05 29 ca 8d 0c 10 0f be 07 84 c0 75 eb 89 c8 c3 0f 1f 84 00 00 00 00 00 f3 0f 1e fa e9 d7 0f fc ff 0f 1f 80 00 00 00 00 <f3> 0f 1e fa e9 c7 0f fc>
> Aug 10 07:01:29 dg1test kernel: RSP: 002b:00007ffdd3c48438 EFLAGS: 00000206
> Aug 10 07:01:29 dg1test kernel: RAX: 000055bb758763c0 RBX: 000055bb758752c0 RCX: 00000000000028b0
> Aug 10 07:01:29 dg1test kernel: RDX: 000055bb758752c0 RSI: 000055bb75887490 RDI: 000055bb75862950
> Aug 10 07:01:29 dg1test kernel: RBP: 00007ffdd3c48490 R08: 000055bb75873b10 R09: 0000000000000001
> Aug 10 07:01:29 dg1test kernel: R10: 0000000000000004 R11: 000055bb7587f000 R12: 000055bb75887490
> Aug 10 07:01:29 dg1test kernel: R13: 000055bb757f6280 R14: 000055bb758875c0 R15: 000055bb757f6280
> Aug 10 07:01:29 dg1test kernel: </TASK>
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
Tested-by: Danilo Krummrich <dakr at redhat.com>
and
Reviewed-by: Danilo Krummrich <dakr at redhat.com>
> ---
> drivers/gpu/drm/nouveau/include/nvkm/core/event.h | 4 ++--
> drivers/gpu/drm/nouveau/nvkm/core/event.c | 12 ++++++------
> 2 files changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
> index 82b267c11147..460459af272d 100644
> --- a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
> +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
> @@ -14,7 +14,7 @@ struct nvkm_event {
> int index_nr;
>
> spinlock_t refs_lock;
> - spinlock_t list_lock;
> + rwlock_t list_lock;
> int *refs;
>
> struct list_head ntfy;
> @@ -38,7 +38,7 @@ nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *subdev,
> int types_nr, int index_nr, struct nvkm_event *event)
> {
> spin_lock_init(&event->refs_lock);
> - spin_lock_init(&event->list_lock);
> + rwlock_init(&event->list_lock);
> return __nvkm_event_init(func, subdev, types_nr, index_nr, event);
> }
>
> diff --git a/drivers/gpu/drm/nouveau/nvkm/core/event.c b/drivers/gpu/drm/nouveau/nvkm/core/event.c
> index a6c877135598..61fed7792e41 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/core/event.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/core/event.c
> @@ -81,17 +81,17 @@ nvkm_event_ntfy_state(struct nvkm_event_ntfy *ntfy)
> static void
> nvkm_event_ntfy_remove(struct nvkm_event_ntfy *ntfy)
> {
> - spin_lock_irq(&ntfy->event->list_lock);
> + write_lock_irq(&ntfy->event->list_lock);
> list_del_init(&ntfy->head);
> - spin_unlock_irq(&ntfy->event->list_lock);
> + write_unlock_irq(&ntfy->event->list_lock);
> }
>
> static void
> nvkm_event_ntfy_insert(struct nvkm_event_ntfy *ntfy)
> {
> - spin_lock_irq(&ntfy->event->list_lock);
> + write_lock_irq(&ntfy->event->list_lock);
> list_add_tail(&ntfy->head, &ntfy->event->ntfy);
> - spin_unlock_irq(&ntfy->event->list_lock);
> + write_unlock_irq(&ntfy->event->list_lock);
> }
>
> static void
> @@ -176,7 +176,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
> return;
>
> nvkm_trace(event->subdev, "event: ntfy %08x on %d\n", bits, id);
> - spin_lock_irqsave(&event->list_lock, flags);
> + read_lock_irqsave(&event->list_lock, flags);
>
> list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) {
> if (ntfy->id == id && ntfy->bits & bits) {
> @@ -185,7 +185,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
> }
> }
>
> - spin_unlock_irqrestore(&event->list_lock, flags);
> + read_unlock_irqrestore(&event->list_lock, flags);
> }
>
> void
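
For reference, a reduced sketch of the locking pattern the patch ends up
with, not the actual nvkm code: list insert/remove take the write side with
write_lock_irq(), while the notify walk called from the interrupt handler
only takes the read side with read_lock_irqsave(). The struct and function
names (demo_event, demo_ntfy, demo_ntfy_insert, demo_ntfy_trigger) are made
up for illustration only.

  #include <linux/list.h>
  #include <linux/spinlock.h>	/* rwlock_t, read/write lock helpers */
  #include <linux/types.h>

  struct demo_event {
  	rwlock_t list_lock;		/* protects the ntfy list, like nvkm_event above */
  	struct list_head ntfy;		/* registered notifiers */
  };

  struct demo_ntfy {
  	struct list_head head;
  	int id;
  	u32 bits;
  };

  static void demo_event_init(struct demo_event *event)
  {
  	rwlock_init(&event->list_lock);
  	INIT_LIST_HEAD(&event->ntfy);
  }

  static void demo_ntfy_insert(struct demo_event *event, struct demo_ntfy *ntfy)
  {
  	/* List modifications take the write side, mirroring
  	 * nvkm_event_ntfy_insert()/_remove() in the patch. */
  	write_lock_irq(&event->list_lock);
  	list_add_tail(&ntfy->head, &event->ntfy);
  	write_unlock_irq(&event->list_lock);
  }

  static void demo_ntfy_trigger(struct demo_event *event, int id, u32 bits)
  {
  	struct demo_ntfy *ntfy, *ntmp;
  	unsigned long flags;

  	/* The interrupt-time walk only reads the list, so it takes the
  	 * read side; insert/remove above are the only writers. */
  	read_lock_irqsave(&event->list_lock, flags);
  	list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) {
  		if (ntfy->id == id && ntfy->bits & bits) {
  			/* deliver the notification here */
  		}
  	}
  	read_unlock_irqrestore(&event->list_lock, flags);
  }

(The _safe iterator is kept only to mirror the patched nvkm_event_ntfy();
the read-side walk itself does not remove entries.)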