[PATCH] nouveau: offload fence uevents work to workqueue

Danilo Krummrich dakr at redhat.com
Mon Feb 5 16:22:19 UTC 2024


On 1/29/24 02:50, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
> 
> This should break the deadlock between the fctx lock and the irq lock.
> 
> This offloads the processing off the work from the irq into a workqueue.
> 
> Signed-off-by: Dave Airlie <airlied at redhat.com>

Nouveau's scheduler uses a dedicated wq, hence from this perspective it's
safe deferring fence signalling to the kernel global wq. However, I wonder
if we could create deadlocks by building dependency chains into other
drivers / kernel code that, by chance, makes use of the kernel global wq as
well.

Admittedly, even if, it's gonna be extremely unlikely given that
WQ_MAX_ACTIVE == 512. But maybe it'd be safer to use a dedicated wq.

Also, do we need to CC stable?

> ---
>   drivers/gpu/drm/nouveau/nouveau_fence.c | 24 ++++++++++++++++++------
>   drivers/gpu/drm/nouveau/nouveau_fence.h |  1 +
>   2 files changed, 19 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index ca762ea55413..93f08f9479d8 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -103,6 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
>   void
>   nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
>   {
> +	cancel_work_sync(&fctx->uevent_work);
>   	nouveau_fence_context_kill(fctx, 0);
>   	nvif_event_dtor(&fctx->event);
>   	fctx->dead = 1;
> @@ -145,12 +146,13 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc
>   	return drop;
>   }
>   
> -static int
> -nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc)
> +static void
> +nouveau_fence_uevent_work(struct work_struct *work)
>   {
> -	struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event);
> +	struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
> +						       uevent_work);
>   	unsigned long flags;
> -	int ret = NVIF_EVENT_KEEP;
> +	int drop = 0;
>   
>   	spin_lock_irqsave(&fctx->lock, flags);
>   	if (!list_empty(&fctx->pending)) {
> @@ -160,11 +162,20 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc
>   		fence = list_entry(fctx->pending.next, typeof(*fence), head);
>   		chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
>   		if (nouveau_fence_update(chan, fctx))
> -			ret = NVIF_EVENT_DROP;
> +			drop = 1;
>   	}
> +	if (drop)
> +		nvif_event_block(&fctx->event);
> +
>   	spin_unlock_irqrestore(&fctx->lock, flags);
> +}
>   
> -	return ret;
> +static int
> +nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc)
> +{
> +	struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event);
> +	schedule_work(&fctx->uevent_work);
> +	return NVIF_EVENT_KEEP;
>   }
>   
>   void
> @@ -178,6 +189,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
>   	} args;
>   	int ret;
>   
> +	INIT_WORK(&fctx->uevent_work, nouveau_fence_uevent_work);
>   	INIT_LIST_HEAD(&fctx->flip);
>   	INIT_LIST_HEAD(&fctx->pending);
>   	spin_lock_init(&fctx->lock);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
> index 64d33ae7f356..8bc065acfe35 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
> @@ -44,6 +44,7 @@ struct nouveau_fence_chan {
>   	u32 context;
>   	char name[32];
>   
> +	struct work_struct uevent_work;
>   	struct nvif_event event;
>   	int notify_ref, dead, killed;
>   };



More information about the dri-devel mailing list