[PATCH 1/4] drm/vmwgfx: Fix a deadlock in dma buf fence polling
Martin Krastev
martin.krastev at broadcom.com
Thu Jun 27 12:08:14 UTC 2024
On Thu, Jun 27, 2024 at 8:34 AM Zack Rusin <zack.rusin at broadcom.com> wrote:
>
> Introduce a version of the fence ops that, on release, doesn't remove
> the fence from the pending list and thus doesn't need to take the
> lock. This fixes the poll->fence wait->fence unref deadlock.
>
> vmwgfx overrides the wait callback to iterate over the list of all
> fences and update their status; to do that it holds a lock that
> prevents list modifications from other threads. The fence destroy
> callback both removes the fence from the list of pending fences and
> deletes it, taking the same lock to do so.
>
> The dma-buf polling callback unrefs a fence after it has been
> signaled: the poll calls the wait, the wait signals the fences, and
> signaling drops the last reference, destroying them. The destruction
> tries to acquire the lock protecting the pending-fence list, a lock
> it can never get because it's still held by the wait from which the
> destruction was called.
>
> This is an old bug, but until recently not a lot of userspace apps
> were using the dma-buf polling interfaces. In particular, this fixes
> the stalls/deadlocks seen with KDE.
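
For the archive, the chain as I read it, in simplified form (not the
verbatim driver code; fman->lock is also the lock vmwgfx hands to
dma_fence_init(), so signaling runs under it):

  poll(2) on the dma-buf
    -> dma_buf_poll()
         -> dma_fence_add_callback(..., dma_buf_poll_cb)
            /* dma_buf_poll_cb() drops its fence reference when run */

  later, any waiter
    -> vmw_fence_wait()                      /* custom .wait            */
         spin_lock(&fman->lock)              /* guards fman->fence_list */
         -> __vmw_fences_update()
              -> dma_fence_signal_locked()
                   -> dma_buf_poll_cb()
                        -> dma_fence_put()   /* last reference gone     */
                             -> vmw_fence_obj_destroy()    /* .release  */
                                  -> spin_lock(&fman->lock)
                                     /* already held -> deadlock        */
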
>
> Signed-off-by: Zack Rusin <zack.rusin at broadcom.com>
> Fixes: 2298e804e96e ("drm/vmwgfx: rework to new fence interface, v2")
> Cc: Broadcom internal kernel review list <bcm-kernel-feedback-list at broadcom.com>
> Cc: dri-devel at lists.freedesktop.org
> Cc: <stable at vger.kernel.org> # v6.2+
> ---
> drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 26 ++++++++++++++++++++------
> 1 file changed, 20 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> index 5efc6a766f64..76971ef7801a 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> @@ -32,7 +32,6 @@
> #define VMW_FENCE_WRAP (1 << 31)
>
> struct vmw_fence_manager {
> - int num_fence_objects;
> struct vmw_private *dev_priv;
> spinlock_t lock;
> struct list_head fence_list;
> @@ -120,16 +119,23 @@ static void vmw_fence_goal_write(struct vmw_private *vmw, u32 value)
> * objects with actions attached to them.
> */
>
> -static void vmw_fence_obj_destroy(struct dma_fence *f)
> +static void vmw_fence_obj_destroy_removed(struct dma_fence *f)
> {
> struct vmw_fence_obj *fence =
> container_of(f, struct vmw_fence_obj, base);
>
> + WARN_ON(!list_empty(&fence->head));
> + fence->destroy(fence);
> +}
> +
> +static void vmw_fence_obj_destroy(struct dma_fence *f)
> +{
> + struct vmw_fence_obj *fence =
> + container_of(f, struct vmw_fence_obj, base);
> struct vmw_fence_manager *fman = fman_from_fence(fence);
>
> spin_lock(&fman->lock);
> list_del_init(&fence->head);
> - --fman->num_fence_objects;
> spin_unlock(&fman->lock);
> fence->destroy(fence);
> }
> @@ -257,6 +263,13 @@ static const struct dma_fence_ops vmw_fence_ops = {
> .release = vmw_fence_obj_destroy,
> };
>
> +static const struct dma_fence_ops vmw_fence_ops_removed = {
> + .get_driver_name = vmw_fence_get_driver_name,
> + .get_timeline_name = vmw_fence_get_timeline_name,
> + .enable_signaling = vmw_fence_enable_signaling,
> + .wait = vmw_fence_wait,
> + .release = vmw_fence_obj_destroy_removed,
> +};
>
> /*
> * Execute signal actions on fences recently signaled.
> @@ -355,7 +368,6 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
> goto out_unlock;
> }
> list_add_tail(&fence->head, &fman->fence_list);
> - ++fman->num_fence_objects;
>
> out_unlock:
> spin_unlock(&fman->lock);
> @@ -403,7 +415,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
> u32 passed_seqno)
> {
> u32 goal_seqno;
> - struct vmw_fence_obj *fence;
> + struct vmw_fence_obj *fence, *next_fence;
>
> if (likely(!fman->seqno_valid))
> return false;
> @@ -413,7 +425,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
> return false;
>
> fman->seqno_valid = false;
> - list_for_each_entry(fence, &fman->fence_list, head) {
> + list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
> if (!list_empty(&fence->seq_passed_actions)) {
> fman->seqno_valid = true;
> vmw_fence_goal_write(fman->dev_priv,
> @@ -471,6 +483,7 @@ static void __vmw_fences_update(struct vmw_fence_manager *fman)
> rerun:
> list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
> if (seqno - fence->base.seqno < VMW_FENCE_WRAP) {
> + fence->base.ops = &vmw_fence_ops_removed;
> list_del_init(&fence->head);
> dma_fence_signal_locked(&fence->base);
> INIT_LIST_HEAD(&action_list);
> @@ -662,6 +675,7 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
> VMW_FENCE_WAIT_TIMEOUT);
>
> if (unlikely(ret != 0)) {
> + fence->base.ops = &vmw_fence_ops_removed;
> list_del_init(&fence->head);
> dma_fence_signal(&fence->base);
> INIT_LIST_HEAD(&action_list);
> --
> 2.40.1
>
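The part that makes this safe, as far as I can tell: the ops swap and
the list unlink happen in the same critical section, before the signal
that can drop the last reference. Condensed from the hunks above:

  /* with fman->lock held */
  fence->base.ops = &vmw_fence_ops_removed; /* release no longer locks */
  list_del_init(&fence->head);              /* off the pending list    */
  dma_fence_signal_locked(&fence->base);    /* may drop the final ref  */

So any release that runs afterwards hits vmw_fence_obj_destroy_removed(),
which skips fman->lock entirely and only WARNs if the fence is somehow
still linked.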
Neat fix!
Reviewed-by: Martin Krastev <martin.krastev at broadcom.com>
Regards,
Martin