[PATCH 1/4] drm/vmwgfx: Fix a deadlock in dma buf fence polling

Martin Krastev martin.krastev at broadcom.com
Thu Jun 27 12:08:14 UTC 2024


On Thu, Jun 27, 2024 at 8:34 AM Zack Rusin <zack.rusin at broadcom.com> wrote:
>
> Introduce a version of the fence ops that, on release, doesn't remove
> the fence from the pending list and thus doesn't need to take the
> lock. This fixes the poll->fence wait->fence unref deadlocks.
>
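For anyone skimming the diff: as I read it, the trick is that every
signal path flips the fence over to the new ops table at the moment it
unlinks the fence, so the eventual release never needs fman->lock
again. Condensed from the hunks below (caller already holds fman->lock):

        fence->base.ops = &vmw_fence_ops_removed;
        list_del_init(&fence->head);
        dma_fence_signal_locked(&fence->base);
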
> vmwgfx overrides the wait callback to iterate over the list of all
> fences and update their status; to do that it holds a lock that
> prevents list modifications from other threads. The fence destroy
> callback both deletes the fence and removes it from the list of
> pending fences, for which it takes the same lock.
>
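The inversion is easy to model in userspace, if that helps review. A
minimal sketch (all names are mine), with an error-checking pthread
mutex standing in for the fman->lock spinlock, which would simply hang:

        #include <errno.h>
        #include <pthread.h>
        #include <stdio.h>

        static pthread_mutex_t fman_lock;

        /* Stands in for vmw_fence_obj_destroy(): release takes the
         * manager lock to unlink the fence from the pending list. */
        static void fence_release(void)
        {
                if (pthread_mutex_lock(&fman_lock) == EDEADLK) {
                        puts("deadlock: lock already held by this thread");
                        return;
                }
                /* list_del_init(&fence->head) would go here */
                pthread_mutex_unlock(&fman_lock);
        }

        /* Stands in for the wait callback: it signals fences while
         * holding the lock, and signaling drops the poll callback's
         * last reference. */
        static void fence_wait(void)
        {
                pthread_mutex_lock(&fman_lock);
                fence_release();        /* final unref under the lock */
                pthread_mutex_unlock(&fman_lock);
        }

        int main(void)
        {
                pthread_mutexattr_t attr;

                /* ERRORCHECK reports self-deadlock instead of hanging */
                pthread_mutexattr_init(&attr);
                pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
                pthread_mutex_init(&fman_lock, &attr);

                fence_wait();
                return 0;
        }
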
> The dma-buf polling callback unrefs a fence once it has been signaled,
> so the poll calls the wait, the wait signals the fences, and signaling
> drops the last references, destroying the fences. The destruction
> tries to acquire the lock on the pending-fences list, which it can
> never get because that lock is still held by the wait from which it
> was called.
>
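Spelled out as a call chain (simplified; the exact frames in between
are my reconstruction, but the endpoints are all in the driver):

        dma_buf_poll()                        takes a ref per fence
          vmw_fence_wait()                    takes fman->lock and
                                              signals passed fences
            dma_fence_signal_locked()
              dma_buf_poll_cb()               runs on signal
                dma_fence_put()               drops the last ref
                  vmw_fence_obj_destroy()
                    spin_lock(&fman->lock)    already held: deadlock
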
> This is an old bug, but until recently not many userspace apps were
> using the dma-buf polling interfaces. Fix those deadlocks; in
> particular this fixes the KDE stalls/deadlocks.
>
> Signed-off-by: Zack Rusin <zack.rusin at broadcom.com>
> Fixes: 2298e804e96e ("drm/vmwgfx: rework to new fence interface, v2")
> Cc: Broadcom internal kernel review list <bcm-kernel-feedback-list at broadcom.com>
> Cc: dri-devel at lists.freedesktop.org
> Cc: <stable at vger.kernel.org> # v6.2+
> ---
>  drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 26 ++++++++++++++++++++------
>  1 file changed, 20 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> index 5efc6a766f64..76971ef7801a 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> @@ -32,7 +32,6 @@
>  #define VMW_FENCE_WRAP (1 << 31)
>
>  struct vmw_fence_manager {
> -       int num_fence_objects;
>         struct vmw_private *dev_priv;
>         spinlock_t lock;
>         struct list_head fence_list;
> @@ -120,16 +119,23 @@ static void vmw_fence_goal_write(struct vmw_private *vmw, u32 value)
>   * objects with actions attached to them.
>   */
>
> -static void vmw_fence_obj_destroy(struct dma_fence *f)
> +static void vmw_fence_obj_destroy_removed(struct dma_fence *f)
>  {
>         struct vmw_fence_obj *fence =
>                 container_of(f, struct vmw_fence_obj, base);
>
> +       WARN_ON(!list_empty(&fence->head));
> +       fence->destroy(fence);
> +}
> +
> +static void vmw_fence_obj_destroy(struct dma_fence *f)
> +{
> +       struct vmw_fence_obj *fence =
> +               container_of(f, struct vmw_fence_obj, base);
>         struct vmw_fence_manager *fman = fman_from_fence(fence);
>
>         spin_lock(&fman->lock);
>         list_del_init(&fence->head);
> -       --fman->num_fence_objects;
>         spin_unlock(&fman->lock);
>         fence->destroy(fence);
>  }
> @@ -257,6 +263,13 @@ static const struct dma_fence_ops vmw_fence_ops = {
>         .release = vmw_fence_obj_destroy,
>  };
>
> +static const struct dma_fence_ops vmw_fence_ops_removed = {
> +       .get_driver_name = vmw_fence_get_driver_name,
> +       .get_timeline_name = vmw_fence_get_timeline_name,
> +       .enable_signaling = vmw_fence_enable_signaling,
> +       .wait = vmw_fence_wait,
> +       .release = vmw_fence_obj_destroy_removed,
> +};
>
>  /*
>   * Execute signal actions on fences recently signaled.
> @@ -355,7 +368,6 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
>                 goto out_unlock;
>         }
>         list_add_tail(&fence->head, &fman->fence_list);
> -       ++fman->num_fence_objects;
>
>  out_unlock:
>         spin_unlock(&fman->lock);
> @@ -403,7 +415,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
>                                       u32 passed_seqno)
>  {
>         u32 goal_seqno;
> -       struct vmw_fence_obj *fence;
> +       struct vmw_fence_obj *fence, *next_fence;
>
>         if (likely(!fman->seqno_valid))
>                 return false;
> @@ -413,7 +425,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
>                 return false;
>
>         fman->seqno_valid = false;
> -       list_for_each_entry(fence, &fman->fence_list, head) {
> +       list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
>                 if (!list_empty(&fence->seq_passed_actions)) {
>                         fman->seqno_valid = true;
>                         vmw_fence_goal_write(fman->dev_priv,
> @@ -471,6 +483,7 @@ static void __vmw_fences_update(struct vmw_fence_manager *fman)
>  rerun:
>         list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
>                 if (seqno - fence->base.seqno < VMW_FENCE_WRAP) {
> +                       fence->base.ops = &vmw_fence_ops_removed;
>                         list_del_init(&fence->head);
>                         dma_fence_signal_locked(&fence->base);
>                         INIT_LIST_HEAD(&action_list);
> @@ -662,6 +675,7 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
>                                          VMW_FENCE_WAIT_TIMEOUT);
>
>                 if (unlikely(ret != 0)) {
> +                       fence->base.ops = &vmw_fence_ops_removed;
>                         list_del_init(&fence->head);
>                         dma_fence_signal(&fence->base);
>                         INIT_LIST_HEAD(&action_list);
> --
> 2.40.1
>

Neat fix!
Reviewed-by: Martin Krastev <martin.krastev at broadcom.com>

Regards,
Martin

