[PATCH] dma-buf: Restore seqlock around dma_resv updates

Chris Wilson chris at chris-wilson.co.uk
Wed Aug 14 18:26:44 UTC 2019


Quoting Chris Wilson (2019-08-14 19:24:01)
> This reverts
> 67c97fb79a7f ("dma-buf: add reservation_object_fences helper")
> dd7a7d1ff2f1 ("drm/i915: use new reservation_object_fences helper")
> 0e1d8083bddb ("dma-buf: further relax reservation_object_add_shared_fence")
> 5d344f58da76 ("dma-buf: nuke reservation_object seq number")
> 
> The scenario that defeats simply grabbing a set of shared/exclusive
> fences and using them blissfully under RCU is that any of those fences
> may be reallocated by a SLAB_TYPESAFE_BY_RCU fence slab cache. In this
> scenario, while keeping the rcu_read_lock we need to establish that no
> fence was changed in the dma_resv after a read (or full) memory barrier.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>

I said I needed to go lie down, that proves it.

Cc: Christian König <christian.koenig at amd.com>

> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> ---
>  drivers/dma-buf/dma-buf.c                     |  31 ++++-
>  drivers/dma-buf/dma-resv.c                    | 109 ++++++++++++-----
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   7 +-
>  drivers/gpu/drm/i915/gem/i915_gem_busy.c      |  24 ++--
>  include/linux/dma-resv.h                      | 113 ++++++++----------
>  5 files changed, 175 insertions(+), 109 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index b3400d6524ab..433d91d710e4 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -199,7 +199,7 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
>         struct dma_resv_list *fobj;
>         struct dma_fence *fence_excl;
>         __poll_t events;
> -       unsigned shared_count;
> +       unsigned shared_count, seq;
>  
>         dmabuf = file->private_data;
>         if (!dmabuf || !dmabuf->resv)
> @@ -213,8 +213,21 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll)
>         if (!events)
>                 return 0;
>  
> +retry:
> +       seq = read_seqcount_begin(&resv->seq);
>         rcu_read_lock();
> -       dma_resv_fences(resv, &fence_excl, &fobj, &shared_count);
> +
> +       fobj = rcu_dereference(resv->fence);
> +       if (fobj)
> +               shared_count = fobj->shared_count;
> +       else
> +               shared_count = 0;
> +       fence_excl = rcu_dereference(resv->fence_excl);
> +       if (read_seqcount_retry(&resv->seq, seq)) {
> +               rcu_read_unlock();
> +               goto retry;
> +       }
> +
>         if (fence_excl && (!(events & EPOLLOUT) || shared_count == 0)) {
>                 struct dma_buf_poll_cb_t *dcb = &dmabuf->cb_excl;
>                 __poll_t pevents = EPOLLIN;
> @@ -1144,6 +1157,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
>         struct dma_resv *robj;
>         struct dma_resv_list *fobj;
>         struct dma_fence *fence;
> +       unsigned seq;
>         int count = 0, attach_count, shared_count, i;
>         size_t size = 0;
>  
> @@ -1174,9 +1188,16 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
>                                 buf_obj->name ?: "");
>  
>                 robj = buf_obj->resv;
> -               rcu_read_lock();
> -               dma_resv_fences(robj, &fence, &fobj, &shared_count);
> -               rcu_read_unlock();
> +               while (true) {
> +                       seq = read_seqcount_begin(&robj->seq);
> +                       rcu_read_lock();
> +                       fobj = rcu_dereference(robj->fence);
> +                       shared_count = fobj ? fobj->shared_count : 0;
> +                       fence = rcu_dereference(robj->fence_excl);
> +                       if (!read_seqcount_retry(&robj->seq, seq))
> +                               break;
> +                       rcu_read_unlock();
> +               }
>  
>                 if (fence)
>                         seq_printf(s, "\tExclusive fence: %s %s %ssignalled\n",
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index f5142683c851..42a8f3f11681 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -49,6 +49,12 @@
>  DEFINE_WD_CLASS(reservation_ww_class);
>  EXPORT_SYMBOL(reservation_ww_class);
>  
> +struct lock_class_key reservation_seqcount_class;
> +EXPORT_SYMBOL(reservation_seqcount_class);
> +
> +const char reservation_seqcount_string[] = "reservation_seqcount";
> +EXPORT_SYMBOL(reservation_seqcount_string);
> +
>  /**
>   * dma_resv_list_alloc - allocate fence list
>   * @shared_max: number of fences we need space for
> @@ -96,6 +102,9 @@ static void dma_resv_list_free(struct dma_resv_list *list)
>  void dma_resv_init(struct dma_resv *obj)
>  {
>         ww_mutex_init(&obj->lock, &reservation_ww_class);
> +
> +       __seqcount_init(&obj->seq, reservation_seqcount_string,
> +                       &reservation_seqcount_class);
>         RCU_INIT_POINTER(obj->fence, NULL);
>         RCU_INIT_POINTER(obj->fence_excl, NULL);
>  }
> @@ -225,6 +234,9 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
>         fobj = dma_resv_get_list(obj);
>         count = fobj->shared_count;
>  
> +       preempt_disable();
> +       write_seqcount_begin(&obj->seq);
> +
>         for (i = 0; i < count; ++i) {
>  
>                 old = rcu_dereference_protected(fobj->shared[i],
> @@ -242,6 +254,9 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
>         RCU_INIT_POINTER(fobj->shared[i], fence);
>         /* pointer update must be visible before we extend the shared_count */
>         smp_store_mb(fobj->shared_count, count);
> +
> +       write_seqcount_end(&obj->seq);
> +       preempt_enable();
>         dma_fence_put(old);
>  }
>  EXPORT_SYMBOL(dma_resv_add_shared_fence);
> @@ -269,10 +284,12 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
>                 dma_fence_get(fence);
>  
>         preempt_disable();
> -       rcu_assign_pointer(obj->fence_excl, fence);
> -       /* pointer update must be visible before we modify the shared_count */
> +       write_seqcount_begin(&obj->seq);
> +       /* write_seqcount_begin provides the necessary memory barrier */
> +       RCU_INIT_POINTER(obj->fence_excl, fence);
>         if (old)
> -               smp_store_mb(old->shared_count, 0);
> +               old->shared_count = 0;
> +       write_seqcount_end(&obj->seq);
>         preempt_enable();
>  
>         /* inplace update, no shared fences */
> @@ -295,15 +312,17 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
>  {
>         struct dma_resv_list *src_list, *dst_list;
>         struct dma_fence *old, *new;
> -       unsigned int i, shared_count;
> +       unsigned i;
>  
>         dma_resv_assert_held(dst);
>  
>         rcu_read_lock();
> +       src_list = rcu_dereference(src->fence);
>  
>  retry:
> -       dma_resv_fences(src, &new, &src_list, &shared_count);
> -       if (shared_count) {
> +       if (src_list) {
> +               unsigned shared_count = src_list->shared_count;
> +
>                 rcu_read_unlock();
>  
>                 dst_list = dma_resv_list_alloc(shared_count);
> @@ -311,14 +330,14 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
>                         return -ENOMEM;
>  
>                 rcu_read_lock();
> -               dma_resv_fences(src, &new, &src_list, &shared_count);
> -               if (!src_list || shared_count > dst_list->shared_max) {
> +               src_list = rcu_dereference(src->fence);
> +               if (!src_list || src_list->shared_count > shared_count) {
>                         kfree(dst_list);
>                         goto retry;
>                 }
>  
>                 dst_list->shared_count = 0;
> -               for (i = 0; i < shared_count; ++i) {
> +               for (i = 0; i < src_list->shared_count; ++i) {
>                         struct dma_fence *fence;
>  
>                         fence = rcu_dereference(src_list->shared[i]);
> @@ -328,6 +347,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
>  
>                         if (!dma_fence_get_rcu(fence)) {
>                                 dma_resv_list_free(dst_list);
> +                               src_list = rcu_dereference(src->fence);
>                                 goto retry;
>                         }
>  
> @@ -342,18 +362,18 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
>                 dst_list = NULL;
>         }
>  
> -       if (new && !dma_fence_get_rcu(new)) {
> -               dma_resv_list_free(dst_list);
> -               goto retry;
> -       }
> +       new = dma_fence_get_rcu_safe(&src->fence_excl);
>         rcu_read_unlock();
>  
>         src_list = dma_resv_get_list(dst);
>         old = dma_resv_get_excl(dst);
>  
>         preempt_disable();
> -       rcu_assign_pointer(dst->fence_excl, new);
> -       rcu_assign_pointer(dst->fence, dst_list);
> +       write_seqcount_begin(&dst->seq);
> +       /* write_seqcount_begin provides the necessary memory barrier */
> +       RCU_INIT_POINTER(dst->fence_excl, new);
> +       RCU_INIT_POINTER(dst->fence, dst_list);
> +       write_seqcount_end(&dst->seq);
>         preempt_enable();
>  
>         dma_resv_list_free(src_list);
> @@ -388,18 +408,19 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj,
>  
>         do {
>                 struct dma_resv_list *fobj;
> -               unsigned int i;
> +               unsigned int i, seq;
>                 size_t sz = 0;
>  
> -               i = 0;
> +               shared_count = i = 0;
>  
>                 rcu_read_lock();
> -               dma_resv_fences(obj, &fence_excl, &fobj,
> -                                         &shared_count);
> +               seq = read_seqcount_begin(&obj->seq);
>  
> +               fence_excl = rcu_dereference(obj->fence_excl);
>                 if (fence_excl && !dma_fence_get_rcu(fence_excl))
>                         goto unlock;
>  
> +               fobj = rcu_dereference(obj->fence);
>                 if (fobj)
>                         sz += sizeof(*shared) * fobj->shared_max;
>  
> @@ -427,6 +448,7 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj,
>                                 break;
>                         }
>                         shared = nshared;
> +                       shared_count = fobj ? fobj->shared_count : 0;
>                         for (i = 0; i < shared_count; ++i) {
>                                 shared[i] = rcu_dereference(fobj->shared[i]);
>                                 if (!dma_fence_get_rcu(shared[i]))
> @@ -434,7 +456,7 @@ int dma_resv_get_fences_rcu(struct dma_resv *obj,
>                         }
>                 }
>  
> -               if (i != shared_count) {
> +               if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) {
>                         while (i--)
>                                 dma_fence_put(shared[i]);
>                         dma_fence_put(fence_excl);
> @@ -478,17 +500,18 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj,
>                                bool wait_all, bool intr,
>                                unsigned long timeout)
>  {
> -       struct dma_resv_list *fobj;
>         struct dma_fence *fence;
> -       unsigned shared_count;
> +       unsigned seq, shared_count;
>         long ret = timeout ? timeout : 1;
>         int i;
>  
>  retry:
> +       shared_count = 0;
> +       seq = read_seqcount_begin(&obj->seq);
>         rcu_read_lock();
>         i = -1;
>  
> -       dma_resv_fences(obj, &fence, &fobj, &shared_count);
> +       fence = rcu_dereference(obj->fence_excl);
>         if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
>                 if (!dma_fence_get_rcu(fence))
>                         goto unlock_retry;
> @@ -503,6 +526,11 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj,
>         }
>  
>         if (wait_all) {
> +               struct dma_resv_list *fobj = rcu_dereference(obj->fence);
> +
> +               if (fobj)
> +                       shared_count = fobj->shared_count;
> +
>                 for (i = 0; !fence && i < shared_count; ++i) {
>                         struct dma_fence *lfence = rcu_dereference(fobj->shared[i]);
>  
> @@ -525,6 +553,11 @@ long dma_resv_wait_timeout_rcu(struct dma_resv *obj,
>  
>         rcu_read_unlock();
>         if (fence) {
> +               if (read_seqcount_retry(&obj->seq, seq)) {
> +                       dma_fence_put(fence);
> +                       goto retry;
> +               }
> +
>                 ret = dma_fence_wait_timeout(fence, intr, ret);
>                 dma_fence_put(fence);
>                 if (ret > 0 && wait_all && (i + 1 < shared_count))
> @@ -567,19 +600,23 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence)
>   */
>  bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all)
>  {
> -       struct dma_resv_list *fobj;
> -       struct dma_fence *fence_excl;
> -       unsigned shared_count;
> +       unsigned seq, shared_count;
>         int ret;
>  
>         rcu_read_lock();
>  retry:
>         ret = true;
> +       shared_count = 0;
> +       seq = read_seqcount_begin(&obj->seq);
>  
> -       dma_resv_fences(obj, &fence_excl, &fobj, &shared_count);
>         if (test_all) {
>                 unsigned i;
>  
> +               struct dma_resv_list *fobj = rcu_dereference(obj->fence);
> +
> +               if (fobj)
> +                       shared_count = fobj->shared_count;
> +
>                 for (i = 0; i < shared_count; ++i) {
>                         struct dma_fence *fence = rcu_dereference(fobj->shared[i]);
>  
> @@ -589,14 +626,24 @@ bool dma_resv_test_signaled_rcu(struct dma_resv *obj, bool test_all)
>                         else if (!ret)
>                                 break;
>                 }
> -       }
>  
> -       if (!shared_count && fence_excl) {
> -               ret = dma_resv_test_signaled_single(fence_excl);
> -               if (ret < 0)
> +               if (read_seqcount_retry(&obj->seq, seq))
>                         goto retry;
>         }
>  
> +       if (!shared_count) {
> +               struct dma_fence *fence_excl = rcu_dereference(obj->fence_excl);
> +
> +               if (fence_excl) {
> +                       ret = dma_resv_test_signaled_single(fence_excl);
> +                       if (ret < 0)
> +                               goto retry;
> +
> +                       if (read_seqcount_retry(&obj->seq, seq))
> +                               goto retry;
> +               }
> +       }
> +
>         rcu_read_unlock();
>         return ret;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index bc4ec6b20a87..76e3516484e7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -251,7 +251,12 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
>         new->shared_max = old->shared_max;
>         new->shared_count = k;
>  
> -       rcu_assign_pointer(resv->fence, new);
> +       /* Install the new fence list, seqcount provides the barriers */
> +       preempt_disable();
> +       write_seqcount_begin(&resv->seq);
> +       RCU_INIT_POINTER(resv->fence, new);
> +       write_seqcount_end(&resv->seq);
> +       preempt_enable();
>  
>         /* Drop the references to the removed fences or move them to ef_list */
>         for (i = j, k = 0; i < old->shared_count; ++i) {
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> index a2aff1d8290e..3d4f5775a4ba 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> @@ -83,8 +83,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>         struct drm_i915_gem_busy *args = data;
>         struct drm_i915_gem_object *obj;
>         struct dma_resv_list *list;
> -       unsigned int i, shared_count;
> -       struct dma_fence *excl;
> +       unsigned int seq;
>         int err;
>  
>         err = -ENOENT;
> @@ -110,18 +109,29 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>          * to report the overall busyness. This is what the wait-ioctl does.
>          *
>          */
> -       dma_resv_fences(obj->base.resv, &excl, &list, &shared_count);
> +retry:
> +       seq = raw_read_seqcount(&obj->base.resv->seq);
>  
>         /* Translate the exclusive fence to the READ *and* WRITE engine */
> -       args->busy = busy_check_writer(excl);
> +       args->busy =
> +               busy_check_writer(rcu_dereference(obj->base.resv->fence_excl));
>  
>         /* Translate shared fences to READ set of engines */
> -       for (i = 0; i < shared_count; ++i) {
> -               struct dma_fence *fence = rcu_dereference(list->shared[i]);
> +       list = rcu_dereference(obj->base.resv->fence);
> +       if (list) {
> +               unsigned int shared_count = list->shared_count, i;
>  
> -               args->busy |= busy_check_reader(fence);
> +               for (i = 0; i < shared_count; ++i) {
> +                       struct dma_fence *fence =
> +                               rcu_dereference(list->shared[i]);
> +
> +                       args->busy |= busy_check_reader(fence);
> +               }
>         }
>  
> +       if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))
> +               goto retry;
> +
>         err = 0;
>  out:
>         rcu_read_unlock();
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index 38f2802afabb..ee50d10f052b 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -46,6 +46,8 @@
>  #include <linux/rcupdate.h>
>  
>  extern struct ww_class reservation_ww_class;
> +extern struct lock_class_key reservation_seqcount_class;
> +extern const char reservation_seqcount_string[];
>  
>  /**
>   * struct dma_resv_list - a list of shared fences
> @@ -69,6 +71,7 @@ struct dma_resv_list {
>   */
>  struct dma_resv {
>         struct ww_mutex lock;
> +       seqcount_t seq;
>  
>         struct dma_fence __rcu *fence_excl;
>         struct dma_resv_list __rcu *fence;
> @@ -77,24 +80,6 @@ struct dma_resv {
>  #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
>  #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
>  
> -/**
> - * dma_resv_get_excl - get the reservation object's
> - * exclusive fence, with update-side lock held
> - * @obj: the reservation object
> - *
> - * Returns the exclusive fence (if any).  Does NOT take a
> - * reference. Writers must hold obj->lock, readers may only
> - * hold a RCU read side lock.
> - *
> - * RETURNS
> - * The exclusive fence or NULL
> - */
> -static inline struct dma_fence *dma_resv_get_excl(struct dma_resv *obj)
> -{
> -       return rcu_dereference_protected(obj->fence_excl,
> -                                        dma_resv_held(obj));
> -}
> -
>  /**
>   * dma_resv_get_list - get the reservation object's
>   * shared fence list, with update-side lock held
> @@ -109,53 +94,6 @@ static inline struct dma_resv_list *dma_resv_get_list(struct dma_resv *obj)
>                                          dma_resv_held(obj));
>  }
>  
> -/**
> - * dma_resv_fences - read consistent fence pointers
> - * @obj: reservation object where we get the fences from
> - * @excl: pointer for the exclusive fence
> - * @list: pointer for the shared fence list
> - *
> - * Make sure we have a consisten exclusive fence and shared fence list.
> - * Must be called with rcu read side lock held.
> - */
> -static inline void dma_resv_fences(struct dma_resv *obj,
> -                                  struct dma_fence **excl,
> -                                  struct dma_resv_list **list,
> -                                  u32 *shared_count)
> -{
> -       do {
> -               *excl = rcu_dereference(obj->fence_excl);
> -               *list = rcu_dereference(obj->fence);
> -               *shared_count = *list ? (*list)->shared_count : 0;
> -               smp_rmb(); /* See dma_resv_add_excl_fence */
> -       } while (rcu_access_pointer(obj->fence_excl) != *excl);
> -}
> -
> -/**
> - * dma_resv_get_excl_rcu - get the reservation object's
> - * exclusive fence, without lock held.
> - * @obj: the reservation object
> - *
> - * If there is an exclusive fence, this atomically increments it's
> - * reference count and returns it.
> - *
> - * RETURNS
> - * The exclusive fence or NULL if none
> - */
> -static inline struct dma_fence *dma_resv_get_excl_rcu(struct dma_resv *obj)
> -{
> -       struct dma_fence *fence;
> -
> -       if (!rcu_access_pointer(obj->fence_excl))
> -               return NULL;
> -
> -       rcu_read_lock();
> -       fence = dma_fence_get_rcu_safe(&obj->fence_excl);
> -       rcu_read_unlock();
> -
> -       return fence;
> -}
> -
>  /**
>   * dma_resv_lock - lock the reservation object
>   * @obj: the reservation object
> @@ -290,6 +228,51 @@ static inline void dma_resv_unlock(struct dma_resv *obj)
>         ww_mutex_unlock(&obj->lock);
>  }
>  
> +/**
> + * dma_resv_get_excl - get the reservation object's
> + * exclusive fence, with update-side lock held
> + * @obj: the reservation object
> + *
> + * Returns the exclusive fence (if any).  Does NOT take a
> + * reference. Writers must hold obj->lock, readers may only
> + * hold a RCU read side lock.
> + *
> + * RETURNS
> + * The exclusive fence or NULL
> + */
> +static inline struct dma_fence *
> +dma_resv_get_excl(struct dma_resv *obj)
> +{
> +       return rcu_dereference_protected(obj->fence_excl,
> +                                        dma_resv_held(obj));
> +}
> +
> +/**
> + * dma_resv_get_excl_rcu - get the reservation object's
> + * exclusive fence, without lock held.
> + * @obj: the reservation object
> + *
> + * If there is an exclusive fence, this atomically increments it's
> + * reference count and returns it.
> + *
> + * RETURNS
> + * The exclusive fence or NULL if none
> + */
> +static inline struct dma_fence *
> +dma_resv_get_excl_rcu(struct dma_resv *obj)
> +{
> +       struct dma_fence *fence;
> +
> +       if (!rcu_access_pointer(obj->fence_excl))
> +               return NULL;
> +
> +       rcu_read_lock();
> +       fence = dma_fence_get_rcu_safe(&obj->fence_excl);
> +       rcu_read_unlock();
> +
> +       return fence;
> +}
> +
>  void dma_resv_init(struct dma_resv *obj);
>  void dma_resv_fini(struct dma_resv *obj);
>  int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
> -- 
> 2.23.0.rc1
> 


More information about the dri-devel mailing list