[Intel-gfx] [PATCH 3/3] drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Nov 8 16:45:01 UTC 2018
On 08/11/2018 08:17, Chris Wilson wrote:
> Since commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu
> notifiers") we have been able to report failure from
> mmu_invalidate_range_start which allows us to use a trylock on the
> struct_mutex to avoid potential recursion and report -EBUSY instead.
> Furthermore, this allows us to pull the work into the main callback and
> avoid the sleight-of-hand in using a workqueue to avoid lockdep.
>
> However, not all paths to mmu_invalidate_range_start are prepared to
> handle failure, so instead of reporting the recursion, deal with it by
> propagating the failure upwards, who can decide themselves to handle it
> or report it.
>
> v2: Mark up the recursive lock behaviour and comment on the various weak
> points.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108375
> References: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 4 +-
> drivers/gpu/drm/i915/i915_gem.c | 30 +++-
> drivers/gpu/drm/i915/i915_gem_object.h | 7 +
> drivers/gpu/drm/i915/i915_gem_userptr.c | 221 +++++++++++-------------
> 4 files changed, 136 insertions(+), 126 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0c8438de3c1b..b95afa05976d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3076,8 +3076,8 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
> I915_MM_SHRINKER
> };
>
> -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> - enum i915_mm_subclass subclass);
> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> + enum i915_mm_subclass subclass);
> void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
>
> enum i915_map_type {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index dc120b5d8e05..53fb3605c4df 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2447,8 +2447,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
> struct sg_table *pages;
>
> pages = fetch_and_zero(&obj->mm.pages);
> - if (!pages)
> - return NULL;
> + if (IS_ERR_OR_NULL(pages))
Where can we set pages to some ERR_PTR value?
> + return pages;
>
> spin_lock(&i915->mm.obj_lock);
> list_del(&obj->mm.link);
> @@ -2472,22 +2472,23 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
> return pages;
> }
>
> -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> - enum i915_mm_subclass subclass)
> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> + enum i915_mm_subclass subclass)
> {
> struct sg_table *pages;
> + int ret;
>
> if (i915_gem_object_has_pinned_pages(obj))
> - return;
> + return -EBUSY;
>
> GEM_BUG_ON(obj->bind_count);
> - if (!i915_gem_object_has_pages(obj))
> - return;
>
> /* May be called by shrinker from within get_pages() (on another bo) */
> mutex_lock_nested(&obj->mm.lock, subclass);
> - if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
> + if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
> + ret = -EBUSY;
> goto unlock;
> + }
>
> /*
> * ->put_pages might need to allocate memory for the bit17 swizzle
> @@ -2495,11 +2496,24 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> * lists early.
> */
> pages = __i915_gem_object_unset_pages(obj);
> +
> + /*
> + * XXX Temporary hijinx to avoid updating all backends to handle
> + * NULL pages. In the future, when we have more asynchronous
> + * get_pages backends we should be better able to handle the
> + * cancellation of the async task in a more uniform manner.
> + */
> + if (!pages && !i915_gem_object_needs_async_cancel(obj))
> + pages = ERR_PTR(-EINVAL);
> +
> if (!IS_ERR(pages))
> obj->ops->put_pages(obj, pages);
>
> + ret = 0;
> unlock:
> mutex_unlock(&obj->mm.lock);
> +
> + return ret;
> }
>
> bool i915_sg_trim(struct sg_table *orig_st)
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index a6dd7c46de0d..49ce797173b5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -56,6 +56,7 @@ struct drm_i915_gem_object_ops {
> #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
> #define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
> #define I915_GEM_OBJECT_IS_PROXY BIT(2)
> +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3)
>
> /* Interface between the GEM object and its backing storage.
> * get_pages() is called once prior to the use of the associated set
> @@ -386,6 +387,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
> return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
> }
>
> +static inline bool
> +i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
> +{
> + return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
> +}
> +
> static inline bool
> i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
> {
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 2c9b284036d1..8b07fd44731f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -50,79 +50,84 @@ struct i915_mmu_notifier {
> struct hlist_node node;
> struct mmu_notifier mn;
> struct rb_root_cached objects;
> - struct workqueue_struct *wq;
> + struct i915_mm_struct *mm;
> };
>
> struct i915_mmu_object {
> struct i915_mmu_notifier *mn;
> struct drm_i915_gem_object *obj;
> struct interval_tree_node it;
> - struct list_head link;
> - struct work_struct work;
> - bool attached;
> };
>
> -static void cancel_userptr(struct work_struct *work)
> -{
> - struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
> - struct drm_i915_gem_object *obj = mo->obj;
> - struct work_struct *active;
> -
> - /* Cancel any active worker and force us to re-evaluate gup */
> - mutex_lock(&obj->mm.lock);
> - active = fetch_and_zero(&obj->userptr.work);
> - mutex_unlock(&obj->mm.lock);
> - if (active)
> - goto out;
> -
> - i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL);
> -
> - mutex_lock(&obj->base.dev->struct_mutex);
> -
> - /* We are inside a kthread context and can't be interrupted */
> - if (i915_gem_object_unbind(obj) == 0)
> - __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> - WARN_ONCE(i915_gem_object_has_pages(obj),
> - "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n",
> - obj->bind_count,
> - atomic_read(&obj->mm.pages_pin_count),
> - obj->pin_global);
> -
> - mutex_unlock(&obj->base.dev->struct_mutex);
> -
> -out:
> - i915_gem_object_put(obj);
> -}
> -
> static void add_object(struct i915_mmu_object *mo)
> {
> - if (mo->attached)
> + if (!RB_EMPTY_NODE(&mo->it.rb))
> return;
>
> interval_tree_insert(&mo->it, &mo->mn->objects);
> - mo->attached = true;
> }
>
> static void del_object(struct i915_mmu_object *mo)
> {
> - if (!mo->attached)
> + if (RB_EMPTY_NODE(&mo->it.rb))
> return;
>
> interval_tree_remove(&mo->it, &mo->mn->objects);
> - mo->attached = false;
> + RB_CLEAR_NODE(&mo->it.rb);
> +}
> +
> +static void
> +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
> +{
> + struct i915_mmu_object *mo = obj->userptr.mmu_object;
> +
> + /*
> + * During mm_invalidate_range we need to cancel any userptr that
> + * overlaps the range being invalidated. Doing so requires the
> + * struct_mutex, and that risks recursion. In order to cause
> + * recursion, the user must alias the userptr address space with
> + * a GTT mmapping (possible with a MAP_FIXED) - then when we have
> + * to invalidate that mmaping, mm_invalidate_range is called with
> + * the userptr address *and* the struct_mutex held. To prevent that
> + * we set a flag under the i915_mmu_notifier spinlock to indicate
> + * whether this object is valid.
> + */
> + if (!mo)
> + return;
> +
> + spin_lock(&mo->mn->lock);
> + if (value)
> + add_object(mo);
> + else
> + del_object(mo);
> + spin_unlock(&mo->mn->lock);
> +}
> +
> +static struct mutex *__i915_mutex_lock_recursive(struct mutex *m)
> +{
> + switch (mutex_trylock_recursive(m)) {
> + default:
> + case MUTEX_TRYLOCK_FAILED:
> + mutex_lock_nested(m, I915_MM_SHRINKER);
I am thinking that perhaps we need a new namespace for struct mutex lock
nesting, like I915_STRUCT_MUTEX...
And this one would then be called something other than shrinker. Since
it is not triggered by the shrinker but page migration, right?
> + case MUTEX_TRYLOCK_SUCCESS:
> + return m;
> +
> + case MUTEX_TRYLOCK_RECURSIVE:
> + return ERR_PTR(-EEXIST);
> + }
> }
>
> static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> - struct mm_struct *mm,
> - unsigned long start,
> - unsigned long end,
> - bool blockable)
> + struct mm_struct *mm,
> + unsigned long start,
> + unsigned long end,
> + bool blockable)
> {
> struct i915_mmu_notifier *mn =
> container_of(_mn, struct i915_mmu_notifier, mn);
> - struct i915_mmu_object *mo;
> struct interval_tree_node *it;
> - LIST_HEAD(cancelled);
> + struct mutex *unlock = NULL;
> + int ret = 0;
>
> if (RB_EMPTY_ROOT(&mn->objects.rb_root))
> return 0;
> @@ -133,11 +138,15 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> spin_lock(&mn->lock);
> it = interval_tree_iter_first(&mn->objects, start, end);
> while (it) {
> + struct drm_i915_gem_object *obj;
> +
> if (!blockable) {
> - spin_unlock(&mn->lock);
> - return -EAGAIN;
> + ret = -EAGAIN;
> + break;
> }
> - /* The mmu_object is released late when destroying the
> +
> + /*
> + * The mmu_object is released late when destroying the
> * GEM object so it is entirely possible to gain a
> * reference on an object in the process of being freed
> * since our serialisation is via the spinlock and not
> @@ -146,21 +155,39 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> * use-after-free we only acquire a reference on the
> * object if it is not in the process of being destroyed.
> */
> - mo = container_of(it, struct i915_mmu_object, it);
> - if (kref_get_unless_zero(&mo->obj->base.refcount))
> - queue_work(mn->wq, &mo->work);
> + obj = container_of(it, struct i915_mmu_object, it)->obj;
> + if (!kref_get_unless_zero(&obj->base.refcount)) {
> + it = interval_tree_iter_next(it, start, end);
> + continue;
> + }
> + spin_unlock(&mn->lock);
>
> - list_add(&mo->link, &cancelled);
> - it = interval_tree_iter_next(it, start, end);
> + if (!unlock)
> + unlock = __i915_mutex_lock_recursive(&mn->mm->i915->drm.struct_mutex);
> + ret = i915_gem_object_unbind(obj);
> + if (ret == 0)
> + ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
> + i915_gem_object_put(obj);
> + if (ret)
> + goto unlock;
> +
> + spin_lock(&mn->lock);
> +
> + /*
> + * As we do not (yet) protect the mmu from concurrent insertion
> + * over this range, there is no guarantee that this search will
> + * terminate given a pathologic workload.
> + */
Hmm my concern from the other day. But it must exit, some client will
eventually stall for a bit I think. If not sooner then when everything
grinds to a halt due everyone being stuck in this loop.
> + it = interval_tree_iter_first(&mn->objects, start, end);
> }
> - list_for_each_entry(mo, &cancelled, link)
> - del_object(mo);
> spin_unlock(&mn->lock);
>
> - if (!list_empty(&cancelled))
> - flush_workqueue(mn->wq);
> +unlock:
> + if (!IS_ERR_OR_NULL(unlock))
> + mutex_unlock(unlock);
> +
> + return ret;
>
> - return 0;
> }
>
> static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> @@ -168,7 +195,7 @@ static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> };
>
> static struct i915_mmu_notifier *
> -i915_mmu_notifier_create(struct mm_struct *mm)
> +i915_mmu_notifier_create(struct i915_mm_struct *mm)
> {
> struct i915_mmu_notifier *mn;
>
> @@ -179,13 +206,7 @@ i915_mmu_notifier_create(struct mm_struct *mm)
> spin_lock_init(&mn->lock);
> mn->mn.ops = &i915_gem_userptr_notifier;
> mn->objects = RB_ROOT_CACHED;
> - mn->wq = alloc_workqueue("i915-userptr-release",
> - WQ_UNBOUND | WQ_MEM_RECLAIM,
> - 0);
> - if (mn->wq == NULL) {
> - kfree(mn);
> - return ERR_PTR(-ENOMEM);
> - }
> + mn->mm = mm;
>
> return mn;
> }
> @@ -195,16 +216,14 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> {
> struct i915_mmu_object *mo;
>
> - mo = obj->userptr.mmu_object;
> - if (mo == NULL)
> + mo = fetch_and_zero(&obj->userptr.mmu_object);
> + if (!mo)
> return;
>
> spin_lock(&mo->mn->lock);
> del_object(mo);
> spin_unlock(&mo->mn->lock);
> kfree(mo);
> -
> - obj->userptr.mmu_object = NULL;
> }
>
> static struct i915_mmu_notifier *
> @@ -217,7 +236,7 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm)
> if (mn)
> return mn;
>
> - mn = i915_mmu_notifier_create(mm->mm);
> + mn = i915_mmu_notifier_create(mm);
> if (IS_ERR(mn))
> err = PTR_ERR(mn);
>
> @@ -240,10 +259,8 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm)
> mutex_unlock(&mm->i915->mm_lock);
> up_write(&mm->mm->mmap_sem);
>
> - if (mn && !IS_ERR(mn)) {
> - destroy_workqueue(mn->wq);
> + if (mn && !IS_ERR(mn))
> kfree(mn);
> - }
>
> return err ? ERR_PTR(err) : mm->mn;
> }
> @@ -266,14 +283,14 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> return PTR_ERR(mn);
>
> mo = kzalloc(sizeof(*mo), GFP_KERNEL);
> - if (mo == NULL)
> + if (!mo)
> return -ENOMEM;
>
> mo->mn = mn;
> mo->obj = obj;
> mo->it.start = obj->userptr.ptr;
> mo->it.last = obj->userptr.ptr + obj->base.size - 1;
> - INIT_WORK(&mo->work, cancel_userptr);
> + RB_CLEAR_NODE(&mo->it.rb);
>
> obj->userptr.mmu_object = mo;
> return 0;
> @@ -287,12 +304,16 @@ i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
> return;
>
> mmu_notifier_unregister(&mn->mn, mm);
> - destroy_workqueue(mn->wq);
> kfree(mn);
> }
>
> #else
>
> +static void
> +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
> +{
> +}
> +
> static void
> i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> {
> @@ -461,42 +482,6 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
> return st;
> }
>
> -static int
> -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
> - bool value)
> -{
> - int ret = 0;
> -
> - /* During mm_invalidate_range we need to cancel any userptr that
> - * overlaps the range being invalidated. Doing so requires the
> - * struct_mutex, and that risks recursion. In order to cause
> - * recursion, the user must alias the userptr address space with
> - * a GTT mmapping (possible with a MAP_FIXED) - then when we have
> - * to invalidate that mmaping, mm_invalidate_range is called with
> - * the userptr address *and* the struct_mutex held. To prevent that
> - * we set a flag under the i915_mmu_notifier spinlock to indicate
> - * whether this object is valid.
> - */
> -#if defined(CONFIG_MMU_NOTIFIER)
> - if (obj->userptr.mmu_object == NULL)
> - return 0;
> -
> - spin_lock(&obj->userptr.mmu_object->mn->lock);
> - /* In order to serialise get_pages with an outstanding
> - * cancel_userptr, we must drop the struct_mutex and try again.
> - */
> - if (!value)
> - del_object(obj->userptr.mmu_object);
> - else if (!work_pending(&obj->userptr.mmu_object->work))
> - add_object(obj->userptr.mmu_object);
> - else
> - ret = -EAGAIN;
> - spin_unlock(&obj->userptr.mmu_object->mn->lock);
> -#endif
> -
> - return ret;
> -}
> -
> static void
> __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
> {
> @@ -682,8 +667,11 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
> struct sgt_iter sgt_iter;
> struct page *page;
>
> - BUG_ON(obj->userptr.work != NULL);
> + /* Cancel any inflight work and force them to restart their gup */
> + obj->userptr.work = NULL;
> __i915_gem_userptr_set_active(obj, false);
> + if (!pages)
> + return;
>
> if (obj->mm.madv != I915_MADV_WILLNEED)
> obj->mm.dirty = false;
> @@ -721,7 +709,8 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
>
> static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
> .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
> - I915_GEM_OBJECT_IS_SHRINKABLE,
> + I915_GEM_OBJECT_IS_SHRINKABLE |
> + I915_GEM_OBJECT_ASYNC_CANCEL,
> .get_pages = i915_gem_userptr_get_pages,
> .put_pages = i915_gem_userptr_put_pages,
> .dmabuf_export = i915_gem_userptr_dmabuf_export,
>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list