[Intel-gfx] [PATCH] drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start

Thu Oct 25 19:16:50 UTC 2018

On Thu, Oct 25, 2018 at 01:45:42PM +0100, Chris Wilson wrote:
> Since commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu
> notifiers") we have been able to report failure from
> mmu_invalidate_range_start which allows us to use a trylock on the
> struct_mutex to avoid potential recursion and report -EBUSY instead.
> Furthermore, this allows us to pull the work into the main callback and
> avoid the sleight-of-hand in using a workqueue to avoid lockdep.
> 
> However, not all paths to mmu_invalidate_range_start are prepared to
> handle failure, so instead of reporting the recursion, deal with it.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108375
> References: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |   4 +-
>  drivers/gpu/drm/i915/i915_gem.c         |  12 +-
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 219 ++++++++++++------------
>  3 files changed, 115 insertions(+), 120 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2d7761b8ac07..c3b94c3f930f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3076,8 +3076,8 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
>  	I915_MM_SHRINKER
>  };
>  
> -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> -				 enum i915_mm_subclass subclass);
> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> +				enum i915_mm_subclass subclass);
>  void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
>  
>  enum i915_map_type {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 93d09282710d..de7ccb3eb7b8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2454,17 +2454,18 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
>  	return pages;
>  }
>  
> -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> -				 enum i915_mm_subclass subclass)
> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> +				enum i915_mm_subclass subclass)
>  {
>  	struct sg_table *pages;
> +	int ret = -EBUSY;
>  
>  	if (i915_gem_object_has_pinned_pages(obj))
> -		return;
> +		return -EBUSY;
>  
>  	GEM_BUG_ON(obj->bind_count);
>  	if (!i915_gem_object_has_pages(obj))
> -		return;
> +		return 0;
>  
>  	/* May be called by shrinker from within get_pages() (on another bo) */
>  	mutex_lock_nested(&obj->mm.lock, subclass);
> @@ -2480,8 +2481,11 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>  	if (!IS_ERR(pages))
>  		obj->ops->put_pages(obj, pages);
>  
> +	ret = 0;
>  unlock:
>  	mutex_unlock(&obj->mm.lock);
> +
> +	return ret;
>  }
>  
>  bool i915_sg_trim(struct sg_table *orig_st)
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 2c9b284036d1..a8f3c304db55 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -50,79 +50,84 @@ struct i915_mmu_notifier {
>  	struct hlist_node node;
>  	struct mmu_notifier mn;
>  	struct rb_root_cached objects;
> -	struct workqueue_struct *wq;
> +	struct i915_mm_struct *mm;
>  };
>  
>  struct i915_mmu_object {
>  	struct i915_mmu_notifier *mn;
>  	struct drm_i915_gem_object *obj;
>  	struct interval_tree_node it;
> -	struct list_head link;
> -	struct work_struct work;
> -	bool attached;
>  };
>  
> -static void cancel_userptr(struct work_struct *work)
> -{
> -	struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
> -	struct drm_i915_gem_object *obj = mo->obj;
> -	struct work_struct *active;
> -
> -	/* Cancel any active worker and force us to re-evaluate gup */
> -	mutex_lock(&obj->mm.lock);
> -	active = fetch_and_zero(&obj->userptr.work);
> -	mutex_unlock(&obj->mm.lock);
> -	if (active)
> -		goto out;
> -
> -	i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL);
> -
> -	mutex_lock(&obj->base.dev->struct_mutex);
> -
> -	/* We are inside a kthread context and can't be interrupted */
> -	if (i915_gem_object_unbind(obj) == 0)
> -		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> -	WARN_ONCE(i915_gem_object_has_pages(obj),
> -		  "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n",
> -		  obj->bind_count,
> -		  atomic_read(&obj->mm.pages_pin_count),
> -		  obj->pin_global);
> -
> -	mutex_unlock(&obj->base.dev->struct_mutex);
> -
> -out:
> -	i915_gem_object_put(obj);
> -}
> -
>  static void add_object(struct i915_mmu_object *mo)
>  {
> -	if (mo->attached)
> +	if (!RB_EMPTY_NODE(&mo->it.rb))
>  		return;
>  
>  	interval_tree_insert(&mo->it, &mo->mn->objects);
> -	mo->attached = true;
>  }
>  
>  static void del_object(struct i915_mmu_object *mo)
>  {
> -	if (!mo->attached)
> +	if (RB_EMPTY_NODE(&mo->it.rb))
>  		return;
>  
>  	interval_tree_remove(&mo->it, &mo->mn->objects);
> -	mo->attached = false;
> +	RB_CLEAR_NODE(&mo->it.rb);
> +}
> +
> +static void
> +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
> +{
> +	struct i915_mmu_object *mo = obj->userptr.mmu_object;
> +
> +	/*
> +	 * During mm_invalidate_range we need to cancel any userptr that
> +	 * overlaps the range being invalidated. Doing so requires the
> +	 * struct_mutex, and that risks recursion. In order to cause
> +	 * recursion, the user must alias the userptr address space with
> +	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
> +	 * to invalidate that mmaping, mm_invalidate_range is called with
> +	 * the userptr address *and* the struct_mutex held.  To prevent that
> +	 * we set a flag under the i915_mmu_notifier spinlock to indicate
> +	 * whether this object is valid.
> +	 */
> +	if (!mo)
> +		return;
> +
> +	spin_lock(&mo->mn->lock);
> +	if (value)
> +		add_object(mo);
> +	else
> +		del_object(mo);
> +	spin_unlock(&mo->mn->lock);
> +}
> +
> +static struct mutex *__i915_mutex_lock_recursive(struct mutex *m)
> +{
> +	switch (mutex_trylock_recursive(m)) {
> +	default:
> +	case MUTEX_TRYLOCK_FAILED:
> +		mutex_lock(m);
> +	case MUTEX_TRYLOCK_SUCCESS:
> +		return m;
> +
> +	case MUTEX_TRYLOCK_RECURSIVE:
> +		return ERR_PTR(-EEXIST);
> +	}
>  }
>  
>  static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> -						       struct mm_struct *mm,
> -						       unsigned long start,
> -						       unsigned long end,
> -						       bool blockable)
> +						      struct mm_struct *mm,
> +						      unsigned long start,
> +						      unsigned long end,
> +						      bool blockable)
>  {
>  	struct i915_mmu_notifier *mn =
>  		container_of(_mn, struct i915_mmu_notifier, mn);
> -	struct i915_mmu_object *mo;
>  	struct interval_tree_node *it;
> -	LIST_HEAD(cancelled);
> +	struct mutex *unlock = NULL;
> +	int ret = 0;
>  
>  	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
>  		return 0;
> @@ -133,11 +138,16 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>  	spin_lock(&mn->lock);
>  	it = interval_tree_iter_first(&mn->objects, start, end);
>  	while (it) {
> +		struct drm_i915_gem_object *obj;
> +		bool has_pages = false;
> +
>  		if (!blockable) {
> -			spin_unlock(&mn->lock);
> -			return -EAGAIN;
> +			ret = -EAGAIN;
> +			break;
>  		}
> -		/* The mmu_object is released late when destroying the
> +
> +		/*
> +		 * The mmu_object is released late when destroying the
>  		 * GEM object so it is entirely possible to gain a
>  		 * reference on an object in the process of being freed
>  		 * since our serialisation is via the spinlock and not
> @@ -146,21 +156,44 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>  		 * use-after-free we only acquire a reference on the
>  		 * object if it is not in the process of being destroyed.
>  		 */
> -		mo = container_of(it, struct i915_mmu_object, it);
> -		if (kref_get_unless_zero(&mo->obj->base.refcount))
> -			queue_work(mn->wq, &mo->work);
> +		obj = container_of(it, struct i915_mmu_object, it)->obj;
> +		if (!kref_get_unless_zero(&obj->base.refcount)) {
> +			it = interval_tree_iter_next(it, start, end);
> +			continue;
> +		}
> +		spin_unlock(&mn->lock);
>  
> -		list_add(&mo->link, &cancelled);
> -		it = interval_tree_iter_next(it, start, end);
> +		/* Cancel any pending worker and force us to re-evaluate gup */
> +		mutex_lock_nested(&obj->mm.lock, I915_MM_SHRINKER);

mutex_lock_nested is meant for static nesting. This here is rather
dynamic, and I don't really see where the guarantee is that the classic
deadlock isn't possible:

thread 1: get_pages on object A -> mmu_notifier (or shrinker) on object B
thread 2: get_pages on object B -> mmu_notifier (or shrinker) on object A

Only when you have some guarantee for a global order can you nest locks
using mutex_lock_nested.

There's a few checks and stuff that might work, but they all seem rather
racy. What's the guarantee that prevents the above?
-Daniel

> +		if (fetch_and_zero(&obj->userptr.work))
> +			__i915_gem_userptr_set_active(obj, false);
> +		else
> +			has_pages = i915_gem_object_has_pages(obj);
> +		mutex_unlock(&obj->mm.lock);
> +
> +		if (has_pages) {
> +			if (!unlock)
> +				unlock = __i915_mutex_lock_recursive(&mn->mm->i915->drm.struct_mutex);
> +			ret = i915_gem_object_unbind(obj);
> +			if (ret == 0)
> +				ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
> +		}
> +
> +		i915_gem_object_put(obj);
> +		if (ret)
> +			goto unlock;
> +
> +		spin_lock(&mn->lock);
> +		it = interval_tree_iter_first(&mn->objects, start, end);
>  	}
> -	list_for_each_entry(mo, &cancelled, link)
> -		del_object(mo);
>  	spin_unlock(&mn->lock);
>  
> -	if (!list_empty(&cancelled))
> -		flush_workqueue(mn->wq);
> +unlock:
> +	if (!IS_ERR_OR_NULL(unlock))
> +		mutex_unlock(unlock);
> +
> +	return ret;
>  
> -	return 0;
>  }
>  
>  static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> @@ -168,7 +201,7 @@ static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
>  };
>  
>  static struct i915_mmu_notifier *
> -i915_mmu_notifier_create(struct mm_struct *mm)
> +i915_mmu_notifier_create(struct i915_mm_struct *mm)
>  {
>  	struct i915_mmu_notifier *mn;
>  
> @@ -179,13 +212,7 @@ i915_mmu_notifier_create(struct mm_struct *mm)
>  	spin_lock_init(&mn->lock);
>  	mn->mn.ops = &i915_gem_userptr_notifier;
>  	mn->objects = RB_ROOT_CACHED;
> -	mn->wq = alloc_workqueue("i915-userptr-release",
> -				 WQ_UNBOUND | WQ_MEM_RECLAIM,
> -				 0);
> -	if (mn->wq == NULL) {
> -		kfree(mn);
> -		return ERR_PTR(-ENOMEM);
> -	}
> +	mn->mm = mm;
>  
>  	return mn;
>  }
> @@ -195,16 +222,14 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
>  {
>  	struct i915_mmu_object *mo;
>  
> -	mo = obj->userptr.mmu_object;
> -	if (mo == NULL)
> +	mo = fetch_and_zero(&obj->userptr.mmu_object);
> +	if (!mo)
>  		return;
>  
>  	spin_lock(&mo->mn->lock);
>  	del_object(mo);
>  	spin_unlock(&mo->mn->lock);
>  	kfree(mo);
> -
> -	obj->userptr.mmu_object = NULL;
>  }
>  
>  static struct i915_mmu_notifier *
> @@ -217,7 +242,7 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm)
>  	if (mn)
>  		return mn;
>  
> -	mn = i915_mmu_notifier_create(mm->mm);
> +	mn = i915_mmu_notifier_create(mm);
>  	if (IS_ERR(mn))
>  		err = PTR_ERR(mn);
>  
> @@ -240,10 +265,8 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm)
>  	mutex_unlock(&mm->i915->mm_lock);
>  	up_write(&mm->mm->mmap_sem);
>  
> -	if (mn && !IS_ERR(mn)) {
> -		destroy_workqueue(mn->wq);
> +	if (mn && !IS_ERR(mn))
>  		kfree(mn);
> -	}
>  
>  	return err ? ERR_PTR(err) : mm->mn;
>  }
> @@ -266,14 +289,14 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
>  		return PTR_ERR(mn);
>  
>  	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
> -	if (mo == NULL)
> +	if (!mo)
>  		return -ENOMEM;
>  
>  	mo->mn = mn;
>  	mo->obj = obj;
>  	mo->it.start = obj->userptr.ptr;
>  	mo->it.last = obj->userptr.ptr + obj->base.size - 1;
> -	INIT_WORK(&mo->work, cancel_userptr);
> +	RB_CLEAR_NODE(&mo->it.rb);
>  
>  	obj->userptr.mmu_object = mo;
>  	return 0;
> @@ -287,12 +310,16 @@ i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
>  		return;
>  
>  	mmu_notifier_unregister(&mn->mn, mm);
> -	destroy_workqueue(mn->wq);
>  	kfree(mn);
>  }
>  
>  #else
>  
> +static void
> +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
> +{
> +}
> +
>  static void
>  i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
>  {
> @@ -461,42 +488,6 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
>  	return st;
>  }
>  
> -static int
> -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
> -			      bool value)
> -{
> -	int ret = 0;
> -
> -	/* During mm_invalidate_range we need to cancel any userptr that
> -	 * overlaps the range being invalidated. Doing so requires the
> -	 * struct_mutex, and that risks recursion. In order to cause
> -	 * recursion, the user must alias the userptr address space with
> -	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
> -	 * to invalidate that mmaping, mm_invalidate_range is called with
> -	 * the userptr address *and* the struct_mutex held.  To prevent that
> -	 * we set a flag under the i915_mmu_notifier spinlock to indicate
> -	 * whether this object is valid.
> -	 */
> -#if defined(CONFIG_MMU_NOTIFIER)
> -	if (obj->userptr.mmu_object == NULL)
> -		return 0;
> -
> -	spin_lock(&obj->userptr.mmu_object->mn->lock);
> -	/* In order to serialise get_pages with an outstanding
> -	 * cancel_userptr, we must drop the struct_mutex and try again.
> -	 */
> -	if (!value)
> -		del_object(obj->userptr.mmu_object);
> -	else if (!work_pending(&obj->userptr.mmu_object->work))
> -		add_object(obj->userptr.mmu_object);
> -	else
> -		ret = -EAGAIN;
> -	spin_unlock(&obj->userptr.mmu_object->mn->lock);
> -#endif
> -
> -	return ret;
> -}
> -
>  static void
>  __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>  {
> -- 
> 2.19.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch