[Intel-gfx] [PATCH v9 16/70] drm/i915: Fix userptr so we do not have to worry about obj->mm.lock, v7.

Wed Mar 24 11:34:34 UTC 2021

On 3/24/21 12:28 PM, Daniel Vetter wrote:
> On Tue, Mar 23, 2021 at 04:50:05PM +0100, Maarten Lankhorst wrote:
>> Instead of doing what we do currently, which will never work with
>> PROVE_LOCKING, do the same as AMD does, and something similar to
>> relocation slowpath. When all locks are dropped, we acquire the
>> pages for pinning. When the locks are taken, we transfer those
>> pages in .get_pages() to the bo. As a final check before installing
>> the fences, we ensure that the mmu notifier was not called; if it is,
>> we return -EAGAIN to userspace to signal it has to start over.
>>
>> Changes since v1:
>> - Unbinding is done in submit_init only. submit_begin() removed.
>> - MMU_NOTFIER -> MMU_NOTIFIER
>> Changes since v2:
>> - Make i915->mm.notifier a spinlock.
>> Changes since v3:
>> - Add WARN_ON if there are any page references left, should have been 0.
>> - Return 0 on success in submit_init(), bug from spinlock conversion.
>> - Release pvec outside of notifier_lock (Thomas).
>> Changes since v4:
>> - Mention why we're clearing eb->[i + 1].vma in the code. (Thomas)
>> - Actually check all invalidations in eb_move_to_gpu. (Thomas)
>> - Do not wait when process is exiting to fix gem_ctx_persistence.userptr.
>> Changes since v5:
>> - Clarify why check on PF_EXITING is (temporarily) required.
>> Changes since v6:
>> - Ensure userptr validity is checked in set_domain through a special path.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
>> Acked-by: Dave Airlie <airlied at redhat.com>
>> ---
>>   drivers/gpu/drm/i915/gem/i915_gem_domain.c    |  18 +-
>>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 101 ++-
>>   drivers/gpu/drm/i915/gem/i915_gem_object.h    |  38 +-
>>   .../gpu/drm/i915/gem/i915_gem_object_types.h  |  10 +-
>>   drivers/gpu/drm/i915/gem/i915_gem_pages.c     |   2 +-
>>   drivers/gpu/drm/i915/gem/i915_gem_userptr.c   | 796 ++++++------------
>>   drivers/gpu/drm/i915/i915_drv.h               |   9 +-
>>   drivers/gpu/drm/i915/i915_gem.c               |   5 +-
>>   8 files changed, 395 insertions(+), 584 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> index 2f4980bf742e..76cb9f5c66aa 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
>> @@ -468,14 +468,28 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>>   	if (!obj)
>>   		return -ENOENT;
>>   
>> +	if (i915_gem_object_is_userptr(obj)) {
>> +		/*
>> +		 * Try to grab userptr pages, iris uses set_domain to check
>> +		 * userptr validity
>> +		 */
>> +		err = i915_gem_object_userptr_validate(obj);
>> +		if (!err)
>> +			err = i915_gem_object_wait(obj,
>> +						   I915_WAIT_INTERRUPTIBLE |
>> +						   I915_WAIT_PRIORITY |
>> +						   (write_domain ? I915_WAIT_ALL : 0),
>> +						   MAX_SCHEDULE_TIMEOUT);
>> +		goto out;
>> +	}
>> +
>>   	/*
>>   	 * Proxy objects do not control access to the backing storage, ergo
>>   	 * they cannot be used as a means to manipulate the cache domain
>>   	 * tracking for that backing storage. The proxy object is always
>>   	 * considered to be outside of any cache domain.
>>   	 */
>> -	if (i915_gem_object_is_proxy(obj) &&
>> -	    !i915_gem_object_is_userptr(obj)) {
>> +	if (i915_gem_object_is_proxy(obj)) {
>>   		err = -ENXIO;
>>   		goto out;
>>   	}
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> index 795c68fcc6ed..b5ca9eb53565 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> @@ -53,14 +53,16 @@ enum {
>>   /* __EXEC_OBJECT_NO_RESERVE is BIT(31), defined in i915_vma.h */
>>   #define __EXEC_OBJECT_HAS_PIN		BIT(30)
>>   #define __EXEC_OBJECT_HAS_FENCE		BIT(29)
>> -#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
>> -#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
>> -#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above + */
>> +#define __EXEC_OBJECT_USERPTR_INIT	BIT(28)
>> +#define __EXEC_OBJECT_NEEDS_MAP		BIT(27)
>> +#define __EXEC_OBJECT_NEEDS_BIAS	BIT(26)
>> +#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 26) /* all of the above + */
>>   #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
>>   
>>   #define __EXEC_HAS_RELOC	BIT(31)
>>   #define __EXEC_ENGINE_PINNED	BIT(30)
>> -#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
>> +#define __EXEC_USERPTR_USED	BIT(29)
>> +#define __EXEC_INTERNAL_FLAGS	(~0u << 29)
>>   #define UPDATE			PIN_OFFSET_FIXED
>>   
>>   #define BATCH_OFFSET_BIAS (256*1024)
>> @@ -871,6 +873,26 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>>   		}
>>   
>>   		eb_add_vma(eb, i, batch, vma);
>> +
>> +		if (i915_gem_object_is_userptr(vma->obj)) {
>> +			err = i915_gem_object_userptr_submit_init(vma->obj);
>> +			if (err) {
>> +				if (i + 1 < eb->buffer_count) {
>> +					/*
>> +					 * Execbuffer code expects last vma entry to be NULL,
>> +					 * since we already initialized this entry,
>> +					 * set the next value to NULL or we mess up
>> +					 * cleanup handling.
>> +					 */
>> +					eb->vma[i + 1].vma = NULL;
>> +				}
>> +
>> +				return err;
>> +			}
>> +
>> +			eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT;
>> +			eb->args->flags |= __EXEC_USERPTR_USED;
>> +		}
>>   	}
>>   
>>   	if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
>> @@ -972,7 +994,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
>>   	}
>>   }
>>   
>> -static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
>> +static void eb_release_vmas(struct i915_execbuffer *eb, bool final, bool release_userptr)
>>   {
>>   	const unsigned int count = eb->buffer_count;
>>   	unsigned int i;
>> @@ -986,6 +1008,11 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
>>   
>>   		eb_unreserve_vma(ev);
>>   
>> +		if (release_userptr && ev->flags & __EXEC_OBJECT_USERPTR_INIT) {
>> +			ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT;
>> +			i915_gem_object_userptr_submit_fini(vma->obj);
>> +		}
>> +
>>   		if (final)
>>   			i915_vma_put(vma);
>>   	}
>> @@ -1916,6 +1943,31 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
>>   	return 0;
>>   }
>>   
>> +static int eb_reinit_userptr(struct i915_execbuffer *eb)
>> +{
>> +	const unsigned int count = eb->buffer_count;
>> +	unsigned int i;
>> +	int ret;
>> +
>> +	if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
>> +		return 0;
>> +
>> +	for (i = 0; i < count; i++) {
>> +		struct eb_vma *ev = &eb->vma[i];
>> +
>> +		if (!i915_gem_object_is_userptr(ev->vma->obj))
>> +			continue;
>> +
>> +		ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
>> +		if (ret)
>> +			return ret;
>> +
>> +		ev->flags |= __EXEC_OBJECT_USERPTR_INIT;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>   static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
>>   					   struct i915_request *rq)
>>   {
>> @@ -1930,7 +1982,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
>>   	}
>>   
>>   	/* We may process another execbuffer during the unlock... */
>> -	eb_release_vmas(eb, false);
>> +	eb_release_vmas(eb, false, true);
>>   	i915_gem_ww_ctx_fini(&eb->ww);
>>   
>>   	if (rq) {
>> @@ -1971,10 +2023,8 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
>>   		err = 0;
>>   	}
>>   
>> -#ifdef CONFIG_MMU_NOTIFIER
>>   	if (!err)
>> -		flush_workqueue(eb->i915->mm.userptr_wq);
>> -#endif
>> +		err = eb_reinit_userptr(eb);
>>   
>>   err_relock:
>>   	i915_gem_ww_ctx_init(&eb->ww, true);
>> @@ -2036,7 +2086,7 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
>>   
>>   err:
>>   	if (err == -EDEADLK) {
>> -		eb_release_vmas(eb, false);
>> +		eb_release_vmas(eb, false, false);
>>   		err = i915_gem_ww_ctx_backoff(&eb->ww);
>>   		if (!err)
>>   			goto repeat_validate;
>> @@ -2133,7 +2183,7 @@ static int eb_relocate_parse(struct i915_execbuffer *eb)
>>   
>>   err:
>>   	if (err == -EDEADLK) {
>> -		eb_release_vmas(eb, false);
>> +		eb_release_vmas(eb, false, false);
>>   		err = i915_gem_ww_ctx_backoff(&eb->ww);
>>   		if (!err)
>>   			goto retry;
>> @@ -2208,6 +2258,30 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
>>   						      flags | __EXEC_OBJECT_NO_RESERVE);
>>   	}
>>   
>> +#ifdef CONFIG_MMU_NOTIFIER
>> +	if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
>> +		spin_lock(&eb->i915->mm.notifier_lock);
>> +
>> +		/*
>> +		 * count is always at least 1, otherwise __EXEC_USERPTR_USED
>> +		 * could not have been set
>> +		 */
>> +		for (i = 0; i < count; i++) {
>> +			struct eb_vma *ev = &eb->vma[i];
>> +			struct drm_i915_gem_object *obj = ev->vma->obj;
>> +
>> +			if (!i915_gem_object_is_userptr(obj))
>> +				continue;
>> +
>> +			err = i915_gem_object_userptr_submit_done(obj);
>> +			if (err)
>> +				break;
>> +		}
>> +
>> +		spin_unlock(&eb->i915->mm.notifier_lock);
>> +	}
>> +#endif
>> +
>>   	if (unlikely(err))
>>   		goto err_skip;
>>   
>> @@ -3352,7 +3426,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>   
>>   	err = eb_lookup_vmas(&eb);
>>   	if (err) {
>> -		eb_release_vmas(&eb, true);
>> +		eb_release_vmas(&eb, true, true);
>>   		goto err_engine;
>>   	}
>>   
>> @@ -3424,6 +3498,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>   
>>   	trace_i915_request_queue(eb.request, eb.batch_flags);
>>   	err = eb_submit(&eb, batch);
>> +
>>   err_request:
>>   	i915_request_get(eb.request);
>>   	err = eb_request_add(&eb, err);
>> @@ -3444,7 +3519,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>   	i915_request_put(eb.request);
>>   
>>   err_vma:
>> -	eb_release_vmas(&eb, true);
>> +	eb_release_vmas(&eb, true, true);
>>   	if (eb.trampoline)
>>   		i915_vma_unpin(eb.trampoline);
>>   	WARN_ON(err == -EDEADLK);
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
>> index 69509dbd01c7..b5af9c440ac5 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
>> @@ -59,6 +59,7 @@ i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
>>   				       const void *data, resource_size_t size);
>>   
>>   extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
>> +
>>   void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
>>   				     struct sg_table *pages,
>>   				     bool needs_clflush);
>> @@ -278,12 +279,6 @@ i915_gem_object_never_mmap(const struct drm_i915_gem_object *obj)
>>   	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_MMAP);
>>   }
>>   
>> -static inline bool
>> -i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
>> -{
>> -	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_ASYNC_CANCEL);
>> -}
>> -
>>   static inline bool
>>   i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
>>   {
>> @@ -573,16 +568,6 @@ void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
>>   void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
>>   					      enum fb_op_origin origin);
>>   
>> -static inline bool
>> -i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
>> -{
>> -#ifdef CONFIG_MMU_NOTIFIER
>> -	return obj->userptr.mm;
>> -#else
>> -	return false;
>> -#endif
>> -}
>> -
>>   static inline void
>>   i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
>>   				  enum fb_op_origin origin)
>> @@ -603,4 +588,25 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset,
>>   
>>   bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj);
>>   
>> +#ifdef CONFIG_MMU_NOTIFIER
>> +static inline bool
>> +i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
>> +{
>> +	return obj->userptr.notifier.mm;
>> +}
>> +
>> +int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj);
>> +int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj);
>> +void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj);
>> +int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj);
>> +#else
>> +static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) { return false; }
>> +
>> +static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
>> +static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
>> +static inline void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); }
>> +static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
>> +
>> +#endif
>> +
>>   #endif
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> index 414322619781..4c0a34231623 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
>> @@ -7,6 +7,8 @@
>>   #ifndef __I915_GEM_OBJECT_TYPES_H__
>>   #define __I915_GEM_OBJECT_TYPES_H__
>>   
>> +#include <linux/mmu_notifier.h>
>> +
>>   #include <drm/drm_gem.h>
>>   #include <uapi/drm/i915_drm.h>
>>   
>> @@ -34,7 +36,6 @@ struct drm_i915_gem_object_ops {
>>   #define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(2)
>>   #define I915_GEM_OBJECT_IS_PROXY	BIT(3)
>>   #define I915_GEM_OBJECT_NO_MMAP		BIT(4)
>> -#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(5)
>>   
>>   	/* Interface between the GEM object and its backing storage.
>>   	 * get_pages() is called once prior to the use of the associated set
>> @@ -299,10 +300,11 @@ struct drm_i915_gem_object {
>>   #ifdef CONFIG_MMU_NOTIFIER
>>   		struct i915_gem_userptr {
>>   			uintptr_t ptr;
>> +			unsigned long notifier_seq;
>>   
>> -			struct i915_mm_struct *mm;
>> -			struct i915_mmu_object *mmu_object;
>> -			struct work_struct *work;
>> +			struct mmu_interval_notifier notifier;
>> +			struct page **pvec;
>> +			int page_ref;
>>   		} userptr;
>>   #endif
>>   
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
>> index bf61b88a2113..e7d7650072c5 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
>> @@ -226,7 +226,7 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>>   	 * get_pages backends we should be better able to handle the
>>   	 * cancellation of the async task in a more uniform manner.
>>   	 */
>> -	if (!pages && !i915_gem_object_needs_async_cancel(obj))
>> +	if (!pages)
>>   		pages = ERR_PTR(-EINVAL);
>>   
>>   	if (!IS_ERR(pages))
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
>> index b466ab2def4d..1e42fbc68697 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
>> @@ -2,10 +2,39 @@
>>    * SPDX-License-Identifier: MIT
>>    *
>>    * Copyright © 2012-2014 Intel Corporation
>> + *
>> +  * Based on amdgpu_mn, which bears the following notice:
>> + *
>> + * Copyright 2014 Advanced Micro Devices, Inc.
>> + * All Rights Reserved.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the
>> + * "Software"), to deal in the Software without restriction, including
>> + * without limitation the rights to use, copy, modify, merge, publish,
>> + * distribute, sub license, and/or sell copies of the Software, and to
>> + * permit persons to whom the Software is furnished to do so, subject to
>> + * the following conditions:
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + * The above copyright notice and this permission notice (including the
>> + * next paragraph) shall be included in all copies or substantial portions
>> + * of the Software.
>> + *
>> + */
>> +/*
>> + * Authors:
>> + *    Christian König <christian.koenig at amd.com>
>>    */
>>   
>>   #include <linux/mmu_context.h>
>> -#include <linux/mmu_notifier.h>
>>   #include <linux/mempolicy.h>
>>   #include <linux/swap.h>
>>   #include <linux/sched/mm.h>
>> @@ -15,373 +44,121 @@
>>   #include "i915_gem_object.h"
>>   #include "i915_scatterlist.h"
>>   
>> -#if defined(CONFIG_MMU_NOTIFIER)
>> -
>> -struct i915_mm_struct {
>> -	struct mm_struct *mm;
>> -	struct drm_i915_private *i915;
>> -	struct i915_mmu_notifier *mn;
>> -	struct hlist_node node;
>> -	struct kref kref;
>> -	struct rcu_work work;
>> -};
>> -
>> -#include <linux/interval_tree.h>
>> -
>> -struct i915_mmu_notifier {
>> -	spinlock_t lock;
>> -	struct hlist_node node;
>> -	struct mmu_notifier mn;
>> -	struct rb_root_cached objects;
>> -	struct i915_mm_struct *mm;
>> -};
>> -
>> -struct i915_mmu_object {
>> -	struct i915_mmu_notifier *mn;
>> -	struct drm_i915_gem_object *obj;
>> -	struct interval_tree_node it;
>> -};
>> +#ifdef CONFIG_MMU_NOTIFIER
>>   
>> -static void add_object(struct i915_mmu_object *mo)
>> +/**
>> + * i915_gem_userptr_invalidate - callback to notify about mm change
>> + *
>> + * @mni: the range (mm) is about to update
>> + * @range: details on the invalidation
>> + * @cur_seq: Value to pass to mmu_interval_set_seq()
>> + *
>> + * Block for operations on BOs to finish and mark pages as accessed and
>> + * potentially dirty.
>> + */
>> +static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni,
>> +					const struct mmu_notifier_range *range,
>> +					unsigned long cur_seq)
>>   {
>> -	GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
>> -	interval_tree_insert(&mo->it, &mo->mn->objects);
>> -}
>> +	struct drm_i915_gem_object *obj = container_of(mni, struct drm_i915_gem_object, userptr.notifier);
>> +	struct drm_i915_private *i915 = to_i915(obj->base.dev);
>> +	long r;
>>   
>> -static void del_object(struct i915_mmu_object *mo)
>> -{
>> -	if (RB_EMPTY_NODE(&mo->it.rb))
>> -		return;
>> +	if (!mmu_notifier_range_blockable(range))
>> +		return false;
>>   
>> -	interval_tree_remove(&mo->it, &mo->mn->objects);
>> -	RB_CLEAR_NODE(&mo->it.rb);
>> -}
>> +	spin_lock(&i915->mm.notifier_lock);
>>   
>> -static void
>> -__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
>> -{
>> -	struct i915_mmu_object *mo = obj->userptr.mmu_object;
>> +	mmu_interval_set_seq(mni, cur_seq);
>> +
>> +	spin_unlock(&i915->mm.notifier_lock);
>>   
>>   	/*
>> -	 * During mm_invalidate_range we need to cancel any userptr that
>> -	 * overlaps the range being invalidated. Doing so requires the
>> -	 * struct_mutex, and that risks recursion. In order to cause
>> -	 * recursion, the user must alias the userptr address space with
>> -	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
>> -	 * to invalidate that mmaping, mm_invalidate_range is called with
>> -	 * the userptr address *and* the struct_mutex held.  To prevent that
>> -	 * we set a flag under the i915_mmu_notifier spinlock to indicate
>> -	 * whether this object is valid.
>> +	 * We don't wait when the process is exiting. This is valid
>> +	 * because the object will be cleaned up anyway.
>> +	 *
>> +	 * This is also temporarily required as a hack, because we
>> +	 * cannot currently force non-consistent batch buffers to preempt
>> +	 * and reschedule by waiting on it, hanging processes on exit.
>>   	 */
>> -	if (!mo)
>> -		return;
>> -
>> -	spin_lock(&mo->mn->lock);
>> -	if (value)
>> -		add_object(mo);
>> -	else
>> -		del_object(mo);
>> -	spin_unlock(&mo->mn->lock);
>> -}
>> -
>> -static int
>> -userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>> -				  const struct mmu_notifier_range *range)
>> -{
>> -	struct i915_mmu_notifier *mn =
>> -		container_of(_mn, struct i915_mmu_notifier, mn);
>> -	struct interval_tree_node *it;
>> -	unsigned long end;
>> -	int ret = 0;
>> -
>> -	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
>> -		return 0;
>> -
>> -	/* interval ranges are inclusive, but invalidate range is exclusive */
>> -	end = range->end - 1;
>> -
>> -	spin_lock(&mn->lock);
>> -	it = interval_tree_iter_first(&mn->objects, range->start, end);
>> -	while (it) {
>> -		struct drm_i915_gem_object *obj;
>> -
>> -		if (!mmu_notifier_range_blockable(range)) {
>> -			ret = -EAGAIN;
>> -			break;
>> -		}
>> -
>> -		/*
>> -		 * The mmu_object is released late when destroying the
>> -		 * GEM object so it is entirely possible to gain a
>> -		 * reference on an object in the process of being freed
>> -		 * since our serialisation is via the spinlock and not
>> -		 * the struct_mutex - and consequently use it after it
>> -		 * is freed and then double free it. To prevent that
>> -		 * use-after-free we only acquire a reference on the
>> -		 * object if it is not in the process of being destroyed.
>> -		 */
>> -		obj = container_of(it, struct i915_mmu_object, it)->obj;
>> -		if (!kref_get_unless_zero(&obj->base.refcount)) {
>> -			it = interval_tree_iter_next(it, range->start, end);
>> -			continue;
>> -		}
>> -		spin_unlock(&mn->lock);
>> -
>> -		ret = i915_gem_object_unbind(obj,
>> -					     I915_GEM_OBJECT_UNBIND_ACTIVE |
>> -					     I915_GEM_OBJECT_UNBIND_BARRIER);
>> -		if (ret == 0)
>> -			ret = __i915_gem_object_put_pages(obj);
>> -		i915_gem_object_put(obj);
>> -		if (ret)
>> -			return ret;
>> +	if (current->flags & PF_EXITING)
>> +		return true;
>>   
>> -		spin_lock(&mn->lock);
>> -
>> -		/*
>> -		 * As we do not (yet) protect the mmu from concurrent insertion
>> -		 * over this range, there is no guarantee that this search will
>> -		 * terminate given a pathologic workload.
>> -		 */
>> -		it = interval_tree_iter_first(&mn->objects, range->start, end);
>> -	}
>> -	spin_unlock(&mn->lock);
>> -
>> -	return ret;
>> +	/* we will unbind on next submission, still have userptr pins */
>> +	r = dma_resv_wait_timeout_rcu(obj->base.resv, true, false,
>> +				      MAX_SCHEDULE_TIMEOUT);
>> +	if (r <= 0)
>> +		drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r);
>>   
>> +	return true;
>>   }
>>   
>> -static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
>> -	.invalidate_range_start = userptr_mn_invalidate_range_start,
>> +static const struct mmu_interval_notifier_ops i915_gem_userptr_notifier_ops = {
>> +	.invalidate = i915_gem_userptr_invalidate,
>>   };
>>   
>> -static struct i915_mmu_notifier *
>> -i915_mmu_notifier_create(struct i915_mm_struct *mm)
>> -{
>> -	struct i915_mmu_notifier *mn;
>> -
>> -	mn = kmalloc(sizeof(*mn), GFP_KERNEL);
>> -	if (mn == NULL)
>> -		return ERR_PTR(-ENOMEM);
>> -
>> -	spin_lock_init(&mn->lock);
>> -	mn->mn.ops = &i915_gem_userptr_notifier;
>> -	mn->objects = RB_ROOT_CACHED;
>> -	mn->mm = mm;
>> -
>> -	return mn;
>> -}
>> -
>> -static void
>> -i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
>> -{
>> -	struct i915_mmu_object *mo;
>> -
>> -	mo = fetch_and_zero(&obj->userptr.mmu_object);
>> -	if (!mo)
>> -		return;
>> -
>> -	spin_lock(&mo->mn->lock);
>> -	del_object(mo);
>> -	spin_unlock(&mo->mn->lock);
>> -	kfree(mo);
>> -}
>> -
>> -static struct i915_mmu_notifier *
>> -i915_mmu_notifier_find(struct i915_mm_struct *mm)
>> -{
>> -	struct i915_mmu_notifier *mn, *old;
>> -	int err;
>> -
>> -	mn = READ_ONCE(mm->mn);
>> -	if (likely(mn))
>> -		return mn;
>> -
>> -	mn = i915_mmu_notifier_create(mm);
>> -	if (IS_ERR(mn))
>> -		return mn;
>> -
>> -	err = mmu_notifier_register(&mn->mn, mm->mm);
>> -	if (err) {
>> -		kfree(mn);
>> -		return ERR_PTR(err);
>> -	}
>> -
>> -	old = cmpxchg(&mm->mn, NULL, mn);
>> -	if (old) {
>> -		mmu_notifier_unregister(&mn->mn, mm->mm);
>> -		kfree(mn);
>> -		mn = old;
>> -	}
>> -
>> -	return mn;
>> -}
>> -
>>   static int
>>   i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj)
>>   {
>> -	struct i915_mmu_notifier *mn;
>> -	struct i915_mmu_object *mo;
>> -
>> -	if (GEM_WARN_ON(!obj->userptr.mm))
>> -		return -EINVAL;
>> -
>> -	mn = i915_mmu_notifier_find(obj->userptr.mm);
>> -	if (IS_ERR(mn))
>> -		return PTR_ERR(mn);
>> -
>> -	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
>> -	if (!mo)
>> -		return -ENOMEM;
>> -
>> -	mo->mn = mn;
>> -	mo->obj = obj;
>> -	mo->it.start = obj->userptr.ptr;
>> -	mo->it.last = obj->userptr.ptr + obj->base.size - 1;
>> -	RB_CLEAR_NODE(&mo->it.rb);
>> -
>> -	obj->userptr.mmu_object = mo;
>> -	return 0;
>> -}
>> -
>> -static void
>> -i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
>> -		       struct mm_struct *mm)
>> -{
>> -	if (mn == NULL)
>> -		return;
>> -
>> -	mmu_notifier_unregister(&mn->mn, mm);
>> -	kfree(mn);
>> -}
>> -
>> -
>> -static struct i915_mm_struct *
>> -__i915_mm_struct_find(struct drm_i915_private *i915, struct mm_struct *real)
>> -{
>> -	struct i915_mm_struct *it, *mm = NULL;
>> -
>> -	rcu_read_lock();
>> -	hash_for_each_possible_rcu(i915->mm_structs,
>> -				   it, node,
>> -				   (unsigned long)real)
>> -		if (it->mm == real && kref_get_unless_zero(&it->kref)) {
>> -			mm = it;
>> -			break;
>> -		}
>> -	rcu_read_unlock();
>> -
>> -	return mm;
>> +	return mmu_interval_notifier_insert(&obj->userptr.notifier, current->mm,
>> +					    obj->userptr.ptr, obj->base.size,
>> +					    &i915_gem_userptr_notifier_ops);
>>   }
>>   
>> -static int
>> -i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
>> +static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
>>   {
>>   	struct drm_i915_private *i915 = to_i915(obj->base.dev);
>> -	struct i915_mm_struct *mm, *new;
>> -	int ret = 0;
>> -
>> -	/* During release of the GEM object we hold the struct_mutex. This
>> -	 * precludes us from calling mmput() at that time as that may be
>> -	 * the last reference and so call exit_mmap(). exit_mmap() will
>> -	 * attempt to reap the vma, and if we were holding a GTT mmap
>> -	 * would then call drm_gem_vm_close() and attempt to reacquire
>> -	 * the struct mutex. So in order to avoid that recursion, we have
>> -	 * to defer releasing the mm reference until after we drop the
>> -	 * struct_mutex, i.e. we need to schedule a worker to do the clean
>> -	 * up.
>> -	 */
>> -	mm = __i915_mm_struct_find(i915, current->mm);
>> -	if (mm)
>> -		goto out;
>> +	struct page **pvec = NULL;
>>   
>> -	new = kmalloc(sizeof(*mm), GFP_KERNEL);
>> -	if (!new)
>> -		return -ENOMEM;
>> -
>> -	kref_init(&new->kref);
>> -	new->i915 = to_i915(obj->base.dev);
>> -	new->mm = current->mm;
>> -	new->mn = NULL;
>> -
>> -	spin_lock(&i915->mm_lock);
>> -	mm = __i915_mm_struct_find(i915, current->mm);
>> -	if (!mm) {
>> -		hash_add_rcu(i915->mm_structs,
>> -			     &new->node,
>> -			     (unsigned long)new->mm);
>> -		mmgrab(current->mm);
>> -		mm = new;
>> +	spin_lock(&i915->mm.notifier_lock);
>> +	if (!--obj->userptr.page_ref) {
>> +		pvec = obj->userptr.pvec;
>> +		obj->userptr.pvec = NULL;
>>   	}
>> -	spin_unlock(&i915->mm_lock);
>> -	if (mm != new)
>> -		kfree(new);
>> +	GEM_BUG_ON(obj->userptr.page_ref < 0);
>> +	spin_unlock(&i915->mm.notifier_lock);
>>   
>> -out:
>> -	obj->userptr.mm = mm;
>> -	return ret;
>> -}
>> -
>> -static void
>> -__i915_mm_struct_free__worker(struct work_struct *work)
>> -{
>> -	struct i915_mm_struct *mm = container_of(work, typeof(*mm), work.work);
>> -
>> -	i915_mmu_notifier_free(mm->mn, mm->mm);
>> -	mmdrop(mm->mm);
>> -	kfree(mm);
>> -}
>> -
>> -static void
>> -__i915_mm_struct_free(struct kref *kref)
>> -{
>> -	struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);
>> -
>> -	spin_lock(&mm->i915->mm_lock);
>> -	hash_del_rcu(&mm->node);
>> -	spin_unlock(&mm->i915->mm_lock);
>> -
>> -	INIT_RCU_WORK(&mm->work, __i915_mm_struct_free__worker);
>> -	queue_rcu_work(system_wq, &mm->work);
>> -}
>> -
>> -static void
>> -i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
>> -{
>> -	if (obj->userptr.mm == NULL)
>> -		return;
>> +	if (pvec) {
>> +		const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
>>   
>> -	kref_put(&obj->userptr.mm->kref, __i915_mm_struct_free);
>> -	obj->userptr.mm = NULL;
>> +		unpin_user_pages(pvec, num_pages);
>> +		kfree(pvec);
> This one needs to be kvfree apparently, per review from previous round.
> Can you pls do a fixup patch? I'm already merging.
>
> For the other pending review comments and suggestions I think it's better
> if we handle this in a separate patch series to highlight all the
> questions, and ideally with amdgpu folks involved too.
> -Daniel

Sounds fair enough,

So with the kfree->kvfree fixed,

Reviewed-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>