[Intel-gfx] [PATCH 2/2] drm/i915: Remove the vm open count

Niranjana Vishwanathapura niranjana.vishwanathapura at intel.com
Wed Mar 2 03:45:44 UTC 2022


On Tue, Feb 22, 2022 at 06:10:30PM +0100, Thomas Hellström wrote:
>vms are not getting properly closed. Rather than fixing that,
>Remove the vm open count and instead rely on the vm refcount.
>
>The vm open count existed solely to break the strong references the
>vmas had on the vms. Now instead make those references weak and
>ensure vmas are destroyed when the vm is destroyed.
>
>Unfortunately if the vm destructor and the object destructor both
>wants to destroy a vma, that may lead to a race in that the vm
>destructor just unbinds the vma and leaves the actual vma destruction
>to the object destructor. However in order for the object destructor
>to ensure the vma is unbound it needs to grab the vm mutex. In order
>to keep the vm mutex alive until the object destructor is done with
>it, somewhat hackishly grab a vm_resv refcount that is released late
>in the vma destruction process, when the vm mutex is no longer needed.
>
>Cc: <dri-devel at lists.freedesktop.org>
>Co-developed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura at intel.com>
>Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura at intel.com>
>Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>---
> drivers/gpu/drm/i915/display/intel_dpt.c      |  2 +-
> drivers/gpu/drm/i915/gem/i915_gem_context.c   | 29 ++-----
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  6 ++
> .../gpu/drm/i915/gem/selftests/mock_context.c |  5 +-
> drivers/gpu/drm/i915/gt/gen6_ppgtt.c          |  2 +-
> drivers/gpu/drm/i915/gt/intel_ggtt.c          | 25 ++----
> drivers/gpu/drm/i915/gt/intel_gtt.c           | 48 ++++++++---
> drivers/gpu/drm/i915/gt/intel_gtt.h           | 56 ++++--------
> drivers/gpu/drm/i915/gt/selftest_execlists.c  | 86 +++++++++----------
> drivers/gpu/drm/i915/i915_gem.c               |  6 +-
> drivers/gpu/drm/i915/i915_vma.c               | 55 ++++++++----
> drivers/gpu/drm/i915/i915_vma_resource.c      |  2 +-
> drivers/gpu/drm/i915/i915_vma_resource.h      |  6 ++
> drivers/gpu/drm/i915/i915_vma_types.h         |  7 ++
> drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  4 +-
> 15 files changed, 179 insertions(+), 160 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
>index c2f8f853db90..6920669bc571 100644
>--- a/drivers/gpu/drm/i915/display/intel_dpt.c
>+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
>@@ -298,5 +298,5 @@ void intel_dpt_destroy(struct i915_address_space *vm)
> {
> 	struct i915_dpt *dpt = i915_vm_to_dpt(vm);
>
>-	i915_vm_close(&dpt->vm);
>+	i915_vm_put(&dpt->vm);
> }
>diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>index ebbac2ea0833..41404f043741 100644
>--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
>+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
>@@ -1440,8 +1440,6 @@ static void set_closed_name(struct i915_gem_context *ctx)
>
> static void context_close(struct i915_gem_context *ctx)
> {
>-	struct i915_address_space *vm;
>-
> 	/* Flush any concurrent set_engines() */
> 	mutex_lock(&ctx->engines_mutex);
> 	unpin_engines(__context_engines_static(ctx));
>@@ -1453,19 +1451,6 @@ static void context_close(struct i915_gem_context *ctx)
>
> 	set_closed_name(ctx);
>
>-	vm = ctx->vm;
>-	if (vm) {
>-		/* i915_vm_close drops the final reference, which is a bit too
>-		 * early and could result in surprises with concurrent
>-		 * operations racing with thist ctx close. Keep a full reference
>-		 * until the end.
>-		 */
>-		i915_vm_get(vm);
>-		i915_vm_close(vm);
>-	}
>-
>-	ctx->file_priv = ERR_PTR(-EBADF);
>-
> 	/*
> 	 * The LUT uses the VMA as a backpointer to unref the object,
> 	 * so we need to clear the LUT before we close all the VMA (inside
>@@ -1473,6 +1458,8 @@ static void context_close(struct i915_gem_context *ctx)
> 	 */
> 	lut_close(ctx);
>
>+	ctx->file_priv = ERR_PTR(-EBADF);
>+
> 	spin_lock(&ctx->i915->gem.contexts.lock);
> 	list_del(&ctx->link);
> 	spin_unlock(&ctx->i915->gem.contexts.lock);
>@@ -1571,12 +1558,8 @@ i915_gem_create_context(struct drm_i915_private *i915,
> 		}
> 		vm = &ppgtt->vm;
> 	}
>-	if (vm) {
>-		ctx->vm = i915_vm_open(vm);
>-
>-		/* i915_vm_open() takes a reference */
>-		i915_vm_put(vm);
>-	}
>+	if (vm)
>+		ctx->vm = vm;
>
> 	mutex_init(&ctx->engines_mutex);
> 	if (pc->num_user_engines >= 0) {
>@@ -1626,7 +1609,7 @@ i915_gem_create_context(struct drm_i915_private *i915,
> 	free_engines(e);
> err_vm:
> 	if (ctx->vm)
>-		i915_vm_close(ctx->vm);
>+		i915_vm_put(ctx->vm);
> err_ctx:
> 	kfree(ctx);
> 	return ERR_PTR(err);
>@@ -1810,7 +1793,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
> 	if (err)
> 		return err;
>
>-	i915_vm_open(vm);
>+	i915_vm_get(vm);
>
> 	GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */
> 	args->value = id;
>diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>index ae6805b37806..4a0af90546cf 100644
>--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>@@ -2688,6 +2688,11 @@ eb_select_engine(struct i915_execbuffer *eb)
> 	if (err)
> 		goto err;
>
>+	if (!i915_vm_tryget(ce->vm)) {
>+		err = -ENOENT;
>+		goto err;
>+	}
>+
> 	eb->context = ce;
> 	eb->gt = ce->engine->gt;
>
>@@ -2711,6 +2716,7 @@ eb_put_engine(struct i915_execbuffer *eb)
> {
> 	struct intel_context *child;
>
>+	i915_vm_put(eb->context->vm);
> 	intel_gt_pm_put(eb->gt);
> 	for_each_child(eb->context, child)
> 		intel_context_put(child);
>diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
>index c0a8ef368044..5675b04dfa33 100644
>--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
>+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
>@@ -41,8 +41,7 @@ mock_context(struct drm_i915_private *i915,
> 		if (!ppgtt)
> 			goto err_free;
>
>-		ctx->vm = i915_vm_open(&ppgtt->vm);
>-		i915_vm_put(&ppgtt->vm);
>+		ctx->vm = &ppgtt->vm;
> 	}
>
> 	mutex_init(&ctx->engines_mutex);
>@@ -58,7 +57,7 @@ mock_context(struct drm_i915_private *i915,
>
> err_vm:
> 	if (ctx->vm)
>-		i915_vm_close(ctx->vm);
>+		i915_vm_put(ctx->vm);
> err_free:
> 	kfree(ctx);
> 	return NULL;
>diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
>index d657ffd6c86a..b40c965cfae0 100644
>--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
>+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
>@@ -318,7 +318,7 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
> 	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
> 	int err;
>
>-	GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
>+	GEM_BUG_ON(!kref_read(&ppgtt->base.vm.ref));
>
> 	/*
> 	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
>diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>index 536b0995b595..cb694fe8586e 100644
>--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>@@ -125,7 +125,7 @@ static bool needs_idle_maps(struct drm_i915_private *i915)
> void i915_ggtt_suspend_vm(struct i915_address_space *vm)
> {
> 	struct i915_vma *vma, *vn;
>-	int open;
>+	int save_skip_rewrite;
>
> 	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
>
>@@ -135,7 +135,8 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
> 	mutex_lock(&vm->mutex);
>
> 	/* Skip rewriting PTE on VMA unbind. */
>-	open = atomic_xchg(&vm->open, 0);
>+	save_skip_rewrite = vm->skip_pte_rewrite;
>+	vm->skip_pte_rewrite = true;
>
> 	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
> 		struct drm_i915_gem_object *obj = vma->obj;
>@@ -153,16 +154,14 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
> 			 */
> 			i915_gem_object_get(obj);
>
>-			atomic_set(&vm->open, open);
> 			mutex_unlock(&vm->mutex);
>
> 			i915_gem_object_lock(obj, NULL);
>-			open = i915_vma_unbind(vma);
>+			GEM_WARN_ON(i915_vma_unbind(vma));
> 			i915_gem_object_unlock(obj);
>-
>-			GEM_WARN_ON(open);
>-
> 			i915_gem_object_put(obj);
>+
>+			vm->skip_pte_rewrite = save_skip_rewrite;

This skip_pte_rewrite method of conveying information to i915_vma_unbind()
seems a bit hacky to me. But the earlier vm->open setting/resetting here
was also hacky, so this should be OK.
Any thoughts on how to clean it up in the future?
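One possible direction (just a sketch; the flag and the _flags() helper
below are hypothetical and don't exist today): pass the intent explicitly
to the unbind call instead of stashing it in the vm, e.g.

	/* Hypothetical only -- neither the flag nor the helper exist yet. */
	#define I915_VMA_UNBIND_SKIP_PTE_REWRITE	BIT(0)
	int i915_vma_unbind_flags(struct i915_vma *vma, unsigned int flags);

	/* i915_ggtt_suspend_vm() could then drop the save/restore of
	 * vm->skip_pte_rewrite around the unbind:
	 */
	i915_gem_object_lock(obj, NULL);
	GEM_WARN_ON(i915_vma_unbind_flags(vma, I915_VMA_UNBIND_SKIP_PTE_REWRITE));
	i915_gem_object_unlock(obj);

That would keep the "skip PTE rewrite" decision local to the caller rather
than making it vm-wide state protected by the mutex.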

> 			goto retry;
> 		}
>
>@@ -178,7 +177,7 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
>
> 	vm->clear_range(vm, 0, vm->total);
>
>-	atomic_set(&vm->open, open);
>+	vm->skip_pte_rewrite = save_skip_rewrite;
>
> 	mutex_unlock(&vm->mutex);
> }
>@@ -772,13 +771,13 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
> {
> 	struct i915_vma *vma, *vn;
>
>-	atomic_set(&ggtt->vm.open, 0);
>-
> 	flush_workqueue(ggtt->vm.i915->wq);
> 	i915_gem_drain_freed_objects(ggtt->vm.i915);
>
> 	mutex_lock(&ggtt->vm.mutex);
>
>+	ggtt->vm.skip_pte_rewrite = true;
>+
> 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
> 		struct drm_i915_gem_object *obj = vma->obj;
> 		bool trylock;
>@@ -1306,16 +1305,12 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
> {
> 	struct i915_vma *vma;
> 	bool write_domain_objs = false;
>-	int open;
>
> 	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
>
> 	/* First fill our portion of the GTT with scratch pages */
> 	vm->clear_range(vm, 0, vm->total);
>
>-	/* Skip rewriting PTE on VMA unbind. */
>-	open = atomic_xchg(&vm->open, 0);
>-
> 	/* clflush objects bound into the GGTT and rebind them. */
> 	list_for_each_entry(vma, &vm->bound_list, vm_link) {
> 		struct drm_i915_gem_object *obj = vma->obj;
>@@ -1332,8 +1327,6 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
> 		}
> 	}
>
>-	atomic_set(&vm->open, open);
>-
> 	return write_domain_objs;
> }
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
>index 4363848f7411..ff402653938a 100644
>--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
>+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
>@@ -95,32 +95,52 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object
> 	return 0;
> }
>
>-void __i915_vm_close(struct i915_address_space *vm)
>+static void clear_vm_list(struct list_head *list)
> {
> 	struct i915_vma *vma, *vn;
>
>-	if (!atomic_dec_and_mutex_lock(&vm->open, &vm->mutex))
>-		return;
>-
>-	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
>+	list_for_each_entry_safe(vma, vn, list, vm_link) {
> 		struct drm_i915_gem_object *obj = vma->obj;
>
> 		if (!kref_get_unless_zero(&obj->base.refcount)) {
> 			/*
> 			 * Unbind the dying vma to ensure the bound_list
> 			 * is completely drained. We leave the destruction to
>-			 * the object destructor.
>+			 * the object destructor to avoid the vma
>+			 * disappearing under it.
> 			 */
> 			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
> 			WARN_ON(__i915_vma_unbind(vma));
>+
>+			/* Remove from the unbound list */
>+			list_del_init(&vma->vm_link);

Looks like the vma gets deleted from the unbound list during
i915_vma_destroy() anyway. Why do we need to do it here?

>+
>+			/*
>+			 * Delay the vm and vm mutex freeing until the
>+			 * object is done with destruction.
>+			 */
>+			i915_vm_resv_get(vma->vm);
>+			vma->vm_ddestroy = true;
>+

I am wondering whether we really need this vm_ddestroy mechanism.
Can we call i915_gem_object_put() after i915_vm_put() in i915_vma_parked()?
Are there other call flows that can cause this race?
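To make the question concrete (purely illustrative, taken from the
i915_vma_parked() hunk further down; whether the reordering is actually
safe is exactly what I'm asking):

	/* i915_vma_parked() after this patch: */
	i915_gem_object_put(obj);
	i915_vm_put(vm);

	/* vs. dropping the vm reference first: */
	i915_vm_put(vm);
	i915_gem_object_put(obj);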

> 			continue;
>+		} else {
>+			i915_vma_destroy_locked(vma);
>+			i915_gem_object_put(obj);
> 		}
>
>-		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
>-		i915_vma_destroy_locked(vma);
>-		i915_gem_object_put(obj);

We don't need to shove it under the 'else', since the 'if' branch ends
with a 'continue'. Checkpatch suggests dropping the 'else' when the 'if'
branch ends with a 'return' (not sure whether it also does so for
'continue').
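That is, the block above could simply read (same logic as in the patch,
just without the 'else'):

	if (!kref_get_unless_zero(&obj->base.refcount)) {
		/* ... defer destruction to the object destructor ... */
		continue;
	}

	i915_vma_destroy_locked(vma);
	i915_gem_object_put(obj);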

> 	}
>+}
>+
>+static void __i915_vm_close(struct i915_address_space *vm)
>+{
>+	mutex_lock(&vm->mutex);
>+
>+	clear_vm_list(&vm->bound_list);
>+	clear_vm_list(&vm->unbound_list);
>+
>+	/* Check for must-fix unanticipated side-effects */
> 	GEM_BUG_ON(!list_empty(&vm->bound_list));
>+	GEM_BUG_ON(!list_empty(&vm->unbound_list));
>
> 	mutex_unlock(&vm->mutex);
> }
>@@ -142,7 +162,6 @@ int i915_vm_lock_objects(struct i915_address_space *vm,
> void i915_address_space_fini(struct i915_address_space *vm)
> {
> 	drm_mm_takedown(&vm->mm);
>-	mutex_destroy(&vm->mutex);
> }
>
> /**
>@@ -150,7 +169,8 @@ void i915_address_space_fini(struct i915_address_space *vm)
>  * @kref: Pointer to the &i915_address_space.resv_ref member.
>  *
>  * This function is called when the last lock sharer no longer shares the
>- * &i915_address_space._resv lock.
>+ * &i915_address_space._resv lock, and also if we raced when
>+ * destroying a vma by the vma destruction
>  */
> void i915_vm_resv_release(struct kref *kref)
> {
>@@ -158,6 +178,8 @@ void i915_vm_resv_release(struct kref *kref)
> 		container_of(kref, typeof(*vm), resv_ref);
>
> 	dma_resv_fini(&vm->_resv);
>+	mutex_destroy(&vm->mutex);
>+
> 	kfree(vm);
> }
>
>@@ -166,6 +188,8 @@ static void __i915_vm_release(struct work_struct *work)
> 	struct i915_address_space *vm =
> 		container_of(work, struct i915_address_space, release_work);
>
>+	__i915_vm_close(vm);
>+
> 	/* Synchronize async unbinds. */
> 	i915_vma_resource_bind_dep_sync_all(vm);
>
>@@ -199,7 +223,6 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
>
> 	vm->pending_unbind = RB_ROOT_CACHED;
> 	INIT_WORK(&vm->release_work, __i915_vm_release);
>-	atomic_set(&vm->open, 1);
>
> 	/*
> 	 * The vm->mutex must be reclaim safe (for use in the shrinker).
>@@ -243,6 +266,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
> 	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
>
> 	INIT_LIST_HEAD(&vm->bound_list);
>+	INIT_LIST_HEAD(&vm->unbound_list);
> }
>
> void *__px_vaddr(struct drm_i915_gem_object *p)
>diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
>index 9d83c2d3959c..4529b5e9f6e6 100644
>--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
>+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
>@@ -240,15 +240,6 @@ struct i915_address_space {
>
> 	unsigned int bind_async_flags;
>
>-	/*
>-	 * Each active user context has its own address space (in full-ppgtt).
>-	 * Since the vm may be shared between multiple contexts, we count how
>-	 * many contexts keep us "open". Once open hits zero, we are closed
>-	 * and do not allow any new attachments, and proceed to shutdown our
>-	 * vma and page directories.
>-	 */
>-	atomic_t open;
>-
> 	struct mutex mutex; /* protects vma and our lists */
>
> 	struct kref resv_ref; /* kref to keep the reservation lock alive. */
>@@ -263,6 +254,11 @@ struct i915_address_space {
> 	 */
> 	struct list_head bound_list;
>
>+	/**
>+	 * List of vmas not yet bound or evicted.
>+	 */
>+	struct list_head unbound_list;
>+
> 	/* Global GTT */
> 	bool is_ggtt:1;
>
>@@ -272,6 +268,9 @@ struct i915_address_space {
> 	/* Some systems support read-only mappings for GGTT and/or PPGTT */
> 	bool has_read_only:1;
>
>+	/* Skip pte rewrite on unbind for suspend. Protected by @mutex */
>+	bool skip_pte_rewrite:1;
>+
> 	u8 top;
> 	u8 pd_shift;
> 	u8 scratch_order;
>@@ -446,6 +445,17 @@ i915_vm_get(struct i915_address_space *vm)
> 	return vm;
> }
>
>+static inline struct i915_address_space *
>+i915_vm_tryget(struct i915_address_space *vm)
>+{
>+	return kref_get_unless_zero(&vm->ref) ? vm : NULL;
>+}
>+
>+static inline void assert_vm_alive(struct i915_address_space *vm)
>+{
>+	GEM_BUG_ON(!kref_read(&vm->ref));
>+}
>+
> /**
>  * i915_vm_resv_get - Obtain a reference on the vm's reservation lock
>  * @vm: The vm whose reservation lock we want to share.
>@@ -476,34 +486,6 @@ static inline void i915_vm_resv_put(struct i915_address_space *vm)
> 	kref_put(&vm->resv_ref, i915_vm_resv_release);
> }
>
>-static inline struct i915_address_space *
>-i915_vm_open(struct i915_address_space *vm)
>-{
>-	GEM_BUG_ON(!atomic_read(&vm->open));
>-	atomic_inc(&vm->open);
>-	return i915_vm_get(vm);
>-}
>-
>-static inline bool
>-i915_vm_tryopen(struct i915_address_space *vm)
>-{
>-	if (atomic_add_unless(&vm->open, 1, 0))
>-		return i915_vm_get(vm);
>-
>-	return false;
>-}
>-
>-void __i915_vm_close(struct i915_address_space *vm);
>-
>-static inline void
>-i915_vm_close(struct i915_address_space *vm)
>-{
>-	GEM_BUG_ON(!atomic_read(&vm->open));
>-	__i915_vm_close(vm);
>-
>-	i915_vm_put(vm);
>-}
>-
> void i915_address_space_init(struct i915_address_space *vm, int subclass);
> void i915_address_space_fini(struct i915_address_space *vm);
>
>diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
>index e10da897e07a..401f71973238 100644
>--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
>+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
>@@ -1735,15 +1735,9 @@ static int live_preempt(void *arg)
> 	enum intel_engine_id id;
> 	int err = -ENOMEM;
>
>-	if (igt_spinner_init(&spin_hi, gt))
>-		return -ENOMEM;
>-
>-	if (igt_spinner_init(&spin_lo, gt))
>-		goto err_spin_hi;
>-
> 	ctx_hi = kernel_context(gt->i915, NULL);
> 	if (!ctx_hi)
>-		goto err_spin_lo;
>+		return -ENOMEM;
> 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
>
> 	ctx_lo = kernel_context(gt->i915, NULL);
>@@ -1751,6 +1745,12 @@ static int live_preempt(void *arg)
> 		goto err_ctx_hi;
> 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
>
>+	if (igt_spinner_init(&spin_hi, gt))
>+		goto err_ctx_lo;
>+
>+	if (igt_spinner_init(&spin_lo, gt))
>+		goto err_spin_hi;
>+
> 	for_each_engine(engine, gt, id) {
> 		struct igt_live_test t;
> 		struct i915_request *rq;
>@@ -1760,14 +1760,14 @@ static int live_preempt(void *arg)
>
> 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
> 			err = -EIO;
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
>
> 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
> 					    MI_ARB_CHECK);
> 		if (IS_ERR(rq)) {
> 			err = PTR_ERR(rq);
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
>
> 		i915_request_add(rq);
>@@ -1776,7 +1776,7 @@ static int live_preempt(void *arg)
> 			GEM_TRACE_DUMP();
> 			intel_gt_set_wedged(gt);
> 			err = -EIO;
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
>
> 		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
>@@ -1784,7 +1784,7 @@ static int live_preempt(void *arg)
> 		if (IS_ERR(rq)) {
> 			igt_spinner_end(&spin_lo);
> 			err = PTR_ERR(rq);
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
>
> 		i915_request_add(rq);
>@@ -1793,7 +1793,7 @@ static int live_preempt(void *arg)
> 			GEM_TRACE_DUMP();
> 			intel_gt_set_wedged(gt);
> 			err = -EIO;
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
>
> 		igt_spinner_end(&spin_hi);
>@@ -1801,19 +1801,19 @@ static int live_preempt(void *arg)
>
> 		if (igt_live_test_end(&t)) {
> 			err = -EIO;
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
> 	}
>
> 	err = 0;
>-err_ctx_lo:
>-	kernel_context_close(ctx_lo);
>-err_ctx_hi:
>-	kernel_context_close(ctx_hi);
> err_spin_lo:
> 	igt_spinner_fini(&spin_lo);
> err_spin_hi:
> 	igt_spinner_fini(&spin_hi);
>+err_ctx_lo:
>+	kernel_context_close(ctx_lo);
>+err_ctx_hi:
>+	kernel_context_close(ctx_hi);
> 	return err;
> }
>
>@@ -1827,20 +1827,20 @@ static int live_late_preempt(void *arg)
> 	enum intel_engine_id id;
> 	int err = -ENOMEM;
>
>-	if (igt_spinner_init(&spin_hi, gt))
>-		return -ENOMEM;
>-
>-	if (igt_spinner_init(&spin_lo, gt))
>-		goto err_spin_hi;
>-
> 	ctx_hi = kernel_context(gt->i915, NULL);
> 	if (!ctx_hi)
>-		goto err_spin_lo;
>+		return -ENOMEM;
>
> 	ctx_lo = kernel_context(gt->i915, NULL);
> 	if (!ctx_lo)
> 		goto err_ctx_hi;
>
>+	if (igt_spinner_init(&spin_hi, gt))
>+		goto err_ctx_lo;
>+
>+	if (igt_spinner_init(&spin_lo, gt))
>+		goto err_spin_hi;
>+
> 	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
> 	ctx_lo->sched.priority = 1;
>
>@@ -1853,14 +1853,14 @@ static int live_late_preempt(void *arg)
>
> 		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
> 			err = -EIO;
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
>
> 		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
> 					    MI_ARB_CHECK);
> 		if (IS_ERR(rq)) {
> 			err = PTR_ERR(rq);
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
>
> 		i915_request_add(rq);
>@@ -1874,7 +1874,7 @@ static int live_late_preempt(void *arg)
> 		if (IS_ERR(rq)) {
> 			igt_spinner_end(&spin_lo);
> 			err = PTR_ERR(rq);
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
>
> 		i915_request_add(rq);
>@@ -1897,19 +1897,19 @@ static int live_late_preempt(void *arg)
>
> 		if (igt_live_test_end(&t)) {
> 			err = -EIO;
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
> 	}
>
> 	err = 0;
>-err_ctx_lo:
>-	kernel_context_close(ctx_lo);
>-err_ctx_hi:
>-	kernel_context_close(ctx_hi);
> err_spin_lo:
> 	igt_spinner_fini(&spin_lo);
> err_spin_hi:
> 	igt_spinner_fini(&spin_hi);
>+err_ctx_lo:
>+	kernel_context_close(ctx_lo);
>+err_ctx_hi:
>+	kernel_context_close(ctx_hi);
> 	return err;
>
> err_wedged:
>@@ -1917,7 +1917,7 @@ static int live_late_preempt(void *arg)
> 	igt_spinner_end(&spin_lo);
> 	intel_gt_set_wedged(gt);
> 	err = -EIO;
>-	goto err_ctx_lo;
>+	goto err_spin_lo;
> }
>
> struct preempt_client {
>@@ -3381,12 +3381,9 @@ static int live_preempt_timeout(void *arg)
> 	if (!intel_has_reset_engine(gt))
> 		return 0;
>
>-	if (igt_spinner_init(&spin_lo, gt))
>-		return -ENOMEM;
>-
> 	ctx_hi = kernel_context(gt->i915, NULL);
> 	if (!ctx_hi)
>-		goto err_spin_lo;
>+		return -ENOMEM;
> 	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
>
> 	ctx_lo = kernel_context(gt->i915, NULL);
>@@ -3394,6 +3391,9 @@ static int live_preempt_timeout(void *arg)
> 		goto err_ctx_hi;
> 	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
>
>+	if (igt_spinner_init(&spin_lo, gt))
>+		goto err_ctx_lo;
>+
> 	for_each_engine(engine, gt, id) {
> 		unsigned long saved_timeout;
> 		struct i915_request *rq;
>@@ -3405,21 +3405,21 @@ static int live_preempt_timeout(void *arg)
> 					    MI_NOOP); /* preemption disabled */
> 		if (IS_ERR(rq)) {
> 			err = PTR_ERR(rq);
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
>
> 		i915_request_add(rq);
> 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
> 			intel_gt_set_wedged(gt);
> 			err = -EIO;
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
>
> 		rq = igt_request_alloc(ctx_hi, engine);
> 		if (IS_ERR(rq)) {
> 			igt_spinner_end(&spin_lo);
> 			err = PTR_ERR(rq);
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
>
> 		/* Flush the previous CS ack before changing timeouts */
>@@ -3439,7 +3439,7 @@ static int live_preempt_timeout(void *arg)
> 			intel_gt_set_wedged(gt);
> 			i915_request_put(rq);
> 			err = -ETIME;
>-			goto err_ctx_lo;
>+			goto err_spin_lo;
> 		}
>
> 		igt_spinner_end(&spin_lo);
>@@ -3447,12 +3447,12 @@ static int live_preempt_timeout(void *arg)
> 	}
>
> 	err = 0;
>+err_spin_lo:
>+	igt_spinner_fini(&spin_lo);
> err_ctx_lo:
> 	kernel_context_close(ctx_lo);
> err_ctx_hi:
> 	kernel_context_close(ctx_hi);
>-err_spin_lo:
>-	igt_spinner_fini(&spin_lo);
> 	return err;
> }
>
>diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>index bb65563296b5..9d5a95dc58e1 100644
>--- a/drivers/gpu/drm/i915/i915_gem.c
>+++ b/drivers/gpu/drm/i915/i915_gem.c
>@@ -138,8 +138,6 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
> 	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
> 						       struct i915_vma,
> 						       obj_link))) {
>-		struct i915_address_space *vm = vma->vm;
>-
> 		list_move_tail(&vma->obj_link, &still_in_list);
> 		if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
> 			continue;
>@@ -150,7 +148,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
> 		}
>
> 		ret = -EAGAIN;
>-		if (!i915_vm_tryopen(vm))
>+		if (!i915_vm_tryget(vma->vm))
> 			break;
>
> 		/* Prevent vma being freed by i915_vma_parked as we unbind */
>@@ -182,7 +180,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
> 			__i915_vma_put(vma);
> 		}
>
>-		i915_vm_close(vm);
>+		i915_vm_put(vma->vm);
> 		spin_lock(&obj->vma.lock);
> 	}
> 	list_splice_init(&still_in_list, &obj->vma.list);
>diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
>index 9c1582a473c6..f67186d0df31 100644
>--- a/drivers/gpu/drm/i915/i915_vma.c
>+++ b/drivers/gpu/drm/i915/i915_vma.c
>@@ -46,7 +46,7 @@ static inline void assert_vma_held_evict(const struct i915_vma *vma)
> 	 * This is the only exception to the requirement of the object lock
> 	 * being held.
> 	 */
>-	if (atomic_read(&vma->vm->open))
>+	if (kref_read(&vma->vm->ref))
> 		assert_object_held_shared(vma->obj);
> }
>
>@@ -112,6 +112,7 @@ vma_create(struct drm_i915_gem_object *obj,
> 	struct i915_vma *pos = ERR_PTR(-E2BIG);
> 	struct i915_vma *vma;
> 	struct rb_node *rb, **p;
>+	int err;
>
> 	/* The aliasing_ppgtt should never be used directly! */
> 	GEM_BUG_ON(vm == &vm->gt->ggtt->alias->vm);
>@@ -121,7 +122,6 @@ vma_create(struct drm_i915_gem_object *obj,
> 		return ERR_PTR(-ENOMEM);
>
> 	kref_init(&vma->ref);
>-	vma->vm = i915_vm_get(vm);
> 	vma->ops = &vm->vma_ops;
> 	vma->obj = obj;
> 	vma->size = obj->base.size;
>@@ -137,6 +137,8 @@ vma_create(struct drm_i915_gem_object *obj,
> 	}
>
> 	INIT_LIST_HEAD(&vma->closed_link);
>+	INIT_LIST_HEAD(&vma->obj_link);
>+	RB_CLEAR_NODE(&vma->obj_node);
>
> 	if (view && view->type != I915_GGTT_VIEW_NORMAL) {
> 		vma->ggtt_view = *view;
>@@ -162,8 +164,16 @@ vma_create(struct drm_i915_gem_object *obj,
>
> 	GEM_BUG_ON(!IS_ALIGNED(vma->size, I915_GTT_PAGE_SIZE));
>
>-	spin_lock(&obj->vma.lock);
>+	err = mutex_lock_interruptible(&vm->mutex);
>+	if (err) {
>+		pos = ERR_PTR(err);
>+		goto err_vma;
>+	}
>
>+	vma->vm = vm;
>+	list_add_tail(&vma->vm_link, &vm->unbound_list);
>+
>+	spin_lock(&obj->vma.lock);
> 	if (i915_is_ggtt(vm)) {
> 		if (unlikely(overflows_type(vma->size, u32)))
> 			goto err_unlock;
>@@ -221,13 +231,15 @@ vma_create(struct drm_i915_gem_object *obj,
> 		list_add_tail(&vma->obj_link, &obj->vma.list);
>
> 	spin_unlock(&obj->vma.lock);
>+	mutex_unlock(&vm->mutex);
>
> 	return vma;
>
> err_unlock:
> 	spin_unlock(&obj->vma.lock);
>+	list_del_init(&vma->vm_link);
>+	mutex_unlock(&vm->mutex);
> err_vma:
>-	i915_vm_put(vm);
> 	i915_vma_free(vma);
> 	return pos;
> }
>@@ -278,7 +290,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
> 	struct i915_vma *vma;
>
> 	GEM_BUG_ON(view && !i915_is_ggtt_or_dpt(vm));
>-	GEM_BUG_ON(!atomic_read(&vm->open));
>+	GEM_BUG_ON(!kref_read(&vm->ref));
>
> 	spin_lock(&obj->vma.lock);
> 	vma = i915_vma_lookup(obj, vm, view);
>@@ -321,7 +333,6 @@ static void __vma_release(struct dma_fence_work *work)
> 		i915_gem_object_put(vw->pinned);
>
> 	i915_vm_free_pt_stash(vw->vm, &vw->stash);
>-	i915_vm_put(vw->vm);
> 	if (vw->vma_res)
> 		i915_vma_resource_put(vw->vma_res);
> }
>@@ -837,7 +848,7 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
> 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> 	GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, color));
>
>-	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
>+	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
>
> 	return 0;
> }
>@@ -853,7 +864,7 @@ i915_vma_detach(struct i915_vma *vma)
> 	 * vma, we can drop its hold on the backing storage and allow
> 	 * it to be reaped by the shrinker.
> 	 */
>-	list_del(&vma->vm_link);
>+	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
> }
>
> static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
>@@ -1314,8 +1325,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
> 			goto err_rpm;
> 		}
>
>-		work->vm = i915_vm_get(vma->vm);
>-
>+		work->vm = vma->vm;
> 		dma_fence_work_chain(&work->base, moving);
>
> 		/* Allocate enough page directories to used PTE */
>@@ -1563,7 +1573,6 @@ void i915_vma_release(struct kref *ref)
> {
> 	struct i915_vma *vma = container_of(ref, typeof(*vma), ref);
>
>-	i915_vm_put(vma->vm);
> 	i915_active_fini(&vma->active);
> 	GEM_WARN_ON(vma->resource);
> 	i915_vma_free(vma);
>@@ -1579,7 +1588,7 @@ static void force_unbind(struct i915_vma *vma)
> 	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
> }
>
>-static void release_references(struct i915_vma *vma)
>+static void release_references(struct i915_vma *vma, bool vm_ddestroy)
> {
> 	struct drm_i915_gem_object *obj = vma->obj;
>
>@@ -1589,10 +1598,14 @@ static void release_references(struct i915_vma *vma)
> 	list_del(&vma->obj_link);
> 	if (!RB_EMPTY_NODE(&vma->obj_node))
> 		rb_erase(&vma->obj_node, &obj->vma.tree);
>+
> 	spin_unlock(&obj->vma.lock);
>
> 	__i915_vma_remove_closed(vma);
>
>+	if (vm_ddestroy)
>+		i915_vm_resv_put(vma->vm);
>+
> 	__i915_vma_put(vma);
> }
>
>@@ -1626,15 +1639,21 @@ void i915_vma_destroy_locked(struct i915_vma *vma)
> 	lockdep_assert_held(&vma->vm->mutex);
>
> 	force_unbind(vma);
>-	release_references(vma);
>+	list_del_init(&vma->vm_link);
>+	release_references(vma, false);
> }
>
> void i915_vma_destroy(struct i915_vma *vma)
> {
>+	bool vm_ddestroy;
>+
> 	mutex_lock(&vma->vm->mutex);
> 	force_unbind(vma);
>+	list_del_init(&vma->vm_link);
>+	vm_ddestroy = vma->vm_ddestroy;
>+	vma->vm_ddestroy = false;
> 	mutex_unlock(&vma->vm->mutex);
>-	release_references(vma);
>+	release_references(vma, vm_ddestroy);
> }
>
> void i915_vma_parked(struct intel_gt *gt)
>@@ -1652,7 +1671,7 @@ void i915_vma_parked(struct intel_gt *gt)
> 		if (!kref_get_unless_zero(&obj->base.refcount))
> 			continue;
>
>-		if (!i915_vm_tryopen(vm)) {
>+		if (!i915_vm_tryget(vm)) {
> 			i915_gem_object_put(obj);
> 			continue;
> 		}
>@@ -1678,7 +1697,7 @@ void i915_vma_parked(struct intel_gt *gt)
> 		}
>
> 		i915_gem_object_put(obj);
>-		i915_vm_close(vm);
>+		i915_vm_put(vm);
> 	}
> }
>
>@@ -1829,7 +1848,9 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
>
> 	/* If vm is not open, unbind is a nop. */
> 	vma_res->needs_wakeref = i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND) &&
>-		atomic_read(&vma->vm->open);
>+		kref_read(&vma->vm->ref);
>+	vma_res->skip_pte_rewrite = !kref_read(&vma->vm->ref) ||
>+		vma->vm->skip_pte_rewrite;

So the idea here is that the page table entries get cleared during VM
release (which is under way), so we don't have to rewrite them for this
vma here?

Niranjana

> 	trace_i915_vma_unbind(vma);
>
> 	unbind_fence = i915_vma_resource_unbind(vma_res);
>diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c
>index 57ae92ba8af1..27c55027387a 100644
>--- a/drivers/gpu/drm/i915/i915_vma_resource.c
>+++ b/drivers/gpu/drm/i915/i915_vma_resource.c
>@@ -178,7 +178,7 @@ static void i915_vma_resource_unbind_work(struct work_struct *work)
> 	bool lockdep_cookie;
>
> 	lockdep_cookie = dma_fence_begin_signalling();
>-	if (likely(atomic_read(&vm->open)))
>+	if (likely(!vma_res->skip_pte_rewrite))
> 		vma_res->ops->unbind_vma(vm, vma_res);
>
> 	dma_fence_end_signalling(lockdep_cookie);
>diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h
>index 25913913baa6..5d8427caa2ba 100644
>--- a/drivers/gpu/drm/i915/i915_vma_resource.h
>+++ b/drivers/gpu/drm/i915/i915_vma_resource.h
>@@ -62,6 +62,11 @@ struct i915_page_sizes {
>  * deferred to a work item awaiting unsignaled fences. This is a hack.
>  * (dma_fence_work uses a fence flag for this, but this seems slightly
>  * cleaner).
>+ * @needs_wakeref: Whether a wakeref is needed during unbind. Since we can't
>+ * take a wakeref in the dma-fence signalling critical path, it needs to be
>+ * taken when the unbind is scheduled.
>+ * @skip_pte_rewrite: During ggtt suspend and vm takedown pte rewriting
>+ * needs to be skipped for unbind.
>  *
>  * The lifetime of a struct i915_vma_resource is from a binding request to
>  * the actual possible asynchronous unbind has completed.
>@@ -113,6 +118,7 @@ struct i915_vma_resource {
> 	bool allocated:1;
> 	bool immediate_unbind:1;
> 	bool needs_wakeref:1;
>+	bool skip_pte_rewrite:1;
> };
>
> bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
>diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
>index 88370dadca82..eac36be184e5 100644
>--- a/drivers/gpu/drm/i915/i915_vma_types.h
>+++ b/drivers/gpu/drm/i915/i915_vma_types.h
>@@ -271,6 +271,13 @@ struct i915_vma {
> #define I915_VMA_PAGES_ACTIVE (BIT(24) | 1)
> 	atomic_t pages_count; /* number of active binds to the pages */
>
>+	/**
>+	 * Whether we hold a reference on the vm dma_resv lock to temporarily
>+	 * block vm freeing until the vma is destroyed.
>+	 * Protected by the vm mutex.
>+	 */
>+	bool vm_ddestroy;
>+
> 	/**
> 	 * Support different GGTT views into the same object.
> 	 * This means there can be multiple VMA mappings per object and per VM.
>diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
>index ca4ed9dd909b..272560ece32e 100644
>--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
>+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
>@@ -1204,7 +1204,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
> 		goto out_free;
> 	}
> 	GEM_BUG_ON(offset_in_page(ppgtt->vm.total));
>-	GEM_BUG_ON(!atomic_read(&ppgtt->vm.open));
>+	assert_vm_alive(&ppgtt->vm);
>
> 	err = func(&ppgtt->vm, 0, ppgtt->vm.total, end_time);
>
>@@ -1437,7 +1437,7 @@ static void track_vma_bind(struct i915_vma *vma)
> 	vma->resource->bi.pages = vma->pages;
>
> 	mutex_lock(&vma->vm->mutex);
>-	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
>+	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> 	mutex_unlock(&vma->vm->mutex);
> }
>
>-- 
>2.34.1
>

