[PATCH 9/9] drm/i915: Rework i915_gem_evict_vm with eviction locking

Maarten Lankhorst maarten.lankhorst at linux.intel.com
Thu Jan 6 10:13:41 UTC 2022


Rework i915_gem_evict_vm into an equivalent implementation that uses a
separate list to track the objects that were locked for eviction.

Evicted objects are held without a refcount and may disappear as soon
as we unlock. However, since removing a vma from the vm's bound list
requires vm->mutex, object destruction cannot have run yet whenever a
trylock under vm->mutex succeeds. This trick allows us to lock all
evictable objects without taking a refcount.
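
Roughly, the fast path then becomes the following (condensed from the
i915_gem_evict_vm hunk below; pin checks and error handling omitted):

  mutex_lock(&vm->mutex);
  list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
          /*
           * Trylock cannot block; the object cannot be destroyed
           * while we hold vm->mutex, so no reference is needed.
           */
          if (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx ||
              i915_gem_object_trylock_to_evict(vma->obj, ww))
                  __i915_vma_unbind(vma);
  }
  i915_gem_ww_ctx_unlock_evictions(ww);
  mutex_unlock(&vm->mutex);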

However, when the trylock fails we cannot block on the object lock, as
we cannot do a blocking wait with vm->mutex held, and there is no
guarantee the object stays valid once that mutex is dropped. Hence, we
take a refcount on the object, drop vm->mutex, lock the object for
eviction, drop the refcount again, and retry the walk of the bound
list.

This is a bit of a trick, but still safe.
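
When the trylock fails, the slow path (again condensed from the hunk
below) looks like this:

  obj = i915_gem_object_get_rcu(vma->obj);
  if (!obj)
          continue; /* object is already being destroyed */

  mutex_unlock(&vm->mutex);
  ret = i915_gem_object_lock_to_evict(obj, ww); /* may block */
  i915_gem_object_put(obj);
  if (ret)
          return ret; /* after unlocking any evictions taken so far */

  goto retry; /* re-take vm->mutex and walk the bound list again */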

Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
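Usage note: callers now pass their ww context and let the ww loop
handle any -EDEADLK backoff, as in the updated selftest below:

  for_i915_gem_ww(&ww, err, false)
          err = i915_gem_evict_vm(&ggtt->vm, &ww);
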
 drivers/gpu/drm/i915/gem/i915_gem_object.h    | 43 ++++++++++--
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  6 ++
 drivers/gpu/drm/i915/i915_gem_evict.c         | 70 +++++++++----------
 drivers/gpu/drm/i915/i915_gem_ww.c            | 24 ++++++-
 drivers/gpu/drm/i915/i915_gem_ww.h            |  5 +-
 drivers/gpu/drm/i915/i915_vma.c               |  2 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   | 11 ++-
 7 files changed, 105 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 09cf1c92373a..ac90a4abde23 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -159,7 +159,7 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
 
 static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
 					 struct i915_gem_ww_ctx *ww,
-					 bool intr)
+					 bool intr, bool evict)
 {
 	int ret;
 
@@ -169,31 +169,49 @@ static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
 		ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
 
 	if (!ret && ww) {
-		i915_gem_object_get(obj);
-		list_add_tail(&obj->obj_link, &ww->obj_list);
+		if (!evict)
+			i915_gem_object_get(obj);
+		obj->evict_locked = evict;
+		list_add_tail(&obj->obj_link, !evict ? &ww->obj_list : &ww->eviction_list);
 	}
-	if (ret == -EALREADY)
+	if (ret == -EALREADY) {
+		/* Move to end of list; if it was on the eviction list, move it to the normal list */
+		if (!evict && obj->evict_locked) {
+			list_move_tail(&obj->obj_link, &ww->obj_list);
+			i915_gem_object_get(obj);
+			obj->evict_locked = false;
+		}
+
 		ret = 0;
+	}
 
 	if (ret == -EDEADLK) {
 		i915_gem_object_get(obj);
 		ww->contended = obj;
+		ww->contended_evict = evict;
 	}
 
 	return ret;
 }
 
+static inline int
+i915_gem_object_lock_to_evict(struct drm_i915_gem_object *obj,
+			      struct i915_gem_ww_ctx *ww)
+{
+	return __i915_gem_object_lock(obj, ww, ww->intr, true);
+}
+
 static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
 				       struct i915_gem_ww_ctx *ww)
 {
-	return __i915_gem_object_lock(obj, ww, ww && ww->intr);
+	return __i915_gem_object_lock(obj, ww, ww && ww->intr, false);
 }
 
 static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
 						     struct i915_gem_ww_ctx *ww)
 {
 	WARN_ON(ww && !ww->intr);
-	return __i915_gem_object_lock(obj, ww, true);
+	return __i915_gem_object_lock(obj, ww, true, false);
 }
 
 static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj,
@@ -205,6 +223,19 @@ static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj,
 		return ww_mutex_trylock(&obj->base.resv->lock, &ww->ctx);
 }
 
+static inline bool
+i915_gem_object_trylock_to_evict(struct drm_i915_gem_object *obj,
+				 struct i915_gem_ww_ctx *ww)
+{
+	bool ret = i915_gem_object_trylock(obj, ww);
+
+	if (ret) {
+		obj->evict_locked = true;
+		list_add_tail(&obj->obj_link, &ww->eviction_list);
+	}
+	return ret;
+}
+
 static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
 {
 	if (obj->ops->adjust_lru)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index f9f7e44099fe..db0fdb0e5721 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -269,6 +269,12 @@ struct drm_i915_gem_object {
 	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
 	 */
 	struct list_head obj_link;
+
+	/**
+	 * @evict_locked: Whether @obj_link sits on the eviction_list.
+	 */
+	bool evict_locked;
+
 	/**
 	 * @shared_resv_from: The object shares the resv from this vm.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 68a83ee01c7a..ec1423e60516 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -391,8 +391,10 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
  */
 int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
 {
+	struct i915_vma *vma, *next;
 	int ret;
 
+retry:
 	ret = mutex_lock_interruptible(&vm->mutex);
 	if (ret)
 		return ret;
@@ -410,55 +412,47 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
 			goto out;
 	}
 
-	do {
-		struct i915_vma *vma, *vn;
-		LIST_HEAD(eviction_list);
-		LIST_HEAD(locked_eviction_list);
+	/* __i915_vma_unbind will remove vma from the bound_list, use _safe */
+	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
+		/*
+		 * If we already own the lock, trylock fails. In case the
+		 * resv is shared among multiple objects, we still need the
+		 * object ref.
+		 */
+		if (dma_resv_locking_ctx(vma->obj->base.resv) != &ww->ctx &&
+		    !i915_gem_object_trylock_to_evict(vma->obj, ww)) {
+			struct drm_i915_gem_object *obj;
 
-		list_for_each_entry(vma, &vm->bound_list, vm_link) {
 			if (i915_vma_is_pinned(vma))
 				continue;
 
-			/*
-			 * If we already own the lock, trylock fails. In case the resv
-			 * is shared among multiple objects, we still need the object ref.
-			 */
-			if (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx)) {
-				__i915_vma_pin(vma);
-				list_add(&vma->evict_link, &locked_eviction_list);
+			obj = i915_gem_object_get_rcu(vma->obj);
+			if (!obj)
+				/* Object's dead, can't drop vm->mutex safely */
 				continue;
-			}
 
-			if (!i915_gem_object_trylock(vma->obj, ww))
-				continue;
-
-			__i915_vma_pin(vma);
-			list_add(&vma->evict_link, &eviction_list);
-		}
-		if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
-			break;
+			mutex_unlock(&vm->mutex);
+			ret = i915_gem_object_lock_to_evict(obj, ww);
+			i915_gem_object_put(obj);
 
-		ret = 0;
-		/* Unbind locked objects first, before unlocking the eviction_list */
-		list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
-			__i915_vma_unpin(vma);
+			if (ret) {
+				i915_gem_ww_ctx_unlock_evictions(ww);
+				return ret;
+			}
 
-			if (ret == 0)
-				ret = __i915_vma_unbind(vma);
-			if (ret != -EINTR) /* "Get me out of here!" */
-				ret = 0;
+			/* take the vm->mutex and evict vma next time we loop */
+			goto retry;
 		}
 
-		list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
-			__i915_vma_unpin(vma);
-			if (ret == 0)
-				ret = __i915_vma_unbind(vma);
-			if (ret != -EINTR) /* "Get me out of here!" */
-				ret = 0;
+		if (i915_vma_is_pinned(vma))
+			continue;
 
-			i915_gem_object_unlock(vma->obj);
-		}
-	} while (ret == 0);
+		ret = __i915_vma_unbind(vma);
+		if (ret)
+			break;
+	}
+
+	i915_gem_ww_ctx_unlock_evictions(ww);
 
 out:
 	mutex_unlock(&vm->mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_ww.c b/drivers/gpu/drm/i915/i915_gem_ww.c
index 3f6ff139478e..3e075ebfa8b7 100644
--- a/drivers/gpu/drm/i915/i915_gem_ww.c
+++ b/drivers/gpu/drm/i915/i915_gem_ww.c
@@ -10,19 +10,34 @@ void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
 {
 	ww_acquire_init(&ww->ctx, &reservation_ww_class);
 	INIT_LIST_HEAD(&ww->obj_list);
+	INIT_LIST_HEAD(&ww->eviction_list);
 	ww->intr = intr;
 	ww->contended = NULL;
 }
 
+void i915_gem_ww_ctx_unlock_evictions(struct i915_gem_ww_ctx *ww)
+{
+	struct drm_i915_gem_object *obj;
+
+	while ((obj = list_first_entry_or_null(&ww->eviction_list, struct drm_i915_gem_object, obj_link))) {
+		GEM_WARN_ON(!obj->evict_locked);
+		list_del(&obj->obj_link);
+		i915_gem_object_unlock(obj);
+	}
+}
+
 static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
 {
 	struct drm_i915_gem_object *obj;
 
 	while ((obj = list_first_entry_or_null(&ww->obj_list, struct drm_i915_gem_object, obj_link))) {
 		list_del(&obj->obj_link);
+		GEM_WARN_ON(obj->evict_locked);
 		i915_gem_object_unlock(obj);
 		i915_gem_object_put(obj);
 	}
+
+	i915_gem_ww_ctx_unlock_evictions(ww);
 }
 
 void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj)
@@ -52,9 +67,12 @@ int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
 	else
 		dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx);
 
-	if (!ret)
-		list_add_tail(&ww->contended->obj_link, &ww->obj_list);
-	else
+	if (!ret) {
+		list_add_tail(&ww->contended->obj_link,
+			      !ww->contended_evict ? &ww->obj_list : &ww->eviction_list);
+		ww->contended->evict_locked = ww->contended_evict;
+	}
+	if (ret || ww->contended_evict)
 		i915_gem_object_put(ww->contended);
 
 	ww->contended = NULL;
diff --git a/drivers/gpu/drm/i915/i915_gem_ww.h b/drivers/gpu/drm/i915/i915_gem_ww.h
index 86f0fe343de6..9728c4c7efdf 100644
--- a/drivers/gpu/drm/i915/i915_gem_ww.h
+++ b/drivers/gpu/drm/i915/i915_gem_ww.h
@@ -9,15 +9,16 @@
 
 struct i915_gem_ww_ctx {
 	struct ww_acquire_ctx ctx;
-	struct list_head obj_list;
+	struct list_head obj_list, eviction_list;
 	struct drm_i915_gem_object *contended;
-	bool intr;
+	bool intr, contended_evict;
 };
 
 void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
 void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
 int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
 void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj);
+void i915_gem_ww_ctx_unlock_evictions(struct i915_gem_ww_ctx *ww);
 
 /* Internal function used by the inlines! Don't use. */
 static inline int __i915_gem_ww_fini(struct i915_gem_ww_ctx *ww, int err)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 85a22288d024..474022816374 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1436,7 +1436,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 
 		/* Unlike i915_vma_pin, we don't take no for an answer! */
 		flush_idle_contexts(vm->gt);
-		i915_gem_evict_vm(vm, NULL);
+		i915_gem_evict_vm(vm, ww);
 	} while (1);
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index a387d9ce3ece..9042090c1243 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -343,7 +343,9 @@ static int igt_evict_vm(void *arg)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
-	err = i915_gem_evict_vm(&ggtt->vm, NULL);
+	for_i915_gem_ww(&ww, err, false)
+		err = i915_gem_evict_vm(&ggtt->vm, &ww);
+
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
@@ -352,11 +354,8 @@ static int igt_evict_vm(void *arg)
 
 	unpin_ggtt(ggtt);
 
-	i915_gem_ww_ctx_init(&ww, false);
-	err = i915_gem_evict_vm(&ggtt->vm, &ww);
-
-	/* no -EDEADLK handling; can't happen with vm.mutex in place */
-	i915_gem_ww_ctx_fini(&ww);
+	for_i915_gem_ww(&ww, err, false)
+		err = i915_gem_evict_vm(&ggtt->vm, &ww);
 
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
-- 
2.34.1
