[PATCH 09/10] drm/i915: Rework i915_gem_evict_vm with eviction locking

Maarten Lankhorst maarten.lankhorst at linux.intel.com
Tue Jan 11 16:07:26 UTC 2022


Rework i915_gem_evict_vm to something equivalent, but with a special
list to annotate objects that were locked for eviction.

Objects locked for eviction are held locked, but without a refcount, and
may disappear as soon as we unlock.
Since removing vma from the vm list requires vm->mutex, it means that
the destruction has not run yet when trylock succeeds. This trick allows
us to lock all evictable objects without requiring a refcount.

However, when the trylock fails we cannot lock to evict, as we cannot
do a blocking wait with vm->mutex held, and there is no guarantee the
object will stay valid otherwise. Hence, we grab a refcount to the
object, lock the object to evict, and then drop the refcount.

This is a bit of a trick, but still safe.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  56 ---------
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   1 +
 drivers/gpu/drm/i915/i915_gem_evict.c         |  70 +++++------
 drivers/gpu/drm/i915/i915_gem_ww.c            | 112 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_ww.h            |  18 ++-
 drivers/gpu/drm/i915/i915_vma.c               |   2 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  11 +-
 7 files changed, 164 insertions(+), 106 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index c1cdfaf2d1e3..08db9af75a62 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -157,62 +157,6 @@ i915_gem_object_put(struct drm_i915_gem_object *obj)
 
 #define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
 
-static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
-					 struct i915_gem_ww_ctx *ww,
-					 bool intr)
-{
-	int ret;
-
-	if (intr)
-		ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
-	else
-		ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
-
-	if (!ret && ww) {
-		i915_gem_object_get(obj);
-		list_add_tail(&obj->obj_link, &ww->obj_list);
-	}
-	if (ret == -EALREADY)
-		ret = 0;
-
-	if (ret == -EDEADLK) {
-		i915_gem_object_get(obj);
-		ww->contended = obj;
-	}
-
-	return ret;
-}
-
-static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
-				       struct i915_gem_ww_ctx *ww)
-{
-	return __i915_gem_object_lock(obj, ww, ww && ww->intr);
-}
-
-static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
-						     struct i915_gem_ww_ctx *ww)
-{
-	WARN_ON(ww && !ww->intr);
-	return __i915_gem_object_lock(obj, ww, true);
-}
-
-static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj,
-					   struct i915_gem_ww_ctx *ww)
-{
-	if (!ww)
-		return dma_resv_trylock(obj->base.resv);
-	else
-		return ww_mutex_trylock(&obj->base.resv->lock, &ww->ctx);
-}
-
-static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
-{
-	if (obj->ops->adjust_lru)
-		obj->ops->adjust_lru(obj);
-
-	dma_resv_unlock(obj->base.resv);
-}
-
 static inline void
 i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 1c903d813097..d87d253aacc8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -286,6 +286,7 @@ struct drm_i915_gem_object {
 	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
 	 */
 	struct list_head obj_link;
+
 	/**
 	 * @shared_resv_from: The object shares the resv from this vm.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 68a83ee01c7a..ec1423e60516 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -391,8 +391,10 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
  */
 int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
 {
+	struct i915_vma *vma, *next;
 	int ret;
 
+retry:
 	ret = mutex_lock_interruptible(&vm->mutex);
 	if (ret)
 		return ret;
@@ -410,55 +412,47 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
 			goto out;
 	}
 
-	do {
-		struct i915_vma *vma, *vn;
-		LIST_HEAD(eviction_list);
-		LIST_HEAD(locked_eviction_list);
+	/* __i915_vma_unbind will remove vma from the bound_list, use _safe */
+	list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) {
+		/*
+		 * If we already own the lock, trylock fails. In case the
+		 * resv is shared among multiple objects, we still need the
+		 * object ref.
+		 */
+		if (dma_resv_locking_ctx(vma->obj->base.resv) != &ww->ctx &&
+		    !i915_gem_object_trylock_to_evict(vma->obj, ww)) {
+			struct drm_i915_gem_object *obj;
 
-		list_for_each_entry(vma, &vm->bound_list, vm_link) {
 			if (i915_vma_is_pinned(vma))
 				continue;
 
-			/*
-			 * If we already own the lock, trylock fails. In case the resv
-			 * is shared among multiple objects, we still need the object ref.
-			 */
-			if (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx)) {
-				__i915_vma_pin(vma);
-				list_add(&vma->evict_link, &locked_eviction_list);
+			obj = i915_gem_object_get_rcu(vma->obj);
+			if (!obj)
+				/* Object's dead, can't drop vm->mutex safely */
 				continue;
-			}
 
-			if (!i915_gem_object_trylock(vma->obj, ww))
-				continue;
-
-			__i915_vma_pin(vma);
-			list_add(&vma->evict_link, &eviction_list);
-		}
-		if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
-			break;
+			mutex_unlock(&vm->mutex);
+			ret = i915_gem_object_lock_to_evict(obj, ww);
+			i915_gem_object_put(obj);
 
-		ret = 0;
-		/* Unbind locked objects first, before unlocking the eviction_list */
-		list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
-			__i915_vma_unpin(vma);
+			if (ret) {
+				i915_gem_ww_ctx_unlock_evictions(ww);
+				return ret;
+			}
 
-			if (ret == 0)
-				ret = __i915_vma_unbind(vma);
-			if (ret != -EINTR) /* "Get me out of here!" */
-				ret = 0;
+			/* retake vm->mutex; the now-locked vma is evicted next loop */
+			goto retry;
 		}
 
-		list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
-			__i915_vma_unpin(vma);
-			if (ret == 0)
-				ret = __i915_vma_unbind(vma);
-			if (ret != -EINTR) /* "Get me out of here!" */
-				ret = 0;
+		if (i915_vma_is_pinned(vma))
+			continue;
 
-			i915_gem_object_unlock(vma->obj);
-		}
-	} while (ret == 0);
+		ret = __i915_vma_unbind(vma);
+		if (ret)
+			break;
+	}
+
+	i915_gem_ww_ctx_unlock_evictions(ww);
 
 out:
 	mutex_unlock(&vm->mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_ww.c b/drivers/gpu/drm/i915/i915_gem_ww.c
index 3f6ff139478e..5e7c14cb43d0 100644
--- a/drivers/gpu/drm/i915/i915_gem_ww.c
+++ b/drivers/gpu/drm/i915/i915_gem_ww.c
@@ -10,8 +10,26 @@ void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ww, bool intr)
 {
 	ww_acquire_init(&ww->ctx, &reservation_ww_class);
 	INIT_LIST_HEAD(&ww->obj_list);
+	INIT_LIST_HEAD(&ww->eviction_list);
 	ww->intr = intr;
 	ww->contended = NULL;
+	ww->contended_evict = false;
+}
+
+void i915_gem_ww_ctx_unlock_evictions(struct i915_gem_ww_ctx *ww)
+{
+	struct drm_i915_gem_object *obj;
+
+	while ((obj = list_first_entry_or_null(&ww->eviction_list, struct drm_i915_gem_object, obj_link))) {
+		GEM_WARN_ON(!dma_resv_is_locked(obj->base.resv));
+		GEM_WARN_ON(dma_resv_locking_ctx(obj->base.resv) != &ww->ctx);
+		dma_resv_assert_held(obj->base.resv);
+		list_del(&obj->obj_link);
+		i915_gem_object_unlock(obj);
+	}
+
+	/* clear flag that we hold a single contended object in eviction list */
+	ww->contended_evict = false;
 }
 
 static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
@@ -23,6 +41,8 @@ static void i915_gem_ww_ctx_unlock_all(struct i915_gem_ww_ctx *ww)
 		i915_gem_object_unlock(obj);
 		i915_gem_object_put(obj);
 	}
+
+	i915_gem_ww_ctx_unlock_evictions(ww);
 }
 
 void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj)
@@ -52,12 +72,98 @@ int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ww)
 	else
 		dma_resv_lock_slow(ww->contended->base.resv, &ww->ctx);
 
-	if (!ret)
-		list_add_tail(&ww->contended->obj_link, &ww->obj_list);
-	else
+	if (!ret) {
+		list_add_tail(&ww->contended->obj_link,
+			      !ww->contended_evict ? &ww->obj_list : &ww->eviction_list);
+	}
+	if (ret || ww->contended_evict)
 		i915_gem_object_put(ww->contended);
 
 	ww->contended = NULL;
 
 	return ret;
 }
+
+static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
+					 struct i915_gem_ww_ctx *ww,
+					 bool intr, bool evict)
+{
+	int ret;
+
+	if (!evict && ww) {
+		if (ww->contended_evict) /* -EDEADLK handling on eviction list */
+			GEM_WARN_ON(!list_is_singular(&ww->eviction_list));
+		else /* Don't mix and mash eviction and normal locking calls, for now.. */
+			GEM_WARN_ON(!list_empty(&ww->eviction_list));
+	}
+
+	if (intr)
+		ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
+	else
+		ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
+
+	if (!ret && ww) {
+		if (!evict)
+			i915_gem_object_get(obj);
+		list_add_tail(&obj->obj_link, !evict ? &ww->obj_list : &ww->eviction_list);
+	}
+
+	if (ret == -EALREADY)
+		ret = 0;
+
+	if (ret == -EDEADLK) {
+		i915_gem_object_get(obj);
+		ww->contended = obj;
+		ww->contended_evict = evict;
+	}
+
+	return ret;
+}
+
+int i915_gem_object_lock_to_evict(struct drm_i915_gem_object *obj,
+				  struct i915_gem_ww_ctx *ww)
+{
+	return __i915_gem_object_lock(obj, ww, ww->intr, true);
+}
+
+int i915_gem_object_lock(struct drm_i915_gem_object *obj,
+			 struct i915_gem_ww_ctx *ww)
+{
+	return __i915_gem_object_lock(obj, ww, ww && ww->intr, false);
+}
+
+int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
+				       struct i915_gem_ww_ctx *ww)
+{
+	WARN_ON(ww && !ww->intr);
+
+	return __i915_gem_object_lock(obj, ww, true, false);
+}
+
+bool i915_gem_object_trylock(struct drm_i915_gem_object *obj,
+			     struct i915_gem_ww_ctx *ww)
+{
+	if (!ww)
+		return dma_resv_trylock(obj->base.resv);
+	else
+		return ww_mutex_trylock(&obj->base.resv->lock, &ww->ctx);
+}
+
+bool i915_gem_object_trylock_to_evict(struct drm_i915_gem_object *obj,
+				      struct i915_gem_ww_ctx *ww)
+{
+	bool ret = i915_gem_object_trylock(obj, ww);
+
+	if (ret)
+		list_add_tail(&obj->obj_link, &ww->eviction_list);
+
+	return ret;
+}
+
+void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
+{
+	if (obj->ops->adjust_lru)
+		obj->ops->adjust_lru(obj);
+
+	dma_resv_unlock(obj->base.resv);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_ww.h b/drivers/gpu/drm/i915/i915_gem_ww.h
index 86f0fe343de6..22ee301f5705 100644
--- a/drivers/gpu/drm/i915/i915_gem_ww.h
+++ b/drivers/gpu/drm/i915/i915_gem_ww.h
@@ -9,15 +9,16 @@
 
 struct i915_gem_ww_ctx {
 	struct ww_acquire_ctx ctx;
-	struct list_head obj_list;
+	struct list_head obj_list, eviction_list;
 	struct drm_i915_gem_object *contended;
-	bool intr;
+	bool intr, contended_evict;
 };
 
 void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr);
 void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx);
 int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx);
 void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj);
+void i915_gem_ww_ctx_unlock_evictions(struct i915_gem_ww_ctx *ww);
 
 /* Internal function used by the inlines! Don't use. */
 static inline int __i915_gem_ww_fini(struct i915_gem_ww_ctx *ww, int err)
@@ -38,4 +39,17 @@ static inline int __i915_gem_ww_fini(struct i915_gem_ww_ctx *ww, int err)
 	for (i915_gem_ww_ctx_init(_ww, _intr), (_err) = -EDEADLK; \
 	     (_err) == -EDEADLK;				  \
 	     (_err) = __i915_gem_ww_fini(_ww, _err))
+
+int i915_gem_object_lock_to_evict(struct drm_i915_gem_object *obj,
+				  struct i915_gem_ww_ctx *ww);
+int i915_gem_object_lock(struct drm_i915_gem_object *obj,
+			 struct i915_gem_ww_ctx *ww);
+int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
+				       struct i915_gem_ww_ctx *ww);
+bool i915_gem_object_trylock(struct drm_i915_gem_object *obj,
+			     struct i915_gem_ww_ctx *ww);
+bool i915_gem_object_trylock_to_evict(struct drm_i915_gem_object *obj,
+				      struct i915_gem_ww_ctx *ww);
+void i915_gem_object_unlock(struct drm_i915_gem_object *obj);
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 85a22288d024..474022816374 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1436,7 +1436,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 
 		/* Unlike i915_vma_pin, we don't take no for an answer! */
 		flush_idle_contexts(vm->gt);
-		i915_gem_evict_vm(vm, NULL);
+		i915_gem_evict_vm(vm, ww);
 	} while (1);
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index a387d9ce3ece..9042090c1243 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -343,7 +343,9 @@ static int igt_evict_vm(void *arg)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
-	err = i915_gem_evict_vm(&ggtt->vm, NULL);
+	for_i915_gem_ww(&ww, err, false)
+		err = i915_gem_evict_vm(&ggtt->vm, &ww);
+
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
@@ -352,11 +354,8 @@ static int igt_evict_vm(void *arg)
 
 	unpin_ggtt(ggtt);
 
-	i915_gem_ww_ctx_init(&ww, false);
-	err = i915_gem_evict_vm(&ggtt->vm, &ww);
-
-	/* no -EDEADLK handling; can't happen with vm.mutex in place */
-	i915_gem_ww_ctx_fini(&ww);
+	for_i915_gem_ww(&ww, err, false)
+		err = i915_gem_evict_vm(&ggtt->vm, &ww);
 
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
-- 
2.34.1



More information about the Intel-gfx-trybot mailing list