[PATCH 20/22] drm/i915: Move vma binding to vm exclusive fence, instead of relying on i915_active

Maarten Lankhorst maarten.lankhorst at linux.intel.com
Thu Nov 25 13:50:48 UTC 2021


Instead of relying on the i915_active exclusive fence, use the exclusive
slot inside vm->_resv. As far as I can tell, only the lock part of the
reservation object is used by default, so there should be no shared fences.
Just in case there are, add a debug warning to flag that we missed them.
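
In i915_vma_bind() this amounts to roughly the following sequence (a
minimal sketch lifted from the i915_vma.c hunk below; the vm's
reservation lock is assumed to be held around it):

	/* chain the bind worker after the current exclusive fence */
	prev = dma_resv_excl_fence(&vma->vm->_resv);
	if (prev)
		__i915_sw_fence_await_dma_fence(&work->base.chain,
						prev, &work->cb);

	/* we assume nobody attaches shared fences to vm->_resv */
	GEM_WARN_ON(dma_resv_shared_list(&vma->vm->_resv) &&
		    dma_resv_shared_list(&vma->vm->_resv)->shared_count);

	/* install the bind worker as the new exclusive fence */
	dma_resv_add_excl_fence(&vma->vm->_resv, &work->base.dma);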

As i915_active previously kept the vma alive, add an extra reference to
the i915_vma in the worker, just in case of a race.
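
Concretely, the two sides of that new reference look like this (sketch
taken from the i915_vma.c hunk below):

	/* i915_vma_bind(): pin the vma for the async bind worker */
	work->vma = __i915_vma_get(vma);

	/* __vma_release(): drop the reference once the worker is done */
	if (vw->vma)
		__i915_vma_put(vw->vma);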

i915_active.excl is now unused, so remove all related infrastructure as
well.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c         |  2 -
 drivers/gpu/drm/i915/i915_active.c           | 47 -------------------
 drivers/gpu/drm/i915/i915_active.h           | 19 --------
 drivers/gpu/drm/i915/i915_active_types.h     |  3 --
 drivers/gpu/drm/i915/i915_vma.c              | 49 +++++++++++---------
 drivers/gpu/drm/i915/selftests/i915_active.c |  1 -
 6 files changed, 28 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index f9790f45a492..9f56087b2ebc 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -142,8 +142,6 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
 
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-		i915_vma_wait_for_bind(vma);
-
 		if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
 			continue;
 
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index ee2b3a375362..fa77d8284f1b 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -136,7 +136,6 @@ __active_retire(struct i915_active *ref)
 	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
 		return;
 
-	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
 	debug_active_deactivate(ref);
 
 	/* Even if we have not used the cache, we may still have a barrier */
@@ -223,13 +222,6 @@ node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
 		active_retire(container_of(cb, struct active_node, base.cb)->ref);
 }
 
-static void
-excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
-{
-	if (active_fence_cb(fence, cb))
-		active_retire(container_of(cb, struct i915_active, excl.cb));
-}
-
 static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
 {
 	struct active_node *it;
@@ -356,7 +348,6 @@ void __i915_active_init(struct i915_active *ref,
 	init_llist_head(&ref->preallocated_barriers);
 	atomic_set(&ref->count, 0);
 	__mutex_init(&ref->mutex, "i915_active", mkey);
-	__i915_active_fence_init(&ref->excl, NULL, excl_retire);
 	INIT_WORK(&ref->work, active_work);
 #if IS_ENABLED(CONFIG_LOCKDEP)
 	lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
@@ -455,36 +446,6 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
 	return err;
 }
 
-static struct dma_fence *
-__i915_active_set_fence(struct i915_active *ref,
-			struct i915_active_fence *active,
-			struct dma_fence *fence)
-{
-	struct dma_fence *prev;
-
-	if (replace_barrier(ref, active)) {
-		RCU_INIT_POINTER(active->fence, fence);
-		return NULL;
-	}
-
-	rcu_read_lock();
-	prev = __i915_active_fence_set(active, fence);
-	if (prev)
-		prev = dma_fence_get_rcu(prev);
-	else
-		__i915_active_acquire(ref);
-	rcu_read_unlock();
-
-	return prev;
-}
-
-struct dma_fence *
-i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
-{
-	/* We expect the caller to manage the exclusive timeline ordering */
-	return __i915_active_set_fence(ref, &ref->excl, f);
-}
-
 bool i915_active_acquire_if_busy(struct i915_active *ref)
 {
 	debug_active_assert(ref);
@@ -585,7 +546,6 @@ static int flush_lazy_signals(struct i915_active *ref)
 	struct active_node *it, *n;
 	int err = 0;
 
-	enable_signaling(&ref->excl);
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
 		err = flush_barrier(it); /* unconnected idle barrier? */
 		if (err)
@@ -697,13 +657,6 @@ static int await_active(struct i915_active *ref,
 	if (!i915_active_acquire_if_busy(ref))
 		return 0;
 
-	if (flags & I915_ACTIVE_AWAIT_EXCL &&
-	    rcu_access_pointer(ref->excl.fence)) {
-		err = __await_active(&ref->excl, fn, arg);
-		if (err)
-			goto out;
-	}
-
 	if (flags & I915_ACTIVE_AWAIT_ACTIVE) {
 		struct active_node *it, *n;
 
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 7eb44132183a..735a304457be 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -166,9 +166,6 @@ void __i915_active_init(struct i915_active *ref,
 
 int i915_active_add_request(struct i915_active *ref, struct i915_request *rq);
 
-struct dma_fence *
-i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
-
 int __i915_active_wait(struct i915_active *ref, int state);
 static inline int i915_active_wait(struct i915_active *ref)
 {
@@ -181,7 +178,6 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence,
 int i915_request_await_active(struct i915_request *rq,
 			      struct i915_active *ref,
 			      unsigned int flags);
-#define I915_ACTIVE_AWAIT_EXCL BIT(0)
 #define I915_ACTIVE_AWAIT_ACTIVE BIT(1)
 #define I915_ACTIVE_AWAIT_BARRIER BIT(2)
 
@@ -217,21 +213,6 @@ struct i915_active *i915_active_create(void);
 struct i915_active *i915_active_get(struct i915_active *ref);
 void i915_active_put(struct i915_active *ref);
 
-static inline int __i915_request_await_exclusive(struct i915_request *rq,
-						 struct i915_active *active)
-{
-	struct dma_fence *fence;
-	int err = 0;
-
-	fence = i915_active_fence_get(&active->excl);
-	if (fence) {
-		err = i915_request_await_dma_fence(rq, fence);
-		dma_fence_put(fence);
-	}
-
-	return err;
-}
-
 void i915_active_module_exit(void);
 int i915_active_module_init(void);
 
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index c149f348a972..c3109602cf23 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -32,9 +32,6 @@ struct i915_active {
 	struct active_node *cache;
 	struct rb_root tree;
 
-	/* Preallocated "exclusive" node */
-	struct i915_active_fence excl;
-
 	unsigned long flags;
 #define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 5a3c2bc157df..a62cf19a51c9 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -310,6 +310,8 @@ static void __vma_release(struct dma_fence_work *work)
 	}
 
 	i915_vm_free_pt_stash(vw->vm, &vw->stash);
+	if (vw->vma)
+		__i915_vma_put(vw->vma);
 	i915_vm_put(vw->vm);
 }
 
@@ -335,19 +337,14 @@ struct i915_vma_work *i915_vma_work(void)
 
 int i915_vma_wait_for_bind(struct i915_vma *vma)
 {
-	int err = 0;
+	struct dma_fence *fence = i915_gem_object_get_moving_fence(vma->obj);
+	int err;
 
-	if (rcu_access_pointer(vma->active.excl.fence)) {
-		struct dma_fence *fence;
+	if (!fence)
+		return 0;
 
-		rcu_read_lock();
-		fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
-		rcu_read_unlock();
-		if (fence) {
-			err = dma_fence_wait(fence, true);
-			dma_fence_put(fence);
-		}
-	}
+	err = dma_fence_wait(fence, true);
+	dma_fence_put(fence);
 
 	return err;
 }
@@ -355,7 +352,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
 static int i915_vma_verify_bind_complete(struct i915_vma *vma)
 {
-	struct dma_fence *fence = i915_active_fence_get(&vma->active.excl);
+	struct dma_fence *fence = i915_gem_object_get_moving_fence(vma->obj);
 	int err;
 
 	if (!fence)
@@ -420,7 +417,7 @@ int i915_vma_bind(struct i915_vma *vma,
 	if (work) {
 		struct dma_fence *prev;
 
-		work->vma = vma;
+		work->vma = __i915_vma_get(vma);
 		work->cache_level = cache_level;
 		work->flags = bind_flags;
 
@@ -429,17 +426,18 @@ int i915_vma_bind(struct i915_vma *vma,
 		 * the pages (not the object itself). As we don't track that,
 		 * yet, we have to use the exclusive fence instead.
 		 *
-		 * Also note that we do not want to track the async vma as
-		 * part of the obj->resv->excl_fence as it only affects
-		 * execution and not content or object's backing store lifetime.
+		 * Use the vm's exclusive fence for serialization.
 		 */
-		prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
-		if (prev) {
+		prev = dma_resv_excl_fence(&vma->vm->_resv);
+		if (prev)
 			__i915_sw_fence_await_dma_fence(&work->base.chain,
 							prev,
 							&work->cb);
-			dma_fence_put(prev);
-		}
+
+		GEM_WARN_ON(dma_resv_shared_list(&vma->vm->_resv) &&
+			    dma_resv_shared_list(&vma->vm->_resv)->shared_count);
+
+		dma_resv_add_excl_fence(&vma->vm->_resv, &work->base.dma);
 
 		work->base.dma.error = 0; /* enable the queue_work() */
 
@@ -1634,7 +1632,16 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
 static int
 __i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma)
 {
-	return __i915_request_await_exclusive(rq, &vma->active);
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+	int ret = 0;
+
+	dma_resv_for_each_fence(&cursor, &vma->vm->_resv, false, fence) {
+		ret = i915_request_await_dma_fence(rq, fence);
+		if (ret)
+			break;
+	}
+	return ret;
 }
 
 static int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 61bf4560d8af..ddaf389b35e0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -335,7 +335,6 @@ void i915_active_unlock_wait(struct i915_active *ref)
 
 		/* Wait for all active callbacks */
 		rcu_read_lock();
-		active_flush(ref, &ref->excl);
 		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node)
 			active_flush(ref, &it->base);
 		rcu_read_unlock();
-- 
2.34.0


