[PATCH 14/15] drm/i915: Move vma binding to vm exclusive fence, instead of relying on i915_active

Maarten Lankhorst maarten.lankhorst at linux.intel.com
Wed Jul 28 13:23:19 UTC 2021


Instead of relying on the i915_active exclusive fence, use the exclusive
slot inside vm->_resv. As far as I can tell, only the lock part of the
reservation object is used by default, so there should be no shared fences.
Just in case there are any, add a debug warning so we notice if we missed them.

As i915_active previously kept the vma alive, take an extra reference to the
i915_vma in the worker to guard against a race.

i915_active.excl is now unused, so remove all related infrastructure as
well.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_active.c           | 47 --------------------
 drivers/gpu/drm/i915/i915_active.h           | 19 --------
 drivers/gpu/drm/i915/i915_active_types.h     |  3 --
 drivers/gpu/drm/i915/i915_vma.c              | 43 ++++++++++--------
 drivers/gpu/drm/i915/selftests/i915_active.c |  1 -
 5 files changed, 24 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index eb73ed5e4854..bb2faed0f04d 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -140,7 +140,6 @@ __active_retire(struct i915_active *ref)
 	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
 		return;
 
-	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
 	debug_active_deactivate(ref);
 
 	/* Even if we have not used the cache, we may still have a barrier */
@@ -227,13 +226,6 @@ node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
 		active_retire(container_of(cb, struct active_node, base.cb)->ref);
 }
 
-static void
-excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
-{
-	if (active_fence_cb(fence, cb))
-		active_retire(container_of(cb, struct i915_active, excl.cb));
-}
-
 static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
 {
 	struct active_node *it;
@@ -360,7 +352,6 @@ void __i915_active_init(struct i915_active *ref,
 	init_llist_head(&ref->preallocated_barriers);
 	atomic_set(&ref->count, 0);
 	__mutex_init(&ref->mutex, "i915_active", mkey);
-	__i915_active_fence_init(&ref->excl, NULL, excl_retire);
 	INIT_WORK(&ref->work, active_work);
 #if IS_ENABLED(CONFIG_LOCKDEP)
 	lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
@@ -459,36 +450,6 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
 	return err;
 }
 
-static struct dma_fence *
-__i915_active_set_fence(struct i915_active *ref,
-			struct i915_active_fence *active,
-			struct dma_fence *fence)
-{
-	struct dma_fence *prev;
-
-	if (replace_barrier(ref, active)) {
-		RCU_INIT_POINTER(active->fence, fence);
-		return NULL;
-	}
-
-	rcu_read_lock();
-	prev = __i915_active_fence_set(active, fence);
-	if (prev)
-		prev = dma_fence_get_rcu(prev);
-	else
-		__i915_active_acquire(ref);
-	rcu_read_unlock();
-
-	return prev;
-}
-
-struct dma_fence *
-i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
-{
-	/* We expect the caller to manage the exclusive timeline ordering */
-	return __i915_active_set_fence(ref, &ref->excl, f);
-}
-
 bool i915_active_acquire_if_busy(struct i915_active *ref)
 {
 	debug_active_assert(ref);
@@ -589,7 +550,6 @@ static int flush_lazy_signals(struct i915_active *ref)
 	struct active_node *it, *n;
 	int err = 0;
 
-	enable_signaling(&ref->excl);
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
 		err = flush_barrier(it); /* unconnected idle barrier? */
 		if (err)
@@ -701,13 +661,6 @@ static int await_active(struct i915_active *ref,
 	if (!i915_active_acquire_if_busy(ref))
 		return 0;
 
-	if (flags & I915_ACTIVE_AWAIT_EXCL &&
-	    rcu_access_pointer(ref->excl.fence)) {
-		err = __await_active(&ref->excl, fn, arg);
-		if (err)
-			goto out;
-	}
-
 	if (flags & I915_ACTIVE_AWAIT_ACTIVE) {
 		struct active_node *it, *n;
 
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 2ac875888872..c6ab5fe8da0c 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -166,9 +166,6 @@ void __i915_active_init(struct i915_active *ref,
 
 int i915_active_add_request(struct i915_active *ref, struct i915_request *rq);
 
-struct dma_fence *
-i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
-
 int __i915_active_wait(struct i915_active *ref, int state);
 static inline int i915_active_wait(struct i915_active *ref)
 {
@@ -181,7 +178,6 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence,
 int i915_request_await_active(struct i915_request *rq,
 			      struct i915_active *ref,
 			      unsigned int flags);
-#define I915_ACTIVE_AWAIT_EXCL BIT(0)
 #define I915_ACTIVE_AWAIT_ACTIVE BIT(1)
 #define I915_ACTIVE_AWAIT_BARRIER BIT(2)
 
@@ -217,19 +213,4 @@ struct i915_active *i915_active_create(void);
 struct i915_active *i915_active_get(struct i915_active *ref);
 void i915_active_put(struct i915_active *ref);
 
-static inline int __i915_request_await_exclusive(struct i915_request *rq,
-						 struct i915_active *active)
-{
-	struct dma_fence *fence;
-	int err = 0;
-
-	fence = i915_active_fence_get(&active->excl);
-	if (fence) {
-		err = i915_request_await_dma_fence(rq, fence);
-		dma_fence_put(fence);
-	}
-
-	return err;
-}
-
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index c149f348a972..c3109602cf23 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -32,9 +32,6 @@ struct i915_active {
 	struct active_node *cache;
 	struct rb_root tree;
 
-	/* Preallocated "exclusive" node */
-	struct i915_active_fence excl;
-
 	unsigned long flags;
 #define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 8b3adb8ccf71..48b94d467127 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -317,6 +317,8 @@ static void __vma_release(struct dma_fence_work *work)
 	}
 
 	i915_vm_free_pt_stash(vw->vm, &vw->stash);
+	if (vw->vma)
+		__i915_vma_put(vw->vma);
 	i915_vm_put(vw->vm);
 }
 
@@ -342,18 +344,12 @@ struct i915_vma_work *i915_vma_work(void)
 
 int i915_vma_wait_for_bind(struct i915_vma *vma)
 {
+	struct dma_fence *fence = dma_resv_get_excl_unlocked(&vma->vm->_resv);
 	int err = 0;
 
-	if (rcu_access_pointer(vma->active.excl.fence)) {
-		struct dma_fence *fence;
-
-		rcu_read_lock();
-		fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
-		rcu_read_unlock();
-		if (fence) {
-			err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
-			dma_fence_put(fence);
-		}
+	if (fence) {
+		err = dma_fence_wait(fence, true);
+		dma_fence_put(fence);
 	}
 
 	return err;
@@ -405,7 +401,7 @@ int i915_vma_bind(struct i915_vma *vma,
 	if (work && bind_flags & vma->vm->bind_async_flags) {
 		struct dma_fence *prev;
 
-		work->vma = vma;
+		work->vma = __i915_vma_get(vma);
 		work->cache_level = cache_level;
 		work->flags = bind_flags;
 
@@ -414,17 +410,18 @@ int i915_vma_bind(struct i915_vma *vma,
 		 * the pages (not the object itself). As we don't track that,
 		 * yet, we have to use the exclusive fence instead.
 		 *
-		 * Also note that we do not want to track the async vma as
-		 * part of the obj->resv->excl_fence as it only affects
-		 * execution and not content or object's backing store lifetime.
+		 * Use the vm's exclusive fence for serialization.
 		 */
-		prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
-		if (prev) {
+		prev = dma_resv_excl_fence(&vma->vm->_resv);
+		if (prev)
 			__i915_sw_fence_await_dma_fence(&work->base.chain,
 							prev,
 							&work->cb);
-			dma_fence_put(prev);
-		}
+
+		GEM_WARN_ON(dma_resv_shared_list(&vma->vm->_resv) &&
+			    dma_resv_shared_list(&vma->vm->_resv)->shared_count);
+
+		dma_resv_add_excl_fence(&vma->vm->_resv, &work->base.dma);
 
 		work->base.dma.error = 0; /* enable the queue_work() */
 
@@ -1536,7 +1533,15 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
 static int
 __i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma)
 {
-	return __i915_request_await_exclusive(rq, &vma->active);
+	struct dma_fence *fence = dma_resv_get_excl_unlocked(&vma->vm->_resv);
+	int ret;
+
+	if (!fence)
+		return 0;
+
+	ret = i915_request_await_dma_fence(rq, fence);
+	dma_fence_put(fence);
+	return ret;
 }
 
 static int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 61bf4560d8af..ddaf389b35e0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -335,7 +335,6 @@ void i915_active_unlock_wait(struct i915_active *ref)
 
 		/* Wait for all active callbacks */
 		rcu_read_lock();
-		active_flush(ref, &ref->excl);
 		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node)
 			active_flush(ref, &it->base);
 		rcu_read_unlock();
-- 
2.31.0



More information about the Intel-gfx-trybot mailing list