[PATCH 12/13] drm/i915: Move vma binding to obj, instead of relying on i915_active

Maarten Lankhorst maarten.lankhorst at linux.intel.com
Tue Jul 27 08:27:17 UTC 2021


Instead of relying on the i915_active exclusive fence, use the obj->moving
fence. This will also be used by the later support for async gpu
binding/clearing, and allows us to keep everything in a single place.
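Waiting for a bind then reduces to waiting on that single fence. As a
rough sketch (hypothetical helper name; the accessors are the obj->moving
ones used in the diff below):

	static int wait_for_moving_fence(struct drm_i915_gem_object *obj)
	{
		struct dma_fence *fence = i915_gem_object_get_moving_fence(obj);
		int err = 0;

		if (fence) {
			/* interruptible wait for a pending bind/move */
			err = dma_fence_wait(fence, true);
			dma_fence_put(fence);
		}

		return err;
	}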

As i915_active previously kept the vma alive, take an extra reference on
the i915_vma in the worker to guard against a race.
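The pairing, condensed from the i915_vma.c hunks below:

	/* at bind time, pin the vma for the worker's lifetime */
	work->vma = __i915_vma_get(vma);

	/* later, in __vma_release(), drop the extra reference */
	if (work->vma)
		__i915_vma_put(work->vma);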

i915_active.excl is now unused, so remove all related infrastructure as
well.
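The one consumer that needs care is i915_ggtt_suspend(): waiting for the
bind now requires the object lock, which cannot safely be acquired while
holding the vm mutex. The lock is therefore only tried, and on contention
everything is unlocked, the bind waited for, and the walk restarted.
Condensed from the intel_ggtt.c hunk below (vm.open restore elided):

	if (!i915_gem_object_trylock(vma->obj)) {
		mutex_unlock(&ggtt->vm.mutex);

		i915_gem_object_lock(vma->obj, NULL);	/* sleep on the lock */
		i915_vma_wait_for_bind(vma);
		i915_gem_object_unlock(vma->obj);

		i915_vma_put(vma);
		goto retry;				/* rescan bound_list */
	}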

Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_ggtt.c         | 24 ++++++++--
 drivers/gpu/drm/i915/i915_active.c           | 47 --------------------
 drivers/gpu/drm/i915/i915_active.h           | 19 --------
 drivers/gpu/drm/i915/i915_active_types.h     |  3 --
 drivers/gpu/drm/i915/i915_vma.c              | 32 ++++++++-----
 drivers/gpu/drm/i915/selftests/i915_active.c |  1 -
 6 files changed, 40 insertions(+), 86 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index b3eae0722250..ee485a7be32c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -118,6 +118,7 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 	struct i915_vma *vma, *vn;
 	int open;
 
+retry:
 	mutex_lock(&ggtt->vm.mutex);
 
 	/* Skip rewriting PTE on VMA unbind. */
@@ -125,15 +126,30 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-		i915_vma_wait_for_bind(vma);
 
-		if (i915_vma_is_pinned(vma))
-			continue;
+		i915_vma_get(vma);
+
+		/* Unlikely to race when the GPU is idle, so no worry about the slowpath */
+		if (!i915_gem_object_trylock(vma->obj)) {
+			atomic_set(&ggtt->vm.open, open);
+			mutex_unlock(&ggtt->vm.mutex);
+
+			i915_gem_object_lock(vma->obj, NULL);
+			i915_vma_wait_for_bind(vma);
+			i915_gem_object_unlock(vma->obj);
+
+			i915_vma_put(vma);
+			goto retry;
+		}
+
+		i915_vma_wait_for_bind(vma);
 
-		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
+		if (!i915_vma_is_pinned(vma) && !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
 			__i915_vma_evict(vma);
 			drm_mm_remove_node(&vma->node);
 		}
+		i915_gem_object_unlock(vma->obj);
+		i915_vma_put(vma);
 	}
 
 	ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index eb73ed5e4854..bb2faed0f04d 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -140,7 +140,6 @@ __active_retire(struct i915_active *ref)
 	if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
 		return;
 
-	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
 	debug_active_deactivate(ref);
 
 	/* Even if we have not used the cache, we may still have a barrier */
@@ -227,13 +226,6 @@ node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
 		active_retire(container_of(cb, struct active_node, base.cb)->ref);
 }
 
-static void
-excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
-{
-	if (active_fence_cb(fence, cb))
-		active_retire(container_of(cb, struct i915_active, excl.cb));
-}
-
 static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
 {
 	struct active_node *it;
@@ -360,7 +352,6 @@ void __i915_active_init(struct i915_active *ref,
 	init_llist_head(&ref->preallocated_barriers);
 	atomic_set(&ref->count, 0);
 	__mutex_init(&ref->mutex, "i915_active", mkey);
-	__i915_active_fence_init(&ref->excl, NULL, excl_retire);
 	INIT_WORK(&ref->work, active_work);
 #if IS_ENABLED(CONFIG_LOCKDEP)
 	lockdep_init_map(&ref->work.lockdep_map, "i915_active.work", wkey, 0);
@@ -459,36 +450,6 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
 	return err;
 }
 
-static struct dma_fence *
-__i915_active_set_fence(struct i915_active *ref,
-			struct i915_active_fence *active,
-			struct dma_fence *fence)
-{
-	struct dma_fence *prev;
-
-	if (replace_barrier(ref, active)) {
-		RCU_INIT_POINTER(active->fence, fence);
-		return NULL;
-	}
-
-	rcu_read_lock();
-	prev = __i915_active_fence_set(active, fence);
-	if (prev)
-		prev = dma_fence_get_rcu(prev);
-	else
-		__i915_active_acquire(ref);
-	rcu_read_unlock();
-
-	return prev;
-}
-
-struct dma_fence *
-i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
-{
-	/* We expect the caller to manage the exclusive timeline ordering */
-	return __i915_active_set_fence(ref, &ref->excl, f);
-}
-
 bool i915_active_acquire_if_busy(struct i915_active *ref)
 {
 	debug_active_assert(ref);
@@ -589,7 +550,6 @@ static int flush_lazy_signals(struct i915_active *ref)
 	struct active_node *it, *n;
 	int err = 0;
 
-	enable_signaling(&ref->excl);
 	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
 		err = flush_barrier(it); /* unconnected idle barrier? */
 		if (err)
@@ -701,13 +661,6 @@ static int await_active(struct i915_active *ref,
 	if (!i915_active_acquire_if_busy(ref))
 		return 0;
 
-	if (flags & I915_ACTIVE_AWAIT_EXCL &&
-	    rcu_access_pointer(ref->excl.fence)) {
-		err = __await_active(&ref->excl, fn, arg);
-		if (err)
-			goto out;
-	}
-
 	if (flags & I915_ACTIVE_AWAIT_ACTIVE) {
 		struct active_node *it, *n;
 
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 2ac875888872..c6ab5fe8da0c 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -166,9 +166,6 @@ void __i915_active_init(struct i915_active *ref,
 
 int i915_active_add_request(struct i915_active *ref, struct i915_request *rq);
 
-struct dma_fence *
-i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
-
 int __i915_active_wait(struct i915_active *ref, int state);
 static inline int i915_active_wait(struct i915_active *ref)
 {
@@ -181,7 +178,6 @@ int i915_sw_fence_await_active(struct i915_sw_fence *fence,
 int i915_request_await_active(struct i915_request *rq,
 			      struct i915_active *ref,
 			      unsigned int flags);
-#define I915_ACTIVE_AWAIT_EXCL BIT(0)
 #define I915_ACTIVE_AWAIT_ACTIVE BIT(1)
 #define I915_ACTIVE_AWAIT_BARRIER BIT(2)
 
@@ -217,19 +213,4 @@ struct i915_active *i915_active_create(void);
 struct i915_active *i915_active_get(struct i915_active *ref);
 void i915_active_put(struct i915_active *ref);
 
-static inline int __i915_request_await_exclusive(struct i915_request *rq,
-						 struct i915_active *active)
-{
-	struct dma_fence *fence;
-	int err = 0;
-
-	fence = i915_active_fence_get(&active->excl);
-	if (fence) {
-		err = i915_request_await_dma_fence(rq, fence);
-		dma_fence_put(fence);
-	}
-
-	return err;
-}
-
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index c149f348a972..c3109602cf23 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -32,9 +32,6 @@ struct i915_active {
 	struct active_node *cache;
 	struct rb_root tree;
 
-	/* Preallocated "exclusive" node */
-	struct i915_active_fence excl;
-
 	unsigned long flags;
 #define I915_ACTIVE_RETIRE_SLEEPS BIT(0)
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f370c3581038..5350defac2a2 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -317,6 +317,8 @@ static void __vma_release(struct dma_fence_work *work)
 	}
 
 	i915_vm_free_pt_stash(vw->vm, &vw->stash);
+	if (work->vma)
+		__i915_vma_put(work->vma);
 	i915_vm_put(vw->vm);
 }
 
@@ -342,18 +344,14 @@ struct i915_vma_work *i915_vma_work(void)
 
 int i915_vma_wait_for_bind(struct i915_vma *vma)
 {
+	struct dma_fence *fence = i915_gem_object_get_moving_fence(vma->obj);
 	int err = 0;
 
-	if (rcu_access_pointer(vma->active.excl.fence)) {
-		struct dma_fence *fence;
+	assert_vma_held(vma);
 
-		rcu_read_lock();
-		fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
-		rcu_read_unlock();
-		if (fence) {
-			err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
-			dma_fence_put(fence);
-		}
+	if (fence) {
+		err = dma_fence_wait(fence, true);
+		dma_fence_put(fence);
 	}
 
 	return err;
@@ -403,9 +401,10 @@ int i915_vma_bind(struct i915_vma *vma,
 
 	trace_i915_vma_bind(vma, bind_flags);
 	if (work && bind_flags & vma->vm->bind_async_flags) {
+		struct drm_i915_gem_object *obj = vma->obj;
 		struct dma_fence *prev;
 
-		work->vma = vma;
+		work->vma = __i915_vma_get(vma);
 		work->cache_level = cache_level;
 		work->flags = bind_flags;
 
@@ -418,13 +417,14 @@ int i915_vma_bind(struct i915_vma *vma,
 		 * part of the obj->resv->excl_fence as it only affects
 		 * execution and not content or object's backing store lifetime.
 		 */
-		prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
+		prev = i915_gem_object_get_moving_fence(obj);
 		if (prev) {
 			__i915_sw_fence_await_dma_fence(&work->base.chain,
 							prev,
 							&work->cb);
 			dma_fence_put(prev);
 		}
+		i915_gem_object_set_moving_fence(obj, &work->base.dma);
 
 		work->base.dma.error = 0; /* enable the queue_work() */
 
@@ -1512,7 +1512,15 @@ void i915_vma_revoke_mmap(struct i915_vma *vma)
 static int
 __i915_request_await_bind(struct i915_request *rq, struct i915_vma *vma)
 {
-	return __i915_request_await_exclusive(rq, &vma->active);
+	struct dma_fence *fence = i915_gem_object_get_moving_fence(vma->obj);
+	int ret;
+
+	if (!fence)
+		return 0;
+
+	ret = i915_request_await_dma_fence(rq, fence);
+	dma_fence_put(fence);
+	return ret;
 }
 
 static int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 61bf4560d8af..ddaf389b35e0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -335,7 +335,6 @@ void i915_active_unlock_wait(struct i915_active *ref)
 
 		/* Wait for all active callbacks */
 		rcu_read_lock();
-		active_flush(ref, &ref->excl);
 		rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node)
 			active_flush(ref, &it->base);
 		rcu_read_unlock();
-- 
2.31.0


