[PATCH 3/3] drm/i915/active: Preallocate idle barriers more carefully

Janusz Krzysztofik janusz.krzysztofik at linux.intel.com
Tue Feb 14 20:29:17 UTC 2023


When we collect barriers for preallocation, we reuse either idle or
non-idle ones, whichever we find first.  For non-idle barriers, we rely
on their successful deletion from their engines' barrier_tasks lists as
an indication that no other thread is using them concurrently.  For
idle barriers, however, we neither perform any similar check nor take
any preventive countermeasure, so we risk unexpected races with other
threads, e.g. those trying to select an idle barrier for use as a fence
tracker.  We may then end up overwriting their fence pointer and/or
corrupting their list node pointers, or they may corrupt ours in the
same way.
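
To illustrate the hazard, here is a minimal userspace sketch of the
check-then-act pattern (the type and function names below are made up
for illustration, they are not taken from the driver):

	#include <stdbool.h>
	#include <stddef.h>

	struct fake_node {
		void *fence;	/* NULL while the barrier is idle */
	};

	/*
	 * Racy: between the NULL test and the store, another thread
	 * can observe the same NULL fence and also "claim" the node,
	 * after which the two claimants overwrite each other's fence
	 * and list node pointers.
	 */
	static bool claim_idle_node_racy(struct fake_node *node, void *marker)
	{
		if (node->fence)	/* the is_idle_barrier() check */
			return false;

		node->fence = marker;	/* 2nd claimant clobbers the 1st */
		return true;
	}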

Whenever an idle barrier is selected for preallocation, set it up as
non-idle immediately: mark it with our ERR_PTR(-EAGAIN) barrier marker,
assign it a pointer to the related engine, and increase the use count
of its composite tracker.  Serialize that operation against potential
concurrent updates of the active fence pointer, and skip the node if it
has already been claimed by another thread.
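
A minimal sketch of that claim step, using C11 atomics in place of the
kernel's cmpxchg() (again with made-up names; BARRIER_MARKER stands in
for the ERR_PTR(-EAGAIN) marker):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stddef.h>

	#define BARRIER_MARKER ((void *)-11)

	struct fake_node {
		_Atomic(void *) fence;	/* NULL while the barrier is idle */
	};

	/*
	 * Atomically claim an idle node: install the barrier marker
	 * only if the fence slot is still NULL.  At most one thread
	 * can win; a loser sees the exchange fail and skips the node.
	 */
	static bool claim_idle_node(struct fake_node *node)
	{
		void *expected = NULL;

		return atomic_compare_exchange_strong(&node->fence,
						      &expected,
						      BARRIER_MARKER);
	}

Only the winning thread then stores the engine pointer and bumps the
tracker's use count, so a node can no longer be claimed twice or be
observed half-initialized by a concurrent fence tracker lookup.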

While at it, ensure that the preallocated_barriers list is still empty
when we populate it with the preallocated barriers.  Since
llist_add_batch() reports whether the list was empty beforehand, check
its return value with GEM_WARN_ON() rather than GEM_BUG_ON(): the
latter does not evaluate its argument when CONFIG_DRM_I915_DEBUG_GEM is
disabled, which would skip the llist_add_batch() call entirely.
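
The check relies on llist_add_batch() returning true if, and only if,
the list was empty before the new entries were added.  A simplified
userspace model of that contract (illustrative only, not the kernel
implementation):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct lnode {
		struct lnode *next;
	};

	struct lhead {
		_Atomic(struct lnode *) first;
	};

	/*
	 * Link new_first..new_last in front of the current contents
	 * and report whether the list was empty beforehand.
	 */
	static bool add_batch(struct lnode *new_first,
			      struct lnode *new_last,
			      struct lhead *head)
	{
		struct lnode *old = atomic_load(&head->first);

		do {
			/* old is refreshed on each failed exchange */
			new_last->next = old;
		} while (!atomic_compare_exchange_weak(&head->first,
						       &old, new_first));

		return old == NULL;	/* true iff the list was empty */
	}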

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_active.c | 49 +++++++++++++++---------------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 69ffdd8de82bb..8c91dcc59568d 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -784,8 +784,10 @@ static inline bool is_idle_barrier(struct active_node *node, u64 idx)
 	return node->timeline == idx && !i915_active_fence_isset(&node->base);
 }
 
-static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
+static struct active_node *reuse_idle_barrier(struct i915_active *ref,
+					      struct intel_engine_cs *engine)
 {
+	u64 idx = engine->kernel_context->timeline->fence_context;
 	struct rb_node *prev, *p;
 
 	if (RB_EMPTY_ROOT(&ref->tree))
@@ -800,7 +802,10 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
 	 * completely idle barriers (less hassle in manipulating the llists),
 	 * but otherwise any will do.
 	 */
-	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
+	if (ref->cache && is_idle_barrier(ref->cache, idx) &&
+	    !cmpxchg(__active_fence_slot(&ref->cache->base), NULL, ERR_PTR(-EAGAIN))) {
+		ref->cache->base.cb.node.prev = (void *)engine;
+		__i915_active_acquire(ref);
 		p = &ref->cache->node;
 		goto match;
 	}
@@ -811,8 +816,12 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
 		struct active_node *node =
 			rb_entry(p, struct active_node, node);
 
-		if (is_idle_barrier(node, idx))
+		if (is_idle_barrier(node, idx) &&
+		    !cmpxchg(__active_fence_slot(&node->base), NULL, ERR_PTR(-EAGAIN))) {
+			node->base.cb.node.prev = (void *)engine;
+			__i915_active_acquire(ref);
 			goto match;
+		}
 
 		prev = p;
 		if (node->timeline < idx)
@@ -838,8 +847,12 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
 		if (node->timeline < idx)
 			continue;
 
-		if (is_idle_barrier(node, idx))
+		if (is_idle_barrier(node, idx) &&
+		    !cmpxchg(__active_fence_slot(&node->base), NULL, ERR_PTR(-EAGAIN))) {
+			node->base.cb.node.prev = (void *)engine;
+			__i915_active_acquire(ref);
 			goto match;
+		}
 
 		/*
 		 * The list of pending barriers is protected by the
@@ -848,7 +861,6 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
 		 * the barrier before we claim it, so we have to check
 		 * for success.
 		 */
-		engine = __barrier_to_engine(node);
 		smp_rmb(); /* serialise with add_active_barriers */
 		if (is_barrier(&node->base) &&
 		    ____active_del_barrier(ref, node, engine))
@@ -888,7 +900,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 	 */
 	GEM_BUG_ON(!mask);
 	for_each_engine_masked(engine, gt, mask, tmp) {
-		u64 idx = engine->kernel_context->timeline->fence_context;
 		struct llist_node *prev = first;
 		struct active_node *node;
 
@@ -896,36 +907,24 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 		intel_engine_pm_get(engine);
 
 		rcu_read_lock();
-		node = reuse_idle_barrier(ref, idx);
+		node = reuse_idle_barrier(ref, engine);
 		rcu_read_unlock();
 		if (!node) {
 			node = kmem_cache_alloc(slab_cache, GFP_KERNEL);
 			if (!node)
 				goto unwind;
 
-			RCU_INIT_POINTER(node->base.fence, NULL);
+			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
 			node->base.cb.func = node_retire;
-			node->timeline = idx;
+			node->timeline = engine->kernel_context->timeline->fence_context;
 			node->ref = ref;
-		}
-
-		if (!i915_active_fence_isset(&node->base)) {
-			/*
-			 * Mark this as being *our* unconnected proto-node.
-			 *
-			 * Since this node is not in any list, and we have
-			 * decoupled it from the rbtree, we can reuse the
-			 * request to indicate this is an idle-barrier node
-			 * and then we can use the rb_node and list pointers
-			 * for our tracking of the pending barrier.
-			 */
-			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
 			node->base.cb.node.prev = (void *)engine;
 			__i915_active_acquire(ref);
+		} else {
+			GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
+			GEM_BUG_ON(engine != __barrier_to_engine(node));
 		}
-		GEM_BUG_ON(rcu_access_pointer(node->base.fence) != ERR_PTR(-EAGAIN));
 
-		GEM_BUG_ON(barrier_to_engine(node) != engine);
 		first = barrier_to_ll(node);
 		first->next = prev;
 		if (!last)
@@ -933,7 +932,7 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 	}
 
 	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
-	llist_add_batch(first, last, &ref->preallocated_barriers);
+	GEM_WARN_ON(!llist_add_batch(first, last, &ref->preallocated_barriers));
 
 	return 0;
 
-- 
2.25.1