[PATCH 3/3] drm/i915/active: Optimize barrier tasks lists locking

Janusz Krzysztofik janusz.krzysztofik at linux.intel.com
Fri Feb 10 08:06:29 UTC 2023


Drop locking where not needed or not critical.  In other cases, add inline
comments explaining why we take the lock.

Also, fix missing spin_lock_init(&mock_engine->base.barriers_lock).

Intended to be squashed with "drm/i915/active: Serialize access to barrier
tasks lists" once positively verified on trybot.

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c       |  6 ++----
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c    |  4 ++++
 drivers/gpu/drm/i915/gt/intel_engine_pm.c       | 10 +++++-----
 drivers/gpu/drm/i915/gt/mock_engine.c           |  2 ++
 drivers/gpu/drm/i915/i915_active.c              | 17 ++++++++++++++---
 5 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index b6fb07c188d16..f82f6f7b89e7b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1405,8 +1405,6 @@ int intel_engines_init(struct intel_gt *gt)
  */
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
-	unsigned long flags;
-
 	GEM_BUG_ON(!list_empty(&engine->sched_engine->requests));
 
 	i915_sched_engine_put(engine->sched_engine);
@@ -1421,9 +1419,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->kernel_context)
 		intel_engine_destroy_pinned_context(engine->kernel_context);
 
-	spin_lock_irqsave(&engine->barriers_lock, flags);
+	/* No concurrent tasks expected on driver remove, no need to lock */
 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
-	spin_unlock_irqrestore(&engine->barriers_lock, flags);
 	cleanup_status_page(engine);
 
 	intel_wa_list_free(&engine->ctx_wa_list);
@@ -2260,6 +2257,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		drm_printf(m, "*** WEDGED ***\n");
 
 	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
+	/* Serialize against ____active_del_barrier() for debugging accuracy */
 	spin_lock_irqsave(&engine->barriers_lock, flags);
 	drm_printf(m, "\tBarriers?: %s\n",
 		   str_yes_no(!llist_empty(&engine->barrier_tasks)));
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 9c3c8c431f8d8..ae1d4ffff2139 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -381,6 +381,10 @@ int intel_engine_flush_barriers(struct intel_engine_cs *engine)
 	unsigned long flags;
 	int err;
 
+	/*
+	 * Serialize against ____active_del_barrier()
+	 * or we risk the barriers not flushed.
+	 */
 	spin_lock_irqsave(&engine->barriers_lock, flags);
 	err = !llist_empty(&engine->barrier_tasks);
 	spin_unlock_irqrestore(&engine->barriers_lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 708f8907f410b..209fb3f2bb786 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -235,12 +235,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 static void call_idle_barriers(struct intel_engine_cs *engine)
 {
 	struct llist_node *node, *next;
-	unsigned long flags;
 
-	spin_lock_irqsave(&engine->barriers_lock, flags);
-	node = llist_del_all(&engine->barrier_tasks);
-	spin_unlock_irqrestore(&engine->barriers_lock, flags);
-	llist_for_each_safe(node, next, node) {
+	/*
+	 * Locking around llist_del_all() not needed as long as we always
+	 * call ____active_del_barrier() with engine's wakeref acquired.
+	 */
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
 		struct dma_fence_cb *cb =
 			container_of((struct list_head *)node,
 				     typeof(*cb), node);
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index c0637bf799a33..383a27b64634f 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -394,6 +394,8 @@ int mock_engine_init(struct intel_engine_cs *engine)
 {
 	struct intel_context *ce;
 
+	spin_lock_init(&engine->barriers_lock);
+
 	INIT_LIST_HEAD(&engine->pinned_contexts_list);
 
 	engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index b4c36ba198524..9346f772a1871 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -372,6 +372,8 @@ static bool ____active_del_barrier(struct i915_active *ref,
 	struct llist_node *pos, *next;
 	unsigned long flags;
 
+	/* Must be serialized via pm wakeref with call_idle_barriers() */
+	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
 	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);
 
 	/*
@@ -884,6 +886,9 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 		struct llist_node *prev = first;
 		struct active_node *node;
 
+		/* Serialize with call_idle_barriers() via pm wakeref */
+		intel_engine_pm_get(engine);
+
 		rcu_read_lock();
 		node = reuse_idle_barrier(ref, idx);
 		rcu_read_unlock();
@@ -919,7 +924,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
 		first->next = prev;
 		if (!last)
 			last = first;
-		intel_engine_pm_get(engine);
 	}
 
 	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
@@ -979,9 +983,12 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 		spin_unlock_irqrestore(&ref->tree_lock, flags);
 
 		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
-		spin_lock_irqsave(&engine->barriers_lock, flags);
+		/*
+		 * As long as we serialize processing of barrier_tasks inside
+		 * ____active_del_barrier(), it should be safe to add a new node
+		 * without locking, even while the llist is temporarily emptied.
+		 */
 		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
-		spin_unlock_irqrestore(&engine->barriers_lock, flags);
 		intel_engine_pm_put_delay(engine, 2);
 	}
 }
@@ -1001,6 +1008,10 @@ void i915_request_add_active_barriers(struct i915_request *rq)
 	GEM_BUG_ON(intel_engine_is_virtual(engine));
 	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
 
+	/*
+	 * Serialize against ____active_del_barrier() or we risk
+	 * the barriers left intact, not replaced with the request.
+	 */
 	spin_lock_irqsave(&engine->barriers_lock, flags);
 	node = llist_del_all(&engine->barrier_tasks);
 	spin_unlock_irqrestore(&engine->barriers_lock, flags);
-- 
2.25.1



More information about the Intel-gfx-trybot mailing list