[PATCH 1/2] drm/i915/active: Serialize access to barrier tasks lists

Janusz Krzysztofik janusz.krzysztofik at linux.intel.com
Fri Feb 3 17:01:12 UTC 2023


Barriers are now deleted from a barrier tasks list by temporarily removing
the list content, traversing that content with skip over the node to be
deleted, then adding the modified content back to the list.  Since that
complex operation is not serialized with other concurrent uses of the
list, functions that depend on the list being either empty or not empty
can take wrong decisions.

Protect access to the list pending that complex barrier delete operation
with a lock, and teach other users of the list to acquire the lock.  While
this solution may be considered suboptimal, it seems to be the the
smallest, least invasive fix.

References: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c           | 8 ++++++++
 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c    | 6 +++++-
 drivers/gpu/drm/i915/gt/intel_engine_pm.c           | 6 +++++-
 drivers/gpu/drm/i915/gt/intel_engine_types.h        | 1 +
 drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c | 5 +++++
 drivers/gpu/drm/i915/i915_active.c                  | 7 +++++++
 6 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index d4e29da74612d..b6fb07c188d16 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1148,6 +1148,7 @@ static int engine_setup_common(struct intel_engine_cs *engine)
 	int err;
 
 	init_llist_head(&engine->barrier_tasks);
+	spin_lock_init(&engine->barriers_lock);
 
 	err = init_status_page(engine);
 	if (err)
@@ -1404,6 +1405,8 @@ int intel_engines_init(struct intel_gt *gt)
  */
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
+	unsigned long flags;
+
 	GEM_BUG_ON(!list_empty(&engine->sched_engine->requests));
 
 	i915_sched_engine_put(engine->sched_engine);
@@ -1418,7 +1421,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->kernel_context)
 		intel_engine_destroy_pinned_context(engine->kernel_context);
 
+	spin_lock_irqsave(&engine->barriers_lock, flags);
 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
 	cleanup_status_page(engine);
 
 	intel_wa_list_free(&engine->ctx_wa_list);
@@ -2240,6 +2245,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
 	struct i915_request *rq;
 	intel_wakeref_t wakeref;
+	unsigned long flags;
 	ktime_t dummy;
 
 	if (header) {
@@ -2254,8 +2260,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		drm_printf(m, "*** WEDGED ***\n");
 
 	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
+	spin_lock_irqsave(&engine->barriers_lock, flags);
 	drm_printf(m, "\tBarriers?: %s\n",
 		   str_yes_no(!llist_empty(&engine->barrier_tasks)));
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
 	drm_printf(m, "\tLatency: %luus\n",
 		   ewma__engine_latency_read(&engine->latency));
 	if (intel_engine_supports_stats(engine))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 9a527e1f5be65..9c3c8c431f8d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -378,9 +378,13 @@ int intel_engine_flush_barriers(struct intel_engine_cs *engine)
 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
 	struct intel_context *ce = engine->kernel_context;
 	struct i915_request *rq;
+	unsigned long flags;
 	int err;
 
-	if (llist_empty(&engine->barrier_tasks))
+	spin_lock_irqsave(&engine->barriers_lock, flags);
+	err = !llist_empty(&engine->barrier_tasks);
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
+	if (!err)
 		return 0;
 
 	if (!intel_engine_pm_get_if_awake(engine))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index e971b153fda97..708f8907f410b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -235,8 +235,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 static void call_idle_barriers(struct intel_engine_cs *engine)
 {
 	struct llist_node *node, *next;
+	unsigned long flags;
 
-	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+	spin_lock_irqsave(&engine->barriers_lock, flags);
+	node = llist_del_all(&engine->barrier_tasks);
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
+	llist_for_each_safe(node, next, node) {
 		struct dma_fence_cb *cb =
 			container_of((struct list_head *)node,
 				     typeof(*cb), node);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 4fd54fb8810fb..ab9e0a6de70d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -396,6 +396,7 @@ struct intel_engine_cs {
 	struct intel_context *hung_ce;
 
 	struct llist_head barrier_tasks;
+	spinlock_t barriers_lock;
 
 	struct intel_context *kernel_context; /* pinned */
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 273d440a53e3f..cbc03662fc693 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -90,6 +90,7 @@ static void pulse_unlock_wait(struct pulse *p)
 static int __live_idle_pulse(struct intel_engine_cs *engine,
 			     int (*fn)(struct intel_engine_cs *cs))
 {
+	unsigned long flags;
 	struct pulse *p;
 	int err;
 
@@ -113,13 +114,17 @@ static int __live_idle_pulse(struct intel_engine_cs *engine,
 	i915_active_release(&p->active);
 
 	GEM_BUG_ON(i915_active_is_idle(&p->active));
+	spin_lock_irqsave(&engine->barriers_lock, flags);
 	GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
 
 	err = fn(engine);
 	if (err)
 		goto out;
 
+	spin_lock_irqsave(&engine->barriers_lock, flags);
 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
 
 	if (engine_sync_barrier(engine)) {
 		struct drm_printer m = drm_err_printer("pulse");
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 7412abf166a8c..320840eea30ce 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -370,6 +370,7 @@ static bool ____active_del_barrier(struct i915_active *ref,
 {
 	struct llist_node *head = NULL, *tail = NULL;
 	struct llist_node *pos, *next;
+	unsigned long flags;
 
 	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);
 
@@ -388,6 +389,7 @@ static bool ____active_del_barrier(struct i915_active *ref,
 	 * we are actively using the barrier, we know that there will be
 	 * at least another opportunity when we idle.
 	 */
+	spin_lock_irqsave(&engine->barriers_lock, flags);
 	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
 		if (node == barrier_from_ll(pos)) {
 			node = NULL;
@@ -401,6 +403,7 @@ static bool ____active_del_barrier(struct i915_active *ref,
 	}
 	if (head)
 		llist_add_batch(head, tail, &engine->barrier_tasks);
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
 
 	return !node;
 }
@@ -973,7 +976,9 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 		spin_unlock_irqrestore(&ref->tree_lock, flags);
 
 		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+		spin_lock_irqsave(&engine->barriers_lock, flags);
 		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
+		spin_unlock_irqrestore(&engine->barriers_lock, flags);
 		intel_engine_pm_put_delay(engine, 2);
 	}
 }
@@ -993,7 +998,9 @@ void i915_request_add_active_barriers(struct i915_request *rq)
 	GEM_BUG_ON(intel_engine_is_virtual(engine));
 	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
 
+	spin_lock_irqsave(&engine->barriers_lock, flags);
 	node = llist_del_all(&engine->barrier_tasks);
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
 	if (!node)
 		return;
 	/*
-- 
2.25.1



More information about the Intel-gfx-trybot mailing list