[PATCH 2/2] drm/i915/active: Serialize access to barrier lists

Janusz Krzysztofik janusz.krzysztofik at linux.intel.com
Tue Jan 24 12:15:51 UTC 2023


Barriers are now deleted from a barrier list by temporarily removing the
list content, traversing that content with skip over the node to be
deleted, and adding the modified content back to the list.  Since that
complex operation is not serialized with other concurrent uses of the
list, functions that depend on the list being either empty or not empty
can take wrong decisions.

Protect access to the list pending that complex barrier delete operation
with a lock and teach other users of the list to acquire the lock.

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c           | 7 +++++++
 drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c    | 6 +++++-
 drivers/gpu/drm/i915/gt/intel_engine_pm.c           | 6 +++++-
 drivers/gpu/drm/i915/gt/intel_engine_types.h        | 1 +
 drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c | 5 +++++
 drivers/gpu/drm/i915/i915_active.c                  | 7 +++++++
 6 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 922f1bb22dc68..58e5bb55abf2d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1148,6 +1148,7 @@ static int engine_setup_common(struct intel_engine_cs *engine)
 	int err;
 
 	init_llist_head(&engine->barrier_tasks);
+	spin_lock_init(&engine->barriers_lock);
 
 	err = init_status_page(engine);
 	if (err)
@@ -1404,6 +1405,8 @@ int intel_engines_init(struct intel_gt *gt)
  */
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
+	unsigned long flags;
+
 	GEM_BUG_ON(!list_empty(&engine->sched_engine->requests));
 
 	i915_sched_engine_put(engine->sched_engine);
@@ -1418,7 +1421,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->kernel_context)
 		intel_engine_destroy_pinned_context(engine->kernel_context);
 
+	spin_lock_irqsave(&engine->barriers_lock, flags);
 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
 	cleanup_status_page(engine);
 
 	intel_wa_list_free(&engine->ctx_wa_list);
@@ -2274,8 +2279,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 		drm_printf(m, "*** WEDGED ***\n");
 
 	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
+	spin_lock_irqsave(&engine->barriers_lock, flags);
 	drm_printf(m, "\tBarriers?: %s\n",
 		   str_yes_no(!llist_empty(&engine->barrier_tasks)));
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
 	drm_printf(m, "\tLatency: %luus\n",
 		   ewma__engine_latency_read(&engine->latency));
 	if (intel_engine_supports_stats(engine))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 9a527e1f5be65..9c3c8c431f8d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -378,9 +378,13 @@ int intel_engine_flush_barriers(struct intel_engine_cs *engine)
 	struct i915_sched_attr attr = { .priority = I915_PRIORITY_MIN };
 	struct intel_context *ce = engine->kernel_context;
 	struct i915_request *rq;
+	unsigned long flags;
 	int err;
 
-	if (llist_empty(&engine->barrier_tasks))
+	spin_lock_irqsave(&engine->barriers_lock, flags);
+	err = !llist_empty(&engine->barrier_tasks);
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
+	if (!err)
 		return 0;
 
 	if (!intel_engine_pm_get_if_awake(engine))
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index e971b153fda97..708f8907f410b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -235,8 +235,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 static void call_idle_barriers(struct intel_engine_cs *engine)
 {
 	struct llist_node *node, *next;
+	unsigned long flags;
 
-	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+	spin_lock_irqsave(&engine->barriers_lock, flags);
+	node = llist_del_all(&engine->barrier_tasks);
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
+	llist_for_each_safe(node, next, node) {
 		struct dma_fence_cb *cb =
 			container_of((struct list_head *)node,
 				     typeof(*cb), node);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 4fd54fb8810fb..ab9e0a6de70d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -396,6 +396,7 @@ struct intel_engine_cs {
 	struct intel_context *hung_ce;
 
 	struct llist_head barrier_tasks;
+	spinlock_t barriers_lock;
 
 	struct intel_context *kernel_context; /* pinned */
 
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 273d440a53e3f..051c172763e71 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -90,6 +90,7 @@ static void pulse_unlock_wait(struct pulse *p)
 static int __live_idle_pulse(struct intel_engine_cs *engine,
 			     int (*fn)(struct intel_engine_cs *cs))
 {
+	unsigned long flags;
 	struct pulse *p;
 	int err;
 
@@ -113,13 +114,17 @@ static int __live_idle_pulse(struct intel_engine_cs *engine,
 	i915_active_release(&p->active);
 
 	GEM_BUG_ON(i915_active_is_idle(&p->active));
+	spin_lock_irqsave(&engine->barriers_lock);
 	GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
+	spin_unlock_irqrestore(&engine->barriers_lock);
 
 	err = fn(engine);
 	if (err)
 		goto out;
 
+	spin_lock_irqsave(&engine->barriers_lock);
 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
+	spin_unlock_irqrestore(&engine->barriers_lock);
 
 	if (engine_sync_barrier(engine)) {
 		struct drm_printer m = drm_err_printer("pulse");
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 21159b7c9c244..ea562f6983314 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -370,6 +370,7 @@ static bool ____active_del_barrier(struct i915_active *ref,
 {
 	struct llist_node *head = NULL, *tail = NULL;
 	struct llist_node *pos, *next;
+	unsigned long flags;
 
 	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);
 
@@ -388,6 +389,7 @@ static bool ____active_del_barrier(struct i915_active *ref,
 	 * we are actively using the barrier, we know that there will be
 	 * at least another opportunity when we idle.
 	 */
+	spin_lock_irqsave(&engine->barriers_lock, flags);
 	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
 		if (node == barrier_from_ll(pos)) {
 			node = NULL;
@@ -401,6 +403,7 @@ static bool ____active_del_barrier(struct i915_active *ref,
 	}
 	if (head)
 		llist_add_batch(head, tail, &engine->barrier_tasks);
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
 
 	return !node;
 }
@@ -974,7 +977,9 @@ void i915_active_acquire_barrier(struct i915_active *ref)
 		spin_unlock_irqrestore(&ref->tree_lock, flags);
 
 		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+		spin_lock_irqsave(&engine->barriers_lock, flags);
 		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
+		spin_unlock_irqrestore(&engine->barriers_lock, flags);
 		intel_engine_pm_put_delay(engine, 2);
 	}
 }
@@ -994,7 +999,9 @@ void i915_request_add_active_barriers(struct i915_request *rq)
 	GEM_BUG_ON(intel_engine_is_virtual(engine));
 	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
 
+	spin_lock_irqsave(&engine->barriers_lock, flags);
 	node = llist_del_all(&engine->barrier_tasks);
+	spin_unlock_irqrestore(&engine->barriers_lock, flags);
 	if (!node)
 		return;
 	/*
-- 
2.25.1



More information about the Intel-gfx-trybot mailing list