[PATCH 3/3] drm/i915/active: Optimize barrier tasks lists locking
Janusz Krzysztofik
janusz.krzysztofik at linux.intel.com
Fri Feb 10 08:06:29 UTC 2023
Drop locking where not needed or not critical. In other cases, add inline
comments explaining why we take the lock.
Also, fix missing spin_lock_init(&mock_engine->base.barriers_lock).
Intended to be squashed with "drm/i915/active: Serialize access to barrier
tasks lists" once positively verified on trybot.
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 ++----
.../gpu/drm/i915/gt/intel_engine_heartbeat.c | 4 ++++
drivers/gpu/drm/i915/gt/intel_engine_pm.c | 10 +++++-----
drivers/gpu/drm/i915/gt/mock_engine.c | 2 ++
drivers/gpu/drm/i915/i915_active.c | 17 ++++++++++++++---
5 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index b6fb07c188d16..f82f6f7b89e7b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1405,8 +1405,6 @@ int intel_engines_init(struct intel_gt *gt)
*/
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
- unsigned long flags;
-
GEM_BUG_ON(!list_empty(&engine->sched_engine->requests));
i915_sched_engine_put(engine->sched_engine);
@@ -1421,9 +1419,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
if (engine->kernel_context)
intel_engine_destroy_pinned_context(engine->kernel_context);
- spin_lock_irqsave(&engine->barriers_lock, flags);
+ /* No concurrent tasks expected on driver remove, no need to lock */
GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
- spin_unlock_irqrestore(&engine->barriers_lock, flags);
cleanup_status_page(engine);
intel_wa_list_free(&engine->ctx_wa_list);
@@ -2260,6 +2257,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
drm_printf(m, "*** WEDGED ***\n");
drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
+ /* Serialize against ____active_del_barrier() for debugging accuracy */
spin_lock_irqsave(&engine->barriers_lock, flags);
drm_printf(m, "\tBarriers?: %s\n",
str_yes_no(!llist_empty(&engine->barrier_tasks)));
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 9c3c8c431f8d8..ae1d4ffff2139 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -381,6 +381,10 @@ int intel_engine_flush_barriers(struct intel_engine_cs *engine)
unsigned long flags;
int err;
+ /*
+ * Serialize against ____active_del_barrier()
+ * or we risk the barriers not flushed.
+ */
spin_lock_irqsave(&engine->barriers_lock, flags);
err = !llist_empty(&engine->barrier_tasks);
spin_unlock_irqrestore(&engine->barriers_lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 708f8907f410b..209fb3f2bb786 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -235,12 +235,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
static void call_idle_barriers(struct intel_engine_cs *engine)
{
struct llist_node *node, *next;
- unsigned long flags;
- spin_lock_irqsave(&engine->barriers_lock, flags);
- node = llist_del_all(&engine->barrier_tasks);
- spin_unlock_irqrestore(&engine->barriers_lock, flags);
- llist_for_each_safe(node, next, node) {
+ /*
+ * Locking around llist_del_all() not needed as long as we always
+ * call ____active_del_barrier() with engine's wakeref acquired.
+ */
+ llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
struct dma_fence_cb *cb =
container_of((struct list_head *)node,
typeof(*cb), node);
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index c0637bf799a33..383a27b64634f 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -394,6 +394,8 @@ int mock_engine_init(struct intel_engine_cs *engine)
{
struct intel_context *ce;
+ spin_lock_init(&engine->barriers_lock);
+
INIT_LIST_HEAD(&engine->pinned_contexts_list);
engine->sched_engine = i915_sched_engine_create(ENGINE_MOCK);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index b4c36ba198524..9346f772a1871 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -372,6 +372,8 @@ static bool ____active_del_barrier(struct i915_active *ref,
struct llist_node *pos, *next;
unsigned long flags;
+ /* Must be serialized via pm wakeref with call_idle_barriers() */
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);
/*
@@ -884,6 +886,9 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
struct llist_node *prev = first;
struct active_node *node;
+ /* Serialize with call_idle_barriers() via pm wakeref */
+ intel_engine_pm_get(engine);
+
rcu_read_lock();
node = reuse_idle_barrier(ref, idx);
rcu_read_unlock();
@@ -919,7 +924,6 @@ int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
first->next = prev;
if (!last)
last = first;
- intel_engine_pm_get(engine);
}
GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));
@@ -979,9 +983,12 @@ void i915_active_acquire_barrier(struct i915_active *ref)
spin_unlock_irqrestore(&ref->tree_lock, flags);
GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
- spin_lock_irqsave(&engine->barriers_lock, flags);
+ /*
+ * As long as we serialize processing of barrier_tasks inside
+ * ____active_del_barrier(), it should be safe to add a new node
+ * without locking, even while the llist is temporarily emptied.
+ */
llist_add(barrier_to_ll(node), &engine->barrier_tasks);
- spin_unlock_irqrestore(&engine->barriers_lock, flags);
intel_engine_pm_put_delay(engine, 2);
}
}
@@ -1001,6 +1008,10 @@ void i915_request_add_active_barriers(struct i915_request *rq)
GEM_BUG_ON(intel_engine_is_virtual(engine));
GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);
+ /*
+ * Serialize against ____active_del_barrier() or we risk
+ * the barriers left intact, not replaced with the request.
+ */
spin_lock_irqsave(&engine->barriers_lock, flags);
node = llist_del_all(&engine->barrier_tasks);
spin_unlock_irqrestore(&engine->barriers_lock, flags);
--
2.25.1
More information about the Intel-gfx-trybot
mailing list