[Intel-gfx] [CI] drm/i915: Hold irq-off for the entire fake lock period

Chris Wilson chris at chris-wilson.co.uk
Fri Aug 23 08:13:40 UTC 2019


Sadly, lockdep records when irqs are re-enabled and then marks up the
fake lock as being irq-unsafe (the lock is now seen as held with irqs
enabled). Our hand is forced, and so we must hold irq-off for the
entire fake lock critical section.
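
For reference, a minimal sketch of the annotation pattern (assuming
CONFIG_LOCKDEP; the helper names fake_lock_irq_unsafe(),
fake_lock_irq_safe() and fake_unlock_irq_safe() are hypothetical, the
real helpers are __timeline_mark_lock()/__timeline_mark_unlock() in the
diff below). If irqs are restored between the fake mutex_acquire() and
the eventual mutex_release(), lockdep sees the lock held with irqs
enabled and records it as irq-unsafe; keeping irqs disabled until after
the release preserves the irq-safe annotation.

#include <linux/irqflags.h>
#include <linux/kernel.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>

/* Problematic pattern: irqs come back on while the fake lock is still
 * nominally held, so lockdep records the lock as held with irqs
 * enabled, i.e. irq-unsafe.
 */
static inline void fake_lock_irq_unsafe(struct mutex *m)
{
	unsigned long flags;

	local_irq_save(flags);
	mutex_acquire(&m->dep_map, 2, 0, _THIS_IP_);
	local_irq_restore(flags);
}

/* Fixed pattern: stay irq-off for the whole fake critical section and
 * only restore irqs after the fake release.
 */
static inline unsigned long fake_lock_irq_safe(struct mutex *m)
{
	unsigned long flags;

	local_irq_save(flags);
	mutex_acquire(&m->dep_map, 2, 0, _THIS_IP_);
	return flags;
}

static inline void fake_unlock_irq_safe(struct mutex *m,
					unsigned long flags)
{
	mutex_release(&m->dep_map, 0, _THIS_IP_);
	local_irq_restore(flags);
}

The patch below applies the second pattern by returning the saved irq
flags from __timeline_mark_lock() and restoring them only in
__timeline_mark_unlock().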

Hopefully this is the last tweak required!

Fixes: d67739268cf0 ("drm/i915/gt: Mark up the nested engine-pm timeline lock as irqsafe")
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_pm.c | 41 +++++++++++++++++------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index a372d4ea9370..7eec8670ff27 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -39,35 +39,53 @@ static int __engine_unpark(struct intel_wakeref *wf)
 
 #if IS_ENABLED(CONFIG_LOCKDEP)
 
-static inline void __timeline_mark_lock(struct intel_context *ce)
+static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
 	mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
-	local_irq_restore(flags);
+
+	return flags;
 }
 
-static inline void __timeline_mark_unlock(struct intel_context *ce)
+static inline void __timeline_mark_unlock(struct intel_context *ce,
+					  unsigned long flags)
 {
 	mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
+	local_irq_restore(flags);
 }
 
 #else
 
-static inline void __timeline_mark_lock(struct intel_context *ce)
+static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
 {
+	return 0;
 }
 
-static inline void __timeline_mark_unlock(struct intel_context *ce)
+static inline void __timeline_mark_unlock(struct intel_context *ce,
+					  unsigned long flags)
 {
 }
 
 #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
 
+static void __timeline_mark_active(struct intel_timeline *tl)
+{
+	struct intel_gt_timelines *timelines = &tl->gt->timelines;
+
+	GEM_BUG_ON(!list_empty(&timelines->active_list));
+	GEM_BUG_ON(tl->active_count);
+
+	list_add(&tl->link, &timelines->active_list);
+	tl->active_count = 1;
+}
+
 static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq;
+	unsigned long flags;
+	bool result = true;
 
 	/* Already inside the kernel context, safe to power down. */
 	if (engine->wakeref_serial == engine->serial)
@@ -89,14 +107,14 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	 * retiring the last request, thus all rings should be empty and
 	 * all timelines idle.
 	 */
-	__timeline_mark_lock(engine->kernel_context);
+	flags = __timeline_mark_lock(engine->kernel_context);
 
 	rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
 	if (IS_ERR(rq))
 		/* Context switch failed, hope for the best! Maybe reset? */
-		return true;
+		goto out_unlock;
 
-	intel_timeline_enter(rq->timeline);
+	__timeline_mark_active(rq->timeline);
 
 	/* Check again on the next retirement. */
 	engine->wakeref_serial = engine->serial + 1;
@@ -110,9 +128,10 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
 	__intel_wakeref_defer_park(&engine->wakeref);
 	__i915_request_queue(rq, NULL);
 
-	__timeline_mark_unlock(engine->kernel_context);
-
-	return false;
+	result = false;
+out_unlock:
+	__timeline_mark_unlock(engine->kernel_context, flags);
+	return result;
 }
 
 static int __engine_park(struct intel_wakeref *wf)
-- 
2.23.0


