[Intel-xe] [PATCH] drm/xe: Fix an invalid locking wait context bug

Rodrigo Vivi rodrigo.vivi at intel.com
Tue Jul 25 20:17:58 UTC 2023


We cannot hold spin locks around xe_irq_reset, since it
calls the intel_display_power_is_enabled() function, which
needs a mutex lock, hence causing the undesired
"[ BUG: Invalid wait context ]" splat.

We cannot convert i915's power domain lock to a spin lock
due to the nested dependency on non-atomic context waits.

So, let's move the xe_irq_reset functions out of the
critical section, while still ensuring that we protect
irq.enabled and preserve the right serialization
in the irq handlers.

v2: In the first version, I had missed the fact that
irq.enabled is checked in the xe/display glue layer,
and that the i915 display code is actually using the irq
spin lock properly. So, this got changed to a version
suggested by Matthew Auld, along with introducing
lockdep_assert_held in the display glue code.

Suggested-by: Matthew Auld <matthew.auld at intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
---
 drivers/gpu/drm/xe/display/ext/i915_irq.c |  1 +
 drivers/gpu/drm/xe/xe_irq.c               | 32 ++++++++++++++++++-----
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c
index fbb0e99143f6..ac435f7f854b 100644
--- a/drivers/gpu/drm/xe/display/ext/i915_irq.c
+++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c
@@ -107,6 +107,7 @@ void intel_display_irq_uninstall(struct drm_i915_private *dev_priv)
 
 bool intel_irqs_enabled(struct xe_device *xe)
 {
+	lockdep_assert_held(&xe->irq.lock);
 	return xe->irq.enabled;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index eae190cb0969..d6b001f6a3c5 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -309,6 +309,13 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
 	unsigned long intr_dw[2];
 	u32 identity[32];
 
+	spin_lock_irq(&xe->irq.lock);
+	if (!xe->irq.enabled) {
+		spin_unlock_irq(&xe->irq.lock);
+		return IRQ_NONE;
+	}
+	spin_unlock_irq(&xe->irq.lock);
+
 	master_ctl = xelp_intr_disable(xe);
 	if (!master_ctl) {
 		xelp_intr_enable(xe, false);
@@ -371,6 +378,13 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 
 	/* TODO: This really shouldn't be copied+pasted */
 
+	spin_lock_irq(&xe->irq.lock);
+	if (!xe->irq.enabled) {
+		spin_unlock_irq(&xe->irq.lock);
+		return IRQ_NONE;
+	}
+	spin_unlock_irq(&xe->irq.lock);
+
 	master_tile_ctl = dg1_intr_disable(xe);
 	if (!master_tile_ctl) {
 		dg1_intr_enable(xe, false);
@@ -574,10 +588,14 @@ void xe_irq_shutdown(struct xe_device *xe)
 
 void xe_irq_suspend(struct xe_device *xe)
 {
+	int irq = to_pci_dev(xe->drm.dev)->irq;
+
 	spin_lock_irq(&xe->irq.lock);
-	xe->irq.enabled = false;
-	xe_irq_reset(xe);
+	xe->irq.enabled = false; /* no new irqs */
 	spin_unlock_irq(&xe->irq.lock);
+
+	synchronize_irq(irq); /* flush irqs */
+	xe_irq_reset(xe); /* turn irqs off */
 }
 
 void xe_irq_resume(struct xe_device *xe)
@@ -585,13 +603,15 @@ void xe_irq_resume(struct xe_device *xe)
 	struct xe_gt *gt;
 	int id;
 
-	spin_lock_irq(&xe->irq.lock);
+	/*
+	 * lock not needed:
+	 * 1. no irq will arrive before the postinstall
+	 * 2. display is not yet resumed
+	 */
 	xe->irq.enabled = true;
 	xe_irq_reset(xe);
-	xe_irq_postinstall(xe);
+	xe_irq_postinstall(xe); /* turn irqs on */
 
 	for_each_gt(gt, xe, id)
 		xe_irq_enable_hwe(gt);
-
-	spin_unlock_irq(&xe->irq.lock);
 }
-- 
2.41.0



More information about the Intel-xe mailing list