[Intel-xe] [PATCH] drm/xe: Fix an invalid locking wait context bug
Rodrigo Vivi
rodrigo.vivi at intel.com
Tue Jul 25 20:17:58 UTC 2023
We cannot have spin locks held around xe_irq_reset(), since it will
call the intel_display_power_is_enabled() function, which needs to
take a mutex. Doing so triggers the undesired
"[ BUG: Invalid wait context ]" lockdep splat.
We cannot convert i915's power domain lock to a spin lock because of
the nested dependency on non-atomic context waits. So, let's move the
xe_irq_reset() calls out of the critical section, while still
protecting irq.enabled and ensuring the right serialization in the
irq handlers.
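In other words, the suspend path ends up ordered roughly like this
(a sketch of the hunks below, with the spin lock only protecting the
irq.enabled flip):

    spin_lock_irq(&xe->irq.lock);
    xe->irq.enabled = false;      /* no new irqs */
    spin_unlock_irq(&xe->irq.lock);

    synchronize_irq(irq);         /* flush in-flight handlers */
    xe_irq_reset(xe);             /* free to take the display mutex */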
v2: In the first version, I had missed the fact that irq.enabled is
checked in the xe/display glue layer, and that the i915 display code
actually uses the irq spin lock properly. So, this got changed to a
version suggested by Matthew Auld, along with introducing a
lockdep_assert_held() in the display glue code.
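With that assert in place, the expectation is that display-side
callers take the irq spin lock around the check, along the lines of
this (illustrative) pattern:

    spin_lock_irq(&xe->irq.lock);
    enabled = intel_irqs_enabled(xe);
    spin_unlock_irq(&xe->irq.lock);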
Suggested-by: Matthew Auld <matthew.auld at intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
---
drivers/gpu/drm/xe/display/ext/i915_irq.c | 1 +
drivers/gpu/drm/xe/xe_irq.c | 32 ++++++++++++++++++-----
2 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c
index fbb0e99143f6..ac435f7f854b 100644
--- a/drivers/gpu/drm/xe/display/ext/i915_irq.c
+++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c
@@ -107,6 +107,7 @@ void intel_display_irq_uninstall(struct drm_i915_private *dev_priv)
 
 bool intel_irqs_enabled(struct xe_device *xe)
 {
+	lockdep_assert_held(&xe->irq.lock);
 	return xe->irq.enabled;
 }
 
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index eae190cb0969..d6b001f6a3c5 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -309,6 +309,13 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
 	unsigned long intr_dw[2];
 	u32 identity[32];
 
+	spin_lock_irq(&xe->irq.lock);
+	if (!xe->irq.enabled) {
+		spin_unlock_irq(&xe->irq.lock);
+		return IRQ_NONE;
+	}
+	spin_unlock_irq(&xe->irq.lock);
+
 	master_ctl = xelp_intr_disable(xe);
 	if (!master_ctl) {
 		xelp_intr_enable(xe, false);
@@ -371,6 +378,13 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
 	/* TODO: This really shouldn't be copied+pasted */
 
+	spin_lock_irq(&xe->irq.lock);
+	if (!xe->irq.enabled) {
+		spin_unlock_irq(&xe->irq.lock);
+		return IRQ_NONE;
+	}
+	spin_unlock_irq(&xe->irq.lock);
+
 	master_tile_ctl = dg1_intr_disable(xe);
 	if (!master_tile_ctl) {
 		dg1_intr_enable(xe, false);
@@ -574,10 +588,14 @@ void xe_irq_shutdown(struct xe_device *xe)
 
 void xe_irq_suspend(struct xe_device *xe)
 {
+	int irq = to_pci_dev(xe->drm.dev)->irq;
+
 	spin_lock_irq(&xe->irq.lock);
-	xe->irq.enabled = false;
-	xe_irq_reset(xe);
+	xe->irq.enabled = false; /* no new irqs */
 	spin_unlock_irq(&xe->irq.lock);
+
+	synchronize_irq(irq); /* flush irqs */
+	xe_irq_reset(xe); /* turn irqs off */
 }
 
 void xe_irq_resume(struct xe_device *xe)
@@ -585,13 +603,15 @@ void xe_irq_resume(struct xe_device *xe)
 	struct xe_gt *gt;
 	int id;
 
-	spin_lock_irq(&xe->irq.lock);
+	/*
+	 * lock not needed:
+	 * 1. no irq will arrive before the postinstall
+	 * 2. display is not yet resumed
+	 */
 	xe->irq.enabled = true;
 	xe_irq_reset(xe);
-	xe_irq_postinstall(xe);
+	xe_irq_postinstall(xe); /* turn irqs on */
 
 	for_each_gt(gt, xe, id)
 		xe_irq_enable_hwe(gt);
-
-	spin_unlock_irq(&xe->irq.lock);
 }
--
2.41.0