[Intel-gfx] [PATCH v3 3/3] drm/i915: fix HW lockup due to missing RPS IRQ workaround on GEN6

Imre Deak imre.deak at intel.com
Fri Dec 19 08:52:36 PST 2014


In

commit dbea3cea69508e9d548ed4a6be13de35492e5d15
Author: Imre Deak <imre.deak at intel.com>
Date:   Mon Dec 15 18:59:28 2014 +0200

    drm/i915: sanitize RPS resetting during GPU reset

we disable RPS interrupts during GPU resetting, but don't apply the
necessary GEN6 HW workaround. This leads to a HW lockup during a
subsequent "looping batchbuffer" workload. This is triggered by the
testcase that submits exactly this kind of workload after a simulated
GPU reset. I'm not sure how likely the bug would have triggered
otherwise, since we would have applied the workaround anyway shortly
after the GPU reset, when enabling GT powersaving from the deferred
work.

This may also fix unrelated issues, since during driver loading /
suspending we also disable RPS interrupts and so we also had a short
window during the rest of the loading / resuming where a similar
workload could run without the workaround applied.

v2:
- separate the fix to route RPS interrupts to the CPU on GEN9 too
  to a separate patch (Daniel)

Bisected-by: Ander Conselvan de Oliveira <ander.conselvan.de.oliveira at intel.com>
Testcase: igt/gem_reset_stats/ban-ctx-render
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87429
Signed-off-by: Imre Deak <imre.deak at intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
---
 drivers/gpu/drm/i915/i915_irq.c  | 20 ++++++++++++++++++--
 drivers/gpu/drm/i915/intel_drv.h |  1 +
 drivers/gpu/drm/i915/intel_pm.c  | 14 +-------------
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index aa3180c..818ab4e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -296,6 +296,23 @@ void gen6_enable_rps_interrupts(struct drm_device *dev)
 	spin_unlock_irq(&dev_priv->irq_lock);
 }
 
+u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask)
+{
+	/*
+	 * SNB,IVB can while VLV,CHV may hard hang on looping batchbuffer
+	 * if GEN6_PM_UP_EI_EXPIRED is masked.
+	 *
+	 * TODO: verify if this can be reproduced on VLV,CHV.
+	 */
+	if (INTEL_INFO(dev_priv)->gen <= 7 && !IS_HASWELL(dev_priv))
+		mask &= ~GEN6_PM_RP_UP_EI_EXPIRED;
+
+	if (INTEL_INFO(dev_priv)->gen >= 8)
+		mask &= ~GEN8_PMINTR_REDIRECT_TO_NON_DISP;
+
+	return mask;
+}
+
 void gen6_disable_rps_interrupts(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -308,8 +325,7 @@ void gen6_disable_rps_interrupts(struct drm_device *dev)
 
 	spin_lock_irq(&dev_priv->irq_lock);
 
-	I915_WRITE(GEN6_PMINTRMSK, INTEL_INFO(dev_priv)->gen >= 8 ?
-		   ~GEN8_PMINTR_REDIRECT_TO_NON_DISP : ~0);
+	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 
 	__gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events);
 	I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) &
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 588b618..bb871f3 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -795,6 +795,7 @@ void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask);
 void gen6_reset_rps_interrupts(struct drm_device *dev);
 void gen6_enable_rps_interrupts(struct drm_device *dev);
 void gen6_disable_rps_interrupts(struct drm_device *dev);
+u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask);
 void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv);
 static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index ba4e667..7d99a9c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3745,19 +3745,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
 	mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
 	mask &= dev_priv->pm_rps_events;
 
-	/*
-	 * SNB,IVB can while VLV,CHV may hard hang on looping batchbuffer
-	 * if GEN6_PM_UP_EI_EXPIRED is masked.
-	 *
-	 * TODO: verify if this can be reproduced on VLV,CHV.
-	 */
-	if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev))
-		mask |= GEN6_PM_RP_UP_EI_EXPIRED;
-
-	if (INTEL_INFO(dev_priv)->gen >= 8)
-		mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP;
-
-	return ~mask;
+	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
 }
 
 /* gen6_set_rps is called to update the frequency request, but should also be
-- 
1.8.4



More information about the Intel-gfx mailing list