[Intel-gfx] [PATCH v2 2/2] drm/i915: fix HW lockup due to missing RPS IRQ workaround on GEN6

Daniel Vetter daniel at ffwll.ch
Fri Dec 19 05:41:11 PST 2014


On Fri, Dec 19, 2014 at 02:51:57PM +0200, Imre Deak wrote:
> In
> 
> commit dbea3cea69508e9d548ed4a6be13de35492e5d15
> Author: Imre Deak <imre.deak at intel.com>
> Date:   Mon Dec 15 18:59:28 2014 +0200
> 
>     drm/i915: sanitize RPS resetting during GPU reset
> 
> we disable RPS interrupts during GPU resetting, but don't apply the
> necessary GEN6 HW workaround. This leads to a HW lockup during a
> subsequent "looping batchbuffer" workload. This is triggered by the
> testcase that submits exactly this kind of workload after a simulated
> GPU reset. I'm not sure how likely the bug would have triggered
> otherwise, since we would have applied the workaround anyway shortly
> after the GPU reset, when enabling GT powersaving from the deferred
> work.
> 
> This may also fix unrelated issues, since during driver loading /
> suspending we also disable RPS interrupts and so we also had a short
> window during the rest of the loading / resuming where a similar
> workload could run without the workaround applied.
> 
> v2:
> - separate the fix to route RPS interrupts to the CPU on GEN9 too
>   to a separate patch (Daniel)
> 
> Bisected-by: Ander Conselvan de Oliveira <ander.conselvan.de.oliveira at intel.com>
> Testcase: igt/gem_reset_stats/ban-ctx-render
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87429
> Signed-off-by: Imre Deak <imre.deak at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_irq.c  | 18 ++++++++++++++++--
>  drivers/gpu/drm/i915/intel_drv.h |  1 +
>  drivers/gpu/drm/i915/intel_pm.c  | 11 +----------
>  3 files changed, 18 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index aa3180c..f853f26 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -296,6 +296,21 @@ void gen6_enable_rps_interrupts(struct drm_device *dev)
>  	spin_unlock_irq(&dev_priv->irq_lock);
>  }
>  
> +u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask)
> +{
> +	/*
> +	 * IVB and SNB hard hangs on looping batchbuffer
> +	 * if GEN6_PM_UP_EI_EXPIRED is masked.
> +	 */
> +	if (INTEL_INFO(dev_priv)->gen <= 7 && !IS_HASWELL(dev_priv))

Hm, the comment says snb&ivb, but the code includes byt. We also have an
unprocted write to PMINTRMSK still in vlv_set_rps_idle. For consistency I
think we should either switch to explicitly check for only snb/ivb here or
adapt the mask in vlv_set_rps_idle too. I think the later is preferrable
since that's the only vlv function that doesn't sanitize the rps
interrupts with this now. We could later on blow through some cycles on
vlv to figure out whether it's afflicted from this bug or not.
-Daniel

> +		mask &= ~GEN6_PM_RP_UP_EI_EXPIRED;
> +
> +	if (INTEL_INFO(dev_priv)->gen >= 8)
> +		mask &= ~GEN8_PMINTR_REDIRECT_TO_NON_DISP;
> +
> +	return mask;
> +}
> +
>  void gen6_disable_rps_interrupts(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -308,8 +323,7 @@ void gen6_disable_rps_interrupts(struct drm_device *dev)
>  
>  	spin_lock_irq(&dev_priv->irq_lock);
>  
> -	I915_WRITE(GEN6_PMINTRMSK, INTEL_INFO(dev_priv)->gen >= 8 ?
> -		   ~GEN8_PMINTR_REDIRECT_TO_NON_DISP : ~0);
> +	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0));
>  
>  	__gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events);
>  	I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) &
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 588b618..bb871f3 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -795,6 +795,7 @@ void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask);
>  void gen6_reset_rps_interrupts(struct drm_device *dev);
>  void gen6_enable_rps_interrupts(struct drm_device *dev);
>  void gen6_disable_rps_interrupts(struct drm_device *dev);
> +u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask);
>  void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv);
>  void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv);
>  static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index f1f06d7..4bd1b8b 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3745,16 +3745,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
>  	mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
>  	mask &= dev_priv->pm_rps_events;
>  
> -	/* IVB and SNB hard hangs on looping batchbuffer
> -	 * if GEN6_PM_UP_EI_EXPIRED is masked.
> -	 */
> -	if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev))
> -		mask |= GEN6_PM_RP_UP_EI_EXPIRED;
> -
> -	if (INTEL_INFO(dev_priv)->gen >= 8)
> -		mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP;
> -
> -	return ~mask;
> +	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
>  }
>  
>  /* gen6_set_rps is called to update the frequency request, but should also be
> -- 
> 1.8.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch


More information about the Intel-gfx mailing list