[Intel-gfx] [PATCH] drm/i915: fix HW lockup due to missing RPS IRQ workaround on GEN6

Mika Kuoppala mika.kuoppala at linux.intel.com
Fri Dec 19 04:30:27 PST 2014


Daniel Vetter <daniel at ffwll.ch> writes:

> On Fri, Dec 19, 2014 at 11:53:21AM +0200, Imre Deak wrote:
>> In
>> 
>> commit dbea3cea69508e9d548ed4a6be13de35492e5d15
>> Author: Imre Deak <imre.deak at intel.com>
>> Date:   Mon Dec 15 18:59:28 2014 +0200
>> 
>>     drm/i915: sanitize RPS resetting during GPU reset
>> 
>> we disable RPS interrupts during GPU resetting, but don't apply the
>> necessary GEN6 HW workaround. This leads to a HW lockup during a
>> subsequent "looping batchbuffer" workload. This is triggered by the
>> testcase that submits exactly this kind of workload after a simulated
>> GPU reset. I'm not sure how likely the bug would have triggered
>> otherwise, since we would have applied the workaround anyway shortly
>> after the GPU reset, when enabling GT powersaving from the deferred
>> work.
>> 
>> This may also fix unrelated issues, since during driver loading /
>> suspending we also disable RPS interrupts and so we also had a short
>> window during the rest of the loading / resuming where a similar
>> workload could run without the workaround applied.
>
> Hm this might explain why the reset tests randomly hard-crashed snb every
> once in a while. So symptoms matches. Mika, can you please give this a
> spin?

307 loops of gem_reset_stats/ban-ctx-render on snb without issues.

-Mika

>> At the same time also keep the IRQ routing to display enabled at all
>> times and everywhere on GEN8+, since there isn't ever a reason to route
>> them to GT. For instance this way we could catch spurious interrupts on
>> the CPU instead of allowing some undefined behaviour on the GT (due to
>> the same spurious interrupt routed there).
>
> I don't really grok that part, but it does feel like a separate patch
> (especially since you also change from IS_GEN8 to gen >= 8). Or maybe I'm
> just really confused.
> -Daniel
>
>> 
>> Testcase: igt/gem_reset_stats/ban-ctx-render
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87429
>> Signed-off-by: Imre Deak <imre.deak at intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_irq.c  | 17 +++++++++++++++--
>>  drivers/gpu/drm/i915/intel_drv.h |  1 +
>>  drivers/gpu/drm/i915/intel_pm.c  | 11 +----------
>>  3 files changed, 17 insertions(+), 12 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
>> index aa3180c..12acc40 100644
>> --- a/drivers/gpu/drm/i915/i915_irq.c
>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>> @@ -296,6 +296,20 @@ void gen6_enable_rps_interrupts(struct drm_device *dev)
>>  	spin_unlock_irq(&dev_priv->irq_lock);
>>  }
>>  
>> +u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask)
>> +{
>> +	/* IVB and SNB hard hangs on looping batchbuffer
>> +	 * if GEN6_PM_UP_EI_EXPIRED is masked.
>> +	 */
>> +	if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev))
>> +		mask &= ~GEN6_PM_RP_UP_EI_EXPIRED;
>> +
>> +	if (INTEL_INFO(dev_priv->dev)->gen >= 8)
>> +		mask &= ~GEN8_PMINTR_REDIRECT_TO_NON_DISP;
>> +
>> +	return mask;
>> +}
>> +
>>  void gen6_disable_rps_interrupts(struct drm_device *dev)
>>  {
>>  	struct drm_i915_private *dev_priv = dev->dev_private;
>> @@ -308,8 +322,7 @@ void gen6_disable_rps_interrupts(struct drm_device *dev)
>>  
>>  	spin_lock_irq(&dev_priv->irq_lock);
>>  
>> -	I915_WRITE(GEN6_PMINTRMSK, INTEL_INFO(dev_priv)->gen >= 8 ?
>> -		   ~GEN8_PMINTR_REDIRECT_TO_NON_DISP : ~0);
>> +	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0));
>>  
>>  	__gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events);
>>  	I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) &
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>> index 588b618..bb871f3 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -795,6 +795,7 @@ void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask);
>>  void gen6_reset_rps_interrupts(struct drm_device *dev);
>>  void gen6_enable_rps_interrupts(struct drm_device *dev);
>>  void gen6_disable_rps_interrupts(struct drm_device *dev);
>> +u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask);
>>  void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv);
>>  void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv);
>>  static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv)
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index a3ebaa8..4bd1b8b 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -3745,16 +3745,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
>>  	mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
>>  	mask &= dev_priv->pm_rps_events;
>>  
>> -	/* IVB and SNB hard hangs on looping batchbuffer
>> -	 * if GEN6_PM_UP_EI_EXPIRED is masked.
>> -	 */
>> -	if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev))
>> -		mask |= GEN6_PM_RP_UP_EI_EXPIRED;
>> -
>> -	if (IS_GEN8(dev_priv->dev))
>> -		mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP;
>> -
>> -	return ~mask;
>> +	return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
>>  }
>>  
>>  /* gen6_set_rps is called to update the frequency request, but should also be
>> -- 
>> 1.8.4
>> 
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch


More information about the Intel-gfx mailing list