[Intel-gfx] [PATCH v2 3/4] drm/i915: Make workaround verification *optional*

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Apr 16 09:48:05 UTC 2019


On 16/04/2019 09:10, Chris Wilson wrote:
> Sometimes the HW doesn't even play fair, and completely forgets about
> register writes. Skip verifying known troublemakers.
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=108954
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/intel_workarounds.c      | 40 ++++++++++++++-----
>   .../gpu/drm/i915/intel_workarounds_types.h    |  7 ++--
>   2 files changed, 33 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> index db99f2e676bb..ba58be05f58c 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> @@ -122,6 +122,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
>   			wal->wa_count++;
>   			wa_->val |= wa->val;
>   			wa_->mask |= wa->mask;
> +			wa_->read |= wa->read;
>   			return;
>   		}
>   	}
> @@ -146,9 +147,10 @@ wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
>   		   u32 val)
>   {
>   	struct i915_wa wa = {
> -		.reg = reg,
> +		.reg  = reg,
>   		.mask = mask,
> -		.val = val
> +		.val  = val,
> +		.read = mask,
>   	};
>   
>   	_wa_add(wal, &wa);
> @@ -172,6 +174,19 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
>   	wa_write_masked_or(wal, reg, val, val);
>   }
>   
> +static void
> +ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
> +{
> +	struct i915_wa wa = {
> +		.reg  = reg,
> +		.mask = mask,
> +		.val  = val,
> +		/* Bonkers HW, skip verifying */
> +	};
> +
> +	_wa_add(wal, &wa);
> +}
> +
>   #define WA_SET_BIT_MASKED(addr, mask) \
>   	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
>   
> @@ -916,10 +931,11 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
>   static bool
>   wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
>   {
> -	if ((cur ^ wa->val) & wa->mask) {
> +	if ((cur ^ wa->val) & wa->read) {
>   		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
> -			  name, from, i915_mmio_reg_offset(wa->reg), cur,
> -			  cur & wa->mask, wa->val, wa->mask);
> +			  name, from, i915_mmio_reg_offset(wa->reg),
> +			  cur, cur & wa->read,
> +			  wa->val, wa->mask);
>   
>   		return false;
>   	}
> @@ -1122,9 +1138,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>   			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
>   
>   		/* WaPipelineFlushCoherentLines:icl */
> -		wa_write_or(wal,
> -			    GEN8_L3SQCREG4,
> -			    GEN8_LQSC_FLUSH_COHERENT_LINES);
> +		ignore_wa_write_or(wal,
> +				   GEN8_L3SQCREG4,
> +				   GEN8_LQSC_FLUSH_COHERENT_LINES,
> +				   GEN8_LQSC_FLUSH_COHERENT_LINES);
>   
>   		/*
>   		 * Wa_1405543622:icl
> @@ -1151,9 +1168,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>   		 * Wa_1405733216:icl
>   		 * Formerly known as WaDisableCleanEvicts
>   		 */
> -		wa_write_or(wal,
> -			    GEN8_L3SQCREG4,
> -			    GEN11_LQSC_CLEAN_EVICT_DISABLE);
> +		ignore_wa_write_or(wal,
> +				   GEN8_L3SQCREG4,
> +				   GEN11_LQSC_CLEAN_EVICT_DISABLE,
> +				   GEN11_LQSC_CLEAN_EVICT_DISABLE);
>   
>   		/* WaForwardProgressSoftReset:icl */
>   		wa_write_or(wal,
> diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/intel_workarounds_types.h
> index 30918da180ff..42ac1fb99572 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds_types.h
> +++ b/drivers/gpu/drm/i915/intel_workarounds_types.h
> @@ -12,9 +12,10 @@
>   #include "i915_reg.h"
>   
>   struct i915_wa {
> -	i915_reg_t	  reg;
> -	u32		  mask;
> -	u32		  val;
> +	i915_reg_t	reg;
> +	u32		mask;
> +	u32		val;
> +	u32		read;
>   };
>   
>   struct i915_wa_list {
> 

The sporadic nature of failures worries me here. What is better:

a) Stop verifying from the driver and potentially lose a workaround 
and/or never figure out what is actually going on.

b) Have CI buglog track this as a known issue "forever"? Can't our 
automated tooling handle this?

Regards,

Tvrtko


More information about the Intel-gfx mailing list