[Intel-gfx] [RFC] drm/i915/bdw: Apply workarounds to the golden render state

Siluvery, Arun arun.siluvery at linux.intel.com
Fri Aug 8 15:11:02 CEST 2014


On 08/08/2014 13:20, Ville Syrjälä wrote:
> On Fri, Aug 08, 2014 at 10:52:57AM +0100, arun.siluvery at linux.intel.com wrote:
>> From: Arun Siluvery <arun.siluvery at linux.intel.com>
>>
>> Workarounds for bdw are currently applied in init_clock_gating() but they
>> are lost following a gpu reset. Some of the registers are part of register
>> state context and they are restored with every context switch so initializing
>> WAs in golden render state ensures that they are applied even when we start
>> with an uninitialized context or during hw initialization followed by a reset.
>
> This approach might require separate null states for BDW vs. CHV and IVB
> vs. HSW vs. VLV, which seems a bit unfortunate. Might be better to just
> issue the w/a register writes via LRIs from the code as part of the null
> state load.
>
Yes, this is a better approach. I am currently changing the code to
achieve this, though I am not sure how easy it will be.

> Although I don't actually understand how this improves things as opposed
> to just applying the w/as via mmio writes. Does it?
>
I observed random behaviour with CACHE_MODE_1, which would simply lose the
applied workaround on the first context switch even though it is loaded with
inhibit==1; register values are not supposed to change, but this one was
changing.

I think it is better to add them to the null batch to ensure the hardware
starts with the WAs applied.

regards
Arun

>>
>> Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_pm.c               | 50 ---------------------
>>   drivers/gpu/drm/i915/intel_renderstate_gen8.c | 62 +++++++++++++++++----------
>>   2 files changed, 39 insertions(+), 73 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index 1ddd4df..ab64b64 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -5402,38 +5402,11 @@ static void gen8_init_clock_gating(struct drm_device *dev)
>>   	/* FIXME(BDW): Check all the w/a, some might only apply to
>>   	 * pre-production hw. */
>>
>> -	/* WaDisablePartialInstShootdown:bdw */
>> -	I915_WRITE(GEN8_ROW_CHICKEN,
>> -		   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
>> -
>> -	/* WaDisableThreadStallDopClockGating:bdw */
>> -	/* FIXME: Unclear whether we really need this on production bdw. */
>> -	I915_WRITE(GEN8_ROW_CHICKEN,
>> -		   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
>> -
>> -	/*
>> -	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
>> -	 * pre-production hardware
>> -	 */
>> -	I915_WRITE(HALF_SLICE_CHICKEN3,
>> -		   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
>> -	I915_WRITE(HALF_SLICE_CHICKEN3,
>> -		   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>>   	I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
>>
>>   	I915_WRITE(_3D_CHICKEN3,
>>   		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
>>
>> -	I915_WRITE(COMMON_SLICE_CHICKEN2,
>> -		   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
>> -
>> -	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
>> -		   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
>> -
>> -	/* WaDisableDopClockGating:bdw May not be needed for production */
>> -	I915_WRITE(GEN7_ROW_CHICKEN2,
>> -		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>> -
>>   	/* WaSwitchSolVfFArbitrationPriority:bdw */
>>   	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
>>
>> @@ -5448,41 +5421,18 @@ static void gen8_init_clock_gating(struct drm_device *dev)
>>   			   BDW_DPRS_MASK_VBLANK_SRD);
>>   	}
>>
>> -	/* Use Force Non-Coherent whenever executing a 3D context. This is a
>> -	 * workaround for for a possible hang in the unlikely event a TLB
>> -	 * invalidation occurs during a PSD flush.
>> -	 */
>> -	I915_WRITE(HDC_CHICKEN0,
>> -		   I915_READ(HDC_CHICKEN0) |
>> -		   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
>> -
>>   	/* WaVSRefCountFullforceMissDisable:bdw */
>>   	/* WaDSRefCountFullforceMissDisable:bdw */
>>   	I915_WRITE(GEN7_FF_THREAD_MODE,
>>   		   I915_READ(GEN7_FF_THREAD_MODE) &
>>   		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
>>
>> -	/*
>> -	 * BSpec recommends 8x4 when MSAA is used,
>> -	 * however in practice 16x4 seems fastest.
>> -	 *
>> -	 * Note that PS/WM thread counts depend on the WIZ hashing
>> -	 * disable bit, which we don't touch here, but it's good
>> -	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
>> -	 */
>> -	I915_WRITE(GEN7_GT_MODE,
>> -		   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
>> -
>>   	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
>>   		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
>>
>>   	/* WaDisableSDEUnitClockGating:bdw */
>>   	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
>>   		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
>> -
>> -	/* Wa4x4STCOptimizationDisable:bdw */
>> -	I915_WRITE(CACHE_MODE_1,
>> -		   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
>>   }
>>
>>   static void haswell_init_clock_gating(struct drm_device *dev)
>> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.c b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
>> index 75ef1b5..0b26783 100644
>> --- a/drivers/gpu/drm/i915/intel_renderstate_gen8.c
>> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
>> @@ -1,14 +1,38 @@
>>   #include "intel_renderstate.h"
>>
>>   static const u32 gen8_null_state_relocs[] = {
>> -	0x00000048,
>> -	0x00000050,
>> -	0x00000060,
>> -	0x000003ec,
>> +	0x000000a8,
>> +	0x000000b0,
>> +	0x000000c0,
>> +	0x0000044c,
>>   	-1,
>>   };
>>
>>   static const u32 gen8_null_state_batch[] = {
>> +	0x11000001,
>> +	0x0000e4f0,
>> +	0x83208320,
>> +	0x11000001,
>> +	0x0000e4f4,
>> +	0x00010001,
>> +	0x11000001,
>> +	0x0000e184,
>> +	0x01020102,
>> +	0x11000001,
>> +	0x0000e100,
>> +	0x04000400,
>> +	0x11000001,
>> +	0x00007014,
>> +	0x00010001,
>> +	0x11000001,
>> +	0x00007300,
>> +	0x00100010,
>> +	0x11000001,
>> +	0x00007004,
>> +	0x00400040,
>> +	0x11000001,
>> +	0x00007008,
>> +	0x02800200,
>>   	0x69040000,
>>   	0x61020001,
>>   	0x00000000,
>> @@ -40,9 +64,9 @@ static const u32 gen8_null_state_batch[] = {
>>   	0xfffff001,
>>   	0x00001001,
>>   	0x78230000,
>> -	0x000006e0,
>> +	0x00000720,
>>   	0x78210000,
>> -	0x00000700,
>> +	0x00000740,
>>   	0x78300000,
>>   	0x08010040,
>>   	0x78330000,
>> @@ -52,9 +76,9 @@ static const u32 gen8_null_state_batch[] = {
>>   	0x78320000,
>>   	0x08000000,
>>   	0x78240000,
>> -	0x00000641,
>> +	0x00000681,
>>   	0x780e0000,
>> -	0x00000601,
>> +	0x00000641,
>>   	0x780d0000,
>>   	0x00000000,
>>   	0x78180000,
>> @@ -199,9 +223,9 @@ static const u32 gen8_null_state_batch[] = {
>>   	0x00000000,
>>   	0x00000000,
>>   	0x782a0000,
>> -	0x00000480,
>> +	0x000004c0,
>>   	0x782f0000,
>> -	0x00000540,
>> +	0x00000580,
>>   	0x78140000,
>>   	0x00000800,
>>   	0x78170009,
>> @@ -216,7 +240,7 @@ static const u32 gen8_null_state_batch[] = {
>>   	0x00000000,
>>   	0x00000000,
>>   	0x7820000a,
>> -	0x00000580,
>> +	0x000005c0,
>>   	0x00000000,
>>   	0x08080000,
>>   	0x00000000,
>> @@ -232,7 +256,7 @@ static const u32 gen8_null_state_batch[] = {
>>   	0x784f0000,
>>   	0x80000100,
>>   	0x780f0000,
>> -	0x00000740,
>> +	0x00000780,
>>   	0x78050006,
>>   	0x00000000,
>>   	0x00000000,
>> @@ -260,7 +284,7 @@ static const u32 gen8_null_state_batch[] = {
>>   	0x00000000,
>>   	0x78080003,
>>   	0x00006000,
>> -	0x000005e0,	 /* reloc */
>> +	0x00000620,	 /* reloc */
>>   	0x00000000,
>>   	0x00000000,
>>   	0x78090005,
>> @@ -289,16 +313,8 @@ static const u32 gen8_null_state_batch[] = {
>>   	0x00000000,
>>   	0x00000000,
>>   	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x00000000,
>> -	0x000004c0,	 /* state start */
>> -	0x00000500,
>> +	0x00000500,	 /* state start */
>> +	0x00000540,
>>   	0x00000000,
>>   	0x00000000,
>>   	0x00000000,
>> --
>> 2.0.4
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>




More information about the Intel-gfx mailing list