[Intel-gfx] [RFC] drm/i915/bdw: Apply workarounds to the golden render state
arun.siluvery at linux.intel.com
arun.siluvery at linux.intel.com
Fri Aug 8 11:52:57 CEST 2014
From: Arun Siluvery <arun.siluvery at linux.intel.com>
Workarounds for bdw are currently applied in init_clock_gating(), but they
are lost following a GPU reset. Some of the registers are part of the register
state context and are restored on every context switch, so initializing the
WAs in the golden render state ensures that they are applied even when we
start with an uninitialized context, or during the hw initialization that
follows a reset.
Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
---
drivers/gpu/drm/i915/intel_pm.c | 50 ---------------------
drivers/gpu/drm/i915/intel_renderstate_gen8.c | 62 +++++++++++++++++----------
2 files changed, 39 insertions(+), 73 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1ddd4df..ab64b64 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5402,38 +5402,11 @@ static void gen8_init_clock_gating(struct drm_device *dev)
/* FIXME(BDW): Check all the w/a, some might only apply to
* pre-production hw. */
- /* WaDisablePartialInstShootdown:bdw */
- I915_WRITE(GEN8_ROW_CHICKEN,
- _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
-
- /* WaDisableThreadStallDopClockGating:bdw */
- /* FIXME: Unclear whether we really need this on production bdw. */
- I915_WRITE(GEN8_ROW_CHICKEN,
- _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
-
- /*
- * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
- * pre-production hardware
- */
- I915_WRITE(HALF_SLICE_CHICKEN3,
- _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
- I915_WRITE(HALF_SLICE_CHICKEN3,
- _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
I915_WRITE(_3D_CHICKEN3,
_MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
- I915_WRITE(COMMON_SLICE_CHICKEN2,
- _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
-
- I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
- _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
-
- /* WaDisableDopClockGating:bdw May not be needed for production */
- I915_WRITE(GEN7_ROW_CHICKEN2,
- _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
-
/* WaSwitchSolVfFArbitrationPriority:bdw */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
@@ -5448,41 +5421,18 @@ static void gen8_init_clock_gating(struct drm_device *dev)
BDW_DPRS_MASK_VBLANK_SRD);
}
- /* Use Force Non-Coherent whenever executing a 3D context. This is a
- * workaround for for a possible hang in the unlikely event a TLB
- * invalidation occurs during a PSD flush.
- */
- I915_WRITE(HDC_CHICKEN0,
- I915_READ(HDC_CHICKEN0) |
- _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
-
/* WaVSRefCountFullforceMissDisable:bdw */
/* WaDSRefCountFullforceMissDisable:bdw */
I915_WRITE(GEN7_FF_THREAD_MODE,
I915_READ(GEN7_FF_THREAD_MODE) &
~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
- /*
- * BSpec recommends 8x4 when MSAA is used,
- * however in practice 16x4 seems fastest.
- *
- * Note that PS/WM thread counts depend on the WIZ hashing
- * disable bit, which we don't touch here, but it's good
- * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
- */
- I915_WRITE(GEN7_GT_MODE,
- GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
-
I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
_MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
/* WaDisableSDEUnitClockGating:bdw */
I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
-
- /* Wa4x4STCOptimizationDisable:bdw */
- I915_WRITE(CACHE_MODE_1,
- _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
}
static void haswell_init_clock_gating(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.c b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
index 75ef1b5..0b26783 100644
--- a/drivers/gpu/drm/i915/intel_renderstate_gen8.c
+++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
@@ -1,14 +1,38 @@
#include "intel_renderstate.h"
static const u32 gen8_null_state_relocs[] = {
- 0x00000048,
- 0x00000050,
- 0x00000060,
- 0x000003ec,
+ 0x000000a8,
+ 0x000000b0,
+ 0x000000c0,
+ 0x0000044c,
-1,
};
static const u32 gen8_null_state_batch[] = {
+ 0x11000001,
+ 0x0000e4f0,
+ 0x83208320,
+ 0x11000001,
+ 0x0000e4f4,
+ 0x00010001,
+ 0x11000001,
+ 0x0000e184,
+ 0x01020102,
+ 0x11000001,
+ 0x0000e100,
+ 0x04000400,
+ 0x11000001,
+ 0x00007014,
+ 0x00010001,
+ 0x11000001,
+ 0x00007300,
+ 0x00100010,
+ 0x11000001,
+ 0x00007004,
+ 0x00400040,
+ 0x11000001,
+ 0x00007008,
+ 0x02800200,
0x69040000,
0x61020001,
0x00000000,
@@ -40,9 +64,9 @@ static const u32 gen8_null_state_batch[] = {
0xfffff001,
0x00001001,
0x78230000,
- 0x000006e0,
+ 0x00000720,
0x78210000,
- 0x00000700,
+ 0x00000740,
0x78300000,
0x08010040,
0x78330000,
@@ -52,9 +76,9 @@ static const u32 gen8_null_state_batch[] = {
0x78320000,
0x08000000,
0x78240000,
- 0x00000641,
+ 0x00000681,
0x780e0000,
- 0x00000601,
+ 0x00000641,
0x780d0000,
0x00000000,
0x78180000,
@@ -199,9 +223,9 @@ static const u32 gen8_null_state_batch[] = {
0x00000000,
0x00000000,
0x782a0000,
- 0x00000480,
+ 0x000004c0,
0x782f0000,
- 0x00000540,
+ 0x00000580,
0x78140000,
0x00000800,
0x78170009,
@@ -216,7 +240,7 @@ static const u32 gen8_null_state_batch[] = {
0x00000000,
0x00000000,
0x7820000a,
- 0x00000580,
+ 0x000005c0,
0x00000000,
0x08080000,
0x00000000,
@@ -232,7 +256,7 @@ static const u32 gen8_null_state_batch[] = {
0x784f0000,
0x80000100,
0x780f0000,
- 0x00000740,
+ 0x00000780,
0x78050006,
0x00000000,
0x00000000,
@@ -260,7 +284,7 @@ static const u32 gen8_null_state_batch[] = {
0x00000000,
0x78080003,
0x00006000,
- 0x000005e0, /* reloc */
+ 0x00000620, /* reloc */
0x00000000,
0x00000000,
0x78090005,
@@ -289,16 +313,8 @@ static const u32 gen8_null_state_batch[] = {
0x00000000,
0x00000000,
0x00000000,
- 0x00000000,
- 0x00000000,
- 0x00000000,
- 0x00000000,
- 0x00000000,
- 0x00000000,
- 0x00000000,
- 0x00000000,
- 0x000004c0, /* state start */
- 0x00000500,
+ 0x00000500, /* state start */
+ 0x00000540,
0x00000000,
0x00000000,
0x00000000,
--
2.0.4
More information about the Intel-gfx
mailing list