[Intel-gfx] [RFC 1/2] tools/null_state_render: Add BDW workarounds to golden render state

arun.siluvery at linux.intel.com arun.siluvery at linux.intel.com
Fri Aug 8 11:54:55 CEST 2014


From: Arun Siluvery <arun.siluvery at linux.intel.com>

Some workaround registers are part of the register state context and are
restored on every context switch, so initializing them in the golden render
state ensures that they are applied even when we start with an uninitialized
context or during hw initialization followed by a reset.

Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
---
 lib/intel_reg.h                               | 22 ++++++++++++++++
 tools/null_state_gen/intel_renderstate_gen8.c | 37 +++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index 4afec45..86175bb 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -3606,4 +3606,26 @@ typedef enum {
 #define GEN7_ROW_CHICKEN2_GT2		0xf4f4
 #define   DOP_CLOCK_GATING_DISABLE	(1<<0)
 
+#define HALF_SLICE_CHICKEN3		0xe184
+#define   GEN8_CENTROID_PIXEL_OPT_DIS	(1<<8)
+#define   GEN8_SAMPLER_POWER_BYPASS_DIS	(1<<1)
+
+#define GEN7_HALF_SLICE_CHICKEN1	0xe100
+#define   GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE	(1<<10)
+
+#define COMMON_SLICE_CHICKEN2			0x7014
+# define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE	(1<<0)
+
+/* GEN8 chicken */
+#define HDC_CHICKEN0				0x7300
+#define  HDC_FORCE_NON_COHERENT			(1<<4)
+
+#define GEN7_CACHE_MODE_1			0x7004 /* IVB+ */
+#define   GEN8_4x4_STC_OPTIMIZATION_DISABLE	(1<<6)
+
+#define GEN7_GT_MODE	0x7008
+#define   GEN6_WIZ_HASHING(hi, lo)		(((hi) << 9) | ((lo) << 7))
+#define   GEN6_WIZ_HASHING_16x4			GEN6_WIZ_HASHING(1, 0)
+#define   GEN6_WIZ_HASHING_MASK			(GEN6_WIZ_HASHING(1, 1) << 16)
+
 #endif /* _I810_REG_H */
diff --git a/tools/null_state_gen/intel_renderstate_gen8.c b/tools/null_state_gen/intel_renderstate_gen8.c
index 14690d2..a8143db 100644
--- a/tools/null_state_gen/intel_renderstate_gen8.c
+++ b/tools/null_state_gen/intel_renderstate_gen8.c
@@ -703,6 +703,43 @@ static void gen8_emit_workarounds(struct intel_batchbuffer *batch)
 
 	/* WaDisableDopClockGating:bdw May not be needed for production */
 	gen8_emit_write(batch, GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+
+	/*
+	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
+	 * pre-production hardware
+	 */
+	gen8_emit_write(batch, HALF_SLICE_CHICKEN3,
+			_MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
+					   | GEN8_SAMPLER_POWER_BYPASS_DIS));
+
+	gen8_emit_write(batch, GEN7_HALF_SLICE_CHICKEN1,
+			_MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
+
+	gen8_emit_write(batch, COMMON_SLICE_CHICKEN2,
+			_MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
+
+	/* Use Force Non-Coherent whenever executing a 3D context. This is a
+	 * workaround for a possible hang in the unlikely event a TLB
+	 * invalidation occurs during a PSD flush.
+	 */
+	gen8_emit_write(batch, HDC_CHICKEN0,
+			_MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
+
+	/* Wa4x4STCOptimizationDisable:bdw */
+	gen8_emit_write(batch, GEN7_CACHE_MODE_1,
+			_MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
+
+	/*
+	 * BSpec recommends 8x4 when MSAA is used,
+	 * however in practice 16x4 seems fastest.
+	 *
+	 * Note that PS/WM thread counts depend on the WIZ hashing
+	 * disable bit, which we don't touch here, but it's good
+	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+	 */
+	gen8_emit_write(batch, GEN7_GT_MODE,
+			GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+
 }
 
 int gen8_setup_null_render_state(struct intel_batchbuffer *batch)
-- 
2.0.4




More information about the Intel-gfx mailing list