[Intel-gfx] [PATCH] drm/i915: Update WaFlushCoherentL3CacheLinesAtContextSwitch

Arun Siluvery arun.siluvery at linux.intel.com
Fri Jul 3 06:27:31 PDT 2015


In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after PIPE_CONTROL
instruction but there is a slight complication as this is applied in WA batch
where the values are only initialized once.
Dave identified an issue with the current implementation where the register value
is read once at the beginning and it is reused; this patch corrects this by saving
the register value to memory, update register with the bit of our interest and
restore it back with original value.

This implementation uses MI_LOAD_REGISTER_MEM which is currently only used
by command parser and was using a default length of 0. This is now updated
with correct length and moved to appropriate place.

Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Dave Gordon <david.s.gordon at intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c |  6 +--
 drivers/gpu/drm/i915/i915_reg.h        |  3 +-
 drivers/gpu/drm/i915/intel_lrc.c       | 72 +++++++++++++++++++++++++---------
 3 files changed, 58 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 306d9e4..430571b 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -131,7 +131,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
 			.mask = MI_GLOBAL_GTT,
 			.expected = 0,
 	      }},						       ),
-	CMD(  MI_LOAD_REGISTER_MEM,             SMI,   !F,  0xFF,   W | B,
+	CMD(  MI_LOAD_REGISTER_MEM(1),             SMI,   !F,  0xFF,   W | B,
 	      .reg = { .offset = 1, .mask = 0x007FFFFC },
 	      .bits = {{
 			.offset = 0,
@@ -1021,7 +1021,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
 			 * only MI_LOAD_REGISTER_IMM commands.
 			 */
 			if (reg_addr == OACONTROL) {
-				if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
+				if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
 					DRM_DEBUG_DRIVER("CMD: Rejected LRM to OACONTROL\n");
 					return false;
 				}
@@ -1035,7 +1035,7 @@ static bool check_cmd(const struct intel_engine_cs *ring,
 			 * allowed mask/value pair given in the whitelist entry.
 			 */
 			if (reg->mask) {
-				if (desc->cmd.value == MI_LOAD_REGISTER_MEM) {
+				if (desc->cmd.value == MI_LOAD_REGISTER_MEM(1)) {
 					DRM_DEBUG_DRIVER("CMD: Rejected LRM to masked register 0x%08X\n",
 							 reg_addr);
 					return false;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index fa9ccb87..1d1cbe0 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -352,6 +352,8 @@
 #define   MI_INVALIDATE_BSD		(1<<7)
 #define   MI_FLUSH_DW_USE_GTT		(1<<2)
 #define   MI_FLUSH_DW_USE_PPGTT		(0<<2)
+#define MI_LOAD_REGISTER_MEM(x) MI_INSTR(0x29, 2*(x)-1)
+#define MI_LOAD_REGISTER_MEM_GEN8(x) MI_INSTR(0x29, 3*(x)-1)
 #define MI_BATCH_BUFFER		MI_INSTR(0x30, 1)
 #define   MI_BATCH_NON_SECURE		(1)
 /* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -456,7 +458,6 @@
 #define MI_CLFLUSH              MI_INSTR(0x27, 0)
 #define MI_REPORT_PERF_COUNT    MI_INSTR(0x28, 0)
 #define   MI_REPORT_PERF_COUNT_GGTT (1<<0)
-#define MI_LOAD_REGISTER_MEM    MI_INSTR(0x29, 0)
 #define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 0)
 #define MI_RS_STORE_DATA_IMM    MI_INSTR(0x2B, 0)
 #define MI_LOAD_URB_MEM         MI_INSTR(0x2C, 0)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5298103..23ff018 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1095,6 +1095,56 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
 		batch[index++] = (cmd);					\
 	} while (0)
 
+
+/*
+ * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
+ * PIPE_CONTROL instruction. This is required for the flush to happen correctly
+ * but there is a slight complication as this is applied in WA batch where the
+ * values are only initialized once so we cannot take register value at the
+ * beginning and reuse it further; hence we save its value to memory, upload a
+ * constant value with bit21 set and then we restore it back with the saved value.
+ * To simplify the WA, a constant value is formed by using the default value
+ * of this register. This shouldn't be a problem because we are only modifying
+ * it for a short period and this batch in non-premptible. We can ofcourse
+ * use additional instructions that read the actual value of the register
+ * at that time and set our bit of interest but it makes the WA complicated.
+ *
+ * This WA is also required for Gen9 so extracting as a function avoids
+ * code duplication.
+ */
+static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring,
+						uint32_t *const batch,
+						uint32_t index)
+{
+	uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
+
+	wa_ctx_emit(batch, (MI_STORE_REGISTER_MEM_GEN8(1) |
+			    MI_SRM_LRM_GLOBAL_GTT));
+	wa_ctx_emit(batch, GEN8_L3SQCREG4);
+	wa_ctx_emit(batch, ring->scratch.gtt_offset + 256);
+	wa_ctx_emit(batch, 0);
+
+	wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
+	wa_ctx_emit(batch, GEN8_L3SQCREG4);
+	wa_ctx_emit(batch, l3sqc4_flush);
+
+	wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6));
+	wa_ctx_emit(batch, (PIPE_CONTROL_CS_STALL |
+			    PIPE_CONTROL_DC_FLUSH_ENABLE));
+	wa_ctx_emit(batch, 0);
+	wa_ctx_emit(batch, 0);
+	wa_ctx_emit(batch, 0);
+	wa_ctx_emit(batch, 0);
+
+	wa_ctx_emit(batch, (MI_LOAD_REGISTER_MEM_GEN8(1) |
+			    MI_SRM_LRM_GLOBAL_GTT));
+	wa_ctx_emit(batch, GEN8_L3SQCREG4);
+	wa_ctx_emit(batch, ring->scratch.gtt_offset + 256);
+	wa_ctx_emit(batch, 0);
+
+	return index;
+}
+
 static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
 				    uint32_t offset,
 				    uint32_t start_alignment)
@@ -1155,25 +1205,9 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
 
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
 	if (IS_BROADWELL(ring->dev)) {
-		struct drm_i915_private *dev_priv = to_i915(ring->dev);
-		uint32_t l3sqc4_flush = (I915_READ(GEN8_L3SQCREG4) |
-					 GEN8_LQSC_FLUSH_COHERENT_LINES);
-
-		wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
-		wa_ctx_emit(batch, GEN8_L3SQCREG4);
-		wa_ctx_emit(batch, l3sqc4_flush);
-
-		wa_ctx_emit(batch, GFX_OP_PIPE_CONTROL(6));
-		wa_ctx_emit(batch, (PIPE_CONTROL_CS_STALL |
-				    PIPE_CONTROL_DC_FLUSH_ENABLE));
-		wa_ctx_emit(batch, 0);
-		wa_ctx_emit(batch, 0);
-		wa_ctx_emit(batch, 0);
-		wa_ctx_emit(batch, 0);
-
-		wa_ctx_emit(batch, MI_LOAD_REGISTER_IMM(1));
-		wa_ctx_emit(batch, GEN8_L3SQCREG4);
-		wa_ctx_emit(batch, l3sqc4_flush & ~GEN8_LQSC_FLUSH_COHERENT_LINES);
+		index = gen8_emit_flush_coherentl3_wa(ring, batch, index);
+		if (index < 0)
+			return index;
 	}
 
 	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
-- 
1.9.1



More information about the Intel-gfx mailing list