[PATCH 4/5] use-mi-math-for-WA

Fri Sep 13 12:48:48 UTC 2019

---
 drivers/gpu/drm/i915/gt/intel_engine.h | 47 ++++++++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_lrc.c    | 26 +++-----------
 2 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index d3c6993f4f46..5d1145fca797 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -400,6 +400,53 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 	return cs;
 }
 
+static inline u32 *
+gen8_emit_bitop_reg_mask(struct intel_engine_cs *engine,
+			 u32 *cs, u32 op, i915_reg_t reg, u32 mask)
+{
+	const u32 base = engine->mmio_base;
+
+	GEM_BUG_ON(op != MI_MATH_OR && op != MI_MATH_AND);
+
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = i915_mmio_reg_offset(reg);
+	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(base, 0));
+	*cs++ = MI_NOOP;
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(base, 1));
+	*cs++ = mask;
+	*cs++ = MI_NOOP;
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(0));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(1));
+	*cs++ = op;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(0), MI_MATH_REG_ACCU);
+	*cs++ = MI_NOOP;
+
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(base, 0));
+	*cs++ = i915_mmio_reg_offset(reg);
+	*cs++ = MI_NOOP;
+
+	return cs;
+}
+
+static inline u32 *
+gen8_emit_set_register(struct intel_engine_cs *engine,
+		       u32 *cs, i915_reg_t reg, u32 mask)
+{
+	return gen8_emit_bitop_reg_mask(engine, cs, MI_MATH_OR, reg, mask);
+}
+
+static inline u32 *
+gen8_emit_clear_register(struct intel_engine_cs *engine,
+			 u32 *cs, i915_reg_t reg, u32 mask)
+{
+	return gen8_emit_bitop_reg_mask(engine, cs, MI_MATH_AND, reg, ~mask);
+}
+
 static inline void __intel_engine_reset(struct intel_engine_cs *engine,
 					bool stalled)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c87a4f9740e9..acc234d1ce7a 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1888,13 +1888,7 @@ static int execlists_request_alloc(struct i915_request *request)
  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
  * but there is a slight complication as this is applied in WA batch where the
  * values are only initialized once so we cannot take register value at the
- * beginning and reuse it further; hence we save its value to memory, upload a
- * constant value with bit21 set and then we restore it back with the saved value.
- * To simplify the WA, a constant value is formed by using the default value
- * of this register. This shouldn't be a problem because we are only modifying
- * it for a short period and this batch in non-premptible. We can ofcourse
- * use additional instructions that read the actual value of the register
- * at that time and set our bit of interest but it makes the WA complicated.
+ * beginning and reuse it further;
  *
  * This WA is also required for Gen9 so extracting as a function avoids
  * code duplication.
@@ -1903,26 +1897,16 @@ static u32 *
 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 {
 	/* NB no one else is allowed to scribble over scratch + 256! */
-	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
-	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-	*batch++ = intel_gt_scratch_offset(engine->gt,
-					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
-	*batch++ = 0;
-
-	*batch++ = MI_LOAD_REGISTER_IMM(1);
-	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
+	batch = gen8_emit_set_register(engine, batch, GEN8_L3SQCREG4,
+				       GEN8_LQSC_FLUSH_COHERENT_LINES);
 
 	batch = gen8_emit_pipe_control(batch,
 				       PIPE_CONTROL_CS_STALL |
 				       PIPE_CONTROL_DC_FLUSH_ENABLE,
 				       0);
 
-	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
-	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-	*batch++ = intel_gt_scratch_offset(engine->gt,
-					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
-	*batch++ = 0;
+	batch = gen8_emit_clear_register(engine, batch, GEN8_L3SQCREG4,
+					 GEN8_LQSC_FLUSH_COHERENT_LINES);
 
 	return batch;
 }
-- 
2.21.0