[Intel-gfx] [PATCH 5/6] drm/i915: Don't use scratch in WA batch.

Thu Sep 26 10:06:34 UTC 2019

We're currently doing one workaround where we're using scratch as a
temporary storage place, while we're overwriting the value of one
register with some known constant value in order to perform a
workaround.
While we could just do similar thing with CS_GPR register
and MI_LOAD_REGISTER_REG instead of scratch, since we would use CS_GPR
anyways, let's just drop the constant values and do the bitops using
MI_MATH.

Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h   | 53 ++++++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  3 --
 drivers/gpu/drm/i915/gt/intel_lrc.c      | 27 +++---------
 3 files changed, 58 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index d3c6993f4f46..2dfe0b23af1d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -400,6 +400,59 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
 	return cs;
 }
 
+/*
+ * We're using CS_GPR registers for the MI_MATH ops.
+ * Note that user contexts are free to use those registers, which means that we
+ * should only use this this function during context initialization, before
+ * context restore (WA batch) or inside i915-owned contexts.
+ */
+static inline u32 *
+gen8_emit_bitop_reg_mask(struct intel_engine_cs *engine,
+			 u32 *cs, u32 op, i915_reg_t reg, u32 mask)
+{
+	const u32 base = engine->mmio_base;
+
+	GEM_BUG_ON(op != MI_MATH_OR && op != MI_MATH_AND);
+
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = i915_mmio_reg_offset(reg);
+	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(base, 0));
+	*cs++ = MI_NOOP;
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(base, 1));
+	*cs++ = mask;
+	*cs++ = MI_NOOP;
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(0));
+	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(1));
+	*cs++ = op;
+	*cs++ = MI_MATH_STORE(MI_MATH_REG(0), MI_MATH_REG_ACCU);
+	*cs++ = MI_NOOP;
+
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(base, 0));
+	*cs++ = i915_mmio_reg_offset(reg);
+	*cs++ = MI_NOOP;
+
+	return cs;
+}
+
+static inline u32 *
+gen8_emit_set_register(struct intel_engine_cs *engine,
+		       u32 *cs, i915_reg_t reg, u32 mask)
+{
+	return gen8_emit_bitop_reg_mask(engine, cs, MI_MATH_OR, reg, mask);
+}
+
+static inline u32 *
+gen8_emit_clear_register(struct intel_engine_cs *engine,
+			 u32 *cs, i915_reg_t reg, u32 mask)
+{
+	return gen8_emit_bitop_reg_mask(engine, cs, MI_MATH_AND, reg, ~mask);
+}
+
 static inline void __intel_engine_reset(struct intel_engine_cs *engine,
 					bool stalled)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index e64db4c13df6..d9f25ac520a8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -92,9 +92,6 @@ enum intel_gt_scratch_field {
 	/* 8 bytes */
 	INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128,
 
-	/* 8 bytes */
-	INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
-
 };
 
 #endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index fa385218ce92..089c17599ca4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2269,13 +2269,7 @@ static int execlists_request_alloc(struct i915_request *request)
  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
  * but there is a slight complication as this is applied in WA batch where the
  * values are only initialized once so we cannot take register value at the
- * beginning and reuse it further; hence we save its value to memory, upload a
- * constant value with bit21 set and then we restore it back with the saved value.
- * To simplify the WA, a constant value is formed by using the default value
- * of this register. This shouldn't be a problem because we are only modifying
- * it for a short period and this batch in non-premptible. We can ofcourse
- * use additional instructions that read the actual value of the register
- * at that time and set our bit of interest but it makes the WA complicated.
+ * beginning and reuse it further;
  *
  * This WA is also required for Gen9 so extracting as a function avoids
  * code duplication.
@@ -2283,27 +2277,16 @@ static int execlists_request_alloc(struct i915_request *request)
 static u32 *
 gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
 {
-	/* NB no one else is allowed to scribble over scratch + 256! */
-	*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
-	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-	*batch++ = intel_gt_scratch_offset(engine->gt,
-					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
-	*batch++ = 0;
-
-	*batch++ = MI_LOAD_REGISTER_IMM(1);
-	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-	*batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
+	batch = gen8_emit_set_register(engine, batch, GEN8_L3SQCREG4,
+				       GEN8_LQSC_FLUSH_COHERENT_LINES);
 
 	batch = gen8_emit_pipe_control(batch,
 				       PIPE_CONTROL_CS_STALL |
 				       PIPE_CONTROL_DC_FLUSH_ENABLE,
 				       0);
 
-	*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
-	*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-	*batch++ = intel_gt_scratch_offset(engine->gt,
-					   INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
-	*batch++ = 0;
+	batch = gen8_emit_clear_register(engine, batch, GEN8_L3SQCREG4,
+					 GEN8_LQSC_FLUSH_COHERENT_LINES);
 
 	return batch;
 }
-- 
2.21.0