[Intel-gfx] [PATCH 5/6] drm/i915: Don't use scratch in WA batch.
Michał Winiarski
michal.winiarski at intel.com
Thu Sep 26 10:06:34 UTC 2019
We're currently doing one workaround where we're using scratch as a
temporary storage place, while we're overwriting the value of one
register with some known constant value in order to perform a
workaround.
While we could just do similar thing with CS_GPR register
and MI_LOAD_REGISTER_REG instead of scratch, since we would use CS_GPR
anyways, let's just drop the constant values and do the bitops using
MI_MATH.
Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/gt/intel_engine.h | 53 ++++++++++++++++++++++++
drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 --
drivers/gpu/drm/i915/gt/intel_lrc.c | 27 +++---------
3 files changed, 58 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index d3c6993f4f46..2dfe0b23af1d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -400,6 +400,59 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
return cs;
}
+/*
+ * We're using CS_GPR registers for the MI_MATH ops.
+ * Note that user contexts are free to use those registers, which means that we
+ * should only use this this function during context initialization, before
+ * context restore (WA batch) or inside i915-owned contexts.
+ */
+static inline u32 *
+gen8_emit_bitop_reg_mask(struct intel_engine_cs *engine,
+ u32 *cs, u32 op, i915_reg_t reg, u32 mask)
+{
+ const u32 base = engine->mmio_base;
+
+ GEM_BUG_ON(op != MI_MATH_OR && op != MI_MATH_AND);
+
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = i915_mmio_reg_offset(reg);
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(base, 0));
+ *cs++ = MI_NOOP;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(base, 1));
+ *cs++ = mask;
+ *cs++ = MI_NOOP;
+
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(0));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(1));
+ *cs++ = op;
+ *cs++ = MI_MATH_STORE(MI_MATH_REG(0), MI_MATH_REG_ACCU);
+ *cs++ = MI_NOOP;
+
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(base, 0));
+ *cs++ = i915_mmio_reg_offset(reg);
+ *cs++ = MI_NOOP;
+
+ return cs;
+}
+
+static inline u32 *
+gen8_emit_set_register(struct intel_engine_cs *engine,
+ u32 *cs, i915_reg_t reg, u32 mask)
+{
+ return gen8_emit_bitop_reg_mask(engine, cs, MI_MATH_OR, reg, mask);
+}
+
+static inline u32 *
+gen8_emit_clear_register(struct intel_engine_cs *engine,
+ u32 *cs, i915_reg_t reg, u32 mask)
+{
+ return gen8_emit_bitop_reg_mask(engine, cs, MI_MATH_AND, reg, ~mask);
+}
+
static inline void __intel_engine_reset(struct intel_engine_cs *engine,
bool stalled)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index e64db4c13df6..d9f25ac520a8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -92,9 +92,6 @@ enum intel_gt_scratch_field {
/* 8 bytes */
INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128,
- /* 8 bytes */
- INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
-
};
#endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index fa385218ce92..089c17599ca4 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2269,13 +2269,7 @@ static int execlists_request_alloc(struct i915_request *request)
* PIPE_CONTROL instruction. This is required for the flush to happen correctly
* but there is a slight complication as this is applied in WA batch where the
* values are only initialized once so we cannot take register value at the
- * beginning and reuse it further; hence we save its value to memory, upload a
- * constant value with bit21 set and then we restore it back with the saved value.
- * To simplify the WA, a constant value is formed by using the default value
- * of this register. This shouldn't be a problem because we are only modifying
- * it for a short period and this batch in non-premptible. We can ofcourse
- * use additional instructions that read the actual value of the register
- * at that time and set our bit of interest but it makes the WA complicated.
+ * beginning and reuse it further;
*
* This WA is also required for Gen9 so extracting as a function avoids
* code duplication.
@@ -2283,27 +2277,16 @@ static int execlists_request_alloc(struct i915_request *request)
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
- /* NB no one else is allowed to scribble over scratch + 256! */
- *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
- *batch++ = 0;
-
- *batch++ = MI_LOAD_REGISTER_IMM(1);
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
+ batch = gen8_emit_set_register(engine, batch, GEN8_L3SQCREG4,
+ GEN8_LQSC_FLUSH_COHERENT_LINES);
batch = gen8_emit_pipe_control(batch,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_DC_FLUSH_ENABLE,
0);
- *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
- *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
- *batch++ = 0;
+ batch = gen8_emit_clear_register(engine, batch, GEN8_L3SQCREG4,
+ GEN8_LQSC_FLUSH_COHERENT_LINES);
return batch;
}
--
2.21.0
More information about the Intel-gfx
mailing list