[CI 2/3] drm/i915: Add per ctx batchbuffer wa for timestamp

Mika Kuoppala mika.kuoppala at linux.intel.com
Thu Apr 9 23:45:18 UTC 2020


Restoration of a previous timestamp can collide
with an update of the timestamp, corrupting its value.

Combat this issue by using the indirect ctx bb and
the per context bb, which are run during and after
the context restoration process, respectively.

For the render engine, we should use the indirect ctx
pointer, as this way the restoration part is included in
the timestamp. For other engines, we have to settle for
the per context bb, as the indirect ctx is not available.

References: HSDES#16010904313
Testcase: igt/i915_selftest/gt_lrc
Suggested-by: Joseph Koston <joseph.koston at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context_types.h |  3 +
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 64 ++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_lrc_reg.h       |  1 +
 4 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 07cb83a0d017..c7573d565f58 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -70,6 +70,9 @@ struct intel_context {
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
+
+	u32 ctx_bb_offset;
+
 	u32 tag; /* cookie passed to HW to track this context on submission */
 
 	/* Time on GPU as tracked by the hw. */
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index f04214a54f75..0c2adb4078a7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,7 +138,7 @@
  */
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
 /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
-#define   MI_LRI_CS_MMIO		(1<<19)
+#define   MI_LRI_LRM_CS_MMIO		(1<<19)
 #define   MI_LRI_FORCE_POSTED		(1<<12)
 #define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
@@ -155,6 +155,7 @@
 #define   MI_FLUSH_DW_USE_PPGTT		(0<<2)
 #define MI_LOAD_REGISTER_MEM	   MI_INSTR(0x29, 1)
 #define MI_LOAD_REGISTER_MEM_GEN8  MI_INSTR(0x29, 2)
+#define   MI_LRM_ASYNC			(1<<21)
 #define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 1)
 #define MI_BATCH_BUFFER		MI_INSTR(0x30, 1)
 #define   MI_BATCH_NON_SECURE		(1)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 21340730fae9..2399bb1aa714 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -537,7 +537,7 @@ static void set_offsets(u32 *regs,
 		if (flags & POSTED)
 			*regs |= MI_LRI_FORCE_POSTED;
 		if (INTEL_GEN(engine->i915) >= 11)
-			*regs |= MI_LRI_CS_MMIO;
+			*regs |= MI_LRI_LRM_CS_MMIO;
 		regs++;
 
 		GEM_BUG_ON(!count);
@@ -3142,6 +3142,58 @@ static void execlists_context_unpin(struct intel_context *ce)
 	i915_gem_object_unpin_map(ce->state->obj);
 }
 
+/*
+ * Write the CTX_TIMESTAMP workaround batch into the context object at
+ * ce->ctx_bb_offset and point the register state at it: via the indirect
+ * ctx pointer on the render engine, via the per-ctx bb pointer elsewhere.
+ */
+static void
+gen12_setup_ctx_bb(const struct intel_context *ce,
+		   const struct intel_engine_cs *engine)
+{
+	const bool indirect = engine->class == RENDER_CLASS;
+	u32 * const regs = ce->lrc_reg_state;
+	u32 *batch = (void *)(regs) - LRC_STATE_PN * PAGE_SIZE + ce->ctx_bb_offset;
+	const u32 * const batch_start = batch;
+	const u32 gaddr = i915_ggtt_offset(ce->state);
+	const u32 ggtt_timestamp_offset = gaddr +
+		LRC_STATE_PN * PAGE_SIZE + CTX_TIMESTAMP * sizeof(u32);
+	const u32 ctx_timestamp_offset = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
+	const u32 cmd = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT | MI_LRI_LRM_CS_MMIO;
+	int i;
+
+	/* Reload the saved timestamp three times; only the last is not async. */
+	for (i = 0; i < 3; i++) {
+		*batch++ = cmd | (i < 2 ? MI_LRM_ASYNC : 0);
+		*batch++ = ctx_timestamp_offset;
+		*batch++ = ggtt_timestamp_offset;
+		*batch++ = 0;
+	}
+
+	if (indirect) {
+		/* The indirect ctx size below is counted in cachelines. */
+		while ((unsigned long)batch % CACHELINE_BYTES)
+			*batch++ = MI_NOOP;
+	} else {
+		*batch++ = MI_BATCH_BUFFER_END;
+	}
+
+	GEM_DEBUG_BUG_ON(batch - batch_start > I915_GTT_PAGE_SIZE / sizeof(cmd));
+
+	if (indirect) {
+		regs[GEN12_CTX_BB_PER_CTX_PTR + 2] =
+			(gaddr + ce->ctx_bb_offset) |
+			DIV_ROUND_UP((batch - batch_start) * sizeof(cmd),
+				     CACHELINE_BYTES);
+
+		regs[GEN12_CTX_BB_PER_CTX_PTR + 4] =
+			GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
+	} else {
+		regs[GEN12_CTX_BB_PER_CTX_PTR] =
+			(gaddr + ce->ctx_bb_offset) | CTX_BB_PER_CTX_PTR_VALID;
+	}
+}
+
 static void
 __execlists_update_reg_state(const struct intel_context *ce,
 			     const struct intel_engine_cs *engine,
@@ -3164,7 +3216,11 @@ __execlists_update_reg_state(const struct intel_context *ce,
 			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
 
 		i915_oa_init_reg_state(ce, engine);
+
 	}
+
+	if (ce->ctx_bb_offset)
+		gen12_setup_ctx_bb(ce, engine);
 }
 
 static int
@@ -3184,6 +3240,7 @@ __execlists_context_pin(struct intel_context *ce,
 
 	ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
+
 	__execlists_update_reg_state(ce, engine, ce->ring->tail);
 
 	return 0;
@@ -4844,6 +4901,11 @@ static int __execlists_context_alloc(struct intel_context *ce,
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
 
+	if (INTEL_GEN(engine->i915) == 12) {
+		ce->ctx_bb_offset = context_size;
+		context_size += PAGE_SIZE;
+	}
+
 	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
 	if (IS_ERR(ctx_obj))
 		return PTR_ERR(ctx_obj);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index d39b72590e40..1364c1e31ebb 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -32,6 +32,7 @@
 
 /* GEN12+ Reg State Context */
 #define GEN12_CTX_BB_PER_CTX_PTR		(0x12 + 1)
+#define   CTX_BB_PER_CTX_PTR_VALID 		BIT(0)
 
 #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
 	u32 *reg_state__ = (reg_state); \
-- 
2.17.1



More information about the Intel-gfx-trybot mailing list