[PATCH 8/8] drm/i915/selftests: Verify the LRC register layout between init and HW

Chris Wilson chris at chris-wilson.co.uk
Sat Sep 7 20:47:29 UTC 2019


Before we submit the first context to HW, we need to construct a valid
image of the register state. This layout is defined by the HW and should
match the layout generated by HW when it saves the context image.
Asserting that this should be equivalent should help avoid any undefined
behaviour and verify that we haven't missed anything important!

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 160 ++++++++++++++----
 drivers/gpu/drm/i915/gt/selftest_lrc.c        | 140 +++++++++++++++
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 3 files changed, 268 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0ddfbebbcbbc..88f622740c1b 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3096,7 +3096,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
 	return 0;
 }
 
-static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
+static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
 {
 	u32 indirect_ctx_offset;
 
@@ -3156,10 +3156,10 @@ static void init_common_reg_state(u32 * const regs,
 }
 
 static void init_wa_bb_reg_state(u32 * const regs,
-				 struct intel_engine_cs *engine,
+				 const struct intel_engine_cs *engine,
 				 u32 pos_bb_per_ctx)
 {
-	struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
+	const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
 	const u32 base = engine->mmio_base;
 	const u32 pos_indirect_ctx = pos_bb_per_ctx + 2;
 	const u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2;
@@ -3222,46 +3222,140 @@ static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
 		return i915_vm_to_ppgtt(vm);
 }
 
-static void gen8_init_reg_state(u32 * const regs,
-				struct intel_context *ce,
-				struct intel_engine_cs *engine,
-				struct intel_ring *ring)
+static void init_offsets(u32 *regs,
+			 const u8 *data, const u8 * const end,
+			 const struct intel_engine_cs *engine)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 5 | (count))
+#define LRI_POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >> 2 > 0xff))
 {
-	struct i915_ppgtt * const ppgtt = vm_alias(ce->vm);
-	const bool rcs = engine->class == RENDER_CLASS;
 	const u32 base = engine->mmio_base;
-	const u32 lri_base =
-		intel_engine_has_relative_mmio(engine) ? MI_LRI_CS_MMIO : 0;
 
-	regs[CTX_LRI_HEADER_0] =
-		MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
-		MI_LRI_FORCE_POSTED |
-		lri_base;
+	while (data < end) {
+		u8 count, flags;
 
-	init_common_reg_state(regs, ppgtt, engine, ring);
-	CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
-	CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
-	CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
-	if (rcs)
-		init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR);
-
-	regs[CTX_LRI_HEADER_1] =
-		MI_LOAD_REGISTER_IMM(9) |
-		MI_LRI_FORCE_POSTED |
-		lri_base;
+		if (*data & BIT(7)) { /* skip */
+			regs += *data++ & ~BIT(7);
+			continue;
+		}
 
-	CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
+		count = *data & 0x1f;
+		flags = *data >> 5;
+		data++;
 
-	init_ppgtt_reg_state(regs, base, ppgtt);
+		*regs = MI_LOAD_REGISTER_IMM(count);
+		if (flags & LRI_POSTED)
+			*regs |= MI_LRI_FORCE_POSTED;
+		regs++;
 
-	if (rcs) {
-		regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1) | lri_base;
-		CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+		while (count--) {
+			*regs = base + (*data++ << 2);
+			regs += 2;
+		}
 	}
 
-	regs[CTX_END] = MI_BATCH_BUFFER_END;
+	*regs = MI_BATCH_BUFFER_END;
 	if (INTEL_GEN(engine->i915) >= 10)
-		regs[CTX_END] |= BIT(0);
+		*regs |= BIT(0);
+}
+
+static void gen8_xcs_offsets(u32 *regs,
+			     const struct intel_engine_cs *engine)
+{
+	static const u8 data[] = {
+		NOP(1),
+		LRI(11, 0),
+		REG(0x244),
+		REG(0x034),
+		REG(0x030),
+		REG(0x038),
+		REG(0x03c),
+		REG(0x168),
+		REG(0x140),
+		REG(0x110),
+		REG(0x11c),
+		REG(0x114),
+		REG(0x118),
+
+		NOP(9),
+		LRI(9, 0),
+		REG(0x3a8),
+		REG(0x28c),
+		REG(0x288),
+		REG(0x284),
+		REG(0x280),
+		REG(0x27c),
+		REG(0x278),
+		REG(0x274),
+		REG(0x270),
+
+		NOP(13),
+		LRI(2, 0),
+		REG(0x200),
+		REG(0x028),
+	};
+
+	init_offsets(regs, data, data + sizeof(data), engine);
+}
+
+static void gen8_rcs_offsets(u32 * const regs,
+			     const struct intel_engine_cs *engine)
+{
+	static const u8 data[] = {
+		NOP(1),
+		LRI(14, LRI_POSTED),
+		REG(0x244),
+		REG(0x034),
+		REG(0x030),
+		REG(0x038),
+		REG(0x03c),
+		REG(0x168),
+		REG(0x140),
+		REG(0x110),
+		REG(0x11c),
+		REG(0x114),
+		REG(0x118),
+		REG(0x1c0),
+		REG(0x1c4),
+		REG(0x1c8),
+
+		NOP(3),
+		LRI(9, LRI_POSTED),
+		REG(0x3a8),
+		REG(0x28c),
+		REG(0x288),
+		REG(0x284),
+		REG(0x280),
+		REG(0x27c),
+		REG(0x278),
+		REG(0x274),
+		REG(0x270),
+
+		NOP(13),
+		LRI(1, 0),
+		REG(0x0c8),
+	};
+
+	init_offsets(regs, data, data + sizeof(data), engine);
+}
+
+static void gen8_init_reg_state(u32 * const regs,
+				struct intel_context *ce,
+				struct intel_engine_cs *engine,
+				struct intel_ring *ring)
+{
+	struct i915_ppgtt * const ppgtt = vm_alias(ce->vm);
+
+	if (engine->class == RENDER_CLASS) {
+		gen8_rcs_offsets(regs, engine);
+		init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR);
+	} else {
+		gen8_xcs_offsets(regs, engine);
+	}
+
+	init_common_reg_state(regs, ppgtt, engine, ring);
+	init_ppgtt_reg_state(regs, engine->mmio_base, ppgtt);
 }
 
 static void gen12_init_reg_state(u32 * const regs,
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index d791158988d6..5e7524c483d9 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -2191,3 +2191,143 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 
 	return i915_live_subtests(tests, i915);
 }
+
+static void hexdump(const void *buf, size_t len)
+{
+	const size_t rowsize = 8 * sizeof(u32);
+	const void *prev = NULL;
+	bool skip = false;
+	size_t pos;
+
+	for (pos = 0; pos < len; pos += rowsize) {
+		char line[128];
+
+		if (prev && !memcmp(prev, buf + pos, rowsize)) {
+			if (!skip) {
+				pr_info("*\n");
+				skip = true;
+			}
+			continue;
+		}
+
+		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
+						rowsize, sizeof(u32),
+						line, sizeof(line),
+						false) >= sizeof(line));
+		pr_info("[%04zx] %s\n", pos, line);
+
+		prev = buf + pos;
+		skip = false;
+	}
+}
+
+static int live_lrc_layout(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 *mem, *lrc;
+	int err = 0;
+
+	/*
+	 * Check the registers offsets we use to create the initial reg state
+	 * match the layout saved by HW.
+	 */
+
+	mem = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!mem)
+		return -ENOMEM;
+
+	for_each_engine(engine, gt->i915, id) {
+		u32 *hw;
+		int dw;
+
+		if (!engine->default_state)
+			continue;
+
+		hw = i915_gem_object_pin_map(engine->default_state,
+					     I915_MAP_WB);
+		if (IS_ERR(hw)) {
+			err = PTR_ERR(hw);
+			break;
+		}
+		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+		lrc = memset(mem, 0, PAGE_SIZE);
+		execlists_init_reg_state(lrc,
+					 engine->kernel_context,
+					 engine,
+					 engine->kernel_context->ring);
+
+		dw = 0;
+		do {
+			u32 lri = hw[dw];
+
+			if (lri == 0) {
+				dw++;
+				continue;
+			}
+
+			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
+				       engine->name, dw, lri);
+				err = -EINVAL;
+				break;
+			}
+
+			if (lrc[dw] != lri) {
+				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
+				       engine->name, dw, lri, lrc[dw]);
+				err = -EINVAL;
+				break;
+			}
+
+			lri &= 0x7f;
+			lri++;
+			dw++;
+
+			while (lri) {
+				if (hw[dw] != lrc[dw]) {
+					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
+					       engine->name, dw, hw[dw], lrc[dw]);
+					err = -EINVAL;
+					break;
+				}
+
+				/*
+				 * Skip over the actual register value as we
+				 * expect that to differ.
+				 */
+				dw += 2;
+				lri -= 2;
+			}
+		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+		if (err) {
+			pr_info("HW register image:\n");
+			hexdump(hw, PAGE_SIZE);
+
+			pr_info("SW register image:\n");
+			hexdump(lrc, PAGE_SIZE);
+		}
+
+		i915_gem_object_unpin_map(engine->default_state);
+		if (err)
+			break;
+	}
+
+	kfree(lrc);
+	return err;
+}
+
+int intel_lrc_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_lrc_layout),
+	};
+
+	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+		return 0;
+
+	return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 1ccf0f731ac0..66d83c1390c1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -15,6 +15,7 @@ selftest(workarounds, intel_workarounds_live_selftests)
 selftest(gt_engines, intel_engine_live_selftests)
 selftest(gt_timelines, intel_timeline_live_selftests)
 selftest(gt_contexts, intel_context_live_selftests)
+selftest(gt_lrc, intel_lrc_live_selftests)
 selftest(requests, i915_request_live_selftests)
 selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
-- 
2.23.0



More information about the Intel-gfx-trybot mailing list