[PATCH v2 5/6] drm/xe/xelp: Implement Wa_16010904313

Tvrtko Ursulin tvrtko.ursulin at igalia.com
Mon Jun 2 11:19:55 UTC 2025


Add XeLP workaround 16010904313.

The workaround description calls for it to be emitted from the indirect
context buffer for the render and compute engines, and from the workaround
batch buffer for the other engines. Therefore we plug it into the
respective top-level emission functions added earlier in this series.

The actual command streamer programming sequence differs from what is
described in the PRM: it assumes that the listed LRCA offset was meant to
refer to the location of the CTX_TIMESTAMP register rather than to
LRCA + 0x180c (which is in GPR space). The former interpretation appears to
make more sense, under the assumption that the multiple writes help with
restoring the CTX_TIMESTAMP register content from the saved context state.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
Cc: Lucas De Marchi <lucas.demarchi at intel.com>
Cc: Matt Roper <matthew.d.roper at intel.com>
---
 .../gpu/drm/xe/instructions/xe_mi_commands.h  |  1 +
 drivers/gpu/drm/xe/xe_lrc.c                   | 52 ++++++++++++++++++-
 drivers/gpu/drm/xe/xe_wa_oob.rules            |  1 +
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index e3f5e8bb3ebc..c47b290e0e9f 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -65,6 +65,7 @@
 
 #define MI_LOAD_REGISTER_MEM		(__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4))
 #define   MI_LRM_USE_GGTT		REG_BIT(22)
+#define   MI_LRM_ASYNC			REG_BIT(21)
 
 #define MI_LOAD_REGISTER_REG		(__MI_INSTR(0x2a) | XE_INSTR_NUM_DW(3))
 #define   MI_LRR_DST_CS_MMIO		REG_BIT(19)
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 6bc71f78fea1..d24d6a7fc5df 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -51,6 +51,11 @@ lrc_to_xe(struct xe_lrc *lrc)
 static bool
 gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
 {
+	if (XE_WA(gt, 16010904313) &&
+	    (class == XE_ENGINE_CLASS_RENDER ||
+	     class == XE_ENGINE_CLASS_COMPUTE))
+		return true;
+
 	return false;
 }
 
@@ -985,11 +990,51 @@ xe_lrc_setup_utilization(struct xe_lrc *lrc,
 	return offset + num_dw;
 }
 
-static void xe_lrc_setup_wa_bb(struct xe_lrc *lrc)
+static unsigned int
+xelp_emit_timestamp_wa(struct xe_lrc *lrc,
+		       struct iosys_map *map,
+		       unsigned int offset)
+{
+	const u32 ts_addr = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
+	unsigned int num_dw;
+	u32 batch[12];
+	u32 *cmd = batch;
+
+	*cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO |
+		 MI_LRM_ASYNC;
+	*cmd++ = RING_CTX_TIMESTAMP(0).addr;
+	*cmd++ = ts_addr;
+	*cmd++ = 0;
+
+	*cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO |
+		 MI_LRM_ASYNC;
+	*cmd++ = RING_CTX_TIMESTAMP(0).addr;
+	*cmd++ = ts_addr;
+	*cmd++ = 0;
+
+	*cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO;
+	*cmd++ = RING_CTX_TIMESTAMP(0).addr;
+	*cmd++ = ts_addr;
+	*cmd++ = 0;
+
+	num_dw = cmd - batch;
+	xe_map_memcpy_to(lrc_to_xe(lrc), map, 0, batch, num_dw * sizeof(u32));
+	iosys_map_incr(map, num_dw * sizeof(u32));
+
+	return offset + num_dw;
+}
+
+static void xe_lrc_setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
 {
 	struct iosys_map map = lrc->bb_per_ctx_bo->vmap;
 	unsigned int offset = 0;
 
+	if (XE_WA(lrc->gt, 16010904313) &&
+	    (hwe->class == XE_ENGINE_CLASS_COPY ||
+	     hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE ||
+	     hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE))
+		offset = xelp_emit_timestamp_wa(lrc, &map, offset);
+
 	offset = xe_lrc_setup_utilization(lrc, &map, offset);
 
 	xe_map_write32(lrc_to_xe(lrc), &map, MI_BATCH_BUFFER_END);
@@ -1003,6 +1048,9 @@ static unsigned int
 xelp_setup_indirect_ctx_rcs(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 			    struct iosys_map *map, unsigned int offset)
 {
+	if (XE_WA(lrc->gt, 16010904313))
+		offset = xelp_emit_timestamp_wa(lrc, map, offset);
+
 	return offset;
 }
 
@@ -1198,7 +1246,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
 	map = __xe_lrc_start_seqno_map(lrc);
 	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
 
-	xe_lrc_setup_wa_bb(lrc);
+	xe_lrc_setup_wa_bb(lrc, hwe);
 
 	xelp_setup_indirect_ctx(lrc, hwe);
 
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 9efc5accd43d..9abc4b09ac38 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -1,4 +1,5 @@
 1607983814	GRAPHICS_VERSION_RANGE(1200, 1210)
+16010904313	GRAPHICS_VERSION_RANGE(1200, 1210)
 22012773006	GRAPHICS_VERSION_RANGE(1200, 1250)
 14014475959	GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
 		PLATFORM(DG2)
-- 
2.48.0



More information about the Intel-xe mailing list