[PATCH v6 02/19] drm/xe/xelp: Implement HSDES#16010904313 workarounds
Tvrtko Ursulin
tvrtko.ursulin at igalia.com
Wed May 14 13:38:44 UTC 2025
Add the XeLP workarounds specified in HSDES#16010904313.
To do this, add the context indirect workaround page to the context
state and set it up, using i915 as the programming reference.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
---
drivers/gpu/drm/xe/regs/xe_engine_regs.h | 1 +
drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 7 ++
drivers/gpu/drm/xe/xe_lrc.c | 115 +++++++++++++++++++++++
drivers/gpu/drm/xe/xe_lrc_types.h | 3 +-
4 files changed, 125 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 7ade41e2b7b3..d1d2592e010d 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -75,6 +75,7 @@
#define RING_ACTHD(base) XE_REG((base) + 0x74)
#define RING_DMA_FADD(base) XE_REG((base) + 0x78)
#define RING_HWS_PGA(base) XE_REG((base) + 0x80)
+#define RING_CMD_BUF_CCTL(base) XE_REG((base) + 0x84)
#define RING_HWSTAM(base) XE_REG((base) + 0x98)
#define RING_MI_MODE(base) XE_REG((base) + 0x9c)
#define RING_NOPID(base) XE_REG((base) + 0x94)
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 994af591a2e8..82723f9783b1 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -12,6 +12,8 @@
#define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1)
#define CTX_BB_PER_CTX_PTR (0x12 + 1)
+#define CTX_CS_INDIRECT_CTX (0x14 + 1)
+#define CTX_CS_INDIRECT_CTX_OFFSET (0x16 + 1)
#define CTX_TIMESTAMP (0x22 + 1)
#define CTX_TIMESTAMP_UDW (0x24 + 1)
#define CTX_INDIRECT_RING_STATE (0x26 + 1)
@@ -30,10 +32,15 @@
#define CTX_CS_INT_VEC_REG 0x5a
#define CTX_CS_INT_VEC_DATA (CTX_CS_INT_VEC_REG + 1)
+#define CTX_GPR0 (0x74 + 1)
+#define CTX_CMD_BUF_CCTL (0xb6 + 1)
+
#define INDIRECT_CTX_RING_HEAD (0x02 + 1)
#define INDIRECT_CTX_RING_TAIL (0x04 + 1)
#define INDIRECT_CTX_RING_START (0x06 + 1)
#define INDIRECT_CTX_RING_START_UDW (0x08 + 1)
#define INDIRECT_CTX_RING_CTL (0x0a + 1)
+#define XELP_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0xd
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 08835e36a60d..6022068ce6b6 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -39,6 +39,7 @@
#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
#define LRC_PPHWSP_SIZE SZ_4K
+#define LRC_INDIRECT_CTX_SIZE SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
static struct xe_device *
@@ -47,6 +48,11 @@ lrc_to_xe(struct xe_lrc *lrc)
return gt_to_xe(lrc->fence_ctx.gt);
}
+/*
+ * Report whether the graphics IP version (x100) of @xe lies in the inclusive
+ * range 1200..1210. Devices in that range get an extra indirect context page
+ * in their LRC.
+ */
+static bool xe_needs_indirect_ctx(struct xe_device *xe)
+{
+	/* Anything below 12.00 or above 12.10 does not need the page. */
+	return !(GRAPHICS_VERx100(xe) < 1200 || GRAPHICS_VERx100(xe) > 1210);
+}
+
size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
struct xe_device *xe = gt_to_xe(gt);
@@ -79,6 +85,9 @@ size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
size += 1 * SZ_4K;
}
+ if (xe_needs_indirect_ctx(xe))
+ size += LRC_INDIRECT_CTX_SIZE;
+
/* Add indirect ring state page */
if (xe_gt_has_indirect_ring_state(gt))
size += LRC_INDIRECT_RING_STATE_SIZE;
@@ -974,6 +983,106 @@ static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
}
+/*
+ * Emit the context timestamp part of the HSDES#16010904313 workaround.
+ *
+ * The CTX_TIMESTAMP dword of this LRC's register state (addressed through the
+ * GGTT) is loaded into CS GPR0, and GPR0 is then copied into
+ * RING_CTX_TIMESTAMP.
+ *
+ * NOTE(review): the register-to-register copy is emitted twice; presumably
+ * this is required by the workaround, as in the i915 programming reference
+ * named in the commit message - confirm.
+ *
+ * CS GPR0 is overwritten here; the callers emit xelp_emit_restore_scratch()
+ * afterwards to reload it.
+ *
+ * Returns @cmd advanced past the 10 dwords written.
+ */
+static u32 *
+xelp_emit_timestamp_wa(struct xe_lrc *lrc, u32 *cmd)
+{
+	/* GPR0 <- CTX_TIMESTAMP dword of the LRC register state */
+	*cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO;
+	*cmd++ = CS_GPR_REG(0, 0).addr;
+	*cmd++ = __xe_lrc_regs_ggtt_addr(lrc) + CTX_TIMESTAMP * sizeof(u32);
+	*cmd++ = 0;
+
+	/*
+	 * RING_CTX_TIMESTAMP <- GPR0. MI_LOAD_REGISTER_REG takes the MI_LRR_*
+	 * flags, matching how xelp_emit_cmd_buf_wa() encodes the same command.
+	 */
+	*cmd++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO | MI_LRR_DST_CS_MMIO;
+	*cmd++ = CS_GPR_REG(0, 0).addr;
+	*cmd++ = RING_CTX_TIMESTAMP(0).addr;
+
+	/* Same copy a second time (see the note in the function comment). */
+	*cmd++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO | MI_LRR_DST_CS_MMIO;
+	*cmd++ = CS_GPR_REG(0, 0).addr;
+	*cmd++ = RING_CTX_TIMESTAMP(0).addr;
+
+	return cmd;
+}
+
+/*
+ * Emit an MI_LOAD_REGISTER_MEM that loads CS GPR0 from the CTX_GPR0 dword of
+ * this LRC's register state, addressed through the GGTT.
+ *
+ * Returns @cmd advanced past the 4 dwords written.
+ */
+static u32 *
+xelp_emit_restore_scratch(struct xe_lrc *lrc, u32 *cmd)
+{
+	cmd[0] = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO;
+	cmd[1] = CS_GPR_REG(0, 0).addr;
+	cmd[2] = __xe_lrc_regs_ggtt_addr(lrc) + CTX_GPR0 * sizeof(u32);
+	cmd[3] = 0;	/* presumably the upper address dword - confirm */
+
+	return cmd + 4;
+}
+
+/*
+ * Emit the RING_CMD_BUF_CCTL part of the workaround: load the
+ * CTX_CMD_BUF_CCTL dword of this LRC's register state (through the GGTT)
+ * into CS GPR0, then copy GPR0 into RING_CMD_BUF_CCTL.
+ *
+ * Returns @cmd advanced past the 7 dwords written.
+ */
+static u32 *
+xelp_emit_cmd_buf_wa(struct xe_lrc *lrc, u32 *cmd)
+{
+	u32 *out = cmd;
+
+	/* GPR0 <- CTX_CMD_BUF_CCTL dword of the LRC register state */
+	out[0] = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO;
+	out[1] = CS_GPR_REG(0, 0).addr;
+	out[2] = __xe_lrc_regs_ggtt_addr(lrc) + CTX_CMD_BUF_CCTL * sizeof(u32);
+	out[3] = 0;
+	out += 4;
+
+	/* RING_CMD_BUF_CCTL <- GPR0 */
+	out[0] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO | MI_LRR_DST_CS_MMIO;
+	out[1] = CS_GPR_REG(0, 0).addr;
+	out[2] = RING_CMD_BUF_CCTL(0).addr;
+
+	return out + 3;
+}
+
+/*
+ * Emit the HSDES#16010904313 command sequence used for the render engine:
+ * timestamp workaround, command buffer control workaround, then the GPR0
+ * restore. @hwe is not used here.
+ *
+ * Returns the command pointer past the last emitted dword.
+ */
+static u32 *
+xelp_setup_indirect_ctx_rcs(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+			    u32 *cmd)
+{
+	u32 *pos = cmd;
+
+	/* HSDES#16010904313 */
+	pos = xelp_emit_timestamp_wa(lrc, pos);
+	pos = xelp_emit_cmd_buf_wa(lrc, pos);
+
+	/* Both steps above go through CS GPR0, so reload it last. */
+	return xelp_emit_restore_scratch(lrc, pos);
+}
+
+/*
+ * Emit the HSDES#16010904313 command sequence used for every engine class
+ * other than render: timestamp workaround followed by the GPR0 restore.
+ * @hwe is not used here.
+ *
+ * Returns the command pointer past the last emitted dword.
+ */
+static u32 *
+xelp_setup_indirect_ctx_xcs(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+			    u32 *cmd)
+{
+	/* HSDES#16010904313 */
+	u32 *pos = xelp_emit_timestamp_wa(lrc, cmd);
+
+	/* The timestamp step goes through CS GPR0, so reload it last. */
+	return xelp_emit_restore_scratch(lrc, pos);
+}
+
+/*
+ * Fill the indirect context page of @lrc with the workaround commands for the
+ * engine class of @hwe and program the CTX_CS_INDIRECT_CTX and
+ * CTX_CS_INDIRECT_CTX_OFFSET entries of the LRC register state to point at it.
+ *
+ * Does nothing unless XE_LRC_FLAG_INDIRECT_CTX is set in lrc->flags.
+ */
+static void
+xelp_setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
+{
+ u32 *start, *cmd, *regs;
+ struct iosys_map map;
+ u32 ggtt, offset;
+
+ if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX))
+ return;
+
+ /*
+  * The page is taken to be the last LRC_INDIRECT_CTX_SIZE bytes of the LRC.
+  * NOTE(review): xe_gt_lrc_size() adds the indirect ring state page after
+  * this one, so this offset only holds while the two are never present
+  * together - confirm.
+  */
+ offset = lrc->size - LRC_INDIRECT_CTX_SIZE;
+ ggtt = xe_bo_ggtt_addr(lrc->bo) + offset;
+
+ /*
+  * NOTE(review): map.vaddr is dereferenced directly below; if the BO can be
+  * mapped as I/O memory the iosys_map accessors would be needed - confirm.
+  */
+ map = lrc->bo->vmap;
+ iosys_map_incr(&map, offset);
+ start = cmd = map.vaddr;
+
+ /* Render gets the longer sequence, every other class the shorter one. */
+ if (hwe->class == XE_ENGINE_CLASS_RENDER)
+ cmd = xelp_setup_indirect_ctx_rcs(lrc, hwe, cmd);
+ else
+ cmd = xelp_setup_indirect_ctx_xcs(lrc, hwe, cmd);
+
+ while ((unsigned long)cmd & 0x3f) /* Align to 64B cacheline. */
+ *cmd++ = MI_NOOP;
+
+ map = __xe_lrc_regs_map(lrc);
+ regs = map.vaddr;
+
+ /* GGTT address of the page OR-ed with the padded batch length. */
+ regs[CTX_CS_INDIRECT_CTX] = ggtt | ((cmd - start) * sizeof(u32) / 64); /* Size in CLs. */
+ regs[CTX_CS_INDIRECT_CTX_OFFSET] = XELP_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
+}
+
#define PVC_CTX_ASID (0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
@@ -997,6 +1106,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
lrc->size = ring_size + lrc_size;
lrc->ring.size = ring_size;
lrc->ring.tail = 0;
+
+ if (xe_needs_indirect_ctx(xe))
+ lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX;
+
if (xe_gt_has_indirect_ring_state(gt))
lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
@@ -1127,6 +1240,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_lrc_setup_utilization(lrc);
+ xelp_setup_indirect_ctx(lrc, hwe);
+
return 0;
err_lrc_finish:
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index 559c7c831212..9ce7d02ef210 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -29,7 +29,8 @@ struct xe_lrc {
struct xe_gt *gt;
/** @flags: LRC flags */
-#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1
+#define XE_LRC_FLAG_INDIRECT_CTX 0x1
+#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x2
u32 flags;
/** @refcount: ref count of this lrc */
--
2.48.0
More information about the Intel-xe
mailing list