[PATCH v10 6/8] drm/xe: Add plumbing for indirect context workarounds

Lucas De Marchi lucas.demarchi at intel.com
Fri Jul 11 16:33:38 UTC 2025


On Fri, Jul 11, 2025 at 05:01:51PM +0100, Tvrtko Ursulin wrote:
>Some upcoming workarounds need to be emitted from the indirect workaround
>context, so let's add some plumbing where they will be able to easily slot
>in.
>
>No functional changes for now since everything is still deactivated.
>
>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>Bspec: 45954
>Cc: Lucas De Marchi <lucas.demarchi at intel.com>
>Cc: Matt Roper <matthew.d.roper at intel.com>


Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>

thanks,
Lucas De Marchi

>---
> drivers/gpu/drm/xe/regs/xe_lrc_layout.h |  5 ++
> drivers/gpu/drm/xe/xe_lrc.c             | 84 ++++++++++++++++++++++++-
> drivers/gpu/drm/xe/xe_lrc_types.h       |  3 +-
> 3 files changed, 89 insertions(+), 3 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
>index 994af591a2e8..e0437d0454e6 100644
>--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
>+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
>@@ -12,6 +12,8 @@
> #define CTX_RING_START			(0x08 + 1)
> #define CTX_RING_CTL			(0x0a + 1)
> #define CTX_BB_PER_CTX_PTR		(0x12 + 1)
>+#define CTX_CS_INDIRECT_CTX		(0x14 + 1)
>+#define CTX_CS_INDIRECT_CTX_OFFSET	(0x16 + 1)
> #define CTX_TIMESTAMP			(0x22 + 1)
> #define CTX_TIMESTAMP_UDW		(0x24 + 1)
> #define CTX_INDIRECT_RING_STATE		(0x26 + 1)
>@@ -36,4 +38,7 @@
> #define INDIRECT_CTX_RING_START_UDW	(0x08 + 1)
> #define INDIRECT_CTX_RING_CTL		(0x0a + 1)
>
>+#define CTX_INDIRECT_OFFSET_MASK	REG_GENMASK(15, 6)
>+#define CTX_INDIRECT_CTX_OFFSET_DEFAULT	REG_FIELD_PREP(CTX_INDIRECT_OFFSET_MASK, 0xd)
>+
> #endif
>diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
>index 768cb9de48d6..fc5c6ff6e34a 100644
>--- a/drivers/gpu/drm/xe/xe_lrc.c
>+++ b/drivers/gpu/drm/xe/xe_lrc.c
>@@ -39,6 +39,7 @@
> #define LRC_ENGINE_INSTANCE			GENMASK_ULL(53, 48)
>
> #define LRC_PPHWSP_SIZE				SZ_4K
>+#define LRC_INDIRECT_CTX_BO_SIZE		SZ_4K
> #define LRC_INDIRECT_RING_STATE_SIZE		SZ_4K
> #define LRC_WA_BB_SIZE				SZ_4K
>
>@@ -48,6 +49,12 @@ lrc_to_xe(struct xe_lrc *lrc)
> 	return gt_to_xe(lrc->fence_ctx.gt);
> }
>
>+static bool
>+gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
>+{
>+	return false;
>+}
>+
> size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
> {
> 	struct xe_device *xe = gt_to_xe(gt);
>@@ -717,7 +724,18 @@ static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
>
> static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
> {
>-	return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_RING_STATE_SIZE;
>+	u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE -
>+		     LRC_INDIRECT_RING_STATE_SIZE;
>+
>+	if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX)
>+		offset -= LRC_INDIRECT_CTX_BO_SIZE;
>+
>+	return offset;
>+}
>+
>+static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc)
>+{
>+	return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE;
> }
>
> static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc)
>@@ -1077,6 +1095,58 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
> 	return 0;
> }
>
>+static int
>+setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
>+{
>+	static struct bo_setup rcs_funcs[] = {
>+	};
>+	struct bo_setup_state state = {
>+		.lrc = lrc,
>+		.hwe = hwe,
>+		.max_size = (63 * 64) /* max 63 cachelines */,
>+		.offset = __xe_lrc_indirect_ctx_offset(lrc),
>+	};
>+	int ret;
>+
>+	if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX))
>+		return 0;
>+
>+	if (hwe->class == XE_ENGINE_CLASS_RENDER ||
>+	    hwe->class == XE_ENGINE_CLASS_COMPUTE) {
>+		state.funcs = rcs_funcs;
>+		state.num_funcs = ARRAY_SIZE(rcs_funcs);
>+	}
>+
>+	if (xe_gt_WARN_ON(lrc->gt, !state.funcs))
>+		return 0;
>+
>+	ret = setup_bo(&state);
>+	if (ret)
>+		return ret;
>+
>+	/*
>+	 * Align to 64B cacheline so there's no garbage at the end for CS to
>+	 * execute: size for indirect ctx must be a multiple of 64.
>+	 */
>+	while (state.written & 0xf) {
>+		*state.ptr++ = MI_NOOP;
>+		state.written++;
>+	}
>+
>+	finish_bo(&state);
>+
>+	xe_lrc_write_ctx_reg(lrc,
>+			     CTX_CS_INDIRECT_CTX,
>+			     (xe_bo_ggtt_addr(lrc->bo) + state.offset) |
>+			     /* Size in CLs. */
>+			     (state.written * sizeof(u32) / 64));
>+	xe_lrc_write_ctx_reg(lrc,
>+			     CTX_CS_INDIRECT_CTX_OFFSET,
>+			     CTX_INDIRECT_CTX_OFFSET_DEFAULT);
>+
>+	return 0;
>+}
>+
> #define PVC_CTX_ASID		(0x2e + 1)
> #define PVC_CTX_ACC_CTR_THOLD	(0x2a + 1)
>
>@@ -1086,7 +1156,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
> {
> 	struct xe_gt *gt = hwe->gt;
> 	const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class);
>-	const u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE;
>+	u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE;
> 	struct xe_tile *tile = gt_to_tile(gt);
> 	struct xe_device *xe = gt_to_xe(gt);
> 	struct iosys_map map;
>@@ -1101,6 +1171,12 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
> 	lrc->flags = 0;
> 	lrc->ring.size = ring_size;
> 	lrc->ring.tail = 0;
>+
>+	if (gt_engine_needs_indirect_ctx(gt, hwe->class)) {
>+		lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX;
>+		bo_size += LRC_INDIRECT_CTX_BO_SIZE;
>+	}
>+
> 	if (xe_gt_has_indirect_ring_state(gt))
> 		lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
>
>@@ -1225,6 +1301,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
> 	if (err)
> 		goto err_lrc_finish;
>
>+	err = setup_indirect_ctx(lrc, hwe);
>+	if (err)
>+		goto err_lrc_finish;
>+
> 	return 0;
>
> err_lrc_finish:
>diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
>index 2c7c81079801..e9883706e004 100644
>--- a/drivers/gpu/drm/xe/xe_lrc_types.h
>+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
>@@ -29,7 +29,8 @@ struct xe_lrc {
> 	struct xe_gt *gt;
>
> 	/** @flags: LRC flags */
>-#define XE_LRC_FLAG_INDIRECT_RING_STATE		0x1
>+#define XE_LRC_FLAG_INDIRECT_CTX		0x1
>+#define XE_LRC_FLAG_INDIRECT_RING_STATE		0x2
> 	u32 flags;
>
> 	/** @refcount: ref count of this lrc */
>-- 
>2.48.0
>


More information about the Intel-xe mailing list