[PATCH v5 6/7] drm/xe/xelp: Implement Wa_16010904313
Lucas De Marchi
lucas.demarchi at intel.com
Wed Jun 25 21:47:28 UTC 2025
On Wed, Jun 25, 2025 at 04:36:31PM +0100, Tvrtko Ursulin wrote:
>Add XeLP workaround 16010904313.
>
>The description calls for it to be emitted as the indirect context buffer
>workaround for render and compute, and from the workaround batch buffer
>for the other engines. Therefore we plug into the previously added
>respective top level emission functions.
>
>The actual command streamer programming sequence differs from what is
>described in the PRM, in that it assumes the listed LRCA offset was
>supposed to actually refer to the location of the CTX_TIMESTAMP register
>instead of LRCA + 0x180c (which is in GPR space). Latter appears to make
>more sense under the assumption that multiple writes are helping with
>restoring the CTX_TIMESTAMP register content from the saved context state.
humn... but i915, which is supposedly doing the right thing,
diverges from the PRM by using LRM + LRR + LRR. Here you are doing
something different from both. Does gputop show saner values after this?
>
>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>Cc: Lucas De Marchi <lucas.demarchi at intel.com>
>Cc: Matt Roper <matthew.d.roper at intel.com>
>---
> .../gpu/drm/xe/instructions/xe_mi_commands.h | 1 +
> drivers/gpu/drm/xe/xe_lrc.c | 45 +++++++++++++++++++
> drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
> 3 files changed, 47 insertions(+)
>
>diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>index e3f5e8bb3ebc..c47b290e0e9f 100644
>--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>@@ -65,6 +65,7 @@
>
> #define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4))
> #define MI_LRM_USE_GGTT REG_BIT(22)
>+#define MI_LRM_ASYNC REG_BIT(21)
>
> #define MI_LOAD_REGISTER_REG (__MI_INSTR(0x2a) | XE_INSTR_NUM_DW(3))
> #define MI_LRR_DST_CS_MMIO REG_BIT(19)
>diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
>index d4f558a863eb..f86a09ed6848 100644
>--- a/drivers/gpu/drm/xe/xe_lrc.c
>+++ b/drivers/gpu/drm/xe/xe_lrc.c
>@@ -52,6 +52,11 @@ lrc_to_xe(struct xe_lrc *lrc)
> static bool
> gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
> {
>+ if (XE_WA(gt, 16010904313) &&
>+ (class == XE_ENGINE_CLASS_RENDER ||
>+ class == XE_ENGINE_CLASS_COMPUTE))
>+ return true;
>+
> return false;
> }
>
>@@ -990,6 +995,44 @@ static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine *
> return cmd - batch;
> }
>
>+static ssize_t
>+xelp_emit_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
>+ u32 *batch, size_t max_len)
>+{
>+ const u32 ts_addr = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
>+ u32 *cmd = batch;
>+
>+ if (!XE_WA(lrc->gt, 16010904313) ||
>+ !(hwe->class == XE_ENGINE_CLASS_RENDER ||
>+ hwe->class == XE_ENGINE_CLASS_COMPUTE ||
>+ hwe->class == XE_ENGINE_CLASS_COPY ||
>+ hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE ||
>+ hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE))
>+ return 0;
>+
>+ if (xe_gt_WARN_ON(lrc->gt, max_len < 12))
>+ return -ENOSPC;
>+
>+ *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO |
>+ MI_LRM_ASYNC;
>+ *cmd++ = RING_CTX_TIMESTAMP(0).addr;
>+ *cmd++ = ts_addr;
>+ *cmd++ = 0;
>+
>+ *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO |
>+ MI_LRM_ASYNC;
>+ *cmd++ = RING_CTX_TIMESTAMP(0).addr;
>+ *cmd++ = ts_addr;
>+ *cmd++ = 0;
>+
>+ *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO;
>+ *cmd++ = RING_CTX_TIMESTAMP(0).addr;
>+ *cmd++ = ts_addr;
>+ *cmd++ = 0;
>+
>+ return cmd - batch;
>+}
>+
> struct wa_bo_setup {
> ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
> u32 *batch, size_t max_size);
>@@ -1061,6 +1104,7 @@ static void finish_wa_bo(struct xe_lrc *lrc,
> static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
> {
> static const struct wa_bo_setup funcs[] = {
>+ { .setup = xelp_emit_timestamp_wa },
> { .setup = wa_bb_setup_utilization },
I think there's a mix of names here. It seems it would be better to name
these like this:
static const struct bo_setup funcs[] = {
{ .setup = bo_setup_timestamp_wa },
{ .setup = bo_setup_utilization },
}
> };
> unsigned int offset = __xe_lrc_wa_bb_offset(lrc);
>@@ -1087,6 +1131,7 @@ static int
> setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
> {
> static struct wa_bo_setup rcs_funcs[] = {
>+ { .setup = xelp_emit_timestamp_wa },
> };
here it would be:
static struct bo_setup rcs_funcs[] = {
{ .setup = bo_setup_timestamp_wa },
};
or drop the bo_ prefix too and just use setup_xyz
Lucas De Marchi
> unsigned int offset, num_funcs, written = 0;
> struct wa_bo_setup *funcs = NULL;
>diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
>index 8c2aa48cb33a..51967489c564 100644
>--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
>+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
>@@ -1,4 +1,5 @@
> 1607983814 GRAPHICS_VERSION_RANGE(1200, 1210)
>+16010904313 GRAPHICS_VERSION_RANGE(1200, 1210)
> 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250)
> 14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
> PLATFORM(DG2)
>--
>2.48.0
>
More information about the Intel-xe
mailing list