[PATCH v5 6/7] drm/xe/xelp: Implement Wa_16010904313
Tvrtko Ursulin
tvrtko.ursulin at igalia.com
Thu Jun 26 08:57:05 UTC 2025
On 25/06/2025 22:47, Lucas De Marchi wrote:
> On Wed, Jun 25, 2025 at 04:36:31PM +0100, Tvrtko Ursulin wrote:
>> Add XeLP workaround 16010904313.
>>
>> The description calls for it to be emitted as the indirect context buffer
>> workaround for render and compute, and from the workaround batch buffer
>> for the other engines. Therefore we plug into the previously added
>> respective top level emission functions.
>>
>> The actual command streamer programming sequence differs from what is
>> described in the PRM, in that it assumes the listed LRCA offset was
>> supposed to actually refer to the location of the CTX_TIMESTAMP register
>> instead of LRCA + 0x180c (which is in GPR space). Latter appears to make
>> more sense under the assumption that multiple writes are helping with
>> restoring the CTX_TIMESTAMP register content from the saved context
>> state.
>
> humn... but i915, which is supposedly doing the right thing,
> diverges from the PRM by using LRM + LRR + LRR. Here you are doing
Matt reported i915 was doing the wrong thing actually, compared to what
was documented in the WA database at least. Not that the latter was
completely clear, but we had a brief guessing game and I understood the
conclusion was 3x LRM to forcefully restore CTX_TIMESTAMP is what made
most sense.
See
https://lore.kernel.org/intel-xe/20250523174135.GS5080@mdroper-desk1.amr.corp.intel.com/.
> something different from both. Does gputop show saner values after this?
Gputop looks okay to me. Are you saying this WA is fixing something is
regularly goes wrong so could be easily observed? I thought it was about
a rare failure to restore CTX_TIMESTAMP but that was just a guess to be
fair.
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
>> Cc: Matt Roper <matthew.d.roper at intel.com>
>> ---
>> .../gpu/drm/xe/instructions/xe_mi_commands.h | 1 +
>> drivers/gpu/drm/xe/xe_lrc.c | 45 +++++++++++++++++++
>> drivers/gpu/drm/xe/xe_wa_oob.rules | 1 +
>> 3 files changed, 47 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/
>> drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>> index e3f5e8bb3ebc..c47b290e0e9f 100644
>> --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>> +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
>> @@ -65,6 +65,7 @@
>>
>> #define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) |
>> XE_INSTR_NUM_DW(4))
>> #define MI_LRM_USE_GGTT REG_BIT(22)
>> +#define MI_LRM_ASYNC REG_BIT(21)
>>
>> #define MI_LOAD_REGISTER_REG (__MI_INSTR(0x2a) |
>> XE_INSTR_NUM_DW(3))
>> #define MI_LRR_DST_CS_MMIO REG_BIT(19)
>> diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
>> index d4f558a863eb..f86a09ed6848 100644
>> --- a/drivers/gpu/drm/xe/xe_lrc.c
>> +++ b/drivers/gpu/drm/xe/xe_lrc.c
>> @@ -52,6 +52,11 @@ lrc_to_xe(struct xe_lrc *lrc)
>> static bool
>> gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class
>> class)
>> {
>> + if (XE_WA(gt, 16010904313) &&
>> + (class == XE_ENGINE_CLASS_RENDER ||
>> + class == XE_ENGINE_CLASS_COMPUTE))
>> + return true;
>> +
>> return false;
>> }
>>
>> @@ -990,6 +995,44 @@ static ssize_t wa_bb_setup_utilization(struct
>> xe_lrc *lrc, struct xe_hw_engine *
>> return cmd - batch;
>> }
>>
>> +static ssize_t
>> +xelp_emit_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
>> + u32 *batch, size_t max_len)
>> +{
>> + const u32 ts_addr = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
>> + u32 *cmd = batch;
>> +
>> + if (!XE_WA(lrc->gt, 16010904313) ||
>> + !(hwe->class == XE_ENGINE_CLASS_RENDER ||
>> + hwe->class == XE_ENGINE_CLASS_COMPUTE ||
>> + hwe->class == XE_ENGINE_CLASS_COPY ||
>> + hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE ||
>> + hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE))
>> + return 0;
>> +
>> + if (xe_gt_WARN_ON(lrc->gt, max_len < 12))
>> + return -ENOSPC;
>> +
>> + *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT |
>> MI_LRI_LRM_CS_MMIO |
>> + MI_LRM_ASYNC;
>> + *cmd++ = RING_CTX_TIMESTAMP(0).addr;
>> + *cmd++ = ts_addr;
>> + *cmd++ = 0;
>> +
>> + *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT |
>> MI_LRI_LRM_CS_MMIO |
>> + MI_LRM_ASYNC;
>> + *cmd++ = RING_CTX_TIMESTAMP(0).addr;
>> + *cmd++ = ts_addr;
>> + *cmd++ = 0;
>> +
>> + *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT |
>> MI_LRI_LRM_CS_MMIO;
>> + *cmd++ = RING_CTX_TIMESTAMP(0).addr;
>> + *cmd++ = ts_addr;
>> + *cmd++ = 0;
>> +
>> + return cmd - batch;
>> +}
>> +
>> struct wa_bo_setup {
>> ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
>> u32 *batch, size_t max_size);
>> @@ -1061,6 +1104,7 @@ static void finish_wa_bo(struct xe_lrc *lrc,
>> static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
>> {
>> static const struct wa_bo_setup funcs[] = {
>> + { .setup = xelp_emit_timestamp_wa },
>> { .setup = wa_bb_setup_utilization },
>
> I think there's a mix of names here. It seems it would be better to name
> these like this:
>
> static const struct bo_setup funcs[] = {
> { .setup = bo_setup_timestamp_wa },
> { .setup = bo_setup_utilization },
> }
>
>
>> };
>> unsigned int offset = __xe_lrc_wa_bb_offset(lrc);
>> @@ -1087,6 +1131,7 @@ static int
>> setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
>> {
>> static struct wa_bo_setup rcs_funcs[] = {
>> + { .setup = xelp_emit_timestamp_wa },
>> };
>
> here it would be:
>
> static struct bo_setup rcs_funcs[] = {
> { .setup = bo_setup_timestamp_wa },
> };
>
> or drop the bo_ prefix too and just use setup_xyz
No strong opinion from me - I can change it to either. Logic behind
xelp_ prefix was that it is a workaround which only applies to those
platforms.
Regards,
Tvrtko
>
>
> Lucas De Marchi
>
>> unsigned int offset, num_funcs, written = 0;
>> struct wa_bo_setup *funcs = NULL;
>> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/
>> xe_wa_oob.rules
>> index 8c2aa48cb33a..51967489c564 100644
>> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
>> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> @@ -1,4 +1,5 @@
>> 1607983814 GRAPHICS_VERSION_RANGE(1200, 1210)
>> +16010904313 GRAPHICS_VERSION_RANGE(1200, 1210)
>> 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250)
>> 14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
>> PLATFORM(DG2)
>> --
>> 2.48.0
>>
More information about the Intel-xe
mailing list