[PATCH] drm/xe: Apply Wa_16023105232

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Thu Mar 6 01:50:48 UTC 2025



On 2/25/2025 7:29 PM, Vinay Belgaumkar wrote:
> The WA requires KMD to disable DOP clock gating during a semaphore
> wait and also ensure that idle delay for every CS is lower than the
> idle wait time in the PWRCTX_MAXCNT register. Default values for these
> registers already comply with this restriction.
>
> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
> ---
>   drivers/gpu/drm/xe/regs/xe_engine_regs.h |  2 ++
>   drivers/gpu/drm/xe/xe_gt_clock.c         | 29 +++++++++++++++++++++++
>   drivers/gpu/drm/xe/xe_gt_clock.h         |  1 +
>   drivers/gpu/drm/xe/xe_hw_engine.c        | 30 ++++++++++++++++++++++++
>   drivers/gpu/drm/xe/xe_wa_oob.rules       |  2 ++
>   5 files changed, 64 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> index 4f372dc2cb89..067468c62adb 100644
> --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> @@ -130,6 +130,8 @@
>   #define RING_EXECLIST_STATUS_LO(base)		XE_REG((base) + 0x234)
>   #define RING_EXECLIST_STATUS_HI(base)		XE_REG((base) + 0x234 + 4)
>   
> +#define RING_IDLEDLY(base)			XE_REG((base) + 0x23C)
> +
>   #define RING_CONTEXT_CONTROL(base)		XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
>   #define	  CTX_CTRL_PXP_ENABLE			REG_BIT(10)
>   #define	  CTX_CTRL_OAC_CONTEXT_ENABLE		REG_BIT(8)
> diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
> index cc2ae159298e..88e3df76a885 100644
> --- a/drivers/gpu/drm/xe/xe_gt_clock.c
> +++ b/drivers/gpu/drm/xe/xe_gt_clock.c
> @@ -55,6 +55,35 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg)
>   	}
>   }
>   
> +/**
> + * xe_gt_timestamp_base - Return the GT timestamp base
> + *
> + * @gt: the &xe_gt
> + * Returns: time in nsec

Might be worth noting that this is a rounded value (though the rounding 
error maxes out at ~0.4%, so not a big problem).

> + */
> +u32 xe_gt_timestamp_base(struct xe_gt *gt)
> +{
> +	const u32 ts_base_83 = 83333;
> +	const u32 ts_base_52 = 52083;
> +	const u32 ts_base_80 = 80000;
> +	u32 rpm_config0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
> +	u32 crystal_clock = REG_FIELD_GET(RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK,
> +					  rpm_config0);
> +

The xe_gt_clock_init function is already reading this and saving it into 
gt->info.reference_clock (with some manipulation). Can we re-use it from 
there? Or can we calculate the timestamp base there and save it in gt->info?

> +	switch (crystal_clock) {
> +	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
> +		return DIV_ROUND_CLOSEST(ts_base_83, 1000);
> +	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
> +	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
> +		return DIV_ROUND_CLOSEST(ts_base_52, 1000);
> +	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
> +		return DIV_ROUND_CLOSEST(ts_base_80, 1000);
> +	default:
> +		XE_WARN_ON("NOT_POSSIBLE");
> +		return 0;
> +	}
> +}
> +
>   int xe_gt_clock_init(struct xe_gt *gt)
>   {
>   	u32 ctc_reg = xe_mmio_read32(&gt->mmio, CTC_MODE);
> diff --git a/drivers/gpu/drm/xe/xe_gt_clock.h b/drivers/gpu/drm/xe/xe_gt_clock.h
> index 3adeb7baaca4..b78665b0b924 100644
> --- a/drivers/gpu/drm/xe/xe_gt_clock.h
> +++ b/drivers/gpu/drm/xe/xe_gt_clock.h
> @@ -12,5 +12,6 @@ struct xe_gt;
>   
>   int xe_gt_clock_init(struct xe_gt *gt);
>   u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count);
> +u32 xe_gt_timestamp_base(struct xe_gt *gt);
>   
>   #endif
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index fc447751fe78..609ba19251bd 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -8,7 +8,9 @@
>   #include <linux/nospec.h>
>   
>   #include <drm/drm_managed.h>
> +#include <drm/drm_print.h>
>   #include <uapi/drm/xe_drm.h>
> +#include <generated/xe_wa_oob.h>
>   
>   #include "regs/xe_engine_regs.h"
>   #include "regs/xe_gt_regs.h"
> @@ -21,6 +23,7 @@
>   #include "xe_gsc.h"
>   #include "xe_gt.h"
>   #include "xe_gt_ccs_mode.h"
> +#include "xe_gt_clock.h"
>   #include "xe_gt_printk.h"
>   #include "xe_gt_mcr.h"
>   #include "xe_gt_topology.h"
> @@ -459,6 +462,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
>   		  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
>   				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
>   		},
> +		{ XE_RTP_NAME("Disable DOP clk gating"),
> +		  XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 3000),
> +			       GRAPHICS_VERSION_RANGE(2001, 3001)),

XE_RTP_RULES() does an AND of the rules, while here it looks like you 
want an OR. Maybe use the FUNC() helper?

> +		  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE,
> +				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
> +		},
>   		{}
>   	};
>   
> @@ -566,6 +575,24 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
>   	xe_reg_whitelist_process_engine(hwe);
>   }
>   
> +static void check_idledly(struct xe_gt *gt, struct xe_hw_engine *hwe)
> +{
> +	u32 idledly, maxcnt;
> +	u32 idledly_units_ns = 8 * xe_gt_timestamp_base(gt);
> +	u32 maxcnt_units_ns = 640;
> +
> +	if (XE_WA(gt, 16023105232)) {
> +		idledly = xe_mmio_read32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base));
> +		maxcnt = xe_mmio_read32(&gt->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base));
> +
> +		if (drm_WARN_ON(&gt_to_xe(gt)->drm,
> +				(idledly * idledly_units_ns) >= (maxcnt * maxcnt_units_ns))) {
> +			xe_mmio_write32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base),
> +					((maxcnt - 1) * maxcnt_units_ns) / idledly_units_ns);
> +		}
> +	}
> +}
> +
>   static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
>   			  enum xe_hw_engine_id id)
>   {
> @@ -606,6 +633,9 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
>   	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
>   		gt->usm.reserved_bcs_instance = hwe->instance;
>   
> +	/* Ensure IDLEDLY is lower than MAXCNT */
> +	check_idledly(gt, hwe);

Might be worth adding "wa" somewhere in this function name to make it 
clear that it is workaround-related. Maybe just check_wa_16023105232?

Daniele

> +
>   	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);
>   
>   err_hwsp:
> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
> index 228436532282..b45ec8bd0735 100644
> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
> @@ -43,3 +43,5 @@
>   no_media_l3	MEDIA_VERSION(3000)
>   14022866841	GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)
>   		MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
> +16023105232	GRAPHICS_VERSION_RANGE(2001, 3001)
> +		MEDIA_VERSION_RANGE(1301, 3000)



More information about the Intel-xe mailing list