[PATCH v4] drm/xe: Apply Wa_16023105232

Matt Roper matthew.d.roper at intel.com
Thu Mar 13 20:22:23 UTC 2025


On Thu, Mar 13, 2025 at 11:04:40AM -0700, Vinay Belgaumkar wrote:
> The WA requires KMD to disable DOP clock gating during a semaphore
> wait and also ensure that idle delay for every CS is lower than the
> idle wait time in the PWRCTX_MAXCNT register. Default values for these
> registers already comply with this restriction.
> 
> v2: Store timestamp_base in gt info and other comments (Daniele)
> v3: Skip WA check for VF
> v4: Review comments (Matt Roper)
> 
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
> ---
>  drivers/gpu/drm/xe/regs/xe_engine_regs.h |  2 ++
>  drivers/gpu/drm/xe/xe_gt_clock.c         | 36 ++++++++++++++++--------
>  drivers/gpu/drm/xe/xe_gt_types.h         |  2 ++
>  drivers/gpu/drm/xe/xe_hw_engine.c        | 30 ++++++++++++++++++++
>  drivers/gpu/drm/xe/xe_wa.c               |  6 ++++
>  drivers/gpu/drm/xe/xe_wa_oob.rules       |  2 ++
>  6 files changed, 67 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> index 659cf85fa3d6..05658ad991f4 100644
> --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
> @@ -130,6 +130,8 @@
>  #define RING_EXECLIST_STATUS_LO(base)		XE_REG((base) + 0x234)
>  #define RING_EXECLIST_STATUS_HI(base)		XE_REG((base) + 0x234 + 4)
>  
> +#define RING_IDLEDLY(base)			XE_REG((base) + 0x23c)
> +
>  #define RING_CONTEXT_CONTROL(base)		XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
>  #define	  CTX_CTRL_PXP_ENABLE			REG_BIT(10)
>  #define	  CTX_CTRL_OAC_CONTEXT_ENABLE		REG_BIT(8)
> diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
> index fca38738e610..f493b1687851 100644
> --- a/drivers/gpu/drm/xe/xe_gt_clock.c
> +++ b/drivers/gpu/drm/xe/xe_gt_clock.c
> @@ -16,27 +16,41 @@
>  #include "xe_macros.h"
>  #include "xe_mmio.h"
>  
> -static u32 get_crystal_clock_freq(u32 rpm_config_reg)
> +#define f19_2_mhz	19200000
> +#define f24_mhz		24000000
> +#define f25_mhz		25000000
> +#define f38_4_mhz	38400000
> +#define ts_base_83	83333
> +#define ts_base_52	52083
> +#define ts_base_80	80000
> +
> +static void read_crystal_clock(u32 rpm_config_reg, u32 *freq, u32 *timestamp_base)
>  {
> -	const u32 f19_2_mhz = 19200000;
> -	const u32 f24_mhz = 24000000;
> -	const u32 f25_mhz = 25000000;
> -	const u32 f38_4_mhz = 38400000;
>  	u32 crystal_clock = REG_FIELD_GET(RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK,
>  					  rpm_config_reg);
>  
>  	switch (crystal_clock) {
>  	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
> -		return f24_mhz;
> +		*freq = f24_mhz;
> +		*timestamp_base = ts_base_83;
> +		return;
>  	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
> -		return f19_2_mhz;
> +		*freq = f19_2_mhz;
> +		*timestamp_base = ts_base_52;
> +		return;
>  	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
> -		return f38_4_mhz;
> +		*freq = f38_4_mhz;
> +		*timestamp_base = ts_base_52;
> +		return;
>  	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
> -		return f25_mhz;
> +		*freq = f25_mhz;
> +		*timestamp_base = ts_base_80;
> +		return;
>  	default:
>  		XE_WARN_ON("NOT_POSSIBLE");

While we're making changes here, it might be worth improving the warning
here to actually print out the value we got.  If we get bug reports and
dmesg just says "NOT_POSSIBLE" without telling us the bogus value it
received it makes it harder to debug what's going on.  If we pass the GT
parameter to this function, we could also use xe_gt_WARN_ON so that
we'll know which GT (primary vs media) it was that caused the problem.

> -		return 0;
> +		*freq = 0;
> +		*timestamp_base = 0;
> +		return;
>  	}
>  }
>  
> @@ -65,7 +79,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
>  		check_ctc_mode(gt);
>  
>  	c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
> -	freq = get_crystal_clock_freq(c0);
> +	read_crystal_clock(c0, &freq, &gt->info.timestamp_base);
>  
>  	/*
>  	 * Now figure out how the command stream's timestamp
> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> index e3cfb026ac88..7def0959da35 100644
> --- a/drivers/gpu/drm/xe/xe_gt_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> @@ -121,6 +121,8 @@ struct xe_gt {
>  		enum xe_gt_type type;
>  		/** @info.reference_clock: clock frequency */
>  		u32 reference_clock;
> +		/** @info.timestamp_base: GT timestamp base */
> +		u32 timestamp_base;
>  		/**
>  		 * @info.engine_mask: mask of engines present on GT. Some of
>  		 * them may be reserved in runtime and not available for user.
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index 223b95de388c..4b8a54c16ac7 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -8,7 +8,9 @@
>  #include <linux/nospec.h>
>  
>  #include <drm/drm_managed.h>
> +#include <drm/drm_print.h>
>  #include <uapi/drm/xe_drm.h>
> +#include <generated/xe_wa_oob.h>
>  
>  #include "regs/xe_engine_regs.h"
>  #include "regs/xe_gt_regs.h"
> @@ -21,6 +23,7 @@
>  #include "xe_gsc.h"
>  #include "xe_gt.h"
>  #include "xe_gt_ccs_mode.h"
> +#include "xe_gt_clock.h"
>  #include "xe_gt_printk.h"
>  #include "xe_gt_mcr.h"
>  #include "xe_gt_topology.h"
> @@ -564,6 +567,29 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
>  	xe_reg_whitelist_process_engine(hwe);
>  }
>  
> +static void adjust_idledly(struct xe_hw_engine *hwe)
> +{
> +	struct xe_gt *gt = hwe->gt;
> +	u32 idledly, maxcnt;
> +	u32 idledly_units_ps = 8 * gt->info.timestamp_base;
> +	u32 maxcnt_units_ns = 640;
> +
> +	if (XE_WA(gt, 16023105232)) {
> +		idledly = xe_mmio_read32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base));
> +		maxcnt = xe_mmio_read32(&gt->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base));
> +
> +		idledly = DIV_ROUND_CLOSEST(idledly * idledly_units_ps, 1000);

I overlooked it on the previous revision, but it looks like we're using
the full contents of the IDLEDLY register here rather than just [20:0]
that hold the delay value.  In practice things will probably still work
out properly since all of the other bits except for 31 are reserved and
probably 0's (and if bit 31 is set, then we'll probably satisfy the >=
condition necessary to rewrite the register.  But if this workaround
gets extended to future platforms, and those platforms also add more
non-delay flag bits to the register it could mess up the logic.

So we probably want to use REG_FIELD_GET() to extract the value, and
reprogram the register if the IDLEDLY value is larger, or if bit[31] is
set (since that effectively means "infinite delay").

> +		maxcnt *= maxcnt_units_ns;

Similarly, we should probalby do a REG_FIELD_GET() on PWRCTX_MAXCNT too
just in case something else gets added to the higher bits of this
register in the future.


Matt

> +
> +		if (xe_gt_WARN_ON(gt, idledly >= maxcnt)) {
> +			idledly = DIV_ROUND_CLOSEST(((maxcnt - 1) * maxcnt_units_ns),
> +						    idledly_units_ps);
> +			idledly = DIV_ROUND_CLOSEST(idledly, 1000);
> +			xe_mmio_write32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base), idledly);
> +		}
> +	}
> +}
> +
>  static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
>  			  enum xe_hw_engine_id id)
>  {
> @@ -604,6 +630,10 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
>  	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
>  		gt->usm.reserved_bcs_instance = hwe->instance;
>  
> +	/* Ensure IDLEDLY is lower than MAXCNT */
> +	if (XE_WA(gt, 16023105232))
> +		adjust_idledly(hwe);
> +
>  	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);
>  
>  err_hwsp:
> diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
> index a25afb757f70..24f644c0a673 100644
> --- a/drivers/gpu/drm/xe/xe_wa.c
> +++ b/drivers/gpu/drm/xe/xe_wa.c
> @@ -622,6 +622,12 @@ static const struct xe_rtp_entry_sr engine_was[] = {
>  		       FUNC(xe_rtp_match_first_render_or_compute)),
>  	  XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
>  	},
> +	{ XE_RTP_NAME("16023105232"),
> +	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 3000), OR,
> +		       GRAPHICS_VERSION_RANGE(2001, 3001)),
> +	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE,
> +			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
> +	},
>  };
>  
>  static const struct xe_rtp_entry_sr lrc_was[] = {
> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
> index e0c5fa460487..0c738af24f7c 100644
> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
> @@ -53,3 +53,5 @@ no_media_l3	MEDIA_VERSION(3000)
>  		GRAPHICS_VERSION_RANGE(1270, 1274)
>  1508761755	GRAPHICS_VERSION(1255)
>  		GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0)
> +16023105232	GRAPHICS_VERSION_RANGE(2001, 3001)
> +		MEDIA_VERSION_RANGE(1301, 3000)
> -- 
> 2.38.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


More information about the Intel-xe mailing list