[PATCH v4] drm/xe: Apply Wa_16023105232

Belgaumkar, Vinay vinay.belgaumkar at intel.com
Fri Mar 14 19:53:40 UTC 2025


On 3/13/2025 1:22 PM, Matt Roper wrote:
> On Thu, Mar 13, 2025 at 11:04:40AM -0700, Vinay Belgaumkar wrote:
>> The WA requires KMD to disable DOP clock gating during a semaphore
>> wait and also ensure that idle delay for every CS is lower than the
>> idle wait time in the PWRCTX_MAXCNT register. Default values for these
>> registers already comply with this restriction.
>>
>> v2: Store timestamp_base in gt info and other comments (Daniele)
>> v3: Skip WA check for VF
>> v4: Review comments (Matt Roper)
>>
>> Cc: Matt Roper <matthew.d.roper at intel.com>
>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
>> ---
>>   drivers/gpu/drm/xe/regs/xe_engine_regs.h |  2 ++
>>   drivers/gpu/drm/xe/xe_gt_clock.c         | 36 ++++++++++++++++--------
>>   drivers/gpu/drm/xe/xe_gt_types.h         |  2 ++
>>   drivers/gpu/drm/xe/xe_hw_engine.c        | 30 ++++++++++++++++++++
>>   drivers/gpu/drm/xe/xe_wa.c               |  6 ++++
>>   drivers/gpu/drm/xe/xe_wa_oob.rules       |  2 ++
>>   6 files changed, 67 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>> index 659cf85fa3d6..05658ad991f4 100644
>> --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>> +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>> @@ -130,6 +130,8 @@
>>   #define RING_EXECLIST_STATUS_LO(base)		XE_REG((base) + 0x234)
>>   #define RING_EXECLIST_STATUS_HI(base)		XE_REG((base) + 0x234 + 4)
>>   
>> +#define RING_IDLEDLY(base)			XE_REG((base) + 0x23c)
>> +
>>   #define RING_CONTEXT_CONTROL(base)		XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
>>   #define	  CTX_CTRL_PXP_ENABLE			REG_BIT(10)
>>   #define	  CTX_CTRL_OAC_CONTEXT_ENABLE		REG_BIT(8)
>> diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
>> index fca38738e610..f493b1687851 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_clock.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_clock.c
>> @@ -16,27 +16,41 @@
>>   #include "xe_macros.h"
>>   #include "xe_mmio.h"
>>   
>> -static u32 get_crystal_clock_freq(u32 rpm_config_reg)
>> +#define f19_2_mhz	19200000
>> +#define f24_mhz		24000000
>> +#define f25_mhz		25000000
>> +#define f38_4_mhz	38400000
>> +#define ts_base_83	83333
>> +#define ts_base_52	52083
>> +#define ts_base_80	80000
>> +
>> +static void read_crystal_clock(u32 rpm_config_reg, u32 *freq, u32 *timestamp_base)
>>   {
>> -	const u32 f19_2_mhz = 19200000;
>> -	const u32 f24_mhz = 24000000;
>> -	const u32 f25_mhz = 25000000;
>> -	const u32 f38_4_mhz = 38400000;
>>   	u32 crystal_clock = REG_FIELD_GET(RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK,
>>   					  rpm_config_reg);
>>   
>>   	switch (crystal_clock) {
>>   	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
>> -		return f24_mhz;
>> +		*freq = f24_mhz;
>> +		*timestamp_base = ts_base_83;
>> +		return;
>>   	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
>> -		return f19_2_mhz;
>> +		*freq = f19_2_mhz;
>> +		*timestamp_base = ts_base_52;
>> +		return;
>>   	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
>> -		return f38_4_mhz;
>> +		*freq = f38_4_mhz;
>> +		*timestamp_base = ts_base_52;
>> +		return;
>>   	case RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
>> -		return f25_mhz;
>> +		*freq = f25_mhz;
>> +		*timestamp_base = ts_base_80;
>> +		return;
>>   	default:
>>   		XE_WARN_ON("NOT_POSSIBLE");
> While we're making changes here, it might be worth improving the warning
> here to actually print out the value we got.  If we get bug reports and
> dmesg just says "NOT_POSSIBLE" without telling us the bogus value it
> received it makes it harder to debug what's going on.  If we pass the GT
> parameter to this function, we could also use xe_gt_WARN_ON so that
> we'll know which GT (primary vs media) it was that caused the problem.
ok.
>
>> -		return 0;
>> +		*freq = 0;
>> +		*timestamp_base = 0;
>> +		return;
>>   	}
>>   }
>>   
>> @@ -65,7 +79,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
>>   		check_ctc_mode(gt);
>>   
>>   	c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
>> -	freq = get_crystal_clock_freq(c0);
>> +	read_crystal_clock(c0, &freq, &gt->info.timestamp_base);
>>   
>>   	/*
>>   	 * Now figure out how the command stream's timestamp
>> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
>> index e3cfb026ac88..7def0959da35 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_types.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
>> @@ -121,6 +121,8 @@ struct xe_gt {
>>   		enum xe_gt_type type;
>>   		/** @info.reference_clock: clock frequency */
>>   		u32 reference_clock;
>> +		/** @info.timestamp_base: GT timestamp base */
>> +		u32 timestamp_base;
>>   		/**
>>   		 * @info.engine_mask: mask of engines present on GT. Some of
>>   		 * them may be reserved in runtime and not available for user.
>> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
>> index 223b95de388c..4b8a54c16ac7 100644
>> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
>> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
>> @@ -8,7 +8,9 @@
>>   #include <linux/nospec.h>
>>   
>>   #include <drm/drm_managed.h>
>> +#include <drm/drm_print.h>
>>   #include <uapi/drm/xe_drm.h>
>> +#include <generated/xe_wa_oob.h>
>>   
>>   #include "regs/xe_engine_regs.h"
>>   #include "regs/xe_gt_regs.h"
>> @@ -21,6 +23,7 @@
>>   #include "xe_gsc.h"
>>   #include "xe_gt.h"
>>   #include "xe_gt_ccs_mode.h"
>> +#include "xe_gt_clock.h"
>>   #include "xe_gt_printk.h"
>>   #include "xe_gt_mcr.h"
>>   #include "xe_gt_topology.h"
>> @@ -564,6 +567,29 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
>>   	xe_reg_whitelist_process_engine(hwe);
>>   }
>>   
>> +static void adjust_idledly(struct xe_hw_engine *hwe)
>> +{
>> +	struct xe_gt *gt = hwe->gt;
>> +	u32 idledly, maxcnt;
>> +	u32 idledly_units_ps = 8 * gt->info.timestamp_base;
>> +	u32 maxcnt_units_ns = 640;
>> +
>> +	if (XE_WA(gt, 16023105232)) {
>> +		idledly = xe_mmio_read32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base));
>> +		maxcnt = xe_mmio_read32(&gt->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base));
>> +
>> +		idledly = DIV_ROUND_CLOSEST(idledly * idledly_units_ps, 1000);
> I overlooked it on the previous revision, but it looks like we're using
> the full contents of the IDLEDLY register here rather than just [20:0]
> that hold the delay value.  In practice things will probably still work
> out properly since all of the other bits except for 31 are reserved and
> probably 0's (and if bit 31 is set, then we'll probably satisfy the >=
> condition necessary to rewrite the register.  But if this workaround
> gets extended to future platforms, and those platforms also add more
> non-delay flag bits to the register it could mess up the logic.
>
> So we probably want to use REG_FIELD_GET() to extract the value, and
> reprogram the register if the IDLEDLY value is larger, or if bit[31] is
> set (since that effectively means "infinite delay").
ok.
>
>> +		maxcnt *= maxcnt_units_ns;
> Similarly, we should probalby do a REG_FIELD_GET() on PWRCTX_MAXCNT too
> just in case something else gets added to the higher bits of this
> register in the future.

Ok, thanks,

Vinay.

>
>
> Matt
>
>> +
>> +		if (xe_gt_WARN_ON(gt, idledly >= maxcnt)) {
>> +			idledly = DIV_ROUND_CLOSEST(((maxcnt - 1) * maxcnt_units_ns),
>> +						    idledly_units_ps);
>> +			idledly = DIV_ROUND_CLOSEST(idledly, 1000);
>> +			xe_mmio_write32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base), idledly);
>> +		}
>> +	}
>> +}
>> +
>>   static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
>>   			  enum xe_hw_engine_id id)
>>   {
>> @@ -604,6 +630,10 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
>>   	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
>>   		gt->usm.reserved_bcs_instance = hwe->instance;
>>   
>> +	/* Ensure IDLEDLY is lower than MAXCNT */
>> +	if (XE_WA(gt, 16023105232))
>> +		adjust_idledly(hwe);
>> +
>>   	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);
>>   
>>   err_hwsp:
>> diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
>> index a25afb757f70..24f644c0a673 100644
>> --- a/drivers/gpu/drm/xe/xe_wa.c
>> +++ b/drivers/gpu/drm/xe/xe_wa.c
>> @@ -622,6 +622,12 @@ static const struct xe_rtp_entry_sr engine_was[] = {
>>   		       FUNC(xe_rtp_match_first_render_or_compute)),
>>   	  XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
>>   	},
>> +	{ XE_RTP_NAME("16023105232"),
>> +	  XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 3000), OR,
>> +		       GRAPHICS_VERSION_RANGE(2001, 3001)),
>> +	  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE,
>> +			     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
>> +	},
>>   };
>>   
>>   static const struct xe_rtp_entry_sr lrc_was[] = {
>> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> index e0c5fa460487..0c738af24f7c 100644
>> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
>> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> @@ -53,3 +53,5 @@ no_media_l3	MEDIA_VERSION(3000)
>>   		GRAPHICS_VERSION_RANGE(1270, 1274)
>>   1508761755	GRAPHICS_VERSION(1255)
>>   		GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0)
>> +16023105232	GRAPHICS_VERSION_RANGE(2001, 3001)
>> +		MEDIA_VERSION_RANGE(1301, 3000)
>> -- 
>> 2.38.1
>>


More information about the Intel-xe mailing list