[PATCH v3] drm/xe: Apply Wa_16023105232

Belgaumkar, Vinay vinay.belgaumkar at intel.com
Wed Mar 12 01:41:08 UTC 2025


On 3/11/2025 2:33 PM, Matt Roper wrote:
> On Mon, Mar 10, 2025 at 11:55:52AM -0700, Vinay Belgaumkar wrote:
>> The WA requires KMD to disable DOP clock gating during a semaphore
>> wait and also ensure that idle delay for every CS is lower than the
>> idle wait time in the PWRCTX_MAXCNT register. Default values for these
>> registers already comply with this restriction.
> I'm not sure about the default value statement here.  For 0xD00, the
> default value is already correct for Xe3, but it is _not_ correct for
> Xe2.  For the delay values, the default for MAXCNT is ~6us, but there's
> no default value documented for IDLEDLY, so we can't really say whether
> it will be higher or lower by default (and it may even vary by platform,
> SKU, or firmware version).
>
>> v2: Store timestamp_base in gt info and other comments (Daniele)
>> v3: Skip WA check for VF
>>
>> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
>> ---
>>   drivers/gpu/drm/xe/regs/xe_engine_regs.h |  2 ++
>>   drivers/gpu/drm/xe/xe_gt_clock.c         | 38 +++++++++++++++++++++---
>>   drivers/gpu/drm/xe/xe_gt_types.h         |  2 ++
>>   drivers/gpu/drm/xe/xe_hw_engine.c        | 31 +++++++++++++++++++
>>   drivers/gpu/drm/xe/xe_wa_oob.rules       |  2 ++
>>   5 files changed, 71 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>> index 4f372dc2cb89..067468c62adb 100644
>> --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>> +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>> @@ -130,6 +130,8 @@
>>   #define RING_EXECLIST_STATUS_LO(base)		XE_REG((base) + 0x234)
>>   #define RING_EXECLIST_STATUS_HI(base)		XE_REG((base) + 0x234 + 4)
>>   
>> +#define RING_IDLEDLY(base)			XE_REG((base) + 0x23C)
> Nitpick: for consistency with other definitions we should use lowercase
> hex (0x23c).
ok.
>
>> +
>>   #define RING_CONTEXT_CONTROL(base)		XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
>>   #define	  CTX_CTRL_PXP_ENABLE			REG_BIT(10)
>>   #define	  CTX_CTRL_OAC_CONTEXT_ENABLE		REG_BIT(8)
>> diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
>> index 2a958c92d8ea..0394292e777e 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_clock.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_clock.c
>> @@ -16,12 +16,13 @@
>>   #include "xe_macros.h"
>>   #include "xe_mmio.h"
>>   
>> +#define f19_2_mhz	19200000
>> +#define f24_mhz		24000000
>> +#define f25_mhz		25000000
>> +#define f38_4_mhz	38400000
>> +
>>   static u32 get_crystal_clock_freq(u32 rpm_config_reg)
>>   {
>> -	const u32 f19_2_mhz = 19200000;
>> -	const u32 f24_mhz = 24000000;
>> -	const u32 f25_mhz = 25000000;
>> -	const u32 f38_4_mhz = 38400000;
>>   	u32 crystal_clock = REG_FIELD_GET(RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK,
>>   					  rpm_config_reg);
>>   
>> @@ -40,10 +41,37 @@ static u32 get_crystal_clock_freq(u32 rpm_config_reg)
>>   	}
>>   }
>>   
>> +/**
>> + * xe_gt_timestamp_base - Return the GT timestamp base
>> + *
>> + * @crystal_clock_freq: clock freq from 0xd00 register
>> + * Returns: rounded down time in nsec
>> + */
>> +static u32 xe_gt_timestamp_base(u32 crystal_clock_freq)
>> +{
>> +	const u32 ts_base_83 = 83333;
>> +	const u32 ts_base_52 = 52083;
>> +	const u32 ts_base_80 = 80000;
>> +
>> +	switch (crystal_clock_freq) {
>> +	case f24_mhz:
>> +		return DIV_ROUND_CLOSEST(ts_base_83, 1000);
>> +	case f19_2_mhz:
>> +	case f38_4_mhz:
>> +		return DIV_ROUND_CLOSEST(ts_base_52, 1000);
>> +	case f25_mhz:
>> +		return DIV_ROUND_CLOSEST(ts_base_80, 1000);
> Do we want to truncate these values down to full nanoseconds here?  It
> seems like that might cause a small amount of unnecessary rounding error
> when we later use the value in multiplication.  Maybe it would be better
> to leave this in picoseconds for now, and only convert to nanoseconds
> later after we're done doing the math?
ok.
>
>> +	default:
>> +		XE_WARN_ON("NOT_POSSIBLE");
>> +		return 0;
>> +	}
>> +}
>> +
>>   int xe_gt_clock_init(struct xe_gt *gt)
>>   {
>>   	u32 c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
>>   	u32 freq = 0;
>> +	u32 timestamp_base = 0;
>>   
>>   	/*
>>   	 * CTC_MODE[0] = 1 is definitely not supported for Xe2 and later
>> @@ -59,6 +87,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
>>   		xe_gt_warn(gt, "CTC_MODE[0] is set; this is unexpected and undocumented\n");
>>   
>>   	freq = get_crystal_clock_freq(c0);
>> +	timestamp_base = xe_gt_timestamp_base(freq);
> Would it be simpler to combine these two functions that both read from
> 0xd00 into a single call that returns both values from the table on
> bspec 72056?  E.g.,
>
>          read_crystal_clock(c0, &freq, &gt->info.timestamp_base);
ok, makes sense.
>
>>   
>>   	/*
>>   	 * Now figure out how the command stream's timestamp
>> @@ -68,6 +97,7 @@ int xe_gt_clock_init(struct xe_gt *gt)
>>   	freq >>= 3 - REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, c0);
>>   
>>   	gt->info.reference_clock = freq;
>> +	gt->info.timestamp_base = timestamp_base;
>>   	return 0;
>>   }
>>   
>> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
>> index e3cfb026ac88..7def0959da35 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_types.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
>> @@ -121,6 +121,8 @@ struct xe_gt {
>>   		enum xe_gt_type type;
>>   		/** @info.reference_clock: clock frequency */
>>   		u32 reference_clock;
>> +		/** @info.timestamp_base: GT timestamp base */
>> +		u32 timestamp_base;
>>   		/**
>>   		 * @info.engine_mask: mask of engines present on GT. Some of
>>   		 * them may be reserved in runtime and not available for user.
>> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
>> index 223b95de388c..47dfc403b71d 100644
>> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
>> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
>> @@ -8,7 +8,9 @@
>>   #include <linux/nospec.h>
>>   
>>   #include <drm/drm_managed.h>
>> +#include <drm/drm_print.h>
>>   #include <uapi/drm/xe_drm.h>
>> +#include <generated/xe_wa_oob.h>
>>   
>>   #include "regs/xe_engine_regs.h"
>>   #include "regs/xe_gt_regs.h"
>> @@ -21,6 +23,7 @@
>>   #include "xe_gsc.h"
>>   #include "xe_gt.h"
>>   #include "xe_gt_ccs_mode.h"
>> +#include "xe_gt_clock.h"
>>   #include "xe_gt_printk.h"
>>   #include "xe_gt_mcr.h"
>>   #include "xe_gt_topology.h"
>> @@ -458,6 +461,12 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
>>   		  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
>>   				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
>>   		},
>> +		{ XE_RTP_NAME("Disable DOP clk gating"),
>> +		  XE_RTP_RULES(MEDIA_VERSION_RANGE(1301, 3000), OR,
>> +			       GRAPHICS_VERSION_RANGE(2001, 3001)),
>> +		  XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE,
>> +				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
>> +		},
> Since this is part of an actual workaround we should put this in xe_wa.c
> rather than here.  Also, this is only needed on Xe2 platforms; on Xe3
> the register already has the necessary value as long as we don't touch
> it.
Ok, will move it. The recommendation was to set it in Xe3 as well (event 
though default is 1), to maintain consistency.
>
>>   	};
>>   
>>   	xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr);
>> @@ -564,6 +573,24 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe,
>>   	xe_reg_whitelist_process_engine(hwe);
>>   }
>>   
>> +static void check_wa_16023105232(struct xe_gt *gt, struct xe_hw_engine *hwe)
> "check" makes me think this is going to be a function that returns
> true/false and has no side effects, but that's not the case here.  It
> would be better to name this function for the action it's actually
> taking, e.g., "adjust_idledly" or "set_idledy_lower_than_maxcnt."  Also
> we should just be able to pass just the hwe as a parameter since we can
> obtain the GT from that.
ok.
>
>
>> +{
>> +	u32 idledly, maxcnt;
>> +	u32 idledly_units_ns = 8 * gt->info.timestamp_base;
>> +	u32 maxcnt_units_ns = 640;
>> +
>> +	if (XE_WA(gt, 16023105232)) {
>> +		idledly = xe_mmio_read32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base));
>> +		maxcnt = xe_mmio_read32(&gt->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base));
>> +
>> +		if (drm_WARN_ON(&gt_to_xe(gt)->drm,
> We can use xe_gt_WARN_ON() for better GT-specific output.  But why do we
> need a warning at all?  This is basically what this workaround is asking
> us to do --- reprogram IDLEDLY such that its value is lower than
> MAXCNT's --- so I'd expect there to be legitimate cases where this isn't
> true before we apply the workaround, otherwise they wouldn't have needed
> to create the workaround.
The idledly is expected to be 0 and untouched in most platforms. If 
somebody programs this during boot, they should know about this 
restriction. The WARN will flag it up as such.
>
>> +				(idledly * idledly_units_ns) >= (maxcnt * maxcnt_units_ns))) {
>> +			xe_mmio_write32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base),
>> +					((maxcnt - 1) * maxcnt_units_ns) / idledly_units_ns);
>> +		}
>> +	}
>> +}
>> +
>>   static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
>>   			  enum xe_hw_engine_id id)
>>   {
>> @@ -604,6 +631,10 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
>>   	if (xe->info.has_usm && hwe->class == XE_ENGINE_CLASS_COPY)
>>   		gt->usm.reserved_bcs_instance = hwe->instance;
>>   
>> +	/* Ensure IDLEDLY is lower than MAXCNT */
>> +	if (!IS_SRIOV_VF(xe))
>> +		check_wa_16023105232(gt, hwe);
> As noted above, we can adjust the name of the function to indicate what
> it actually does, and we can also move the XE_WA check to here:
>
>          if (XE_WA(gt, 16023105232))
>                  adjust_idledly(hwe);
>
> I believe the not-VF check should already be handled by the XE_WA check.

ok.

Thanks,

Vinay.

>
>
> Matt
>
>> +
>>   	return devm_add_action_or_reset(xe->drm.dev, hw_engine_fini, hwe);
>>   
>>   err_hwsp:
>> diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> index e0c5fa460487..0c738af24f7c 100644
>> --- a/drivers/gpu/drm/xe/xe_wa_oob.rules
>> +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
>> @@ -53,3 +53,5 @@ no_media_l3	MEDIA_VERSION(3000)
>>   		GRAPHICS_VERSION_RANGE(1270, 1274)
>>   1508761755	GRAPHICS_VERSION(1255)
>>   		GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0)
>> +16023105232	GRAPHICS_VERSION_RANGE(2001, 3001)
>> +		MEDIA_VERSION_RANGE(1301, 3000)
>> -- 
>> 2.38.1
>>


More information about the Intel-xe mailing list