[PATCH v4 3/3] drm/xe/bmg: Update Wa_22019338487

Rodrigo Vivi rodrigo.vivi at intel.com
Mon Jun 16 20:35:48 UTC 2025


On Mon, Jun 16, 2025 at 10:38:34AM -0500, Lucas De Marchi wrote:
> On Mon, Jun 16, 2025 at 10:37:49AM -0400, Rodrigo Vivi wrote:
> > On Sun, Jun 15, 2025 at 11:17:36PM -0700, Lucas De Marchi wrote:
> > > From: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
> > > 
> > > Limit GT max frequency to 2600 MHz during the L2 flush. Also, ensure
> > > GT actual frequency is limited to that value before performing the
> > > cache flush.
> > > 
> > > v2: Use generic names, ensure user set max frequency requests wait
> > > for flush to complete (Rodrigo)
> > > v3:
> > >  - User requests wait via wait_var_event_timeout (Lucas)
> > >  - Close races on flush + user requests (Lucas)
> > >  - Fix xe_guc_pc_remove_flush_freq_limit() being called on last gt
> > >    rather than root gt (Lucas)
> > > 
> > > Fixes: aaa08078e725 ("drm/xe/bmg: Apply Wa_22019338487")
> > > Fixes: 01570b446939 ("drm/xe/bmg: implement Wa_16023588340")
> > > Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> > > Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
> > > Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_device.c       |  13 +++-
> > >  drivers/gpu/drm/xe/xe_guc_pc.c       | 125 +++++++++++++++++++++++++++++++++++
> > >  drivers/gpu/drm/xe/xe_guc_pc.h       |   2 +
> > >  drivers/gpu/drm/xe/xe_guc_pc_types.h |   2 +
> > >  4 files changed, 139 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> > > index 7e87344943cdf..6ff373ad0a965 100644
> > > --- a/drivers/gpu/drm/xe/xe_device.c
> > > +++ b/drivers/gpu/drm/xe/xe_device.c
> > > @@ -40,6 +40,7 @@
> > >  #include "xe_gt_printk.h"
> > >  #include "xe_gt_sriov_vf.h"
> > >  #include "xe_guc.h"
> > > +#include "xe_guc_pc.h"
> > >  #include "xe_hw_engine_group.h"
> > >  #include "xe_hwmon.h"
> > >  #include "xe_irq.h"
> > > @@ -1001,16 +1002,19 @@ void xe_device_wmb(struct xe_device *xe)
> > >   */
> > >  void xe_device_td_flush(struct xe_device *xe)
> > >  {
> > > -	struct xe_gt *gt;
> > > +	struct xe_gt *gt, *root_gt;
> > >  	unsigned int fw_ref;
> > >  	u8 id;
> > > 
> > >  	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
> > >  		return;
> > > 
> > > -	if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
> > > +	root_gt = xe_root_mmio_gt(xe);
> > > +	xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc);
> > > +
> > > +	if (XE_WA(root_gt, 16023588340)) {
> > >  		xe_device_l2_flush(xe);
> > > -		return;
> > > +		goto done;
> > >  	}
> > > 
> > >  	for_each_gt(gt, xe, id) {
> > > @@ -1035,6 +1039,9 @@ void xe_device_td_flush(struct xe_device *xe)
> > > 
> > >  		xe_force_wake_put(gt_to_fw(gt), fw_ref);
> > >  	}
> > > +
> > > +done:
> > > +	xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc);
> > >  }
> > > 
> > >  void xe_device_l2_flush(struct xe_device *xe)
> > > diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
> > > index d449eb0e3e8af..eab932655b2fb 100644
> > > --- a/drivers/gpu/drm/xe/xe_guc_pc.c
> > > +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
> > > @@ -7,7 +7,9 @@
> > > 
> > >  #include <linux/cleanup.h>
> > >  #include <linux/delay.h>
> > > +#include <linux/jiffies.h>
> > >  #include <linux/ktime.h>
> > > +#include <linux/wait_bit.h>
> > > 
> > >  #include <drm/drm_managed.h>
> > >  #include <drm/drm_print.h>
> > > @@ -53,9 +55,11 @@
> > >  #define LNL_MERT_FREQ_CAP	800
> > >  #define BMG_MERT_FREQ_CAP	2133
> > >  #define BMG_MIN_FREQ		1200
> > > +#define BMG_MERT_FLUSH_FREQ_CAP	2600
> > > 
> > >  #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
> > >  #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
> > > +#define SLPC_ACT_FREQ_TIMEOUT_MS 100
> > > 
> > >  /**
> > >   * DOC: GuC Power Conservation (PC)
> > > @@ -143,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc,
> > >  	return -ETIMEDOUT;
> > >  }
> > > 
> > > +static int wait_for_flush_complete(struct xe_guc_pc *pc)
> > > +{
> > > +	const unsigned long timeout = msecs_to_jiffies(30);
> > > +
> > > +	if (!wait_var_event_timeout(&pc->flush_freq_limit,
> > > +				    !atomic_read(&pc->flush_freq_limit),
> > > +				    timeout))
> > > +		return -ETIMEDOUT;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq)
> > 
> > For a moment, the name of this function got me confused: I thought
> > it was going to wait for the *exact* act freq, which would be risky
> > because we can never know what extra throttles PCODE will decide on.
> > 
> > But I don't have a suggestion for a better name, and reading the
> > rest of the code showed it is doing things correctly.
> > 
> > There's a risk though... if for some reason PCODE decides to keep the
> > freq high for a longer time... but that's likely unrealistic for this
> > platform.
> > 
> > Reviewed-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> 
> Thanks... I will take a look at CI later today and think about whether
> we have a better name or better error handling.
> 
> While we are here, a couple of improvements I noticed while doing the
> changes; see below.
> 
> > 
> > > +{
> > > +	int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC;
> > > +	int slept, wait = 10;
> > > +
> > > +	for (slept = 0; slept < timeout_us;) {
> > > +		if (xe_guc_pc_get_act_freq(pc) <= freq)
> > > +			return 0;
> > > +
> > > +		usleep_range(wait, wait << 1);
> > > +		slept += wait;
> > > +		wait <<= 1;
> > > +		if (slept + wait > timeout_us)
> > > +			wait = timeout_us - slept;
> > > +	}
> 
> 1) wait functions
> 
> Except for the call to xe_guc_pc_get_act_freq(), this is a copy and
> paste of the other wait function in this same file.
> wait_for_flush_complete() was also a copy and paste, and I moved it to
> wait_var_event_timeout(), which is a better synchronization primitive
> than what this is doing.
> 
> All these open-coded wait-until-timeout functions are wrong:
> usleep_range() may sleep for anything between the two values, yet we
> only use the wait variable, i.e. the lower bound, to account for the
> time slept and to pick the next sleep value. Worst case scenario, even
> if unlikely, we will wait for log2(timeout_us) * timeout_us.
> 
> The xe_mmio_wait* helpers get the wait checks right. I think we need
> another function as the proper abstraction for this. Previously we
> decided against it because:
> 
> 	a) this is the poorest synchronization method and it tends to be
> 	abused (see the previous version of wait_for_flush_complete())
> 	b) adding such a thing to xe_helpers.h/xe_utils.h opens the
> 	door for doing other things there that should rather be using
> 	proper kernel infra
> 	c) previous attempts added it to xe_mmio or a new xe_utils.h,
> 	and neither of those is a good location.
> 
> Proposal: xe_wait_for_cond() in xe_device.c. We should document when
> it's valid to use it: it's not a synchronization between 2 CPU threads,
> but rather between GPU-FW-CPU; it doesn't fit the xe_mmio_wait* helpers
> because it needs to do things other than read and check an mmio value;
> and there's no other core kernel synchronization primitive that can be
> used. Then we migrate wait_for_act_freq_limit() and wait_for_pc_state()
> to use it.

Ack on this. But it could be a follow-up, right?! Roughly what I'd
imagine is the sketch below.
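
Untested sketch to make the follow-up concrete. Name and location are
placeholders, and note that taking an arbitrary condition pretty much
forces a macro in a header rather than a function in xe_device.c. The
key points: measure real elapsed time with ktime instead of accumulating
the nominal lower bound, and sample the condition one last time after
the deadline (needs <linux/ktime.h> and <linux/delay.h>):

#define xe_wait_for_cond(cond, timeout_us, min_sleep_us) \
({ \
	const ktime_t end__ = ktime_add_us(ktime_get(), (timeout_us)); \
	long sleep__ = (min_sleep_us); \
	int ret__ = -ETIMEDOUT; \
\
	might_sleep(); \
	for (;;) { \
		/* usleep_range() may overshoot, so check real time */ \
		const bool expired__ = ktime_after(ktime_get(), end__); \
\
		/* sample the condition once more even after expiry */ \
		if (cond) { \
			ret__ = 0; \
			break; \
		} \
		if (expired__) \
			break; \
\
		usleep_range(sleep__, sleep__ << 1); \
		sleep__ <<= 1; \
	} \
	ret__; \
})

wait_for_act_freq_limit() would then collapse to something like:

	return xe_wait_for_cond(xe_guc_pc_get_act_freq(pc) <= freq,
				SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC, 10);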

> 
> 2) interaction with user requests
> 
> I kept the logic implemented in earlier versions, just changing how we
> wait for it to complete. However, should userspace really wait?
> Why not just update the stashed value that will later be flushed?
> This way, if there are multiple requests, they don't all keep waiting
> and then apply a more or less random value depending on the order the
> threads are woken up. For example, with thread number also meaning the
> execution order:
> 
> freq_limit == xe_guc_pc_apply_flush_freq_limit()
> freq_flush == xe_guc_pc_remove_flush_freq_limit()
> set_max == xe_guc_pc_set_max_freq()
> 
> t0				t1	t2	t3	t4
> 
> xe_device_td_flush() {
> 
> 	freq_limit
> 				set_max
> 					set_max
> 						set_max
> 							set_max
> 	freq_flush
> 
> Here all of t1, t2, t3, t4 are waiting on freq_flush from t0. The end
> value will be whichever one was applied by the thread woken up last. I
> think that if we change the implementation to simply set the stashed
> value, it will be simpler (no need to wait) and better: the value is
> ultimately set in freq_flush() and is always the last one requested.

We could try, but I'm afraid that this would break our IGT test cases
badly: when we read back what we just requested, we expect to see the
value applied.

Or we open the can of worms of the soft_freq design that i915 has,
with all the confusion it brings...
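
To make my concern concrete, I read the proposal as something like the
following untested sketch (reusing the names from this patch):

	int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
	{
		guard(mutex)(&pc->freq_lock);

		/*
		 * While a flush holds the cap, only record the request;
		 * xe_guc_pc_remove_flush_freq_limit() would then apply the
		 * stashed value when it lifts the cap.
		 */
		if (atomic_read(&pc->flush_freq_limit)) {
			pc->user_requested_max = freq;
			pc->stashed_max_freq = freq;
			return 0;
		}

		return xe_guc_pc_set_max_freq_locked(pc, freq);
	}

With that, a sysfs read of max_freq right after the write would still
report BMG_MERT_FLUSH_FREQ_CAP until the flush finishes, and that
readback is exactly what IGT checks.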

> 
> 
> thoughts?
> Lucas De Marchi
> 
> > > +
> > > +	return -ETIMEDOUT;
> > > +}
> > >  static int pc_action_reset(struct xe_guc_pc *pc)
> > >  {
> > >  	struct xe_guc_ct *ct = pc_to_ct(pc);
> > > @@ -689,6 +723,11 @@ static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq)
> > >   */
> > >  int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
> > >  {
> > > +	if (XE_WA(pc_to_gt(pc), 22019338487)) {
> > > +		if (wait_for_flush_complete(pc) != 0)
> > > +			return -EAGAIN;
> > > +	}
> > > +
> > >  	guard(mutex)(&pc->freq_lock);
> > > 
> > >  	return xe_guc_pc_set_max_freq_locked(pc, freq);
> > > @@ -889,6 +928,92 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
> > >  	return ret;
> > >  }
> > > 
> > > +static bool needs_flush_freq_limit(struct xe_guc_pc *pc)
> > > +{
> > > +	struct xe_gt *gt = pc_to_gt(pc);
> > > +
> > > +	return  XE_WA(gt, 22019338487) &&
> > > +		pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP;
> > > +}
> > > +
> > > +/**
> > > + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush
> > > + * @pc: the xe_guc_pc object
> > > + *
> > > + * As per the WA, reduce max GT frequency during L2 cache flush
> > > + */
> > > +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
> > > +{
> > > +	struct xe_gt *gt = pc_to_gt(pc);
> > > +	u32 max_freq;
> > > +	int ret;
> > > +
> > > +	if (!needs_flush_freq_limit(pc))
> > > +		return;
> > > +
> > > +	guard(mutex)(&pc->freq_lock);
> > > +
> > > +	ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq);
> > > +	if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) {
> > > +		ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP);
> > > +		if (ret) {
> > > +			xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n",
> > > +				       BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
> > > +			return;
> > > +		}
> > > +
> > > +		atomic_set(&pc->flush_freq_limit, 1);
> > > +
> > > +		/*
> > > +		 * If user has previously changed max freq, stash that value to
> > > +		 * restore later, otherwise use the current max. New user
> > > +		 * requests wait on flush.
> > > +		 */
> > > +		if (pc->user_requested_max != 0)
> > > +			pc->stashed_max_freq = pc->user_requested_max;
> > > +		else
> > > +			pc->stashed_max_freq = max_freq;
> > > +	}
> > > +
> > > +	/*
> > > +	 * Wait for actual freq to go below the flush cap: even if the previous
> > > +	 * max was below cap, the current one might still be above it
> > > +	 */
> > > +	ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
> > > +	if (ret)
> > > +		xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
> > > +			       BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
> > > +}
> > > +
> > > +/**
> > > + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes.
> > > + * @pc: the xe_guc_pc object
> > > + *
> > > + * Retrieve the previous GT max frequency value.
> > > + */
> > > +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc)
> > > +{
> > > +	struct xe_gt *gt = pc_to_gt(pc);
> > > +	int ret = 0;
> > > +
> > > +	if (!needs_flush_freq_limit(pc))
> > > +		return;
> > > +
> > > +	if (!atomic_read(&pc->flush_freq_limit))
> > > +		return;
> > > +
> > > +	mutex_lock(&pc->freq_lock);
> > > +
> > > +	ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq);
> > > +	if (ret)
> > > +		xe_gt_err_once(gt, "Failed to restore max freq %u:%d",
> > > +			       pc->stashed_max_freq, ret);
> > > +
> > > +	atomic_set(&pc->flush_freq_limit, 0);
> > > +	mutex_unlock(&pc->freq_lock);
> > > +	wake_up_var(&pc->flush_freq_limit);
> > > +}
> > > +
> > >  static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
> > >  {
> > >  	int ret;
> > > diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
> > > index 0a2664d5c8114..52ecdd5ddbff2 100644
> > > --- a/drivers/gpu/drm/xe/xe_guc_pc.h
> > > +++ b/drivers/gpu/drm/xe/xe_guc_pc.h
> > > @@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
> > >  void xe_guc_pc_init_early(struct xe_guc_pc *pc);
> > >  int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
> > >  void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
> > > +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc);
> > > +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc);
> > > 
> > >  #endif /* _XE_GUC_PC_H_ */
> > > diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
> > > index 2978ac9a249b5..c02053948a579 100644
> > > --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
> > > @@ -15,6 +15,8 @@
> > >  struct xe_guc_pc {
> > >  	/** @bo: GGTT buffer object that is shared with GuC PC */
> > >  	struct xe_bo *bo;
> > > +	/** @flush_freq_limit: 1 when max freq changes are limited by driver */
> > > +	atomic_t flush_freq_limit;
> > >  	/** @rp0_freq: HW RP0 frequency - The Maximum one */
> > >  	u32 rp0_freq;
> > >  	/** @rpa_freq: HW RPa frequency - The Achievable one */
> > > 
> > > --
> > > 2.49.0
> > > 

