[PATCH v4 1/6] drm/xe/pf: Prepare to stop SR-IOV support prior GT reset
Michal Wajdeczko
michal.wajdeczko at intel.com
Fri Jul 11 19:33:11 UTC 2025
As part of the resume or GT reset, the PF driver schedules work
which is then used to complete restarting of the SR-IOV support,
including resending to the GuC configurations of provisioned VFs.
However, in case of short delay between those two actions, which
could be seen by triggering a GT reset on the suspened device:
$ echo 1 > /sys/kernel/debug/dri/0000:00:02.0/gt0/force_reset
this PF worker might be still busy, which lead to errors due to
just stopped or disabled GuC CTB communication:
[ ] xe 0000:00:02.0: [drm:xe_gt_resume [xe]] GT0: resumed
[ ] xe 0000:00:02.0: [drm] GT0: trying reset from force_reset_show [xe]
[ ] xe 0000:00:02.0: [drm] GT0: reset queued
[ ] xe 0000:00:02.0: [drm] GT0: reset started
[ ] xe 0000:00:02.0: [drm:guc_ct_change_state [xe]] GT0: GuC CT communication channel stopped
[ ] xe 0000:00:02.0: [drm:guc_ct_send_recv [xe]] GT0: H2G request 0x5503 canceled!
[ ] xe 0000:00:02.0: [drm] GT0: PF: Failed to push VF1 12 config KLVs (-ECANCELED)
[ ] xe 0000:00:02.0: [drm] GT0: PF: Failed to push VF1 configuration (-ECANCELED)
[ ] xe 0000:00:02.0: [drm:guc_ct_change_state [xe]] GT0: GuC CT communication channel disabled
[ ] xe 0000:00:02.0: [drm] GT0: PF: Failed to push VF2 12 config KLVs (-ENODEV)
[ ] xe 0000:00:02.0: [drm] GT0: PF: Failed to push VF2 configuration (-ENODEV)
[ ] xe 0000:00:02.0: [drm] GT0: PF: Failed to push 2 of 2 VFs configurations
[ ] xe 0000:00:02.0: [drm:pf_worker_restart_func [xe]] GT0: PF: restart completed
While this VFs reprovisioning will be successful during next spin
of the worker, to avoid those errors, make sure to cancel restart
worker if we are about to trigger next reset.
Fixes: 411220808cee ("drm/xe/pf: Restart VFs provisioning after GT reset")
Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
---
drivers/gpu/drm/xe/xe_gt.c | 3 +++
drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 19 +++++++++++++++++++
drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 5 +++++
3 files changed, 27 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index af03e19ef9be..58fe2c4168b0 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -835,6 +835,9 @@ static int gt_reset(struct xe_gt *gt)
goto err_out;
}
+ if (IS_SRIOV_PF(gt_to_xe(gt)))
+ xe_gt_sriov_pf_stop_prepare(gt);
+
xe_uc_gucrc_disable(>->uc);
xe_uc_stop_prepare(>->uc);
xe_gt_pagefault_reset(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index c08efca6420e..35489fa81825 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -172,6 +172,25 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid)
pf_clear_vf_scratch_regs(gt, vfid);
}
+static void pf_cancel_restart(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ if (cancel_work_sync(>->sriov.pf.workers.restart))
+ xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n");
+}
+
+/**
+ * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support.
+ * @gt: the &xe_gt
+ *
+ * This function can only be called on the PF.
+ */
+void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
+{
+ pf_cancel_restart(gt);
+}
+
static void pf_restart(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index f474509411c0..e2b2ff8132dc 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -13,6 +13,7 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
int xe_gt_sriov_pf_init(struct xe_gt *gt);
void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
+void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt);
void xe_gt_sriov_pf_restart(struct xe_gt *gt);
#else
static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
@@ -29,6 +30,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
{
}
+static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
+{
+}
+
static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt)
{
}
--
2.47.1
More information about the Intel-xe
mailing list