[PATCH v4 2/8] drm/xe/vf: Finish RESFIX by reset if CTB not enabled
Tomasz Lis
tomasz.lis at intel.com
Fri Jun 6 00:18:17 UTC 2025
The RESFIX state should be reachable only when CTB communication is
enabled. If CTB is disabled and the VF still entered RESFIX, then either
we're dealing with an unclean initial state, or the driver is not currently
functional. In these cases, exit the RESFIX state by reset.
Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
Cc: Michal Winiarski <michal.winiarski at intel.com>
Reviewed-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
---
drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 11 +++++++++++
drivers/gpu/drm/xe/xe_sriov_vf.c | 16 ++++++++++++++++
drivers/gpu/drm/xe/xe_sriov_vf.h | 1 +
3 files changed, 28 insertions(+)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 792523cfa6e6..8fa210c0ef1a 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -23,6 +23,7 @@
#include "xe_gt_sriov_vf.h"
#include "xe_gt_sriov_vf_types.h"
#include "xe_guc.h"
+#include "xe_guc_ct.h"
#include "xe_guc_hxg_helpers.h"
#include "xe_guc_relay.h"
#include "xe_mmio.h"
@@ -721,6 +722,16 @@ void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt)
xe_gt_assert(gt, IS_SRIOV_VF(xe));
+ if (!xe_guc_ct_enabled(&gt->uc.guc.ct)) {
+ /*
+ * If driver initialization is running in parallel to this handler,
+ * ignore the migration which happened before the driver was loaded.
+ * Force GuC to take the VF out of RESFIX state without any fixups.
+ */
+ xe_sriov_vf_post_migration_reset_guc_state(xe);
+ return;
+ }
+
set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags);
/*
* We need to be certain that if all flags were set, at least one
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index 6526fe450e55..eff6c7b96f25 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -147,6 +147,22 @@ void xe_sriov_vf_init_early(struct xe_device *xe)
xe_sriov_info(xe, "migration not supported by this module version\n");
}
+/**
+ * xe_sriov_vf_post_migration_reset_guc_state - Reset VF state in all GuCs.
+ * @xe: the &xe_device struct instance
+ *
+ * Calls xe_gt_reset_async() on every GT of @xe, as a way of exiting RESFIX
+ * state when proper post-migration recovery cannot run (e.g. CTB disabled).
+ */
+void xe_sriov_vf_post_migration_reset_guc_state(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+
+ for_each_gt(gt, xe, id)
+ xe_gt_reset_async(gt);
+}
+
/**
* vf_post_migration_requery_guc - Re-query GuC for current VF provisioning.
* @xe: the &xe_device struct instance
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h
index 7b8622cff2b7..ba846af34a13 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.h
@@ -10,5 +10,6 @@ struct xe_device;
void xe_sriov_vf_init_early(struct xe_device *xe);
void xe_sriov_vf_start_migration_recovery(struct xe_device *xe);
+void xe_sriov_vf_post_migration_reset_guc_state(struct xe_device *xe);
#endif
--
2.25.1
More information about the Intel-xe
mailing list