[PATCH v3 2/7] drm/xe/vf: Finish RESFIX by reset if CTB not enabled

Tomasz Lis tomasz.lis at intel.com
Mon May 19 23:19:20 UTC 2025


The RESFIX state should be reachable only when CTB communication is
enabled. If CTB was disabled and we still ended up in RESFIX, then either
we are dealing with an unclean initial state, or the driver is not
currently functional. In these cases, exit the RESFIX state by reset.

Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
---
 drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 10 ++++++++++
 drivers/gpu/drm/xe/xe_sriov_vf.c    | 16 ++++++++++++++++
 drivers/gpu/drm/xe/xe_sriov_vf.h    |  1 +
 3 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index 4ff7ae1a5f16..b9af112ca771 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -23,6 +23,7 @@
 #include "xe_gt_sriov_vf.h"
 #include "xe_gt_sriov_vf_types.h"
 #include "xe_guc.h"
+#include "xe_guc_ct.h"
 #include "xe_guc_hxg_helpers.h"
 #include "xe_guc_relay.h"
 #include "xe_mmio.h"
@@ -932,6 +933,15 @@ void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt)
 
 	xe_gt_assert(gt, IS_SRIOV_VF(xe));
 
+	if (!xe_guc_ct_enabled(&gt->uc.guc.ct)) {
+		/*
+		 * If at driver init, ignore migration which happened
+		 * before the driver was loaded.
+		 */
+		xe_sriov_vf_post_migration_reset_guc_state(xe);
+		return;
+	}
+
 	set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags);
 	/*
 	 * We need to be certain that if all flags were set, at least one
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index 2674fa948fda..099a395fbf59 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -134,6 +134,22 @@ void xe_sriov_vf_init_early(struct xe_device *xe)
 	INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
 }
 
+/**
+ * xe_sriov_vf_post_migration_reset_guc_state - Reset VF state in all GuCs.
+ * @xe: the &xe_device struct instance
+ *
+ * Calls xe_gt_reset_async() on every GT of @xe, as a way of exiting the
+ * RESFIX state if proper post-migration recovery failed or cannot be done.
+ */
+void xe_sriov_vf_post_migration_reset_guc_state(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+
+	for_each_gt(gt, xe, id)
+		xe_gt_reset_async(gt);
+}
+
 /**
  * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning.
  * @xe: the &xe_device struct instance
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h
index 7b8622cff2b7..ba846af34a13 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.h
@@ -10,5 +10,6 @@ struct xe_device;
 
 void xe_sriov_vf_init_early(struct xe_device *xe);
 void xe_sriov_vf_start_migration_recovery(struct xe_device *xe);
+void xe_sriov_vf_post_migration_reset_guc_state(struct xe_device *xe);
 
 #endif
-- 
2.25.1



More information about the Intel-xe mailing list