[PATCH 3/4] drm/xe/vf: Start post-migration fixups with GuC MMIO handshake

Tomasz Lis tomasz.lis at intel.com
Fri Sep 20 22:29:25 UTC 2024


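During post-migration recovery, only MMIO communication with GuC is
allowed, and that communication has to be initialized before it can
be used.

Start the recovery by re-running the GuC bootstrap on every GT, and
declare the device wedged if that bootstrap fails.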

Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
---
 drivers/gpu/drm/xe/xe_sriov_vf.c | 40 ++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index 459fa936aaba..3cea2d21525f 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -22,6 +22,36 @@ void xe_sriov_vf_init_early(struct xe_device *xe)
 	INIT_WORK(&xe->sriov.vf.migration_worker, migration_worker_func);
 }
 
+/**
+ * vf_post_migration_reinit_guc - Re-initialize GuC communication.
+ * @xe: the &xe_device struct instance
+ *
+ * After migration, communication with GuC must be reestablished and the VF
+ * configuration re-queried to make sure it matches the previous provisioning
+ * (most of it shall be the same, except the GGTT range, since GGTT is not
+ * virtualized per-VF). All GTs are bootstrapped, even if one of them fails.
+ *
+ * Returns: 0 if every GT was bootstrapped successfully, or the error code
+ * from the first GT that failed otherwise.
+ */
+static int vf_post_migration_reinit_guc(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+	int err, ret;
+
+	err = 0;
+	xe_pm_runtime_get(xe);
+	for_each_gt(gt, xe, id) {
+		ret = xe_gt_sriov_vf_bootstrap(gt);
+		if (!err)
+			err = ret;
+	}
+	xe_pm_runtime_put(xe);
+
+	return err;
+}
+
 /*
  * vf_post_migration_notify_resfix_done - Notify all GuCs about resource fixups apply finished.
  * @xe: the &xe_device struct instance
@@ -44,10 +74,20 @@ static void vf_post_migration_notify_resfix_done(struct xe_device *xe)
 
 static void vf_post_migration_recovery(struct xe_device *xe)
 {
+	int err;
+
 	drm_dbg(&xe->drm, "migration recovery in progress\n");
+	err = vf_post_migration_reinit_guc(xe); /* GuC comms come first */
+	if (unlikely(err))
+		goto fail; /* skips the resfix-done notification below */
+
 	/* FIXME: add the recovery steps */
 	vf_post_migration_notify_resfix_done(xe);
 	drm_notice(&xe->drm, "migration recovery completed\n");
+	return;
+fail:
+	drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
+	xe_device_declare_wedged(xe);
 }
 
 static void migration_worker_func(struct work_struct *w)
-- 
2.25.1



More information about the Intel-xe mailing list