[PATCH v5 4/5] drm/xe/vf: Start post-migration fixups with provisioning query

Tomasz Lis tomasz.lis at intel.com
Tue Oct 29 19:39:55 UTC 2024


During post-migration recovery, only MMIO communication to GuC is
allowed. The VF KMD needs to use that channel to ask for the new
provisioning, which includes a new GGTT range assigned to the VF.

v2: query config only instead of handshake; no need to get pm ref as
 it's now kept through the whole recovery (Michal)
v3: switched names of 'err' and 'ret'  (Michal)

Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
---
 drivers/gpu/drm/xe/xe_sriov_vf.c | 36 ++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index 6343688822d5..6aac4e7d5945 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -25,6 +25,31 @@ void xe_sriov_vf_init_early(struct xe_device *xe)
 	INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
 }
 
+/**
+ * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning.
+ * @xe: the &xe_device struct instance
+ *
+ * After migration, the VF configuration is re-queried on every GT. Most of
+ * the provisioning shall be the same as before, except the GGTT range, since
+ * GGTT is not virtualized per-VF. A failure on one GT does not stop the loop.
+ *
+ * Return: 0 if every query completed successfully, or the first error code
+ * returned by xe_gt_sriov_vf_query_config() otherwise.
+ */
+static int vf_post_migration_requery_guc(struct xe_device *xe)
+{
+	struct xe_gt *gt;
+	unsigned int id;
+	int err, ret = 0;
+
+	for_each_gt(gt, xe, id) {
+		err = xe_gt_sriov_vf_query_config(gt);
+		ret = ret ?: err;	/* keep the first error, keep querying */
+	}
+
+	return ret;
+}
+
 /*
  * Notify all GuCs about resource fixups apply finished.
  */
@@ -40,12 +65,23 @@ static void vf_post_migration_notify_resfix_done(struct xe_device *xe)
 
 static void vf_post_migration_recovery(struct xe_device *xe)
 {
+	int err;
+
 	drm_dbg(&xe->drm, "migration recovery in progress\n");
 	xe_pm_runtime_get(xe);
+	err = vf_post_migration_requery_guc(xe);	/* must succeed before fixups */
+	if (unlikely(err))
+		goto fail;
+
 	/* FIXME: add the recovery steps */
 	vf_post_migration_notify_resfix_done(xe);
 	xe_pm_runtime_put(xe);
 	drm_notice(&xe->drm, "migration recovery ended\n");
+	return;
+fail:	/* error path: balance xe_pm_runtime_get(), log, declare wedged */
+	xe_pm_runtime_put(xe);
+	drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
+	xe_device_declare_wedged(xe);
 }
 
 static void migration_worker_func(struct work_struct *w)
-- 
2.25.1



More information about the Intel-xe mailing list