[PATCH i-g-t 2/2] tests/xe_sriov_migration: VF fake migration validation

Adam Miszczak adam.miszczak at linux.intel.com
Wed Feb 19 10:25:34 UTC 2025


Introduce validation of simplified VF migration scenarios
(a.k.a. fake migration). The test exercises the possibility to restore
previously saved GuC state that is reset by VF FLR.
Post-restore health is verified by running a simple workload on the VF.

Multiple scenario variants are supported:
- auto or manual VF provisioning
- with and without VF GGTT address change (relocation)

The test requires neither a running VM nor the VFIO driver's
migration support.

Signed-off-by: Adam Miszczak <adam.miszczak at linux.intel.com>
---
 tests/intel/xe_sriov_migration.c | 297 +++++++++++++++++++++++++++++++
 tests/meson.build                |   1 +
 2 files changed, 298 insertions(+)
 create mode 100644 tests/intel/xe_sriov_migration.c

diff --git a/tests/intel/xe_sriov_migration.c b/tests/intel/xe_sriov_migration.c
new file mode 100644
index 000000000..946deca22
--- /dev/null
+++ b/tests/intel/xe_sriov_migration.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright(c) 2025 Intel Corporation. All rights reserved.
+ */
+
+#include "igt.h"
+#include "igt_sriov_device.h"
+#include "igt_syncobj.h"
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
+#include "xe/xe_sriov_debugfs.h"
+#include "xe/xe_sriov_provisioning.h"
+
+/**
+ * TEST: xe_sriov_migration
+ * Category: Core
+ * Mega feature: SR-IOV
+ * Sub-category: Reset tests
+ * Functionality: VF migration
+ * Description: Examine behavior of SR-IOV VF save-restore (migration)
+ *
+ * SUBTEST: vf-fake-save-restore-auto-1vf
+ * Run type: FULL
+ * Description:
+ *   Verifies VF fake migration: GuC, GGTT and LMEM data is properly restored after FLR.
+ *   1xVF is auto provisioned. VF's GGTT address does not change.
+ *
+ * SUBTEST: vf-fake-save-restore-manual-2vf
+ * Run type: FULL
+ * Description:
+ *   Verifies VF fake migration: GuC, GGTT and LMEM data is properly restored after FLR.
+ *   2xVFs are manually provisioned. VF's GGTT address does not change.
+ *
+ * SUBTEST: vf-fake-save-restore-relocate-manual-1vf
+ * Run type: FULL
+ * Description:
+ *   Verifies VF fake migration: GuC, GGTT and LMEM data is properly restored after FLR.
+ *   1xVF is manually provisioned. VF GGTT is relocated - address changes.
+ *
+ * SUBTEST: vf-fake-save-restore-relocate-auto-2vf
+ * Run type: FULL
+ * Description:
+ *   Verifies VF fake migration: GuC, GGTT and LMEM data is properly restored after FLR.
+ *   2xVFs are auto provisioned. VF GGTT is relocated - address changes.
+ */
+
+IGT_TEST_DESCRIPTION("Xe tests for SR-IOV VF state save/restore (migration)");
+
+/*
+ * Health check used before and after the fake migration: bind a small buffer,
+ * run a single MI_STORE_DWORD_IMM batch on @eci via @fd and verify the value
+ * written by the GPU, then unbind and release everything that was created.
+ */
+static void simple_exec(int fd, struct drm_xe_engine_class_instance *eci)
+{
+	struct {
+		uint32_t batch[16];
+		uint64_t pad;
+		uint32_t data;
+	} *data;
+	struct drm_xe_sync sync[2] = {
+		{ .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
+		{ .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
+	};
+	struct drm_xe_exec exec = {
+		.num_batch_buffer = 1,
+		.num_syncs = 2,
+		.syncs = to_user_pointer(sync),
+	};
+	const uint64_t addr = 0x1a0000;
+	uint64_t batch_addr, sdi_addr;
+	uint32_t vm, bo, exec_queue, syncobjs;
+	size_t bo_size;
+	int b = 0;
+
+	vm = xe_vm_create(fd, 0, 0);
+
+	bo_size = xe_bb_size(fd, sizeof(*data) * 2);
+	bo = xe_bo_create(fd, vm, bo_size, vram_if_possible(fd, eci->gt_id),
+			  DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
+	data = xe_bo_map(fd, bo, bo_size);
+
+	exec_queue = xe_exec_queue_create(fd, vm, eci, 0);
+
+	syncobjs = syncobj_create(fd, 0);
+	sync[0].handle = syncobj_create(fd, 0);
+
+	/* The bind signals sync[0]; only the first sync entry is passed here */
+	xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, sync, 1);
+
+	/* GPU virtual addresses of the batch and of the dword it stores to */
+	batch_addr = addr + ((char *)&data[0].batch - (char *)data);
+	sdi_addr = addr + ((char *)&data[0].data - (char *)data);
+
+	data[0].batch[b++] = MI_STORE_DWORD_IMM_GEN4;
+	data[0].batch[b++] = sdi_addr;
+	data[0].batch[b++] = sdi_addr >> 32;
+	data[0].batch[b++] = 0xc0ffee;
+	data[0].batch[b++] = MI_BATCH_BUFFER_END;
+	igt_assert(b <= ARRAY_SIZE(data[0].batch));
+
+	/* Exec: sync[0] loses SIGNAL (presumably a wait on the bind), sync[1] signals syncobjs */
+	sync[0].flags &= ~DRM_XE_SYNC_FLAG_SIGNAL;
+	sync[1].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
+	sync[1].handle = syncobjs;
+
+	exec.exec_queue_id = exec_queue;
+	exec.address = batch_addr;
+
+	syncobj_reset(fd, &syncobjs, 1);
+
+	xe_exec(fd, &exec);
+
+	igt_assert(syncobj_wait(fd, &syncobjs, 1, INT64_MAX, 0, NULL));
+	igt_assert_eq(data[0].data, 0xc0ffee);
+	igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
+
+	/* Reuse sync[0] as the signalling sync of the unbind and wait for it */
+	sync[0].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
+	xe_vm_unbind_async(fd, vm, 0, 0, addr, bo_size, sync, 1);
+	igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
+	igt_assert_eq(data[0].data, 0xc0ffee);
+
+	syncobj_destroy(fd, sync[0].handle);
+	syncobj_destroy(fd, syncobjs);
+	xe_exec_queue_destroy(fd, exec_queue);
+	munmap(data, bo_size);
+	gem_close(fd, bo);
+	xe_vm_destroy(fd, vm);
+}
+/* Manually provision one VF: fixed LMEM/GGTT/context/doorbell quotas plus scheduling values */
+static void provision_vf(int pf_fd, int vf_num, int gt_num)
+{
+	igt_debug("Provision VF%u\n", vf_num);
+	xe_sriov_pf_set_shared_res_attr(pf_fd, XE_SRIOV_SHARED_RES_LMEM, vf_num, gt_num, SZ_256M);
+	xe_sriov_pf_set_shared_res_attr(pf_fd, XE_SRIOV_SHARED_RES_GGTT, vf_num, gt_num, SZ_1G);
+	xe_sriov_pf_set_shared_res_attr(pf_fd, XE_SRIOV_SHARED_RES_CONTEXTS, vf_num, gt_num, 1024);
+	xe_sriov_pf_set_shared_res_attr(pf_fd, XE_SRIOV_SHARED_RES_DOORBELLS, vf_num, gt_num, 10);
+	xe_sriov_set_exec_quantum_ms(pf_fd, vf_num, gt_num, 500);
+	xe_sriov_set_preempt_timeout_us(pf_fd, vf_num, gt_num, 600 * USEC_PER_MSEC);
+}
+
+/* Test variant flags:
+ * BIT0: enable manual provisioning
+ * BIT1: enable GGTT relocation
+ */
+#define XE_SRIOV_VFSR_MANUAL_PROVISION (1 << 0)
+#define XE_SRIOV_VFSR_GGTT_RELOCATE    (1 << 1)
+
+/*
+ * Fake migration of VF1 on GT0: with @num_vfs VFs enabled on @pf_fd, pause the
+ * VF, read its GuC, GGTT and (if the device has VRAM) LMEM state, reset the VF
+ * with FLR, optionally relocate its GGTT, write the saved state back, then
+ * restore and resume the VF. A simple workload is executed on the VF before
+ * and after this sequence. @test_flags is a mask of XE_SRIOV_VFSR_* bits.
+ */
+static void vf_fake_save_restore(int pf_fd, int num_vfs, uint16_t test_flags)
+{
+	int vf_fd;
+	int vf_num = 1;
+	int gt_num = 0;
+	/* NULL-initialized: LMEM state is only read when VRAM exists, but is always freed */
+	void *guc_state_buf = NULL, *ggtt_raw_buf = NULL, *lmem_state_buf = NULL;
+	int guc_state_size = 0, ggtt_raw_size = 0, lmem_state_size = 0;
+	struct drm_xe_engine_class_instance *hwe;
+
+	igt_require(igt_sriov_get_enabled_vfs(pf_fd) == 0);
+	igt_assert(!igt_sriov_is_vf_drm_driver_probed(pf_fd, vf_num));
+
+	if (test_flags & XE_SRIOV_VFSR_MANUAL_PROVISION) {
+		for (int vf_idx = 1; vf_idx <= num_vfs; vf_idx++)
+			provision_vf(pf_fd, vf_idx, gt_num);
+	}
+
+	igt_info("Enable %u VF(s)\n", num_vfs);
+	igt_sriov_enable_driver_autoprobe(pf_fd);
+	igt_sriov_enable_vfs(pf_fd, num_vfs);
+	igt_assert_eq(num_vfs, igt_sriov_get_enabled_vfs(pf_fd));
+	igt_assert(igt_sriov_is_vf_drm_driver_probed(pf_fd, vf_num));
+
+	vf_fd = igt_sriov_open_vf_drm_device(pf_fd, vf_num);
+
+	igt_debug("Disable runtime PM on VF%u\n", vf_num);
+	/* Setup needs to be called prior to disable runtime PM */
+	igt_assert(igt_setup_runtime_pm(vf_fd));
+	igt_disable_runtime_pm();
+
+	igt_debug("Simple execution on VF%u - pre-migration\n", vf_num);
+	xe_for_each_engine(vf_fd, hwe)
+		simple_exec(vf_fd, hwe);
+
+	igt_info("Pause VF%u\n", vf_num);
+	igt_fail_on(xe_sriov_set_vf_control(pf_fd, vf_num, gt_num, XE_SRIOV_VF_CONTROL_PAUSE));
+
+	igt_info("Read and store VF%u GuC, GGTT and LMEM state\n", vf_num);
+	guc_state_buf = xe_sriov_get_guc_state(pf_fd, vf_num, gt_num, &guc_state_size);
+	igt_assert(guc_state_size > 0);
+	ggtt_raw_buf = xe_sriov_get_ggtt_raw(pf_fd, vf_num, gt_num, &ggtt_raw_size);
+	igt_assert(ggtt_raw_size > 0);
+	if (xe_has_vram(pf_fd)) {
+		lmem_state_buf = xe_sriov_get_lmem_state(pf_fd, vf_num, gt_num, &lmem_state_size);
+		igt_assert(lmem_state_size > 0);
+	}
+
+	/* Trigger VF FLR to get back into VF_READY state */
+	igt_info("Reset VF%u\n", vf_num);
+	igt_assert(igt_sriov_device_reset(pf_fd, vf_num));
+	/* Assume FLR is finished after sleep */
+	sleep(1);
+
+	/* Move GGTT space of VF1 to different area */
+	if (test_flags & XE_SRIOV_VFSR_GGTT_RELOCATE) {
+		igt_info("Relocate VF%u GGTT\n", vf_num);
+		xe_sriov_relocate_ggtt(pf_fd, vf_num, gt_num);
+	}
+
+	/* Pause VF again - only READY_PAUSED VF can have the state restored */
+	igt_info("Pause VF%u again\n", vf_num);
+	igt_fail_on(xe_sriov_set_vf_control(pf_fd, vf_num, gt_num, XE_SRIOV_VF_CONTROL_PAUSE));
+
+	/* Pause overwrites the GuC state copy stored in KMD - restore it from saved buffers */
+	igt_info("Write back VF%u LMEM, GGTT and GuC state\n", vf_num);
+	if (xe_has_vram(pf_fd))
+		xe_sriov_set_lmem_state(pf_fd, vf_num, gt_num, lmem_state_buf, lmem_state_size);
+
+	xe_sriov_set_ggtt_raw(pf_fd, vf_num, gt_num, ggtt_raw_buf, ggtt_raw_size);
+	xe_sriov_set_guc_state(pf_fd, vf_num, gt_num, guc_state_buf, guc_state_size);
+
+	/* Restore the snapshot to actual GuC */
+	igt_info("Restore VF%u state\n", vf_num);
+	igt_fail_on(xe_sriov_set_vf_control(pf_fd, vf_num, gt_num, XE_SRIOV_VF_CONTROL_RESTORE));
+
+	igt_info("Resume VF%u\n", vf_num);
+	igt_fail_on(xe_sriov_set_vf_control(pf_fd, vf_num, gt_num, XE_SRIOV_VF_CONTROL_RESUME));
+
+	/* Execute simple workload to assure the state restore was successful */
+	igt_debug("Simple execution on VF%u - post-migration\n", vf_num);
+	xe_for_each_engine(vf_fd, hwe)
+		simple_exec(vf_fd, hwe);
+
+	/* Cleanup - restore initial state */
+	igt_debug("Restore runtime PM for VF\n");
+	igt_restore_runtime_pm();
+
+	close(vf_fd);
+	free(guc_state_buf);
+	free(ggtt_raw_buf);
+	free(lmem_state_buf);
+
+	igt_debug("Disable VF(s)\n");
+	igt_sriov_disable_vfs(pf_fd);
+}
+
+/* Opens the Xe PF, runs every fake-migration variant, then disables VFs and restores autoprobe */
+igt_main
+{
+	bool autoprobe;
+	int pf_fd;
+
+	igt_fixture {
+		pf_fd = drm_open_driver(DRIVER_XE);
+		igt_require(igt_sriov_is_pf(pf_fd));
+		igt_require(igt_sriov_get_enabled_vfs(pf_fd) == 0);
+		/* Remember the autoprobe setting so the final fixture can put it back */
+		autoprobe = igt_sriov_is_driver_autoprobe_enabled(pf_fd);
+	}
+
+	igt_describe("Perform VF fake migration without GGTT relocation (1xVF auto provisioning)");
+	igt_subtest("vf-fake-save-restore-auto-1vf")
+		vf_fake_save_restore(pf_fd, 1, 0);
+
+	igt_describe("Perform VF fake migration without GGTT relocation (2xVF manual provisioning)");
+	igt_subtest("vf-fake-save-restore-manual-2vf")
+		vf_fake_save_restore(pf_fd, 2, XE_SRIOV_VFSR_MANUAL_PROVISION);
+
+	igt_describe("Perform VF fake migration with GGTT relocation (1xVF manual provisioning)");
+	igt_subtest("vf-fake-save-restore-relocate-manual-1vf")
+		vf_fake_save_restore(pf_fd, 1,
+			XE_SRIOV_VFSR_MANUAL_PROVISION | XE_SRIOV_VFSR_GGTT_RELOCATE);
+
+	igt_describe("Perform VF fake migration with GGTT relocation (2xVF auto provisioning)");
+	igt_subtest("vf-fake-save-restore-relocate-auto-2vf")
+		vf_fake_save_restore(pf_fd, 2, XE_SRIOV_VFSR_GGTT_RELOCATE);
+
+	igt_fixture {
+		igt_sriov_disable_vfs(pf_fd);
+		/* Abort to avoid execution of next tests with enabled VFs */
+		igt_abort_on_f(igt_sriov_get_enabled_vfs(pf_fd) > 0, "Failed to disable VF(s)");
+		if (autoprobe)
+			igt_sriov_enable_driver_autoprobe(pf_fd);
+		else
+			igt_sriov_disable_driver_autoprobe(pf_fd);
+		igt_abort_on_f(autoprobe != igt_sriov_is_driver_autoprobe_enabled(pf_fd),
+				"Failed to restore sriov_drivers_autoprobe value\n");
+		close(pf_fd);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index f8a0ab836..b0100be18 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -318,6 +318,7 @@ intel_xe_progs = [
 	'xe_spin_batch',
 	'xe_sriov_auto_provisioning',
 	'xe_sriov_flr',
+	'xe_sriov_migration',
 	'xe_sriov_scheduling',
 	'xe_sysfs_defaults',
 	'xe_sysfs_preempt_timeout',
-- 
2.39.1



More information about the igt-dev mailing list