[RFC] [PATCH i-g-t] tests/intel/xe_fault_injection: Inject errors for VF provision

Satyanarayana K V P satyanarayana.k.v.p at intel.com
Mon May 26 14:01:34 UTC 2025


Use the kernel fault injection infrastructure to test error handling of xe
during VF provisioning of various resources like GGTT, contexts, lmem (in
case of DGFX) and doorbells so that more code paths are tested, such as
error handling and unwinding.

The test injects multiple errors into each resource and tests all possible
ways of error handling.

Errors can be injected using the following subtests:
igt@xe_fault_injection@probe-fail-vf-provision-xe_should_fail_vf_provisioning_ggtt
igt@xe_fault_injection@probe-fail-vf-provision-xe_should_fail_vf_provisioning_ctxs
igt@xe_fault_injection@probe-fail-vf-provision-xe_should_fail_vf_provisioning_dbs
igt@xe_fault_injection@probe-fail-vf-provision-xe_should_fail_vf_provisioning_lmem

Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
---
Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>

Test-With: 20250523073638.24842-1-satyanarayana.k.v.p at intel.com
---
 tests/intel/xe_fault_injection.c | 166 +++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)

diff --git a/tests/intel/xe_fault_injection.c b/tests/intel/xe_fault_injection.c
index f9bd5c761..40b5b2e16 100644
--- a/tests/intel/xe_fault_injection.c
+++ b/tests/intel/xe_fault_injection.c
@@ -29,6 +29,42 @@
 #define BO_SIZE		(1024*1024)
 #define INJECT_ITERATIONS	100
 
+enum {
+	VF_PROVISION_MIN  = 0,
+	VF_PROVISION_GGTT = VF_PROVISION_MIN,
+	VF_PROVISION_LMEM,
+	VF_PROVISION_CTXS,
+	VF_PROVISION_DBS,
+	VF_PROVISION_MAX,
+};
+
+enum {
+	VF_PROVISION_ERR_MIN,
+	/* Force resource location invalid */
+	VF_PROVISION_ERR_EINVAL = VF_PROVISION_ERR_MIN,
+	/* Force resource size bigger than HW limit */
+	VF_PROVISION_ERR_ENOSPC,
+	/* Force resource size to zero */
+	VF_PROVISION_ERR_ENODATA,
+	/* Force resource size larger than received with invalid base address. */
+	VF_PROVISION_ERR_ESRMNT,
+	/* Force resource size smaller than received */
+	VF_PROVISION_ERR_EREMCHG,
+	/* Force resource size larger than received */
+	VF_PROVISION_ERR_EDQUOT,
+	VF_PROVISION_ERR_MAX
+};
+
+/* Negative Linux errno values passed to set_retval(), one per failure kind. */
+static const int vf_provision_err_inject_list[VF_PROVISION_ERR_MAX] = {
+	[VF_PROVISION_ERR_EINVAL]  = -22,	/* -EINVAL */
+	[VF_PROVISION_ERR_ENOSPC]  = -28,	/* -ENOSPC */
+	[VF_PROVISION_ERR_ENODATA] = -61,	/* -ENODATA */
+	[VF_PROVISION_ERR_ESRMNT]  = -69,	/* -ESRMNT */
+	[VF_PROVISION_ERR_EREMCHG] = -78,	/* -EREMCHG */
+	[VF_PROVISION_ERR_EDQUOT]  = -122,	/* -EDQUOT */
+};
+
 int32_t inject_iters_raw;
 struct fault_injection_params {
 	/* @probability: Likelihood of failure injection, in percent. */
@@ -71,6 +107,11 @@ static bool function_is_part_of_guc(const char function_name[])
 	       strstr(function_name, "_wopcm_") != NULL;
 }
 
+static bool function_is_part_of_vf(const char function_name[])
+{
+	return !!strstr(function_name, "_vf_");	/* true when the name contains "_vf_" */
+}
+
 static void ignore_faults_in_dmesg(const char function_name[])
 {
 	/* Driver probe is expected to fail in all cases, so ignore in igt_runner */
@@ -87,6 +128,19 @@ static void ignore_faults_in_dmesg(const char function_name[])
 		strcat(regex, "|GT[0-9a-fA-F]*: GuC PC query task state failed:	-ENOMEM");
 	}
 
+	/*
+	 * If VF provisioning faults are injected, GuC init and VF provisioning
+	 * are expected to fail, so ignore those failures in igt_runner.
+	 */
+	if (function_is_part_of_vf(function_name)) {
+		strcat(regex, "|GT[0-9a-fA-F]*: GuC init failed with -ENOMEM");
+		strcat(regex, "|GT[0-9a-fA-F]*: Failed to initialize uC .-ENOMEM");
+		strcat(regex, "|GT[0-9a-fA-F]*: VF: Unexpected GGTT reassignment: [0-9] != [0-9]");
+		strcat(regex, "|GT[0-9a-fA-F]*: VF: Unexpected CTXs reassignment: [0-9] != [0-9]");
+		strcat(regex, "|GT[0-9a-fA-F]*: VF: Unexpected DBs reassignment: [0-9] != [0-9]");
+		strcat(regex, "|GT[0-9a-fA-F]*: VF: Unexpected LMEM reassignment: [0-9] != [0-9]");
+	}
+
 	igt_emit_ignore_dmesg_regex(regex);
 }
 
@@ -278,6 +332,103 @@ static void probe_fail_guc(int fd, char pci_slot[], const char function_name[],
 	}
 }
 
+/*
+ * Fill @fault_params so that the injected failure targets @fault_type.
+ *
+ * "space" is set to the 1-based position of the resource among those listed
+ * for the platform (GGTT, LMEM only when @has_vram is non-zero, CTXs, DBs)
+ * and "times" is set to 1. Presumably the position selects which call of the
+ * injected function fails - TODO confirm against the kernel fault-injection
+ * documentation. @fault_params is left untouched for any other @fault_type,
+ * including LMEM when there is no VRAM.
+ */
+static void get_fault_params(int fault_type, int has_vram,
+			     struct fault_injection_params *fault_params)
+{
+	/* Row 0: no VRAM, row 1: VRAM present. 0 means "nothing to inject". */
+	static const int position[2][VF_PROVISION_MAX] = {
+		{
+			[VF_PROVISION_GGTT] = 1,
+			[VF_PROVISION_CTXS] = 2,
+			[VF_PROVISION_DBS]  = 3,
+		},
+		{
+			[VF_PROVISION_GGTT] = 1,
+			[VF_PROVISION_LMEM] = 2,
+			[VF_PROVISION_CTXS] = 3,
+			[VF_PROVISION_DBS]  = 4,
+		},
+	};
+	int slot = 0;
+
+	igt_assert(fault_params);
+
+	igt_debug("has_vram = %d, fault_type = %d\n", has_vram, fault_type);
+
+	if (fault_type >= VF_PROVISION_MIN && fault_type < VF_PROVISION_MAX)
+		slot = position[has_vram ? 1 : 0][fault_type];
+	if (!slot)
+		return;
+
+	fault_params->space = slot;
+	fault_params->times = 1;
+}
+
+/**
+ * SUBTEST: probe-fail-vf-provision-%s
+ * Description: inject an error in the injectable function %arg[1] then reprobe driver
+ * Functionality: fault
+ *
+ * arg[1]:
+ * @xe_should_fail_vf_provisioning_ggtt:     Inject an error when provisioning ggtt.
+ * @xe_should_fail_vf_provisioning_lmem:     Inject an error when provisioning lmem.
+ * @xe_should_fail_vf_provisioning_ctxs:     Inject an error when provisioning ctxs.
+ * @xe_should_fail_vf_provisioning_dbs:      Inject an error when provisioning dbs.
+ */
+static void probe_fail_vf_provision(int fd, char pci_slot[], const char function_name[],
+				    struct fault_injection_params *fault_params)
+{
+	int auto_probe_en = igt_sriov_is_driver_autoprobe_enabled(fd);
+	const char *func_name = "xe_should_fail_vf_provisioning";
+	unsigned int totalvfs = igt_sriov_get_total_vfs(fd);
+	const bool has_vram = xe_has_vram(fd);
+	int fault_type, i;
+
+	igt_skip_on(!totalvfs);
+	igt_assert(fault_params);
+
+	if (!strcmp("xe_should_fail_vf_provisioning_ggtt", function_name))
+		fault_type = VF_PROVISION_GGTT;
+	else if (!strcmp("xe_should_fail_vf_provisioning_lmem", function_name))
+		fault_type = VF_PROVISION_LMEM;
+	else if (!strcmp("xe_should_fail_vf_provisioning_ctxs", function_name))
+		fault_type = VF_PROVISION_CTXS;
+	else if (!strcmp("xe_should_fail_vf_provisioning_dbs", function_name))
+		fault_type = VF_PROVISION_DBS;
+	else
+		fault_type = VF_PROVISION_GGTT;
+
+	igt_skip_on(fault_type == VF_PROVISION_LMEM && !has_vram);
+
+	ignore_faults_in_dmesg(function_name);
+	for (i = VF_PROVISION_ERR_MIN; i < VF_PROVISION_ERR_MAX; i++) {
+		if (igt_sriov_get_enabled_vfs(fd))
+			igt_sriov_disable_vfs(fd);
+		/* The targeted resource is encoded in fault_params by get_fault_params(). */
+		get_fault_params(fault_type, has_vram, fault_params);
+		setup_injection_fault(fault_params);
+
+		injection_list_add(func_name);
+		set_retval(func_name, vf_provision_err_inject_list[i]);
+		/* Enable the VFs with driver autoprobe on, then restore the previous state. */
+		igt_sriov_enable_driver_autoprobe(fd);
+		igt_sriov_enable_vfs(fd, totalvfs);
+		igt_sriov_disable_vfs(fd);
+		if (!auto_probe_en)
+			igt_sriov_disable_driver_autoprobe(fd);
+		injection_list_remove(func_name);
+	}
+}
 /**
  * SUBTEST: exec-queue-create-fail-%s
  * Description: inject an error in function %arg[1] used in exec queue create IOCTL to make it fail
@@ -551,6 +702,14 @@ igt_main_args("I:", NULL, help_str, opt_handler, NULL)
 		{ }
 	};
 
+	const struct section vf_proviosin_fail_functions[] = {
+		{ "xe_should_fail_vf_provisioning_ggtt" },
+		{ "xe_should_fail_vf_provisioning_lmem" },
+		{ "xe_should_fail_vf_provisioning_ctxs" },
+		{ "xe_should_fail_vf_provisioning_dbs" },
+		{}
+	};
+
 	igt_fixture {
 		igt_require(fail_function_injection_enabled());
 		fd = drm_open_driver(DRIVER_XE);
@@ -586,6 +745,13 @@ igt_main_args("I:", NULL, help_str, opt_handler, NULL)
 		igt_subtest_f("oa-add-config-fail-%s", s->name)
 			oa_add_config_fail(fd, sysfs, devid, s->name);
 
+	for (const struct section *s = vf_proviosin_fail_functions; s->name; s++)
+		igt_subtest_f("probe-fail-vf-provision-%s", s->name) {
+			memcpy(&fault_params, &default_fault_params,
+					sizeof(struct fault_injection_params));
+			probe_fail_vf_provision(fd, pci_slot, s->name, &fault_params);
+		}
+
 	igt_fixture {
 		igt_kmod_unbind("xe", pci_slot);
 	}
-- 
2.43.0



More information about the igt-dev mailing list