[PATCH v2 3/3] RFC drm/xe: add fault injection for lmem init check

Riana Tauro riana.tauro at intel.com
Fri Mar 15 10:05:30 UTC 2024


Add a boot-time fault injection point for the lmem init check.
It can be triggered by setting the fail_lmem_init modparam:

xe.fail_lmem_init=<interval>,<probability>,<space>,<times>

Setting this parameter makes the lmem init check fail, which
causes the probe to be deferred.

v2: add fault injection (Lucas)

Signed-off-by: Riana Tauro <riana.tauro at intel.com>
---
 drivers/gpu/drm/xe/xe_device.c | 21 +++++++++++++++++++++
 drivers/gpu/drm/xe/xe_module.c |  5 +++++
 drivers/gpu/drm/xe/xe_module.h |  3 +++
 3 files changed, 29 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 50473329cce7..393610e95bd1 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -51,6 +51,10 @@ struct lockdep_map xe_device_mem_access_lockdep_map = {
 };
 #endif
 
+#ifdef CONFIG_FAULT_INJECTION
+DECLARE_FAULT_ATTR(lmem_init_fail);
+#endif
+
 static int xe_file_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct xe_device *xe = to_xe_device(dev);
@@ -431,6 +435,23 @@ static int wait_for_lmem_ready(struct xe_device *xe)
 	if (IS_SRIOV_VF(xe))
 		return 0;
 
+#ifdef CONFIG_FAULT_INJECTION
+	/*
+	 * Use fault injection to cause an lmem init failure to validate
+	 * deferred probe. Set verbose to 0 to avoid a stack dump.
+	 */
+	if (xe_modparam.fail_lmem_init) {
+		setup_fault_attr(&lmem_init_fail, xe_modparam.fail_lmem_init);
+		lmem_init_fail.verbose = 0;
+		if (should_fail(&lmem_init_fail, 1)) {
+			/* add delay to reduce the number of deferred probe attempts */
+			msleep(500);
+			drm_dbg(&xe->drm, "Fault Injection lmem init failure\n");
+			return -EPROBE_DEFER;
+		}
+	}
+#endif
+
 	if (verify_lmem_ready(gt))
 		return 0;
 
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 110b69864656..c4efbab430a7 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -48,6 +48,11 @@ module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400);
 MODULE_PARM_DESC(force_probe,
 		 "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
 
+#ifdef CONFIG_FAULT_INJECTION
+module_param_named_unsafe(fail_lmem_init, xe_modparam.fail_lmem_init, charp, 0400);
+MODULE_PARM_DESC(fail_lmem_init, "Fault injection. fail_lmem_init=<interval>,<probability>,<space>,<times>");
+#endif
+
 struct init_funcs {
 	int (*init)(void);
 	void (*exit)(void);
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 88ef0e8b2bfd..ccbeacbc3efb 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -18,6 +18,9 @@ struct xe_modparam {
 	char *huc_firmware_path;
 	char *gsc_firmware_path;
 	char *force_probe;
+#if IS_ENABLED(CONFIG_FAULT_INJECTION)
+	char *fail_lmem_init;
+#endif /* CONFIG_FAULT_INJECTION */
 };
 
 extern struct xe_modparam xe_modparam;
-- 
2.40.0



More information about the Intel-xe mailing list