[PATCH v2 3/3] RFC drm/xe: add fault injection for lmem init check
Ghimiray, Himal Prasad
himal.prasad.ghimiray at intel.com
Fri Mar 15 10:03:31 UTC 2024
On 15-03-2024 15:35, Riana Tauro wrote:
> add a boot time fault injection for lmem init check.
> This can be triggered by adding a modparam fail_lmem_init
>
> xe.fail_lmem_init=<interval>,<probability>,<space>,<times>
>
> Adding this causes the lmem init check to fail causing
> the probe to defer.
IIUC, this fault injection is introduced for validating probe deferral
in case of lmem init failure.
Can you come up with an IGT test to validate the same?
The rest looks good to me.
>
> v2: add fault injection (Lucas)
>
> Signed-off-by: Riana Tauro <riana.tauro at intel.com>
> ---
> drivers/gpu/drm/xe/xe_device.c | 21 +++++++++++++++++++++
> drivers/gpu/drm/xe/xe_module.c | 5 +++++
> drivers/gpu/drm/xe/xe_module.h | 3 +++
> 3 files changed, 29 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 50473329cce7..393610e95bd1 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -51,6 +51,10 @@ struct lockdep_map xe_device_mem_access_lockdep_map = {
> };
> #endif
>
> +#ifdef CONFIG_FAULT_INJECTION
> +DECLARE_FAULT_ATTR(lmem_init_fail);
> +#endif
> +
> static int xe_file_open(struct drm_device *dev, struct drm_file *file)
> {
> struct xe_device *xe = to_xe_device(dev);
> @@ -431,6 +435,23 @@ static int wait_for_lmem_ready(struct xe_device *xe)
> if (IS_SRIOV_VF(xe))
> return 0;
>
> +#ifdef CONFIG_FAULT_INJECTION
> + /*
> + * use fault injection to cause a lmem init failure to validate
> + * deferred probe. Set the verbose to 0 to avoid dump stack
> + */
> + if (xe_modparam.fail_lmem_init) {
> + setup_fault_attr(&lmem_init_fail, xe_modparam.fail_lmem_init);
> + lmem_init_fail.verbose = 0;
> + if (should_fail(&lmem_init_fail, 1)) {
> + /* add delay to reduce the number of deferred probe attempts */
> + msleep(500);
> + drm_dbg(&xe->drm, "Fault Injection lmem init failure\n");
> + return -EPROBE_DEFER;
> + }
> + }
> +#endif
> +
> if (verify_lmem_ready(gt))
> return 0;
>
> diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
> index 110b69864656..c4efbab430a7 100644
> --- a/drivers/gpu/drm/xe/xe_module.c
> +++ b/drivers/gpu/drm/xe/xe_module.c
> @@ -48,6 +48,11 @@ module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400);
> MODULE_PARM_DESC(force_probe,
> "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
>
> +#ifdef CONFIG_FAULT_INJECTION
> +module_param_named_unsafe(fail_lmem_init, xe_modparam.fail_lmem_init, charp, 0400);
> +MODULE_PARM_DESC(fail_lmem_init, "Fault injection. fail_lmem_init=<interval>,<probability>,<space>,<times>");
> +#endif
> +
> struct init_funcs {
> int (*init)(void);
> void (*exit)(void);
> diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
> index 88ef0e8b2bfd..ccbeacbc3efb 100644
> --- a/drivers/gpu/drm/xe/xe_module.h
> +++ b/drivers/gpu/drm/xe/xe_module.h
> @@ -18,6 +18,9 @@ struct xe_modparam {
> char *huc_firmware_path;
> char *gsc_firmware_path;
> char *force_probe;
> +#if IS_ENABLED(CONFIG_FAULT_INJECTION)
> + char *fail_lmem_init;
> +#endif /* CONFIG_FAULT_INJECTION */
> };
>
> extern struct xe_modparam xe_modparam;
More information about the Intel-xe
mailing list