[PATCH 05/11] drm/xe: Add driver load error injection
Matthew Brost
matthew.brost at intel.com
Sat Aug 10 01:55:38 UTC 2024
Port over i915 driver load error injection.
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
drivers/gpu/drm/xe/xe_device.c | 31 ++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_device.h | 15 ++++++++++++++
drivers/gpu/drm/xe/xe_device_types.h | 4 ++++
drivers/gpu/drm/xe/xe_gt.c | 5 +++++
drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 4 ++++
drivers/gpu/drm/xe/xe_guc.c | 8 +++++++
drivers/gpu/drm/xe/xe_guc_ads.c | 5 +++++
drivers/gpu/drm/xe/xe_guc_ct.c | 4 ++++
drivers/gpu/drm/xe/xe_guc_log.c | 5 +++++
drivers/gpu/drm/xe/xe_mmio.c | 5 +++++
drivers/gpu/drm/xe/xe_module.c | 5 +++++
drivers/gpu/drm/xe/xe_module.h | 3 +++
drivers/gpu/drm/xe/xe_pci.c | 9 ++++++++
drivers/gpu/drm/xe/xe_pm.c | 8 +++++++
drivers/gpu/drm/xe/xe_sriov.c | 8 ++++++-
drivers/gpu/drm/xe/xe_tile.c | 4 ++++
drivers/gpu/drm/xe/xe_uc.c | 4 ++++
drivers/gpu/drm/xe/xe_wa.c | 5 +++++
drivers/gpu/drm/xe/xe_wopcm.c | 4 ++++
19 files changed, 135 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 1aba6f9eaa19..f6cd13ed6d20 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -374,6 +374,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
if (WARN_ON(err))
goto err;
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ goto err;
+
return xe;
err:
@@ -477,6 +481,10 @@ static int xe_set_dma_info(struct xe_device *xe)
if (err)
goto mask_err;
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ goto mask_err;
+
return 0;
mask_err:
@@ -580,6 +588,10 @@ int xe_device_probe_early(struct xe_device *xe)
if (err)
return err;
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ return err;
+
xe->wedged.mode = xe_modparam.wedged_mode;
return 0;
@@ -995,3 +1007,22 @@ void xe_device_declare_wedged(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_declare_wedged(gt);
}
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+int __xe_device_inject_driver_load_error(struct xe_device *xe, int err,
+ const char *func, int line)
+{
+ if (xe->inject_driver_load_error >= xe_modparam.inject_driver_load_error)
+ return 0;
+
+ if (++xe->inject_driver_load_error < xe_modparam.inject_driver_load_error)
+ return 0;
+
+ drm_info(&xe->drm, "Injecting failure %d at checkpoint %u [%s:%d]\n",
+ err, xe->inject_driver_load_error, func, line);
+
+ xe_modparam.inject_driver_load_error = 0;
+ return err;
+
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index db6cc8d0d6b8..4f7e9cdac9fe 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -179,4 +179,19 @@ void xe_device_declare_wedged(struct xe_device *xe);
struct xe_file *xe_file_get(struct xe_file *xef);
void xe_file_put(struct xe_file *xef);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+
+int __xe_device_inject_driver_load_error(struct xe_device *xe, int err,
+ const char *func, int line);
+
+#define xe_device_inject_driver_load_error(__xe) \
+ __xe_device_inject_driver_load_error(__xe, -ENODEV, __func__, __LINE__)
+
+#else
+
+#define xe_device_inject_driver_load_error(__xe) \
+ ({ BUILD_BUG_ON_INVALID(__xe); 0; })
+
+#endif
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 5b7292a9a66d..3e620314eec2 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -484,6 +484,10 @@ struct xe_device {
int mode;
} wedged;
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ int inject_driver_load_error;
+#endif
+
#ifdef TEST_VM_OPS_ERROR
/**
* @vm_inject_error_position: inject errors at different places in VM
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 58895ed22f6e..8209079c0334 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -389,6 +389,10 @@ int xe_gt_init_early(struct xe_gt *gt)
xe_pcode_init(gt);
spin_lock_init(>->global_invl_lock);
+ err = xe_device_inject_driver_load_error(gt_to_xe(gt));
+ if (err)
+ return err;
+
return 0;
}
@@ -570,6 +574,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
xe_gt_topology_init(gt);
xe_gt_mcr_init(gt);
+ err = xe_device_inject_driver_load_error(gt_to_xe(gt));
out_fw:
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
out:
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index ef239440963c..897815ddf954 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -57,6 +57,10 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
if (err)
return err;
+ err = xe_device_inject_driver_load_error(gt_to_xe(gt));
+ if (err)
+ return err;
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index de0fe9e65746..980691c178c4 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -354,6 +354,10 @@ int xe_guc_init(struct xe_guc *guc)
if (ret)
goto out;
+ ret = xe_device_inject_driver_load_error(guc_to_xe(guc));
+ if (ret)
+ goto out;
+
guc_init_params(guc);
xe_guc_comm_init_early(guc);
@@ -411,6 +415,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
if (ret)
return ret;
+ ret = xe_device_inject_driver_load_error(guc_to_xe(guc));
+ if (ret)
+ return ret;
+
return xe_guc_ads_init_post_hwconfig(&guc->ads);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index d1902a8581ca..1944912ef9b8 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -402,6 +402,7 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
struct xe_gt *gt = ads_to_gt(ads);
struct xe_tile *tile = gt_to_tile(gt);
struct xe_bo *bo;
+ int err;
ads->golden_lrc_size = calculate_golden_lrc_size(ads);
ads->regset_size = calculate_regset_size(gt);
@@ -416,6 +417,10 @@ int xe_guc_ads_init(struct xe_guc_ads *ads)
ads->bo = bo;
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ return err;
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index beeeb120d1fc..76a26aaabb13 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -197,6 +197,10 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
if (err)
return err;
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ return err;
+
xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED);
ct->state = XE_GUC_CT_STATE_DISABLED;
return 0;
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index a37ee3419428..f26c37e3ee3a 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -82,6 +82,7 @@ int xe_guc_log_init(struct xe_guc_log *log)
struct xe_device *xe = log_to_xe(log);
struct xe_tile *tile = gt_to_tile(log_to_gt(log));
struct xe_bo *bo;
+ int err;
bo = xe_managed_bo_create_pin_map(xe, tile, guc_log_size(),
XE_BO_FLAG_SYSTEM |
@@ -94,5 +95,9 @@ int xe_guc_log_init(struct xe_guc_log *log)
log->bo = bo;
log->level = xe_modparam.guc_log_level;
+ err = xe_device_inject_driver_load_error(log_to_xe(log));
+ if (err)
+ return err;
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index f5bdb540e823..12ad2f73e8a4 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -137,6 +137,11 @@ int xe_mmio_probe_tiles(struct xe_device *xe)
{
size_t tile_mmio_size = SZ_16M;
size_t tile_mmio_ext_size = xe->info.tile_mmio_ext_size;
+ int err;
+
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ return err;
mmio_multi_tile_setup(xe, tile_mmio_size);
mmio_extension_setup(xe, tile_mmio_size, tile_mmio_ext_size);
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 7bb99e451fcc..972b64a9f514 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -53,6 +53,11 @@ module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400);
MODULE_PARM_DESC(force_probe,
"Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+module_param_named_unsafe(inject_driver_load_error, xe_modparam.inject_driver_load_error, int, 0600);
+MODULE_PARM_DESC(inject_driver_load_error, "Inject driver load error");
+#endif
+
#ifdef CONFIG_PCI_IOV
module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400);
MODULE_PARM_DESC(max_vfs,
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 61a0d28a28c8..409ea10be942 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -20,6 +20,9 @@ struct xe_modparam {
char *force_probe;
#ifdef CONFIG_PCI_IOV
unsigned int max_vfs;
+#endif
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ int inject_driver_load_error;
#endif
int wedged_mode;
};
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index f818aa69f3ca..8b278c83128a 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -629,6 +629,10 @@ static int xe_info_init_early(struct xe_device *xe,
if (err)
return err;
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ return err;
+
return 0;
}
@@ -645,6 +649,7 @@ static int xe_info_init(struct xe_device *xe,
u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0;
struct xe_tile *tile;
struct xe_gt *gt;
+ int err;
u8 id;
/*
@@ -745,6 +750,10 @@ static int xe_info_init(struct xe_device *xe,
gt->info.id = xe->info.gt_count++;
}
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ return err;
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index 9f3c14fd9f33..64d992c12364 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -231,6 +231,10 @@ int xe_pm_init_early(struct xe_device *xe)
if (err)
return err;
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ return err;
+
return 0;
}
@@ -264,6 +268,10 @@ int xe_pm_init(struct xe_device *xe)
xe_pm_runtime_init(xe);
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ return err;
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index 5a1d65e4f19f..1e738f1d80df 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -102,11 +102,17 @@ static void fini_sriov(struct drm_device *drm, void *arg)
*/
int xe_sriov_init(struct xe_device *xe)
{
+ int err;
+
if (!IS_SRIOV(xe))
return 0;
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ return err;
+
if (IS_SRIOV_PF(xe)) {
- int err = xe_sriov_pf_init_early(xe);
+ err = xe_sriov_pf_init_early(xe);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 15ea0a942f67..2d25c7b59b0d 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -124,6 +124,10 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id)
if (IS_ERR(tile->primary_gt))
return PTR_ERR(tile->primary_gt);
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ return err;
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 0d073a9987c2..a3786020838b 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -62,6 +62,10 @@ int xe_uc_init(struct xe_uc *uc)
if (ret)
goto err;
+ ret = xe_device_inject_driver_load_error(uc_to_xe(uc));
+ if (ret)
+ goto err;
+
return 0;
err:
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 564e32e44e3b..e558715d8027 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -821,6 +821,7 @@ int xe_wa_init(struct xe_gt *gt)
struct xe_device *xe = gt_to_xe(gt);
size_t n_oob, n_lrc, n_engine, n_gt, total;
unsigned long *p;
+ int err;
n_gt = BITS_TO_LONGS(ARRAY_SIZE(gt_was));
n_engine = BITS_TO_LONGS(ARRAY_SIZE(engine_was));
@@ -840,6 +841,10 @@ int xe_wa_init(struct xe_gt *gt)
p += n_lrc;
gt->wa_active.oob = p;
+ err = xe_device_inject_driver_load_error(xe);
+ if (err)
+ return err;
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_wopcm.c b/drivers/gpu/drm/xe/xe_wopcm.c
index d3a99157e523..edaad1c93e58 100644
--- a/drivers/gpu/drm/xe/xe_wopcm.c
+++ b/drivers/gpu/drm/xe/xe_wopcm.c
@@ -263,6 +263,10 @@ int xe_wopcm_init(struct xe_wopcm *wopcm)
return -E2BIG;
}
+ ret = xe_device_inject_driver_load_error(xe);
+ if (ret)
+ return ret;
+
if (!locked)
ret = __wopcm_init_regs(xe, gt, wopcm);
--
2.34.1
More information about the Intel-xe
mailing list