[Intel-xe] [PATCH v2 1/1] drm/xe: Add a debugfs for faking gt reset failure.

Himal Prasad Ghimiray himal.prasad.ghimiray at intel.com
Wed May 24 07:06:41 UTC 2023


In case of gt reset failure, KMD notifies userspace about failure
via uevent. To validate this notification we need to ensure gt
reset fails and there is no mechanism to cause failure from hardware.
Hence added a debugfs which will cause fake reset failure.

v1(Rodrigo)
- Change the variable to fake_reset_failure_in_progress.
- Drop usage of READ_ONCE and WRITE_ONCE.
- Follow consistency for variable assignment. Either use
  functions for all the assignments or don't use for any.

Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c         | 15 ++++++++++++++-
 drivers/gpu/drm/xe/xe_gt_debugfs.c | 11 +++++++++++
 drivers/gpu/drm/xe/xe_gt_types.h   |  1 +
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 80d42c7c7cfa..b3ac8b3ab455 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -506,6 +506,9 @@ int xe_gt_init(struct xe_gt *gt)
 	int err;
 	int i;
 
+	/*Fake reset failure should be disabled by default*/
+	gt->reset.fake_reset_failure_in_progress = false;
+
 	INIT_WORK(&gt->reset.worker, gt_reset_worker);
 
 	for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) {
@@ -601,8 +604,18 @@ static int gt_reset(struct xe_gt *gt)
 	xe_gt_info(gt, "reset started\n");
 
 	xe_gt_sanitize(gt);
-
 	xe_device_mem_access_get(gt_to_xe(gt));
+
+	err = gt->reset.fake_reset_failure_in_progress;
+	if (err) {
+		xe_gt_info(gt, "Fake GT reset failure is in progress\n");
+
+		/* Disable fake reset failure for next call */
+		gt->reset.fake_reset_failure_in_progress = false;
+
+		goto err_msg;
+	}
+
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (err)
 		goto err_msg;
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 8bf441e850a0..13d2a974bc00 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -127,6 +127,16 @@ static int register_save_restore(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int fake_reset_failure(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+
+	gt->reset.fake_reset_failure_in_progress = true;
+	xe_gt_reset_async(gt);
+
+	return 0;
+}
+
 static const struct drm_info_list debugfs_list[] = {
 	{"hw_engines", hw_engines, 0},
 	{"force_reset", force_reset, 0},
@@ -135,6 +145,7 @@ static const struct drm_info_list debugfs_list[] = {
 	{"steering", steering, 0},
 	{"ggtt", ggtt, 0},
 	{"register-save-restore", register_save_restore, 0},
+	{"fake_reset_failure", fake_reset_failure, 0},
 };
 
 void xe_gt_debugfs_register(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 7c47d67aa8be..746d65c4aacc 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -168,6 +168,7 @@ struct xe_gt {
 
 	/** @reset: state for GT resets */
 	struct {
+		bool fake_reset_failure_in_progress;
 		/**
 		 * @worker: work so GT resets can done async allowing to reset
 		 * code to safely flush all code paths
-- 
2.25.1



More information about the Intel-xe mailing list