[Intel-xe] [RFC 1/1] drm/xe: Add a debugfs for faking gt reset failure.

Himal Prasad Ghimiray himal.prasad.ghimiray at intel.com
Mon May 22 08:58:17 UTC 2023


When a GT reset fails, the KMD notifies userspace about the failure
via a uevent. To validate this notification we need to make a GT reset
fail, but there is no mechanism to trigger such a failure from hardware.
Hence add a debugfs entry which causes a fake GT reset failure.

Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
---
 drivers/gpu/drm/xe/xe_gt.c         | 26 +++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_gt_debugfs.c | 11 +++++++++++
 drivers/gpu/drm/xe/xe_gt_types.h   |  3 +++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index d98761d9eeba..e05615ca13e5 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -301,6 +301,9 @@ int xe_gt_init_early(struct xe_gt *gt)
 {
 	int err;
 
+	/* Reset is supported by default */
+	gt->reset_enabled = true;
+
 	xe_force_wake_init_gt(gt, gt_to_fw(gt));
 
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
@@ -605,6 +608,16 @@ static void xe_uevent_gt_reset_failure(struct xe_device *xe, u8 id)
 	kfree(reset_event[3]);
 }
 
+static int reset_disabled(struct xe_gt *gt)
+{
+	return !READ_ONCE(gt->reset_enabled);
+}
+
+static void enable_reset(struct xe_gt *gt)
+{
+	WRITE_ONCE(gt->reset_enabled, true);
+}
+
 static int gt_reset(struct xe_gt *gt)
 {
 	struct xe_device *xe = gt_to_xe(gt);
@@ -617,8 +630,19 @@ static int gt_reset(struct xe_gt *gt)
 	drm_info(&xe->drm, "GT reset started\n");
 
 	xe_gt_sanitize(gt);
-
 	xe_device_mem_access_get(gt_to_xe(gt));
+
+	err = reset_disabled(gt);
+	if (err) {
+		drm_info(&xe->drm, "GT reset is disabled\n");
+
+		/* Re-enable GT reset for the next call; it was disabled
+		 * only to fake a reset failure.
+		 */
+		enable_reset(gt);
+		goto err_msg;
+	}
+
 	err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	if (err)
 		goto err_msg;
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index c45486c2015a..1c3e673c8c60 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -127,6 +127,16 @@ static int register_save_restore(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int fake_reset_failure(struct seq_file *m, void *data)
+{
+	struct xe_gt *gt = node_to_gt(m->private);
+
+	WRITE_ONCE(gt->reset_enabled, false);
+	xe_gt_reset_async(gt);
+
+	return 0;
+}
+
 static const struct drm_info_list debugfs_list[] = {
 	{"hw_engines", hw_engines, 0},
 	{"force_reset", force_reset, 0},
@@ -135,6 +145,7 @@ static const struct drm_info_list debugfs_list[] = {
 	{"steering", steering, 0},
 	{"ggtt", ggtt, 0},
 	{"register-save-restore", register_save_restore, 0},
+	{"fake_reset_failure", fake_reset_failure, 0},
 };
 
 void xe_gt_debugfs_register(struct xe_gt *gt)
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 7c47d67aa8be..7ec19ad0365d 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -175,6 +175,9 @@ struct xe_gt {
 		struct work_struct worker;
 	} reset;
 
+	/** @reset_enabled: GT reset allowed; cleared via debugfs to fake a failure */
+	bool reset_enabled;
+
 	/** @tlb_invalidation: TLB invalidation state */
 	struct {
 		/** @seqno: TLB invalidation seqno, protected by CT lock */
-- 
2.25.1



More information about the Intel-xe mailing list