[PATCH Review 1/1] drm/amdgpu: Support setting recover method

Stanley.Yang Stanley.Yang at amd.com
Thu Apr 11 11:11:08 UTC 2024


Leave the amdgpu gpu recover get operation unchanged and
add an amdgpu gpu recover set operation to select the
reset method. Only mode1 and mode2 are supported currently.

Signed-off-by: Stanley.Yang <Stanley.Yang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 37 +++++++++++++++++++---
 3 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c62552bec34..c82976b2b977 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1151,6 +1151,9 @@ struct amdgpu_device {
 	bool                            debug_largebar;
 	bool                            debug_disable_soft_recovery;
 	bool                            debug_use_vram_fw_buf;
+
+	/* Used to set gpu reset method */
+	int                             recover_method;
 };
 
 static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3204b8f6edeb..8411a793be18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3908,6 +3908,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	else
 		adev->asic_type = flags & AMD_ASIC_MASK;
 
+	adev->recover_method = AMD_RESET_METHOD_NONE;
 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
 	if (amdgpu_emu_mode == 1)
 		adev->usec_timeout *= 10;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 10832b470448..e388a50d11d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -965,9 +965,37 @@ static int gpu_recover_get(void *data, u64 *val)
 	return 0;
 }
 
+/* debugfs write: select mode1/mode2 (else NONE) and trigger a recovery. */
+static int gpu_recover_set(void *data, u64 val)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)data;
+	struct drm_device *dev = adev_to_drm(adev);
+	int r;
+
+	r = pm_runtime_get_sync(dev->dev);
+	if (r < 0) {
+		pm_runtime_put_autosuspend(dev->dev);
+		return r;
+	}
+
+	/* Set after resume so a failed write leaves no stale method. */
+	if (val == AMD_RESET_METHOD_MODE1 || val == AMD_RESET_METHOD_MODE2)
+		adev->recover_method = val;
+	else
+		adev->recover_method = AMD_RESET_METHOD_NONE;
+
+	if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
+		flush_work(&adev->reset_work);
+
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
+	return 0;
+}
+
 DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
-DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
-			 "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get,
+			 gpu_recover_set, "%lld\n");
 
 static void amdgpu_debugfs_reset_work(struct work_struct *work)
 {
@@ -978,9 +1006,10 @@ static void amdgpu_debugfs_reset_work(struct work_struct *work)
 
 	memset(&reset_context, 0, sizeof(reset_context));
 
-	reset_context.method = AMD_RESET_METHOD_NONE;
+	reset_context.method = adev->recover_method;
 	reset_context.reset_req_dev = adev;
 	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+	adev->recover_method = AMD_RESET_METHOD_NONE;
 
 	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
 }
@@ -999,7 +1028,7 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
 	if (!amdgpu_sriov_vf(adev)) {
 
 		INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
-		debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
+		debugfs_create_file("amdgpu_gpu_recover", 0666, root, adev,
 				    &amdgpu_debugfs_gpu_recover_fops);
 	}
 #endif
-- 
2.25.1



More information about the amd-gfx mailing list