[PATCH Review 1/1] drm/amdgpu: Support setting recover method
Stanley.Yang
Stanley.Yang at amd.com
Thu Apr 11 11:11:08 UTC 2024
Don't modify amdgpu gpu recover get operation,
add amdgpu gpu recover set operation to select
reset method, only support mode1 and mode2 currently.
Signed-off-by: Stanley.Yang <Stanley.Yang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 37 +++++++++++++++++++---
3 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9c62552bec34..c82976b2b977 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1151,6 +1151,9 @@ struct amdgpu_device {
bool debug_largebar;
bool debug_disable_soft_recovery;
bool debug_use_vram_fw_buf;
+
+ /* Used to set gpu reset method */
+ int recover_method;
};
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3204b8f6edeb..8411a793be18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3908,6 +3908,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
else
adev->asic_type = flags & AMD_ASIC_MASK;
+ adev->recover_method = AMD_RESET_METHOD_NONE;
adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
if (amdgpu_emu_mode == 1)
adev->usec_timeout *= 10;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 10832b470448..e388a50d11d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -965,9 +965,37 @@ static int gpu_recover_get(void *data, u64 *val)
return 0;
}
+static int gpu_recover_set(void *data, u64 val)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)data;
+ struct drm_device *dev = adev_to_drm(adev);
+ int r;
+
+ /* TODO: support mode1 and mode2 currently */
+ if (val == AMD_RESET_METHOD_MODE1 ||
+ val == AMD_RESET_METHOD_MODE2)
+ adev->recover_method = val;
+ else
+ adev->recover_method = AMD_RESET_METHOD_NONE;
+
+ r = pm_runtime_get_sync(dev->dev);
+ if (r < 0) {
+ pm_runtime_put_autosuspend(dev->dev);
+ return 0;
+ }
+
+ if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
+ flush_work(&adev->reset_work);
+
+ pm_runtime_mark_last_busy(dev->dev);
+ pm_runtime_put_autosuspend(dev->dev);
+
+ return 0;
+}
+
DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
-DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
- "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get,
+ gpu_recover_set, "%lld\n");
static void amdgpu_debugfs_reset_work(struct work_struct *work)
{
@@ -978,9 +1006,10 @@ static void amdgpu_debugfs_reset_work(struct work_struct *work)
memset(&reset_context, 0, sizeof(reset_context));
- reset_context.method = AMD_RESET_METHOD_NONE;
+ reset_context.method = adev->recover_method;
reset_context.reset_req_dev = adev;
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ adev->recover_method = AMD_RESET_METHOD_NONE;
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
@@ -999,7 +1028,7 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev)) {
INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
- debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
+ debugfs_create_file("amdgpu_gpu_recover", 0666, root, adev,
&amdgpu_debugfs_gpu_recover_fops);
}
#endif
--
2.25.1
More information about the amd-gfx
mailing list