[PATCH Review 1/1] drm/amdgpu: Support setting recover method

Christian König ckoenig.leichtzumerken at gmail.com
Thu Apr 11 11:17:09 UTC 2024


Am 11.04.24 um 13:11 schrieb Stanley.Yang:
> Don't modify amdgpu gpu recover get operation,
> add amdgpu gpu recover set operation to select
> reset method, only support mode1 and mode2 currently.

Well, I don't think setting this from userspace is valid.

The reset method to use is determined by the hardware and environment 
(e.g. SRIOV, passthrough, whatever) and can't simply be chosen.

Regards,
Christian.

>
> Signed-off-by: Stanley.Yang <Stanley.Yang at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  3 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 37 +++++++++++++++++++---
>   3 files changed, 37 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9c62552bec34..c82976b2b977 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1151,6 +1151,9 @@ struct amdgpu_device {
>   	bool                            debug_largebar;
>   	bool                            debug_disable_soft_recovery;
>   	bool                            debug_use_vram_fw_buf;
> +
> +	/* Used to set gpu reset method */
> +	int                             recover_method;
>   };
>   
>   static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 3204b8f6edeb..8411a793be18 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3908,6 +3908,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   	else
>   		adev->asic_type = flags & AMD_ASIC_MASK;
>   
> +	adev->recover_method = AMD_RESET_METHOD_NONE;
>   	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
>   	if (amdgpu_emu_mode == 1)
>   		adev->usec_timeout *= 10;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 10832b470448..e388a50d11d9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -965,9 +965,37 @@ static int gpu_recover_get(void *data, u64 *val)
>   	return 0;
>   }
>   
> +static int gpu_recover_set(void *data, u64 val)
> +{
> +	struct amdgpu_device *adev = (struct amdgpu_device *)data;
> +	struct drm_device *dev = adev_to_drm(adev);
> +	int r;
> +
> +	/* TODO: support mode1 and mode2 currently */
> +	if (val == AMD_RESET_METHOD_MODE1 ||
> +		val == AMD_RESET_METHOD_MODE2)
> +		adev->recover_method = val;
> +	else
> +		adev->recover_method = AMD_RESET_METHOD_NONE;
> +
> +	r = pm_runtime_get_sync(dev->dev);
> +	if (r < 0) {
> +		pm_runtime_put_autosuspend(dev->dev);
> +		return 0;
> +	}
> +
> +	if (amdgpu_reset_domain_schedule(adev->reset_domain, &adev->reset_work))
> +		flush_work(&adev->reset_work);
> +
> +	pm_runtime_mark_last_busy(dev->dev);
> +	pm_runtime_put_autosuspend(dev->dev);
> +
> +	return 0;
> +}
> +
>   DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_fence_info);
> -DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get, NULL,
> -			 "%lld\n");
> +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gpu_recover_fops, gpu_recover_get,
> +			 gpu_recover_set, "%lld\n");
>   
>   static void amdgpu_debugfs_reset_work(struct work_struct *work)
>   {
> @@ -978,9 +1006,10 @@ static void amdgpu_debugfs_reset_work(struct work_struct *work)
>   
>   	memset(&reset_context, 0, sizeof(reset_context));
>   
> -	reset_context.method = AMD_RESET_METHOD_NONE;
> +	reset_context.method = adev->recover_method;
>   	reset_context.reset_req_dev = adev;
>   	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
> +	adev->recover_method = AMD_RESET_METHOD_NONE;
>   
>   	amdgpu_device_gpu_recover(adev, NULL, &reset_context);
>   }
> @@ -999,7 +1028,7 @@ void amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
>   	if (!amdgpu_sriov_vf(adev)) {
>   
>   		INIT_WORK(&adev->reset_work, amdgpu_debugfs_reset_work);
> -		debugfs_create_file("amdgpu_gpu_recover", 0444, root, adev,
> +		debugfs_create_file("amdgpu_gpu_recover", 0666, root, adev,
>   				    &amdgpu_debugfs_gpu_recover_fops);
>   	}
>   #endif



More information about the amd-gfx mailing list