[PATCH v5 3/5] drm/amdgpu: Encapsulate all device reset info

Shashank Sharma shashank.sharma at amd.com
Mon Aug 21 14:21:21 UTC 2023


On 17/08/2023 20:20, André Almeida wrote:
> To better organize struct amdgpu_device, keep all reset-related
> fields together in a separate struct.
>
> Signed-off-by: André Almeida <andrealmeid at igalia.com>
> ---
> v5: new patch, as requested by Shashank Sharma
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 34 +++++++++++++--------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 10 +++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 16 +++++-----
>   3 files changed, 34 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 0d560b713948..56d78ca6e917 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -781,6 +781,26 @@ struct amdgpu_mqd {
>   #define AMDGPU_PRODUCT_NAME_LEN 64
>   struct amdgpu_reset_domain;
>   
> +#ifdef CONFIG_DEV_COREDUMP
> +struct amdgpu_coredump_info {
> +	struct amdgpu_device	*adev;
> +	struct amdgpu_task_info reset_task_info;
> +	struct timespec64	reset_time;
> +	bool			reset_vram_lost;
> +};
> +#endif
> +
> +struct amdgpu_reset_info {
> +	/* reset dump register */
> +	u32 *reset_dump_reg_list;
> +	u32 *reset_dump_reg_value;
> +	int num_regs;
> +
> +#ifdef CONFIG_DEV_COREDUMP
> +	struct amdgpu_coredump_info *coredump_info;
> +#endif
> +};
> +
>   /*
>    * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise.
>    */
> @@ -1084,10 +1104,7 @@ struct amdgpu_device {
>   
>   	struct mutex			benchmark_mutex;
>   
> -	/* reset dump register */
> -	uint32_t                        *reset_dump_reg_list;
> -	uint32_t			*reset_dump_reg_value;
> -	int                             num_regs;
> +	struct amdgpu_reset_info	reset_info;
>   
>   	bool                            scpm_enabled;
>   	uint32_t                        scpm_status;
> @@ -1100,15 +1117,6 @@ struct amdgpu_device {
>   	uint32_t			aid_mask;
>   };
>   
> -#ifdef CONFIG_DEV_COREDUMP
> -struct amdgpu_coredump_info {
> -	struct amdgpu_device		*adev;
> -	struct amdgpu_task_info         reset_task_info;
> -	struct timespec64               reset_time;
> -	bool                            reset_vram_lost;
> -};
> -#endif
> -
>   static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
>   {
>   	return container_of(ddev, struct amdgpu_device, ddev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index a4faea4fa0b5..3136a0774dd9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -2016,8 +2016,8 @@ static ssize_t amdgpu_reset_dump_register_list_read(struct file *f,
>   	if (ret)
>   		return ret;
>   
> -	for (i = 0; i < adev->num_regs; i++) {
> -		sprintf(reg_offset, "0x%x\n", adev->reset_dump_reg_list[i]);
> +	for (i = 0; i < adev->reset_info.num_regs; i++) {
> +		sprintf(reg_offset, "0x%x\n", adev->reset_info.reset_dump_reg_list[i]);
>   		up_read(&adev->reset_domain->sem);
>   		if (copy_to_user(buf + len, reg_offset, strlen(reg_offset)))
>   			return -EFAULT;
> @@ -2074,9 +2074,9 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
>   	if (ret)
>   		goto error_free;
>   
> -	swap(adev->reset_dump_reg_list, tmp);
> -	swap(adev->reset_dump_reg_value, new);
> -	adev->num_regs = i;
> +	swap(adev->reset_info.reset_dump_reg_list, tmp);
> +	swap(adev->reset_info.reset_dump_reg_value, new);
> +	adev->reset_info.num_regs = i;
>   	up_write(&adev->reset_domain->sem);
>   	ret = size;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index b5b879bcc5c9..96975591841d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -4790,10 +4790,10 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
>   
>   	lockdep_assert_held(&adev->reset_domain->sem);
>   
> -	for (i = 0; i < adev->num_regs; i++) {
> -		adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
> -		trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
> -					     adev->reset_dump_reg_value[i]);
> +	for (i = 0; i < adev->reset_info.num_regs; i++) {
> +		adev->reset_info.reset_dump_reg_value[i] = RREG32(adev->reset_info.reset_dump_reg_list[i]);
> +		trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i],
> +					     adev->reset_info.reset_dump_reg_value[i]);
>   	}
>   
>   	return 0;
> @@ -4831,13 +4831,13 @@ static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
>   
>   	if (coredump->reset_vram_lost)
>   		drm_printf(&p, "VRAM is lost due to GPU reset!\n");
> -	if (coredump->adev->num_regs) {
> +	if (coredump->adev->reset_info.num_regs) {
>   		drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n");
>   
> -		for (i = 0; i < coredump->adev->num_regs; i++)
> +		for (i = 0; i < coredump->adev->reset_info.num_regs; i++)
>   			drm_printf(&p, "0x%08x: 0x%08x\n",
> -				   coredump->adev->reset_dump_reg_list[i],
> -				   coredump->adev->reset_dump_reg_value[i]);
> +				   coredump->adev->reset_info.reset_dump_reg_list[i],
> +				   coredump->adev->reset_info.reset_dump_reg_value[i]);
>   	}

Reviewed-by: Shashank Sharma <shashank.sharma at amd.com>

- Shashank
>   
>   	return count - iter.remain;


More information about the amd-gfx mailing list