[PATCH 1/2] drm/amd/pm: Add STB dump function.

Lazar, Lijo lijo.lazar at amd.com
Thu Mar 10 05:17:43 UTC 2022



On 3/10/2022 2:33 AM, Andrey Grodzovsky wrote:
> It will be used during GPU reset.
> 
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> ---
>   drivers/gpu/drm/amd/pm/amdgpu_dpm.c           | 10 +++++++
>   drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h       |  3 +++
>   drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     | 26 +++++++++++++++++++
>   drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  2 ++
>   4 files changed, 41 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index 1d63f1e8884c..815a6367d834 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -1327,6 +1327,16 @@ void amdgpu_dpm_stb_debug_fs_init(struct amdgpu_device *adev)
>   	amdgpu_smu_stb_debug_fs_init(adev);
>   }
>   
> +void amdgpu_dpm_stb_dump(struct amdgpu_device *adev)
> +{
> +	struct smu_context *smu = adev->powerplay.pp_handle;
> +
> +	if (!is_support_sw_smu(adev))
> +		return;
> +
> +	smu_stb_dump(smu);
> +}
> +
>   int amdgpu_dpm_display_configuration_change(struct amdgpu_device *adev,
>   					    const struct amd_pp_display_configuration *input)
>   {
> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> index ddfa55b59d02..99351d463a72 100644
> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> @@ -503,6 +503,7 @@ int amdgpu_dpm_set_pp_table(struct amdgpu_device *adev,
>   			    size_t size);
>   int amdgpu_dpm_get_num_cpu_cores(struct amdgpu_device *adev);
>   void amdgpu_dpm_stb_debug_fs_init(struct amdgpu_device *adev);
> +void amdgpu_dpm_stb_dump(struct amdgpu_device *adev);
>   int amdgpu_dpm_display_configuration_change(struct amdgpu_device *adev,
>   					    const struct amd_pp_display_configuration *input);
>   int amdgpu_dpm_get_clock_by_type(struct amdgpu_device *adev,
> @@ -540,4 +541,6 @@ enum pp_smu_status amdgpu_dpm_get_uclk_dpm_states(struct amdgpu_device *adev,
>   						  unsigned int *num_states);
>   int amdgpu_dpm_get_dpm_clock_table(struct amdgpu_device *adev,
>   				   struct dpm_clocks *clock_table);
> +
> +
>   #endif
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index 7e79a67bb8ef..aff0ed9b6f9a 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -2958,6 +2958,32 @@ int smu_stb_collect_info(struct smu_context *smu, void *buf, uint32_t size)
>   	return smu->ppt_funcs->stb_collect_info(smu, buf, size);
>   }
>   
> +void smu_stb_dump(struct smu_context *smu)
> +{
> +	unsigned char *buf;
> +
> +	/* STB is disabled */
> +	if (!drm_debug_enabled(DRM_UT_DRIVER) || !smu->stb_context.enabled)
> +		return;
> +
> +	buf = kvmalloc_array(smu->stb_context.stb_buf_size, sizeof(*buf), GFP_KERNEL);
> +	if (!buf)
> +		return;
> +
> +	if (smu_stb_collect_info(smu, buf, smu->stb_context.stb_buf_size))
> +		goto out;
> +
> +	DRM_DEV_DEBUG_DRIVER(smu->adev->dev, "START PRINT STB DUMP");
> +	print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE,
> +		       4, 4, buf, smu->stb_context.stb_buf_size, false);

Printing to kernel log with each reset doesn't look great. It makes it 
difficult to associate this with a corresponding register dump. Instead, 
it is better to have a buffer accepted from user through debugfs and 
copy the data there for each reset. The app may keep the reset data for 
a particular reset together at one place.

Thanks,
Lijo

> +	DRM_DEV_DEBUG_DRIVER(smu->adev->dev, "END PRINT STB DUMP");
> +
> +	return;
> +
> +out:
> +	kvfree(buf);
> +}
> +
>   #if defined(CONFIG_DEBUG_FS)
>   
>   static int smu_stb_debugfs_open(struct inode *inode, struct file *filp)
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> index fbef3ab8d487..991586f61a85 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> @@ -1426,6 +1426,8 @@ int smu_wait_for_event(struct smu_context *smu, enum smu_event_type event,
>   		       uint64_t event_arg);
>   int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc);
>   int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size);
> +void smu_stb_dump(struct smu_context *smu);
> +
>   void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
>   int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
>   #endif
> 


More information about the amd-gfx mailing list