[PATCH] drm/amdgpu: SW part of MES event log enablement

Felix Kuehling felix.kuehling at amd.com
Thu Nov 23 19:27:43 UTC 2023


On 2023-11-23 14:12, shaoyunl wrote:
> This is the generic SW part, prepare the event log buffer and dump it through debugfs
>
> Signed-off-by: shaoyunl <shaoyun.liu at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |  2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c     | 61 +++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h     |  5 ++
>   4 files changed, 70 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index a53f436fa9f1..8b2cbeae99ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -2140,6 +2140,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
>   	amdgpu_debugfs_firmware_init(adev);
>   	amdgpu_ta_if_debugfs_init(adev);
>   
> +	amdgpu_debugfs_mes_event_log_init(adev);
> +
>   #if defined(CONFIG_DRM_AMD_DC)
>   	if (adev->dc_enabled)
>   		dtn_debugfs_init(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
> index 371a6f0deb29..0425432d8659 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
> @@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
>   void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
>   void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
>   void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
> +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
> +
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index 45280fb0e00c..b7af24d7db0e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -97,6 +97,26 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
>   	return 0;
>   }
>   
> +static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
> +{
> +	int r;
> +
> +	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> +				    AMDGPU_GEM_DOMAIN_GTT,
> +				    &adev->mes.event_log_gpu_obj,
> +				    &adev->mes.event_log_gpu_addr,
> +				    &adev->mes.event_log_cpu_addr);
> +	if (r) {
> +		dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);
> +		return r;
> +	}
> +
> +	memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
> +
> +	return  0;
> +
> +}
> +
>   static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
>   {
>   	bitmap_free(adev->mes.doorbell_bitmap);
> @@ -181,6 +201,12 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
>   	if (r)
>   		goto error;
>   
> +	r = amdgpu_mes_event_log_init(adev);
> +	if (r) {
> +		amdgpu_mes_doorbell_free(adev);
> +		goto error;

The usual preferred way of handling errors with goto is to add another
error label and do all the cleanup in reverse order of initialization.
Then just jump to the correct error label depending on where the error
happened. So here, instead of calling amdgpu_mes_doorbell_free() inline,
you would just goto error_doorbell. See below.


> +	}
> +
>   	return 0;
>   

So you'd create another error label here, just ahead of the existing
"error:" label so that it falls through into it, to handle the doorbell
cleanup:

error_doorbell:
	amdgpu_mes_doorbell_free(adev);

With that fixed, the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


>   error:
> @@ -198,6 +224,10 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
>   
>   void amdgpu_mes_fini(struct amdgpu_device *adev)
>   {
> +	amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
> +			      &adev->mes.event_log_gpu_addr,
> +			      &adev->mes.event_log_cpu_addr);
> +
>   	amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
>   	amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
>   	amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
> @@ -1483,3 +1513,34 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
>   	amdgpu_ucode_release(&adev->mes.fw[pipe]);
>   	return r;
>   }
> +
> +#if defined(CONFIG_DEBUG_FS)
> +
> +static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
> +{
> +	struct amdgpu_device *adev = m->private;
> +	uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
> +
> +	seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
> +		     mem, PAGE_SIZE, false);
> +
> +	return 0;
> +}
> +
> +
> +DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
> +
> +#endif
> +
> +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
> +{
> +
> +#if defined(CONFIG_DEBUG_FS)
> +	struct drm_minor *minor = adev_to_drm(adev)->primary;
> +	struct dentry *root = minor->debugfs_root;
> +
> +	debugfs_create_file("amdgpu_mes_event_log", 0444, root,
> +			    adev, &amdgpu_debugfs_mes_event_log_fops);
> +
> +#endif
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index a27b424ffe00..894b9b133000 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -133,6 +133,11 @@ struct amdgpu_mes {
>   	uint32_t			num_mes_dbs;
>   	unsigned long			*doorbell_bitmap;
>   
> +	/* MES event log buffer */
> +	struct amdgpu_bo		*event_log_gpu_obj;
> +	uint64_t                        event_log_gpu_addr;
> +	void				*event_log_cpu_addr;
> +
>   	/* ip specific functions */
>   	const struct amdgpu_mes_funcs   *funcs;
>   };


More information about the amd-gfx mailing list