[PATCH] drm/amdgpu: SW part of MES event log enablement
Felix Kuehling
felix.kuehling at amd.com
Thu Nov 23 19:27:43 UTC 2023
On 2023-11-23 14:12, shaoyunl wrote:
> This is the generic SW part, prepare the event log buffer and dump it through debugfs
>
> Signed-off-by: shaoyunl <shaoyun.liu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 2 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 61 +++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 5 ++
> 4 files changed, 70 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index a53f436fa9f1..8b2cbeae99ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -2140,6 +2140,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
> amdgpu_debugfs_firmware_init(adev);
> amdgpu_ta_if_debugfs_init(adev);
>
> + amdgpu_debugfs_mes_event_log_init(adev);
> +
> #if defined(CONFIG_DRM_AMD_DC)
> if (adev->dc_enabled)
> dtn_debugfs_init(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
> index 371a6f0deb29..0425432d8659 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
> @@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
> void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
> void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
> void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
> +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
> +
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index 45280fb0e00c..b7af24d7db0e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -97,6 +97,26 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
> return 0;
> }
>
> +static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
> +{
> + int r;
> +
> + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + &adev->mes.event_log_gpu_obj,
> + &adev->mes.event_log_gpu_addr,
> + &adev->mes.event_log_cpu_addr);
> + if (r) {
> + dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r);
> + return r;
> + }
> +
> + memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
> +
> + return 0;
> +
> +}
> +
> static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
> {
> bitmap_free(adev->mes.doorbell_bitmap);
> @@ -181,6 +201,12 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
> if (r)
> goto error;
>
> + r = amdgpu_mes_event_log_init(adev);
> + if (r) {
> + amdgpu_mes_doorbell_free(adev);
> + goto error;
The usual preferred way of doing goto-based error handling is to add another
error label and do all the cleanup in reverse order of initialization. Then
you just jump to the correct error label depending on where the error
happened. So here you would "goto error_doorbell" instead of calling
amdgpu_mes_doorbell_free() inline before "goto error". See below.
> + }
> +
> return 0;
>
So you'd create another error label here, just before the existing "error:"
label, to handle the doorbell cleanup:

error_doorbell:
	amdgpu_mes_doorbell_free(adev);

With that fixed, the patch is
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> error:
> @@ -198,6 +224,10 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
>
> void amdgpu_mes_fini(struct amdgpu_device *adev)
> {
> + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
> + &adev->mes.event_log_gpu_addr,
> + &adev->mes.event_log_cpu_addr);
> +
> amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
> amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
> amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
> @@ -1483,3 +1513,34 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
> amdgpu_ucode_release(&adev->mes.fw[pipe]);
> return r;
> }
> +
> +#if defined(CONFIG_DEBUG_FS)
> +
> +static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
> +{
> + struct amdgpu_device *adev = m->private;
> + uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
> +
> + seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
> + mem, PAGE_SIZE, false);
> +
> + return 0;
> +}
> +
> +
> +DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
> +
> +#endif
> +
> +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
> +{
> +
> +#if defined(CONFIG_DEBUG_FS)
> + struct drm_minor *minor = adev_to_drm(adev)->primary;
> + struct dentry *root = minor->debugfs_root;
> +
> + debugfs_create_file("amdgpu_mes_event_log", 0444, root,
> + adev, &amdgpu_debugfs_mes_event_log_fops);
> +
> +#endif
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index a27b424ffe00..894b9b133000 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -133,6 +133,11 @@ struct amdgpu_mes {
> uint32_t num_mes_dbs;
> unsigned long *doorbell_bitmap;
>
> + /* MES event log buffer */
> + struct amdgpu_bo *event_log_gpu_obj;
> + uint64_t event_log_gpu_addr;
> + void *event_log_cpu_addr;
> +
> /* ip specific functions */
> const struct amdgpu_mes_funcs *funcs;
> };
More information about the amd-gfx mailing list