[PATCH] drm/amdgpu: SW part of MES event log enablement
Felix Kuehling
felix.kuehling at amd.com
Thu Nov 23 21:33:24 UTC 2023
On 2023-11-23 16:29, Felix Kuehling wrote:
> On 2023-11-23 14:48, shaoyunl wrote:
>> This is the generic SW part, prepare the event log buffer and dump it
>> through debugfs
>>
>> Signed-off-by: shaoyunl <shaoyun.liu at amd.com>
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
Sorry, I just realized a potential problem; see inline.
>
>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +
>> drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 2 +
>> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 61 +++++++++++++++++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 5 ++
>> 4 files changed, 70 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> index a53f436fa9f1..8b2cbeae99ca 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> @@ -2140,6 +2140,8 @@ int amdgpu_debugfs_init(struct amdgpu_device
>> *adev)
>> amdgpu_debugfs_firmware_init(adev);
>> amdgpu_ta_if_debugfs_init(adev);
>> + amdgpu_debugfs_mes_event_log_init(adev);
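This debugfs file always gets created, even if the GPU isn't using MES. But
the log buffer only gets allocated on GPUs that have MES, so on a GPU without
MES adev->mes.event_log_cpu_addr would presumably still be NULL. I think
reading the log in debugfs on such a GPU would cause a kernel oops, because
amdgpu_debugfs_mes_event_log_show passes that pointer straight to
seq_hex_dump. You either need to add a check for that in
amdgpu_debugfs_mes_event_log_show, or skip the debugfs file creation in
amdgpu_debugfs_mes_event_log_init if the GPU doesn't use MES.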
Regards,
Felix
>> +
>> #if defined(CONFIG_DRM_AMD_DC)
>> if (adev->dc_enabled)
>> dtn_debugfs_init(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>> index 371a6f0deb29..0425432d8659 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>> @@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
>> void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
>> void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
>> void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
>> +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
>> +
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>> index 45280fb0e00c..b4ba556dc733 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>> @@ -97,6 +97,26 @@ static int amdgpu_mes_doorbell_init(struct
>> amdgpu_device *adev)
>> return 0;
>> }
>> +static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
>> +{
>> + int r;
>> +
>> + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
>> + AMDGPU_GEM_DOMAIN_GTT,
>> + &adev->mes.event_log_gpu_obj,
>> + &adev->mes.event_log_gpu_addr,
>> + &adev->mes.event_log_cpu_addr);
>> + if (r) {
>> + dev_warn(adev->dev, "failed to create MES event log buffer
>> (%d)", r);
>> + return r;
>> + }
>> +
>> + memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
>> +
>> + return 0;
>> +
>> +}
>> +
>> static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
>> {
>> bitmap_free(adev->mes.doorbell_bitmap);
>> @@ -181,8 +201,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
>> if (r)
>> goto error;
>> + r = amdgpu_mes_event_log_init(adev);
>> + if (r)
>> + goto error_doorbell;
>> +
>> return 0;
>> +error_doorbell:
>> + amdgpu_mes_doorbell_free(adev);
>> error:
>> amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
>> amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
>> @@ -198,6 +224,10 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
>> void amdgpu_mes_fini(struct amdgpu_device *adev)
>> {
>> + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
>> + &adev->mes.event_log_gpu_addr,
>> + &adev->mes.event_log_cpu_addr);
>> +
>> amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
>> amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
>> amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
>> @@ -1483,3 +1513,34 @@ int amdgpu_mes_init_microcode(struct
>> amdgpu_device *adev, int pipe)
>> amdgpu_ucode_release(&adev->mes.fw[pipe]);
>> return r;
>> }
>> +
>> +#if defined(CONFIG_DEBUG_FS)
>> +
>> +static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m,
>> void *unused)
>> +{
>> + struct amdgpu_device *adev = m->private;
>> + uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
>> +
>> + seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
>> + mem, PAGE_SIZE, false);
>> +
>> + return 0;
>> +}
>> +
>> +
>> +DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
>> +
>> +#endif
>> +
>> +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
>> +{
>> +
>> +#if defined(CONFIG_DEBUG_FS)
>> + struct drm_minor *minor = adev_to_drm(adev)->primary;
>> + struct dentry *root = minor->debugfs_root;
>> +
>> + debugfs_create_file("amdgpu_mes_event_log", 0444, root,
>> + adev, &amdgpu_debugfs_mes_event_log_fops);
>> +
>> +#endif
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>> index a27b424ffe00..894b9b133000 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>> @@ -133,6 +133,11 @@ struct amdgpu_mes {
>> uint32_t num_mes_dbs;
>> unsigned long *doorbell_bitmap;
>> + /* MES event log buffer */
>> + struct amdgpu_bo *event_log_gpu_obj;
>> + uint64_t event_log_gpu_addr;
>> + void *event_log_cpu_addr;
>> +
>> /* ip specific functions */
>> const struct amdgpu_mes_funcs *funcs;
>> };
More information about the amd-gfx
mailing list