[PATCH] drm/amdgpu: SW part of MES event log enablement

Felix Kuehling felix.kuehling at amd.com
Thu Nov 23 21:33:24 UTC 2023


On 2023-11-23 16:29, Felix Kuehling wrote:
> On 2023-11-23 14:48, shaoyunl wrote:
>> This is the generic SW part, prepare the event log buffer and dump it 
>> through debugfs
>>
>> Signed-off-by: shaoyunl <shaoyun.liu at amd.com>
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>

Sorry, I just realized there is a potential problem; see my inline comment below.


>
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  2 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h |  2 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c     | 61 +++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h     |  5 ++
>>   4 files changed, 70 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> index a53f436fa9f1..8b2cbeae99ca 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> @@ -2140,6 +2140,8 @@ int amdgpu_debugfs_init(struct amdgpu_device 
>> *adev)
>>       amdgpu_debugfs_firmware_init(adev);
>>       amdgpu_ta_if_debugfs_init(adev);
>>   +    amdgpu_debugfs_mes_event_log_init(adev);

amdgpu_debugfs_mes_event_log_init() always gets called here, so the debugfs 
file is created even if the GPU isn't using MES. But the log buffer only 
gets allocated on GPUs that have MES. I think reading the log in debugfs on 
a GPU without MES would cause a kernel oops. You either need to add a check 
for that in amdgpu_debugfs_mes_event_log_show(), or skip the debugfs file 
creation in amdgpu_debugfs_mes_event_log_init() if the GPU doesn't use MES.

Regards,
   Felix


>> +
>>   #if defined(CONFIG_DRM_AMD_DC)
>>       if (adev->dc_enabled)
>>           dtn_debugfs_init(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>> index 371a6f0deb29..0425432d8659 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
>> @@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
>>   void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
>>   void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
>>   void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
>> +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev);
>> +
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>> index 45280fb0e00c..b4ba556dc733 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
>> @@ -97,6 +97,26 @@ static int amdgpu_mes_doorbell_init(struct 
>> amdgpu_device *adev)
>>       return 0;
>>   }
>>   +static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
>> +{
>> +    int r;
>> +
>> +    r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
>> +                    AMDGPU_GEM_DOMAIN_GTT,
>> +                    &adev->mes.event_log_gpu_obj,
>> +                    &adev->mes.event_log_gpu_addr,
>> +                    &adev->mes.event_log_cpu_addr);
>> +    if (r) {
>> +        dev_warn(adev->dev, "failed to create MES event log buffer 
>> (%d)", r);
>> +        return r;
>> +    }
>> +
>> +    memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE);
>> +
>> +    return  0;
>> +
>> +}
>> +
>>   static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
>>   {
>>       bitmap_free(adev->mes.doorbell_bitmap);
>> @@ -181,8 +201,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
>>       if (r)
>>           goto error;
>>   +    r = amdgpu_mes_event_log_init(adev);
>> +    if (r)
>> +        goto error_doorbell;
>> +
>>       return 0;
>>   +error_doorbell:
>> +    amdgpu_mes_doorbell_free(adev);
>>   error:
>>       amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
>>       amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
>> @@ -198,6 +224,10 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
>>     void amdgpu_mes_fini(struct amdgpu_device *adev)
>>   {
>> +    amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj,
>> +                  &adev->mes.event_log_gpu_addr,
>> +                  &adev->mes.event_log_cpu_addr);
>> +
>>       amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs);
>>       amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs);
>>       amdgpu_device_wb_free(adev, adev->mes.read_val_offs);
>> @@ -1483,3 +1513,34 @@ int amdgpu_mes_init_microcode(struct 
>> amdgpu_device *adev, int pipe)
>>       amdgpu_ucode_release(&adev->mes.fw[pipe]);
>>       return r;
>>   }
>> +
>> +#if defined(CONFIG_DEBUG_FS)
>> +
>> +static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, 
>> void *unused)
>> +{
>> +    struct amdgpu_device *adev = m->private;
>> +    uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
>> +
>> +    seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
>> +             mem, PAGE_SIZE, false);
>> +
>> +    return 0;
>> +}
>> +
>> +
>> +DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
>> +
>> +#endif
>> +
>> +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
>> +{
>> +
>> +#if defined(CONFIG_DEBUG_FS)
>> +    struct drm_minor *minor = adev_to_drm(adev)->primary;
>> +    struct dentry *root = minor->debugfs_root;
>> +
>> +    debugfs_create_file("amdgpu_mes_event_log", 0444, root,
>> +                adev, &amdgpu_debugfs_mes_event_log_fops);
>> +
>> +#endif
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>> index a27b424ffe00..894b9b133000 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>> @@ -133,6 +133,11 @@ struct amdgpu_mes {
>>       uint32_t            num_mes_dbs;
>>       unsigned long            *doorbell_bitmap;
>>   +    /* MES event log buffer */
>> +    struct amdgpu_bo        *event_log_gpu_obj;
>> +    uint64_t                        event_log_gpu_addr;
>> +    void                *event_log_cpu_addr;
>> +
>>       /* ip specific functions */
>>       const struct amdgpu_mes_funcs   *funcs;
>>   };


More information about the amd-gfx mailing list