[PATCH v2 2/9] drm/amdgpu: Add helper funcs for jpeg devcoredump

Sundararaju, Sathishkumar sasundar at amd.com
Wed Jan 29 11:33:49 UTC 2025




On 1/29/2025 4:15 PM, Lazar, Lijo wrote:
>
> On 1/29/2025 3:51 PM, Sundararaju, Sathishkumar wrote:
>>
>>
>> On 1/29/2025 3:20 PM, Lazar, Lijo wrote:
>>> On 1/29/2025 2:16 PM, Sathishkumar S wrote:
>>>> Add devcoredump helper functions that can be reused for all jpeg
>>>> versions.
>>>>
>>>> V2: (Lijo)
>>>>    - add amdgpu_jpeg_reg_dump_init() and amdgpu_jpeg_reg_dump_fini()
>>>>    - use reg_list and reg_count from init() to dump and print registers
>>>>    - memory allocation and freeing is moved to the init() and fini()
>>>>
>>>> Signed-off-by: Sathishkumar S <sathishkumar.sundararaju at amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c | 80 ++++++++++++++++++++++++
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h | 10 +++
>>>>    2 files changed, 90 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/
>>>> drm/amd/amdgpu/amdgpu_jpeg.c
>>>> index b6d2eb049f54..0f9d81e27973 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
>>>> @@ -452,3 +452,83 @@ void amdgpu_jpeg_sysfs_reset_mask_fini(struct
>>>> amdgpu_device *adev)
>>>>                device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
>>>>        }
>>>>    }
>>>> +
>>>> +int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
>>>> +                   const struct amdgpu_hwip_reg_entry *reg, u32 count)
>>>> +{
>>>> +    adev->jpeg.ip_dump = kcalloc(adev->jpeg.num_jpeg_inst * count,
>>>> +                     sizeof(uint32_t), GFP_KERNEL);
>>>> +    if (!adev->jpeg.ip_dump) {
>>>> +        DRM_ERROR("Failed to allocate memory for JPEG IP Dump\n");
>>>> +        return -ENOMEM;
>>>> +    }
>>>> +    adev->jpeg.reg_list = reg;
>>>> +    adev->jpeg.reg_count = count;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev)
>>>> +{
>>>> +    kfree(adev->jpeg.ip_dump);
>>>> +    adev->jpeg.reg_list = NULL;
>>>> +    adev->jpeg.reg_count = 0;
>>>> +}
>>>> +
>>>> +void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block)
>>>> +{
>>>> +    struct amdgpu_device *adev = ip_block->adev;
>>>> +    u32 inst_off, inst_id, is_powered;
>>>> +    int i, j;
>>>> +
>>>> +    if (!adev->jpeg.ip_dump)
>>>> +        return;
>>>> +
>>>> +    for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
>>>> +        if (adev->jpeg.harvest_config & (1 << i))
>>>> +            continue;
>>>> +
>>>> +        inst_id = GET_INST(JPEG, i);
>>>> +        inst_off = i * adev->jpeg.reg_count;
>>>> +        /* check power status from UVD_JPEG_POWER_STATUS */
>>>> +        adev->jpeg.ip_dump[inst_off] =
>>>> +            RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[0],
>>>> +                               inst_id));
>>>> +        is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
>>>> +
>>>> +        if (is_powered)
>>>> +            for (j = 1; j < adev->jpeg.reg_count; j++)
>>>> +                adev->jpeg.ip_dump[inst_off + j] =
>>>> +                    RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev-
>>>>> jpeg.reg_list[j],
>>>> +                                       inst_id));
>>>> +    }
>>>> +}
>>>> +
>>>> +void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block,
>>>> struct drm_printer *p)
>>>> +{
>>>> +    struct amdgpu_device *adev = ip_block->adev;
>>>> +    u32 inst_off, is_powered;
>>>> +    int i, j;
>>>> +
>>>> +    if (!adev->jpeg.ip_dump)
>>>> +        return;
>>>> +
>>>> +    drm_printf(p, "num_instances:%d\n", adev->jpeg.num_jpeg_inst);
>>>> +    for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
>>>> +        if (adev->jpeg.harvest_config & (1 << i)) {
>>>> +            drm_printf(p, "\nHarvested Instance:JPEG%d Skipping
>>>> dump\n", i);
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        inst_off = i * adev->jpeg.reg_count;
>>>> +        is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
>>>> +
>>>> +        if (is_powered) {
>>>> +            drm_printf(p, "Active Instance:JPEG%d\n", i);
>>>> +            for (j = 0; j < adev->jpeg.reg_count; j++)
>>>> +                drm_printf(p, "%-50s \t 0x%08x\n", adev-
>>>>> jpeg.reg_list[j].reg_name,
>>>> +                       adev->jpeg.ip_dump[inst_off + j]);
>>>> +        } else
>>>> +            drm_printf(p, "\nInactive Instance:JPEG%d\n", i);
>>>> +    }
>>>> +}
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/
>>>> drm/amd/amdgpu/amdgpu_jpeg.h
>>>> index eb2096dcf1a6..02886ec4466e 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
>>>> @@ -92,6 +92,8 @@
>>>>            *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = value;    \
>>>>        } while (0)
>>>>    +struct amdgpu_hwip_reg_entry;
>>>> +
>>>>    enum amdgpu_jpeg_caps {
>>>>        AMDGPU_JPEG_RRMT_ENABLED,
>>>>    };
>>>> @@ -137,6 +139,9 @@ struct amdgpu_jpeg {
>>>>        bool    indirect_sram;
>>>>        uint32_t supported_reset;
>>>>        uint32_t caps;
>>>> +    u32 *ip_dump;
>>>> +    u32 reg_count;
>>>> +    const struct amdgpu_hwip_reg_entry *reg_list;
>>>>    };
>>>>    
>>> Thanks, this is almost there. Personally, would still prefer something
>>> like below and have an instance of this kept inside jpeg_inst (though I
>>> see your point that jpeg_inst doesn't have an instance id and this will
>>> also mean duplicating list pointer/num reg info in all instances).
>> The multiple copies is one reason I am trying to avoid this approach,
>> and we can still print only
>> the affected instance registers in devcoredump in the future if support
>> for it comes up.
>>> amdgpu_jpeg_reg_dump {
>>>      u32 *ip_dump;
>>>      u32 reg_count;
>>>      const struct amdgpu_hwip_reg_entry *reg_list;
>>> };
>>>
>>> Ignoring that -
>> Thank you, would prefer to have single reference of the reg_list/
>> reg_count and use inst_id.
>>> With the current way,
>>>
>>> amdgpu_jpeg_reg_dump_fini() may be called from within sw_fini(). Just
>>> wanted to keep a wrapper fini() func to make sure all is cleaned up.
>>> That would avoid calling this from every IP version.
>> Can we have it the current way instead ? as few ip_versions do not
>> support devcoredump yet and calling
>> reg_dump_fini() for every ip version irrespective of the  support is
>> redundant, if we add that in sw_fini().
> Just add a check for reg_list being non-null and that should take care.
> Then this doesn't need to be taken care when more IP versions are added.
> Also kfree on NULL is harmless.
>
> Thanks,
> Lijo
Any init() caller should be responsible for calling the corresponding 
fini() as well,
it is either good to have both amdgpu_jpeg_reg_dump_init()/fini() moved 
inside sw_init/sw_fini
or leave it to the helper function consumers to call both init() and 
fini() themselves.

Regards,
Sathish
>
>>> Apart from those, leaving it to Leo or someone else from JPEG to take a
>>> look.
>> Okay, thank you.
>>
>> Regards,
>> Sathish
>>> Regards,
>>> LIjo
>>>
>>>>    int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
>>>> @@ -161,5 +166,10 @@ int amdgpu_jpeg_psp_update_sram(struct
>>>> amdgpu_device *adev, int inst_idx,
>>>>    void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev);
>>>>    int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev);
>>>>    void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev);
>>>> +int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
>>>> +                   const struct amdgpu_hwip_reg_entry *reg, u32 count);
>>>> +void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev);
>>>> +void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block);
>>>> +void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block,
>>>> struct drm_printer *p);
>>>>      #endif /*__AMDGPU_JPEG_H__*/



More information about the amd-gfx mailing list