[PATCH v2 2/9] drm/amdgpu: Add helper funcs for jpeg devcoredump
Lazar, Lijo
lijo.lazar at amd.com
Wed Jan 29 10:45:46 UTC 2025
On 1/29/2025 3:51 PM, Sundararaju, Sathishkumar wrote:
>
>
>
> On 1/29/2025 3:20 PM, Lazar, Lijo wrote:
>>
>> On 1/29/2025 2:16 PM, Sathishkumar S wrote:
>>> Add devcoredump helper functions that can be reused for all jpeg
>>> versions.
>>>
>>> V2: (Lijo)
>>> - add amdgpu_jpeg_reg_dump_init() and amdgpu_jpeg_reg_dump_fini()
>>> - use reg_list and reg_count from init() to dump and print registers
>>> - memory allocation and freeing is moved to the init() and fini()
>>>
>>> Signed-off-by: Sathishkumar S <sathishkumar.sundararaju at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c | 80 ++++++++++++++++++++++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h | 10 +++
>>> 2 files changed, 90 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/
>>> drm/amd/amdgpu/amdgpu_jpeg.c
>>> index b6d2eb049f54..0f9d81e27973 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
>>> @@ -452,3 +452,83 @@ void amdgpu_jpeg_sysfs_reset_mask_fini(struct
>>> amdgpu_device *adev)
>>> device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
>>> }
>>> }
>>> +
>>> +int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
>>> + const struct amdgpu_hwip_reg_entry *reg, u32 count)
>>> +{
>>> + adev->jpeg.ip_dump = kcalloc(adev->jpeg.num_jpeg_inst * count,
>>> + sizeof(uint32_t), GFP_KERNEL);
>>> + if (!adev->jpeg.ip_dump) {
>>> + DRM_ERROR("Failed to allocate memory for JPEG IP Dump\n");
>>> + return -ENOMEM;
>>> + }
>>> + adev->jpeg.reg_list = reg;
>>> + adev->jpeg.reg_count = count;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev)
>>> +{
>>> + kfree(adev->jpeg.ip_dump);
>>> + adev->jpeg.reg_list = NULL;
>>> + adev->jpeg.reg_count = 0;
>>> +}
>>> +
>>> +void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block)
>>> +{
>>> + struct amdgpu_device *adev = ip_block->adev;
>>> + u32 inst_off, inst_id, is_powered;
>>> + int i, j;
>>> +
>>> + if (!adev->jpeg.ip_dump)
>>> + return;
>>> +
>>> + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
>>> + if (adev->jpeg.harvest_config & (1 << i))
>>> + continue;
>>> +
>>> + inst_id = GET_INST(JPEG, i);
>>> + inst_off = i * adev->jpeg.reg_count;
>>> + /* check power status from UVD_JPEG_POWER_STATUS */
>>> + adev->jpeg.ip_dump[inst_off] =
>>> + RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[0],
>>> + inst_id));
>>> + is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
>>> +
>>> + if (is_powered)
>>> + for (j = 1; j < adev->jpeg.reg_count; j++)
>>> + adev->jpeg.ip_dump[inst_off + j] =
>>> + RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev-
>>> >jpeg.reg_list[j],
>>> + inst_id));
>>> + }
>>> +}
>>> +
>>> +void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block,
>>> struct drm_printer *p)
>>> +{
>>> + struct amdgpu_device *adev = ip_block->adev;
>>> + u32 inst_off, is_powered;
>>> + int i, j;
>>> +
>>> + if (!adev->jpeg.ip_dump)
>>> + return;
>>> +
>>> + drm_printf(p, "num_instances:%d\n", adev->jpeg.num_jpeg_inst);
>>> + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
>>> + if (adev->jpeg.harvest_config & (1 << i)) {
>>> + drm_printf(p, "\nHarvested Instance:JPEG%d Skipping
>>> dump\n", i);
>>> + continue;
>>> + }
>>> +
>>> + inst_off = i * adev->jpeg.reg_count;
>>> + is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
>>> +
>>> + if (is_powered) {
>>> + drm_printf(p, "Active Instance:JPEG%d\n", i);
>>> + for (j = 0; j < adev->jpeg.reg_count; j++)
>>> + drm_printf(p, "%-50s \t 0x%08x\n", adev-
>>> >jpeg.reg_list[j].reg_name,
>>> + adev->jpeg.ip_dump[inst_off + j]);
>>> + } else
>>> + drm_printf(p, "\nInactive Instance:JPEG%d\n", i);
>>> + }
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/
>>> drm/amd/amdgpu/amdgpu_jpeg.h
>>> index eb2096dcf1a6..02886ec4466e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
>>> @@ -92,6 +92,8 @@
>>> *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = value; \
>>> } while (0)
>>> +struct amdgpu_hwip_reg_entry;
>>> +
>>> enum amdgpu_jpeg_caps {
>>> AMDGPU_JPEG_RRMT_ENABLED,
>>> };
>>> @@ -137,6 +139,9 @@ struct amdgpu_jpeg {
>>> bool indirect_sram;
>>> uint32_t supported_reset;
>>> uint32_t caps;
>>> + u32 *ip_dump;
>>> + u32 reg_count;
>>> + const struct amdgpu_hwip_reg_entry *reg_list;
>>> };
>>>
>> Thanks, this is almost there. Personally, would still prefer something
>> like below and have an instance of this kept inside jpeg_inst (though I
>> see your point that jpeg_inst doesn't have an instance id and this will
>> also mean duplicating list pointer/num reg info in all instances).
> The multiple copies is one reason I am trying to avoid this approach,
> and we can still print only
> the affected instance registers in devcoredump in the future if support
> for it comes up.
>>
>> amdgpu_jpeg_reg_dump {
>> u32 *ip_dump;
>> u32 reg_count;
>> const struct amdgpu_hwip_reg_entry *reg_list;
>> };
>>
>> Ignoring that -
> Thank you, would prefer to have single reference of the reg_list/
> reg_count and use inst_id.
>>
>> With the current way,
>>
>> amdgpu_jpeg_reg_dump_fini() may be called from within sw_fini(). Just
>> wanted to keep a wrapper fini() func to make sure all is cleaned up.
>> That would avoid calling this from every IP version.
> Can we have it the current way instead ? as few ip_versions do not
> support devcoredump yet and calling
> reg_dump_fini() for every ip version irrespective of the support is
> redundant, if we add that in sw_fini().
Just add a check for reg_list being non-null and that should take care.
Then this doesn't need to be taken care when more IP versions are added.
Also kfree on NULL is harmless.
Thanks,
Lijo
>>
>> Apart from those, leaving it to Leo or someone else from JPEG to take a
>> look.
> Okay, thank you.
>
> Regards,
> Sathish
>>
>> Regards,
>> LIjo
>>
>>> int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
>>> @@ -161,5 +166,10 @@ int amdgpu_jpeg_psp_update_sram(struct
>>> amdgpu_device *adev, int inst_idx,
>>> void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev);
>>> int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev);
>>> void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev);
>>> +int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev,
>>> + const struct amdgpu_hwip_reg_entry *reg, u32 count);
>>> +void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev);
>>> +void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block);
>>> +void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block,
>>> struct drm_printer *p);
>>> #endif /*__AMDGPU_JPEG_H__*/
>
More information about the amd-gfx
mailing list