[PATCH 2/9] drm/amdgpu: Add helper funcs for jpeg devcoredump

Sundararaju, Sathishkumar sasundar at amd.com
Tue Jan 28 14:25:25 UTC 2025




On 1/28/2025 5:29 PM, Lazar, Lijo wrote:
>
> On 1/28/2025 5:06 PM, Sundararaju, Sathishkumar wrote:
>> Hi Lijo,
>>
>>
>> On 1/28/2025 3:04 PM, Lazar, Lijo wrote:
>>> On 1/28/2025 2:39 PM, Sathishkumar S wrote:
>>>> Add devcoredump helper functions that can be reused for all jpeg
>>>> versions.
>>>>
>>>> Signed-off-by: Sathishkumar S <sathishkumar.sundararaju at amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c | 59 ++++++++++++++++++++++++
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h |  7 +++
>>>>    2 files changed, 66 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/
>>>> drm/amd/amdgpu/amdgpu_jpeg.c
>>>> index b6d2eb049f54..70f1e0e88f4b 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
>>>> @@ -452,3 +452,62 @@ void amdgpu_jpeg_sysfs_reset_mask_fini(struct
>>>> amdgpu_device *adev)
>>>>                device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask);
>>>>        }
>>>>    }
>>>> +
>>>> +void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block,
>>>> +                   const struct amdgpu_hwip_reg_entry *reg, u32
>>>> reg_count)
>>>> +{
>>>> +    struct amdgpu_device *adev = ip_block->adev;
>>>> +    u32 inst_off, inst_id, is_powered;
>>>> +    int i, j;
>>>> +
>>>> +    if (!adev->jpeg.ip_dump)
>>>> +        return;
>>>> +
>>>> +    for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
>>>> +        if (adev->jpeg.harvest_config & (1 << i))
>>>> +            continue;
>>>> +
>>>> +        inst_id = GET_INST(JPEG, i);
>>>> +        inst_off = i * reg_count;
>>>> +        /* check power status from UVD_JPEG_POWER_STATUS */
>>>> +        adev->jpeg.ip_dump[inst_off] =
>>>> RREG32(SOC15_REG_ENTRY_OFFSET_INST(reg[0],
>>>> +                                          inst_id));
>>>> +        is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
>>>> +
>>>> +        if (is_powered)
>>>> +            for (j = 1; j < reg_count; j++)
>>>> +                adev->jpeg.ip_dump[inst_off + j] =
>>>> +                    RREG32(SOC15_REG_ENTRY_OFFSET_INST(reg[j],
>>>> +                                       inst_id));
>>>> +    }
>>>> +}
>>>> +
>>>> +void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block,
>>>> struct drm_printer *p,
>>>> +                const struct amdgpu_hwip_reg_entry *reg, u32 reg_count)
>>>> +{
>>>> +    struct amdgpu_device *adev = ip_block->adev;
>>>> +    u32 inst_off, is_powered;
>>>> +    int i, j;
>>>> +
>>>> +    if (!adev->jpeg.ip_dump)
>>>> +        return;
>>>> +
>>>> +    drm_printf(p, "num_instances:%d\n", adev->jpeg.num_jpeg_inst);
>>>> +    for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
>>>> +        if (adev->jpeg.harvest_config & (1 << i)) {
>>>> +            drm_printf(p, "\nHarvested Instance:JPEG%d Skipping
>>>> dump\n", i);
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        inst_off = i * reg_count;
>>>> +        is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1);
>>>> +
>>>> +        if (is_powered) {
>>>> +            drm_printf(p, "Active Instance:JPEG%d\n", i);
>>>> +            for (j = 0; j < reg_count; j++)
>>>> +                drm_printf(p, "%-50s \t 0x%08x\n", reg[j].reg_name,
>>>> +                       adev->jpeg.ip_dump[inst_off + j]);
>>>> +        } else
>>>> +            drm_printf(p, "\nInactive Instance:JPEG%d\n", i);
>>>> +    }
>>>> +}
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/
>>>> drm/amd/amdgpu/amdgpu_jpeg.h
>>>> index eb2096dcf1a6..1d334f35d8a8 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
>>>> @@ -92,6 +92,8 @@
>>>>            *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = value;    \
>>>>        } while (0)
>>>>    +struct amdgpu_hwip_reg_entry;
>>>> +
>>>>    enum amdgpu_jpeg_caps {
>>>>        AMDGPU_JPEG_RRMT_ENABLED,
>>>>    };
>>>> @@ -137,6 +139,7 @@ struct amdgpu_jpeg {
>>>>        bool    indirect_sram;
>>>>        uint32_t supported_reset;
>>>>        uint32_t caps;
>>>> +    u32 *ip_dump;
>>> It's better to keep this at per jpeg instance level (amdgpu_jpeg_inst).
>>> If the hang happens for a single jpeg ring, that will help rather than
>>> dumping out reg settings for all instances.
>> The devcoredump infra in amdgpu does not propagate the job itself or the
>> hung instance, so there is no feasible way of doing this, meaning
>> dumping only the affected instance.
>> And if every instance is represented by an ip_block then I can implement
>> this with a flag to handle per instance case, but I doubt it is, as seen
>> from the below commit
>> fba4761ca00f drm/amdgpu: partially revert VCN IP block instancing support
>>
> Even if the API in its current form doesn't support per instance dump,
> suggest to keept the reg_dump data struct in jpeg_instance. That way it
> doesn't need any calculation to find the right offset etc.
>
> On the other hand, a single copy may be maintained for the reg address
> list if that is feasible. As I see the address calculation is dynamic
> based on the instance id and it may not make sense to keep it in all
> instances.
I am aligned with you on maintaining a single copy of the reg_list, and 
so, yes address calculation is dynamic
based on the the instance id as you pointed out, in that case 
maintaining this inside an instance tracking structure
doesn't feel right, as multiple instances track the same reg_list 
pointer which is the same list for all instances given an IP version.

The request I am proposing is to maintain the reg_list inside 
adev->jpeg, a common list of registers for any instance in the ip, and
all the ip_blocks of type JPEG refer to this common list when separate 
instances are represented by discrete ip_blocks, and dump per instance
register list in the future when that is planned and enabled.

Regards,
Sathish
>
> Thanks,
> Lijo
>
>> Regards,
>> Sathish
>>> Thanks,
>>> Lijo
>>>
>>>>    };
>>>>      int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
>>>> @@ -161,5 +164,9 @@ int amdgpu_jpeg_psp_update_sram(struct
>>>> amdgpu_device *adev, int inst_idx,
>>>>    void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev);
>>>>    int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev);
>>>>    void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev);
>>>> +void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block,
>>>> +                   const struct amdgpu_hwip_reg_entry *reg, u32
>>>> reg_count);
>>>> +void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block,
>>>> struct drm_printer *p,
>>>> +                const struct amdgpu_hwip_reg_entry *reg, u32
>>>> reg_count);
>>>>      #endif /*__AMDGPU_JPEG_H__*/



More information about the amd-gfx mailing list