[PATCH v1 3/4] drm/amdgpu: add compute registers in ip dump for gfx10

Khatri, Sunil sukhatri at amd.com
Fri May 3 17:29:20 UTC 2024


On 5/3/2024 9:52 PM, Alex Deucher wrote:
> On Fri, May 3, 2024 at 12:09 PM Khatri, Sunil <sukhatri at amd.com> wrote:
>>
>> On 5/3/2024 9:18 PM, Khatri, Sunil wrote:
>>> On 5/3/2024 8:52 PM, Alex Deucher wrote:
>>>> On Fri, May 3, 2024 at 4:45 AM Sunil Khatri <sunil.khatri at amd.com>
>>>> wrote:
>>>>> add compute registers in set of registers to dump
>>>>> during ip dump for gfx10.
>>>>>
>>>>> Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 42
>>>>> +++++++++++++++++++++++++-
>>>>>    1 file changed, 41 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>>> index 953df202953a..00c7a842ea3b 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>>> @@ -378,7 +378,47 @@ static const struct amdgpu_hwip_reg_entry
>>>>> gc_reg_list_10_1[] = {
>>>>>           SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
>>>>>           SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
>>>>>           SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
>>>>> -       SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3),
>>>>> +       /* compute registers */
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
>>>>> +       SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS)
>>>> The registers in patches 3 and 4 are multi-instance, so we should
>>>> ideally print every instance of them rather than just one.  Use
>>>> nv_grbm_select() to select the pipes and queues.  Make sure to protect
>>>> access using the adev->srbm_mutex mutex.
>>>>
>>>> E.g., for the compute registers (patch 3):
>>>>       mutex_lock(&adev->srbm_mutex);
>>>>           for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
>>>>                   for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
>>>>                          for (k = 0; k <
>>>> adev->gfx.mec.num_queue_per_pipe; k++) {
>>>>                    drm_printf("mec %d, pipe %d, queue %d\n", i, j, k);
>>>>                   nv_grbm_select(adev, i, j, k, 0);
>>>>                          for (reg = 0; reg < ARRAY_SIZE(compute_regs);
>>>> reg++)
>>>>                          drm_printf(...RREG(compute_regs[reg]));
>>>>                       }
>>>>                   }
>>>>       }
>>>>       nv_grbm_select(adev, 0, 0, 0, 0);
>>>>       mutex_unlock(&adev->srbm_mutex);
>>>>
>>>> For gfx registers (patch 4):
>>>>
>>>>       mutex_lock(&adev->srbm_mutex);
>>>>           for (i = 0; i < adev->gfx.me.num_me; ++i) {
>>>>                   for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
>>>>                       for (k = 0; k < adev->gfx.me.num_queue_per_pipe;
>>>> k++) {
>>>>                                 drm_printf("me %d, pipe %d, queue
>>>> %d\n", i, j, k);
>>>>                                   nv_grbm_select(adev, i, j, k, 0);
>>>>                          for (reg = 0; reg < ARRAY_SIZE(gfx_regs); reg++)
>>>>                          drm_printf(...RREG(gfx_regs[reg]));
>> I see one problem here, we dump the registers in memory allocated first
>> and read before and store and then dump later when user read the
>> devcoredump file. Here we do not know how many registers are there
>> considering multiple me and then pipe per me and queue per pipe.
>>
>> Should we run this loop in advance to count no of elements while
>> allocating memory or (count = gfx.me.num_me *
>> adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_queue_per_pipe. No
>> matter what we do we need to save these registers in advance.
> Keep the multi-instanced registers in separate arrays and then add
> them all up to get the total size.
>
> adev->gfx.ip_dump_core[regs]
> adev->gfx.ip_dump_cp_compute_instanced[mec][pipe][queue]
> adev->gfx.ip_dump_cp_gfx_instanced[me][pipe][queue]
>
>> Also another problem in printing drm_printf("me %d, pipe %d, queue
>> %d\n", i, j, k); Need to think how we can do that ...
> Use multiple arrays to store the data. and just print that between
> them in the ip_print callback.

Ok, Sure

Thanks Sunil

>
> Alex
>
>>>>               }
>>>>                   }
>>>>           }
>>>>       nv_grbm_select(adev, 0, 0, 0, 0);
>>>>       mutex_unlock(&adev->srbm_mutex);
>>> Thanks for pointing that out and suggesting the sample code of how it
>>> should be. Will take care of this in next patch set.
>>>
>>> Regards
>>>
>>> Sunil
>>>
>>>> Alex
>>>>
>>>>>    };
>>>>>
>>>>>    static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
>>>>> --
>>>>> 2.34.1
>>>>>


More information about the amd-gfx mailing list