[PATCH v3 2/4] drm/amdgpu: Add support to dump gfx10 cp registers

Khatri, Sunil sukhatri at amd.com
Thu May 16 03:41:04 UTC 2024


On 5/16/2024 1:40 AM, Deucher, Alexander wrote:
> [Public]
>
>> -----Original Message-----
>> From: Sunil Khatri <sunil.khatri at amd.com>
>> Sent: Wednesday, May 15, 2024 8:18 AM
>> To: Deucher, Alexander <Alexander.Deucher at amd.com>; Koenig, Christian
>> <Christian.Koenig at amd.com>
>> Cc: amd-gfx at lists.freedesktop.org; Khatri, Sunil <Sunil.Khatri at amd.com>
>> Subject: [PATCH v3 2/4] drm/amdgpu: Add support to dump gfx10 cp
>> registers
>>
>> Add support to dump the cp registers of all instances in gfx10.
>>
>> Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |   1 +
>>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 117
>> +++++++++++++++++++++++-
>>   2 files changed, 114 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> index 30d7f9c29478..d96873c154ed 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> @@ -436,6 +436,7 @@ struct amdgpu_gfx {
>>
>>        /* IP reg dump */
>>        uint32_t                        *ipdump_core;
>> +     uint32_t                        *ipdump_cp;
> I'd call this ip_dump_compute or ip_dump_compute_queues to align with what the registers represent.
Sure
>
> Alex
>
>>   };
>>
>>   struct amdgpu_gfx_ras_reg_entry {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index f6d6a4b9802d..daf9a3571183 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -381,6 +381,49 @@ static const struct amdgpu_hwip_reg_entry
>> gc_reg_list_10_1[] = {
>>        SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)  };
>>
>> +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = {
>> +     /* compute registers */
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
>> +     SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
>> +     SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_PQ_DOORBELL_CONTROL),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
>> +     SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
>> +     SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
>> +     SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
>> +     SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
>> +     SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
>> +     SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS) };
>> +
>>   static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
>>        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4,
>> 0xffffffff, 0x00400014),
>>        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL,
>> 0xfcff8fff, 0xf8000100), @@ -4595,10 +4638,11 @@ static int
>> gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
>>                             hw_prio, NULL);
>>   }
>>
>> -static void gfx_v10_0_alloc_dump_mem(struct amdgpu_device *adev)
>> +static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev)
>>   {
>>        uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
>>        uint32_t *ptr;
>> +     uint32_t inst;
>>
>>        ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
>>        if (ptr == NULL) {
>> @@ -4607,6 +4651,19 @@ static void gfx_v10_0_alloc_dump_mem(struct
>> amdgpu_device *adev)
>>        } else {
>>                adev->gfx.ipdump_core = ptr;
>>        }
>> +
>> +     /* Allocate memory for gfx cp registers for all the instances */
>> +     reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
>> +     inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
>> +             adev->gfx.mec.num_queue_per_pipe;
>> +
>> +     ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
>> +     if (ptr == NULL) {
>> +             DRM_ERROR("Failed to allocate memory for GFX CP IP
>> Dump\n");
>> +             adev->gfx.ipdump_cp = NULL;
>> +     } else {
>> +             adev->gfx.ipdump_cp = ptr;
>> +     }
>>   }
>>
>>   static int gfx_v10_0_sw_init(void *handle) @@ -4761,7 +4818,7 @@ static
>> int gfx_v10_0_sw_init(void *handle)
>>
>>        gfx_v10_0_gpu_early_init(adev);
>>
>> -     gfx_v10_0_alloc_dump_mem(adev);
>> +     gfx_v10_0_alloc_ip_dump(adev);
>>
>>        return 0;
>>   }
>> @@ -4816,6 +4873,7 @@ static int gfx_v10_0_sw_fini(void *handle)
>>        gfx_v10_0_free_microcode(adev);
>>
>>        kfree(adev->gfx.ipdump_core);
>> +     kfree(adev->gfx.ipdump_cp);
>>
>>        return 0;
>>   }
>> @@ -9280,7 +9338,7 @@ static void gfx_v10_0_emit_mem_sync(struct
>> amdgpu_ring *ring)  static void gfx_v10_ip_print(void *handle, struct
>> drm_printer *p)  {
>>        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> -     uint32_t i;
>> +     uint32_t i, j, k, reg, index = 0;
>>        uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
>>
>>        if (!adev->gfx.ipdump_core)
>> @@ -9290,12 +9348,32 @@ static void gfx_v10_ip_print(void *handle, struct
>> drm_printer *p)
>>                drm_printf(p, "%-50s \t 0x%08x\n",
>>                           gc_reg_list_10_1[i].reg_name,
>>                           adev->gfx.ipdump_core[i]);
>> +
>> +     /* print cp registers for all instances */
>> +     if (!adev->gfx.ipdump_cp)
>> +             return;
>> +
>> +     reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
>> +
>> +     for (i = 0; i < adev->gfx.mec.num_mec; i++) {
>> +             for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
>> +                     for (k = 0; k < adev->gfx.mec.num_queue_per_pipe;
>> k++) {
>> +                             drm_printf(p, "mec %d, pipe %d, queue
>> %d\n", i, j, k);
>> +                             for (reg = 0; reg < reg_count; reg++) {
>> +                                     drm_printf(p, "%-50s \t 0x%08x\n",
>> +
>> gc_cp_reg_list_10[reg].reg_name,
>> +                                                adev->gfx.ipdump_cp[index
>> + reg]);
>> +                             }
>> +                             index += reg_count;
>> +                     }
>> +             }
>> +     }
>>   }
>>
>>   static void gfx_v10_ip_dump(void *handle)  {
>>        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> -     uint32_t i;
>> +     uint32_t i, j, k, reg, index = 0;
>>        uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
>>
>>        if (!adev->gfx.ipdump_core)
>> @@ -9305,6 +9383,37 @@ static void gfx_v10_ip_dump(void *handle)
>>        for (i = 0; i < reg_count; i++)
>>                adev->gfx.ipdump_core[i] =
>> RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i]));
>>        amdgpu_gfx_off_ctrl(adev, true);
>> +
>> +     /* dump cp registers for all instances */
>> +     if (!adev->gfx.ipdump_cp)
>> +             return;
>> +
>> +     reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
>> +     amdgpu_gfx_off_ctrl(adev, false);
>> +     mutex_lock(&adev->srbm_mutex);
>> +     for (i = 0; i < adev->gfx.mec.num_mec; i++) {
>> +             for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
>> +                     for (k = 0; k < adev->gfx.mec.num_queue_per_pipe;
>> k++) {
>> +                             nv_grbm_select(adev, i, j, k, 0);
>> +
>> +                             for (reg = 0; reg < reg_count; reg++) {
>> +                                     adev->gfx.ipdump_cp[index + reg] =
>> +
>>        RREG32(SOC15_REG_ENTRY_OFFSET(
>> +
>>        gc_cp_reg_list_10[reg]));
>> +                             }
>> +                             index += reg_count;
>> +                             /*
>> +                              * Reading all registers takes long and causes a
>> watchdog timeout;
>> +                              * releasing the core after every set of registers read
>> keeps the cpu
>> +                              * core from being locked up.
>> +                              */
>> +                             msleep(1);
>> +                     }
>> +             }
>> +     }
>> +     nv_grbm_select(adev, 0, 0, 0, 0);
>> +     mutex_unlock(&adev->srbm_mutex);
>> +     amdgpu_gfx_off_ctrl(adev, true);
>>   }
>>
>>   static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
>> --
>> 2.34.1


More information about the amd-gfx mailing list