[PATCH v3 2/4] drm/amdgpu: Add support to dump gfx10 cp registers
Khatri, Sunil
sukhatri at amd.com
Thu May 16 03:41:04 UTC 2024
On 5/16/2024 1:40 AM, Deucher, Alexander wrote:
> [Public]
>
>> -----Original Message-----
>> From: Sunil Khatri <sunil.khatri at amd.com>
>> Sent: Wednesday, May 15, 2024 8:18 AM
>> To: Deucher, Alexander <Alexander.Deucher at amd.com>; Koenig, Christian
>> <Christian.Koenig at amd.com>
>> Cc: amd-gfx at lists.freedesktop.org; Khatri, Sunil <Sunil.Khatri at amd.com>
>> Subject: [PATCH v3 2/4] drm/amdgpu: Add support to dump gfx10 cp
>> registers
>>
>> Add support to dump the CP (compute queue) registers of all instances in gfx10.
>>
>> Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 +
>> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 117
>> +++++++++++++++++++++++-
>> 2 files changed, 114 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> index 30d7f9c29478..d96873c154ed 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> @@ -436,6 +436,7 @@ struct amdgpu_gfx {
>>
>> /* IP reg dump */
>> uint32_t *ipdump_core;
>> + uint32_t *ipdump_cp;
> I'd call this ip_dump_compute or ip_dump_compute_queues to align with what the registers represent.
Sure
>
> Alex
>
>> };
>>
>> struct amdgpu_gfx_ras_reg_entry {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index f6d6a4b9802d..daf9a3571183 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -381,6 +381,49 @@ static const struct amdgpu_hwip_reg_entry
>> gc_reg_list_10_1[] = {
>> SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) };
>>
>> +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = {
>> + /* compute registers */
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
>> + SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
>> + SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_PQ_DOORBELL_CONTROL),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
>> + SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
>> + SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
>> + SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
>> + SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
>> + SOC15_REG_ENTRY_STR(GC, 0,
>> mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
>> + SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS) };
>> +
>> static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
>> SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4,
>> 0xffffffff, 0x00400014),
>> SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL,
>> 0xfcff8fff, 0xf8000100), @@ -4595,10 +4638,11 @@ static int
>> gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
>> hw_prio, NULL);
>> }
>>
>> -static void gfx_v10_0_alloc_dump_mem(struct amdgpu_device *adev)
>> +static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev)
>> {
>> uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
>> uint32_t *ptr;
>> + uint32_t inst;
>>
>> ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
>> if (ptr == NULL) {
>> @@ -4607,6 +4651,19 @@ static void gfx_v10_0_alloc_dump_mem(struct
>> amdgpu_device *adev)
>> } else {
>> adev->gfx.ipdump_core = ptr;
>> }
>> +
>> + /* Allocate memory for gfx cp registers for all the instances */
>> + reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
>> + inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
>> + adev->gfx.mec.num_queue_per_pipe;
>> +
>> + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
>> + if (ptr == NULL) {
>> + DRM_ERROR("Failed to allocate memory for GFX CP IP
>> Dump\n");
>> + adev->gfx.ipdump_cp = NULL;
>> + } else {
>> + adev->gfx.ipdump_cp = ptr;
>> + }
>> }
>>
>> static int gfx_v10_0_sw_init(void *handle) @@ -4761,7 +4818,7 @@ static
>> int gfx_v10_0_sw_init(void *handle)
>>
>> gfx_v10_0_gpu_early_init(adev);
>>
>> - gfx_v10_0_alloc_dump_mem(adev);
>> + gfx_v10_0_alloc_ip_dump(adev);
>>
>> return 0;
>> }
>> @@ -4816,6 +4873,7 @@ static int gfx_v10_0_sw_fini(void *handle)
>> gfx_v10_0_free_microcode(adev);
>>
>> kfree(adev->gfx.ipdump_core);
>> + kfree(adev->gfx.ipdump_cp);
>>
>> return 0;
>> }
>> @@ -9280,7 +9338,7 @@ static void gfx_v10_0_emit_mem_sync(struct
>> amdgpu_ring *ring) static void gfx_v10_ip_print(void *handle, struct
>> drm_printer *p) {
>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> - uint32_t i;
>> + uint32_t i, j, k, reg, index = 0;
>> uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
>>
>> if (!adev->gfx.ipdump_core)
>> @@ -9290,12 +9348,32 @@ static void gfx_v10_ip_print(void *handle, struct
>> drm_printer *p)
>> drm_printf(p, "%-50s \t 0x%08x\n",
>> gc_reg_list_10_1[i].reg_name,
>> adev->gfx.ipdump_core[i]);
>> +
>> + /* print cp registers for all instances */
>> + if (!adev->gfx.ipdump_cp)
>> + return;
>> +
>> + reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
>> +
>> + for (i = 0; i < adev->gfx.mec.num_mec; i++) {
>> + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
>> + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe;
>> k++) {
>> + drm_printf(p, "mec %d, pipe %d, queue
>> %d\n", i, j, k);
>> + for (reg = 0; reg < reg_count; reg++) {
>> + drm_printf(p, "%-50s \t 0x%08x\n",
>> +
>> gc_cp_reg_list_10[reg].reg_name,
>> + adev->gfx.ipdump_cp[index
>> + reg]);
>> + }
>> + index += reg_count;
>> + }
>> + }
>> + }
>> }
>>
>> static void gfx_v10_ip_dump(void *handle) {
>> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> - uint32_t i;
>> + uint32_t i, j, k, reg, index = 0;
>> uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
>>
>> if (!adev->gfx.ipdump_core)
>> @@ -9305,6 +9383,37 @@ static void gfx_v10_ip_dump(void *handle)
>> for (i = 0; i < reg_count; i++)
>> adev->gfx.ipdump_core[i] =
>> RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i]));
>> amdgpu_gfx_off_ctrl(adev, true);
>> +
>> + /* dump cp registers for all instances */
>> + if (!adev->gfx.ipdump_cp)
>> + return;
>> +
>> + reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
>> + amdgpu_gfx_off_ctrl(adev, false);
>> + mutex_lock(&adev->srbm_mutex);
>> + for (i = 0; i < adev->gfx.mec.num_mec; i++) {
>> + for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
>> + for (k = 0; k < adev->gfx.mec.num_queue_per_pipe;
>> k++) {
>> + nv_grbm_select(adev, i, j, k, 0);
>> +
>> + for (reg = 0; reg < reg_count; reg++) {
>> + adev->gfx.ipdump_cp[index + reg] =
>> +
>> RREG32(SOC15_REG_ENTRY_OFFSET(
>> +
>> gc_cp_reg_list_10[reg]));
>> + }
>> + index += reg_count;
>> + /*
>> + * Reading all the registers takes a long time and can cause a
>> watchdog timeout;
>> + * sleeping after every set of register reads releases the
>> CPU core
>> + * so that it does not stay locked.
>> + */
>> + msleep(1);
>> + }
>> + }
>> + }
>> + nv_grbm_select(adev, 0, 0, 0, 0);
>> + mutex_unlock(&adev->srbm_mutex);
>> + amdgpu_gfx_off_ctrl(adev, true);
>> }
>>
>> static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
>> --
>> 2.34.1
More information about the amd-gfx
mailing list