[PATCH 05/29] drm/amdgpu: setup hw debug registers on driver initialization

Felix Kuehling felix.kuehling at amd.com
Thu Dec 1 00:18:20 UTC 2022


On 2022-11-22 18:38, Felix Kuehling wrote:
>
> On 2022-10-31 12:23, Jonathan Kim wrote:
>> Add missing debug trap registers references and initialize all debug
>> registers on boot by clearing the hardware exception overrides and the
>> wave allocation ID index.
>>
>> For debug devices that only support single process debugging, enable
>> trap temporary setup by default.
>>
>> Debug devices that support multi-process debugging require trap
>> temporary setup to be disabled by default in order to satisfy microbench
>> performance when in non-debug mode.
>
> Where is this done? I don't think it's in the MQD setup because that 
> happens unconditionally on all GPUs.

If I understand it correctly, it's done by actually enabling the debug 
trap in patch 9 (for Aldebaran). For single-process debug devices, the 
debug trap is always on, as per this patch.

Maybe just add a reference to the Aldebaran patch to make it clearer.

Regards,
   Felix


>
>
>>
>> The debugger requires that TTMPs 6 & 7 save the dispatch ID to map
>> waves onto dispatch during compute context inspection.
>> In order to correctly this up, set the special reserved CP bit by 
>> default
>> whenever the MQD is initialized.
>
> There is a word missing here. "In order to correctly _set_ this up ..."?
>
> This patch covers GFXv9 and 10. Will GFXv11 be handled separately?
>
> Regards,
>   Felix
>
>
>>
>> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c        | 26 +++++++
>>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c         | 30 ++++++++
>>   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  5 ++
>>   .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  5 ++
>>   .../include/asic_reg/gc/gc_10_1_0_offset.h    | 14 ++++
>>   .../include/asic_reg/gc/gc_10_1_0_sh_mask.h   | 69 +++++++++++++++++++
>>   .../include/asic_reg/gc/gc_10_3_0_offset.h    | 10 +++
>>   .../include/asic_reg/gc/gc_10_3_0_sh_mask.h   |  4 ++
>>   8 files changed, 163 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index af94ac580d3e..d49aff0b4ba3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -4904,6 +4904,29 @@ static u32 
>> gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
>>     #define DEFAULT_SH_MEM_BASES    (0x6000)
>>   +static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device 
>> *adev,
>> +                uint32_t first_vmid,
>> +                uint32_t last_vmid)
>> +{
>> +    uint32_t data;
>> +    uint32_t trap_config_vmid_mask = 0;
>> +    int i;
>> +
>> +    /* Calculate trap config vmid mask */
>> +    for (i = first_vmid; i < last_vmid; i++)
>> +        trap_config_vmid_mask |= (1 << i);
>> +
>> +    data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
>> +            VMID_SEL, trap_config_vmid_mask);
>> +    data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
>> +            TRAP_EN, 1);
>> +    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
>> +    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
>> +
>> +    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
>> +    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
>> +}
>> +
>>   static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
>>   {
>>       int i;
>> @@ -4935,6 +4958,9 @@ static void gfx_v10_0_init_compute_vmid(struct 
>> amdgpu_device *adev)
>>           WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
>>           WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
>>       }
>> +
>> +    gfx_v10_0_debug_trap_config_init(adev, 
>> adev->vm_manager.first_kfd_vmid,
>> +                    AMDGPU_NUM_VMID);
>>   }
>>     static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index 0320be4a5fc6..a0e5ad342f13 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -2337,6 +2337,29 @@ static void gfx_v9_0_setup_rb(struct 
>> amdgpu_device *adev)
>>       adev->gfx.config.num_rbs = hweight32(active_rbs);
>>   }
>>   +static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device 
>> *adev,
>> +                uint32_t first_vmid,
>> +                uint32_t last_vmid)
>> +{
>> +    uint32_t data;
>> +    uint32_t trap_config_vmid_mask = 0;
>> +    int i;
>> +
>> +    /* Calculate trap config vmid mask */
>> +    for (i = first_vmid; i < last_vmid; i++)
>> +        trap_config_vmid_mask |= (1 << i);
>> +
>> +    data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
>> +            VMID_SEL, trap_config_vmid_mask);
>> +    data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
>> +            TRAP_EN, 1);
>> +    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
>> +    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
>> +
>> +    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
>> +    WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
>> +}
>> +
>>   #define DEFAULT_SH_MEM_BASES    (0x6000)
>>   static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
>>   {
>> @@ -4609,6 +4632,13 @@ static int gfx_v9_0_late_init(void *handle)
>>       if (r)
>>           return r;
>>   +    if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
>> +        gfx_v9_4_2_debug_trap_config_init(adev,
>> +            adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
>> +    else
>> +        gfx_v9_0_debug_trap_config_init(adev,
>> +            adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
>> +
>>       return 0;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> index d3e2b6a599a4..cb484ace17de 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> @@ -117,6 +117,11 @@ static void init_mqd(struct mqd_manager *mm, 
>> void **mqd,
>>               1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
>>               1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
>>   +    /* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
>> +     * DISPATCH_PTR.  This is required for the kfd debugger
>> +     */
>> +    m->cp_hqd_hq_scheduler0 = 1 << 14;
>> +
>>       if (q->format == KFD_QUEUE_FORMAT_AQL) {
>>           m->cp_hqd_aql_control =
>>               1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> index 0778e587a2d6..86f1cf090246 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> @@ -164,6 +164,11 @@ static void init_mqd(struct mqd_manager *mm, 
>> void **mqd,
>>               1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
>>               1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
>>   +    /* Set cp_hqd_hq_status0 bit 14 to 1 to have the CP set up the
>> +     * DISPATCH_PTR.  This is required for the kfd debugger
>> +     */
>> +    m->cp_hqd_hq_status0 = 1 << 14;
>> +
>>       if (q->format == KFD_QUEUE_FORMAT_AQL) {
>>           m->cp_hqd_aql_control =
>>               1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
>> diff --git 
>> a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h 
>> b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
>> index 18d34bbceebe..7d384f86bd67 100644
>> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
>> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
>> @@ -5190,6 +5190,20 @@
>>   #define mmSPI_WCL_PIPE_PERCENT_CS6_BASE_IDX 0
>>   #define mmSPI_WCL_PIPE_PERCENT_CS7 0x1f70
>>   #define mmSPI_WCL_PIPE_PERCENT_CS7_BASE_IDX 0
>> +#define mmSPI_GDBG_WAVE_CNTL 0x1f71
>> +#define mmSPI_GDBG_WAVE_CNTL_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_CONFIG 0x1f72
>> +#define mmSPI_GDBG_TRAP_CONFIG_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_MASK 0x1f73
>> +#define mmSPI_GDBG_TRAP_MASK_BASE_IDX 0
>> +#define mmSPI_GDBG_WAVE_CNTL2 0x1f74
>> +#define mmSPI_GDBG_WAVE_CNTL2_BASE_IDX 0
>> +#define mmSPI_GDBG_WAVE_CNTL3 0x1f75
>> +#define mmSPI_GDBG_WAVE_CNTL3_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_DATA0 0x1f78
>> +#define mmSPI_GDBG_TRAP_DATA0_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_DATA1 0x1f79
>> +#define mmSPI_GDBG_TRAP_DATA1_BASE_IDX 0
>>   #define mmSPI_COMPUTE_QUEUE_RESET 0x1f7b
>>   #define mmSPI_COMPUTE_QUEUE_RESET_BASE_IDX 0
>>   #define mmSPI_RESOURCE_RESERVE_CU_0 0x1f7c
>> diff --git 
>> a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h 
>> b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
>> index 4127896ffcdf..08772ba845b0 100644
>> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
>> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
>> @@ -19646,6 +19646,75 @@
>>   //SPI_WCL_PIPE_PERCENT_CS7
>>   #define SPI_WCL_PIPE_PERCENT_CS7__VALUE__SHIFT 0x0
>>   #define SPI_WCL_PIPE_PERCENT_CS7__VALUE_MASK 0x7FL
>> +//SPI_GDBG_WAVE_CNTL
>> +#define SPI_GDBG_WAVE_CNTL__STALL_RA__SHIFT 0x0
>> +#define SPI_GDBG_WAVE_CNTL__STALL_VMID__SHIFT 0x1
>> +#define SPI_GDBG_WAVE_CNTL__STALL_RA_MASK 0x00000001L
>> +#define SPI_GDBG_WAVE_CNTL__STALL_VMID_MASK 0x0001FFFEL
>> +//SPI_GDBG_TRAP_CONFIG
>> +#define SPI_GDBG_TRAP_CONFIG__ME_SEL__SHIFT 0x0
>> +#define SPI_GDBG_TRAP_CONFIG__PIPE_SEL__SHIFT 0x2
>> +#define SPI_GDBG_TRAP_CONFIG__QUEUE_SEL__SHIFT 0x4
>> +#define SPI_GDBG_TRAP_CONFIG__ME_MATCH__SHIFT 0x7
>> +#define SPI_GDBG_TRAP_CONFIG__PIPE_MATCH__SHIFT 0x8
>> +#define SPI_GDBG_TRAP_CONFIG__QUEUE_MATCH__SHIFT 0x9
>> +#define SPI_GDBG_TRAP_CONFIG__TRAP_EN__SHIFT 0xf
>> +#define SPI_GDBG_TRAP_CONFIG__VMID_SEL__SHIFT 0x10
>> +#define SPI_GDBG_TRAP_CONFIG__ME_SEL_MASK 0x00000003L
>> +#define SPI_GDBG_TRAP_CONFIG__PIPE_SEL_MASK 0x0000000CL
>> +#define SPI_GDBG_TRAP_CONFIG__QUEUE_SEL_MASK 0x00000070L
>> +#define SPI_GDBG_TRAP_CONFIG__ME_MATCH_MASK 0x00000080L
>> +#define SPI_GDBG_TRAP_CONFIG__PIPE_MATCH_MASK 0x00000100L
>> +#define SPI_GDBG_TRAP_CONFIG__QUEUE_MATCH_MASK 0x00000200L
>> +#define SPI_GDBG_TRAP_CONFIG__TRAP_EN_MASK 0x00008000L
>> +#define SPI_GDBG_TRAP_CONFIG__VMID_SEL_MASK 0xFFFF0000L
>> +//SPI_GDBG_TRAP_MASK
>> +#define SPI_GDBG_TRAP_MASK__EXCP_EN__SHIFT 0x0
>> +#define SPI_GDBG_TRAP_MASK__REPLACE__SHIFT 0x9
>> +#define SPI_GDBG_TRAP_MASK__EXCP_EN_MASK 0x01FFL
>> +#define SPI_GDBG_TRAP_MASK__REPLACE_MASK 0x0200L
>> +//SPI_GDBG_WAVE_CNTL2
>> +#define SPI_GDBG_WAVE_CNTL2__VMID_MASK__SHIFT 0x0
>> +#define SPI_GDBG_WAVE_CNTL2__MODE__SHIFT 0x10
>> +#define SPI_GDBG_WAVE_CNTL2__VMID_MASK_MASK 0x0000FFFFL
>> +#define SPI_GDBG_WAVE_CNTL2__MODE_MASK 0x00030000L
>> +//SPI_GDBG_WAVE_CNTL3
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_PS__SHIFT 0x0
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_VS__SHIFT 0x1
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_GS__SHIFT 0x2
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_HS__SHIFT 0x3
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CSG__SHIFT 0x4
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS0__SHIFT 0x5
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS1__SHIFT 0x6
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS2__SHIFT 0x7
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS3__SHIFT 0x8
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS4__SHIFT 0x9
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS5__SHIFT 0xa
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS6__SHIFT 0xb
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS7__SHIFT 0xc
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_DURATION__SHIFT 0xd
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_MULT__SHIFT 0x1c
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_PS_MASK 0x00000001L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_VS_MASK 0x00000002L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_GS_MASK 0x00000004L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_HS_MASK 0x00000008L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CSG_MASK 0x00000010L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS0_MASK 0x00000020L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS1_MASK 0x00000040L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS2_MASK 0x00000080L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS3_MASK 0x00000100L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS4_MASK 0x00000200L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS5_MASK 0x00000400L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS6_MASK 0x00000800L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS7_MASK 0x00001000L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_DURATION_MASK 0x0FFFE000L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_MULT_MASK 0x10000000L
>> +//SPI_GDBG_TRAP_DATA0
>> +#define SPI_GDBG_TRAP_DATA0__DATA__SHIFT 0x0
>> +#define SPI_GDBG_TRAP_DATA0__DATA_MASK 0xFFFFFFFFL
>> +//SPI_GDBG_TRAP_DATA1
>> +#define SPI_GDBG_TRAP_DATA1__DATA__SHIFT 0x0
>> +#define SPI_GDBG_TRAP_DATA1__DATA_MASK 0xFFFFFFFFL
>>   //SPI_COMPUTE_QUEUE_RESET
>>   #define SPI_COMPUTE_QUEUE_RESET__RESET__SHIFT 0x0
>>   #define SPI_COMPUTE_QUEUE_RESET__RESET_MASK 0x01L
>> diff --git 
>> a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h 
>> b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
>> index 3973110f149c..d09f1a06f4bf 100644
>> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
>> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
>> @@ -26,6 +26,8 @@
>>   #define mmSQ_DEBUG_STS_GLOBAL_BASE_IDX 0
>>   #define mmSQ_DEBUG_STS_GLOBAL2 0x10B0
>>   #define mmSQ_DEBUG_STS_GLOBAL2_BASE_IDX 0
>> +#define mmSQ_DEBUG 0x10B1
>> +#define mmSQ_DEBUG_BASE_IDX 0
>>     // addressBlock: gc_sdma0_sdma0dec
>>   // base address: 0x4980
>> @@ -4849,10 +4851,18 @@
>>   #define mmSPI_WCL_PIPE_PERCENT_CS3_BASE_IDX 0
>>   #define mmSPI_GDBG_WAVE_CNTL 0x1f71
>>   #define mmSPI_GDBG_WAVE_CNTL_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_CONFIG 0x1f72
>> +#define mmSPI_GDBG_TRAP_CONFIG_BASE_IDX 0
>>   #define mmSPI_GDBG_TRAP_MASK 0x1f73
>>   #define mmSPI_GDBG_TRAP_MASK_BASE_IDX 0
>>   #define mmSPI_GDBG_WAVE_CNTL2 0x1f74
>>   #define mmSPI_GDBG_WAVE_CNTL2_BASE_IDX 0
>> +#define mmSPI_GDBG_WAVE_CNTL3 0x1f75
>> +#define mmSPI_GDBG_WAVE_CNTL3_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_DATA0 0x1f78
>> +#define mmSPI_GDBG_TRAP_DATA0_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_DATA1 0x1f79
>> +#define mmSPI_GDBG_TRAP_DATA1_BASE_IDX 0
>>   #define mmSPI_COMPUTE_QUEUE_RESET 0x1f7b
>>   #define mmSPI_COMPUTE_QUEUE_RESET_BASE_IDX 0
>>   #define mmSPI_RESOURCE_RESERVE_CU_0 0x1f7c
>> diff --git 
>> a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h 
>> b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
>> index d4e8ff22ecb8..fc85aee010fe 100644
>> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
>> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
>> @@ -47853,6 +47853,10 @@
>>       // addressBlock: sqind
>> +//SQ_DEBUG
>> +#define SQ_DEBUG__SINGLE_MEMOP_MASK 0x00000001L
>> +#define SQ_DEBUG__SINGLE_MEMOP__SHIFT 0x00000000
>> +
>>   //SQ_DEBUG_STS_GLOBAL
>>   #define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX0_MASK 0x000000ffL
>>   #define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX0__SHIFT 0x00000000


More information about the amd-gfx mailing list