[PATCH 05/29] drm/amdgpu: setup hw debug registers on driver initialization
Felix Kuehling
felix.kuehling at amd.com
Thu Dec 1 00:18:20 UTC 2022
On 2022-11-22 18:38, Felix Kuehling wrote:
>
> On 2022-10-31 12:23, Jonathan Kim wrote:
>> Add missing debug trap registers references and initialize all debug
>> registers on boot by clearing the hardware exception overrides and the
>> wave allocation ID index.
>>
>> For debug devices that only support single process debugging, enable
>> trap temporary setup by default.
>>
>> Debug devices that support multi-process debugging require trap
>> temporary setup to be disabled by default in order to satisfy microbench
>> performance when in non-debug mode.
>
> Where is this done? I don't think it's in the MQD setup because that
> happens unconditionally on all GPUs.
If I understand it correctly, it's done by actually enabling the debug
trap in patch 9 (for Aldebaran). For single-process debug devices, the
debug trap is always on, as per this patch.
Maybe just add a reference to the Aldebaran patch to make it clearer.
Regards,
Felix
>
>
>>
>> The debugger requires that TTMPs 6 & 7 save the dispatch ID to map
>> waves onto dispatch during compute context inspection.
>> In order to correctly this up, set the special reserved CP bit by
>> default
>> whenever the MQD is initialized.
>
> There is a word missing here. "In order to correctly _set_ this up ..."?
>
> This patch covers GFXv9 and 10. Will GFXv11 be handled separately?
>
> Regards,
> Felix
>
>
>>
>> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 26 +++++++
>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 ++++++++
>> .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 5 ++
>> .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 5 ++
>> .../include/asic_reg/gc/gc_10_1_0_offset.h | 14 ++++
>> .../include/asic_reg/gc/gc_10_1_0_sh_mask.h | 69 +++++++++++++++++++
>> .../include/asic_reg/gc/gc_10_3_0_offset.h | 10 +++
>> .../include/asic_reg/gc/gc_10_3_0_sh_mask.h | 4 ++
>> 8 files changed, 163 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index af94ac580d3e..d49aff0b4ba3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -4904,6 +4904,29 @@ static u32
>> gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
>> #define DEFAULT_SH_MEM_BASES (0x6000)
>> +static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device
>> *adev,
>> + uint32_t first_vmid,
>> + uint32_t last_vmid)
>> +{
>> + uint32_t data;
>> + uint32_t trap_config_vmid_mask = 0;
>> + int i;
>> +
>> + /* Calculate trap config vmid mask */
>> + for (i = first_vmid; i < last_vmid; i++)
>> + trap_config_vmid_mask |= (1 << i);
>> +
>> + data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
>> + VMID_SEL, trap_config_vmid_mask);
>> + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
>> + TRAP_EN, 1);
>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
>> +
>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
>> +}
>> +
>> static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
>> {
>> int i;
>> @@ -4935,6 +4958,9 @@ static void gfx_v10_0_init_compute_vmid(struct
>> amdgpu_device *adev)
>> WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
>> WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
>> }
>> +
>> + gfx_v10_0_debug_trap_config_init(adev,
>> adev->vm_manager.first_kfd_vmid,
>> + AMDGPU_NUM_VMID);
>> }
>> static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index 0320be4a5fc6..a0e5ad342f13 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -2337,6 +2337,29 @@ static void gfx_v9_0_setup_rb(struct
>> amdgpu_device *adev)
>> adev->gfx.config.num_rbs = hweight32(active_rbs);
>> }
>> +static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device
>> *adev,
>> + uint32_t first_vmid,
>> + uint32_t last_vmid)
>> +{
>> + uint32_t data;
>> + uint32_t trap_config_vmid_mask = 0;
>> + int i;
>> +
>> + /* Calculate trap config vmid mask */
>> + for (i = first_vmid; i < last_vmid; i++)
>> + trap_config_vmid_mask |= (1 << i);
>> +
>> + data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
>> + VMID_SEL, trap_config_vmid_mask);
>> + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
>> + TRAP_EN, 1);
>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
>> +
>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
>> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
>> +}
>> +
>> #define DEFAULT_SH_MEM_BASES (0x6000)
>> static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
>> {
>> @@ -4609,6 +4632,13 @@ static int gfx_v9_0_late_init(void *handle)
>> if (r)
>> return r;
>> + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
>> + gfx_v9_4_2_debug_trap_config_init(adev,
>> + adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
>> + else
>> + gfx_v9_0_debug_trap_config_init(adev,
>> + adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
>> +
>> return 0;
>> }
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> index d3e2b6a599a4..cb484ace17de 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> @@ -117,6 +117,11 @@ static void init_mqd(struct mqd_manager *mm,
>> void **mqd,
>> 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
>> 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
>> + /* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
>> + * DISPATCH_PTR. This is required for the kfd debugger
>> + */
>> + m->cp_hqd_hq_scheduler0 = 1 << 14;
>> +
>> if (q->format == KFD_QUEUE_FORMAT_AQL) {
>> m->cp_hqd_aql_control =
>> 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> index 0778e587a2d6..86f1cf090246 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> @@ -164,6 +164,11 @@ static void init_mqd(struct mqd_manager *mm,
>> void **mqd,
>> 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
>> 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
>> + /* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
>> + * DISPATCH_PTR. This is required for the kfd debugger
>> + */
>> + m->cp_hqd_hq_status0 = 1 << 14;
>> +
>> if (q->format == KFD_QUEUE_FORMAT_AQL) {
>> m->cp_hqd_aql_control =
>> 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
>> diff --git
>> a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
>> b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
>> index 18d34bbceebe..7d384f86bd67 100644
>> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
>> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
>> @@ -5190,6 +5190,20 @@
>> #define mmSPI_WCL_PIPE_PERCENT_CS6_BASE_IDX 0
>> #define mmSPI_WCL_PIPE_PERCENT_CS7 0x1f70
>> #define mmSPI_WCL_PIPE_PERCENT_CS7_BASE_IDX 0
>> +#define mmSPI_GDBG_WAVE_CNTL 0x1f71
>> +#define mmSPI_GDBG_WAVE_CNTL_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_CONFIG 0x1f72
>> +#define mmSPI_GDBG_TRAP_CONFIG_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_MASK 0x1f73
>> +#define mmSPI_GDBG_TRAP_MASK_BASE_IDX 0
>> +#define mmSPI_GDBG_WAVE_CNTL2 0x1f74
>> +#define mmSPI_GDBG_WAVE_CNTL2_BASE_IDX 0
>> +#define mmSPI_GDBG_WAVE_CNTL3 0x1f75
>> +#define mmSPI_GDBG_WAVE_CNTL3_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_DATA0 0x1f78
>> +#define mmSPI_GDBG_TRAP_DATA0_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_DATA1 0x1f79
>> +#define mmSPI_GDBG_TRAP_DATA1_BASE_IDX 0
>> #define mmSPI_COMPUTE_QUEUE_RESET 0x1f7b
>> #define mmSPI_COMPUTE_QUEUE_RESET_BASE_IDX 0
>> #define mmSPI_RESOURCE_RESERVE_CU_0 0x1f7c
>> diff --git
>> a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
>> b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
>> index 4127896ffcdf..08772ba845b0 100644
>> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
>> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
>> @@ -19646,6 +19646,75 @@
>> //SPI_WCL_PIPE_PERCENT_CS7
>> #define SPI_WCL_PIPE_PERCENT_CS7__VALUE__SHIFT 0x0
>> #define SPI_WCL_PIPE_PERCENT_CS7__VALUE_MASK 0x7FL
>> +//SPI_GDBG_WAVE_CNTL
>> +#define SPI_GDBG_WAVE_CNTL__STALL_RA__SHIFT 0x0
>> +#define SPI_GDBG_WAVE_CNTL__STALL_VMID__SHIFT 0x1
>> +#define SPI_GDBG_WAVE_CNTL__STALL_RA_MASK 0x00000001L
>> +#define SPI_GDBG_WAVE_CNTL__STALL_VMID_MASK 0x0001FFFEL
>> +//SPI_GDBG_TRAP_CONFIG
>> +#define SPI_GDBG_TRAP_CONFIG__ME_SEL__SHIFT 0x0
>> +#define SPI_GDBG_TRAP_CONFIG__PIPE_SEL__SHIFT 0x2
>> +#define SPI_GDBG_TRAP_CONFIG__QUEUE_SEL__SHIFT 0x4
>> +#define SPI_GDBG_TRAP_CONFIG__ME_MATCH__SHIFT 0x7
>> +#define SPI_GDBG_TRAP_CONFIG__PIPE_MATCH__SHIFT 0x8
>> +#define SPI_GDBG_TRAP_CONFIG__QUEUE_MATCH__SHIFT 0x9
>> +#define SPI_GDBG_TRAP_CONFIG__TRAP_EN__SHIFT 0xf
>> +#define SPI_GDBG_TRAP_CONFIG__VMID_SEL__SHIFT 0x10
>> +#define SPI_GDBG_TRAP_CONFIG__ME_SEL_MASK 0x00000003L
>> +#define SPI_GDBG_TRAP_CONFIG__PIPE_SEL_MASK 0x0000000CL
>> +#define SPI_GDBG_TRAP_CONFIG__QUEUE_SEL_MASK 0x00000070L
>> +#define SPI_GDBG_TRAP_CONFIG__ME_MATCH_MASK 0x00000080L
>> +#define SPI_GDBG_TRAP_CONFIG__PIPE_MATCH_MASK 0x00000100L
>> +#define SPI_GDBG_TRAP_CONFIG__QUEUE_MATCH_MASK 0x00000200L
>> +#define SPI_GDBG_TRAP_CONFIG__TRAP_EN_MASK 0x00008000L
>> +#define SPI_GDBG_TRAP_CONFIG__VMID_SEL_MASK 0xFFFF0000L
>> +//SPI_GDBG_TRAP_MASK
>> +#define SPI_GDBG_TRAP_MASK__EXCP_EN__SHIFT 0x0
>> +#define SPI_GDBG_TRAP_MASK__REPLACE__SHIFT 0x9
>> +#define SPI_GDBG_TRAP_MASK__EXCP_EN_MASK 0x01FFL
>> +#define SPI_GDBG_TRAP_MASK__REPLACE_MASK 0x0200L
>> +//SPI_GDBG_WAVE_CNTL2
>> +#define SPI_GDBG_WAVE_CNTL2__VMID_MASK__SHIFT 0x0
>> +#define SPI_GDBG_WAVE_CNTL2__MODE__SHIFT 0x10
>> +#define SPI_GDBG_WAVE_CNTL2__VMID_MASK_MASK 0x0000FFFFL
>> +#define SPI_GDBG_WAVE_CNTL2__MODE_MASK 0x00030000L
>> +//SPI_GDBG_WAVE_CNTL3
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_PS__SHIFT 0x0
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_VS__SHIFT 0x1
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_GS__SHIFT 0x2
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_HS__SHIFT 0x3
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CSG__SHIFT 0x4
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS0__SHIFT 0x5
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS1__SHIFT 0x6
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS2__SHIFT 0x7
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS3__SHIFT 0x8
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS4__SHIFT 0x9
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS5__SHIFT 0xa
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS6__SHIFT 0xb
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS7__SHIFT 0xc
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_DURATION__SHIFT 0xd
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_MULT__SHIFT 0x1c
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_PS_MASK 0x00000001L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_VS_MASK 0x00000002L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_GS_MASK 0x00000004L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_HS_MASK 0x00000008L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CSG_MASK 0x00000010L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS0_MASK 0x00000020L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS1_MASK 0x00000040L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS2_MASK 0x00000080L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS3_MASK 0x00000100L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS4_MASK 0x00000200L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS5_MASK 0x00000400L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS6_MASK 0x00000800L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS7_MASK 0x00001000L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_DURATION_MASK 0x0FFFE000L
>> +#define SPI_GDBG_WAVE_CNTL3__STALL_MULT_MASK 0x10000000L
>> +//SPI_GDBG_TRAP_DATA0
>> +#define SPI_GDBG_TRAP_DATA0__DATA__SHIFT 0x0
>> +#define SPI_GDBG_TRAP_DATA0__DATA_MASK 0xFFFFFFFFL
>> +//SPI_GDBG_TRAP_DATA1
>> +#define SPI_GDBG_TRAP_DATA1__DATA__SHIFT 0x0
>> +#define SPI_GDBG_TRAP_DATA1__DATA_MASK 0xFFFFFFFFL
>> //SPI_COMPUTE_QUEUE_RESET
>> #define SPI_COMPUTE_QUEUE_RESET__RESET__SHIFT 0x0
>> #define SPI_COMPUTE_QUEUE_RESET__RESET_MASK 0x01L
>> diff --git
>> a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
>> b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
>> index 3973110f149c..d09f1a06f4bf 100644
>> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
>> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
>> @@ -26,6 +26,8 @@
>> #define mmSQ_DEBUG_STS_GLOBAL_BASE_IDX 0
>> #define mmSQ_DEBUG_STS_GLOBAL2 0x10B0
>> #define mmSQ_DEBUG_STS_GLOBAL2_BASE_IDX 0
>> +#define mmSQ_DEBUG 0x10B1
>> +#define mmSQ_DEBUG_BASE_IDX 0
>> // addressBlock: gc_sdma0_sdma0dec
>> // base address: 0x4980
>> @@ -4849,10 +4851,18 @@
>> #define mmSPI_WCL_PIPE_PERCENT_CS3_BASE_IDX 0
>> #define mmSPI_GDBG_WAVE_CNTL 0x1f71
>> #define mmSPI_GDBG_WAVE_CNTL_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_CONFIG 0x1f72
>> +#define mmSPI_GDBG_TRAP_CONFIG_BASE_IDX 0
>> #define mmSPI_GDBG_TRAP_MASK 0x1f73
>> #define mmSPI_GDBG_TRAP_MASK_BASE_IDX 0
>> #define mmSPI_GDBG_WAVE_CNTL2 0x1f74
>> #define mmSPI_GDBG_WAVE_CNTL2_BASE_IDX 0
>> +#define mmSPI_GDBG_WAVE_CNTL3 0x1f75
>> +#define mmSPI_GDBG_WAVE_CNTL3_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_DATA0 0x1f78
>> +#define mmSPI_GDBG_TRAP_DATA0_BASE_IDX 0
>> +#define mmSPI_GDBG_TRAP_DATA1 0x1f79
>> +#define mmSPI_GDBG_TRAP_DATA1_BASE_IDX 0
>> #define mmSPI_COMPUTE_QUEUE_RESET 0x1f7b
>> #define mmSPI_COMPUTE_QUEUE_RESET_BASE_IDX 0
>> #define mmSPI_RESOURCE_RESERVE_CU_0 0x1f7c
>> diff --git
>> a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
>> b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
>> index d4e8ff22ecb8..fc85aee010fe 100644
>> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
>> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
>> @@ -47853,6 +47853,10 @@
>> // addressBlock: sqind
>> +//SQ_DEBUG
>> +#define SQ_DEBUG__SINGLE_MEMOP_MASK 0x00000001L
>> +#define SQ_DEBUG__SINGLE_MEMOP__SHIFT 0x00000000
>> +
>> //SQ_DEBUG_STS_GLOBAL
>> #define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX0_MASK 0x000000ffL
>> #define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX0__SHIFT 0x00000000
More information about the amd-gfx
mailing list