[PATCH 05/29] drm/amdgpu: setup hw debug registers on driver initialization
Felix Kuehling
felix.kuehling at amd.com
Tue Nov 22 23:38:34 UTC 2022
On 2022-10-31 12:23, Jonathan Kim wrote:
> Add missing debug trap registers references and initialize all debug
> registers on boot by clearing the hardware exception overrides and the
> wave allocation ID index.
>
> For debug devices that only support single process debugging, enable
> trap temporary setup by default.
>
> Debug devices that support multi-process debugging require trap
> temporary setup to be disabled by default in order to satisfy microbench
> performance when in non-debug mode.
Where is this done? I don't think it's in the MQD setup because that
happens unconditionally on all GPUs.
>
> The debugger requires that TTMPs 6 & 7 save the dispatch ID to map
> waves onto dispatch during compute context inspection.
> In order to correctly this up, set the special reserved CP bit by default
> whenever the MQD is initialized.
There is a word missing here. "In order to correctly _set_ this up ..."?
This patch covers GFXv9 and 10. Will GFXv11 be handled separately?
Regards,
Felix
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 26 +++++++
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 ++++++++
> .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 5 ++
> .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 5 ++
> .../include/asic_reg/gc/gc_10_1_0_offset.h | 14 ++++
> .../include/asic_reg/gc/gc_10_1_0_sh_mask.h | 69 +++++++++++++++++++
> .../include/asic_reg/gc/gc_10_3_0_offset.h | 10 +++
> .../include/asic_reg/gc/gc_10_3_0_sh_mask.h | 4 ++
> 8 files changed, 163 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index af94ac580d3e..d49aff0b4ba3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4904,6 +4904,29 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
>
> #define DEFAULT_SH_MEM_BASES (0x6000)
>
> +static void gfx_v10_0_debug_trap_config_init(struct amdgpu_device *adev,
> + uint32_t first_vmid,
> + uint32_t last_vmid)
> +{
> + uint32_t data;
> + uint32_t trap_config_vmid_mask = 0;
> + int i;
> +
> + /* Calculate trap config vmid mask */
> + for (i = first_vmid; i < last_vmid; i++)
> + trap_config_vmid_mask |= (1 << i);
> +
> + data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
> + VMID_SEL, trap_config_vmid_mask);
> + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
> + TRAP_EN, 1);
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
> +
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
> +}
> +
> static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
> {
> int i;
> @@ -4935,6 +4958,9 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
> WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
> WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
> }
> +
> + gfx_v10_0_debug_trap_config_init(adev, adev->vm_manager.first_kfd_vmid,
> + AMDGPU_NUM_VMID);
> }
>
> static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 0320be4a5fc6..a0e5ad342f13 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -2337,6 +2337,29 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
> adev->gfx.config.num_rbs = hweight32(active_rbs);
> }
>
> +static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
> + uint32_t first_vmid,
> + uint32_t last_vmid)
> +{
> + uint32_t data;
> + uint32_t trap_config_vmid_mask = 0;
> + int i;
> +
> + /* Calculate trap config vmid mask */
> + for (i = first_vmid; i < last_vmid; i++)
> + trap_config_vmid_mask |= (1 << i);
> +
> + data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
> + VMID_SEL, trap_config_vmid_mask);
> + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
> + TRAP_EN, 1);
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
> +
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
> +}
> +
> #define DEFAULT_SH_MEM_BASES (0x6000)
> static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
> {
> @@ -4609,6 +4632,13 @@ static int gfx_v9_0_late_init(void *handle)
> if (r)
> return r;
>
> + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
> + gfx_v9_4_2_debug_trap_config_init(adev,
> + adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
> + else
> + gfx_v9_0_debug_trap_config_init(adev,
> + adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
> index d3e2b6a599a4..cb484ace17de 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
> @@ -117,6 +117,11 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
> 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
> 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
>
> + /* Set cp_hqd_hq_scheduler0 bit 14 to 1 to have the CP set up the
> + * DISPATCH_PTR. This is required for the kfd debugger
> + */
> + m->cp_hqd_hq_scheduler0 = 1 << 14;
> +
> if (q->format == KFD_QUEUE_FORMAT_AQL) {
> m->cp_hqd_aql_control =
> 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> index 0778e587a2d6..86f1cf090246 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
> @@ -164,6 +164,11 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
> 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
> 1 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
>
> + /* Set bit 14 of cp_hqd_hq_status0 (cp_hqd_hq_scheduler0 in the v10 MQD)
> + * to have the CP set up the DISPATCH_PTR. Required for the kfd debugger
> + */
> + m->cp_hqd_hq_status0 = 1 << 14;
> +
> if (q->format == KFD_QUEUE_FORMAT_AQL) {
> m->cp_hqd_aql_control =
> 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
> diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
> index 18d34bbceebe..7d384f86bd67 100644
> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_offset.h
> @@ -5190,6 +5190,20 @@
> #define mmSPI_WCL_PIPE_PERCENT_CS6_BASE_IDX 0
> #define mmSPI_WCL_PIPE_PERCENT_CS7 0x1f70
> #define mmSPI_WCL_PIPE_PERCENT_CS7_BASE_IDX 0
> +#define mmSPI_GDBG_WAVE_CNTL 0x1f71
> +#define mmSPI_GDBG_WAVE_CNTL_BASE_IDX 0
> +#define mmSPI_GDBG_TRAP_CONFIG 0x1f72
> +#define mmSPI_GDBG_TRAP_CONFIG_BASE_IDX 0
> +#define mmSPI_GDBG_TRAP_MASK 0x1f73
> +#define mmSPI_GDBG_TRAP_MASK_BASE_IDX 0
> +#define mmSPI_GDBG_WAVE_CNTL2 0x1f74
> +#define mmSPI_GDBG_WAVE_CNTL2_BASE_IDX 0
> +#define mmSPI_GDBG_WAVE_CNTL3 0x1f75
> +#define mmSPI_GDBG_WAVE_CNTL3_BASE_IDX 0
> +#define mmSPI_GDBG_TRAP_DATA0 0x1f78
> +#define mmSPI_GDBG_TRAP_DATA0_BASE_IDX 0
> +#define mmSPI_GDBG_TRAP_DATA1 0x1f79
> +#define mmSPI_GDBG_TRAP_DATA1_BASE_IDX 0
> #define mmSPI_COMPUTE_QUEUE_RESET 0x1f7b
> #define mmSPI_COMPUTE_QUEUE_RESET_BASE_IDX 0
> #define mmSPI_RESOURCE_RESERVE_CU_0 0x1f7c
> diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
> index 4127896ffcdf..08772ba845b0 100644
> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_1_0_sh_mask.h
> @@ -19646,6 +19646,75 @@
> //SPI_WCL_PIPE_PERCENT_CS7
> #define SPI_WCL_PIPE_PERCENT_CS7__VALUE__SHIFT 0x0
> #define SPI_WCL_PIPE_PERCENT_CS7__VALUE_MASK 0x7FL
> +//SPI_GDBG_WAVE_CNTL
> +#define SPI_GDBG_WAVE_CNTL__STALL_RA__SHIFT 0x0
> +#define SPI_GDBG_WAVE_CNTL__STALL_VMID__SHIFT 0x1
> +#define SPI_GDBG_WAVE_CNTL__STALL_RA_MASK 0x00000001L
> +#define SPI_GDBG_WAVE_CNTL__STALL_VMID_MASK 0x0001FFFEL
> +//SPI_GDBG_TRAP_CONFIG
> +#define SPI_GDBG_TRAP_CONFIG__ME_SEL__SHIFT 0x0
> +#define SPI_GDBG_TRAP_CONFIG__PIPE_SEL__SHIFT 0x2
> +#define SPI_GDBG_TRAP_CONFIG__QUEUE_SEL__SHIFT 0x4
> +#define SPI_GDBG_TRAP_CONFIG__ME_MATCH__SHIFT 0x7
> +#define SPI_GDBG_TRAP_CONFIG__PIPE_MATCH__SHIFT 0x8
> +#define SPI_GDBG_TRAP_CONFIG__QUEUE_MATCH__SHIFT 0x9
> +#define SPI_GDBG_TRAP_CONFIG__TRAP_EN__SHIFT 0xf
> +#define SPI_GDBG_TRAP_CONFIG__VMID_SEL__SHIFT 0x10
> +#define SPI_GDBG_TRAP_CONFIG__ME_SEL_MASK 0x00000003L
> +#define SPI_GDBG_TRAP_CONFIG__PIPE_SEL_MASK 0x0000000CL
> +#define SPI_GDBG_TRAP_CONFIG__QUEUE_SEL_MASK 0x00000070L
> +#define SPI_GDBG_TRAP_CONFIG__ME_MATCH_MASK 0x00000080L
> +#define SPI_GDBG_TRAP_CONFIG__PIPE_MATCH_MASK 0x00000100L
> +#define SPI_GDBG_TRAP_CONFIG__QUEUE_MATCH_MASK 0x00000200L
> +#define SPI_GDBG_TRAP_CONFIG__TRAP_EN_MASK 0x00008000L
> +#define SPI_GDBG_TRAP_CONFIG__VMID_SEL_MASK 0xFFFF0000L
> +//SPI_GDBG_TRAP_MASK
> +#define SPI_GDBG_TRAP_MASK__EXCP_EN__SHIFT 0x0
> +#define SPI_GDBG_TRAP_MASK__REPLACE__SHIFT 0x9
> +#define SPI_GDBG_TRAP_MASK__EXCP_EN_MASK 0x01FFL
> +#define SPI_GDBG_TRAP_MASK__REPLACE_MASK 0x0200L
> +//SPI_GDBG_WAVE_CNTL2
> +#define SPI_GDBG_WAVE_CNTL2__VMID_MASK__SHIFT 0x0
> +#define SPI_GDBG_WAVE_CNTL2__MODE__SHIFT 0x10
> +#define SPI_GDBG_WAVE_CNTL2__VMID_MASK_MASK 0x0000FFFFL
> +#define SPI_GDBG_WAVE_CNTL2__MODE_MASK 0x00030000L
> +//SPI_GDBG_WAVE_CNTL3
> +#define SPI_GDBG_WAVE_CNTL3__STALL_PS__SHIFT 0x0
> +#define SPI_GDBG_WAVE_CNTL3__STALL_VS__SHIFT 0x1
> +#define SPI_GDBG_WAVE_CNTL3__STALL_GS__SHIFT 0x2
> +#define SPI_GDBG_WAVE_CNTL3__STALL_HS__SHIFT 0x3
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CSG__SHIFT 0x4
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS0__SHIFT 0x5
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS1__SHIFT 0x6
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS2__SHIFT 0x7
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS3__SHIFT 0x8
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS4__SHIFT 0x9
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS5__SHIFT 0xa
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS6__SHIFT 0xb
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS7__SHIFT 0xc
> +#define SPI_GDBG_WAVE_CNTL3__STALL_DURATION__SHIFT 0xd
> +#define SPI_GDBG_WAVE_CNTL3__STALL_MULT__SHIFT 0x1c
> +#define SPI_GDBG_WAVE_CNTL3__STALL_PS_MASK 0x00000001L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_VS_MASK 0x00000002L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_GS_MASK 0x00000004L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_HS_MASK 0x00000008L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CSG_MASK 0x00000010L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS0_MASK 0x00000020L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS1_MASK 0x00000040L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS2_MASK 0x00000080L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS3_MASK 0x00000100L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS4_MASK 0x00000200L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS5_MASK 0x00000400L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS6_MASK 0x00000800L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_CS7_MASK 0x00001000L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_DURATION_MASK 0x0FFFE000L
> +#define SPI_GDBG_WAVE_CNTL3__STALL_MULT_MASK 0x10000000L
> +//SPI_GDBG_TRAP_DATA0
> +#define SPI_GDBG_TRAP_DATA0__DATA__SHIFT 0x0
> +#define SPI_GDBG_TRAP_DATA0__DATA_MASK 0xFFFFFFFFL
> +//SPI_GDBG_TRAP_DATA1
> +#define SPI_GDBG_TRAP_DATA1__DATA__SHIFT 0x0
> +#define SPI_GDBG_TRAP_DATA1__DATA_MASK 0xFFFFFFFFL
> //SPI_COMPUTE_QUEUE_RESET
> #define SPI_COMPUTE_QUEUE_RESET__RESET__SHIFT 0x0
> #define SPI_COMPUTE_QUEUE_RESET__RESET_MASK 0x01L
> diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
> index 3973110f149c..d09f1a06f4bf 100644
> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
> @@ -26,6 +26,8 @@
> #define mmSQ_DEBUG_STS_GLOBAL_BASE_IDX 0
> #define mmSQ_DEBUG_STS_GLOBAL2 0x10B0
> #define mmSQ_DEBUG_STS_GLOBAL2_BASE_IDX 0
> +#define mmSQ_DEBUG 0x10B1
> +#define mmSQ_DEBUG_BASE_IDX 0
>
> // addressBlock: gc_sdma0_sdma0dec
> // base address: 0x4980
> @@ -4849,10 +4851,18 @@
> #define mmSPI_WCL_PIPE_PERCENT_CS3_BASE_IDX 0
> #define mmSPI_GDBG_WAVE_CNTL 0x1f71
> #define mmSPI_GDBG_WAVE_CNTL_BASE_IDX 0
> +#define mmSPI_GDBG_TRAP_CONFIG 0x1f72
> +#define mmSPI_GDBG_TRAP_CONFIG_BASE_IDX 0
> #define mmSPI_GDBG_TRAP_MASK 0x1f73
> #define mmSPI_GDBG_TRAP_MASK_BASE_IDX 0
> #define mmSPI_GDBG_WAVE_CNTL2 0x1f74
> #define mmSPI_GDBG_WAVE_CNTL2_BASE_IDX 0
> +#define mmSPI_GDBG_WAVE_CNTL3 0x1f75
> +#define mmSPI_GDBG_WAVE_CNTL3_BASE_IDX 0
> +#define mmSPI_GDBG_TRAP_DATA0 0x1f78
> +#define mmSPI_GDBG_TRAP_DATA0_BASE_IDX 0
> +#define mmSPI_GDBG_TRAP_DATA1 0x1f79
> +#define mmSPI_GDBG_TRAP_DATA1_BASE_IDX 0
> #define mmSPI_COMPUTE_QUEUE_RESET 0x1f7b
> #define mmSPI_COMPUTE_QUEUE_RESET_BASE_IDX 0
> #define mmSPI_RESOURCE_RESERVE_CU_0 0x1f7c
> diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
> index d4e8ff22ecb8..fc85aee010fe 100644
> --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
> +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
> @@ -47853,6 +47853,10 @@
>
>
> // addressBlock: sqind
> +//SQ_DEBUG
> +#define SQ_DEBUG__SINGLE_MEMOP_MASK 0x00000001L
> +#define SQ_DEBUG__SINGLE_MEMOP__SHIFT 0x00000000
> +
> //SQ_DEBUG_STS_GLOBAL
> #define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX0_MASK 0x000000ffL
> #define SQ_DEBUG_STS_GLOBAL2__FIFO_LEVEL_GFX0__SHIFT 0x00000000
More information about the amd-gfx mailing list