[PATCH 06/29] drm/amdgpu: add gfx9 hw debug mode enable and disable calls

Felix Kuehling felix.kuehling at amd.com
Tue Nov 22 23:50:49 UTC 2022


On 2022-10-31 12:23, Jonathan Kim wrote:
> Implement the per-device calls to enable or disable HW debug mode for
> GFX9 prior to GFX9.4.1.
>
> GFX9.4.1 and onward will require their own enable/disable sequences in
> follow-on patches.
>
> When hardware debug mode setting is requested, waves will inherit
> these settings in the Shader Processor Input's (SPI) Sequencer Global
> Block (SQG). This means that the KGD must drain all waves from the SPI
> into SQG (approximately 96 SPI clock cycles) prior to debug mode setting
> to ensure that the order of operations that the debugger expects with
> regards to debug mode setting transaction requests and wave inheritance
> of that mode is upheld.
>
> Also ensure that exception overrides are reset to their original state
> prior to debug enable or disable.
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 100 ++++++++++++++++--
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |   9 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_debug.h        |   3 +
>   3 files changed, 102 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 81e3b528bbc9..e1aac6f6d369 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -46,14 +46,14 @@ enum hqd_dequeue_request_type {
>   	SAVE_WAVES
>   };
>   
> -static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
> +static void kgd_gfx_v9_lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
>   			uint32_t queue, uint32_t vmid)

What's the reason for renaming these functions? It seems unnecessary and 
unrelated to the rest of the patch.


>   {
>   	mutex_lock(&adev->srbm_mutex);
>   	soc15_grbm_select(adev, mec, pipe, queue, vmid);
>   }
>   
> -static void unlock_srbm(struct amdgpu_device *adev)
> +static void kgd_gfx_v9_unlock_srbm(struct amdgpu_device *adev)
>   {
>   	soc15_grbm_select(adev, 0, 0, 0, 0);
>   	mutex_unlock(&adev->srbm_mutex);
> @@ -65,7 +65,7 @@ static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
>   	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
>   	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
>   
> -	lock_srbm(adev, mec, pipe, queue_id, 0);
> +	kgd_gfx_v9_lock_srbm(adev, mec, pipe, queue_id, 0);
>   }
>   
>   static uint64_t get_queue_mask(struct amdgpu_device *adev,
> @@ -79,7 +79,7 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev,
>   
>   static void release_queue(struct amdgpu_device *adev)
>   {
> -	unlock_srbm(adev);
> +	kgd_gfx_v9_unlock_srbm(adev);
>   }
>   
>   void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
> @@ -88,13 +88,13 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
>   					uint32_t sh_mem_ape1_limit,
>   					uint32_t sh_mem_bases)
>   {
> -	lock_srbm(adev, 0, 0, 0, vmid);
> +	kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid);
>   
>   	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
>   	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
>   	/* APE1 no longer exists on GFX9 */
>   
> -	unlock_srbm(adev);
> +	kgd_gfx_v9_unlock_srbm(adev);
>   }
>   
>   int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
> @@ -164,13 +164,13 @@ int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
>   	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
>   	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
>   
> -	lock_srbm(adev, mec, pipe, 0, 0);
> +	kgd_gfx_v9_lock_srbm(adev, mec, pipe, 0, 0);
>   
>   	WREG32_SOC15(GC, 0, mmCPC_INT_CNTL,
>   		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
>   		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
>   
> -	unlock_srbm(adev);
> +	kgd_gfx_v9_unlock_srbm(adev);
>   
>   	return 0;
>   }
> @@ -646,6 +646,84 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
>   	return 0;
>   }
>   
> +/*
> + * GFX9 helper for wave launch stall requirements on debug trap setting.
> + *
> + * vmid:
> + *   Target VMID to stall/unstall.
> + *
> + * stall:
> + *   0-unstall wave launch (enable), 1-stall wave launch (disable).
> + *   After wavefront launch has been stalled, allocated waves must drain from
> + *   SPI in order for debug trap settings to take effect on those waves.
> + *   This is roughly a ~96 clock cycle wait on SPI where a read on
> + *   SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles.
> + *   KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required.
> + *
> + *   NOTE: We can afford to clear the entire STALL_VMID field on unstall
> + *   because GFX9.4.1 cannot support multi-process debugging due to trap
> + *   configuration and masking being limited to global scope.  Always assume
> + *   single process conditions.
> +
> + */
> +#define KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY	3
> +void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
> +					uint32_t vmid,
> +					bool stall)
> +{
> +	int i;
> +	uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
> +
> +	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
> +		data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID,
> +							stall ? 1 << vmid : 0);
> +	else
> +		data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA,
> +							stall ? 1 : 0);
> +
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data);
> +
> +	if (!stall)
> +		return;
> +
> +	for (i = 0; i < KGD_GFX_V9_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++)
> +		RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
> +}
> +
> +uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
> +				bool restore_dbg_registers,
> +				uint32_t vmid)
> +{
> +	mutex_lock(&adev->grbm_idx_mutex);
> +
> +	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> +
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
> +
> +	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
> +
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +
> +	return 0;
> +}
> +
> +uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
> +					bool keep_trap_enabled,
> +					uint32_t vmid)
> +{
> +	mutex_lock(&adev->grbm_idx_mutex);
> +
> +	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> +
> +	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
> +
> +	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
> +
> +	mutex_unlock(&adev->grbm_idx_mutex);
> +
> +	return 0;
> +}

The enable and disable functions do exactly the same thing, and they
ignore the restore_dbg_registers and keep_trap_enabled arguments. Maybe
add a comment explaining why that is.

Regards,
   Felix


> +
>   void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
>   			uint32_t vmid, uint64_t page_table_base)
>   {
> @@ -833,7 +911,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
>   void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
>                           uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
>   {
> -	lock_srbm(adev, 0, 0, 0, vmid);
> +	kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid);
>   
>   	/*
>   	 * Program TBA registers
> @@ -851,7 +929,7 @@ void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
>   	WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_HI,
>   			upper_32_bits(tma_addr >> 8));
>   
> -	unlock_srbm(adev);
> +	kgd_gfx_v9_unlock_srbm(adev);
>   }
>   
>   const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
> @@ -871,6 +949,8 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
>   	.get_atc_vmid_pasid_mapping_info =
>   			kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
>   	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
> +	.enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
> +	.disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
>   	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
>   	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index c7ed3bc9053c..d39256162616 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -58,3 +58,12 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
>   		int *pasid_wave_cnt, int *max_waves_per_cu);
>   void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
>   		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
> +void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
> +					uint32_t vmid,
> +					bool stall);
> +uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
> +				      bool restore_dbg_registers,
> +				      uint32_t vmid);
> +uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
> +					bool keep_trap_enabled,
> +					uint32_t vmid);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index b2217eb1399c..8aa7a3ad4e97 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -25,6 +25,9 @@
>   
>   #include "kfd_priv.h"
>   
> +void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev,
> +					uint32_t vmid,
> +					bool stall);
>   int kfd_dbg_trap_disable(struct kfd_process *target);
>   int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
>   			void __user *runtime_info,


More information about the amd-gfx mailing list