[PATCH 25/33] drm/amdkfd: add debug wave launch mode operation
Felix Kuehling
felix.kuehling at amd.com
Tue May 30 20:22:30 UTC 2023
Am 2023-05-25 um 13:27 schrieb Jonathan Kim:
> Allow the debugger to set wave behaviour to either operate normally,
> halt at launch, trap on every instruction, terminate immediately or
> stall on allocation.
>
> v2: fixup with new kfd_node struct reference for mes check
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 12 +++++++
> .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 1 +
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 25 +++++++++++++
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h | 3 ++
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 3 +-
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c | 14 +++++++-
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 25 +++++++++++++
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 3 ++
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 ++
> drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 36 ++++++++++++++++++-
> drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 2 ++
> 11 files changed, 124 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index d7881bbd828d..774ecfc3451a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -107,6 +107,17 @@ static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device
> return data;
> }
>
> +static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
> + uint8_t wave_launch_mode,
> + uint32_t vmid)
> +{
> + uint32_t data = 0;
> +
> + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
> +
> + return data;
> +}
> +
> const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
> .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
> @@ -129,6 +140,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> .disable_debug_trap = kgd_aldebaran_disable_debug_trap,
> .validate_trap_override_request = kgd_aldebaran_validate_trap_override_request,
> .set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override,
> + .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
> .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index ec2587664001..fbdc1b7b1e42 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -412,6 +412,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
> .disable_debug_trap = kgd_arcturus_disable_debug_trap,
> .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
> .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
> + .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
> .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 7ea0362dcab3..a7a6edda557f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -856,6 +856,30 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
> return 0;
> }
>
> +uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
> + uint8_t wave_launch_mode,
> + uint32_t vmid)
> +{
> + uint32_t data = 0;
> + bool is_mode_set = !!wave_launch_mode;
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> +
> + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
> +
> + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
> + VMID_MASK, is_mode_set ? 1 << vmid : 0);
> + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
> + MODE, is_mode_set ? wave_launch_mode : 0);
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
> +
> + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false);
> +
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return 0;
> +}
> +
> /* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
> * The values read are:
> * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
> @@ -944,6 +968,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
> .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
> .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
> .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
> + .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
> .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
> .program_trap_handler_settings = program_trap_handler_settings,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> index 57339fa12807..3a6aca2b0eaa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> @@ -36,6 +36,9 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
> uint32_t trap_mask_request,
> uint32_t *trap_mask_prev,
> uint32_t kfd_dbg_trap_cntl_prev);
> +uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
> + uint8_t wave_launch_mode,
> + uint32_t vmid);
> void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
> void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
> uint32_t wait_times,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> index 7120927fed15..ed36b433a48b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> @@ -677,6 +677,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
> .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
> .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
> .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
> - .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override
> + .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
> + .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode
>
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> index ae0c4707919f..9711d5128d09 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> @@ -726,6 +726,17 @@ static uint32_t kgd_gfx_v11_set_wave_launch_trap_override(struct amdgpu_device *
> return data;
> }
>
> +static uint32_t kgd_gfx_v11_set_wave_launch_mode(struct amdgpu_device *adev,
> + uint8_t wave_launch_mode,
> + uint32_t vmid)
> +{
> + uint32_t data = 0;
> +
> + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE, wave_launch_mode);
> +
> + return data;
> +}
> +
> const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
> .program_sh_mem_settings = program_sh_mem_settings_v11,
> .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
> @@ -745,5 +756,6 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
> .enable_debug_trap = kgd_gfx_v11_enable_debug_trap,
> .disable_debug_trap = kgd_gfx_v11_disable_debug_trap,
> .validate_trap_override_request = kgd_gfx_v11_validate_trap_override_request,
> - .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override
> + .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override,
> + .set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 705669c26a1a..060331652573 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -792,6 +792,30 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
> return 0;
> }
>
> +uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
> + uint8_t wave_launch_mode,
> + uint32_t vmid)
> +{
> + uint32_t data = 0;
> + bool is_mode_set = !!wave_launch_mode;
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> +
> + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> +
> + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
> + VMID_MASK, is_mode_set ? 1 << vmid : 0);
> + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2,
> + MODE, is_mode_set ? wave_launch_mode : 0);
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data);
> +
> + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, false);
> +
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return 0;
> +}
> +
> /* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
> * The values read are:
> * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
> @@ -1063,6 +1087,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
> .disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
> .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
> .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
> + .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
> .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index 76812ddd35b1..18f4970ac8e4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -74,6 +74,9 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
> int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
> uint32_t trap_override,
> uint32_t *trap_mask_supported);
> +uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
> + uint8_t wave_launch_mode,
> + uint32_t vmid);
> uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
> uint32_t vmid,
> uint32_t trap_override,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index e78103097162..4b45d4539d48 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2992,6 +2992,9 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
> &args->launch_override.support_request_mask);
> break;
> case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE:
> + r = kfd_dbg_trap_set_wave_launch_mode(target,
> + args->launch_mode.launch_mode);
> + break;
> case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES:
> case KFD_IOC_DBG_TRAP_RESUME_QUEUES:
> case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH:
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 733390fb2459..53c3418562d4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -301,8 +301,10 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
> {
> int i;
>
> - if (!unwind)
> + if (!unwind) {
> cancel_work_sync(&target->debug_event_workarea);
> + kfd_dbg_trap_set_wave_launch_mode(target, 0);
> + }
>
> for (i = 0; i < target->n_pdds; i++) {
> struct kfd_process_device *pdd = target->pdds[i];
> @@ -591,6 +593,38 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
> return r;
> }
>
> +int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
> + uint8_t wave_launch_mode)
> +{
> + int r = 0, i;
> +
> + if (wave_launch_mode != KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL &&
> + wave_launch_mode != KFD_DBG_TRAP_WAVE_LAUNCH_MODE_HALT &&
> + wave_launch_mode != KFD_DBG_TRAP_WAVE_LAUNCH_MODE_DEBUG)
> + return -EINVAL;
> +
> + for (i = 0; i < target->n_pdds; i++) {
> + struct kfd_process_device *pdd = target->pdds[i];
> +
> + amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> + pdd->spi_dbg_launch_mode = pdd->dev->kfd2kgd->set_wave_launch_mode(
> + pdd->dev->adev,
> + wave_launch_mode,
> + pdd->dev->vm_info.last_vmid_kfd);
> + amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> +
> + if (!pdd->dev->kfd->shared_resources.enable_mes)
> + r = debug_refresh_runlist(pdd->dev->dqm);
> + else
> + r = kfd_dbg_set_mes_debug_mode(pdd);
> +
> + if (r)
> + break;
> + }
> +
> + return r;
> +}
> +
> void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
> uint64_t exception_set_mask)
> {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index c9245221aa76..cb17869437c5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -48,6 +48,8 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
> uint32_t trap_mask_request,
> uint32_t *trap_mask_prev,
> uint32_t *trap_mask_supported);
> +int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
> + uint8_t wave_launch_mode);
>
> int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
> unsigned int dev_id,
More information about the dri-devel
mailing list