[PATCH 24/33] drm/amdkfd: add debug wave launch override operation
Felix Kuehling
felix.kuehling at amd.com
Tue May 30 20:21:11 UTC 2023
Am 2023-05-25 um 13:27 schrieb Jonathan Kim:
> This operation allows the debugger to override the enabled HW
> exceptions on the device.
>
> On debug devices that only support the debugging of a single process,
> the HW exceptions are global and set through the SPI_GDBG_TRAP_MASK
> register.
> Because they are global, only address watch exceptions are allowed to
> be enabled. In other words, the debugger must preserve all non-address
> watch exception states in normal mode operation by barring a full
> replacement override or a non-address watch override request.
>
> For multi-process debugging, all HW exception overrides are per-VMID so
> all exceptions can be overridden or fully replaced.
>
> In order for the debugger to know what is permissible, return the
> supported override mask back to the debugger along with the previously
> enabled overrides.
>
> v2: fixup with new kfd_node struct reference for mes check
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 47 ++++++++++
> .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 55 ++++++++++++
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h | 10 +++
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 5 +-
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c | 87 ++++++++++++++++++-
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 55 ++++++++++++
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 10 +++
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 ++
> drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 69 +++++++++++++++
> drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 6 ++
> 11 files changed, 351 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index b811a0985050..d7881bbd828d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -25,6 +25,7 @@
> #include "amdgpu_amdkfd_gfx_v9.h"
> #include "gc/gc_9_4_2_offset.h"
> #include "gc/gc_9_4_2_sh_mask.h"
> +#include <uapi/linux/kfd_ioctl.h>
>
> /*
> * Returns TRAP_EN, EXCP_EN and EXCP_REPLACE.
> @@ -62,6 +63,50 @@ static uint32_t kgd_aldebaran_disable_debug_trap(struct amdgpu_device *adev,
> return data;
> }
>
> +static int kgd_aldebaran_validate_trap_override_request(struct amdgpu_device *adev,
> + uint32_t trap_override,
> + uint32_t *trap_mask_supported)
> +{
> + *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
> + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
> + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
> + KFD_DBG_TRAP_MASK_FP_OVERFLOW |
> + KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
> + KFD_DBG_TRAP_MASK_FP_INEXACT |
> + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
> + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
> + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
> +
> + if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
> + trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
> + return -EPERM;
> +
> + return 0;
> +}
> +
> +/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
> +static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device *adev,
> + uint32_t vmid,
> + uint32_t trap_override,
> + uint32_t trap_mask_bits,
> + uint32_t trap_mask_request,
> + uint32_t *trap_mask_prev,
> + uint32_t kfd_dbg_trap_cntl_prev)
> +
> +{
> + uint32_t data = 0;
> +
> + *trap_mask_prev = REG_GET_FIELD(kfd_dbg_trap_cntl_prev, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
> + trap_mask_bits = (trap_mask_bits & trap_mask_request) |
> + (*trap_mask_prev & ~trap_mask_request);
> +
> + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
> + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, trap_mask_bits);
> + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
> +
> + return data;
> +}
> +
> const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
> .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
> @@ -82,6 +127,8 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
> .disable_debug_trap = kgd_aldebaran_disable_debug_trap,
> + .validate_trap_override_request = kgd_aldebaran_validate_trap_override_request,
> + .set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
> .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index a62bd0068515..ec2587664001 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -410,6 +410,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
> kgd_gfx_v9_set_vm_context_page_table_base,
> .enable_debug_trap = kgd_arcturus_enable_debug_trap,
> .disable_debug_trap = kgd_arcturus_disable_debug_trap,
> + .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
> + .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
> .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 98006c7021dd..7ea0362dcab3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -32,6 +32,7 @@
> #include "v10_structs.h"
> #include "nv.h"
> #include "nvd.h"
> +#include <uapi/linux/kfd_ioctl.h>
>
> enum hqd_dequeue_request_type {
> NO_ACTION = 0,
> @@ -803,6 +804,58 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
> return 0;
> }
>
> +int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
> + uint32_t trap_override,
> + uint32_t *trap_mask_supported)
> +{
> + *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
> +
> + /* The SPI_GDBG_TRAP_MASK register is global and affects all
> + * processes. Only allow OR-ing the address-watch bit, since
> + * this only affects processes under the debugger. Other bits
> + * should stay 0 to avoid the debugger interfering with other
> + * processes.
> + */
> + if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
> + return -EINVAL;
> +
> + return 0;
> +}
> +
> +uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
> + uint32_t vmid,
> + uint32_t trap_override,
> + uint32_t trap_mask_bits,
> + uint32_t trap_mask_request,
> + uint32_t *trap_mask_prev,
> + uint32_t kfd_dbg_trap_cntl_prev)
> +{
> + uint32_t data, wave_cntl_prev;
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> +
> + wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
> +
> + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
> +
> + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
> + *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
> +
> + trap_mask_bits = (trap_mask_bits & trap_mask_request) |
> + (*trap_mask_prev & ~trap_mask_request);
> +
> + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
> + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
> +
> + /* We need to preserve wave launch mode stall settings. */
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
> +
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return 0;
> +}
> +
> /* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
> * The values read are:
> * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
> @@ -889,6 +942,8 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
> .set_vm_context_page_table_base = set_vm_context_page_table_base,
> .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
> .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
> + .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
> + .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
> .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
> .program_trap_handler_settings = program_trap_handler_settings,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> index 1e993a213646..57339fa12807 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> @@ -26,6 +26,16 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
> uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
> bool keep_trap_enabled,
> uint32_t vmid);
> +int kgd_gfx_v10_validate_trap_override_request(struct amdgpu_device *adev,
> + uint32_t trap_override,
> + uint32_t *trap_mask_supported);
> +uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
> + uint32_t vmid,
> + uint32_t trap_override,
> + uint32_t trap_mask_bits,
> + uint32_t trap_mask_request,
> + uint32_t *trap_mask_prev,
> + uint32_t kfd_dbg_trap_cntl_prev);
> void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
> void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
> uint32_t wait_times,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> index 387bdf4823c9..7120927fed15 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> @@ -675,5 +675,8 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
> .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
> .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
> - .disable_debug_trap = kgd_gfx_v10_disable_debug_trap
> + .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
> + .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
> + .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override
> +
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> index cc954cf248ca..ae0c4707919f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> @@ -30,6 +30,7 @@
> #include "soc15d.h"
> #include "v11_structs.h"
> #include "soc21.h"
> +#include <uapi/linux/kfd_ioctl.h>
>
> enum hqd_dequeue_request_type {
> NO_ACTION = 0,
> @@ -643,6 +644,88 @@ static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev,
> return data;
> }
>
> +static int kgd_gfx_v11_validate_trap_override_request(struct amdgpu_device *adev,
> + uint32_t trap_override,
> + uint32_t *trap_mask_supported)
> +{
> + *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
> + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
> + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
> + KFD_DBG_TRAP_MASK_FP_OVERFLOW |
> + KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
> + KFD_DBG_TRAP_MASK_FP_INEXACT |
> + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
> + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
> + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION;
> +
> + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 4))
> + *trap_mask_supported |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
> + KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
> +
> + if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
> + trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
> + return -EPERM;
> +
> + return 0;
> +}
> +
> +static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
> +{
> + uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
> + uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
> + uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
> + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
> + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
> + KFD_DBG_TRAP_MASK_FP_OVERFLOW |
> + KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
> + KFD_DBG_TRAP_MASK_FP_INEXACT |
> + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
> + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
> + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
> + uint32_t ret;
> +
> + ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
> + ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
> + ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
> +
> + return ret;
> +}
> +
> +static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
> +{
> + uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
> +
> + if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
> + ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
> +
> + if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
> + ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
> +
> + return ret;
> +}
> +
> +/* Returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
> +static uint32_t kgd_gfx_v11_set_wave_launch_trap_override(struct amdgpu_device *adev,
> + uint32_t vmid,
> + uint32_t trap_override,
> + uint32_t trap_mask_bits,
> + uint32_t trap_mask_request,
> + uint32_t *trap_mask_prev,
> + uint32_t kfd_dbg_trap_cntl_prev)
> +{
> + uint32_t data = 0;
> +
> + *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
> +
> + data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request);
> + data = trap_mask_map_sw_to_hw(data);
> +
> + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
> + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
> +
> + return data;
> +}
> +
> const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
> .program_sh_mem_settings = program_sh_mem_settings_v11,
> .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
> @@ -660,5 +743,7 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
> .get_atc_vmid_pasid_mapping_info = NULL,
> .set_vm_context_page_table_base = set_vm_context_page_table_base_v11,
> .enable_debug_trap = kgd_gfx_v11_enable_debug_trap,
> - .disable_debug_trap = kgd_gfx_v11_disable_debug_trap
> + .disable_debug_trap = kgd_gfx_v11_disable_debug_trap,
> + .validate_trap_override_request = kgd_gfx_v11_validate_trap_override_request,
> + .set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 4a155d11b8c8..705669c26a1a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -38,6 +38,7 @@
> #include "soc15d.h"
> #include "gfx_v9_0.h"
> #include "amdgpu_amdkfd_gfx_v9.h"
> +#include <uapi/linux/kfd_ioctl.h>
>
> enum hqd_dequeue_request_type {
> NO_ACTION = 0,
> @@ -739,6 +740,58 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
> return 0;
> }
>
> +int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
> + uint32_t trap_override,
> + uint32_t *trap_mask_supported)
> +{
> + *trap_mask_supported &= KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH;
> +
> + /* The SPI_GDBG_TRAP_MASK register is global and affects all
> + * processes. Only allow OR-ing the address-watch bit, since
> + * this only affects processes under the debugger. Other bits
> + * should stay 0 to avoid the debugger interfering with other
> + * processes.
> + */
> + if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR)
> + return -EINVAL;
> +
> + return 0;
> +}
> +
> +uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
> + uint32_t vmid,
> + uint32_t trap_override,
> + uint32_t trap_mask_bits,
> + uint32_t trap_mask_request,
> + uint32_t *trap_mask_prev,
> + uint32_t kfd_dbg_cntl_prev)
> +{
> + uint32_t data, wave_cntl_prev;
> +
> + mutex_lock(&adev->grbm_idx_mutex);
> +
> + wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
> +
> + kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> +
> + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK));
> + *trap_mask_prev = REG_GET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN);
> +
> + trap_mask_bits = (trap_mask_bits & trap_mask_request) |
> + (*trap_mask_prev & ~trap_mask_request);
> +
> + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, EXCP_EN, trap_mask_bits);
> + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, REPLACE, trap_override);
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data);
> +
> + /* We need to preserve wave launch mode stall settings. */
> + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), wave_cntl_prev);
> +
> + mutex_unlock(&adev->grbm_idx_mutex);
> +
> + return 0;
> +}
> +
> /* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
> * The values read are:
> * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
> @@ -1008,6 +1061,8 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
> .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
> .enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
> .disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
> + .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
> + .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
> .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index fed5b7f18b1a..76812ddd35b1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -71,6 +71,16 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
> uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
> bool keep_trap_enabled,
> uint32_t vmid);
> +int kgd_gfx_v9_validate_trap_override_request(struct amdgpu_device *adev,
> + uint32_t trap_override,
> + uint32_t *trap_mask_supported);
> +uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
> + uint32_t vmid,
> + uint32_t trap_override,
> + uint32_t trap_mask_bits,
> + uint32_t trap_mask_request,
> + uint32_t *trap_mask_prev,
> + uint32_t kfd_dbg_trap_cntl_prev);
> void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
> void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
> uint32_t wait_times,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 80d354eade35..e78103097162 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2984,6 +2984,13 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
> args->set_exceptions_enabled.exception_mask);
> break;
> case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE:
> + r = kfd_dbg_trap_set_wave_launch_override(target,
> + args->launch_override.override_mode,
> + args->launch_override.enable_mask,
> + args->launch_override.support_request_mask,
> + &args->launch_override.enable_mask,
> + &args->launch_override.support_request_mask);
> + break;
> case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE:
> case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES:
> case KFD_IOC_DBG_TRAP_RESUME_QUEUES:
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 48a4e3cc2234..733390fb2459 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -522,6 +522,75 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
> return r;
> }
>
> +static int kfd_dbg_validate_trap_override_request(struct kfd_process *p,
> + uint32_t trap_override,
> + uint32_t trap_mask_request,
> + uint32_t *trap_mask_supported)
> +{
> + int i = 0;
> +
> + *trap_mask_supported = 0xffffffff;
> +
> + for (i = 0; i < p->n_pdds; i++) {
> + struct kfd_process_device *pdd = p->pdds[i];
> + int err = pdd->dev->kfd2kgd->validate_trap_override_request(
> + pdd->dev->adev,
> + trap_override,
> + trap_mask_supported);
> +
> + if (err)
> + return err;
> + }
> +
> + if (trap_mask_request & ~*trap_mask_supported)
> + return -EACCES;
> +
> + return 0;
> +}
> +
> +int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
> + uint32_t trap_override,
> + uint32_t trap_mask_bits,
> + uint32_t trap_mask_request,
> + uint32_t *trap_mask_prev,
> + uint32_t *trap_mask_supported)
> +{
> + int r = 0, i;
> +
> + r = kfd_dbg_validate_trap_override_request(target,
> + trap_override,
> + trap_mask_request,
> + trap_mask_supported);
> +
> + if (r)
> + return r;
> +
> + for (i = 0; i < target->n_pdds; i++) {
> + struct kfd_process_device *pdd = target->pdds[i];
> +
> + amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> + pdd->spi_dbg_override = pdd->dev->kfd2kgd->set_wave_launch_trap_override(
> + pdd->dev->adev,
> + pdd->dev->vm_info.last_vmid_kfd,
> + trap_override,
> + trap_mask_bits,
> + trap_mask_request,
> + trap_mask_prev,
> + pdd->spi_dbg_override);
> + amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> +
> + if (!pdd->dev->kfd->shared_resources.enable_mes)
> + r = debug_refresh_runlist(pdd->dev->dqm);
> + else
> + r = kfd_dbg_set_mes_debug_mode(pdd);
> +
> + if (r)
> + break;
> + }
> +
> + return r;
> +}
> +
> void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
> uint64_t exception_set_mask)
> {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index 6c1054a08872..c9245221aa76 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -42,6 +42,12 @@ int kfd_dbg_trap_disable(struct kfd_process *target);
> int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
> void __user *runtime_info,
> uint32_t *runtime_info_size);
> +int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
> + uint32_t trap_override,
> + uint32_t trap_mask_bits,
> + uint32_t trap_mask_request,
> + uint32_t *trap_mask_prev,
> + uint32_t *trap_mask_supported);
>
> int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
> unsigned int dev_id,
More information about the amd-gfx
mailing list