[PATCH 23/29] drm/amdkfd: add debug set and clear address watch points operation
Felix Kuehling
felix.kuehling at amd.com
Wed Nov 30 00:34:05 UTC 2022
On 2022-10-31 12:23, Jonathan Kim wrote:
> Shader read, write and atomic memory operations can be alerted to the
> debugger as an address watch exception.
>
> Allow the debugger to pass in a watch point to a particular memory
> address per device.
>
> Note that there exist only 4 watch points per device to date, so have
> the KFD keep track of which watch points are allocated.
>
> v2: change dev_id arg to gpu_id for consistency
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
Nit-picks inline.
> ---
> .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 2 +
> .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 78 +++++++++++
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h | 8 ++
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 5 +-
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 128 +++++++++++++++++
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 8 ++
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 24 ++++
> drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 130 ++++++++++++++++++
> drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 8 +-
> drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 7 +
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 +-
> 12 files changed, 405 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index 91c7fdee883e..8f9b613e3152 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -138,6 +138,8 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
> .validate_trap_override_request = kgd_aldebaran_validate_trap_override_request,
> .set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override,
> .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
> + .set_address_watch = kgd_gfx_v9_set_address_watch,
> + .clear_address_watch = kgd_gfx_v9_clear_address_watch,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
> .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index 10470f4a4eaf..5d6bd23a8cc1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -400,6 +400,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
> .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
> .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
> .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
> + .set_address_watch = kgd_gfx_v9_set_address_watch,
> + .clear_address_watch = kgd_gfx_v9_clear_address_watch,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
> .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 66a83e6fb9e5..ec48677772f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -880,6 +880,82 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
> return 0;
> }
>
> +#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
> +uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
> + uint64_t watch_address,
> + uint32_t watch_address_mask,
> + uint32_t watch_id,
> + uint32_t watch_mode,
> + uint32_t debug_vmid)
> +{
> + uint32_t watch_address_high;
> + uint32_t watch_address_low;
> + uint32_t watch_address_cntl;
> +
> + watch_address_cntl = 0;
> +
> + watch_address_low = lower_32_bits(watch_address);
> + watch_address_high = upper_32_bits(watch_address) & 0xffff;
> +
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + VMID,
> + debug_vmid);
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + MODE,
> + watch_mode);
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + MASK,
> + watch_address_mask >> 7);
> +
> + /* Turning off this watch point until we set all the registers */
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + VALID,
> + 0);
> +
> + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> + (watch_id * TCP_WATCH_STRIDE)),
> + watch_address_cntl);
> +
> + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
> + (watch_id * TCP_WATCH_STRIDE)),
> + watch_address_high);
> +
> + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
> + (watch_id * TCP_WATCH_STRIDE)),
> + watch_address_low);
> +
> + /* Enable the watch point */
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + VALID,
> + 1);
> +
> + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> + (watch_id * TCP_WATCH_STRIDE)),
> + watch_address_cntl);
> +
> + return 0;
> +}
> +
> +uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
> + uint32_t watch_id)
> +{
> + uint32_t watch_address_cntl;
> +
> + watch_address_cntl = 0;
> +
> + WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> + (watch_id * TCP_WATCH_STRIDE)),
> + watch_address_cntl);
> +
> + return 0;
> +}
> +
> +
> /* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
> * The values read are:
> * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
> @@ -968,6 +1044,8 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
> .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
> .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
> .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
> + .set_address_watch = kgd_gfx_v10_set_address_watch,
> + .clear_address_watch = kgd_gfx_v10_clear_address_watch,
> .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
> .program_trap_handler_settings = program_trap_handler_settings,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> index 34c04a2bb83b..334ff16e25db 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> @@ -39,6 +39,14 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
> uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
> uint8_t wave_launch_mode,
> uint32_t vmid);
> +uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
> + uint64_t watch_address,
> + uint32_t watch_address_mask,
> + uint32_t watch_id,
> + uint32_t watch_mode,
> + uint32_t debug_vmid);
> +uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
> + uint32_t watch_id);
> void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
> void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
> uint32_t wait_times,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> index 8627c5458973..ee36ba045dcf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> @@ -676,6 +676,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
> .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
> .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
> .set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
> - .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode
> -
> + .set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
> + .set_address_watch = kgd_gfx_v10_set_address_watch,
> + .clear_address_watch = kgd_gfx_v10_clear_address_watch
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 3bba7ca21926..98355a21740b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -810,6 +810,132 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
> return 0;
> }
>
> +#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
> +static uint32_t kgd_gfx_set_multi_process_address_watch(
> + struct amdgpu_device *adev,
> + uint64_t watch_address,
> + uint32_t watch_address_mask,
> + uint32_t watch_id,
> + uint32_t watch_mode)
> +{
> + uint32_t watch_address_high;
> + uint32_t watch_address_low;
> + uint32_t watch_address_cntl;
> +
> + watch_address_cntl = 0;
> + watch_address_low = lower_32_bits(watch_address);
> + watch_address_high = upper_32_bits(watch_address) & 0xffff;
> +
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + MODE,
> + watch_mode);
> +
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + MASK,
> + watch_address_mask >> 6);
> +
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + VALID,
> + 1);
> +
> + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
> + (watch_id * TCP_WATCH_STRIDE)),
> + watch_address_high);
> +
> + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
> + (watch_id * TCP_WATCH_STRIDE)),
> + watch_address_low);
> +
> + return watch_address_cntl;
> +}
> +
> +uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
> + uint64_t watch_address,
> + uint32_t watch_address_mask,
> + uint32_t watch_id,
> + uint32_t watch_mode,
> + uint32_t debug_vmid)
> +{
> + uint32_t watch_address_high;
> + uint32_t watch_address_low;
> + uint32_t watch_address_cntl;
> +
> + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
Shouldn't this be handled by putting a different function into
aldebaran_kfd2kgd.set_address_watch?
> + return kgd_gfx_set_multi_process_address_watch(adev,
> + watch_address,
> + watch_address_mask,
> + watch_id,
> + watch_mode);
> +
> + watch_address_cntl = 0;
> +
> + watch_address_low = lower_32_bits(watch_address);
> + watch_address_high = upper_32_bits(watch_address) & 0xffff;
> +
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + VMID,
> + debug_vmid);
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + MODE,
> + watch_mode);
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + MASK,
> + watch_address_mask >> 6);
> +
> + /* Turning off this watch point until we set all the registers */
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + VALID,
> + 0);
> +
> + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> + (watch_id * TCP_WATCH_STRIDE)),
> + watch_address_cntl);
> +
> + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
> + (watch_id * TCP_WATCH_STRIDE)),
> + watch_address_high);
> +
> + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
> + (watch_id * TCP_WATCH_STRIDE)),
> + watch_address_low);
> +
> + /* Enable the watch point */
> + watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> + TCP_WATCH0_CNTL,
> + VALID,
> + 1);
> +
> + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> + (watch_id * TCP_WATCH_STRIDE)),
> + watch_address_cntl);
> +
> + return 0;
> +}
> +
> +uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
> + uint32_t watch_id)
> +{
> + uint32_t watch_address_cntl;
> +
> + if (adev->asic_type == CHIP_ALDEBARAN)
Same as above.
> + return 0;
> +
> + watch_address_cntl = 0;
> +
> + WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> + (watch_id * TCP_WATCH_STRIDE)),
> + watch_address_cntl);
> +
> + return 0;
> +}
> +
> /* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
> * The values read are:
> * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
> @@ -1081,6 +1207,8 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
> .validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
> .set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
> .set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
> + .set_address_watch = kgd_gfx_v9_set_address_watch,
> + .clear_address_watch = kgd_gfx_v9_clear_address_watch,
> .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
> .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
> .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index 2a2ab42037e4..ba52b61b68c5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -77,6 +77,14 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
> uint32_t trap_mask_request,
> uint32_t *trap_mask_prev,
> uint32_t kfd_dbg_trap_cntl_prev);
> +uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
> + uint64_t watch_address,
> + uint32_t watch_address_mask,
> + uint32_t watch_id,
> + uint32_t watch_mode,
> + uint32_t debug_vmid);
> +uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
> + uint32_t watch_id);
> void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
> void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
> uint32_t wait_times,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index ec26c51177f9..9b2ea6e9e078 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2797,6 +2797,7 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
> struct task_struct *thread = NULL;
> struct pid *pid = NULL;
> struct kfd_process *target = NULL;
> + struct kfd_process_device *pdd = NULL;
> int r = 0;
>
> if (sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> @@ -2864,6 +2865,20 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
> goto unlock_out;
> }
>
> + if (args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||
> + args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH) {
Weird indentation.
> + int user_gpu_id = kfd_process_get_user_gpu_id(target,
> + args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ?
> + args->set_node_address_watch.gpu_id :
> + args->clear_node_address_watch.gpu_id);
> +
> + pdd = kfd_process_device_data_by_id(target, user_gpu_id);
> + if (user_gpu_id == -EINVAL || !pdd) {
> + r = -ENODEV;
> + goto unlock_out;
> + }
> + }
> +
> switch (args->op) {
> case KFD_IOC_DBG_TRAP_ENABLE:
> if (target != p)
> @@ -2917,7 +2932,16 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
> (uint32_t *)args->resume_queues.queue_array_ptr);
> break;
> case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH:
> + r = kfd_dbg_trap_set_dev_address_watch(pdd,
> + args->set_node_address_watch.address,
> + args->set_node_address_watch.mask,
> + &args->set_node_address_watch.id,
> + args->set_node_address_watch.mode);
> + break;
> case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH:
> + r = kfd_dbg_trap_clear_dev_address_watch(pdd,
> + args->clear_node_address_watch.id);
> + break;
> case KFD_IOC_DBG_TRAP_SET_FLAGS:
> case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT:
> case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO:
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index afa56aad316b..68bc1d5bfd05 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -24,6 +24,14 @@
> #include "kfd_device_queue_manager.h"
> #include <linux/file.h>
>
> +/*
> + * The spinlock protects the per device dev->alloc_watch_ids for multi-process access.
> + * The per-process per-device pdd->alloc_watch_ids is protected by the debug IOCTL
> + * process mutex.
> + */
> +#define MAX_WATCH_ADDRESSES 4
> +static DEFINE_SPINLOCK(watch_points_lock);
This spin lock seems to be a left-over from when we managed watch-points
globally. Now that they are per device, I think this spinlock should be
per-device as well, in struct kfd_dev.
> +
> void debug_event_write_work_handler(struct work_struct *work)
> {
> struct kfd_process *process;
> @@ -227,6 +235,127 @@ int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
> return 0;
> }
>
> +#define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1
> +static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_id)
> +{
> + int i;
> +
> + *watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID;
> +
> + spin_lock(&watch_points_lock);
> +
> + for (i = 0; i < MAX_WATCH_ADDRESSES; i++) {
> + /* device watchpoint in use so skip */
> + if ((pdd->dev->alloc_watch_ids >> i) & 0x1)
> + continue;
> +
> + pdd->alloc_watch_ids |= 0x1 << i;
> + pdd->dev->alloc_watch_ids |= 0x1 << i;
> + *watch_id = i;
> + spin_unlock(&watch_points_lock);
> + return 0;
> + }
> +
> + spin_unlock(&watch_points_lock);
> +
> + return -ENOMEM;
> +}
> +
> +static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
> +{
> + spin_lock(&watch_points_lock);
> +
> + /* process owns device watch point so safe to clear */
> + if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
> + pdd->alloc_watch_ids &= ~(0x1 << watch_id);
> + pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id);
> + }
> +
> + spin_unlock(&watch_points_lock);
> +}
> +
> +static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
> +{
> + bool owns_watch_id = false;
> +
> + spin_lock(&watch_points_lock);
> + owns_watch_id = watch_id < MAX_WATCH_ADDRESSES && ((pdd->alloc_watch_ids >> watch_id) & 0x1);
> +
> + spin_unlock(&watch_points_lock);
> +
> + return owns_watch_id;
> +}
> +
> +int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
> + uint32_t watch_id)
> +{
> + int r;
> +
> + if (!kfd_dbg_owns_dev_watch_id(pdd, watch_id))
> + return -EINVAL;
> +
> + r = debug_lock_and_unmap(pdd->dev->dqm);
> + if (r)
> + return r;
> +
> + amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> + pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch(
> + pdd->dev->adev,
> + watch_id);
> + amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> +
> + r = debug_map_and_unlock(pdd->dev->dqm);
> +
> + kfd_dbg_clear_dev_watch_id(pdd, watch_id);
> +
> + return r;
> +}
> +
> +int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
> + uint64_t watch_address,
> + uint32_t watch_address_mask,
> + uint32_t *watch_id,
> + uint32_t watch_mode)
> +{
> + int r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
> +
> + if (r)
> + return r;
> +
> + r = debug_lock_and_unmap(pdd->dev->dqm);
> + if (r) {
> + kfd_dbg_clear_dev_watch_id(pdd, *watch_id);
> + return r;
> + }
> +
> + amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> + pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
> + pdd->dev->adev,
> + watch_address,
> + watch_address_mask,
> + *watch_id,
> + watch_mode,
> + pdd->dev->vm_info.last_vmid_kfd);
> + amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> +
> + r = debug_map_and_unlock(pdd->dev->dqm);
> + /* HWS is broken so no point in HW rollback but release the watchpoint anyways */
> + if (r)
> + kfd_dbg_clear_dev_watch_id(pdd, *watch_id);
> +
> + return 0;
> +}
> +
> +static void kfd_dbg_clear_process_address_watch(struct kfd_process *target)
> +{
> + int i, j;
> +
> + for (i = 0; i < target->n_pdds; i++)
> + for (j = 0; j < MAX_WATCH_ADDRESSES; j++)
> + kfd_dbg_trap_clear_dev_address_watch(target->pdds[i], j);
> +}
> +
> +
> /* kfd_dbg_trap_deactivate:
> * target: target process
> * unwind: If this is unwinding a failed kfd_dbg_trap_enable()
> @@ -241,6 +370,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
>
> if (!unwind) {
> cancel_work_sync(&target->debug_event_workarea);
> + kfd_dbg_clear_process_address_watch(target);
> kfd_dbg_trap_set_wave_launch_mode(target, 0);
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index ca3ab1f01985..ad677e67e7eb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -50,7 +50,13 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
> uint32_t *trap_mask_supported);
> int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
> uint8_t wave_launch_mode);
> -
> +int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
> + uint32_t watch_id);
> +int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
> + uint64_t watch_address,
> + uint32_t watch_address_mask,
> + uint32_t *watch_id,
> + uint32_t watch_mode);
> int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
> unsigned int dev_id,
> unsigned int queue_id,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> index 8aebe408c544..733987de595a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> @@ -395,6 +395,8 @@ int kfd_init_apertures(struct kfd_process *process)
> pdd->gpuvm_base = pdd->gpuvm_limit = 0;
> pdd->scratch_base = pdd->scratch_limit = 0;
> } else {
> + int num_watchpoints = pdd->dev->device_info.num_of_watch_points;
> +
> switch (dev->adev->asic_type) {
> case CHIP_KAVERI:
> case CHIP_HAWAII:
> @@ -424,6 +426,11 @@ int kfd_init_apertures(struct kfd_process *process)
> pdd->qpd.cwsr_base = SVM_CWSR_BASE;
> pdd->qpd.ib_base = SVM_IB_BASE;
> }
> +
> + process->max_watch_points =
> + !process->max_watch_points ? num_watchpoints :
> + min(num_watchpoints, process->max_watch_points);
> +
> }
>
> dev_dbg(kfd_device, "node id %u\n", id);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 3d529c7499f8..aee4fe20e676 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -348,6 +348,9 @@ struct kfd_dev {
>
> /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
> struct dev_pagemap pgmap;
> +
> + /* Track per device allocated watch points */
> + uint32_t alloc_watch_ids;
> };
>
> enum kfd_mempool {
> @@ -796,6 +799,7 @@ struct kfd_process_device {
> uint32_t spi_dbg_override;
> uint32_t spi_dbg_launch_mode;
> uint32_t watch_points[4];
> + uint32_t alloc_watch_ids;
>
> /*
> * If this process has been checkpointed before, then the user
> @@ -907,6 +911,10 @@ struct kfd_process {
> /* per-process-per device debug event fd file */
> struct file *dbg_ev_file;
>
> + /* Allocated debug watch point IDs bitmask */
> + uint32_t allocated_debug_watch_point_bitmask;
> + int max_watch_points;
These two variables are unused.
Regards,
Felix
> +
> /* If the process is a kfd debugger, we need to know so we can clean
> * up at exit time. If a process enables debugging on itself, it does
> * its own clean-up, so we don't set the flag here. We track this by
> @@ -952,7 +960,6 @@ struct kfd_process {
> struct semaphore runtime_enable_sema;
> bool is_runtime_retry;
> struct kfd_runtime_info runtime_info;
> -
> };
>
> #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
More information about the amd-gfx
mailing list