[PATCH 26/32] drm/amdkfd: add debug set and clear address watch points operation

Felix Kuehling felix.kuehling at amd.com
Wed Mar 22 21:38:00 UTC 2023


Am 2023-01-25 um 14:53 schrieb Jonathan Kim:
> Shader read, write and atomic memory operations can be alerted to the
> debugger as an address watch exception.
>
> Allow the debugger to pass in a watch point to a particular memory
> address per device.
>
> Note that there exists only 4 watch points per devices to date, so have
> the KFD keep track of what watch points are allocated or not.
>
> v3: add gfx11 support.
> cleanup gfx9 kgd calls to set and clear address watch.
> use per device spinlock to set watch points.
> fixup runlist refresh calls on set/clear address watch.
>
> v2: change dev_id arg to gpu_id for consistency
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
> ---
>   .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  51 +++++++
>   .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |   2 +
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    |  78 ++++++++++
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h    |   8 ++
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |   5 +-
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c    |  52 ++++++-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  77 ++++++++++
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |   8 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  24 ++++
>   drivers/gpu/drm/amd/amdkfd/kfd_debug.c        | 136 ++++++++++++++++++
>   drivers/gpu/drm/amd/amdkfd/kfd_debug.h        |   8 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_device.c       |   1 +
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   6 +-
>   13 files changed, 451 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> index 4de2066215b4..18baf1cd8c01 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
> @@ -118,6 +118,55 @@ static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev,
>   	return data;
>   }
>   
> +#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
> +static uint32_t kgd_gfx_aldebaran_set_address_watch(
> +					struct amdgpu_device *adev,
> +					uint64_t watch_address,
> +					uint32_t watch_address_mask,
> +					uint32_t watch_id,
> +					uint32_t watch_mode,
> +					uint32_t debug_vmid)
> +{
> +	uint32_t watch_address_high;
> +	uint32_t watch_address_low;
> +	uint32_t watch_address_cntl;
> +
> +	watch_address_cntl = 0;
> +	watch_address_low = lower_32_bits(watch_address);
> +	watch_address_high = upper_32_bits(watch_address) & 0xffff;
> +
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			MODE,
> +			watch_mode);
> +
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			MASK,
> +			watch_address_mask >> 6);
> +
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			VALID,
> +			1);
> +
> +	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_high);
> +
> +	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_low);
> +
> +	return watch_address_cntl;
> +}
> +
> +uint32_t kgd_gfx_aldebaran_clear_address_watch(struct amdgpu_device *adev,
> +					uint32_t watch_id)
> +{
> +	return 0;
> +}
> +
>   const struct kfd2kgd_calls aldebaran_kfd2kgd = {
>   	.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
>   	.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
> @@ -140,6 +189,8 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
>   	.validate_trap_override_request = kgd_aldebaran_validate_trap_override_request,
>   	.set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override,
>   	.set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
> +	.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
> +	.clear_address_watch = kgd_gfx_aldebaran_clear_address_watch,
>   	.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
>   	.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
>   	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index 500013540356..a7fb5ef13166 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -413,6 +413,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
>   	.validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
>   	.set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
>   	.set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
> +	.set_address_watch = kgd_gfx_v9_set_address_watch,
> +	.clear_address_watch = kgd_gfx_v9_clear_address_watch,
>   	.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
>   	.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
>   	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 7591145bc69f..c9246370984c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -878,6 +878,82 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
>   	return 0;
>   }
>   
> +#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
> +uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
> +					uint64_t watch_address,
> +					uint32_t watch_address_mask,
> +					uint32_t watch_id,
> +					uint32_t watch_mode,
> +					uint32_t debug_vmid)
> +{
> +	uint32_t watch_address_high;
> +	uint32_t watch_address_low;
> +	uint32_t watch_address_cntl;
> +
> +	watch_address_cntl = 0;
> +
> +	watch_address_low = lower_32_bits(watch_address);
> +	watch_address_high = upper_32_bits(watch_address) & 0xffff;
> +
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			VMID,
> +			debug_vmid);
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			MODE,
> +			watch_mode);
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			MASK,
> +			watch_address_mask >> 7);
> +
> +	/* Turning off this watch point until we set all the registers */
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			VALID,
> +			0);
> +
> +	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_cntl);
> +
> +	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_high);
> +
> +	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_low);
> +
> +	/* Enable the watch point */
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			VALID,
> +			1);
> +
> +	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_cntl);
> +
> +	return 0;
> +}
> +
> +uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
> +					uint32_t watch_id)
> +{
> +	uint32_t watch_address_cntl;
> +
> +	watch_address_cntl = 0;
> +
> +	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_cntl);
> +
> +	return 0;
> +}
> +
> +
>   /* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
>    * The values read are:
>    *     ib_offload_wait_time     -- Wait Count for Indirect Buffer Offloads.
> @@ -966,6 +1042,8 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
>   	.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
>   	.set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
>   	.set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
> +	.set_address_watch = kgd_gfx_v10_set_address_watch,
> +	.clear_address_watch = kgd_gfx_v10_clear_address_watch,
>   	.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
>   	.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
>   	.program_trap_handler_settings = program_trap_handler_settings,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> index 34c04a2bb83b..334ff16e25db 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
> @@ -39,6 +39,14 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
>   uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
>   					 uint8_t wave_launch_mode,
>   					 uint32_t vmid);
> +uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
> +					uint64_t watch_address,
> +					uint32_t watch_address_mask,
> +					uint32_t watch_id,
> +					uint32_t watch_mode,
> +					uint32_t debug_vmid);
> +uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
> +					uint32_t watch_id);
>   void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
>   void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
>   					       uint32_t wait_times,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> index 8627c5458973..ee36ba045dcf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
> @@ -676,6 +676,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
>   	.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
>   	.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
>   	.set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
> -	.set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode
> -
> +	.set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
> +	.set_address_watch = kgd_gfx_v10_set_address_watch,
> +	.clear_address_watch = kgd_gfx_v10_clear_address_watch
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> index 4fdc25222dcd..358c219fb704 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
> @@ -736,6 +736,54 @@ static uint32_t kgd_gfx_v11_set_wave_launch_mode(struct amdgpu_device *adev,
>   	return data;
>   }
>   
> +#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
> +static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev,
> +					uint64_t watch_address,
> +					uint32_t watch_address_mask,
> +					uint32_t watch_id,
> +					uint32_t watch_mode,
> +					uint32_t debug_vmid)
> +{
> +	uint32_t watch_address_high;
> +	uint32_t watch_address_low;
> +	uint32_t watch_address_cntl;
> +
> +	watch_address_cntl = 0;
> +	watch_address_low = lower_32_bits(watch_address);
> +	watch_address_high = upper_32_bits(watch_address) & 0xffff;
> +
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			MODE,
> +			watch_mode);
> +
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			MASK,
> +			watch_address_mask >> 7);
> +
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			VALID,
> +			1);
> +
> +	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_high);
> +
> +	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_low);
> +
> +	return watch_address_cntl;
> +}
> +
> +uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev,
> +					uint32_t watch_id)
> +{
> +	return 0;
> +}
> +
>   const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
>   	.program_sh_mem_settings = program_sh_mem_settings_v11,
>   	.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
> @@ -756,5 +804,7 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
>   	.disable_debug_trap = kgd_gfx_v11_disable_debug_trap,
>   	.validate_trap_override_request = kgd_gfx_v11_validate_trap_override_request,
>   	.set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override,
> -	.set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode
> +	.set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode,
> +	.set_address_watch = kgd_gfx_v11_set_address_watch,
> +	.clear_address_watch = kgd_gfx_v11_clear_address_watch
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index a3c8f5578788..43296b78d888 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -814,6 +814,81 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
>   	return 0;
>   }
>   
> +#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
> +uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
> +					uint64_t watch_address,
> +					uint32_t watch_address_mask,
> +					uint32_t watch_id,
> +					uint32_t watch_mode,
> +					uint32_t debug_vmid)
> +{
> +	uint32_t watch_address_high;
> +	uint32_t watch_address_low;
> +	uint32_t watch_address_cntl;
> +
> +	watch_address_cntl = 0;
> +
> +	watch_address_low = lower_32_bits(watch_address);
> +	watch_address_high = upper_32_bits(watch_address) & 0xffff;
> +
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			VMID,
> +			debug_vmid);
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			MODE,
> +			watch_mode);
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			MASK,
> +			watch_address_mask >> 6);
> +
> +	/* Turning off this watch point until we set all the registers */
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			VALID,
> +			0);
> +
> +	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_cntl);
> +
> +	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_high);
> +
> +	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_low);
> +
> +	/* Enable the watch point */
> +	watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
> +			TCP_WATCH0_CNTL,
> +			VALID,
> +			1);
> +
> +	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_cntl);
> +
> +	return 0;
> +}
> +
> +uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
> +					uint32_t watch_id)
> +{
> +	uint32_t watch_address_cntl;
> +
> +	watch_address_cntl = 0;
> +
> +	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> +			(watch_id * TCP_WATCH_STRIDE)),
> +			watch_address_cntl);
> +
> +	return 0;
> +}
> +
>   /* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
>    * The values read are:
>    *     ib_offload_wait_time     -- Wait Count for Indirect Buffer Offloads.
> @@ -1085,6 +1160,8 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
>   	.validate_trap_override_request = kgd_gfx_v9_validate_trap_override_request,
>   	.set_wave_launch_trap_override = kgd_gfx_v9_set_wave_launch_trap_override,
>   	.set_wave_launch_mode = kgd_gfx_v9_set_wave_launch_mode,
> +	.set_address_watch = kgd_gfx_v9_set_address_watch,
> +	.clear_address_watch = kgd_gfx_v9_clear_address_watch,
>   	.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
>   	.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
>   	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> index 2a2ab42037e4..ba52b61b68c5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
> @@ -77,6 +77,14 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
>   					     uint32_t trap_mask_request,
>   					     uint32_t *trap_mask_prev,
>   					     uint32_t kfd_dbg_trap_cntl_prev);
> +uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
> +					uint64_t watch_address,
> +					uint32_t watch_address_mask,
> +					uint32_t watch_id,
> +					uint32_t watch_mode,
> +					uint32_t debug_vmid);
> +uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
> +					uint32_t watch_id);
>   void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
>   void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
>   					       uint32_t wait_times,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index b62e93b35a44..8f2ede781863 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2802,6 +2802,7 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
>   	struct task_struct *thread = NULL;
>   	struct pid *pid = NULL;
>   	struct kfd_process *target = NULL;
> +	struct kfd_process_device *pdd = NULL;
>   	int r = 0;
>   
>   	if (sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> @@ -2869,6 +2870,20 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
>   		goto unlock_out;
>   	}
>   
> +	if (args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ||
> +			args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH) {

The indentation is still messed up here. The second line should be 
aligned with the open parenthesis. With that fixed, the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> +		int user_gpu_id = kfd_process_get_user_gpu_id(target,
> +				args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ?
> +					args->set_node_address_watch.gpu_id :
> +					args->clear_node_address_watch.gpu_id);
> +
> +		pdd = kfd_process_device_data_by_id(target, user_gpu_id);
> +		if (user_gpu_id == -EINVAL || !pdd) {
> +			r = -ENODEV;
> +			goto unlock_out;
> +		}
> +	}
> +
>   	switch (args->op) {
>   	case KFD_IOC_DBG_TRAP_ENABLE:
>   		if (target != p)
> @@ -2921,7 +2936,16 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
>   				(uint32_t *)args->resume_queues.queue_array_ptr);
>   		break;
>   	case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH:
> +		r = kfd_dbg_trap_set_dev_address_watch(pdd,
> +				args->set_node_address_watch.address,
> +				args->set_node_address_watch.mask,
> +				&args->set_node_address_watch.id,
> +				args->set_node_address_watch.mode);
> +		break;
>   	case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH:
> +		r = kfd_dbg_trap_clear_dev_address_watch(pdd,
> +				args->clear_node_address_watch.id);
> +		break;
>   	case KFD_IOC_DBG_TRAP_SET_FLAGS:
>   	case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT:
>   	case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO:
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 730e53584113..8d2e1adb442d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -24,6 +24,8 @@
>   #include "kfd_device_queue_manager.h"
>   #include <linux/file.h>
>   
> +#define MAX_WATCH_ADDRESSES	4
> +
>   void debug_event_write_work_handler(struct work_struct *work)
>   {
>   	struct kfd_process *process;
> @@ -291,6 +293,139 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
>   						pdd->watch_points, flags);
>   }
>   
> +#define KFD_DEBUGGER_INVALID_WATCH_POINT_ID -1
> +static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_id)
> +{
> +	int i;
> +
> +	*watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID;
> +
> +	spin_lock(&pdd->dev->watch_points_lock);
> +
> +	for (i = 0; i < MAX_WATCH_ADDRESSES; i++) {
> +		/* device watchpoint in use so skip */
> +		if ((pdd->dev->alloc_watch_ids >> i) & 0x1)
> +			continue;
> +
> +		pdd->alloc_watch_ids |= 0x1 << i;
> +		pdd->dev->alloc_watch_ids |= 0x1 << i;
> +		*watch_id = i;
> +		spin_unlock(&pdd->dev->watch_points_lock);
> +		return 0;
> +	}
> +
> +	spin_unlock(&pdd->dev->watch_points_lock);
> +
> +	return -ENOMEM;
> +}
> +
> +static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
> +{
> +	spin_lock(&pdd->dev->watch_points_lock);
> +
> +	/* process owns device watch point so safe to clear */
> +	if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
> +		pdd->alloc_watch_ids &= ~(0x1 << watch_id);
> +		pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id);
> +	}
> +
> +	spin_unlock(&pdd->dev->watch_points_lock);
> +}
> +
> +static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
> +{
> +	bool owns_watch_id = false;
> +
> +	spin_lock(&pdd->dev->watch_points_lock);
> +	owns_watch_id = watch_id < MAX_WATCH_ADDRESSES &&
> +			((pdd->alloc_watch_ids >> watch_id) & 0x1);
> +
> +	spin_unlock(&pdd->dev->watch_points_lock);
> +
> +	return owns_watch_id;
> +}
> +
> +int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
> +					uint32_t watch_id)
> +{
> +	int r;
> +
> +	if (!kfd_dbg_owns_dev_watch_id(pdd, watch_id))
> +		return -EINVAL;
> +
> +	if (!pdd->dev->shared_resources.enable_mes) {
> +		r = debug_lock_and_unmap(pdd->dev->dqm);
> +		if (r)
> +			return r;
> +	}
> +
> +	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> +	pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch(
> +							pdd->dev->adev,
> +							watch_id);
> +	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> +
> +	if (!pdd->dev->shared_resources.enable_mes)
> +		r = debug_map_and_unlock(pdd->dev->dqm);
> +	else
> +		r = kfd_dbg_set_mes_debug_mode(pdd);
> +
> +	kfd_dbg_clear_dev_watch_id(pdd, watch_id);
> +
> +	return r;
> +}
> +
> +int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
> +					uint64_t watch_address,
> +					uint32_t watch_address_mask,
> +					uint32_t *watch_id,
> +					uint32_t watch_mode)
> +{
> +	int r = kfd_dbg_get_dev_watch_id(pdd, watch_id);
> +
> +	if (r)
> +		return r;
> +
> +	if (!pdd->dev->shared_resources.enable_mes) {
> +		r = debug_lock_and_unmap(pdd->dev->dqm);
> +		if (r) {
> +			kfd_dbg_clear_dev_watch_id(pdd, *watch_id);
> +			return r;
> +		}
> +	}
> +
> +	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> +	pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
> +				pdd->dev->adev,
> +				watch_address,
> +				watch_address_mask,
> +				*watch_id,
> +				watch_mode,
> +				pdd->dev->vm_info.last_vmid_kfd);
> +	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> +
> +	if (!pdd->dev->shared_resources.enable_mes)
> +		r = debug_map_and_unlock(pdd->dev->dqm);
> +	else
> +		r = kfd_dbg_set_mes_debug_mode(pdd);
> +
> +	/* HWS is broken so no point in HW rollback but release the watchpoint anyways */
> +	if (r)
> +		kfd_dbg_clear_dev_watch_id(pdd, *watch_id);
> +
> +	return 0;
> +}
> +
> +static void kfd_dbg_clear_process_address_watch(struct kfd_process *target)
> +{
> +	int i, j;
> +
> +	for (i = 0; i < target->n_pdds; i++)
> +		for (j = 0; j < MAX_WATCH_ADDRESSES; j++)
> +			kfd_dbg_trap_clear_dev_address_watch(target->pdds[i], j);
> +}
> +
> +
>   /* kfd_dbg_trap_deactivate:
>    *	target: target process
>    *	unwind: If this is unwinding a failed kfd_dbg_trap_enable()
> @@ -305,6 +440,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
>   
>   	if (!unwind) {
>   		cancel_work_sync(&target->debug_event_workarea);
> +		kfd_dbg_clear_process_address_watch(target);
>   		kfd_dbg_trap_set_wave_launch_mode(target, 0);
>   	}
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index 0d70f162d6d8..63c716ce5ab9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -50,7 +50,13 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
>   					uint32_t *trap_mask_supported);
>   int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
>   					uint8_t wave_launch_mode);
> -
> +int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
> +					uint32_t watch_id);
> +int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
> +					uint64_t watch_address,
> +					uint32_t watch_address_mask,
> +					uint32_t *watch_id,
> +					uint32_t watch_mode);
>   int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
>   					unsigned int dev_id,
>   					unsigned int queue_id,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 6e25238d18f9..ca849cd051d5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -641,6 +641,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>   	}
>   
>   	kfd_smi_init(kfd);
> +	spin_lock_init(&kfd->watch_points_lock);
>   
>   	kfd->init_complete = true;
>   	dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 8dc7cc1e18a5..cfc50d1690c7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -348,6 +348,10 @@ struct kfd_dev {
>   
>   	/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
>   	struct dev_pagemap pgmap;
> +
> +	/* Track per device allocated watch points */
> +	uint32_t alloc_watch_ids;
> +	spinlock_t watch_points_lock;
>   };
>   
>   enum kfd_mempool {
> @@ -799,6 +803,7 @@ struct kfd_process_device {
>   	uint32_t spi_dbg_override;
>   	uint32_t spi_dbg_launch_mode;
>   	uint32_t watch_points[4];
> +	uint32_t alloc_watch_ids;
>   
>   	/*
>   	 * If this process has been checkpointed before, then the user
> @@ -955,7 +960,6 @@ struct kfd_process {
>   	struct semaphore runtime_enable_sema;
>   	bool is_runtime_retry;
>   	struct kfd_runtime_info runtime_info;
> -
>   };
>   
>   #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */


More information about the dri-devel mailing list