[PATCH 34/34] drm/amdkfd: optimize gfx off enable toggle for debugging

Felix Kuehling felix.kuehling at amd.com
Mon Mar 27 21:44:32 UTC 2023


On 2023-03-27 14:43, Jonathan Kim wrote:
> Legacy debug devices limited to pinning a single debug VMID for debugging
> are the only devices that require disabling GFX OFF while accessing
> debug registers.  Debug devices that support multi-process debugging
> rely on the hardware scheduler to update debug registers and do not run
> into GFX OFF access issues.

The address watch functions still touch the address registers directly. 
I guess that needs GFXOFF to be disabled.

Regards,
   Felix


>
> Remove KFD GFX OFF enable toggle clutter by moving these calls into the
> KGD debug calls themselves.
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
> ---
>   .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  7 ++++
>   .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 33 ++++++++++++++++++-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 24 ++++++++++++++
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 30 +++++------------
>   drivers/gpu/drm/amd/amdkfd/kfd_debug.c        | 21 +-----------
>   5 files changed, 73 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index 6df215aba4c4..dec4a3381ccb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -350,6 +350,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
>   				bool restore_dbg_registers,
>   				uint32_t vmid)
>   {
> +	amdgpu_gfx_off_ctrl(adev, false);
> +
>   	mutex_lock(&adev->grbm_idx_mutex);
>   
>   	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> @@ -362,6 +364,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
>   
>   	mutex_unlock(&adev->grbm_idx_mutex);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> @@ -375,6 +379,7 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
>   					bool keep_trap_enabled,
>   					uint32_t vmid)
>   {
> +	amdgpu_gfx_off_ctrl(adev, false);
>   
>   	mutex_lock(&adev->grbm_idx_mutex);
>   
> @@ -388,6 +393,8 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
>   
>   	mutex_unlock(&adev->grbm_idx_mutex);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   const struct kfd2kgd_calls arcturus_kfd2kgd = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 444f9ea758d6..2132376e2107 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -753,12 +753,13 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
>   				bool restore_dbg_registers,
>   				uint32_t vmid)
>   {
> +	amdgpu_gfx_off_ctrl(adev, false);
>   
>   	mutex_lock(&adev->grbm_idx_mutex);
>   
>   	kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
>   
> -	/* assume gfx off is disabled for the debug session if rlc restore not supported. */
> +	/* keep gfx off disabled for the debug session if rlc restore not supported. */
>   	if (restore_dbg_registers) {
>   		uint32_t data = 0;
>   
> @@ -783,6 +784,8 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
>   
>   	mutex_unlock(&adev->grbm_idx_mutex);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> @@ -790,6 +793,8 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
>   					bool keep_trap_enabled,
>   					uint32_t vmid)
>   {
> +	amdgpu_gfx_off_ctrl(adev, false);
> +
>   	mutex_lock(&adev->grbm_idx_mutex);
>   
>   	kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
> @@ -800,6 +805,16 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
>   
>   	mutex_unlock(&adev->grbm_idx_mutex);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
> +	/*
> +	 * Remove the extra gfx off disable reference from debug restore call
> +	 * for asics that do not support rlc restore for debug registers.
> +	 */
> +	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10) ||
> +	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1))
> +		amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> @@ -831,6 +846,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
>   {
>   	uint32_t data, wave_cntl_prev;
>   
> +	amdgpu_gfx_off_ctrl(adev, false);
> +
>   	mutex_lock(&adev->grbm_idx_mutex);
>   
>   	wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
> @@ -852,6 +869,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
>   
>   	mutex_unlock(&adev->grbm_idx_mutex);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> @@ -862,6 +881,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
>   	uint32_t data = 0;
>   	bool is_mode_set = !!wave_launch_mode;
>   
> +	amdgpu_gfx_off_ctrl(adev, false);
> +
>   	mutex_lock(&adev->grbm_idx_mutex);
>   
>   	kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
> @@ -876,6 +897,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
>   
>   	mutex_unlock(&adev->grbm_idx_mutex);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> @@ -933,10 +956,14 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
>   			VALID,
>   			1);
>   
> +	amdgpu_gfx_off_ctrl(adev, false);
> +
>   	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
>   			(watch_id * TCP_WATCH_STRIDE)),
>   			watch_address_cntl);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> @@ -947,10 +974,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
>   
>   	watch_address_cntl = 0;
>   
> +	amdgpu_gfx_off_ctrl(adev, false);
> +
>   	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
>   			(watch_id * TCP_WATCH_STRIDE)),
>   			watch_address_cntl);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 87eef794d299..4b025fa0beb5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -701,6 +701,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
>   				bool restore_dbg_registers,
>   				uint32_t vmid)
>   {
> +	amdgpu_gfx_off_ctrl(adev, false);
> +
>   	mutex_lock(&adev->grbm_idx_mutex);
>   
>   	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> @@ -711,6 +713,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
>   
>   	mutex_unlock(&adev->grbm_idx_mutex);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> @@ -724,6 +728,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
>   					bool keep_trap_enabled,
>   					uint32_t vmid)
>   {
> +	amdgpu_gfx_off_ctrl(adev, false);
> +
>   	mutex_lock(&adev->grbm_idx_mutex);
>   
>   	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> @@ -734,6 +740,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
>   
>   	mutex_unlock(&adev->grbm_idx_mutex);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> @@ -765,6 +773,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
>   {
>   	uint32_t data, wave_cntl_prev;
>   
> +	amdgpu_gfx_off_ctrl(adev, false);
> +
>   	mutex_lock(&adev->grbm_idx_mutex);
>   
>   	wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
> @@ -786,6 +796,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
>   
>   	mutex_unlock(&adev->grbm_idx_mutex);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> @@ -796,6 +808,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
>   	uint32_t data = 0;
>   	bool is_mode_set = !!wave_launch_mode;
>   
> +	amdgpu_gfx_off_ctrl(adev, false);
> +
>   	mutex_lock(&adev->grbm_idx_mutex);
>   
>   	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> @@ -810,6 +824,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
>   
>   	mutex_unlock(&adev->grbm_idx_mutex);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> @@ -867,10 +883,14 @@ uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
>   			VALID,
>   			1);
>   
> +	amdgpu_gfx_off_ctrl(adev, false);
> +
>   	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
>   			(watch_id * TCP_WATCH_STRIDE)),
>   			watch_address_cntl);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> @@ -881,10 +901,14 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
>   
>   	watch_address_cntl = 0;
>   
> +	amdgpu_gfx_off_ctrl(adev, false);
> +
>   	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
>   			(watch_id * TCP_WATCH_STRIDE)),
>   			watch_address_cntl);
>   
> +	amdgpu_gfx_off_ctrl(adev, true);
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index ead5afe4216b..131b9c25e3ec 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2752,7 +2752,6 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
>   			struct kfd_process_device *pdd = p->pdds[i];
>   
>   			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
> -				amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
>   				pdd->dev->kfd2kgd->enable_debug_trap(
>   						pdd->dev->adev,
>   						true,
> @@ -2809,33 +2808,22 @@ static int runtime_disable(struct kfd_process *p)
>   			return ret;
>   	}
>   
> -	if (was_enabled && p->runtime_info.ttmp_setup) {
> -		for (i = 0; i < p->n_pdds; i++) {
> -			struct kfd_process_device *pdd = p->pdds[i];
> -
> -			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev))
> -				amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> -		}
> -	}
> -
>   	p->runtime_info.ttmp_setup = false;
>   
>   	/* disable ttmp setup */
>   	for (i = 0; i < p->n_pdds; i++) {
>   		struct kfd_process_device *pdd = p->pdds[i];
>   
> -		if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
> -			pdd->spi_dbg_override =
> -					pdd->dev->kfd2kgd->disable_debug_trap(
> -					pdd->dev->adev,
> -					false,
> -					pdd->dev->vm_info.last_vmid_kfd);
> +		pdd->spi_dbg_override =
> +				pdd->dev->kfd2kgd->disable_debug_trap(
> +				pdd->dev->adev,
> +				false,
> +				pdd->dev->vm_info.last_vmid_kfd);
>   
> -			if (!pdd->dev->shared_resources.enable_mes)
> -				debug_refresh_runlist(pdd->dev->dqm);
> -			else
> -				kfd_dbg_set_mes_debug_mode(pdd);
> -		}
> +		if (!pdd->dev->shared_resources.enable_mes)
> +			debug_refresh_runlist(pdd->dev->dqm);
> +		else
> +			kfd_dbg_set_mes_debug_mode(pdd);
>   	}
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 0ea85afcffd3..6e306defcc85 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -423,11 +423,9 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
>   			return r;
>   	}
>   
> -	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
>   	pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch(
>   							pdd->dev->adev,
>   							watch_id);
> -	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>   
>   	if (!pdd->dev->shared_resources.enable_mes)
>   		r = debug_map_and_unlock(pdd->dev->dqm);
> @@ -458,7 +456,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
>   		}
>   	}
>   
> -	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
>   	pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
>   				pdd->dev->adev,
>   				watch_address,
> @@ -466,7 +463,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
>   				*watch_id,
>   				watch_mode,
>   				pdd->dev->vm_info.last_vmid_kfd);
> -	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>   
>   	if (!pdd->dev->shared_resources.enable_mes)
>   		r = debug_map_and_unlock(pdd->dev->dqm);
> @@ -577,15 +573,11 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
>   
>   		kfd_process_set_trap_debug_flag(&pdd->qpd, false);
>   
> -		/* GFX off is already disabled by debug activate if not RLC restore supported. */
> -		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
> -			amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
>   		pdd->spi_dbg_override =
>   				pdd->dev->kfd2kgd->disable_debug_trap(
>   				pdd->dev->adev,
>   				target->runtime_info.ttmp_setup,
>   				pdd->dev->vm_info.last_vmid_kfd);
> -		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>   
>   		if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
>   				release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
> @@ -684,14 +676,10 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
>   			}
>   		}
>   
> -		/* Disable GFX OFF to prevent garbage read/writes to debug registers.
> +		/*
>   		 * If RLC restore of debug registers is not supported and runtime enable
>   		 * hasn't done so already on ttmp setup request, restore the trap config registers.
> -		 *
> -		 * If RLC restore of debug registers is not supported, keep gfx off disabled for
> -		 * the debug session.
>   		 */
> -		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
>   		if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||
>   						target->runtime_info.ttmp_setup))
>   			pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
> @@ -702,9 +690,6 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
>   					false,
>   					pdd->dev->vm_info.last_vmid_kfd);
>   
> -		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
> -			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> -
>   		/**
>   		 * Setting the debug flag in the trap handler requires that the TMA has been
>   		 * allocated, which occurs during CWSR initialization.
> @@ -836,7 +821,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
>   	for (i = 0; i < target->n_pdds; i++) {
>   		struct kfd_process_device *pdd = target->pdds[i];
>   
> -		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
>   		pdd->spi_dbg_override = pdd->dev->kfd2kgd->set_wave_launch_trap_override(
>   				pdd->dev->adev,
>   				pdd->dev->vm_info.last_vmid_kfd,
> @@ -845,7 +829,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
>   				trap_mask_request,
>   				trap_mask_prev,
>   				pdd->spi_dbg_override);
> -		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>   
>   		if (!pdd->dev->shared_resources.enable_mes)
>   			r = debug_refresh_runlist(pdd->dev->dqm);
> @@ -872,12 +855,10 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
>   	for (i = 0; i < target->n_pdds; i++) {
>   		struct kfd_process_device *pdd = target->pdds[i];
>   
> -		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
>   		pdd->spi_dbg_launch_mode = pdd->dev->kfd2kgd->set_wave_launch_mode(
>   				pdd->dev->adev,
>   				wave_launch_mode,
>   				pdd->dev->vm_info.last_vmid_kfd);
> -		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>   
>   		if (!pdd->dev->shared_resources.enable_mes)
>   			r = debug_refresh_runlist(pdd->dev->dqm);


More information about the dri-devel mailing list