[PATCH 34/34] drm/amdkfd: optimize gfx off enable toggle for debugging
Felix Kuehling
felix.kuehling at amd.com
Mon Mar 27 21:44:32 UTC 2023
On 2023-03-27 14:43, Jonathan Kim wrote:
> Legacy debug devices limited to pinning a single debug VMID for debugging
> are the only devices that require disabling GFX OFF while accessing
> debug registers. Debug devices that support multi-process debugging
> rely on the hardware scheduler to update debug registers and do not run
> into GFX OFF access issues.
The address watch functions still touch the address registers directly.
I guess that needs GFXOFF to be disabled.
Regards,
Felix
>
> Remove KFD GFX OFF enable toggle clutter by moving these calls into the
> KGD debug calls themselves.
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
> ---
> .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 7 ++++
> .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 33 ++++++++++++++++++-
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 24 ++++++++++++++
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 30 +++++------------
> drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 21 +-----------
> 5 files changed, 73 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> index 6df215aba4c4..dec4a3381ccb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
> @@ -350,6 +350,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
> bool restore_dbg_registers,
> uint32_t vmid)
> {
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> mutex_lock(&adev->grbm_idx_mutex);
>
> kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> @@ -362,6 +364,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
>
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> @@ -375,6 +379,7 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
> bool keep_trap_enabled,
> uint32_t vmid)
> {
> + amdgpu_gfx_off_ctrl(adev, false);
>
> mutex_lock(&adev->grbm_idx_mutex);
>
> @@ -388,6 +393,8 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
>
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
> const struct kfd2kgd_calls arcturus_kfd2kgd = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> index 444f9ea758d6..2132376e2107 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
> @@ -753,12 +753,13 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
> bool restore_dbg_registers,
> uint32_t vmid)
> {
> + amdgpu_gfx_off_ctrl(adev, false);
>
> mutex_lock(&adev->grbm_idx_mutex);
>
> kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
>
> - /* assume gfx off is disabled for the debug session if rlc restore not supported. */
> + /* keep gfx off disabled for the debug session if rlc restore not supported. */
> if (restore_dbg_registers) {
> uint32_t data = 0;
>
> @@ -783,6 +784,8 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
>
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> @@ -790,6 +793,8 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
> bool keep_trap_enabled,
> uint32_t vmid)
> {
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> mutex_lock(&adev->grbm_idx_mutex);
>
> kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
> @@ -800,6 +805,16 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
>
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> + /*
> + * Remove the extra gfx off disable reference from debug restore call
> + * for asics that do not support rlc restore for debug registers.
> + */
> + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10) ||
> + adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1))
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> @@ -831,6 +846,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
> {
> uint32_t data, wave_cntl_prev;
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> mutex_lock(&adev->grbm_idx_mutex);
>
> wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
> @@ -852,6 +869,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
>
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> @@ -862,6 +881,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
> uint32_t data = 0;
> bool is_mode_set = !!wave_launch_mode;
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> mutex_lock(&adev->grbm_idx_mutex);
>
> kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
> @@ -876,6 +897,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
>
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> @@ -933,10 +956,14 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
> VALID,
> 1);
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> (watch_id * TCP_WATCH_STRIDE)),
> watch_address_cntl);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> @@ -947,10 +974,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
>
> watch_address_cntl = 0;
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> (watch_id * TCP_WATCH_STRIDE)),
> watch_address_cntl);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 87eef794d299..4b025fa0beb5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -701,6 +701,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
> bool restore_dbg_registers,
> uint32_t vmid)
> {
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> mutex_lock(&adev->grbm_idx_mutex);
>
> kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> @@ -711,6 +713,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
>
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> @@ -724,6 +728,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
> bool keep_trap_enabled,
> uint32_t vmid)
> {
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> mutex_lock(&adev->grbm_idx_mutex);
>
> kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> @@ -734,6 +740,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
>
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> @@ -765,6 +773,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
> {
> uint32_t data, wave_cntl_prev;
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> mutex_lock(&adev->grbm_idx_mutex);
>
> wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
> @@ -786,6 +796,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
>
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> @@ -796,6 +808,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
> uint32_t data = 0;
> bool is_mode_set = !!wave_launch_mode;
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> mutex_lock(&adev->grbm_idx_mutex);
>
> kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
> @@ -810,6 +824,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
>
> mutex_unlock(&adev->grbm_idx_mutex);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> @@ -867,10 +883,14 @@ uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
> VALID,
> 1);
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> (watch_id * TCP_WATCH_STRIDE)),
> watch_address_cntl);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> @@ -881,10 +901,14 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
>
> watch_address_cntl = 0;
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
> (watch_id * TCP_WATCH_STRIDE)),
> watch_address_cntl);
>
> + amdgpu_gfx_off_ctrl(adev, true);
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index ead5afe4216b..131b9c25e3ec 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -2752,7 +2752,6 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
> struct kfd_process_device *pdd = p->pdds[i];
>
> if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> pdd->dev->kfd2kgd->enable_debug_trap(
> pdd->dev->adev,
> true,
> @@ -2809,33 +2808,22 @@ static int runtime_disable(struct kfd_process *p)
> return ret;
> }
>
> - if (was_enabled && p->runtime_info.ttmp_setup) {
> - for (i = 0; i < p->n_pdds; i++) {
> - struct kfd_process_device *pdd = p->pdds[i];
> -
> - if (!kfd_dbg_is_rlc_restore_supported(pdd->dev))
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> - }
> - }
> -
> p->runtime_info.ttmp_setup = false;
>
> /* disable ttmp setup */
> for (i = 0; i < p->n_pdds; i++) {
> struct kfd_process_device *pdd = p->pdds[i];
>
> - if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
> - pdd->spi_dbg_override =
> - pdd->dev->kfd2kgd->disable_debug_trap(
> - pdd->dev->adev,
> - false,
> - pdd->dev->vm_info.last_vmid_kfd);
> + pdd->spi_dbg_override =
> + pdd->dev->kfd2kgd->disable_debug_trap(
> + pdd->dev->adev,
> + false,
> + pdd->dev->vm_info.last_vmid_kfd);
>
> - if (!pdd->dev->shared_resources.enable_mes)
> - debug_refresh_runlist(pdd->dev->dqm);
> - else
> - kfd_dbg_set_mes_debug_mode(pdd);
> - }
> + if (!pdd->dev->shared_resources.enable_mes)
> + debug_refresh_runlist(pdd->dev->dqm);
> + else
> + kfd_dbg_set_mes_debug_mode(pdd);
> }
>
> return 0;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 0ea85afcffd3..6e306defcc85 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -423,11 +423,9 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
> return r;
> }
>
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch(
> pdd->dev->adev,
> watch_id);
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>
> if (!pdd->dev->shared_resources.enable_mes)
> r = debug_map_and_unlock(pdd->dev->dqm);
> @@ -458,7 +456,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
> }
> }
>
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
> pdd->dev->adev,
> watch_address,
> @@ -466,7 +463,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
> *watch_id,
> watch_mode,
> pdd->dev->vm_info.last_vmid_kfd);
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>
> if (!pdd->dev->shared_resources.enable_mes)
> r = debug_map_and_unlock(pdd->dev->dqm);
> @@ -577,15 +573,11 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
>
> kfd_process_set_trap_debug_flag(&pdd->qpd, false);
>
> - /* GFX off is already disabled by debug activate if not RLC restore supported. */
> - if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> pdd->spi_dbg_override =
> pdd->dev->kfd2kgd->disable_debug_trap(
> pdd->dev->adev,
> target->runtime_info.ttmp_setup,
> pdd->dev->vm_info.last_vmid_kfd);
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>
> if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
> release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
> @@ -684,14 +676,10 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
> }
> }
>
> - /* Disable GFX OFF to prevent garbage read/writes to debug registers.
> + /*
> * If RLC restore of debug registers is not supported and runtime enable
> * hasn't done so already on ttmp setup request, restore the trap config registers.
> - *
> - * If RLC restore of debug registers is not supported, keep gfx off disabled for
> - * the debug session.
> */
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||
> target->runtime_info.ttmp_setup))
> pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
> @@ -702,9 +690,6 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
> false,
> pdd->dev->vm_info.last_vmid_kfd);
>
> - if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> -
> /**
> * Setting the debug flag in the trap handler requires that the TMA has been
> * allocated, which occurs during CWSR initialization.
> @@ -836,7 +821,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
> for (i = 0; i < target->n_pdds; i++) {
> struct kfd_process_device *pdd = target->pdds[i];
>
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> pdd->spi_dbg_override = pdd->dev->kfd2kgd->set_wave_launch_trap_override(
> pdd->dev->adev,
> pdd->dev->vm_info.last_vmid_kfd,
> @@ -845,7 +829,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
> trap_mask_request,
> trap_mask_prev,
> pdd->spi_dbg_override);
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>
> if (!pdd->dev->shared_resources.enable_mes)
> r = debug_refresh_runlist(pdd->dev->dqm);
> @@ -872,12 +855,10 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
> for (i = 0; i < target->n_pdds; i++) {
> struct kfd_process_device *pdd = target->pdds[i];
>
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> pdd->spi_dbg_launch_mode = pdd->dev->kfd2kgd->set_wave_launch_mode(
> pdd->dev->adev,
> wave_launch_mode,
> pdd->dev->vm_info.last_vmid_kfd);
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
>
> if (!pdd->dev->shared_resources.enable_mes)
> r = debug_refresh_runlist(pdd->dev->dqm);
More information about the dri-devel
mailing list