[PATCH 34/34] drm/amdkfd: optimize gfx off enable toggle for debugging

Jonathan Kim jonathan.kim at amd.com
Mon Mar 27 18:43:39 UTC 2023


Legacy debug devices limited to pinning a single debug VMID for debugging
are the only devices that require disabling GFX OFF while accessing
debug registers.  Debug devices that support multi-process debugging
rely on the hardware scheduler to update debug registers and do not run
into GFX OFF access issues.

Remove KFD GFX OFF enable toggle clutter by moving these calls into the
KGD debug calls themselves.

Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  7 ++++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 33 ++++++++++++++++++-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 24 ++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 30 +++++------------
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c        | 21 +-----------
 5 files changed, 73 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 6df215aba4c4..dec4a3381ccb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -350,6 +350,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
 				bool restore_dbg_registers,
 				uint32_t vmid)
 {
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -362,6 +364,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -375,6 +379,7 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
 					bool keep_trap_enabled,
 					uint32_t vmid)
 {
+	amdgpu_gfx_off_ctrl(adev, false);
 
 	mutex_lock(&adev->grbm_idx_mutex);
 
@@ -388,6 +393,8 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 const struct kfd2kgd_calls arcturus_kfd2kgd = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 444f9ea758d6..2132376e2107 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -753,12 +753,13 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
 				bool restore_dbg_registers,
 				uint32_t vmid)
 {
+	amdgpu_gfx_off_ctrl(adev, false);
 
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
 
-	/* assume gfx off is disabled for the debug session if rlc restore not supported. */
+	/* keep gfx off disabled for the debug session if rlc restore not supported. */
 	if (restore_dbg_registers) {
 		uint32_t data = 0;
 
@@ -783,6 +784,8 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -790,6 +793,8 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
 					bool keep_trap_enabled,
 					uint32_t vmid)
 {
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
@@ -800,6 +805,16 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
+	/*
+	 * Remove the extra gfx off disable reference from debug restore call
+	 * for asics that do not support rlc restore for debug registers.
+	 */
+	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10) ||
+	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1))
+		amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -831,6 +846,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
 {
 	uint32_t data, wave_cntl_prev;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
@@ -852,6 +869,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -862,6 +881,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
 	uint32_t data = 0;
 	bool is_mode_set = !!wave_launch_mode;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
@@ -876,6 +897,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -933,10 +956,14 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
 			VALID,
 			1);
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_cntl);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -947,10 +974,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
 
 	watch_address_cntl = 0;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_cntl);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 87eef794d299..4b025fa0beb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -701,6 +701,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
 				bool restore_dbg_registers,
 				uint32_t vmid)
 {
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -711,6 +713,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -724,6 +728,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
 					bool keep_trap_enabled,
 					uint32_t vmid)
 {
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -734,6 +740,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -765,6 +773,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
 {
 	uint32_t data, wave_cntl_prev;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
@@ -786,6 +796,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -796,6 +808,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
 	uint32_t data = 0;
 	bool is_mode_set = !!wave_launch_mode;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -810,6 +824,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -867,10 +883,14 @@ uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
 			VALID,
 			1);
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_cntl);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -881,10 +901,14 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
 
 	watch_address_cntl = 0;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_cntl);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index ead5afe4216b..131b9c25e3ec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2752,7 +2752,6 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
 			struct kfd_process_device *pdd = p->pdds[i];
 
 			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
-				amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 				pdd->dev->kfd2kgd->enable_debug_trap(
 						pdd->dev->adev,
 						true,
@@ -2809,33 +2808,22 @@ static int runtime_disable(struct kfd_process *p)
 			return ret;
 	}
 
-	if (was_enabled && p->runtime_info.ttmp_setup) {
-		for (i = 0; i < p->n_pdds; i++) {
-			struct kfd_process_device *pdd = p->pdds[i];
-
-			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev))
-				amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
-		}
-	}
-
 	p->runtime_info.ttmp_setup = false;
 
 	/* disable ttmp setup */
 	for (i = 0; i < p->n_pdds; i++) {
 		struct kfd_process_device *pdd = p->pdds[i];
 
-		if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
-			pdd->spi_dbg_override =
-					pdd->dev->kfd2kgd->disable_debug_trap(
-					pdd->dev->adev,
-					false,
-					pdd->dev->vm_info.last_vmid_kfd);
+		pdd->spi_dbg_override =
+				pdd->dev->kfd2kgd->disable_debug_trap(
+				pdd->dev->adev,
+				false,
+				pdd->dev->vm_info.last_vmid_kfd);
 
-			if (!pdd->dev->shared_resources.enable_mes)
-				debug_refresh_runlist(pdd->dev->dqm);
-			else
-				kfd_dbg_set_mes_debug_mode(pdd);
-		}
+		if (!pdd->dev->shared_resources.enable_mes)
+			debug_refresh_runlist(pdd->dev->dqm);
+		else
+			kfd_dbg_set_mes_debug_mode(pdd);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 0ea85afcffd3..6e306defcc85 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -423,11 +423,9 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
 			return r;
 	}
 
-	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 	pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch(
 							pdd->dev->adev,
 							watch_id);
-	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
 
 	if (!pdd->dev->shared_resources.enable_mes)
 		r = debug_map_and_unlock(pdd->dev->dqm);
@@ -458,7 +456,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
 		}
 	}
 
-	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 	pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
 				pdd->dev->adev,
 				watch_address,
@@ -466,7 +463,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
 				*watch_id,
 				watch_mode,
 				pdd->dev->vm_info.last_vmid_kfd);
-	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
 
 	if (!pdd->dev->shared_resources.enable_mes)
 		r = debug_map_and_unlock(pdd->dev->dqm);
@@ -577,15 +573,11 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
 
 		kfd_process_set_trap_debug_flag(&pdd->qpd, false);
 
-		/* GFX off is already disabled by debug activate if not RLC restore supported. */
-		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
-			amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 		pdd->spi_dbg_override =
 				pdd->dev->kfd2kgd->disable_debug_trap(
 				pdd->dev->adev,
 				target->runtime_info.ttmp_setup,
 				pdd->dev->vm_info.last_vmid_kfd);
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
 
 		if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
 				release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
@@ -684,14 +676,10 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
 			}
 		}
 
-		/* Disable GFX OFF to prevent garbage read/writes to debug registers.
+		/*
 		 * If RLC restore of debug registers is not supported and runtime enable
 		 * hasn't done so already on ttmp setup request, restore the trap config registers.
-		 *
-		 * If RLC restore of debug registers is not supported, keep gfx off disabled for
-		 * the debug session.
 		 */
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 		if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||
 						target->runtime_info.ttmp_setup))
 			pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
@@ -702,9 +690,6 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
 					false,
 					pdd->dev->vm_info.last_vmid_kfd);
 
-		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
-			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
-
 		/**
 		 * Setting the debug flag in the trap handler requires that the TMA has been
 		 * allocated, which occurs during CWSR initialization.
@@ -836,7 +821,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
 	for (i = 0; i < target->n_pdds; i++) {
 		struct kfd_process_device *pdd = target->pdds[i];
 
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 		pdd->spi_dbg_override = pdd->dev->kfd2kgd->set_wave_launch_trap_override(
 				pdd->dev->adev,
 				pdd->dev->vm_info.last_vmid_kfd,
@@ -845,7 +829,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
 				trap_mask_request,
 				trap_mask_prev,
 				pdd->spi_dbg_override);
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
 
 		if (!pdd->dev->shared_resources.enable_mes)
 			r = debug_refresh_runlist(pdd->dev->dqm);
@@ -872,12 +855,10 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
 	for (i = 0; i < target->n_pdds; i++) {
 		struct kfd_process_device *pdd = target->pdds[i];
 
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 		pdd->spi_dbg_launch_mode = pdd->dev->kfd2kgd->set_wave_launch_mode(
 				pdd->dev->adev,
 				wave_launch_mode,
 				pdd->dev->vm_info.last_vmid_kfd);
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
 
 		if (!pdd->dev->shared_resources.enable_mes)
 			r = debug_refresh_runlist(pdd->dev->dqm);
-- 
2.25.1



More information about the dri-devel mailing list