[PATCH] drm/amdkfd: optimize gfx off enable toggle for debugging

Jonathan Kim jonathan.kim at amd.com
Wed Jun 7 17:32:11 UTC 2023


Legacy debug devices limited to pinning a single debug VMID for debugging
are the only devices that require disabling GFX OFF while accessing
debug registers.  Debug devices that support multi-process debugging
rely on the hardware scheduler to update debug registers and do not run
into GFX OFF access issues.

Remove KFD GFX OFF enable toggle clutter by moving these calls into the
KGD debug calls themselves.

v2: toggle gfx off around address watch hi/lo settings as well.

Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  4 +++
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  7 ++++
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 33 ++++++++++++++++++-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c    |  4 +++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 24 ++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 22 +++----------
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c        | 21 +-----------
 7 files changed, 77 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 60f9e027fb66..1f0e6ec56618 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -150,6 +150,8 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
 			VALID,
 			1);
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_high);
@@ -158,6 +160,8 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch(
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_low);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return watch_address_cntl;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 625db444df1c..a4e28d547173 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -350,6 +350,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
 				bool restore_dbg_registers,
 				uint32_t vmid)
 {
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -362,6 +364,8 @@ static uint32_t kgd_arcturus_enable_debug_trap(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -375,6 +379,7 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
 					bool keep_trap_enabled,
 					uint32_t vmid)
 {
+	amdgpu_gfx_off_ctrl(adev, false);
 
 	mutex_lock(&adev->grbm_idx_mutex);
 
@@ -388,6 +393,8 @@ static uint32_t kgd_arcturus_disable_debug_trap(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 const struct kfd2kgd_calls arcturus_kfd2kgd = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 8ad7a7779e14..415928139861 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -754,12 +754,13 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
 				bool restore_dbg_registers,
 				uint32_t vmid)
 {
+	amdgpu_gfx_off_ctrl(adev, false);
 
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
 
-	/* assume gfx off is disabled for the debug session if rlc restore not supported. */
+	/* keep gfx off disabled for the debug session if rlc restore not supported. */
 	if (restore_dbg_registers) {
 		uint32_t data = 0;
 
@@ -784,6 +785,8 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -791,6 +794,8 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
 					bool keep_trap_enabled,
 					uint32_t vmid)
 {
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
@@ -801,6 +806,16 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
+	/*
+	 * Remove the extra gfx off disable reference from debug restore call
+	 * for asics that do not support rlc restore for debug registers.
+	 */
+	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10) ||
+	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1))
+		amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -832,6 +847,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
 {
 	uint32_t data, wave_cntl_prev;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
@@ -853,6 +870,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_trap_override(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -863,6 +882,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
 	uint32_t data = 0;
 	bool is_mode_set = !!wave_launch_mode;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true);
@@ -877,6 +898,8 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -916,6 +939,8 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
 			VALID,
 			0);
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_cntl);
@@ -938,6 +963,8 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_cntl);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -948,10 +975,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
 
 	watch_address_cntl = 0;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_cntl);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
index 91c3574ebed3..bb6ad733b3d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
@@ -768,6 +768,8 @@ static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev,
 			VALID,
 			1);
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_high);
@@ -776,6 +778,8 @@ static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev,
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_low);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return watch_address_cntl;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 51d93fb13ea3..e30d1f9f5564 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -704,6 +704,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
 				bool restore_dbg_registers,
 				uint32_t vmid)
 {
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -714,6 +716,8 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -727,6 +731,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
 					bool keep_trap_enabled,
 					uint32_t vmid)
 {
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -737,6 +743,8 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -768,6 +776,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
 {
 	uint32_t data, wave_cntl_prev;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	wave_cntl_prev = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL));
@@ -789,6 +799,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_trap_override(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -799,6 +811,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
 	uint32_t data = 0;
 	bool is_mode_set = !!wave_launch_mode;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 
 	kgd_gfx_v9_set_wave_launch_stall(adev, vmid, true);
@@ -813,6 +827,8 @@ uint32_t kgd_gfx_v9_set_wave_launch_mode(struct amdgpu_device *adev,
 
 	mutex_unlock(&adev->grbm_idx_mutex);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -852,6 +868,8 @@ uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
 			VALID,
 			0);
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_cntl);
@@ -874,6 +892,8 @@ uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev,
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_cntl);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
@@ -884,10 +904,14 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
 
 	watch_address_cntl = 0;
 
+	amdgpu_gfx_off_ctrl(adev, false);
+
 	WREG32_RLC((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
 			(watch_id * TCP_WATCH_STRIDE)),
 			watch_address_cntl);
 
+	amdgpu_gfx_off_ctrl(adev, true);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index cf1db0ab3471..f597e1c8ebee 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2766,7 +2766,6 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
 			struct kfd_process_device *pdd = p->pdds[i];
 
 			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
-				amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 				pdd->dev->kfd2kgd->enable_debug_trap(
 						pdd->dev->adev,
 						true,
@@ -2823,33 +2822,22 @@ static int runtime_disable(struct kfd_process *p)
 			return ret;
 	}
 
-	if (was_enabled && p->runtime_info.ttmp_setup) {
-		for (i = 0; i < p->n_pdds; i++) {
-			struct kfd_process_device *pdd = p->pdds[i];
-
-			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev))
-				amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
-		}
-	}
-
 	p->runtime_info.ttmp_setup = false;
 
 	/* disable ttmp setup */
 	for (i = 0; i < p->n_pdds; i++) {
 		struct kfd_process_device *pdd = p->pdds[i];
 
-		if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
-			pdd->spi_dbg_override =
-					pdd->dev->kfd2kgd->disable_debug_trap(
-					pdd->dev->adev,
-					false,
-					pdd->dev->vm_info.last_vmid_kfd);
+		pdd->spi_dbg_override =
+				pdd->dev->kfd2kgd->disable_debug_trap(
+				pdd->dev->adev,
+				false,
+				pdd->dev->vm_info.last_vmid_kfd);
 
 			if (!pdd->dev->kfd->shared_resources.enable_mes)
 				debug_refresh_runlist(pdd->dev->dqm);
 			else
 				kfd_dbg_set_mes_debug_mode(pdd);
-		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index e7bc07068eed..5b381018407a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -441,11 +441,9 @@ int kfd_dbg_trap_clear_dev_address_watch(struct kfd_process_device *pdd,
 			return r;
 	}
 
-	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 	pdd->watch_points[watch_id] = pdd->dev->kfd2kgd->clear_address_watch(
 							pdd->dev->adev,
 							watch_id);
-	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
 
 	if (!pdd->dev->kfd->shared_resources.enable_mes)
 		r = debug_map_and_unlock(pdd->dev->dqm);
@@ -476,7 +474,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
 		}
 	}
 
-	amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 	pdd->watch_points[*watch_id] = pdd->dev->kfd2kgd->set_address_watch(
 				pdd->dev->adev,
 				watch_address,
@@ -484,7 +481,6 @@ int kfd_dbg_trap_set_dev_address_watch(struct kfd_process_device *pdd,
 				*watch_id,
 				watch_mode,
 				pdd->dev->vm_info.last_vmid_kfd);
-	amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
 
 	if (!pdd->dev->kfd->shared_resources.enable_mes)
 		r = debug_map_and_unlock(pdd->dev->dqm);
@@ -599,15 +595,11 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
 
 		kfd_process_set_trap_debug_flag(&pdd->qpd, false);
 
-		/* GFX off is already disabled by debug activate if not RLC restore supported. */
-		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
-			amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 		pdd->spi_dbg_override =
 				pdd->dev->kfd2kgd->disable_debug_trap(
 				pdd->dev->adev,
 				target->runtime_info.ttmp_setup,
 				pdd->dev->vm_info.last_vmid_kfd);
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
 
 		if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
 				release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
@@ -699,14 +691,10 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
 			}
 		}
 
-		/* Disable GFX OFF to prevent garbage read/writes to debug registers.
+		/*
 		 * If RLC restore of debug registers is not supported and runtime enable
 		 * hasn't done so already on ttmp setup request, restore the trap config registers.
-		 *
-		 * If RLC restore of debug registers is not supported, keep gfx off disabled for
-		 * the debug session.
 		 */
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 		if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||
 						target->runtime_info.ttmp_setup))
 			pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
@@ -717,9 +705,6 @@ int kfd_dbg_trap_activate(struct kfd_process *target)
 					false,
 					pdd->dev->vm_info.last_vmid_kfd);
 
-		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
-			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
-
 		/*
 		 * Setting the debug flag in the trap handler requires that the TMA has been
 		 * allocated, which occurs during CWSR initialization.
@@ -851,7 +836,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
 	for (i = 0; i < target->n_pdds; i++) {
 		struct kfd_process_device *pdd = target->pdds[i];
 
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 		pdd->spi_dbg_override = pdd->dev->kfd2kgd->set_wave_launch_trap_override(
 				pdd->dev->adev,
 				pdd->dev->vm_info.last_vmid_kfd,
@@ -860,7 +844,6 @@ int kfd_dbg_trap_set_wave_launch_override(struct kfd_process *target,
 				trap_mask_request,
 				trap_mask_prev,
 				pdd->spi_dbg_override);
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
 
 		if (!pdd->dev->kfd->shared_resources.enable_mes)
 			r = debug_refresh_runlist(pdd->dev->dqm);
@@ -887,12 +870,10 @@ int kfd_dbg_trap_set_wave_launch_mode(struct kfd_process *target,
 	for (i = 0; i < target->n_pdds; i++) {
 		struct kfd_process_device *pdd = target->pdds[i];
 
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
 		pdd->spi_dbg_launch_mode = pdd->dev->kfd2kgd->set_wave_launch_mode(
 				pdd->dev->adev,
 				wave_launch_mode,
 				pdd->dev->vm_info.last_vmid_kfd);
-		amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
 
 		if (!pdd->dev->kfd->shared_resources.enable_mes)
 			r = debug_refresh_runlist(pdd->dev->dqm);
-- 
2.25.1



More information about the amd-gfx mailing list