[PATCH 4/4] drm/amdkfd: use queue reset for SDMA RAS poison consumption
Tao Zhou
tao.zhou1 at amd.com
Wed Jun 11 03:34:53 UTC 2025
Replace mode2 reset with queue reset to improve system performance.
v2: fall back to mode1 reset if queue reset fails.
move the queue reset handling from KFD to KGD.
Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 0018d04f4b33..b9429774a006 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -197,6 +197,9 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
case SOC15_IH_CLIENTID_SDMA2:
case SOC15_IH_CLIENTID_SDMA3:
case SOC15_IH_CLIENTID_SDMA4:
+ case SOC15_IH_CLIENTID_SDMA5:
+ case SOC15_IH_CLIENTID_SDMA6:
+ case SOC15_IH_CLIENTID_SDMA7:
block = AMDGPU_RAS_BLOCK__SDMA;
if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) {
/* driver mode-2 for gfx poison is only supported by
@@ -204,7 +207,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
if (dev->adev->pm.fw_version < 0x00557300)
reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
else
- reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ reset = 0;
} else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
/* driver mode-2 for gfx poison is only supported by
* pmfw 0x05550C00 and onwards */
@@ -223,6 +226,14 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
return;
}
+ if (!reset) {
+ ret = amdgpu_amdkfd_ras_poison_queue_reset(dev->adev, block,
+ client_id, vmid, node_id);
+ /* fallback to mode1 reset */
+ if (ret)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ }
+
ret = amdgpu_ras_mark_ras_event(dev->adev, type);
if (ret)
return;
--
2.34.1
More information about the amd-gfx mailing list