[PATCH 08/14] drm/amd/amdgpu: Add hang detection before GFX queue reset in MES

Jesse.Zhang Jesse.Zhang at amd.com
Fri May 30 09:00:09 UTC 2025


This change adds a new field, `hang_detect_then_reset`, to the MES reset
queue input structure so that GFX queue resets are handled more robustly.

The change includes:
1. Adding the `hang_detect_then_reset` flag to `mes_reset_queue_input`
2. Setting this flag when resetting GFX queues in `mes_userq_reset`
3. Handling the flag in `mes_v11_0_reset_hw_queue` by setting
   `hang_detect_then_reset` in the reset packet instead of `reset_queue_only`

Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h    | 1 +
 drivers/gpu/drm/amd/amdgpu/mes_userqueue.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c     | 3 +++
 3 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index c0d2c195fe2e..31826a20c56b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -277,6 +277,7 @@ struct mes_reset_queue_input {
 	uint64_t                           wptr_addr;
 	uint32_t                           vmid;
 	bool                               legacy_gfx;
+	bool				   hang_detect_then_reset;
 	bool                               is_kq;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
index 650fdb68db12..2b5bd3691766 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_userqueue.c
@@ -358,6 +358,8 @@ static int mes_userq_reset(struct amdgpu_userq_mgr *uq_mgr,
 
 	queue_input.doorbell_offset = queue->doorbell_index;
 	queue_input.queue_type = queue->queue_type;
+	if (queue->queue_type == AMDGPU_RING_TYPE_GFX)
+		queue_input.hang_detect_then_reset = true;
 
 	amdgpu_mes_lock(&adev->mes);
 	r = adev->mes.funcs->reset_hw_queue(&adev->mes, &queue_input);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index c9eba537de09..25ea06b507c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -773,6 +773,9 @@ static int mes_v11_0_reset_hw_queue(struct amdgpu_mes *mes,
 		mes_reset_queue_pkt.doorbell_offset_lp = input->doorbell_offset;
 		mes_reset_queue_pkt.wptr_addr_lp = input->wptr_addr;
 		mes_reset_queue_pkt.vmid_id_lp = input->vmid;
+	} else if (input->hang_detect_then_reset) {
+		mes_reset_queue_pkt.hang_detect_then_reset = 1;
+		mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
 	} else {
 		mes_reset_queue_pkt.reset_queue_only = 1;
 		mes_reset_queue_pkt.doorbell_offset = input->doorbell_offset;
-- 
2.49.0



More information about the amd-gfx mailing list