[PATCH 7/8] drm/amdgpu: rework gfx11 queue reset
Alex Deucher
alexander.deucher at amd.com
Mon May 19 18:22:08 UTC 2025
Apply the same changes to gfx11 that were made for gfx10.
Background: for performance reasons, we don't use per-application kernel
queues for gfx11 on Linux.
So use the gfx10 approach here as well: reset all submissions from a
specific VMID rather than the whole queue.
v2: fix up pipeline_sync count, only emit vmid reset on gfx (Alex)
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 116 +++++--------------------
1 file changed, 22 insertions(+), 94 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index afd6d59164bfa..db69b76d6ab25 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -5936,7 +5936,19 @@ static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
uint32_t seq = ring->fence_drv.sync_seq;
uint64_t addr = ring->fence_drv.gpu_addr;
+ struct amdgpu_device *adev = ring->adev;
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
+ amdgpu_ring_emit_reg_wait(ring,
+ SOC15_REG_OFFSET(GC, 0, regCP_VMID_RESET),
+ 0, 0xffff);
+ amdgpu_ring_emit_wreg(ring,
+ SOC15_REG_OFFSET(GC, 0, regCP_VMID_RESET),
+ 0);
+ amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
+ ring->fence_drv.sync_seq,
+ AMDGPU_FENCE_FLAG_EXEC);
+ }
gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
upper_32_bits(addr), seq, 0xffffffff, 4);
}
@@ -6278,21 +6290,6 @@ static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
ref, mask, 0x20);
}
-static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
- unsigned vmid)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t value = 0;
-
- value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
- value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
- value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
- value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
- amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
- WREG32_SOC15(GC, 0, regSQ_CMD, value);
- amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
-}
-
static void
gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -6750,92 +6747,25 @@ static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
return false;
}
-
-static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- uint32_t reset_pipe = 0, clean_pipe = 0;
- int r;
-
- if (!gfx_v11_pipe_reset_support(adev))
- return -EOPNOTSUPP;
-
- gfx_v11_0_set_safe_mode(adev, 0);
- mutex_lock(&adev->srbm_mutex);
- soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-
- switch (ring->pipe) {
- case 0:
- reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
- PFP_PIPE0_RESET, 1);
- reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
- ME_PIPE0_RESET, 1);
- clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
- PFP_PIPE0_RESET, 0);
- clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
- ME_PIPE0_RESET, 0);
- break;
- case 1:
- reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
- PFP_PIPE1_RESET, 1);
- reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
- ME_PIPE1_RESET, 1);
- clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
- PFP_PIPE1_RESET, 0);
- clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
- ME_PIPE1_RESET, 0);
- break;
- default:
- break;
- }
-
- WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
- WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
-
- r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
- RS64_FW_UC_START_ADDR_LO;
- soc21_grbm_select(adev, 0, 0, 0, 0);
- mutex_unlock(&adev->srbm_mutex);
- gfx_v11_0_unset_safe_mode(adev, 0);
-
- dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
- r == 0 ? "successfully" : "failed");
- /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly,
- * so the pipe reset status relies on the later gfx ring test result.
- */
- return 0;
-}
-
static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
+ u32 tmp;
int r;
if (amdgpu_sriov_vf(adev))
return -EINVAL;
- r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
- if (r) {
-
- dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
- r = gfx_v11_reset_gfx_pipe(ring);
- if (r)
- return r;
- }
-
- r = gfx_v11_0_kgq_init_queue(ring, true);
- if (r) {
- dev_err(adev->dev, "failed to init kgq\n");
- return r;
- }
-
- r = amdgpu_mes_map_legacy_queue(adev, ring);
- if (r) {
- dev_err(adev->dev, "failed to remap kgq\n");
+ tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
+ r = amdgpu_mes_wreg(adev, SOC15_REG_OFFSET(GC, 0, regCP_VMID_RESET), tmp);
+ if (r)
return r;
- }
+ if (amdgpu_ring_alloc(ring, 7 + 7 + 5 + 8))
+ return -ENOMEM;
+ gfx_v11_0_ring_emit_pipeline_sync(ring);
+ amdgpu_ring_commit(ring);
- return amdgpu_ring_test_ring(ring);
+ return gfx_v11_0_ring_test_ib(ring, AMDGPU_QUEUE_RESET_TIMEOUT);
}
static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
@@ -7196,7 +7126,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
5 + /* update_spm_vmid */
5 + /* COND_EXEC */
22 + /* SET_Q_PREEMPTION_MODE */
- 7 + /* PIPELINE_SYNC */
+ 7 + 7 + 5 + 8 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
4 + /* VM_FLUSH */
@@ -7231,7 +7161,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
.emit_wreg = gfx_v11_0_ring_emit_wreg,
.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v11_0_ring_soft_recovery,
.emit_mem_sync = gfx_v11_0_emit_mem_sync,
.reset = gfx_v11_0_reset_kgq,
.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
@@ -7273,7 +7202,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
.emit_wreg = gfx_v11_0_ring_emit_wreg,
.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
- .soft_recovery = gfx_v11_0_ring_soft_recovery,
.emit_mem_sync = gfx_v11_0_emit_mem_sync,
.reset = gfx_v11_0_reset_kcq,
.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
--
2.49.0
More information about the amd-gfx
mailing list