[PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe reset
Liang, Prike
Prike.Liang at amd.com
Thu Feb 27 12:36:10 UTC 2025
[AMD Official Use Only - AMD Internal Distribution Only]
Please review this patch series; it has been rebased onto the latest gfx base to avoid problems when the commits are merged.
Regards,
Prike
> -----Original Message-----
> From: Liang, Prike <Prike.Liang at amd.com>
> Sent: Friday, February 21, 2025 9:01 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Liang, Prike
> <Prike.Liang at amd.com>
> Subject: [PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe
> reset
>
> Implement the GFX12 KCQ pipe reset, and disable the GFX12 kernel compute
> queue until the CPFW fully supports it.
>
> Signed-off-by: Prike Liang <Prike.Liang at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 90 +++++++++++++++++++++++++-
> 1 file changed, 88 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> index 79ae7d538844..103298938d22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> @@ -5352,6 +5352,90 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring
> *ring, unsigned int vmid)
> return amdgpu_ring_test_ring(ring);
> }
>
> +static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) {
> +
> + struct amdgpu_device *adev = ring->adev;
> + uint32_t reset_pipe = 0, clean_pipe = 0;
> + int r;
> +
> + if (!gfx_v12_pipe_reset_support(adev))
> + return -EOPNOTSUPP;
> +
> + gfx_v12_0_set_safe_mode(adev, 0);
> + mutex_lock(&adev->srbm_mutex);
> + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> +
> + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
> + clean_pipe = reset_pipe;
> +
> + if (adev->gfx.rs64_enable) {
> +
> + switch (ring->pipe) {
> + case 0:
> + reset_pipe = REG_SET_FIELD(reset_pipe,
> CP_MEC_RS64_CNTL,
> + MEC_PIPE0_RESET, 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> + MEC_PIPE0_RESET, 0);
> + break;
> + case 1:
> + reset_pipe = REG_SET_FIELD(reset_pipe,
> CP_MEC_RS64_CNTL,
> + MEC_PIPE1_RESET, 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> + MEC_PIPE1_RESET, 0);
> + break;
> + case 2:
> + reset_pipe = REG_SET_FIELD(reset_pipe,
> CP_MEC_RS64_CNTL,
> + MEC_PIPE2_RESET, 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> + MEC_PIPE2_RESET, 0);
> + break;
> + case 3:
> + reset_pipe = REG_SET_FIELD(reset_pipe,
> CP_MEC_RS64_CNTL,
> + MEC_PIPE3_RESET, 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_RS64_CNTL,
> + MEC_PIPE3_RESET, 0);
> + break;
> + default:
> + break;
> + }
> + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
> + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
> + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR)
> << 2) - RS64_FW_UC_START_ADDR_LO;
> + } else {
> + switch (ring->pipe) {
> + case 0:
> + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
> + MEC_ME1_PIPE0_RESET,
> 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_CNTL,
> + MEC_ME1_PIPE0_RESET,
> 0);
> + break;
> + case 1:
> + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
> + MEC_ME1_PIPE1_RESET,
> 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe,
> CP_MEC_CNTL,
> + MEC_ME1_PIPE1_RESET,
> 0);
> + break;
> + default:
> + break;
> + }
> + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
> + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
> + /* No F32 MEC instruction pointer register could be found, and the
> + * driver is not expected to run in F32 mode.
> + */
> + }
> +
> + soc24_grbm_select(adev, 0, 0, 0, 0);
> + mutex_unlock(&adev->srbm_mutex);
> + gfx_v12_0_unset_safe_mode(adev, 0);
> +
> + dev_info(adev->dev,"The ring %s pipe resets: %s\n", ring->name,
> + r == 0 ? "successfully" : "failed");
> + /* Need the ring test to verify the pipe reset result.*/
> + return 0;
> +
> +}
> static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) {
> struct amdgpu_device *adev = ring->adev; @@ -5362,8 +5446,10 @@
> static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
>
> r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
> if (r) {
> - dev_err(adev->dev, "reset via MMIO failed %d\n", r);
> - return r;
> + dev_warn(adev->dev,"fail(%d) to reset kcq and try pipe reset\n", r);
> + r = gfx_v12_0_reset_compute_pipe(ring);
> + if (r)
> + return r;
> }
>
> r = amdgpu_bo_reserve(ring->mqd_obj, false);
> --
> 2.34.1
More information about the amd-gfx
mailing list