[PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe reset
Alex Deucher
alexdeucher at gmail.com
Thu Feb 27 14:37:43 UTC 2025
On Thu, Feb 27, 2025 at 7:36 AM Liang, Prike <Prike.Liang at amd.com> wrote:
>
> [AMD Official Use Only - AMD Internal Distribution Only]
>
> Please review the series patch to catch up the gfx latest base and to avoid the commit merged problem.
See my comment on patch 1:
#define RS64_FW_UC_START_ADDR_LO 0x3000
Will this potentially change in future firmware versions or is it
fixed? If it will change, then let's just read it back from registers
and store it in adev->gfx.rs64_fw_use_start_addr_lo so
that it will be correct if the user has a mix of GPUs in the system.
Other than those comments, the series looks good to me.
Alex
>
> Regards,
> Prike
>
> > -----Original Message-----
> > From: Liang, Prike <Prike.Liang at amd.com>
> > Sent: Friday, February 21, 2025 9:01 PM
> > To: amd-gfx at lists.freedesktop.org
> > Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Liang, Prike
> > <Prike.Liang at amd.com>
> > Subject: [PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe
> > reset
> >
> > Implement the GFX12 KCQ pipe reset, and disable the GFX12 kernel compute
> > queue until the CPFW fully supports it.
> >
> > Signed-off-by: Prike Liang <Prike.Liang at amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 90 +++++++++++++++++++++++++-
> > 1 file changed, 88 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > index 79ae7d538844..103298938d22 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > @@ -5352,6 +5352,90 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring
> > *ring, unsigned int vmid)
> > return amdgpu_ring_test_ring(ring);
> > }
> >
> > +static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) {
> > +
> > + struct amdgpu_device *adev = ring->adev;
> > + uint32_t reset_pipe = 0, clean_pipe = 0;
> > + int r;
> > +
> > + if (!gfx_v12_pipe_reset_support(adev))
> > + return -EOPNOTSUPP;
> > +
> > + gfx_v12_0_set_safe_mode(adev, 0);
> > + mutex_lock(&adev->srbm_mutex);
> > + soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> > +
> > + reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
> > + clean_pipe = reset_pipe;
> > +
> > + if (adev->gfx.rs64_enable) {
> > +
> > + switch (ring->pipe) {
> > + case 0:
> > + reset_pipe = REG_SET_FIELD(reset_pipe,
> > CP_MEC_RS64_CNTL,
> > + MEC_PIPE0_RESET, 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_RS64_CNTL,
> > + MEC_PIPE0_RESET, 0);
> > + break;
> > + case 1:
> > + reset_pipe = REG_SET_FIELD(reset_pipe,
> > CP_MEC_RS64_CNTL,
> > + MEC_PIPE1_RESET, 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_RS64_CNTL,
> > + MEC_PIPE1_RESET, 0);
> > + break;
> > + case 2:
> > + reset_pipe = REG_SET_FIELD(reset_pipe,
> > CP_MEC_RS64_CNTL,
> > + MEC_PIPE2_RESET, 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_RS64_CNTL,
> > + MEC_PIPE2_RESET, 0);
> > + break;
> > + case 3:
> > + reset_pipe = REG_SET_FIELD(reset_pipe,
> > CP_MEC_RS64_CNTL,
> > + MEC_PIPE3_RESET, 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_RS64_CNTL,
> > + MEC_PIPE3_RESET, 0);
> > + break;
> > + default:
> > + break;
> > + }
> > + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
> > + WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
> > + r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR)
> > << 2) - RS64_FW_UC_START_ADDR_LO;
> > + } else {
> > + switch (ring->pipe) {
> > + case 0:
> > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
> > + MEC_ME1_PIPE0_RESET,
> > 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_CNTL,
> > + MEC_ME1_PIPE0_RESET,
> > 0);
> > + break;
> > + case 1:
> > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
> > + MEC_ME1_PIPE1_RESET,
> > 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe,
> > CP_MEC_CNTL,
> > + MEC_ME1_PIPE1_RESET,
> > 0);
> > + break;
> > + default:
> > + break;
> > + }
> > + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
> > + WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
> > + /* Doesn't find the F32 MEC instruction pointer register, and
> > suppose
> > + * the driver won't run into the F32 mode.
> > + */
> > + }
> > +
> > + soc24_grbm_select(adev, 0, 0, 0, 0);
> > + mutex_unlock(&adev->srbm_mutex);
> > + gfx_v12_0_unset_safe_mode(adev, 0);
> > +
> > + dev_info(adev->dev,"The ring %s pipe resets: %s\n", ring->name,
> > + r == 0 ? "successfully" : "failed");
> > + /* Need the ring test to verify the pipe reset result.*/
> > + return 0;
> > +
> > +}
> > static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) {
> > struct amdgpu_device *adev = ring->adev; @@ -5362,8 +5446,10 @@
> > static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
> >
> > r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
> > if (r) {
> > - dev_err(adev->dev, "reset via MMIO failed %d\n", r);
> > - return r;
> > + dev_warn(adev->dev,"fail(%d) to reset kcq and try pipe reset\n", r);
> > + r = gfx_v12_0_reset_compute_pipe(ring);
> > + if (r)
> > + return r;
> > }
> >
> > r = amdgpu_bo_reserve(ring->mqd_obj, false);
> > --
> > 2.34.1
>
More information about the amd-gfx
mailing list