[PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe reset

Liang, Prike Prike.Liang at amd.com
Thu Mar 27 09:38:43 UTC 2025


[Public]

> From: Alex Deucher <alexdeucher at gmail.com>
> Sent: Monday, March 24, 2025 10:20 PM
> To: Liang, Prike <Prike.Liang at amd.com>
> Cc: amd-gfx at lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher at amd.com>
> Subject: Re: [PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12 KCQ pipe
> reset
>
> On Mon, Mar 24, 2025 at 8:01 AM Liang, Prike <Prike.Liang at amd.com> wrote:
> >
> > [Public]
> >
> > It seems there still requires implementing the gfx11/gfx12 pipe reset from the
> project perspective, so please help review the patch series.
>
> See my latest comments on patch 1.  With those addressed the series is:
> Acked-by: Alex Deucher <alexander.deucher at amd.com>
>
Thank you for the review. According to the firmware team, the GFX engine FW start PC will be kept as 0x3000 consistently.

Thanks,
Prike
> >
> > Regards,
> >       Prike
> >
> > > -----Original Message-----
> > > From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of
> > > Liang, Prike
> > > Sent: Monday, March 3, 2025 7:23 PM
> > > To: Alex Deucher <alexdeucher at gmail.com>
> > > Cc: amd-gfx at lists.freedesktop.org; Deucher, Alexander
> > > <Alexander.Deucher at amd.com>
> > > Subject: RE: [PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12
> > > KCQ pipe reset
> > >
> > > [Public]
> > >
> > > [Public]
> > >
> > > Thank you for the input. I have confirmed with the firmware team
> > > that the CP FW _start instruction address will be kept consistent.
> > > Could we move this patch series forward now?
> > >
> > > Regards,
> > >       Prike
> > >
> > > > -----Original Message-----
> > > > From: Alex Deucher <alexdeucher at gmail.com>
> > > > Sent: Thursday, February 27, 2025 10:38 PM
> > > > To: Liang, Prike <Prike.Liang at amd.com>
> > > > Cc: amd-gfx at lists.freedesktop.org; Deucher, Alexander
> > > > <Alexander.Deucher at amd.com>
> > > > Subject: Re: [PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12
> > > > KCQ pipe reset
> > > >
> > > > On Thu, Feb 27, 2025 at 7:36 AM Liang, Prike <Prike.Liang at amd.com>
> wrote:
> > > > >
> > > > > [AMD Official Use Only - AMD Internal Distribution Only]
> > > > >
> > > > > Please review the series patch to catch up the gfx latest base
> > > > > and to avoid the
> > > > commit merged problem.
> > > >
> > > > See my comment on patch 1:
> > > >
> > > > #define RS64_FW_UC_START_ADDR_LO 0x3000
> > > >
> > > > Will this potentially change in future firmware versions or is it
> > > > fixed?  If it will change, then let's just read it back from
> > > > registers and store it in adev-
> > > > >gfx.rs64_fw_use_start_addr_lo so that it will be correct if the
> > > > >user has a mix of
> > > > GPUs in the system.
> > > > Other than those comments, the series looks good to me.
> > > >
> > > > Alex
> > > >
> > > >
> > > > >
> > > > > Regards,
> > > > >       Prike
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Liang, Prike <Prike.Liang at amd.com>
> > > > > > Sent: Friday, February 21, 2025 9:01 PM
> > > > > > To: amd-gfx at lists.freedesktop.org
> > > > > > Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Liang,
> > > > > > Prike <Prike.Liang at amd.com>
> > > > > > Subject: [PATCH v2 4/4] drm/amdgpu/gfx12: Implement the GFX12
> > > > > > KCQ pipe reset
> > > > > >
> > > > > > Implement the GFX12 KCQ pipe reset, and disable the GFX12
> > > > > > kernel compute queue until the CPFW fully supports it.
> > > > > >
> > > > > > Signed-off-by: Prike Liang <Prike.Liang at amd.com>
> > > > > > ---
> > > > > >  drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 90
> > > > > > +++++++++++++++++++++++++-
> > > > > >  1 file changed, 88 insertions(+), 2 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > > > > > b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > > > > > index 79ae7d538844..103298938d22 100644
> > > > > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
> > > > > > @@ -5352,6 +5352,90 @@ static int gfx_v12_0_reset_kgq(struct
> > > > > > amdgpu_ring *ring, unsigned int vmid)
> > > > > >       return amdgpu_ring_test_ring(ring);  }
> > > > > >
> > > > > > +static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring
> > > > > > +*ring) {
> > > > > > +
> > > > > > +     struct amdgpu_device *adev = ring->adev;
> > > > > > +     uint32_t reset_pipe = 0, clean_pipe = 0;
> > > > > > +     int r;
> > > > > > +
> > > > > > +     if (!gfx_v12_pipe_reset_support(adev))
> > > > > > +             return -EOPNOTSUPP;
> > > > > > +
> > > > > > +     gfx_v12_0_set_safe_mode(adev, 0);
> > > > > > +     mutex_lock(&adev->srbm_mutex);
> > > > > > +     soc24_grbm_select(adev, ring->me, ring->pipe,
> > > > > > + ring->queue, 0);
> > > > > > +
> > > > > > +     reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
> > > > > > +     clean_pipe = reset_pipe;
> > > > > > +
> > > > > > +     if (adev->gfx.rs64_enable) {
> > > > > > +
> > > > > > +             switch (ring->pipe) {
> > > > > > +             case 0:
> > > > > > +                     reset_pipe = REG_SET_FIELD(reset_pipe,
> > > > > > CP_MEC_RS64_CNTL,
> > > > > > +                                                MEC_PIPE0_RESET, 1);
> > > > > > +                     clean_pipe = REG_SET_FIELD(clean_pipe,
> > > > > > CP_MEC_RS64_CNTL,
> > > > > > +                                                MEC_PIPE0_RESET, 0);
> > > > > > +                     break;
> > > > > > +             case 1:
> > > > > > +                     reset_pipe = REG_SET_FIELD(reset_pipe,
> > > > > > CP_MEC_RS64_CNTL,
> > > > > > +                                                MEC_PIPE1_RESET, 1);
> > > > > > +                     clean_pipe = REG_SET_FIELD(clean_pipe,
> > > > > > CP_MEC_RS64_CNTL,
> > > > > > +                                                MEC_PIPE1_RESET, 0);
> > > > > > +                     break;
> > > > > > +             case 2:
> > > > > > +                     reset_pipe = REG_SET_FIELD(reset_pipe,
> > > > > > CP_MEC_RS64_CNTL,
> > > > > > +                                                MEC_PIPE2_RESET, 1);
> > > > > > +                     clean_pipe = REG_SET_FIELD(clean_pipe,
> > > > > > CP_MEC_RS64_CNTL,
> > > > > > +                                                MEC_PIPE2_RESET, 0);
> > > > > > +                     break;
> > > > > > +             case 3:
> > > > > > +                     reset_pipe = REG_SET_FIELD(reset_pipe,
> > > > > > CP_MEC_RS64_CNTL,
> > > > > > +                                                MEC_PIPE3_RESET, 1);
> > > > > > +                     clean_pipe = REG_SET_FIELD(clean_pipe,
> > > > > > CP_MEC_RS64_CNTL,
> > > > > > +                                                MEC_PIPE3_RESET, 0);
> > > > > > +                     break;
> > > > > > +             default:
> > > > > > +                     break;
> > > > > > +             }
> > > > > > +             WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL,
> reset_pipe);
> > > > > > +             WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL,
> clean_pipe);
> > > > > > +             r = (RREG32_SOC15(GC, 0,
> > > > > > + regCP_MEC_RS64_INSTR_PNTR)
> > > > > > << 2) - RS64_FW_UC_START_ADDR_LO;
> > > > > > +     } else {
> > > > > > +             switch (ring->pipe) {
> > > > > > +             case 0:
> > > > > > +                     reset_pipe = REG_SET_FIELD(reset_pipe,
> > > > > > + CP_MEC_CNTL,
> > > > > > +
> > > > > > + MEC_ME1_PIPE0_RESET,
> > > > > > 1);
> > > > > > +                     clean_pipe = REG_SET_FIELD(clean_pipe,
> > > > > > CP_MEC_CNTL,
> > > > > > +
> > > > > > + MEC_ME1_PIPE0_RESET,
> > > > > > 0);
> > > > > > +                     break;
> > > > > > +             case 1:
> > > > > > +                     reset_pipe = REG_SET_FIELD(reset_pipe,
> > > > > > + CP_MEC_CNTL,
> > > > > > +
> > > > > > + MEC_ME1_PIPE1_RESET,
> > > > > > 1);
> > > > > > +                     clean_pipe = REG_SET_FIELD(clean_pipe,
> > > > > > CP_MEC_CNTL,
> > > > > > +
> > > > > > + MEC_ME1_PIPE1_RESET,
> > > > > > 0);
> > > > > > +                     break;
> > > > > > +             default:
> > > > > > +             break;
> > > > > > +             }
> > > > > > +             WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
> > > > > > +             WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
> > > > > > +             /* Doesn't find the F32 MEC instruction pointer
> > > > > > + register, and
> > > > > > suppose
> > > > > > +              * the driver won't run into the F32 mode.
> > > > > > +              */
> > > > > > +     }
> > > > > > +
> > > > > > +     soc24_grbm_select(adev, 0, 0, 0, 0);
> > > > > > +     mutex_unlock(&adev->srbm_mutex);
> > > > > > +     gfx_v12_0_unset_safe_mode(adev, 0);
> > > > > > +
> > > > > > +     dev_info(adev->dev,"The ring %s pipe resets: %s\n", ring->name,
> > > > > > +                     r == 0 ? "successfully" : "failed");
> > > > > > +     /* Need the ring test to verify the pipe reset result.*/
> > > > > > +     return 0;
> > > > > > +
> > > > > > +}
> > > > > >  static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
> > > > > > unsigned int vmid)
> > > {
> > > > > >       struct amdgpu_device *adev = ring->adev; @@ -5362,8
> > > > > > +5446,10 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring
> > > > > > *ring, unsigned int vmid)
> > > > > >
> > > > > >       r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
> > > > > >       if (r) {
> > > > > > -             dev_err(adev->dev, "reset via MMIO failed %d\n", r);
> > > > > > -             return r;
> > > > > > +             dev_warn(adev->dev,"fail(%d) to reset kcq  and
> > > > > > + try pipe reset\n",
> > > r);
> > > > > > +             r = gfx_v12_0_reset_compute_pipe(ring);
> > > > > > +             if (r)
> > > > > > +                     return r;
> > > > > >       }
> > > > > >
> > > > > >       r = amdgpu_bo_reserve(ring->mqd_obj, false);
> > > > > > --
> > > > > > 2.34.1
> > > > >


More information about the amd-gfx mailing list