[PATCH v2 1/4] drm/amdgpu/gfx11: Implement the GFX11 KGQ pipe reset
Alex Deucher
alexdeucher at gmail.com
Mon Mar 24 14:18:38 UTC 2025
On Mon, Mar 3, 2025 at 9:46 AM Alex Deucher <alexdeucher at gmail.com> wrote:
>
> On Fri, Feb 21, 2025 at 8:38 AM Prike Liang <Prike.Liang at amd.com> wrote:
> >
> > Implement the kernel graphics queue pipe reset, and the driver
> > will fall back to pipe reset when the queue reset fails. However,
> > the ME FW doesn't fully support pipe reset yet, so disable the
> > KGQ pipe reset temporarily.
> >
> > Signed-off-by: Prike Liang <Prike.Liang at amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 2 +
> > drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 71 ++++++++++++++++++++++-
> > 2 files changed, 71 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
> > index 4eedd92f000b..06fe21e15ed6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
> > @@ -25,6 +25,8 @@
> >
> > #include "amdgpu_socbb.h"
> >
> > +#define RS64_FW_UC_START_ADDR_LO 0x3000
> > +
> > struct common_firmware_header {
> > uint32_t size_bytes; /* size of the entire header+image(s) in bytes */
> > uint32_t header_size_bytes; /* size of just the header in bytes */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > index 2c7f0bb242ff..7e53c0b63f88 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > @@ -6653,6 +6653,68 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
> > amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
> > }
> >
> > +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
> > +{
> > + /* Disable the pipe reset until the CPFW fully supports it. */
> > + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
>
> I'd drop these or make them debug only for now. Same for gfx12. With
> that fixed, the series is:
> Acked-by: Alex Deucher <alexander.deucher at amd.com>
Just in case you missed my reply. See above ^^^^
Alex
>
>
> > + return false;
> > +}
> > +
> > +
> > +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
> > +{
> > + struct amdgpu_device *adev = ring->adev;
> > + uint32_t reset_pipe = 0, clean_pipe = 0;
> > + int r;
> > +
> > + if (!gfx_v11_pipe_reset_support(adev))
> > + return -EOPNOTSUPP;
> > +
> > + gfx_v11_0_set_safe_mode(adev, 0);
> > + mutex_lock(&adev->srbm_mutex);
> > + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> > +
> > + switch (ring->pipe) {
> > + case 0:
> > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > + PFP_PIPE0_RESET, 1);
> > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > + ME_PIPE0_RESET, 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > + PFP_PIPE0_RESET, 0);
> > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > + ME_PIPE0_RESET, 0);
> > + break;
> > + case 1:
> > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > + PFP_PIPE1_RESET, 1);
> > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > + ME_PIPE1_RESET, 1);
> > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > + PFP_PIPE1_RESET, 0);
> > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > + ME_PIPE1_RESET, 0);
> > + break;
> > + default:
> > + break;
> > + }
> > +
> > + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
> > + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
> > +
> > + r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) - RS64_FW_UC_START_ADDR_LO;
> > + soc21_grbm_select(adev, 0, 0, 0, 0);
> > + mutex_unlock(&adev->srbm_mutex);
> > + gfx_v11_0_unset_safe_mode(adev, 0);
> > +
> > + dev_info(adev->dev,"The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
> > + r == 0 ? "successfuly" : "failed");
> > + /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, so the pipe reset status
> > + * relies on the later gfx ring test result.
> > + */
> > + return 0;
> > +}
> > +
> > static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
> > {
> > struct amdgpu_device *adev = ring->adev;
> > @@ -6662,8 +6724,13 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
> > return -EINVAL;
> >
> > r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
> > - if (r)
> > - return r;
> > + if (r) {
> > +
> > + dev_warn(adev->dev,"reset via MES failed and try pipe reset %d\n", r);
> > + r = gfx_v11_reset_gfx_pipe(ring);
> > + if (r)
> > + return r;
> > + }
> >
> > r = amdgpu_bo_reserve(ring->mqd_obj, false);
> > if (unlikely(r != 0)) {
> > --
> > 2.34.1
> >
More information about the amd-gfx
mailing list