[PATCH 1/4] drm/amdgpu/gfx11: Implement the GFX11 KGQ pipe reset
Liang, Prike
Prike.Liang at amd.com
Fri Feb 21 01:36:12 UTC 2025
[Public]
> From: Alex Deucher <alexdeucher at gmail.com>
> Sent: Thursday, February 20, 2025 10:18 PM
> To: Liang, Prike <Prike.Liang at amd.com>
> Cc: amd-gfx at lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher at amd.com>; Koenig, Christian
> <Christian.Koenig at amd.com>; Lazar, Lijo <Lijo.Lazar at amd.com>
> Subject: Re: [PATCH 1/4] drm/amdgpu/gfx11: Implement the GFX11 KGQ pipe
> reset
>
> On Thu, Feb 20, 2025 at 4:39 AM Liang, Prike <Prike.Liang at amd.com> wrote:
> >
> > [Public]
> >
> > The various gfx11/gfx12 systems share the same start PC value, but it seems
> better to use the specific register CP_ME_PRGRM_CNTR_START to get the start
> PC value.
>
> Why not store the value per device? Or if it's always the same, just use a macro.
>
Thank you for the input. The firmware assigns the _start PC using the macro as well,
so it will be more efficient to retrieve the _start PC from the macro instead of accessing the MMIO register each time.
Thanks,
Prike
> Alex
>
>
> >
> > Regards,
> > Prike
> >
> > > -----Original Message-----
> > > From: Alex Deucher <alexdeucher at gmail.com>
> > > Sent: Thursday, February 20, 2025 3:56 AM
> > > To: Liang, Prike <Prike.Liang at amd.com>
> > > Cc: amd-gfx at lists.freedesktop.org; Deucher, Alexander
> > > <Alexander.Deucher at amd.com>; Koenig, Christian
> > > <Christian.Koenig at amd.com>; Lazar, Lijo <Lijo.Lazar at amd.com>
> > > Subject: Re: [PATCH 1/4] drm/amdgpu/gfx11: Implement the GFX11 KGQ
> > > pipe reset
> > >
> > > On Sun, Jan 26, 2025 at 3:38 AM Prike Liang <Prike.Liang at amd.com> wrote:
> > > >
> > > > > Implement the kernel graphics queue pipe reset, and the driver will
> > > > > fall back to pipe reset when the queue reset fails. However, the ME
> > > > > FW doesn't fully support pipe reset yet, so disable the KGQ pipe
> > > > > reset temporarily.
> > > >
> > > > Signed-off-by: Prike Liang <Prike.Liang at amd.com>
> > > > ---
> > > > drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 76
> > > > +++++++++++++++++++++++++-
> > > > 1 file changed, 74 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > > > b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > > > index 89d17750af04..395872bb1401 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > > > @@ -65,6 +65,8 @@
> > > > #define regPC_CONFIG_CNTL_1 0x194d
> > > > #define regPC_CONFIG_CNTL_1_BASE_IDX 1
> > > >
> > > > +static uint32_t me_fw_start_pc;
> > > > +
> > > > MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
> > > > MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
> > > > MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
> > > > @@ -2932,6 +2934,9 @@ static void gfx_v11_0_config_gfx_rs64(struct
> > > amdgpu_device *adev)
> > > > tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL,
> > > MEC_PIPE2_RESET, 0);
> > > > tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL,
> > > MEC_PIPE3_RESET, 0);
> > > > WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
> > > > +
> > > > + /* cache the firmware start PC */
> > > > + me_fw_start_pc = RREG32(SOC15_REG_OFFSET(GC, 0,
> > > > + regCP_GFX_RS64_INSTR_PNTR1));
> > >
> > > You can't use a global variable. It won't work if you have multiple
> > > GPUs in the system.
> > >
> > > Alex
> > >
> > > > }
> > > >
> > > > static int gfx_v11_0_wait_for_rlc_autoload_complete(struct
> > > > amdgpu_device *adev) @@ -6654,6 +6659,68 @@ static void
> > > gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
> > > > amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ }
> > > >
> > > > +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev) {
> > > > + /* Disable the pipe reset until the CPFW fully support it.*/
> > > > + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset
> yet.\n");
> > > > + return false;
> > > > +}
> > > > +
> > > > +
> > > > +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring) {
> > > > + struct amdgpu_device *adev = ring->adev;
> > > > + uint32_t reset_pipe = 0, clean_pipe = 0;
> > > > + int r;
> > > > +
> > > > + if (!gfx_v11_pipe_reset_support(adev))
> > > > + return -EOPNOTSUPP;
> > > > +
> > > > + gfx_v11_0_set_safe_mode(adev, 0);
> > > > + mutex_lock(&adev->srbm_mutex);
> > > > + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue,
> > > > + 0);
> > > > +
> > > > + switch (ring->pipe) {
> > > > + case 0:
> > > > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > > > + PFP_PIPE0_RESET, 1);
> > > > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > > > + ME_PIPE0_RESET, 1);
> > > > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > > > + PFP_PIPE0_RESET, 0);
> > > > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > > > + ME_PIPE0_RESET, 0);
> > > > + break;
> > > > + case 1:
> > > > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > > > + PFP_PIPE1_RESET, 1);
> > > > + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> > > > + ME_PIPE1_RESET, 1);
> > > > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > > > + PFP_PIPE1_RESET, 0);
> > > > + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> > > > + ME_PIPE1_RESET, 0);
> > > > + break;
> > > > + default:
> > > > + break;
> > > > + }
> > > > +
> > > > + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
> > > > + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
> > > > +
> > > > + r = RREG32(SOC15_REG_OFFSET(GC, 0,
> > > regCP_GFX_RS64_INSTR_PNTR1)) - me_fw_start_pc;
> > > > + soc21_grbm_select(adev, 0, 0, 0, 0);
> > > > + mutex_unlock(&adev->srbm_mutex);
> > > > + gfx_v11_0_unset_safe_mode(adev, 0);
> > > > +
> > > > + dev_info(adev->dev,"The ring %s pipe reset to the ME
> > > > + firmware start
> > > PC: %s\n", ring->name,
> > > > + r == 0 ? "successfuly" : "failed");
> > > > + /* FIXME: Sometimes driver can't cache the ME firmware
> > > > + start PC
> > > correctly, so the pipe reset status
> > > > + * relies on the later gfx ring test result.
> > > > + */
> > > > + return 0;
> > > > +}
> > > > +
> > > > static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned
> > > > int
> > > > vmid) {
> > > > struct amdgpu_device *adev = ring->adev; @@ -6663,8
> > > > +6730,13 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
> unsigned int vmid)
> > > > return -EINVAL;
> > > >
> > > > r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
> > > > - if (r)
> > > > - return r;
> > > > + if (r) {
> > > > +
> > > > + dev_warn(adev->dev,"reset via MES failed and try
> > > > + pipe reset %d\n",
> > > r);
> > > > + r = gfx_v11_reset_gfx_pipe(ring);
> > > > + if (r)
> > > > + return r;
> > > > + }
> > > >
> > > > r = amdgpu_bo_reserve(ring->mqd_obj, false);
> > > > if (unlikely(r != 0)) {
> > > > --
> > > > 2.34.1
> > > >
More information about the amd-gfx
mailing list