[PATCH 1/4] drm/amdgpu/gfx11: Implement the GFX11 KGQ pipe reset
Alex Deucher
alexdeucher at gmail.com
Wed Feb 19 19:56:11 UTC 2025
On Sun, Jan 26, 2025 at 3:38 AM Prike Liang <Prike.Liang at amd.com> wrote:
>
> Implement the kernel graphics queue pipe reset, and the driver
> will fall back to pipe reset when the queue reset fails. However,
> the ME FW doesn't fully support pipe reset yet, so disable the
> KGQ pipe reset temporarily.
>
> Signed-off-by: Prike Liang <Prike.Liang at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 76 +++++++++++++++++++++++++-
> 1 file changed, 74 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> index 89d17750af04..395872bb1401 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> @@ -65,6 +65,8 @@
> #define regPC_CONFIG_CNTL_1 0x194d
> #define regPC_CONFIG_CNTL_1_BASE_IDX 1
>
> +static uint32_t me_fw_start_pc;
> +
> MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
> MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
> MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
> @@ -2932,6 +2934,9 @@ static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
> tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
> tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
> WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
> +
> + /* cache the firmware start PC */
> + me_fw_start_pc = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1));
You can't use a global variable. It won't work if you have multiple
GPUs in the system.
Alex
> }
>
> static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
> @@ -6654,6 +6659,68 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
> amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
> }
>
> +static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
> +{
> + /* Disable the pipe reset until the CPFW fully support it.*/
> + dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
> + return false;
> +}
> +
> +
> +static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
> +{
> + struct amdgpu_device *adev = ring->adev;
> + uint32_t reset_pipe = 0, clean_pipe = 0;
> + int r;
> +
> + if (!gfx_v11_pipe_reset_support(adev))
> + return -EOPNOTSUPP;
> +
> + gfx_v11_0_set_safe_mode(adev, 0);
> + mutex_lock(&adev->srbm_mutex);
> + soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> +
> + switch (ring->pipe) {
> + case 0:
> + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> + PFP_PIPE0_RESET, 1);
> + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> + ME_PIPE0_RESET, 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> + PFP_PIPE0_RESET, 0);
> + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> + ME_PIPE0_RESET, 0);
> + break;
> + case 1:
> + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> + PFP_PIPE1_RESET, 1);
> + reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
> + ME_PIPE1_RESET, 1);
> + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> + PFP_PIPE1_RESET, 0);
> + clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
> + ME_PIPE1_RESET, 0);
> + break;
> + default:
> + break;
> + }
> +
> + WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
> + WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
> +
> + r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) - me_fw_start_pc;
> + soc21_grbm_select(adev, 0, 0, 0, 0);
> + mutex_unlock(&adev->srbm_mutex);
> + gfx_v11_0_unset_safe_mode(adev, 0);
> +
> + dev_info(adev->dev,"The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
> + r == 0 ? "successfuly" : "failed");
> + /* FIXME: Sometimes driver can't cache the ME firmware start PC correctly, so the pipe reset status
> + * relies on the later gfx ring test result.
> + */
> + return 0;
> +}
> +
> static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
> {
> struct amdgpu_device *adev = ring->adev;
> @@ -6663,8 +6730,13 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
> return -EINVAL;
>
> r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
> - if (r)
> - return r;
> + if (r) {
> +
> + dev_warn(adev->dev,"reset via MES failed and try pipe reset %d\n", r);
> + r = gfx_v11_reset_gfx_pipe(ring);
> + if (r)
> + return r;
> + }
>
> r = amdgpu_bo_reserve(ring->mqd_obj, false);
> if (unlikely(r != 0)) {
> --
> 2.34.1
>
More information about the amd-gfx
mailing list