[PATCH] drm/amdgpu: correctly toggle gfx on/off around RLC_SPM_* register access

Alex Deucher alexdeucher at gmail.com
Thu Nov 4 13:16:29 UTC 2021


On Thu, Nov 4, 2021 at 2:20 AM Evan Quan <evan.quan at amd.com> wrote:
>
> As part of the IB padding process, accessing the RLC_SPM_* registers may
> trigger a gfx hang, since gfxoff may already have been kicked in at any
> point during that period. To address that, we manually toggle gfx on/off
> around the RLC_SPM_* register access.
>
> This can resolve the gfx hang issue observed when running Talos with RDP
> launched in parallel.
>
> Signed-off-by: Evan Quan <evan.quan at amd.com>
> Change-Id: Ifae152e8151fecd25a238ebe87dffb3b17cdb540

Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +++++
>  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  | 4 ++++
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 4 ++++
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 4 ++++
>  4 files changed, 17 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index fa03db34aec4..10fc9197602e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -8388,6 +8388,9 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
>  static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
>  {
>         u32 reg, data;
> +
> +       amdgpu_gfx_off_ctrl(adev, false);
> +
>         /* not for *_SOC15 */
>         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
>         if (amdgpu_sriov_is_pp_one_vf(adev))
> @@ -8402,6 +8405,8 @@ static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
>                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
>         else
>                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
> +
> +       amdgpu_gfx_off_ctrl(adev, true);
>  }
>
>  static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 37b4a3db6360..d17a6f399347 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -3575,12 +3575,16 @@ static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
>  {
>         u32 data;
>
> +       amdgpu_gfx_off_ctrl(adev, false);
> +
>         data = RREG32(mmRLC_SPM_VMID);
>
>         data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
>         data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
>
>         WREG32(mmRLC_SPM_VMID, data);
> +
> +       amdgpu_gfx_off_ctrl(adev, true);
>  }
>
>  static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index aefae5b1ff7b..1a476de20d08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -5727,6 +5727,8 @@ static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
>  {
>         u32 data;
>
> +       amdgpu_gfx_off_ctrl(adev, false);
> +
>         if (amdgpu_sriov_is_pp_one_vf(adev))
>                 data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
>         else
> @@ -5739,6 +5741,8 @@ static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
>                 WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
>         else
>                 WREG32(mmRLC_SPM_VMID, data);
> +
> +       amdgpu_gfx_off_ctrl(adev, true);
>  }
>
>  static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 08e91e7245df..d9367747fed3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5218,6 +5218,8 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
>  {
>         u32 reg, data;
>
> +       amdgpu_gfx_off_ctrl(adev, false);
> +
>         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
>         if (amdgpu_sriov_is_pp_one_vf(adev))
>                 data = RREG32_NO_KIQ(reg);
> @@ -5231,6 +5233,8 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
>                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
>         else
>                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
> +
> +       amdgpu_gfx_off_ctrl(adev, true);
>  }
>
>  static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
> --
> 2.29.0
>


More information about the amd-gfx mailing list