[PATCH] drm/amdgpu: correctly toggle gfx on/off around RLC_SPM_* register access
Alex Deucher
alexdeucher at gmail.com
Thu Nov 4 13:16:29 UTC 2021
On Thu, Nov 4, 2021 at 2:20 AM Evan Quan <evan.quan at amd.com> wrote:
>
> As part of the IB padding process, accessing the RLC_SPM_* registers may
> trigger a gfx hang, since gfxoff may already have been kicked in during
> that period. To address that, we manually toggle gfx on/off around the
> RLC_SPM_* register access.
>
> This resolves the gfx hang issue observed when running Talos with RDP
> launched in parallel.
>
> Signed-off-by: Evan Quan <evan.quan at amd.com>
> Change-Id: Ifae152e8151fecd25a238ebe87dffb3b17cdb540
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +++++
> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 4 ++++
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++++
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++++
> 4 files changed, 17 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index fa03db34aec4..10fc9197602e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -8388,6 +8388,9 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
> static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
> {
> u32 reg, data;
> +
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> /* not for *_SOC15 */
> reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
> if (amdgpu_sriov_is_pp_one_vf(adev))
> @@ -8402,6 +8405,8 @@ static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
> WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
> else
> WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
> +
> + amdgpu_gfx_off_ctrl(adev, true);
> }
>
> static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 37b4a3db6360..d17a6f399347 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -3575,12 +3575,16 @@ static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
> {
> u32 data;
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> data = RREG32(mmRLC_SPM_VMID);
>
> data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
> data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
>
> WREG32(mmRLC_SPM_VMID, data);
> +
> + amdgpu_gfx_off_ctrl(adev, true);
> }
>
> static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index aefae5b1ff7b..1a476de20d08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -5727,6 +5727,8 @@ static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
> {
> u32 data;
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> if (amdgpu_sriov_is_pp_one_vf(adev))
> data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
> else
> @@ -5739,6 +5741,8 @@ static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
> WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
> else
> WREG32(mmRLC_SPM_VMID, data);
> +
> + amdgpu_gfx_off_ctrl(adev, true);
> }
>
> static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 08e91e7245df..d9367747fed3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -5218,6 +5218,8 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
> {
> u32 reg, data;
>
> + amdgpu_gfx_off_ctrl(adev, false);
> +
> reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
> if (amdgpu_sriov_is_pp_one_vf(adev))
> data = RREG32_NO_KIQ(reg);
> @@ -5231,6 +5233,8 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
> WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
> else
> WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
> +
> + amdgpu_gfx_off_ctrl(adev, true);
> }
>
> static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
> --
> 2.29.0
>
More information about the amd-gfx
mailing list