[PATCH] drm/amdgpu: Fix L1 policy violations (PSP) on Navi21 SRIOV
Nirmoy
nirmodas at amd.com
Fri Sep 4 08:17:36 UTC 2020
On 9/3/20 11:11 PM, Rohit Khaire wrote:
Please add a commit message here.
> Signed-off-by: Rohit Khaire <rohit.khaire at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 49 ++++++++++++++++--
> drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 64 +++++++++++-------------
> drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 42 ++++++++--------
> 3 files changed, 95 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index d502e30f67d9..4bafbd453e53 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -4808,14 +4808,23 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
>
> void gfx_v10_0_rlc_stop(struct amdgpu_device *adev)
> {
> - u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
> + u32 tmp;
>
> + /* For SRIOV, don't touch RLC_G */
> + if (amdgpu_sriov_vf(adev))
> + return;
> +
> + tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL);
> tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
> WREG32_SOC15(GC, 0, mmRLC_CNTL, tmp);
> }
>
> static void gfx_v10_0_rlc_reset(struct amdgpu_device *adev)
> {
> + /* For SRIOV, don't touch RLC_G */
> + if (amdgpu_sriov_vf(adev))
> + return;
> +
> WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
> udelay(50);
> WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
> @@ -4846,6 +4855,10 @@ static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
>
> static void gfx_v10_0_rlc_start(struct amdgpu_device *adev)
> {
> + /* For SRIOV, don't touch RLC_G */
> + if (amdgpu_sriov_vf(adev))
> + return;
> +
> /* TODO: enable rlc & smu handshake until smu
> * and gfxoff feature works as expected */
> if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
> @@ -4859,6 +4872,10 @@ static void gfx_v10_0_rlc_enable_srm(struct amdgpu_device *adev)
> {
> uint32_t tmp;
>
> + /* For SRIOV, don't touch RLC_G */
> + if (amdgpu_sriov_vf(adev))
> + return;
> +
> /* enable Save Restore Machine */
> tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
> tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
> @@ -4872,6 +4889,10 @@ static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev)
> const __le32 *fw_data;
> unsigned i, fw_size;
>
> + /* For SRIOV, don't touch RLC_G */
> + if (amdgpu_sriov_vf(adev))
> + return;
> +
> if (!adev->gfx.rlc_fw)
> return -EINVAL;
>
> @@ -4906,8 +4927,7 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
>
> gfx_v10_0_init_csb(adev);
>
> - if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
> - gfx_v10_0_rlc_enable_srm(adev);
> + gfx_v10_0_rlc_enable_srm(adev);
> } else {
> if (amdgpu_sriov_vf(adev)) {
> gfx_v10_0_init_csb(adev);
> @@ -6990,7 +7010,6 @@ static int gfx_v10_0_hw_fini(void *handle)
> if (amdgpu_gfx_disable_kcq(adev))
> DRM_ERROR("KCQ disable failed\n");
> if (amdgpu_sriov_vf(adev)) {
> - gfx_v10_0_cp_gfx_enable(adev, false);
> /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
> tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
> tmp &= 0xffffff00;
> @@ -7272,6 +7291,10 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
> {
> uint32_t data, def;
>
> + /* For SRIOV, guest VM should not touch CGCG and PG stuff */
> + if (amdgpu_sriov_vf(adev))
> + return;
> +
> /* It is disabled by HW by default */
> if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
> /* 0 - Disable some blocks' MGCG */
> @@ -7339,6 +7362,10 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
> {
> uint32_t data, def;
>
> + /* For SRIOV, guest VM should not touch CGCG and PG stuff */
> + if (amdgpu_sriov_vf(adev))
> + return;
> +
> /* Enable 3D CGCG/CGLS */
> if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
> /* write cmd to clear cgcg/cgls ov */
> @@ -7381,6 +7408,10 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
> {
> uint32_t def, data;
>
> + /* For SRIOV, guest VM should not touch CGCG and PG stuff */
> + if (amdgpu_sriov_vf(adev))
> + return;
> +
> if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
> def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
> /* unset CGCG override */
> @@ -7422,6 +7453,10 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
> static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
> bool enable)
> {
> + /* For SRIOV, guest VM should not touch CGCG and PG stuff */
> + if (amdgpu_sriov_vf(adev))
> + return;
> +
> amdgpu_gfx_rlc_enter_safe_mode(adev);
>
> if (enable) {
> @@ -7584,6 +7619,12 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> int data;
>
> + /* bypass PG and CG on SRIOV VF */
> + if (amdgpu_sriov_vf(adev)) {
> + *flags = 0;
> + return;
> + }
> +
> /* AMD_CG_SUPPORT_GFX_MGCG */
> data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
> if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
> index 237a9ff5afa0..44ebb2722c4e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
> @@ -152,33 +152,35 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev)
> {
> uint64_t value;
>
> - /* Disable AGP. */
> - WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0);
> - WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0);
> - WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF);
> -
> - /* Program the system aperture low logical page number. */
> - WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
> - adev->gmc.vram_start >> 18);
> - WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
> - adev->gmc.vram_end >> 18);
> -
> - /* Set default page address. */
> - value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
> - + adev->vm_manager.vram_base_offset;
> - WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
> - (u32)(value >> 12));
> - WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
> - (u32)(value >> 44));
> + if (!amdgpu_sriov_vf(adev)) {
> + /* Disable AGP. */
> + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0);
> + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0);
> + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF);
> +
> + /* Program the system aperture low logical page number. */
> + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
> + adev->gmc.vram_start >> 18);
> + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
> + adev->gmc.vram_end >> 18);
> +
> + /* Set default page address. */
> + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
> + + adev->vm_manager.vram_base_offset;
> + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
> + (u32)(value >> 12));
> + WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
> + (u32)(value >> 44));
> + }
>
> /* Program "protection fault". */
> WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
> - (u32)(adev->dummy_page_addr >> 12));
> + (u32)(adev->dummy_page_addr >> 12));
> WREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
> - (u32)((u64)adev->dummy_page_addr >> 44));
> + (u32)((u64)adev->dummy_page_addr >> 44));
>
> WREG32_FIELD15(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
> - ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
> + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
> }
>
Indentation looks off here and in
gfxhub_v2_1_init_system_aperture_regs() as well.
Nirmoy
>
> @@ -350,18 +352,6 @@ static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev)
>
> int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev)
> {
> - if (amdgpu_sriov_vf(adev)) {
> - /*
> - * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
> - * VF copy registers so vbios post doesn't program them, for
> - * SRIOV driver need to program them
> - */
> - WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_BASE,
> - adev->gmc.vram_start >> 24);
> - WREG32_SOC15(GC, 0, mmGCMC_VM_FB_LOCATION_TOP,
> - adev->gmc.vram_end >> 24);
> - }
> -
> /* GART Enable. */
> gfxhub_v2_1_init_gart_aperture_regs(adev);
> gfxhub_v2_1_init_system_aperture_regs(adev);
> @@ -394,9 +384,11 @@ void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev)
> ENABLE_ADVANCED_DRIVER_MODEL, 0);
> WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
>
> - /* Setup L2 cache */
> - WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
> - WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0);
> + if (!amdgpu_sriov_vf(adev)) {
> + /* Setup L2 cache */
> + WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
> + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0);
> + }
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
> index 2d88278c50bf..99f697e63e3c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
> @@ -211,26 +211,26 @@ static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev)
> adev->gmc.vram_start >> 18);
> WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
> adev->gmc.vram_end >> 18);
> - }
>
> - /* Set default page address. */
> - value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
> - adev->vm_manager.vram_base_offset;
> - WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
> - (u32)(value >> 12));
> - WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
> - (u32)(value >> 44));
> -
> - /* Program "protection fault". */
> - WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
> - (u32)(adev->dummy_page_addr >> 12));
> - WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
> - (u32)((u64)adev->dummy_page_addr >> 44));
> -
> - tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2);
> - tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
> - ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
> - WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
> + /* Set default page address. */
> + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
> + adev->vm_manager.vram_base_offset;
> + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
> + (u32)(value >> 12));
> + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
> + (u32)(value >> 44));
> +
> + /* Program "protection fault". */
> + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
> + (u32)(adev->dummy_page_addr >> 12));
> + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
> + (u32)((u64)adev->dummy_page_addr >> 44));
> +
> + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2);
> + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
> + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
> + WREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
> + }
> }
>
> static void mmhub_v2_0_init_tlb_regs(struct amdgpu_device *adev)
> @@ -656,8 +656,10 @@ static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
> {
> int data, data1;
>
> - if (amdgpu_sriov_vf(adev))
> + if (amdgpu_sriov_vf(adev)) {
> *flags = 0;
> + return;
> + }
>
> switch (adev->asic_type) {
> case CHIP_SIENNA_CICHLID:
More information about the amd-gfx mailing list