[PATCH] drm/amdgpu: Add vf critical region check for poison injection

Chai, Thomas YiPeng.Chai at amd.com
Mon Aug 18 02:02:35 UTC 2025


[AMD Official Use Only - AMD Internal Distribution Only]

The address that needs to be checked should be passed to the PF for UMC address conversion.
It is invalid for a VF to convert the address into retired addresses directly.

-----Original Message-----
From: Liu, Xiang(Dean) <Xiang.Liu at amd.com>
Sent: Friday, August 15, 2025 7:17 PM
To: amd-gfx at lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang at amd.com>; Zhou1, Tao <Tao.Zhou1 at amd.com>; Yang, Stanley <Stanley.Yang at amd.com>; Chai, Thomas <YiPeng.Chai at amd.com>; Liu, Xiang(Dean) <Xiang.Liu at amd.com>
Subject: [PATCH] drm/amdgpu: Add vf critical region check for poison injection

Check whether the poison injection address falls within the vbios or data exchange region to avoid hitting the vf critical region in SRIOV.

Signed-off-by: Xiang Liu <xiang.liu at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 38 +++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b00cbb927ca8..158aa4ac9327 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -212,6 +212,31 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
        return 0;
 }

+static int amdgpu_check_vf_critical_region(struct amdgpu_device *adev,
+u64 pa) {
+       u64 offset, size;
+
+       if (!amdgpu_sriov_vf(adev))
+               return 0;
+
+       /* check pa whether in vbios and data exchange region */
+       offset = adev->gmc.mc_vram_size + (AMD_SRIOV_MSG_VBIOS_OFFSET << 10);
+       size = (AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB << 10) +
+              (AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB << 10);
+
+       /*
+        * Add an additional 4MB to critical range as we cannot reserve allocations
+        * done during guest sw init
+        */
+       if (amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(6, 7, 0))
+               size += (0x1000 << 10);
+
+       if (pa >= offset && pa < offset + size)
+               return -EACCES;
+
+       return 0;
+}
+
 static int amdgpu_check_address_validity(struct amdgpu_device *adev,
                        uint64_t address, uint64_t flags)
 {
@@ -223,16 +248,25 @@ static int amdgpu_check_address_validity(struct amdgpu_device *adev,
        if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0))
                return 0;

-       if ((address >= adev->gmc.mc_vram_size) ||
-           (address >= RAS_UMC_INJECT_ADDR_LIMIT))
+       if (!amdgpu_sriov_vf(adev) && ((address >= adev->gmc.mc_vram_size) ||
+           (address >= RAS_UMC_INJECT_ADDR_LIMIT)))
                return -EFAULT;

+       if (amdgpu_sriov_vf(adev) && adev->umc.ras && adev->umc.ras->get_retire_flip_bits)
+               adev->umc.ras->get_retire_flip_bits(adev);
+
        count = amdgpu_umc_lookup_bad_pages_in_a_row(adev,
                                address, page_pfns, ARRAY_SIZE(page_pfns));
        if (count <= 0)
                return -EPERM;

        for (i = 0; i < count; i++) {
+               if (amdgpu_sriov_vf(adev)) {
+                       ret = amdgpu_check_vf_critical_region(adev, address);
+                       if (ret)
+                               return ret;
+               }
+
                memset(&blk_info, 0, sizeof(blk_info));
                ret = amdgpu_vram_mgr_query_address_block_info(&adev->mman.vram_mgr,
                                        page_pfns[i] << AMDGPU_GPU_PAGE_SHIFT, &blk_info);
--
2.34.1



More information about the amd-gfx mailing list