[PATCH] drm/amdgpu/: Remove bo_create_kernel_at path from virt page

Luo, Zhigang Zhigang.Luo at amd.com
Thu Mar 14 16:28:22 UTC 2024


[AMD Official Use Only - General]

Reviewed-by: Zhigang Luo <zhigang.luo at amd.com>

-----Original Message-----
From: Skvortsov, Victor <Victor.Skvortsov at amd.com>
Sent: Tuesday, March 12, 2024 1:51 PM
To: Skvortsov, Victor <Victor.Skvortsov at amd.com>; Luo, Zhigang <Zhigang.Luo at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Koenig, Christian <Christian.Koenig at amd.com>
Subject: [PATCH] drm/amdgpu/: Remove bo_create_kernel_at path from virt page

Use amdgpu_vram_mgr to reserve bad page ranges.
Reserved ranges will be freed by amdgpu_vram_mgr_fini().
Delete the bo_create path as it is redundant.

Suggested-by: Christian König <christian.koenig at amd.com>
Signed-off-by: Victor Skvortsov <victor.skvortsov at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 55 ++----------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  2 -
 2 files changed, 3 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 7a4eae36778a..2a20714b9c16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -244,7 +244,6 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
         */
        unsigned int align_space = 512;
        void *bps = NULL;
-       struct amdgpu_bo **bps_bo = NULL;

        *data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL);
        if (!*data)
@@ -254,12 +253,7 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
        if (!bps)
                goto bps_failure;

-       bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL);
-       if (!bps_bo)
-               goto bps_bo_failure;
-
        (*data)->bps = bps;
-       (*data)->bps_bo = bps_bo;
        (*data)->count = 0;
        (*data)->last_reserved = 0;

@@ -267,34 +261,12 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)

        return 0;

-bps_bo_failure:
-       kfree(bps);
 bps_failure:
        kfree(*data);
 data_failure:
        return -ENOMEM;
 }

-static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
-{
-       struct amdgpu_virt *virt = &adev->virt;
-       struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
-       struct amdgpu_bo *bo;
-       int i;
-
-       if (!data)
-               return;
-
-       for (i = data->last_reserved - 1; i >= 0; i--) {
-               bo = data->bps_bo[i];
-               if (bo) {
-                       amdgpu_bo_free_kernel(&bo, NULL, NULL);
-                       data->bps_bo[i] = bo;
-               }
-               data->last_reserved = i;
-       }
-}
-
 void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
 {
        struct amdgpu_virt *virt = &adev->virt;
@@ -305,10 +277,7 @@ void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
        if (!data)
                return;

-       amdgpu_virt_ras_release_bp(adev);
-
        kfree(data->bps);
-       kfree(data->bps_bo);
        kfree(data);
        virt->virt_eh_data = NULL;
 }
@@ -330,9 +299,6 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
 {
        struct amdgpu_virt *virt = &adev->virt;
        struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
-       struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
-       struct ttm_resource_manager *man = &mgr->manager;
-       struct amdgpu_bo *bo = NULL;
        uint64_t bp;
        int i;

@@ -341,26 +307,11 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)

        for (i = data->last_reserved; i < data->count; i++) {
                bp = data->bps[i].retired_page;
+               if (amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
+                       bp << AMDGPU_GPU_PAGE_SHIFT, AMDGPU_GPU_PAGE_SIZE))
+                       DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);

-               /* There are two cases of reserve error should be ignored:
-                * 1) a ras bad page has been allocated (used by someone);
-                * 2) a ras bad page has been reserved (duplicate error injection
-                *    for one page);
-                */
-               if  (ttm_resource_manager_used(man)) {
-                       amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
-                               bp << AMDGPU_GPU_PAGE_SHIFT,
-                               AMDGPU_GPU_PAGE_SIZE);
-                       data->bps_bo[i] = NULL;
-               } else {
-                       if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
-                                                       AMDGPU_GPU_PAGE_SIZE,
-                                                       &bo, NULL))
-                               DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
-                       data->bps_bo[i] = bo;
-               }
                data->last_reserved = i + 1;
-               bo = NULL;
        }
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 3f59b7b5523f..15599951e7b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -224,8 +224,6 @@ struct amdgim_vf2pf_info_v2 {
 struct amdgpu_virt_ras_err_handler_data {
        /* point to bad page records array */
        struct eeprom_table_record *bps;
-       /* point to reserved bo array */
-       struct amdgpu_bo **bps_bo;
        /* the count of entries */
        int count;
        /* last reserved entry's index + 1 */
--
2.25.1



More information about the amd-gfx mailing list