[PATCH V3] Revert "drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute" for Raven

Zhang, Jesse(Jie) Jesse.Zhang at amd.com
Thu Feb 29 06:01:52 UTC 2024


[AMD Official Use Only - General]

Please ignore the patch and will send a new patch out soon.

-----Original Message-----
From: Jesse.Zhang <Jesse.Zhang at amd.com>
Sent: Thursday, February 29, 2024 1:51 PM
To: amd-gfx at lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Kuehling, Felix <Felix.Kuehling at amd.com>; Paneer Selvam, Arunpravin <Arunpravin.PaneerSelvam at amd.com>; Zhang, Yifan <Yifan1.Zhang at amd.com>; Zhang, Jesse(Jie) <Jesse.Zhang at amd.com>
Subject: [PATCH V3] Revert "drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute" for Raven

fix the issue:
"amdgpu: Failed to create process VM object".

[Why]when amdgpu initialized, seq64 do mampping and update bo mapping in vm page table.
But when clifo run. It also initializes a vm for a process device through the function kfd_process_device_init_vm and ensure the root PD is clean through the function amdgpu_vm_pt_is_root_clean.
So they have a conflict, and clinfo  always failed.

 v1:
    - remove all the pte_supports_ats stuff from the amdgpu_vm code (Felix)

Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 23 ----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  3 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 55 +----------------------
 3 files changed, 1 insertion(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ed4a8c5d26d7..d004ace79536 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1385,10 +1385,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
                        struct amdgpu_bo_va_mapping, list);
                list_del(&mapping->list);

-               if (vm->pte_support_ats &&
-                   mapping->start < AMDGPU_GMC_HOLE_START)
-                       init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
-
                r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
                                           resv, mapping->start, mapping->last,
                                           init_pte_value, 0, 0, NULL, NULL, @@ -2264,7 +2260,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        if (r)
                return r;

-       vm->pte_support_ats = false;
        vm->is_compute_context = false;

        vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & @@ -2350,30 +2345,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  */
 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)  {
-       bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
        int r;

        r = amdgpu_bo_reserve(vm->root.bo, true);
        if (r)
                return r;

-       /* Check if PD needs to be reinitialized and do it before
-        * changing any other state, in case it fails.
-        */
-       if (pte_support_ats != vm->pte_support_ats) {
-               /* Sanity checks */
-               if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
-                       r = -EINVAL;
-                       goto unreserve_bo;
-               }
-
-               vm->pte_support_ats = pte_support_ats;
-               r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
-                                      false);
-               if (r)
-                       goto unreserve_bo;
-       }
-
        /* Update VM state */
        vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
                                    AMDGPU_VM_USE_CPU_FOR_COMPUTE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 42f6ddec50c1..9f6b5e1ccf34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -357,9 +357,6 @@ struct amdgpu_vm {
        /* Functions to use for VM table updates */
        const struct amdgpu_vm_update_funcs     *update_funcs;

-       /* Flag to indicate ATS support from PTE for GFX9 */
-       bool                    pte_support_ats;
-
        /* Up to 128 pending retry page faults */
        DECLARE_KFIFO(faults, u64, 128);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index a160265ddc07..f07255532aae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -89,22 +89,6 @@ static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
        return AMDGPU_VM_PTE_COUNT(adev);
 }

-/**
- * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
- *
- * @adev: amdgpu_device pointer
- *
- * Returns:
- * The number of entries in the root page directory which needs the ATS setting.
- */
-static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev) -{
-       unsigned int shift;
-
-       shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
-       return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
-}
-
 /**
  * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
  *
@@ -394,27 +378,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        }

        entries = amdgpu_bo_size(bo) / 8;
-       if (!vm->pte_support_ats) {
-               ats_entries = 0;
-
-       } else if (!bo->parent) {
-               ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
-               ats_entries = min(ats_entries, entries);
-               entries -= ats_entries;
-
-       } else {
-               struct amdgpu_vm_bo_base *pt;
-
-               pt = ancestor->vm_bo;
-               ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
-               if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
-                   ats_entries) {
-                       ats_entries = 0;
-               } else {
-                       ats_entries = entries;
-                       entries = 0;
-               }
-       }
+       ats_entries = 0;

        r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
        if (r)
@@ -445,23 +409,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                goto exit;

        addr = 0;
-       if (ats_entries) {
-               uint64_t value = 0, flags;
-
-               flags = AMDGPU_PTE_DEFAULT_ATC;
-               if (level != AMDGPU_VM_PTB) {
-                       /* Handle leaf PDEs as PTEs */
-                       flags |= AMDGPU_PDE_PTE;
-                       amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
-               }
-
-               r = vm->update_funcs->update(&params, vmbo, addr, 0,
-                                            ats_entries, value, flags);
-               if (r)
-                       goto exit;
-
-               addr += ats_entries * 8;
-       }

        if (entries) {
                uint64_t value = 0, flags = 0;
--
2.34.1



More information about the amd-gfx mailing list