[PATCH V3] Revert "drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute" for Raven
Christian König
christian.koenig at amd.com
Thu Feb 29 11:31:14 UTC 2024
Am 29.02.24 um 06:51 schrieb Jesse.Zhang:
> fix the issue:
> "amdgpu: Failed to create process VM object".
>
> [Why]when amdgpu initialized, seq64 do mampping and update bo mapping in vm page table.
> But when clifo run. It also initializes a vm for a process device through the function kfd_process_device_init_vm and ensure the root PD is clean through the function amdgpu_vm_pt_is_root_clean.
> So they have a conflict, and clinfo always failed.
>
> v1:
> - remove all the pte_supports_ats stuff from the amdgpu_vm code (Felix)
>
> Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 23 ----------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 55 +----------------------
> 3 files changed, 1 insertion(+), 80 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ed4a8c5d26d7..d004ace79536 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1385,10 +1385,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
> struct amdgpu_bo_va_mapping, list);
> list_del(&mapping->list);
>
> - if (vm->pte_support_ats &&
> - mapping->start < AMDGPU_GMC_HOLE_START)
> - init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
> -
> r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
> resv, mapping->start, mapping->last,
> init_pte_value, 0, 0, NULL, NULL,
> @@ -2264,7 +2260,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> if (r)
> return r;
>
> - vm->pte_support_ats = false;
> vm->is_compute_context = false;
>
> vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
> @@ -2350,30 +2345,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> */
> int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> {
> - bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
> int r;
>
> r = amdgpu_bo_reserve(vm->root.bo, true);
> if (r)
> return r;
>
> - /* Check if PD needs to be reinitialized and do it before
> - * changing any other state, in case it fails.
> - */
> - if (pte_support_ats != vm->pte_support_ats) {
> - /* Sanity checks */
> - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
> - r = -EINVAL;
> - goto unreserve_bo;
> - }
> -
> - vm->pte_support_ats = pte_support_ats;
> - r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
> - false);
> - if (r)
> - goto unreserve_bo;
> - }
> -
> /* Update VM state */
> vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
> AMDGPU_VM_USE_CPU_FOR_COMPUTE);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 42f6ddec50c1..9f6b5e1ccf34 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -357,9 +357,6 @@ struct amdgpu_vm {
> /* Functions to use for VM table updates */
> const struct amdgpu_vm_update_funcs *update_funcs;
>
> - /* Flag to indicate ATS support from PTE for GFX9 */
> - bool pte_support_ats;
> -
> /* Up to 128 pending retry page faults */
> DECLARE_KFIFO(faults, u64, 128);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index a160265ddc07..f07255532aae 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -89,22 +89,6 @@ static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
> return AMDGPU_VM_PTE_COUNT(adev);
> }
>
> -/**
> - * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
> - *
> - * @adev: amdgpu_device pointer
> - *
> - * Returns:
> - * The number of entries in the root page directory which needs the ATS setting.
> - */
> -static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
> -{
> - unsigned int shift;
> -
> - shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
> - return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
> -}
> -
> /**
> * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
> *
> @@ -394,27 +378,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> }
>
> entries = amdgpu_bo_size(bo) / 8;
> - if (!vm->pte_support_ats) {
> - ats_entries = 0;
> -
> - } else if (!bo->parent) {
> - ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
> - ats_entries = min(ats_entries, entries);
> - entries -= ats_entries;
> -
> - } else {
> - struct amdgpu_vm_bo_base *pt;
> -
> - pt = ancestor->vm_bo;
> - ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
> - if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
> - ats_entries) {
> - ats_entries = 0;
> - } else {
> - ats_entries = entries;
> - entries = 0;
> - }
> - }
> + ats_entries = 0;
You should be able to completely drop the local variable ats_entries as
well.
Apart from that looks good to me.
Christian.
>
> r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> if (r)
> @@ -445,23 +409,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> goto exit;
>
> addr = 0;
> - if (ats_entries) {
> - uint64_t value = 0, flags;
> -
> - flags = AMDGPU_PTE_DEFAULT_ATC;
> - if (level != AMDGPU_VM_PTB) {
> - /* Handle leaf PDEs as PTEs */
> - flags |= AMDGPU_PDE_PTE;
> - amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
> - }
> -
> - r = vm->update_funcs->update(¶ms, vmbo, addr, 0,
> - ats_entries, value, flags);
> - if (r)
> - goto exit;
> -
> - addr += ats_entries * 8;
> - }
>
> if (entries) {
> uint64_t value = 0, flags = 0;
More information about the amd-gfx
mailing list