[PATCH V3] Revert "drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute" for Raven

Felix Kuehling felix.kuehling at amd.com
Fri Mar 1 21:11:48 UTC 2024


On 2024-02-29 01:04, Jesse.Zhang wrote:
> fix the issue:
> "amdgpu: Failed to create process VM object".
>
> [Why]when amdgpu initialized, seq64 do mampping and update bo mapping in vm page table.
> But when clifo run. It also initializes a vm for a process device through the function kfd_process_device_init_vm and ensure the root PD is clean through the function amdgpu_vm_pt_is_root_clean.
> So they have a conflict, and clinfo  always failed.
>
> v1:
>    - remove all the pte_supports_ats stuff from the amdgpu_vm code (Felix)
>
> Signed-off-by: Jesse Zhang <Jesse.Zhang at amd.com>
The headline should be updated. This is no longer a revert of the quoted 
patch. Other than that, this patch looks reasonable to me. One more 
comment inline.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 23 ----------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  3 --
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 56 +----------------------
>   3 files changed, 1 insertion(+), 81 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ed4a8c5d26d7..d004ace79536 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1385,10 +1385,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>   			struct amdgpu_bo_va_mapping, list);
>   		list_del(&mapping->list);
>   
> -		if (vm->pte_support_ats &&
> -		    mapping->start < AMDGPU_GMC_HOLE_START)
> -			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
> -
>   		r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
>   					   resv, mapping->start, mapping->last,
>   					   init_pte_value, 0, 0, NULL, NULL,
> @@ -2264,7 +2260,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	if (r)
>   		return r;
>   
> -	vm->pte_support_ats = false;
>   	vm->is_compute_context = false;
>   
>   	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
> @@ -2350,30 +2345,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>    */
>   int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   {
> -	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
>   	int r;
>   
>   	r = amdgpu_bo_reserve(vm->root.bo, true);
>   	if (r)
>   		return r;
>   
> -	/* Check if PD needs to be reinitialized and do it before
> -	 * changing any other state, in case it fails.
> -	 */
> -	if (pte_support_ats != vm->pte_support_ats) {
> -		/* Sanity checks */
> -		if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
> -			r = -EINVAL;
> -			goto unreserve_bo;
> -		}
> -
> -		vm->pte_support_ats = pte_support_ats;
> -		r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
> -				       false);
> -		if (r)
> -			goto unreserve_bo;
> -	}
> -
>   	/* Update VM state */
>   	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
>   				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 42f6ddec50c1..9f6b5e1ccf34 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -357,9 +357,6 @@ struct amdgpu_vm {
>   	/* Functions to use for VM table updates */
>   	const struct amdgpu_vm_update_funcs	*update_funcs;
>   
> -	/* Flag to indicate ATS support from PTE for GFX9 */
> -	bool			pte_support_ats;
> -
>   	/* Up to 128 pending retry page faults */
>   	DECLARE_KFIFO(faults, u64, 128);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index a160265ddc07..2835cb3f76eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -89,22 +89,6 @@ static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
>   	return AMDGPU_VM_PTE_COUNT(adev);
>   }
>   
> -/**
> - * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
> - *
> - * @adev: amdgpu_device pointer
> - *
> - * Returns:
> - * The number of entries in the root page directory which needs the ATS setting.
> - */
> -static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
> -{
> -	unsigned int shift;
> -
> -	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
> -	return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
> -}
> -
>   /**
>    * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
>    *
> @@ -379,7 +363,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	struct ttm_operation_ctx ctx = { true, false };
>   	struct amdgpu_vm_update_params params;
>   	struct amdgpu_bo *ancestor = &vmbo->bo;
> -	unsigned int entries, ats_entries;
> +	unsigned int entries;
>   	struct amdgpu_bo *bo = &vmbo->bo;
>   	uint64_t addr;
>   	int r, idx;
> @@ -394,27 +378,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	}
>   
>   	entries = amdgpu_bo_size(bo) / 8;
> -	if (!vm->pte_support_ats) {
> -		ats_entries = 0;
> -
> -	} else if (!bo->parent) {
> -		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
> -		ats_entries = min(ats_entries, entries);
> -		entries -= ats_entries;
> -
> -	} else {
> -		struct amdgpu_vm_bo_base *pt;
> -
> -		pt = ancestor->vm_bo;
> -		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
> -		if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
> -		    ats_entries) {
> -			ats_entries = 0;
> -		} else {
> -			ats_entries = entries;
> -			entries = 0;
> -		}
> -	}
>   
>   	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>   	if (r)
> @@ -445,23 +408,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		goto exit;
>   
>   	addr = 0;
> -	if (ats_entries) {
> -		uint64_t value = 0, flags;
> -
> -		flags = AMDGPU_PTE_DEFAULT_ATC;
> -		if (level != AMDGPU_VM_PTB) {
> -			/* Handle leaf PDEs as PTEs */
> -			flags |= AMDGPU_PDE_PTE;
> -			amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
> -		}
> -
> -		r = vm->update_funcs->update(&params, vmbo, addr, 0,
> -					     ats_entries, value, flags);
> -		if (r)
> -			goto exit;
> -
> -		addr += ats_entries * 8;
> -	}
>   
>   	if (entries) {

I believe entries == 0 is not possible any more, so this condition could 
probably be removed.

Regards,
   Felix


>   		uint64_t value = 0, flags = 0;


More information about the amd-gfx mailing list