[PATCH 6/8] drm/amdgpu: allocate VM PDs/PTs on demand
Kuehling, Felix
Felix.Kuehling at amd.com
Tue Feb 12 15:26:43 UTC 2019
I pushed my patch series that simplifies eviction fence handling in KFD.
If you rebase this series on top of it, it should be OK now.
Regards,
Felix
On 2019-02-04 7:42 a.m., Christian König wrote:
> Let's start to allocate VM PDs/PTs on demand instead of pre-allocating
> them during mapping.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 9 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 10 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 136 +++++-------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 -
> 5 files changed, 38 insertions(+), 126 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index d7b10d79f1de..2176c92f9377 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -492,12 +492,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
> vm->process_info->eviction_fence,
> NULL, NULL);
>
> - ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
> - if (ret) {
> - pr_err("Failed to allocate pts, err=%d\n", ret);
> - goto err_alloc_pts;
> - }
> -
> ret = vm_validate_pt_pd_bos(vm);
> if (ret) {
> pr_err("validate_pt_pd_bos() failed\n");
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> index 7e22be7ca68a..54dd02a898b9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> @@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> return -ENOMEM;
> }
>
> - r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
> - size);
> - if (r) {
> - DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
> - amdgpu_vm_bo_rmv(adev, *bo_va);
> - ttm_eu_backoff_reservation(&ticket, &list);
> - return r;
> - }
> -
> r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
> AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
> AMDGPU_PTE_EXECUTABLE);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d21dd2f369da..e141e3b13112 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -627,11 +627,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>
> switch (args->operation) {
> case AMDGPU_VA_OP_MAP:
> - r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
> - args->map_size);
> - if (r)
> - goto error_backoff;
> -
> va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
> r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
> args->offset_in_bo, args->map_size,
> @@ -647,11 +642,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> args->map_size);
> break;
> case AMDGPU_VA_OP_REPLACE:
> - r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
> - args->map_size);
> - if (r)
> - goto error_backoff;
> -
> va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
> r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
> args->offset_in_bo, args->map_size,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f7d410a5d848..0b8a1aa56c4a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -504,47 +504,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
> }
> }
>
> -/**
> - * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
> - *
> - * @adev: amdgpu_device pointer
> - * @vm: amdgpu_vm structure
> - * @start: start addr of the walk
> - * @cursor: state to initialize
> - *
> - * Start a walk and go directly to the leaf node.
> - */
> -static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
> - struct amdgpu_vm *vm, uint64_t start,
> - struct amdgpu_vm_pt_cursor *cursor)
> -{
> - amdgpu_vm_pt_start(adev, vm, start, cursor);
> - while (amdgpu_vm_pt_descendant(adev, cursor));
> -}
> -
> -/**
> - * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
> - *
> - * @adev: amdgpu_device pointer
> - * @cursor: current state
> - *
> - * Walk the PD/PT tree to the next leaf node.
> - */
> -static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
> - struct amdgpu_vm_pt_cursor *cursor)
> -{
> - amdgpu_vm_pt_next(adev, cursor);
> - if (cursor->pfn != ~0ll)
> - while (amdgpu_vm_pt_descendant(adev, cursor));
> -}
> -
> -/**
> - * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy
> - */
> -#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor) \
> - for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor)); \
> - (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))
> -
> /**
> * amdgpu_vm_pt_first_dfs - start a deep first search
> *
> @@ -919,74 +878,51 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> * Returns:
> * 0 on success, errno otherwise.
> */
> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> - struct amdgpu_vm *vm,
> - uint64_t saddr, uint64_t size)
> +static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> + struct amdgpu_vm *vm,
> + struct amdgpu_vm_pt_cursor *cursor)
> {
> - struct amdgpu_vm_pt_cursor cursor;
> + struct amdgpu_vm_pt *entry = cursor->entry;
> + struct amdgpu_bo_param bp;
> struct amdgpu_bo *pt;
> - uint64_t eaddr;
> int r;
>
> - /* validate the parameters */
> - if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
> - return -EINVAL;
> + if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
> + unsigned num_entries;
>
> - eaddr = saddr + size - 1;
> -
> - saddr /= AMDGPU_GPU_PAGE_SIZE;
> - eaddr /= AMDGPU_GPU_PAGE_SIZE;
> -
> - if (eaddr >= adev->vm_manager.max_pfn) {
> - dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
> - eaddr, adev->vm_manager.max_pfn);
> - return -EINVAL;
> + num_entries = amdgpu_vm_num_entries(adev, cursor->level);
> + entry->entries = kvmalloc_array(num_entries,
> + sizeof(*entry->entries),
> + GFP_KERNEL | __GFP_ZERO);
> + if (!entry->entries)
> + return -ENOMEM;
> }
>
> - for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
> - struct amdgpu_vm_pt *entry = cursor.entry;
> - struct amdgpu_bo_param bp;
> -
> - if (cursor.level < AMDGPU_VM_PTB) {
> - unsigned num_entries;
> -
> - num_entries = amdgpu_vm_num_entries(adev, cursor.level);
> - entry->entries = kvmalloc_array(num_entries,
> - sizeof(*entry->entries),
> - GFP_KERNEL |
> - __GFP_ZERO);
> - if (!entry->entries)
> - return -ENOMEM;
> - }
> -
> -
> - if (entry->base.bo)
> - continue;
> -
> - amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
> -
> - r = amdgpu_bo_create(adev, &bp, &pt);
> - if (r)
> - return r;
> -
> - if (vm->use_cpu_for_update) {
> - r = amdgpu_bo_kmap(pt, NULL);
> - if (r)
> - goto error_free_pt;
> - }
> + if (entry->base.bo)
> + return 0;
>
> - /* Keep a reference to the root directory to avoid
> - * freeing them up in the wrong order.
> - */
> - pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
> + amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
>
> - amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> + r = amdgpu_bo_create(adev, &bp, &pt);
> + if (r)
> + return r;
>
> - r = amdgpu_vm_clear_bo(adev, vm, pt);
> + if (vm->use_cpu_for_update) {
> + r = amdgpu_bo_kmap(pt, NULL);
> if (r)
> goto error_free_pt;
> }
>
> + /* Keep a reference to the root directory to avoid
> + * freeing them up in the wrong order.
> + */
> + pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
> + amdgpu_vm_bo_base_init(&entry->base, vm, pt);
> +
> + r = amdgpu_vm_clear_bo(adev, vm, pt);
> + if (r)
> + goto error_free_pt;
> +
> return 0;
>
> error_free_pt:
> @@ -1655,6 +1591,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> struct amdgpu_vm_pt_cursor cursor;
> uint64_t frag_start = start, frag_end;
> unsigned int frag;
> + int r;
>
> /* figure out the initial fragment */
> amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
> @@ -1662,12 +1599,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
> /* walk over the address space and update the PTs */
> amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
> while (cursor.pfn < end) {
> - struct amdgpu_bo *pt = cursor.entry->base.bo;
> unsigned shift, parent_shift, mask;
> uint64_t incr, entry_end, pe_start;
> + struct amdgpu_bo *pt;
>
> - if (!pt)
> - return -ENOENT;
> + r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
> + if (r)
> + return r;
> +
> + pt = cursor.entry->base.bo;
>
> /* The root level can't be a huge page */
> if (cursor.level == adev->vm_manager.root_level) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 81ff8177f092..116605c038d2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
> int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> int (*callback)(void *p, struct amdgpu_bo *bo),
> void *param);
> -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
> - struct amdgpu_vm *vm,
> - uint64_t saddr, uint64_t size);
> int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
> int amdgpu_vm_update_directories(struct amdgpu_device *adev,
> struct amdgpu_vm *vm);