[PATCH 03/11] drm/amdkfd: Improve amdgpu_vm_handle_moved
Felix Kuehling
felix.kuehling at amd.com
Wed Nov 1 17:09:58 UTC 2023
On 2023-10-17 17:13, Felix Kuehling wrote:
> Let amdgpu_vm_handle_moved update all BO VA mappings of BOs reserved by
> the caller. This will be useful for handling extra BO VA mappings in
> KFD VMs that are managed through the render node API.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> Reviewed-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 22 +--------------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 19 +++++++++++++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 ++-
> 4 files changed, 18 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 74769afaa33d..c8f2907ebd6f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1113,7 +1113,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
> struct amdgpu_vm *vm = &fpriv->vm;
> struct amdgpu_bo_list_entry *e;
> struct amdgpu_bo_va *bo_va;
> - struct amdgpu_bo *bo;
> unsigned int i;
> int r;
>
> @@ -1141,26 +1140,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
> return r;
> }
>
> - amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> - /* ignore duplicates */
> - bo = ttm_to_amdgpu_bo(e->tv.bo);
> - if (!bo)
> - continue;
> -
> - bo_va = e->bo_va;
> - if (bo_va == NULL)
> - continue;
> -
> - r = amdgpu_vm_bo_update(adev, bo_va, false);
> - if (r)
> - return r;
> -
> - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
> - if (r)
> - return r;
> - }
FYI, removing this loop seemed to cause PSDB failures, mostly in RADV
tests. It may have been a glitch in the infrastructure, but the failures
were consistent on the three subsequent patches, too. Restoring this
loop seems to make the tests pass, so I'll do that for now. I don't have
time to debug CS with RADV, and this change is not needed for my ROCm work.
Regards,
Felix
> -
> - r = amdgpu_vm_handle_moved(adev, vm);
> + r = amdgpu_vm_handle_moved(adev, vm, &p->ticket);
> if (r)
> return r;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index b5e28fa3f414..e7e87a3b2601 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -409,7 +409,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
> if (!r)
> r = amdgpu_vm_clear_freed(adev, vm, NULL);
> if (!r)
> - r = amdgpu_vm_handle_moved(adev, vm);
> + r = amdgpu_vm_handle_moved(adev, vm, ticket);
>
> if (r && r != -EBUSY)
> DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index d72daf15662f..c586d0e93d75 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1285,6 +1285,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
> *
> * @adev: amdgpu_device pointer
> * @vm: requested vm
> + * @ticket: optional reservation ticket used to reserve the VM
> *
> * Make sure all BOs which are moved are updated in the PTs.
> *
> @@ -1294,11 +1295,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
> * PTs have to be reserved!
> */
> int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
> - struct amdgpu_vm *vm)
> + struct amdgpu_vm *vm,
> + struct ww_acquire_ctx *ticket)
> {
> struct amdgpu_bo_va *bo_va;
> struct dma_resv *resv;
> - bool clear;
> + bool clear, unlock;
> int r;
>
> spin_lock(&vm->status_lock);
> @@ -1321,17 +1323,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
> spin_unlock(&vm->status_lock);
>
> /* Try to reserve the BO to avoid clearing its ptes */
> - if (!adev->debug_vm && dma_resv_trylock(resv))
> + if (!adev->debug_vm && dma_resv_trylock(resv)) {
> clear = false;
> + unlock = true;
> + /* The caller is already holding the reservation lock */
> + } else if (ticket && dma_resv_locking_ctx(resv) == ticket) {
> + clear = false;
> + unlock = false;
> /* Somebody else is using the BO right now */
> - else
> + } else {
> clear = true;
> + unlock = false;
> + }
>
> r = amdgpu_vm_bo_update(adev, bo_va, clear);
> if (r)
> return r;
>
> - if (!clear)
> + if (unlock)
> dma_resv_unlock(resv);
> spin_lock(&vm->status_lock);
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 6e71978db13f..ebcc75132b74 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -432,7 +432,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
> struct amdgpu_vm *vm,
> struct dma_fence **fence);
> int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
> - struct amdgpu_vm *vm);
> + struct amdgpu_vm *vm,
> + struct ww_acquire_ctx *ticket);
> void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
> struct amdgpu_vm *vm, struct amdgpu_bo *bo);
> int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
More information about the amd-gfx
mailing list