[PATCH 4/5] drm/amdgpu: add VM eviction lock v2
Felix Kuehling
felix.kuehling at amd.com
Thu Dec 5 16:33:45 UTC 2019
On 2019-12-05 8:39 a.m., Christian König wrote:
> This allows to invalidate VM entries without taking the reservation lock.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 39 +++++++++++++++++++++-----
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++
> 2 files changed, 36 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0d700e8154c4..839d6df394fc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -656,7 +656,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> void *param)
> {
> struct amdgpu_vm_bo_base *bo_base, *tmp;
> - int r = 0;
> + int r;
>
> vm->bulk_moveable &= list_empty(&vm->evicted);
>
> @@ -665,7 +665,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>
> r = validate(param, bo);
> if (r)
> - break;
> + return r;
>
> if (bo->tbo.type != ttm_bo_type_kernel) {
> amdgpu_vm_bo_moved(bo_base);
> @@ -678,7 +678,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> }
> }
>
> - return r;
> + mutex_lock(&vm->eviction_lock);
> + vm->evicting = false;
> + mutex_unlock(&vm->eviction_lock);
> +
> + return 0;
> }
>
> /**
> @@ -1555,15 +1559,25 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
> if (!(flags & AMDGPU_PTE_VALID))
> owner = AMDGPU_FENCE_OWNER_KFD;
>
> + mutex_lock(&vm->eviction_lock);
> + if (vm->evicting) {
> + r = EINPROGRESS;
Not sure about this error code. As far as I can find, this is normally
used on non-blocking sockets. I found this explanation:
https://stackoverflow.com/questions/8277970/what-are-possible-reason-for-socket-error-einprogress-in-solaris
Quote: "so there is another error for non-blocking connect: EINPROGRESS,
which tells you that the operation is in progress and you should check
its status later."
This call is neither non-blocking nor is the requested page table update
in progress when this error is returned. So I'd think a better error to
return here would be EBUSY.
Other than that, this patch is
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> + goto error_unlock;
> + }
> +
> r = vm->update_funcs->prepare(¶ms, owner, exclusive);
> if (r)
> - return r;
> + goto error_unlock;
>
> r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags);
> if (r)
> - return r;
> + goto error_unlock;
>
> - return vm->update_funcs->commit(¶ms, fence);
> + r = vm->update_funcs->commit(¶ms, fence);
> +
> +error_unlock:
> + mutex_unlock(&vm->eviction_lock);
> + return r;
> }
>
> /**
> @@ -2522,11 +2536,19 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
> if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true))
> return false;
>
> + /* Try to block ongoing updates */
> + if (!mutex_trylock(&bo_base->vm->eviction_lock))
> + return false;
> +
> /* Don't evict VM page tables while they are updated */
> if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
> - !dma_fence_is_signaled(bo_base->vm->last_delayed))
> + !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
> + mutex_unlock(&bo_base->vm->eviction_lock);
> return false;
> + }
>
> + bo_base->vm->evicting = true;
> + mutex_unlock(&bo_base->vm->eviction_lock);
> return true;
> }
>
> @@ -2773,6 +2795,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> vm->last_direct = dma_fence_get_stub();
> vm->last_delayed = dma_fence_get_stub();
>
> + mutex_init(&vm->eviction_lock);
> + vm->evicting = false;
> +
> amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
> if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
> bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index d93ea9ad879e..d5613d184e99 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -242,6 +242,10 @@ struct amdgpu_vm {
> /* tree of virtual addresses mapped */
> struct rb_root_cached va;
>
> + /* Lock to prevent eviction while we are updating page tables */
> + struct mutex eviction_lock;
> + bool evicting;
> +
> /* BOs who needs a validation */
> struct list_head evicted;
>
More information about the amd-gfx
mailing list