[PATCH 4/5] drm/amdgpu: add VM eviction lock v2

Felix Kuehling felix.kuehling at amd.com
Thu Dec 5 16:33:45 UTC 2019


On 2019-12-05 8:39 a.m., Christian König wrote:
> This allows to invalidate VM entries without taking the reservation lock.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 39 +++++++++++++++++++++-----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 +++
>   2 files changed, 36 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0d700e8154c4..839d6df394fc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -656,7 +656,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   			      void *param)
>   {
>   	struct amdgpu_vm_bo_base *bo_base, *tmp;
> -	int r = 0;
> +	int r;
>   
>   	vm->bulk_moveable &= list_empty(&vm->evicted);
>   
> @@ -665,7 +665,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   
>   		r = validate(param, bo);
>   		if (r)
> -			break;
> +			return r;
>   
>   		if (bo->tbo.type != ttm_bo_type_kernel) {
>   			amdgpu_vm_bo_moved(bo_base);
> @@ -678,7 +678,11 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		}
>   	}
>   
> -	return r;
> +	mutex_lock(&vm->eviction_lock);
> +	vm->evicting = false;
> +	mutex_unlock(&vm->eviction_lock);
> +
> +	return 0;
>   }
>   
>   /**
> @@ -1555,15 +1559,25 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>   	if (!(flags & AMDGPU_PTE_VALID))
>   		owner = AMDGPU_FENCE_OWNER_KFD;
>   
> +	mutex_lock(&vm->eviction_lock);
> +	if (vm->evicting) {
> +		r = EINPROGRESS;

Not sure about this error code. As far as I can find, this is normally 
used on non-blocking sockets. I found this explanation: 
https://stackoverflow.com/questions/8277970/what-are-possible-reason-for-socket-error-einprogress-in-solaris

Quote: "so there is another error for non-blocking connect: EINPROGRESS, 
which tells you that the operation is in progress and you should check 
its status later."

This call is neither non-blocking nor is the requested page table update 
in progress when this error is returned. So I'd think a better error to 
return here would be EBUSY.

Other than that, this patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> +		goto error_unlock;
> +	}
> +
>   	r = vm->update_funcs->prepare(&params, owner, exclusive);
>   	if (r)
> -		return r;
> +		goto error_unlock;
>   
>   	r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
>   	if (r)
> -		return r;
> +		goto error_unlock;
>   
> -	return vm->update_funcs->commit(&params, fence);
> +	r = vm->update_funcs->commit(&params, fence);
> +
> +error_unlock:
> +	mutex_unlock(&vm->eviction_lock);
> +	return r;
>   }
>   
>   /**
> @@ -2522,11 +2536,19 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>   	if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true))
>   		return false;
>   
> +	/* Try to block ongoing updates */
> +	if (!mutex_trylock(&bo_base->vm->eviction_lock))
> +		return false;
> +
>   	/* Don't evict VM page tables while they are updated */
>   	if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
> -	    !dma_fence_is_signaled(bo_base->vm->last_delayed))
> +	    !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
> +		mutex_unlock(&bo_base->vm->eviction_lock);
>   		return false;
> +	}
>   
> +	bo_base->vm->evicting = true;
> +	mutex_unlock(&bo_base->vm->eviction_lock);
>   	return true;
>   }
>   
> @@ -2773,6 +2795,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	vm->last_direct = dma_fence_get_stub();
>   	vm->last_delayed = dma_fence_get_stub();
>   
> +	mutex_init(&vm->eviction_lock);
> +	vm->evicting = false;
> +
>   	amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
>   	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
>   		bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index d93ea9ad879e..d5613d184e99 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -242,6 +242,10 @@ struct amdgpu_vm {
>   	/* tree of virtual addresses mapped */
>   	struct rb_root_cached	va;
>   
> +	/* Lock to prevent eviction while we are updating page tables */
> +	struct mutex		eviction_lock;
> +	bool			evicting;
> +
>   	/* BOs who needs a validation */
>   	struct list_head	evicted;
>   


More information about the amd-gfx mailing list