[PATCH 3/3] drm/amdgpu: stop adding VM updates fences to the resv obj
Felix Kuehling
felix.kuehling at amd.com
Thu Dec 5 00:15:16 UTC 2019
[+Alejandro]
On 2019-12-04 10:38 a.m., Christian König wrote:
> This way we can do updates even without the resv obj locked.
This could use a bit more explanation. This change depends on the
previous one that adds explicit synchronization with page table updates
during command submission in amdgpu_cs.c. You're adding two fences to
the VM for the last direct/delayed page table update that is used to
prevent eviction of busy page tables (that no longer have the update
fence in their resv) and to implement amdgpu_vm_wait_idle.
In addition to this patch, updating page tables without the resv locked
still needs additional safeguards to ensure the page tables are resident
while preparing the page table update. That will come in Alejandro's
changes for invalidating PTEs in MMU notifiers.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 6 ++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 30 ++++++++++++++++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 11 +++++---
> 4 files changed, 40 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index 9b28c1eb5f49..7f17c06b8a3f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -241,10 +241,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
> /* VM updates are only interesting
> * for other VM updates and moves.
> */
> - if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
> - (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
> - ((owner == AMDGPU_FENCE_OWNER_VM) !=
> - (fence_owner == AMDGPU_FENCE_OWNER_VM)))
> + if (owner == AMDGPU_FENCE_OWNER_VM &&
> + fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED)
I don't really understand this condition now. Why does this function
need to change at all? This function doesn't add fences to resvs; it
collects fences from a resv into an amdgpu_sync object. Is this just a
simplification because you assume that VM fences can no longer be found
in resvs? Would it be worth adding a WARN_ONCE to check that assumption?
Maybe the comment above should also be updated. As I read it, the
condition now expresses something like /* VM updates don't wait for
user-mode fences. */
> continue;
>
> /* Ignore fence from the same owner and explicit one as
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index a22bd57129d1..0d700e8154c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -562,8 +562,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
> {
> entry->priority = 0;
> entry->tv.bo = &vm->root.base.bo->tbo;
> - /* One for the VM updates, one for TTM and one for the CS job */
> - entry->tv.num_shared = 3;
> + /* One for TTM and one for the CS job */
> + entry->tv.num_shared = 2;
> entry->user_pages = NULL;
> list_add(&entry->tv.head, validated);
> }
> @@ -2522,6 +2522,11 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
> if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true))
> return false;
>
> + /* Don't evict VM page tables while they are updated */
> + if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
> + !dma_fence_is_signaled(bo_base->vm->last_delayed))
> + return false;
> +
> return true;
> }
>
> @@ -2687,8 +2692,16 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
> */
> long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
> {
> - return dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv,
> - true, true, timeout);
> + timeout = dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv,
> + true, true, timeout);
> + if (timeout <= 0)
> + return timeout;
> +
> + timeout = dma_fence_wait_timeout(vm->last_direct, true, timeout);
> + if (timeout <= 0)
> + return timeout;
> +
> + return dma_fence_wait_timeout(vm->last_delayed, true, timeout);
> }
>
> /**
> @@ -2757,6 +2770,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> else
> vm->update_funcs = &amdgpu_vm_sdma_funcs;
> vm->last_update = NULL;
> + vm->last_direct = dma_fence_get_stub();
> + vm->last_delayed = dma_fence_get_stub();
>
> amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
> if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
> @@ -2807,6 +2822,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> vm->root.base.bo = NULL;
>
> error_free_delayed:
> + dma_fence_put(vm->last_direct);
> + dma_fence_put(vm->last_delayed);
> drm_sched_entity_destroy(&vm->delayed);
>
> error_free_direct:
> @@ -3007,6 +3024,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> vm->pasid = 0;
> }
>
> + dma_fence_wait(vm->last_direct, false);
> + dma_fence_put(vm->last_direct);
> + dma_fence_wait(vm->last_delayed, false);
> + dma_fence_put(vm->last_delayed);
> +
> list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
> if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
> amdgpu_vm_prt_fini(adev, vm);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index db561765453b..d93ea9ad879e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -269,6 +269,10 @@ struct amdgpu_vm {
> struct drm_sched_entity direct;
> struct drm_sched_entity delayed;
>
> + /* Last submission to the scheduler entities */
> + struct dma_fence *last_direct;
> + struct dma_fence *last_delayed;
> +
> unsigned int pasid;
> /* dedicated to vm */
> struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> index 832db59f441e..04e79c75c87e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> @@ -95,11 +95,10 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
> static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
> struct dma_fence **fence)
> {
> - struct amdgpu_bo *root = p->vm->root.base.bo;
> struct amdgpu_ib *ib = p->job->ibs;
> struct drm_sched_entity *entity;
> + struct dma_fence *f, *tmp;
> struct amdgpu_ring *ring;
> - struct dma_fence *f;
> int r;
>
> entity = p->direct ? &p->vm->direct : &p->vm->delayed;
> @@ -112,7 +111,13 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
> if (r)
> goto error;
>
> - amdgpu_bo_fence(root, f, true);
This seems to be the change advertised in the headline, where it no
longer adds the shared VM fence to the PD resv. All the rest of this
patch I'm trying to explain to myself as I'm reviewing it. Do my
comments above sound about right? A better patch description would have
saved me about half an hour of head-scratching.
Thanks,
 Felix
> + tmp = dma_fence_get(f);
> + if (p->direct)
> + swap(p->vm->last_direct, tmp);
> + else
> + swap(p->vm->last_delayed, tmp);
> + dma_fence_put(tmp);
> +
> if (fence && !p->direct)
> swap(*fence, f);
> dma_fence_put(f);
More information about the amd-gfx
mailing list