[PATCH 23/23] drm/ttm: remove bo->moving
Daniel Vetter
daniel at ffwll.ch
Tue Mar 29 16:02:32 UTC 2022
On Mon, Mar 21, 2022 at 02:58:56PM +0100, Christian König wrote:
> This is now handled by the DMA-buf framework in the dma_resv obj.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 13 ++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 7 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 11 +++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 11 ++++--
> drivers/gpu/drm/ttm/ttm_bo.c | 10 ++----
> drivers/gpu/drm/ttm/ttm_bo_util.c | 7 ----
> drivers/gpu/drm/ttm/ttm_bo_vm.c | 34 +++++++------------
> drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 6 ----
> include/drm/ttm/ttm_bo_api.h | 2 --
> 9 files changed, 40 insertions(+), 61 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index b461c3aab877..fe168b3cc3f2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -2406,6 +2406,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
> struct amdgpu_bo *bo = mem->bo;
> uint32_t domain = mem->domain;
> struct kfd_mem_attachment *attachment;
> + struct dma_resv_iter cursor;
> + struct dma_fence *fence;
>
> total_size += amdgpu_bo_size(bo);
>
> @@ -2420,10 +2422,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
> goto validate_map_fail;
> }
> }
> - ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
> - if (ret) {
> - pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
> - goto validate_map_fail;
> + dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
> + DMA_RESV_USAGE_KERNEL, fence) {
> + ret = amdgpu_sync_fence(&sync_obj, fence);
> + if (ret) {
> + pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
> + goto validate_map_fail;
> + }
> }
> list_for_each_entry(attachment, &mem->attachments, list) {
> if (!attachment->is_mapped)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 1618b6847c69..887fa3f4284e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -609,9 +609,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
> if (unlikely(r))
> goto fail_unreserve;
>
> - amdgpu_bo_fence(bo, fence, false);
> - dma_fence_put(bo->tbo.moving);
> - bo->tbo.moving = dma_fence_get(fence);
> + dma_resv_add_fence(bo->tbo.base.resv, fence,
> + DMA_RESV_USAGE_KERNEL);
> dma_fence_put(fence);
> }
> if (!bp->resv)
> @@ -1307,7 +1306,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>
> r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
> if (!WARN_ON(r)) {
> - amdgpu_bo_fence(abo, fence, false);
> + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
> dma_fence_put(fence);
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> index e3fbf0f10add..31913ae86de6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> @@ -74,13 +74,12 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
> {
> unsigned int i;
> uint64_t value;
> - int r;
> + long r;
>
> - if (vmbo->bo.tbo.moving) {
> - r = dma_fence_wait(vmbo->bo.tbo.moving, true);
> - if (r)
> - return r;
> - }
> + r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL,
> + true, MAX_SCHEDULE_TIMEOUT);
> + if (r < 0)
> + return r;
>
> pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> index dbb551762805..bdb44cee19d3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> @@ -204,14 +204,19 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
> struct amdgpu_bo *bo = &vmbo->bo;
> enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
> : AMDGPU_IB_POOL_DELAYED;
> + struct dma_resv_iter cursor;
> unsigned int i, ndw, nptes;
> + struct dma_fence *fence;
> uint64_t *pte;
> int r;
>
> /* Wait for PD/PT moves to be completed */
> - r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving);
> - if (r)
> - return r;
> + dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
> + DMA_RESV_USAGE_KERNEL, fence) {
> + r = amdgpu_sync_fence(&p->job->sync, fence);
Just a bikeshed, but I think an amdgpu_sync_resv(resv, usage) helper would
be neat.
> + if (r)
> + return r;
> + }
>
> do {
> ndw = p->num_dw_left;
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 8fea9f88d118..9bce692075da 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -468,7 +468,6 @@ static void ttm_bo_release(struct kref *kref)
> dma_resv_unlock(bo->base.resv);
>
> atomic_dec(&ttm_glob.bo_count);
> - dma_fence_put(bo->moving);
> bo->destroy(bo);
> }
>
> @@ -737,9 +736,8 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
> }
>
> /*
> - * Add the last move fence to the BO and reserve a new shared slot. We only use
> - * a shared slot to avoid unecessary sync and rely on the subsequent bo move to
> - * either stall or use an exclusive fence respectively set bo->moving.
> + * Add the last move fence to the BO as kernel dependency and reserve a new
> + * fence slot.
> */
> static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
> struct ttm_resource_manager *man,
> @@ -769,9 +767,6 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
> dma_fence_put(fence);
> return ret;
> }
> -
> - dma_fence_put(bo->moving);
> - bo->moving = fence;
> return 0;
> }
>
> @@ -978,7 +973,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
> bo->bdev = bdev;
> bo->type = type;
> bo->page_alignment = page_alignment;
> - bo->moving = NULL;
> bo->pin_count = 0;
> bo->sg = sg;
> if (resv) {
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 98e1c804519e..a2e3a9626198 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -229,7 +229,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
> atomic_inc(&ttm_glob.bo_count);
> INIT_LIST_HEAD(&fbo->base.ddestroy);
> INIT_LIST_HEAD(&fbo->base.lru);
> - fbo->base.moving = NULL;
> drm_vma_node_reset(&fbo->base.base.vma_node);
>
> kref_init(&fbo->base.kref);
> @@ -501,9 +500,6 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo,
> * operation has completed.
> */
>
> - dma_fence_put(bo->moving);
> - bo->moving = dma_fence_get(fence);
> -
> ret = ttm_buffer_object_transfer(bo, &ghost_obj);
> if (ret)
> return ret;
> @@ -547,9 +543,6 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo,
> spin_unlock(&from->move_lock);
>
> ttm_resource_free(bo, &bo->resource);
> -
> - dma_fence_put(bo->moving);
> - bo->moving = dma_fence_get(fence);
> }
>
> int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> index 08ba083a80d2..5b324f245265 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> @@ -46,17 +46,13 @@
> static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
> struct vm_fault *vmf)
> {
> - vm_fault_t ret = 0;
> - int err = 0;
> -
> - if (likely(!bo->moving))
> - goto out_unlock;
> + long err = 0;
>
> /*
> * Quick non-stalling check for idle.
> */
> - if (dma_fence_is_signaled(bo->moving))
> - goto out_clear;
> + if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_KERNEL))
> + return 0;
>
> /*
> * If possible, avoid waiting for GPU with mmap_lock
> @@ -64,34 +60,30 @@ static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
> * is the first attempt.
> */
> if (fault_flag_allow_retry_first(vmf->flags)) {
> - ret = VM_FAULT_RETRY;
> if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
> - goto out_unlock;
> + return VM_FAULT_RETRY;
>
> ttm_bo_get(bo);
> mmap_read_unlock(vmf->vma->vm_mm);
> - (void) dma_fence_wait(bo->moving, true);
> + (void)dma_resv_wait_timeout(bo->base.resv,
> + DMA_RESV_USAGE_KERNEL, true,
> + MAX_SCHEDULE_TIMEOUT);
> dma_resv_unlock(bo->base.resv);
> ttm_bo_put(bo);
> - goto out_unlock;
> + return VM_FAULT_RETRY;
> }
>
> /*
> * Ordinary wait.
> */
> - err = dma_fence_wait(bo->moving, true);
> - if (unlikely(err != 0)) {
> - ret = (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
> + err = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL, true,
> + MAX_SCHEDULE_TIMEOUT);
> + if (unlikely(err < 0)) {
> + return (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
> VM_FAULT_NOPAGE;
> - goto out_unlock;
> }
>
> -out_clear:
> - dma_fence_put(bo->moving);
> - bo->moving = NULL;
> -
> -out_unlock:
> - return ret;
> + return 0;
> }
>
> static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> index f999fdd927df..c6d02c98a19a 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> @@ -1163,12 +1163,6 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
> *num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start,
> PAGE_SIZE);
> vmw_bo_fence_single(bo, NULL);
> - if (bo->moving)
> - dma_fence_put(bo->moving);
> -
> - return dma_resv_get_singleton(bo->base.resv,
> - DMA_RESV_USAGE_KERNEL,
> - &bo->moving);
This seems to be entirely misplaced, and I'm pretty sure it doesn't even
compile in the interim state.
> }
>
> return 0;
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index c17b2df9178b..4c7134550262 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -97,7 +97,6 @@ struct ttm_tt;
> * @lru: List head for the lru list.
> * @ddestroy: List head for the delayed destroy list.
> * @swap: List head for swap LRU list.
> - * @moving: Fence set when BO is moving
> * @offset: The current GPU offset, which can have different meanings
> * depending on the memory type. For SYSTEM type memory, it should be 0.
> * @cur_placement: Hint of current placement.
> @@ -150,7 +149,6 @@ struct ttm_buffer_object {
> * Members protected by a bo reservation.
> */
>
> - struct dma_fence *moving;
> unsigned priority;
> unsigned pin_count;
Aside from the vmwgfx thing this looks good. With the vmwgfx patch split
issue (I think it's just that) fixed:
Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
>
> --
> 2.25.1
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
More information about the dri-devel
mailing list