[PATCH 2/7] drm/xe: Rework rebinding

Matthew Brost matthew.brost at intel.com
Thu Mar 21 19:14:58 UTC 2024


On Thu, Mar 21, 2024 at 12:37:12PM +0100, Thomas Hellström wrote:
> Instead of handling the vm's rebind fence separately,
> which is error prone if they are not strictly ordered,
> attach rebind fences as kernel fences to the vm's resv.
> 

See my comment on the previous patch: I do not like the updates to
__xe_pt_bind_vma, but I guess I can live with them. Otherwise LGTM.

With that:
Reviewed-by: Matthew Brost <matthew.brost at intel.com>

> Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: <stable at vger.kernel.org> # v6.8+
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
>  drivers/gpu/drm/xe/xe_exec.c     | 31 +++----------------------------
>  drivers/gpu/drm/xe/xe_pt.c       |  2 +-
>  drivers/gpu/drm/xe/xe_vm.c       | 27 +++++++++------------------
>  drivers/gpu/drm/xe/xe_vm.h       |  2 +-
>  drivers/gpu/drm/xe/xe_vm_types.h |  3 ---
>  5 files changed, 14 insertions(+), 51 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> index 7692ebfe7d47..759497d4a102 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -152,7 +152,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	struct drm_exec *exec = &vm_exec.exec;
>  	u32 i, num_syncs = 0, num_ufence = 0;
>  	struct xe_sched_job *job;
> -	struct dma_fence *rebind_fence;
>  	struct xe_vm *vm;
>  	bool write_locked, skip_retry = false;
>  	ktime_t end = 0;
> @@ -294,35 +293,11 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  	 * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
>  	 * VM mode only.
>  	 */
> -	rebind_fence = xe_vm_rebind(vm, false);
> -	if (IS_ERR(rebind_fence)) {
> -		err = PTR_ERR(rebind_fence);
> +	err = xe_vm_rebind(vm, false);
> +	if (err)
>  		goto err_put_job;
> -	}
> -
> -	/*
> -	 * We store the rebind_fence in the VM so subsequent execs don't get
> -	 * scheduled before the rebinds of userptrs / evicted BOs is complete.
> -	 */
> -	if (rebind_fence) {
> -		dma_fence_put(vm->rebind_fence);
> -		vm->rebind_fence = rebind_fence;
> -	}
> -	if (vm->rebind_fence) {
> -		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> -			     &vm->rebind_fence->flags)) {
> -			dma_fence_put(vm->rebind_fence);
> -			vm->rebind_fence = NULL;
> -		} else {
> -			dma_fence_get(vm->rebind_fence);
> -			err = drm_sched_job_add_dependency(&job->drm,
> -							   vm->rebind_fence);
> -			if (err)
> -				goto err_put_job;
> -		}
> -	}
>  
> -	/* Wait behind munmap style rebinds */
> +	/* Wait behind rebinds */
>  	if (!xe_vm_in_lr_mode(vm)) {
>  		err = drm_sched_job_add_resv_dependencies(&job->drm,
>  							  xe_vm_resv(vm),
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 21bc0d13fccf..0484ed5b495f 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -1298,7 +1298,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
>  		}
>  
>  		/* add shared fence now for pagetable delayed destroy */
> -		dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind &&
> +		dma_resv_add_fence(xe_vm_resv(vm), fence, rebind ||
>  				   last_munmap_rebind ?
>  				   DMA_RESV_USAGE_KERNEL :
>  				   DMA_RESV_USAGE_BOOKKEEP);
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 80d43d75b1da..35fba6e3f889 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -522,7 +522,6 @@ static void preempt_rebind_work_func(struct work_struct *w)
>  {
>  	struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
>  	struct drm_exec exec;
> -	struct dma_fence *rebind_fence;
>  	unsigned int fence_count = 0;
>  	LIST_HEAD(preempt_fences);
>  	ktime_t end = 0;
> @@ -568,18 +567,11 @@ static void preempt_rebind_work_func(struct work_struct *w)
>  	if (err)
>  		goto out_unlock;
>  
> -	rebind_fence = xe_vm_rebind(vm, true);
> -	if (IS_ERR(rebind_fence)) {
> -		err = PTR_ERR(rebind_fence);
> +	err = xe_vm_rebind(vm, true);
> +	if (err)
>  		goto out_unlock;
> -	}
> -
> -	if (rebind_fence) {
> -		dma_fence_wait(rebind_fence, false);
> -		dma_fence_put(rebind_fence);
> -	}
>  
> -	/* Wait on munmap style VM unbinds */
> +	/* Wait on rebinds and munmap style VM unbinds */
>  	wait = dma_resv_wait_timeout(xe_vm_resv(vm),
>  				     DMA_RESV_USAGE_KERNEL,
>  				     false, MAX_SCHEDULE_TIMEOUT);
> @@ -773,14 +765,14 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
>  	       struct xe_sync_entry *syncs, u32 num_syncs,
>  	       bool first_op, bool last_op);
>  
> -struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> +int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
>  {
> -	struct dma_fence *fence = NULL;
> +	struct dma_fence *fence;
>  	struct xe_vma *vma, *next;
>  
>  	lockdep_assert_held(&vm->lock);
>  	if (xe_vm_in_lr_mode(vm) && !rebind_worker)
> -		return NULL;
> +		return 0;
>  
>  	xe_vm_assert_held(vm);
>  	list_for_each_entry_safe(vma, next, &vm->rebind_list,
> @@ -788,17 +780,17 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
>  		xe_assert(vm->xe, vma->tile_present);
>  
>  		list_del_init(&vma->combined_links.rebind);
> -		dma_fence_put(fence);
>  		if (rebind_worker)
>  			trace_xe_vma_rebind_worker(vma);
>  		else
>  			trace_xe_vma_rebind_exec(vma);
>  		fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
>  		if (IS_ERR(fence))
> -			return fence;
> +			return PTR_ERR(fence);
> +		dma_fence_put(fence);
>  	}
>  
> -	return fence;
> +	return 0;
>  }
>  
>  static void xe_vma_free(struct xe_vma *vma)
> @@ -1588,7 +1580,6 @@ static void vm_destroy_work_func(struct work_struct *w)
>  		XE_WARN_ON(vm->pt_root[id]);
>  
>  	trace_xe_vm_free(vm);
> -	dma_fence_put(vm->rebind_fence);
>  	kfree(vm);
>  }
>  
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index 6df1f1c7f85d..4853354336f2 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -207,7 +207,7 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
>  
>  int xe_vm_userptr_check_repin(struct xe_vm *vm);
>  
> -struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
> +int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
>  
>  int xe_vm_invalidate_vma(struct xe_vma *vma);
>  
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 5747f136d24d..badf3945083d 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -177,9 +177,6 @@ struct xe_vm {
>  	 */
>  	struct list_head rebind_list;
>  
> -	/** @rebind_fence: rebind fence from execbuf */
> -	struct dma_fence *rebind_fence;
> -
>  	/**
>  	 * @destroy_work: worker to destroy VM, needed as a dma_fence signaling
>  	 * from an irq context can be last put and the destroy needs to be able
> -- 
> 2.44.0
> 


More information about the Intel-xe mailing list