[PATCH 05/13] drm/xe: Use xe_vma_ops to implement xe_vm_rebind

Matthew Brost matthew.brost at intel.com
Fri Apr 19 04:14:19 UTC 2024


On Thu, Apr 18, 2024 at 09:43:06PM -0600, Zeng, Oak wrote:
> 
> 
> > -----Original Message-----
> > From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of
> > Matthew Brost
> > Sent: Wednesday, April 10, 2024 1:41 AM
> > To: intel-xe at lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost at intel.com>
> > Subject: [PATCH 05/13] drm/xe: Use xe_vma_ops to implement xe_vm_rebind
> > 
> > All page table updates are moving to an xe_vma_ops interface to
> > implement 1 job per VM bind IOCTL.
> 
> Just want to make sure I understand this correctly: after this patch, the rebind still uses many jobs (one job per VMA), right?
> 

Yes. A follow-on series will convert this to one job for the entire rebind
list.
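
To spell that out: with this patch, xe_vm_rebind() builds a single
xe_vma_ops list (one xe_vma_op per VMA on the rebind list) and hands the
whole thing to ops_execute() in one call, but ops_execute() still walks
the ops and submits one bind job per VMA. Conceptually it is still doing
something like the following (illustrative sketch only, not the literal
body of ops_execute(); names taken from the quoted diff):

	list_for_each_entry_safe(op, next_op, &vops->list, link) {
		/* still one bind job per VMA op for now */
		fence = xe_vma_op_execute(vm, op);
		if (IS_ERR(fence))
			break;
	}

The follow-on series changes this inner loop so the whole list is turned
into a single job.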

> 
> > Convert xe_vm_rebind to use an xe_vma_ops-based interface.
> > 
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_vm.c | 78 +++++++++++++++++++++++++++++++-------
> >  1 file changed, 64 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 4cd485d5bc0a..9d82396cf5d5 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -811,37 +811,87 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
> >  		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
> >  }
> > 
> > -static struct dma_fence *
> > -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> > -	       struct xe_sync_entry *syncs, u32 num_syncs,
> > -	       bool first_op, bool last_op);
> > +static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
> > +				  u8 tile_mask)
> > +{
> > +	INIT_LIST_HEAD(&op->link);
> > +	op->base.op = DRM_GPUVA_OP_MAP;
> > +	op->base.map.va.addr = vma->gpuva.va.addr;
> > +	op->base.map.va.range = vma->gpuva.va.range;
> > +	op->base.map.gem.obj = vma->gpuva.gem.obj;
> > +	op->base.map.gem.offset = vma->gpuva.gem.offset;
> > +	op->map.vma = vma;
> > +	op->map.immediate = true;
> > +	op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
> > +	op->map.is_null = xe_vma_is_null(vma);
> > +}
> > +
> > +static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
> > +				u8 tile_mask)
> > +{
> > +	struct xe_vma_op *op;
> > +
> > +	op = kzalloc(sizeof(*op), GFP_KERNEL);
> > +	if (!op)
> > +		return -ENOMEM;
> > +
> > +	xe_vm_populate_rebind(op, vma, tile_mask);
> > +	list_add_tail(&op->link, &vops->list);
> > +
> > +	return 0;
> > +}
> > +
> > +static struct dma_fence *ops_execute(struct xe_vm *vm,
> > +				     struct xe_vma_ops *vops,
> > +				     bool cleanup);
> > +static void xe_vma_ops_init(struct xe_vma_ops *vops);
> > 
> >  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> >  {
> >  	struct dma_fence *fence;
> >  	struct xe_vma *vma, *next;
> > +	struct xe_vma_ops vops;
> > +	struct xe_vma_op *op, *next_op;
> > +	int err;
> > 
> >  	lockdep_assert_held(&vm->lock);
> > -	if (xe_vm_in_lr_mode(vm) && !rebind_worker)
> > +	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
> > +	    list_empty(&vm->rebind_list))
> >  		return 0;
> > 
> > +	xe_vma_ops_init(&vops);
> > +
> >  	xe_vm_assert_held(vm);
> > -	list_for_each_entry_safe(vma, next, &vm->rebind_list,
> > -				 combined_links.rebind) {
> > +	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
> >  		xe_assert(vm->xe, vma->tile_present);
> > 
> > -		list_del_init(&vma->combined_links.rebind);
> >  		if (rebind_worker)
> >  			trace_xe_vma_rebind_worker(vma);
> >  		else
> >  			trace_xe_vma_rebind_exec(vma);
> > -		fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
> > -		if (IS_ERR(fence))
> > -			return PTR_ERR(fence);
> > +
> > +		err = xe_vm_ops_add_rebind(&vops, vma,
> > +					   vma->tile_present);
> > +		if (err)
> > +			goto free_ops;
> > +	}
> > +
> > +	fence = ops_execute(vm, &vops, false);
> > +	if (IS_ERR(fence)) {
> > +		err = PTR_ERR(fence);
> 
> So here, if the ops_execute above partially succeeds (some VMA binds fail, some succeed), the VMAs that were successfully bound are kept on the VM's rebind_list. Is this the correct behavior? Next time we will rebind them again....
> 

The VM is killed if any VMA op fails, so it doesn't really matter; it is
also safe to issue a rebind twice.

In the follow-up series, once we have one job for the entire rebind list,
we can cope with errors and not kill the VM. In that case we must leave
everything on the rebind list.

So this patch is correct both now and for the follow-on series.
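
Concretely, with this patch the rebind list is only drained on full
success; on any error the xe_vma_op structs are freed but every VMA stays
on vm->rebind_list, so a later xe_vm_rebind() simply rebuilds the ops and
re-issues the binds. Condensed from the quoted diff (all names as in the
patch, error handling abbreviated):

	fence = ops_execute(vm, &vops, false);
	if (IS_ERR(fence)) {
		/* on failure the VM gets killed; VMAs stay on vm->rebind_list */
		err = PTR_ERR(fence);
	} else {
		dma_fence_put(fence);
		list_for_each_entry_safe(vma, next, &vm->rebind_list,
					 combined_links.rebind)
			list_del_init(&vma->combined_links.rebind);
	}
	/* the xe_vma_op structs are freed either way via the free_ops label */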

Matt

> 
> Oak
> 
> 
> > +	} else {
> >  		dma_fence_put(fence);
> > +		list_for_each_entry_safe(vma, next, &vm->rebind_list,
> > +					 combined_links.rebind)
> > +			list_del_init(&vma->combined_links.rebind);
> > +	}
> > +free_ops:
> > +	list_for_each_entry_safe(op, next_op, &vops.list, link) {
> > +		list_del(&op->link);
> > +		kfree(op);
> >  	}
> > 
> > -	return 0;
> > +	return err;
> >  }
> > 
> >  static void xe_vma_free(struct xe_vma *vma)
> > @@ -2516,7 +2566,7 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma,
> >  {
> >  	struct dma_fence *fence = NULL;
> > 
> > -	lockdep_assert_held_write(&vm->lock);
> > +	lockdep_assert_held(&vm->lock);
> > 
> >  	xe_vm_assert_held(vm);
> >  	xe_bo_assert_held(xe_vma_bo(vma));
> > @@ -2635,7 +2685,7 @@ xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
> >  {
> >  	struct dma_fence *fence = ERR_PTR(-ENOMEM);
> > 
> > -	lockdep_assert_held_write(&vm->lock);
> > +	lockdep_assert_held(&vm->lock);
> > 
> >  	switch (op->base.op) {
> >  	case DRM_GPUVA_OP_MAP:
> > --
> > 2.34.1
> 

