[PATCH 05/13] drm/xe: Use xe_vma_ops to implement xe_vm_rebind

Zeng, Oak oak.zeng at intel.com
Tue Apr 23 03:17:25 UTC 2024



> -----Original Message-----
> From: Brost, Matthew <matthew.brost at intel.com>
> Sent: Friday, April 19, 2024 12:14 AM
> To: Zeng, Oak <oak.zeng at intel.com>
> Cc: intel-xe at lists.freedesktop.org
> Subject: Re: [PATCH 05/13] drm/xe: Use xe_vma_ops to implement
> xe_vm_rebind
> 
> On Thu, Apr 18, 2024 at 09:43:06PM -0600, Zeng, Oak wrote:
> >
> >
> > > -----Original Message-----
> > > From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of
> > > Matthew Brost
> > > Sent: Wednesday, April 10, 2024 1:41 AM
> > > To: intel-xe at lists.freedesktop.org
> > > Cc: Brost, Matthew <matthew.brost at intel.com>
> > > Subject: [PATCH 05/13] drm/xe: Use xe_vma_ops to implement
> > > xe_vm_rebind
> > >
> > > All page tables updates are moving to a xe_vma_ops interface to
> > > implement 1 job per VM bind IOCTL.
> >
> > Just want to make sure I understand it correctly. So far after this patch, the
> rebind is still many jobs (one job per vma), right?
> >
> 
> Yes. A follow on series will convert to 1 job for all of the rebind list.
> 
> >
> >  Convert xe_vm_rebind to use a
> > > xe_vma_ops based interface.
> > >
> > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_vm.c | 78
> +++++++++++++++++++++++++++++++-
> > > ------
> > >  1 file changed, 64 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > index 4cd485d5bc0a..9d82396cf5d5 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -811,37 +811,87 @@ int xe_vm_userptr_check_repin(struct xe_vm
> *vm)
> > >  		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
> > >  }
> > >
> > > -static struct dma_fence *
> > > -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> > > -	       struct xe_sync_entry *syncs, u32 num_syncs,
> > > -	       bool first_op, bool last_op);
> > > +static void xe_vm_populate_rebind(struct xe_vma_op *op, struct
> xe_vma
> > > *vma,
> > > +				  u8 tile_mask)
> > > +{
> > > +	INIT_LIST_HEAD(&op->link);
> > > +	op->base.op = DRM_GPUVA_OP_MAP;
> > > +	op->base.map.va.addr = vma->gpuva.va.addr;
> > > +	op->base.map.va.range = vma->gpuva.va.range;
> > > +	op->base.map.gem.obj = vma->gpuva.gem.obj;
> > > +	op->base.map.gem.offset = vma->gpuva.gem.offset;
> > > +	op->map.vma = vma;
> > > +	op->map.immediate = true;
> > > +	op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
> > > +	op->map.is_null = xe_vma_is_null(vma);
> > > +}
> > > +
> > > +static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct
> xe_vma
> > > *vma,
> > > +				u8 tile_mask)
> > > +{
> > > +	struct xe_vma_op *op;
> > > +
> > > +	op = kzalloc(sizeof(*op), GFP_KERNEL);
> > > +	if (!op)
> > > +		return -ENOMEM;
> > > +
> > > +	xe_vm_populate_rebind(op, vma, tile_mask);
> > > +	list_add_tail(&op->link, &vops->list);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static struct dma_fence *ops_execute(struct xe_vm *vm,
> > > +				     struct xe_vma_ops *vops,
> > > +				     bool cleanup);
> > > +static void xe_vma_ops_init(struct xe_vma_ops *vops);
> > >
> > >  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> > >  {
> > >  	struct dma_fence *fence;
> > >  	struct xe_vma *vma, *next;
> > > +	struct xe_vma_ops vops;
> > > +	struct xe_vma_op *op, *next_op;
> > > +	int err;
> > >
> > >  	lockdep_assert_held(&vm->lock);
> > > -	if (xe_vm_in_lr_mode(vm) && !rebind_worker)
> > > +	if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
> > > +	    list_empty(&vm->rebind_list))
> > >  		return 0;
> > >
> > > +	xe_vma_ops_init(&vops);
> > > +
> > >  	xe_vm_assert_held(vm);
> > > -	list_for_each_entry_safe(vma, next, &vm->rebind_list,
> > > -				 combined_links.rebind) {
> > > +	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind)
> > > {
> > >  		xe_assert(vm->xe, vma->tile_present);
> > >
> > > -		list_del_init(&vma->combined_links.rebind);
> > >  		if (rebind_worker)
> > >  			trace_xe_vma_rebind_worker(vma);
> > >  		else
> > >  			trace_xe_vma_rebind_exec(vma);
> > > -		fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
> > > -		if (IS_ERR(fence))
> > > -			return PTR_ERR(fence);
> > > +
> > > +		err = xe_vm_ops_add_rebind(&vops, vma,
> > > +					   vma->tile_present);
> > > +		if (err)
> > > +			goto free_ops;
> > > +	}
> > > +
> > > +	fence = ops_execute(vm, &vops, false);
> > > +	if (IS_ERR(fence)) {
> > > +		err = PTR_ERR(fence);
> >
> > So here, if above ops_execute partially succeed (some vma bind failed,
> some succeed), for those vmas which are successfully bound, it is kept in the
> vm's rebind_list. Is this the correct behavior? Next time we will rebind them
> again....
> >
> 
> The VM is killed if any VMA ops fails so it doesn't really matter, also
> it safe to issue a rebind twice.
> 
> In the follow up series, once we have 1 job per the rebind list we can
> cope with errors and not kill the VM. In that case we must leave
> everything on the rebind list.
> 
> So this patch is correct now and for the follow on series.

I see. Patch is:

Reviewed-by: Oak Zeng <oak.zeng at intel.com>


> 
> Matt
> 
> >
> > Oak
> >
> >
> > > +	} else {
> > >  		dma_fence_put(fence);
> > > +		list_for_each_entry_safe(vma, next, &vm->rebind_list,
> > > +					 combined_links.rebind)
> > > +			list_del_init(&vma->combined_links.rebind);
> > > +	}
> > > +free_ops:
> > > +	list_for_each_entry_safe(op, next_op, &vops.list, link) {
> > > +		list_del(&op->link);
> > > +		kfree(op);
> > >  	}
> > >
> > > -	return 0;
> > > +	return err;
> > >  }
> > >
> > >  static void xe_vma_free(struct xe_vma *vma)
> > > @@ -2516,7 +2566,7 @@ static struct dma_fence *op_execute(struct
> xe_vm
> > > *vm, struct xe_vma *vma,
> > >  {
> > >  	struct dma_fence *fence = NULL;
> > >
> > > -	lockdep_assert_held_write(&vm->lock);
> > > +	lockdep_assert_held(&vm->lock);
> > >
> > >  	xe_vm_assert_held(vm);
> > >  	xe_bo_assert_held(xe_vma_bo(vma));
> > > @@ -2635,7 +2685,7 @@ xe_vma_op_execute(struct xe_vm *vm, struct
> > > xe_vma_op *op)
> > >  {
> > >  	struct dma_fence *fence = ERR_PTR(-ENOMEM);
> > >
> > > -	lockdep_assert_held_write(&vm->lock);
> > > +	lockdep_assert_held(&vm->lock);
> > >
> > >  	switch (op->base.op) {
> > >  	case DRM_GPUVA_OP_MAP:
> > > --
> > > 2.34.1
> >


More information about the Intel-xe mailing list