[PATCH 05/13] drm/xe: Use xe_vma_ops to implement xe_vm_rebind
Zeng, Oak
oak.zeng at intel.com
Tue Apr 23 03:17:25 UTC 2024
> -----Original Message-----
> From: Brost, Matthew <matthew.brost at intel.com>
> Sent: Friday, April 19, 2024 12:14 AM
> To: Zeng, Oak <oak.zeng at intel.com>
> Cc: intel-xe at lists.freedesktop.org
> Subject: Re: [PATCH 05/13] drm/xe: Use xe_vma_ops to implement
> xe_vm_rebind
>
> On Thu, Apr 18, 2024 at 09:43:06PM -0600, Zeng, Oak wrote:
> >
> >
> > > -----Original Message-----
> > > From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of
> > > Matthew Brost
> > > Sent: Wednesday, April 10, 2024 1:41 AM
> > > To: intel-xe at lists.freedesktop.org
> > > Cc: Brost, Matthew <matthew.brost at intel.com>
> > > Subject: [PATCH 05/13] drm/xe: Use xe_vma_ops to implement
> > > xe_vm_rebind
> > >
> > > All page table updates are moving to an xe_vma_ops interface to
> > > implement one job per VM bind IOCTL.
> >
> > Just want to make sure I understand it correctly. So far after this patch,
> > the rebind is still many jobs (one job per vma), right?
> >
>
> Yes. A follow-on series will convert this to one job for the entire rebind list.
>
> >
> > > Convert xe_vm_rebind to use an xe_vma_ops based interface.
> > >
> > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > ---
> > > drivers/gpu/drm/xe/xe_vm.c | 78 +++++++++++++++++++++++++++++++-------
> > > 1 file changed, 64 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > index 4cd485d5bc0a..9d82396cf5d5 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -811,37 +811,87 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
> > > list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
> > > }
> > >
> > > -static struct dma_fence *
> > > -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> > > - struct xe_sync_entry *syncs, u32 num_syncs,
> > > - bool first_op, bool last_op);
> > > +static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
> > > + u8 tile_mask)
> > > +{
> > > + INIT_LIST_HEAD(&op->link);
> > > + op->base.op = DRM_GPUVA_OP_MAP;
> > > + op->base.map.va.addr = vma->gpuva.va.addr;
> > > + op->base.map.va.range = vma->gpuva.va.range;
> > > + op->base.map.gem.obj = vma->gpuva.gem.obj;
> > > + op->base.map.gem.offset = vma->gpuva.gem.offset;
> > > + op->map.vma = vma;
> > > + op->map.immediate = true;
> > > + op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
> > > + op->map.is_null = xe_vma_is_null(vma);
> > > +}
> > > +
> > > +static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
> > > + u8 tile_mask)
> > > +{
> > > + struct xe_vma_op *op;
> > > +
> > > + op = kzalloc(sizeof(*op), GFP_KERNEL);
> > > + if (!op)
> > > + return -ENOMEM;
> > > +
> > > + xe_vm_populate_rebind(op, vma, tile_mask);
> > > + list_add_tail(&op->link, &vops->list);
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static struct dma_fence *ops_execute(struct xe_vm *vm,
> > > + struct xe_vma_ops *vops,
> > > + bool cleanup);
> > > +static void xe_vma_ops_init(struct xe_vma_ops *vops);
> > >
> > > int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> > > {
> > > struct dma_fence *fence;
> > > struct xe_vma *vma, *next;
> > > + struct xe_vma_ops vops;
> > > + struct xe_vma_op *op, *next_op;
> > > + int err;
> > >
> > > lockdep_assert_held(&vm->lock);
> > > - if (xe_vm_in_lr_mode(vm) && !rebind_worker)
> > > + if ((xe_vm_in_lr_mode(vm) && !rebind_worker) ||
> > > + list_empty(&vm->rebind_list))
> > > return 0;
> > >
> > > + xe_vma_ops_init(&vops);
> > > +
> > > xe_vm_assert_held(vm);
> > > - list_for_each_entry_safe(vma, next, &vm->rebind_list,
> > > - combined_links.rebind) {
> > > + list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
> > > xe_assert(vm->xe, vma->tile_present);
> > >
> > > - list_del_init(&vma->combined_links.rebind);
> > > if (rebind_worker)
> > > trace_xe_vma_rebind_worker(vma);
> > > else
> > > trace_xe_vma_rebind_exec(vma);
> > > - fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
> > > - if (IS_ERR(fence))
> > > - return PTR_ERR(fence);
> > > +
> > > + err = xe_vm_ops_add_rebind(&vops, vma,
> > > + vma->tile_present);
> > > + if (err)
> > > + goto free_ops;
> > > + }
> > > +
> > > + fence = ops_execute(vm, &vops, false);
> > > + if (IS_ERR(fence)) {
> > > + err = PTR_ERR(fence);
> >
> > So here, if the above ops_execute partially succeeds (some VMA binds fail,
> > some succeed), the VMAs that were successfully bound are kept in the VM's
> > rebind_list. Is this the correct behavior? Next time we will rebind them
> > again....
> >
>
> The VM is killed if any VMA op fails, so it doesn't really matter; also
> it is safe to issue a rebind twice.
>
> In the follow-up series, once we have one job for the entire rebind list,
> we can cope with errors and not kill the VM. In that case we must leave
> everything on the rebind list.
>
> So this patch is correct now and for the follow-on series.
I see. Patch is:
Reviewed-by: Oak Zeng <oak.zeng at intel.com>
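
For the archive, here is a tiny userspace model of the control flow this patch
moves rebind to: collect one op per VMA from the rebind list, execute the whole
batch, free the ops, and only clear the rebind list on success. This is only a
sketch of the pattern; the structs and function names below are simplified
stand-ins, not the xe driver's real symbols.

/*
 * Toy model of the xe_vm_rebind() flow after this patch.  All names are
 * simplified stand-ins for illustration only.
 */
#include <stdio.h>
#include <stdlib.h>

struct op {
	unsigned long long addr;
	struct op *next;
};

struct vma {
	unsigned long long addr;
	int needs_rebind;
};

/* Stand-in for ops_execute(): pretend every bind in the batch succeeds. */
static int execute_ops(struct op *ops)
{
	for (; ops; ops = ops->next)
		printf("rebinding VMA at 0x%llx\n", ops->addr);
	return 0;
}

static int rebind_all(struct vma *vmas, int count)
{
	struct op *ops = NULL, *tmp;
	int i, err = 0;

	/* Build one op per VMA still marked for rebind. */
	for (i = 0; i < count; i++) {
		if (!vmas[i].needs_rebind)
			continue;
		tmp = calloc(1, sizeof(*tmp));
		if (!tmp) {
			err = -1;
			goto free_ops;
		}
		tmp->addr = vmas[i].addr;
		tmp->next = ops;
		ops = tmp;
	}

	/* Execute the whole batch; on failure keep VMAs marked for rebind. */
	err = execute_ops(ops);
	if (!err)
		for (i = 0; i < count; i++)
			vmas[i].needs_rebind = 0;

free_ops:
	while (ops) {
		tmp = ops->next;
		free(ops);
		ops = tmp;
	}
	return err;
}

int main(void)
{
	struct vma vmas[2] = { { 0x1000, 1 }, { 0x2000, 1 } };

	return rebind_all(vmas, 2) ? 1 : 0;
}

With one job for the whole batch in the follow-up series, the error path can
simply keep everything marked for rebind instead of killing the VM.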
>
> Matt
>
> >
> > Oak
> >
> >
> > > + } else {
> > > dma_fence_put(fence);
> > > + list_for_each_entry_safe(vma, next, &vm->rebind_list,
> > > + combined_links.rebind)
> > > + list_del_init(&vma->combined_links.rebind);
> > > + }
> > > +free_ops:
> > > + list_for_each_entry_safe(op, next_op, &vops.list, link) {
> > > + list_del(&op->link);
> > > + kfree(op);
> > > }
> > >
> > > - return 0;
> > > + return err;
> > > }
> > >
> > > static void xe_vma_free(struct xe_vma *vma)
> > > @@ -2516,7 +2566,7 @@ static struct dma_fence *op_execute(struct xe_vm *vm, struct xe_vma *vma,
> > > {
> > > struct dma_fence *fence = NULL;
> > >
> > > - lockdep_assert_held_write(&vm->lock);
> > > + lockdep_assert_held(&vm->lock);
> > >
> > > xe_vm_assert_held(vm);
> > > xe_bo_assert_held(xe_vma_bo(vma));
> > > @@ -2635,7 +2685,7 @@ xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
> > > {
> > > struct dma_fence *fence = ERR_PTR(-ENOMEM);
> > >
> > > - lockdep_assert_held_write(&vm->lock);
> > > + lockdep_assert_held(&vm->lock);
> > >
> > > switch (op->base.op) {
> > > case DRM_GPUVA_OP_MAP:
> > > --
> > > 2.34.1
> >