[PATCH v4 05/30] drm/xe: Update xe_vm_rebind to use dummy VMA operations
Matthew Brost
matthew.brost at intel.com
Fri Mar 22 22:51:31 UTC 2024
On Fri, Mar 22, 2024 at 03:23:08PM -0600, Zeng, Oak wrote:
>
>
> > -----Original Message-----
> > From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Matthew
> > Brost
> > Sent: Friday, March 8, 2024 12:08 AM
> > To: intel-xe at lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost at intel.com>
> > Subject: [PATCH v4 05/30] drm/xe: Update xe_vm_rebind to use dummy VMA
> > operations
> >
> > All bind interfaces are transitioning to use VMA ops, update
> > xe_vm_rebind to use VMA ops.
> >
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_vm.c | 47 +++++---
> > drivers/gpu/drm/xe/xe_vm_types.h | 189 ++++++++++++++++---------------
> > 2 files changed, 132 insertions(+), 104 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index e342af6b51b1..0bb807c05d7b 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -755,10 +755,22 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
> > list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
> > }
> >
> > -static struct dma_fence *
> > -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> > - struct xe_sync_entry *syncs, u32 num_syncs,
> > - bool first_op, bool last_op);
> > +static void xe_vm_populate_dummy_rebind(struct xe_vm *vm, struct xe_vma
> > *vma)
> > +{
> > + vm->dummy_ops.op.base.op = DRM_GPUVA_OP_MAP;
> > + vm->dummy_ops.op.base.map.va.addr = vma->gpuva.va.addr;
> > + vm->dummy_ops.op.base.map.va.range = vma->gpuva.va.range;
> > + vm->dummy_ops.op.base.map.gem.obj = vma->gpuva.gem.obj;
> > + vm->dummy_ops.op.base.map.gem.offset = vma->gpuva.gem.offset;
> > + vm->dummy_ops.op.map.vma = vma;
> > + vm->dummy_ops.op.map.immediate = true;
> > + vm->dummy_ops.op.map.dumpable = vma->gpuva.flags &
> > XE_VMA_DUMPABLE;
> > + vm->dummy_ops.op.map.is_null = xe_vma_is_null(vma);
> > +}
> > +
> > +static struct dma_fence *ops_execute(struct xe_vm *vm,
> > + struct xe_vma_ops *vops,
> > + bool cleanup);
> >
> > struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> > {
> > @@ -780,7 +792,9 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm,
> > bool rebind_worker)
> > trace_xe_vma_rebind_worker(vma);
> > else
> > trace_xe_vma_rebind_exec(vma);
> > - fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
> > +
> > + xe_vm_populate_dummy_rebind(vm, vma);
> > + fence = ops_execute(vm, &vm->dummy_ops.vops, false);
> > if (IS_ERR(fence))
> > return fence;
> > }
> > @@ -1289,6 +1303,11 @@ static void xe_vm_free_scratch(struct xe_vm *vm)
> > }
> > }
> >
> > +static void xe_vma_ops_init(struct xe_vma_ops *vops)
> > +{
> > + INIT_LIST_HEAD(&vops->list);
> > +}
>
> this already showed up on patch 4... you just add it in patch5, then moved it to another location on patch 5...
>
> can this be better organized?
>
Yes.
>
> > +
> > struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> > {
> > struct drm_gem_object *vm_resv_obj;
> > @@ -1310,6 +1329,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe,
> > u32 flags)
> > init_rwsem(&vm->lock);
> > mutex_init(&vm->snap_mutex);
> >
> > + xe_vma_ops_init(&vm->dummy_ops.vops);
> > + INIT_LIST_HEAD(&vm->dummy_ops.op.link);
> > + list_add(&vm->dummy_ops.op.link, &vm->dummy_ops.vops.list);
> > +
> > INIT_LIST_HEAD(&vm->rebind_list);
> >
> > INIT_LIST_HEAD(&vm->userptr.repin_list);
> > @@ -2140,6 +2163,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct
> > xe_bo *bo,
> > struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> >
> > if (__op->op == DRM_GPUVA_OP_MAP) {
> > + op->map.immediate = !xe_vm_in_fault_mode(vm);
> > op->map.is_null = flags &
> > DRM_XE_VM_BIND_FLAG_NULL;
> > op->map.dumpable = flags &
> > DRM_XE_VM_BIND_FLAG_DUMPABLE;
> > op->map.pat_index = pat_index;
> > @@ -2465,7 +2489,7 @@ static struct dma_fence *op_execute(struct xe_vm
> > *vm, struct xe_vma *vma,
> > {
> > struct dma_fence *fence = NULL;
> >
> > - lockdep_assert_held_write(&vm->lock);
> > + lockdep_assert_held(&vm->lock);
> > xe_vm_assert_held(vm);
> > xe_bo_assert_held(xe_vma_bo(vma));
> >
> > @@ -2473,7 +2497,7 @@ static struct dma_fence *op_execute(struct xe_vm
> > *vm, struct xe_vma *vma,
> > case DRM_GPUVA_OP_MAP:
> > fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
> > op->syncs, op->num_syncs,
> > - !xe_vm_in_fault_mode(vm),
> > + op->map.immediate,
> > op->flags & XE_VMA_OP_FIRST,
> > op->flags & XE_VMA_OP_LAST);
> > break;
> > @@ -2554,7 +2578,7 @@ __xe_vma_op_execute(struct xe_vm *vm, struct
> > xe_vma *vma,
> > retry_userptr:
> > fence = op_execute(vm, vma, op);
> > if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) {
> > - lockdep_assert_held_write(&vm->lock);
> > + lockdep_assert_held(&vm->lock);
> >
> > if (op->base.op == DRM_GPUVA_OP_REMAP) {
> > if (!op->remap.unmap_done)
> > @@ -2583,7 +2607,7 @@ xe_vma_op_execute(struct xe_vm *vm, struct
> > xe_vma_op *op)
> > {
> > struct dma_fence *fence = ERR_PTR(-ENOMEM);
> >
> > - lockdep_assert_held_write(&vm->lock);
> > + lockdep_assert_held(&vm->lock);
> >
> > switch (op->base.op) {
> > case DRM_GPUVA_OP_MAP:
> > @@ -2992,11 +3016,6 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm
> > *vm,
> > return err;
> > }
> >
> > -static void xe_vma_ops_init(struct xe_vma_ops *vops)
> > -{
> > - INIT_LIST_HEAD(&vops->list);
> > -}
> > -
> > int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > {
> > struct xe_device *xe = to_xe_device(dev);
> > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> > b/drivers/gpu/drm/xe/xe_vm_types.h
> > index cc3dce893f1e..7ef9e632154a 100644
> > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > @@ -18,6 +18,7 @@
> > #include "xe_range_fence.h"
> >
> > struct xe_bo;
> > +struct xe_device;
> > struct xe_sync_entry;
> > struct xe_user_fence;
> > struct xe_vm;
> > @@ -124,7 +125,96 @@ struct xe_userptr_vma {
> > struct xe_userptr userptr;
> > };
> >
> > -struct xe_device;
> > +/** struct xe_vma_op_map - VMA map operation */
> > +struct xe_vma_op_map {
> > + /** @vma: VMA to map */
> > + struct xe_vma *vma;
> > + /** @immediate: Immediate bind */
> > + bool immediate;
> > + /** @is_null: is NULL binding */
> > + bool is_null;
> > + /** @dumpable: whether BO is dumped on GPU hang */
> > + bool dumpable;
> > + /** @pat_index: The pat index to use for this operation. */
> > + u16 pat_index;
> > +};
> > +
> > +/** struct xe_vma_op_remap - VMA remap operation */
> > +struct xe_vma_op_remap {
> > + /** @prev: VMA preceding part of a split mapping */
> > + struct xe_vma *prev;
> > + /** @next: VMA subsequent part of a split mapping */
> > + struct xe_vma *next;
> > + /** @start: start of the VMA unmap */
> > + u64 start;
> > + /** @range: range of the VMA unmap */
> > + u64 range;
> > + /** @skip_prev: skip prev rebind */
> > + bool skip_prev;
> > + /** @skip_next: skip next rebind */
> > + bool skip_next;
> > + /** @unmap_done: unmap operation in done */
> > + bool unmap_done;
> > +};
> > +
> > +/** struct xe_vma_op_prefetch - VMA prefetch operation */
> > +struct xe_vma_op_prefetch {
> > + /** @region: memory region to prefetch to */
> > + u32 region;
> > +};
> > +
> > +/** enum xe_vma_op_flags - flags for VMA operation */
> > +enum xe_vma_op_flags {
> > + /** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
> > + XE_VMA_OP_FIRST = BIT(0),
> > + /** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
> > + XE_VMA_OP_LAST = BIT(1),
> > + /** @XE_VMA_OP_COMMITTED: VMA operation committed */
> > + XE_VMA_OP_COMMITTED = BIT(2),
> > + /** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation
> > committed */
> > + XE_VMA_OP_PREV_COMMITTED = BIT(3),
> > + /** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed
> > */
> > + XE_VMA_OP_NEXT_COMMITTED = BIT(4),
> > +};
> > +
> > +/** struct xe_vma_op - VMA operation */
> > +struct xe_vma_op {
> > + /** @base: GPUVA base operation */
> > + struct drm_gpuva_op base;
> > + /**
> > + * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
> > + * operations is processed
> > + */
> > + struct drm_gpuva_ops *ops;
> > + /** @q: exec queue for this operation */
> > + struct xe_exec_queue *q;
> > + /**
> > + * @syncs: syncs for this operation, only used on first and last
> > + * operation
> > + */
> > + struct xe_sync_entry *syncs;
> > + /** @num_syncs: number of syncs */
> > + u32 num_syncs;
> > + /** @link: async operation link */
> > + struct list_head link;
> > + /** @flags: operation flags */
> > + enum xe_vma_op_flags flags;
> > +
> > + union {
> > + /** @map: VMA map operation specific data */
> > + struct xe_vma_op_map map;
> > + /** @remap: VMA remap operation specific data */
> > + struct xe_vma_op_remap remap;
> > + /** @prefetch: VMA prefetch operation specific data */
> > + struct xe_vma_op_prefetch prefetch;
> > + };
> > +};
> > +
> > +/** struct xe_vma_ops - VMA operations */
> > +struct xe_vma_ops {
> > + /** @list: list of VMA operations */
> > + struct list_head list;
> > +};
>
> this already showed up on patch 4... you just add it in patch5, then moved it to another location on patch 5...
>
Yes.
> >
> > struct xe_vm {
> > /** @gpuvm: base GPUVM used to track VMAs */
> > @@ -267,99 +357,18 @@ struct xe_vm {
> > bool capture_once;
> > } error_capture;
> >
> > + /** @dummy_ops: dummy VMA ops to issue rebinds */
> > + struct {
> > + /** @dummy_ops.vops: dummy VMA ops */
> > + struct xe_vma_ops vops;
> > + /** @dummy_ops.op: dummy VMA op */
> > + struct xe_vma_op op;
> > + } dummy_ops;
>
> Judging only from this patch, it seems you don't have to introduce this dummy_ops member in xe_vm. For example, it could be a local variable in the xe_vm_rebind function. But I will keep looking. Maybe you made it this way for future patches.
>
>
I'm going to rewrite (or have already rewritten) this to use a local
xe_vma_ops instance and execute all rebinds as an atomic unit.
You can ignore this patch and also [1] in this rev of the review.
[1] https://patchwork.freedesktop.org/patch/582015/?series=125608&rev=5
> > +
> > /** @batch_invalidate_tlb: Always invalidate TLB before batch start */
> > bool batch_invalidate_tlb;
> > /** @xef: XE file handle for tracking this VM's drm client */
> > struct xe_file *xef;
> > };
> >
> > -/** struct xe_vma_op_map - VMA map operation */
> > -struct xe_vma_op_map {
> > - /** @vma: VMA to map */
> > - struct xe_vma *vma;
> > - /** @is_null: is NULL binding */
> > - bool is_null;
> > - /** @dumpable: whether BO is dumped on GPU hang */
> > - bool dumpable;
> > - /** @pat_index: The pat index to use for this operation. */
> > - u16 pat_index;
> > -};
> > -
> > -/** struct xe_vma_op_remap - VMA remap operation */
> > -struct xe_vma_op_remap {
> > - /** @prev: VMA preceding part of a split mapping */
> > - struct xe_vma *prev;
> > - /** @next: VMA subsequent part of a split mapping */
> > - struct xe_vma *next;
> > - /** @start: start of the VMA unmap */
> > - u64 start;
> > - /** @range: range of the VMA unmap */
> > - u64 range;
> > - /** @skip_prev: skip prev rebind */
> > - bool skip_prev;
> > - /** @skip_next: skip next rebind */
> > - bool skip_next;
> > - /** @unmap_done: unmap operation in done */
> > - bool unmap_done;
> > -};
> > -
> > -/** struct xe_vma_op_prefetch - VMA prefetch operation */
> > -struct xe_vma_op_prefetch {
> > - /** @region: memory region to prefetch to */
> > - u32 region;
> > -};
> > -
> > -/** enum xe_vma_op_flags - flags for VMA operation */
> > -enum xe_vma_op_flags {
> > - /** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
> > - XE_VMA_OP_FIRST = BIT(0),
> > - /** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
> > - XE_VMA_OP_LAST = BIT(1),
> > - /** @XE_VMA_OP_COMMITTED: VMA operation committed */
> > - XE_VMA_OP_COMMITTED = BIT(2),
> > - /** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation
> > committed */
> > - XE_VMA_OP_PREV_COMMITTED = BIT(3),
> > - /** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed
> > */
> > - XE_VMA_OP_NEXT_COMMITTED = BIT(4),
> > -};
> > -
> > -/** struct xe_vma_op - VMA operation */
> > -struct xe_vma_op {
> > - /** @base: GPUVA base operation */
> > - struct drm_gpuva_op base;
> > - /**
> > - * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
> > - * operations is processed
> > - */
> > - struct drm_gpuva_ops *ops;
> > - /** @q: exec queue for this operation */
> > - struct xe_exec_queue *q;
> > - /**
> > - * @syncs: syncs for this operation, only used on first and last
> > - * operation
> > - */
> > - struct xe_sync_entry *syncs;
> > - /** @num_syncs: number of syncs */
> > - u32 num_syncs;
> > - /** @link: async operation link */
> > - struct list_head link;
> > - /** @flags: operation flags */
> > - enum xe_vma_op_flags flags;
> > -
> > - union {
> > - /** @map: VMA map operation specific data */
> > - struct xe_vma_op_map map;
> > - /** @remap: VMA remap operation specific data */
> > - struct xe_vma_op_remap remap;
> > - /** @prefetch: VMA prefetch operation specific data */
> > - struct xe_vma_op_prefetch prefetch;
> > - };
> > -};
> > -
> > -/** struct xe_vma_ops - VMA operations */
> > -struct xe_vma_ops {
> > - /** @list: list of VMA operations */
> > - struct list_head list;
> > -};
>
> It seems you moved a block of code to another location. That makes code review more work. Better to avoid this if we can.
>
See above; with my refactor, the dummy binds and this code move are no
longer required.
Matt
> Oak
>
> > -
> > #endif
> > --
> > 2.34.1
>
More information about the Intel-xe
mailing list