[PATCH v4 05/30] drm/xe: Update xe_vm_rebind to use dummy VMA operations

Fri Mar 22 22:51:31 UTC 2024

On Fri, Mar 22, 2024 at 03:23:08PM -0600, Zeng, Oak wrote:
> 
> 
> > -----Original Message-----
> > From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Matthew
> > Brost
> > Sent: Friday, March 8, 2024 12:08 AM
> > To: intel-xe at lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost at intel.com>
> > Subject: [PATCH v4 05/30] drm/xe: Update xe_vm_rebind to use dummy VMA
> > operations
> > 
> > All bind interfaces are transitioning to use VMA ops, update
> > xe_vm_rebind to use VMA ops.
> > 
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_vm.c       |  47 +++++---
> >  drivers/gpu/drm/xe/xe_vm_types.h | 189 ++++++++++++++++---------------
> >  2 files changed, 132 insertions(+), 104 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index e342af6b51b1..0bb807c05d7b 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -755,10 +755,22 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
> >  		list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
> >  }
> > 
> > -static struct dma_fence *
> > -xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> > -	       struct xe_sync_entry *syncs, u32 num_syncs,
> > -	       bool first_op, bool last_op);
> > +static void xe_vm_populate_dummy_rebind(struct xe_vm *vm, struct xe_vma
> > *vma)
> > +{
> > +	vm->dummy_ops.op.base.op = DRM_GPUVA_OP_MAP;
> > +	vm->dummy_ops.op.base.map.va.addr = vma->gpuva.va.addr;
> > +	vm->dummy_ops.op.base.map.va.range = vma->gpuva.va.range;
> > +	vm->dummy_ops.op.base.map.gem.obj = vma->gpuva.gem.obj;
> > +	vm->dummy_ops.op.base.map.gem.offset = vma->gpuva.gem.offset;
> > +	vm->dummy_ops.op.map.vma = vma;
> > +	vm->dummy_ops.op.map.immediate = true;
> > +	vm->dummy_ops.op.map.dumpable = vma->gpuva.flags &
> > XE_VMA_DUMPABLE;
> > +	vm->dummy_ops.op.map.is_null = xe_vma_is_null(vma);
> > +}
> > +
> > +static struct dma_fence *ops_execute(struct xe_vm *vm,
> > +				     struct xe_vma_ops *vops,
> > +				     bool cleanup);
> > 
> >  struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> >  {
> > @@ -780,7 +792,9 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm,
> > bool rebind_worker)
> >  			trace_xe_vma_rebind_worker(vma);
> >  		else
> >  			trace_xe_vma_rebind_exec(vma);
> > -		fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
> > +
> > +		xe_vm_populate_dummy_rebind(vm, vma);
> > +		fence = ops_execute(vm, &vm->dummy_ops.vops, false);
> >  		if (IS_ERR(fence))
> >  			return fence;
> >  	}
> > @@ -1289,6 +1303,11 @@ static void xe_vm_free_scratch(struct xe_vm *vm)
> >  	}
> >  }
> > 
> > +static void xe_vma_ops_init(struct xe_vma_ops *vops)
> > +{
> > +	INIT_LIST_HEAD(&vops->list);
> > +}
> 
> This already showed up in patch 4... you just added it in patch 5, then moved it to another location in patch 5...
> 
> can this be better organized?
> 

Yes.

> 
> > +
> >  struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> >  {
> >  	struct drm_gem_object *vm_resv_obj;
> > @@ -1310,6 +1329,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe,
> > u32 flags)
> >  	init_rwsem(&vm->lock);
> >  	mutex_init(&vm->snap_mutex);
> > 
> > +	xe_vma_ops_init(&vm->dummy_ops.vops);
> > +	INIT_LIST_HEAD(&vm->dummy_ops.op.link);
> > +	list_add(&vm->dummy_ops.op.link, &vm->dummy_ops.vops.list);
> > +
> >  	INIT_LIST_HEAD(&vm->rebind_list);
> > 
> >  	INIT_LIST_HEAD(&vm->userptr.repin_list);
> > @@ -2140,6 +2163,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct
> > xe_bo *bo,
> >  		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> > 
> >  		if (__op->op == DRM_GPUVA_OP_MAP) {
> > +			op->map.immediate = !xe_vm_in_fault_mode(vm);
> >  			op->map.is_null = flags &
> > DRM_XE_VM_BIND_FLAG_NULL;
> >  			op->map.dumpable = flags &
> > DRM_XE_VM_BIND_FLAG_DUMPABLE;
> >  			op->map.pat_index = pat_index;
> > @@ -2465,7 +2489,7 @@ static struct dma_fence *op_execute(struct xe_vm
> > *vm, struct xe_vma *vma,
> >  {
> >  	struct dma_fence *fence = NULL;
> > 
> > -	lockdep_assert_held_write(&vm->lock);
> > +	lockdep_assert_held(&vm->lock);
> >  	xe_vm_assert_held(vm);
> >  	xe_bo_assert_held(xe_vma_bo(vma));
> > 
> > @@ -2473,7 +2497,7 @@ static struct dma_fence *op_execute(struct xe_vm
> > *vm, struct xe_vma *vma,
> >  	case DRM_GPUVA_OP_MAP:
> >  		fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
> >  				   op->syncs, op->num_syncs,
> > -				   !xe_vm_in_fault_mode(vm),
> > +				   op->map.immediate,
> >  				   op->flags & XE_VMA_OP_FIRST,
> >  				   op->flags & XE_VMA_OP_LAST);
> >  		break;
> > @@ -2554,7 +2578,7 @@ __xe_vma_op_execute(struct xe_vm *vm, struct
> > xe_vma *vma,
> >  retry_userptr:
> >  	fence = op_execute(vm, vma, op);
> >  	if (IS_ERR(fence) && PTR_ERR(fence) == -EAGAIN) {
> > -		lockdep_assert_held_write(&vm->lock);
> > +		lockdep_assert_held(&vm->lock);
> > 
> >  		if (op->base.op == DRM_GPUVA_OP_REMAP) {
> >  			if (!op->remap.unmap_done)
> > @@ -2583,7 +2607,7 @@ xe_vma_op_execute(struct xe_vm *vm, struct
> > xe_vma_op *op)
> >  {
> >  	struct dma_fence *fence = ERR_PTR(-ENOMEM);
> > 
> > -	lockdep_assert_held_write(&vm->lock);
> > +	lockdep_assert_held(&vm->lock);
> > 
> >  	switch (op->base.op) {
> >  	case DRM_GPUVA_OP_MAP:
> > @@ -2992,11 +3016,6 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm
> > *vm,
> >  	return err;
> >  }
> > 
> > -static void xe_vma_ops_init(struct xe_vma_ops *vops)
> > -{
> > -	INIT_LIST_HEAD(&vops->list);
> > -}
> > -
> >  int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  {
> >  	struct xe_device *xe = to_xe_device(dev);
> > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> > b/drivers/gpu/drm/xe/xe_vm_types.h
> > index cc3dce893f1e..7ef9e632154a 100644
> > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > @@ -18,6 +18,7 @@
> >  #include "xe_range_fence.h"
> > 
> >  struct xe_bo;
> > +struct xe_device;
> >  struct xe_sync_entry;
> >  struct xe_user_fence;
> >  struct xe_vm;
> > @@ -124,7 +125,96 @@ struct xe_userptr_vma {
> >  	struct xe_userptr userptr;
> >  };
> > 
> > -struct xe_device;
> > +/** struct xe_vma_op_map - VMA map operation */
> > +struct xe_vma_op_map {
> > +	/** @vma: VMA to map */
> > +	struct xe_vma *vma;
> > +	/** @immediate: Immediate bind */
> > +	bool immediate;
> > +	/** @is_null: is NULL binding */
> > +	bool is_null;
> > +	/** @dumpable: whether BO is dumped on GPU hang */
> > +	bool dumpable;
> > +	/** @pat_index: The pat index to use for this operation. */
> > +	u16 pat_index;
> > +};
> > +
> > +/** struct xe_vma_op_remap - VMA remap operation */
> > +struct xe_vma_op_remap {
> > +	/** @prev: VMA preceding part of a split mapping */
> > +	struct xe_vma *prev;
> > +	/** @next: VMA subsequent part of a split mapping */
> > +	struct xe_vma *next;
> > +	/** @start: start of the VMA unmap */
> > +	u64 start;
> > +	/** @range: range of the VMA unmap */
> > +	u64 range;
> > +	/** @skip_prev: skip prev rebind */
> > +	bool skip_prev;
> > +	/** @skip_next: skip next rebind */
> > +	bool skip_next;
> > +	/** @unmap_done: unmap operation in done */
> > +	bool unmap_done;
> > +};
> > +
> > +/** struct xe_vma_op_prefetch - VMA prefetch operation */
> > +struct xe_vma_op_prefetch {
> > +	/** @region: memory region to prefetch to */
> > +	u32 region;
> > +};
> > +
> > +/** enum xe_vma_op_flags - flags for VMA operation */
> > +enum xe_vma_op_flags {
> > +	/** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
> > +	XE_VMA_OP_FIRST			= BIT(0),
> > +	/** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
> > +	XE_VMA_OP_LAST			= BIT(1),
> > +	/** @XE_VMA_OP_COMMITTED: VMA operation committed */
> > +	XE_VMA_OP_COMMITTED		= BIT(2),
> > +	/** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation
> > committed */
> > +	XE_VMA_OP_PREV_COMMITTED	= BIT(3),
> > +	/** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed
> > */
> > +	XE_VMA_OP_NEXT_COMMITTED	= BIT(4),
> > +};
> > +
> > +/** struct xe_vma_op - VMA operation */
> > +struct xe_vma_op {
> > +	/** @base: GPUVA base operation */
> > +	struct drm_gpuva_op base;
> > +	/**
> > +	 * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
> > +	 * operations is processed
> > +	 */
> > +	struct drm_gpuva_ops *ops;
> > +	/** @q: exec queue for this operation */
> > +	struct xe_exec_queue *q;
> > +	/**
> > +	 * @syncs: syncs for this operation, only used on first and last
> > +	 * operation
> > +	 */
> > +	struct xe_sync_entry *syncs;
> > +	/** @num_syncs: number of syncs */
> > +	u32 num_syncs;
> > +	/** @link: async operation link */
> > +	struct list_head link;
> > +	/** @flags: operation flags */
> > +	enum xe_vma_op_flags flags;
> > +
> > +	union {
> > +		/** @map: VMA map operation specific data */
> > +		struct xe_vma_op_map map;
> > +		/** @remap: VMA remap operation specific data */
> > +		struct xe_vma_op_remap remap;
> > +		/** @prefetch: VMA prefetch operation specific data */
> > +		struct xe_vma_op_prefetch prefetch;
> > +	};
> > +};
> > +
> > +/** struct xe_vma_ops - VMA operations */
> > +struct xe_vma_ops {
> > +	/** @list: list of VMA operations */
> > +	struct list_head list;
> > +};
> 
> This already showed up in patch 4... you just added it in patch 5, then moved it to another location in patch 5...
> 

Yes.

> > 
> >  struct xe_vm {
> >  	/** @gpuvm: base GPUVM used to track VMAs */
> > @@ -267,99 +357,18 @@ struct xe_vm {
> >  		bool capture_once;
> >  	} error_capture;
> > 
> > +	/** @dummy_ops: dummy VMA ops to issue rebinds */
> > +	struct {
> > +		/** @dummy_ops.vops: dummy VMA ops */
> > +		struct xe_vma_ops vops;
> > +		/** @dummy_ops.op: dummy VMA op */
> > +		struct xe_vma_op op;
> > +	} dummy_ops;
> 
> If only from this patch, it seems you don't have to introduce this dummy_ops member to xe_vm. For example, it can be a local variable in xe_vm_rebind function. But I will keep looking. Maybe you made it this way for future patches.
> 
>

I'm going to rewrite (or already have rewritten) this to use a local
xe_vma_ops instance and execute all rebinds as an atomic unit.

You can ignore this patch and also [1] in this rev of the review.

[1] https://patchwork.freedesktop.org/patch/582015/?series=125608&rev=5

> > +
> >  	/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
> >  	bool batch_invalidate_tlb;
> >  	/** @xef: XE file handle for tracking this VM's drm client */
> >  	struct xe_file *xef;
> >  };
> > 
> > -/** struct xe_vma_op_map - VMA map operation */
> > -struct xe_vma_op_map {
> > -	/** @vma: VMA to map */
> > -	struct xe_vma *vma;
> > -	/** @is_null: is NULL binding */
> > -	bool is_null;
> > -	/** @dumpable: whether BO is dumped on GPU hang */
> > -	bool dumpable;
> > -	/** @pat_index: The pat index to use for this operation. */
> > -	u16 pat_index;
> > -};
> > -
> > -/** struct xe_vma_op_remap - VMA remap operation */
> > -struct xe_vma_op_remap {
> > -	/** @prev: VMA preceding part of a split mapping */
> > -	struct xe_vma *prev;
> > -	/** @next: VMA subsequent part of a split mapping */
> > -	struct xe_vma *next;
> > -	/** @start: start of the VMA unmap */
> > -	u64 start;
> > -	/** @range: range of the VMA unmap */
> > -	u64 range;
> > -	/** @skip_prev: skip prev rebind */
> > -	bool skip_prev;
> > -	/** @skip_next: skip next rebind */
> > -	bool skip_next;
> > -	/** @unmap_done: unmap operation in done */
> > -	bool unmap_done;
> > -};
> > -
> > -/** struct xe_vma_op_prefetch - VMA prefetch operation */
> > -struct xe_vma_op_prefetch {
> > -	/** @region: memory region to prefetch to */
> > -	u32 region;
> > -};
> > -
> > -/** enum xe_vma_op_flags - flags for VMA operation */
> > -enum xe_vma_op_flags {
> > -	/** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
> > -	XE_VMA_OP_FIRST			= BIT(0),
> > -	/** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
> > -	XE_VMA_OP_LAST			= BIT(1),
> > -	/** @XE_VMA_OP_COMMITTED: VMA operation committed */
> > -	XE_VMA_OP_COMMITTED		= BIT(2),
> > -	/** @XE_VMA_OP_PREV_COMMITTED: Previous VMA operation
> > committed */
> > -	XE_VMA_OP_PREV_COMMITTED	= BIT(3),
> > -	/** @XE_VMA_OP_NEXT_COMMITTED: Next VMA operation committed
> > */
> > -	XE_VMA_OP_NEXT_COMMITTED	= BIT(4),
> > -};
> > -
> > -/** struct xe_vma_op - VMA operation */
> > -struct xe_vma_op {
> > -	/** @base: GPUVA base operation */
> > -	struct drm_gpuva_op base;
> > -	/**
> > -	 * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
> > -	 * operations is processed
> > -	 */
> > -	struct drm_gpuva_ops *ops;
> > -	/** @q: exec queue for this operation */
> > -	struct xe_exec_queue *q;
> > -	/**
> > -	 * @syncs: syncs for this operation, only used on first and last
> > -	 * operation
> > -	 */
> > -	struct xe_sync_entry *syncs;
> > -	/** @num_syncs: number of syncs */
> > -	u32 num_syncs;
> > -	/** @link: async operation link */
> > -	struct list_head link;
> > -	/** @flags: operation flags */
> > -	enum xe_vma_op_flags flags;
> > -
> > -	union {
> > -		/** @map: VMA map operation specific data */
> > -		struct xe_vma_op_map map;
> > -		/** @remap: VMA remap operation specific data */
> > -		struct xe_vma_op_remap remap;
> > -		/** @prefetch: VMA prefetch operation specific data */
> > -		struct xe_vma_op_prefetch prefetch;
> > -	};
> > -};
> > -
> > -/** struct xe_vma_ops - VMA operations */
> > -struct xe_vma_ops {
> > -	/** @list: list of VMA operations */
> > -	struct list_head list;
> > -};
> 
> It seems you moved a block of code to another location. This causes more work for code review. Better to avoid this if we can.
> 

See above; with my refactor, the dummy binds and this code move are no
longer required.

Matt

> Oak
> 
> > -
> >  #endif
> > --
> > 2.34.1
>