[PATCH 07/13] drm/xe: Use xe_vma_ops to implement page fault rebinds

Zeng, Oak oak.zeng at intel.com
Tue Apr 23 03:27:46 UTC 2024



> -----Original Message-----
> From: Brost, Matthew <matthew.brost at intel.com>
> Sent: Friday, April 19, 2024 3:34 PM
> To: Zeng, Oak <oak.zeng at intel.com>
> Cc: intel-xe at lists.freedesktop.org
> Subject: Re: [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page
> fault rebinds
> 
> On Fri, Apr 19, 2024 at 08:22:29AM -0600, Zeng, Oak wrote:
> >
> >
> > > -----Original Message-----
> > > From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of
> > > Matthew Brost
> > > Sent: Wednesday, April 10, 2024 1:41 AM
> > > To: intel-xe at lists.freedesktop.org
> > > Cc: Brost, Matthew <matthew.brost at intel.com>
> > > Subject: [PATCH 07/13] drm/xe: Use xe_vma_ops to implement page
> fault
> > > rebinds
> > >
> > > > All page table updates are moving to the xe_vma_ops interface to
> > > > implement 1 job per VM bind IOCTL.
> >
> > Can you explain why using the xe_vma_ops interface is necessary even to bind
> > one vma? I understand it makes sense to use this interface to bind multiple
> > vmas. See also below.
> >
> 
> Essentially, once we switch to 1 bind per IOCTL [1], xe_vma_ops is passed
> around throughout all the layers. The xe_vma_ops list is a single atomic
> unit for updating the GPUVA state, internal PT, and GPU page tables. If
> at any point something fails, xe_vma_ops can be unwound, restoring all the
> original state.


OK, that makes sense to me. Maybe explain this a little in the commit message? The current commit message doesn't say why we are moving to the xe_vma_ops interface. Anyway, the patch is:

Reviewed-by: Oak Zeng <oak.zeng at intel.com>

> 
> i.e. __xe_pt_bind_vma will be deleted and replaced with a function
> that accepts a xe_vma_ops list. ops_execute() is the correct place to
> hook into the software pipeline, as we already have the locks and only
> internal PT and GPU page tables need to be updated.
> 
> [1] https://patchwork.freedesktop.org/patch/582024/?series=125608&rev=5
> 
> >
> >  Add xe_vma_rebind function which is
> > > implemented using xe_vma_ops interface. Use xe_vma_rebind in page
> > > faults
> > > for rebinds.
> > >
> > > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_gt_pagefault.c | 16 ++++----
> > >  drivers/gpu/drm/xe/xe_vm.c           | 57 +++++++++++++++++++++++----
> -
> > >  drivers/gpu/drm/xe/xe_vm.h           |  2 +
> > >  drivers/gpu/drm/xe/xe_vm_types.h     |  2 +
> > >  4 files changed, 58 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > index fa9e9853c53b..040dd142c49c 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> > > @@ -19,7 +19,6 @@
> > >  #include "xe_guc.h"
> > >  #include "xe_guc_ct.h"
> > >  #include "xe_migrate.h"
> > > -#include "xe_pt.h"
> > >  #include "xe_trace.h"
> > >  #include "xe_vm.h"
> > >
> > > @@ -204,15 +203,14 @@ static int handle_pagefault(struct xe_gt *gt,
> struct
> > > pagefault *pf)
> > >  		drm_exec_retry_on_contention(&exec);
> > >  		if (ret)
> > >  			goto unlock_dma_resv;
> > > -	}
> > >
> > > -	/* Bind VMA only to the GT that has faulted */
> > > -	trace_xe_vma_pf_bind(vma);
> > > -	fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile),
> > > NULL, 0,
> > > -				 vma->tile_present & BIT(tile->id));
> > > -	if (IS_ERR(fence)) {
> > > -		ret = PTR_ERR(fence);
> > > -		goto unlock_dma_resv;
> > > +		/* Bind VMA only to the GT that has faulted */
> > > +		trace_xe_vma_pf_bind(vma);
> > > +		fence = xe_vma_rebind(vm, vma, BIT(tile->id));
> > > +		if (IS_ERR(fence)) {
> > > +			ret = PTR_ERR(fence);
> > > +			goto unlock_dma_resv;
> > > +		}
> > >  	}
> > >
> > >  	/*
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > index 8f5b24c8f6cd..54a69fbfbb00 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -815,6 +815,7 @@ static void xe_vm_populate_rebind(struct
> > > xe_vma_op *op, struct xe_vma *vma,
> > >  				  u8 tile_mask)
> > >  {
> > >  	INIT_LIST_HEAD(&op->link);
> > > +	op->tile_mask = tile_mask;
> > >  	op->base.op = DRM_GPUVA_OP_MAP;
> > >  	op->base.map.va.addr = vma->gpuva.va.addr;
> > >  	op->base.map.va.range = vma->gpuva.va.range;
> > > @@ -893,6 +894,33 @@ int xe_vm_rebind(struct xe_vm *vm, bool
> > > rebind_worker)
> > >  	return err;
> > >  }
> > >
> > > +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma
> *vma,
> > > u8 tile_mask)
> >
> >
> > I am trying to figure out why this function is necessary. We are only binding one
> > vma here. Why do we need to create a xe_vma_ops list? We are only adding one
> > vma to this list.
> >
> 
> See above: the ability to directly modify page tables without a xe_vma_ops
> list will be removed.
> 
> Matt
> 
> > Oak
> >
> > > +{
> > > +	struct dma_fence *fence = NULL;
> > > +	struct xe_vma_ops vops;
> > > +	struct xe_vma_op *op, *next_op;
> > > +	int err;
> > > +
> > > +	lockdep_assert_held(&vm->lock);
> > > +	xe_vm_assert_held(vm);
> > > +	xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
> > > +
> > > +	xe_vma_ops_init(&vops);
> > > +
> > > +	err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
> > > +	if (err)
> > > +		return ERR_PTR(err);
> > > +
> > > +	fence = ops_execute(vm, &vops);
> > > +
> > > +	list_for_each_entry_safe(op, next_op, &vops.list, link) {
> > > +		list_del(&op->link);
> > > +		kfree(op);
> > > +	}
> > > +
> > > +	return fence;
> > > +}
> > > +
> > >  static void xe_vma_free(struct xe_vma *vma)
> > >  {
> > >  	if (xe_vma_is_userptr(vma))
> > > @@ -1796,7 +1824,7 @@ xe_vm_unbind_vma(struct xe_vma *vma,
> struct
> > > xe_exec_queue *q,
> > >  static struct dma_fence *
> > >  xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> > >  	       struct xe_sync_entry *syncs, u32 num_syncs,
> > > -	       bool first_op, bool last_op)
> > > +	       u8 tile_mask, bool first_op, bool last_op)
> > >  {
> > >  	struct xe_tile *tile;
> > >  	struct dma_fence *fence;
> > > @@ -1804,7 +1832,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> > > xe_exec_queue *q,
> > >  	struct dma_fence_array *cf = NULL;
> > >  	struct xe_vm *vm = xe_vma_vm(vma);
> > >  	int cur_fence = 0, i;
> > > -	int number_tiles = hweight8(vma->tile_mask);
> > > +	int number_tiles = hweight8(tile_mask);
> > >  	int err;
> > >  	u8 id;
> > >
> > > @@ -1818,7 +1846,7 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
> > > xe_exec_queue *q,
> > >  	}
> > >
> > >  	for_each_tile(tile, vm->xe, id) {
> > > -		if (!(vma->tile_mask & BIT(id)))
> > > +		if (!(tile_mask & BIT(id)))
> > >  			goto next;
> > >
> > >  		fence = __xe_pt_bind_vma(tile, vma, q ? q : vm->q[id],
> > > @@ -1886,7 +1914,7 @@ find_ufence_get(struct xe_sync_entry *syncs,
> u32
> > > num_syncs)
> > >  static struct dma_fence *
> > >  xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct
> > > xe_exec_queue *q,
> > >  	   struct xe_bo *bo, struct xe_sync_entry *syncs, u32 num_syncs,
> > > -	   bool immediate, bool first_op, bool last_op)
> > > +	   u8 tile_mask, bool immediate, bool first_op, bool last_op)
> > >  {
> > >  	struct dma_fence *fence;
> > >  	struct xe_exec_queue *wait_exec_queue =
> > > to_wait_exec_queue(vm, q);
> > > @@ -1902,8 +1930,8 @@ xe_vm_bind(struct xe_vm *vm, struct xe_vma
> > > *vma, struct xe_exec_queue *q,
> > >  	vma->ufence = ufence ?: vma->ufence;
> > >
> > >  	if (immediate) {
> > > -		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> > > first_op,
> > > -				       last_op);
> > > +		fence = xe_vm_bind_vma(vma, q, syncs, num_syncs,
> > > tile_mask,
> > > +				       first_op, last_op);
> > >  		if (IS_ERR(fence))
> > >  			return fence;
> > >  	} else {
> > > @@ -2095,7 +2123,7 @@ xe_vm_prefetch(struct xe_vm *vm, struct
> xe_vma
> > > *vma,
> > >
> > >  	if (vma->tile_mask != (vma->tile_present & ~vma->tile_invalidated))
> > > {
> > >  		return xe_vm_bind(vm, vma, q, xe_vma_bo(vma), syncs,
> > > num_syncs,
> > > -				  true, first_op, last_op);
> > > +				  vma->tile_mask, true, first_op, last_op);
> > >  	} else {
> > >  		struct dma_fence *fence =
> > >  			xe_exec_queue_last_fence_get(wait_exec_queue,
> > > vm);
> > > @@ -2408,10 +2436,15 @@ static int vm_bind_ioctl_ops_parse(struct
> xe_vm
> > > *vm, struct xe_exec_queue *q,
> > >  	struct xe_device *xe = vm->xe;
> > >  	struct xe_vma_op *last_op = NULL;
> > >  	struct drm_gpuva_op *__op;
> > > +	struct xe_tile *tile;
> > > +	u8 id, tile_mask = 0;
> > >  	int err = 0;
> > >
> > >  	lockdep_assert_held_write(&vm->lock);
> > >
> > > +	for_each_tile(tile, vm->xe, id)
> > > +		tile_mask |= 0x1 << id;
> > > +
> > >  	drm_gpuva_for_each_op(__op, ops) {
> > >  		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> > >  		struct xe_vma *vma;
> > > @@ -2428,6 +2461,7 @@ static int vm_bind_ioctl_ops_parse(struct
> xe_vm
> > > *vm, struct xe_exec_queue *q,
> > >  		}
> > >
> > >  		op->q = q;
> > > +		op->tile_mask = tile_mask;
> > >
> > >  		switch (op->base.op) {
> > >  		case DRM_GPUVA_OP_MAP:
> > > @@ -2574,6 +2608,7 @@ static struct dma_fence *op_execute(struct
> xe_vm
> > > *vm, struct xe_vma *vma,
> > >  		fence = xe_vm_bind(vm, vma, op->q, xe_vma_bo(vma),
> > >  				   op->syncs, op->num_syncs,
> > >  				   op->map.immediate
> > > || !xe_vm_in_fault_mode(vm),
> > > +				   op->tile_mask,
> > >  				   op->flags & XE_VMA_OP_FIRST,
> > >  				   op->flags & XE_VMA_OP_LAST);
> > >  		break;
> > > @@ -2600,7 +2635,9 @@ static struct dma_fence *op_execute(struct
> xe_vm
> > > *vm, struct xe_vma *vma,
> > >  			dma_fence_put(fence);
> > >  			fence = xe_vm_bind(vm, op->remap.prev, op->q,
> > >  					   xe_vma_bo(op->remap.prev), op-
> > > >syncs,
> > > -					   op->num_syncs, true, false,
> > > +					   op->num_syncs,
> > > +					   op->remap.prev->tile_mask, true,
> > > +					   false,
> > >  					   op->flags & XE_VMA_OP_LAST
> > > && !next);
> > >  			op->remap.prev->gpuva.flags &=
> > > ~XE_VMA_LAST_REBIND;
> > >  			if (IS_ERR(fence))
> > > @@ -2614,8 +2651,8 @@ static struct dma_fence *op_execute(struct
> xe_vm
> > > *vm, struct xe_vma *vma,
> > >  			fence = xe_vm_bind(vm, op->remap.next, op->q,
> > >  					   xe_vma_bo(op->remap.next),
> > >  					   op->syncs, op->num_syncs,
> > > -					   true, false,
> > > -					   op->flags & XE_VMA_OP_LAST);
> > > +					   op->remap.next->tile_mask, true,
> > > +					   false, op->flags &
> > > XE_VMA_OP_LAST);
> > >  			op->remap.next->gpuva.flags &=
> > > ~XE_VMA_LAST_REBIND;
> > >  			if (IS_ERR(fence))
> > >  				break;
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> > > index 306cd0934a19..204a4ff63f88 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.h
> > > +++ b/drivers/gpu/drm/xe/xe_vm.h
> > > @@ -208,6 +208,8 @@ int __xe_vm_userptr_needs_repin(struct xe_vm
> > > *vm);
> > >  int xe_vm_userptr_check_repin(struct xe_vm *vm);
> > >
> > >  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
> > > +struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma
> *vma,
> > > +				u8 tile_mask);
> > >
> > >  int xe_vm_invalidate_vma(struct xe_vma *vma);
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
> > > b/drivers/gpu/drm/xe/xe_vm_types.h
> > > index 149ab892967e..e9cd6da6263a 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > > @@ -343,6 +343,8 @@ struct xe_vma_op {
> > >  	struct list_head link;
> > >  	/** @flags: operation flags */
> > >  	enum xe_vma_op_flags flags;
> > > +	/** @tile_mask: Tile mask for operation */
> > > +	u8 tile_mask;
> > >
> > >  	union {
> > >  		/** @map: VMA map operation specific data */
> > > --
> > > 2.34.1
> >


More information about the Intel-xe mailing list