[PATCH v4 01/30] drm/xe: Lock all gpuva ops during VM bind IOCTL

Matthew Brost matthew.brost at intel.com
Mon Mar 11 19:48:59 UTC 2024


On Sun, Mar 10, 2024 at 11:44:00AM -0600, Zeng, Oak wrote:
> 
> 
> > -----Original Message-----
> > From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of Matthew
> > Brost
> > Sent: Friday, March 8, 2024 12:08 AM
> > To: intel-xe at lists.freedesktop.org
> > Cc: Brost, Matthew <matthew.brost at intel.com>
> > Subject: [PATCH v4 01/30] drm/xe: Lock all gpuva ops during VM bind IOCTL
> > 
> > Lock all gpuva ops
> 
> Can we have better wording? Better to say locking all BOs used in gpuva ops?
> 
> Or maybe: lock ops by locking and validating all BOs used in the ops.
> 
>  and validate all BOs in a single step during the VM


Lock all BOs used in gpuva ops and validate all BOs in a single step
during the VM bind IOCTL?

> > bind IOCTL. This helps with the transition to making all gpuva ops in a
> > VM bind IOCTL a single atomic job.
> 
> Can you also explain why you want bind to be an atomic job?
> 
> My guess is the bind IOCTL can end up with a series of operations; if some (not all) of those operations fail in the middle, it is hard to revert the successful operations before the failure.

Yes, exactly. It ensures that if we fail at some point in the bind IOCTL
(e.g. a memory allocation failure) we can unwind to the initial state.
Another benefit is we get 1 fence per IOCTL which is installed in the
dma-resv slots and returned to the user via the out-syncs. Lastly, it
logically makes sense that 1 IOCTL translates to 1 job.

The cover letter at some point explained this. I'll add something about
it in the next rev.
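
Roughly the shape this is heading towards, as a sketch only (the helper
names below are the ones from this series; the error labels are just
illustrative):

	err = vm_bind_ioctl_ops_parse(vm, q, ...);	/* build ops_list, nothing committed yet */
	if (err)
		goto free_ops;

	err = vm_bind_ioctl_ops_execute(vm, &ops_list);	/* lock + validate + submit as 1 job */
	if (err)
		goto unwind_ops;			/* revert to the initial state */

	/*
	 * The single job yields 1 fence, installed in the dma-resv slots and
	 * returned to the user via the out-syncs.
	 */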

> > 
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_vm.c | 142 ++++++++++++++++++++++++++-----------
> >  1 file changed, 101 insertions(+), 41 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 643b3701a738..3b5dc6de07f7 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -413,19 +413,23 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
> > 
> >  #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
> > 
> > -static void xe_vm_kill(struct xe_vm *vm)
> > +static void xe_vm_kill(struct xe_vm *vm, bool unlocked)
> >  {
> >  	struct xe_exec_queue *q;
> > 
> >  	lockdep_assert_held(&vm->lock);
> > 
> > -	xe_vm_lock(vm, false);
> > +	if (unlocked)
> > +		xe_vm_lock(vm, false);
> > +
> 
> Can you explain why we need xe_vm_lock in the first place here? 
> 
> My understanding is that xe_vm_lock protects GPU page table updates. The kill function below eventually calls into guc_exec_queue_kill, where I don't see any page table operation. So I doubt whether we need the lock in the first place.
> 

I think it protects the vm->preempt.exec_queues list, but that could
probably be reworked. For this series I'd rather leave the locking as is
and then in a follow-up rework the locking a bit and fully document it.
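
To make the new flag concrete, the two call sites in this patch end up as:

	/* preempt rebind worker: dma-resv not held here, take it around the kill */
	xe_vm_kill(vm, true);

	/* vm_bind_ioctl_ops_execute: already inside drm_exec_until_all_locked,
	 * so the VM dma-resv is held and must not be taken again */
	xe_vm_kill(vm, false);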

> >  	vm->flags |= XE_VM_FLAG_BANNED;
> >  	trace_xe_vm_kill(vm);
> > 
> >  	list_for_each_entry(q, &vm->preempt.exec_queues, compute.link)
> >  		q->ops->kill(q);
> > -	xe_vm_unlock(vm);
> > +
> > +	if (unlocked)
> > +		xe_vm_unlock(vm);
> > 
> >  	/* TODO: Inform user the VM is banned */
> >  }
> > @@ -621,7 +625,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
> > 
> >  	if (err) {
> >  		drm_warn(&vm->xe->drm, "VM worker error: %d\n", err);
> > -		xe_vm_kill(vm);
> > +		xe_vm_kill(vm, true);
> >  	}
> >  	up_write(&vm->lock);
> > 
> > @@ -1831,17 +1835,9 @@ static int xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma, struct xe_exec_queue
> >  		      u32 num_syncs, bool immediate, bool first_op,
> >  		      bool last_op)
> >  {
> > -	int err;
> > -
> >  	xe_vm_assert_held(vm);
> >  	xe_bo_assert_held(bo);
> > 
> > -	if (bo && immediate) {
> > -		err = xe_bo_validate(bo, vm, true);
> > -		if (err)
> > -			return err;
> > -	}
> > -
> >  	return __xe_vm_bind(vm, vma, q, syncs, num_syncs, immediate, first_op,
> >  			    last_op);
> >  }
> > @@ -2488,17 +2484,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
> >  	return 0;
> >  }
> > 
> > -static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
> > -		      struct xe_vma *vma, struct xe_vma_op *op)
> > +static int op_execute(struct xe_vm *vm, struct xe_vma *vma,
> > +		      struct xe_vma_op *op)
> >  {
> >  	int err;
> > 
> >  	lockdep_assert_held_write(&vm->lock);
> > -
> > -	err = xe_vm_prepare_vma(exec, vma, 1);
> > -	if (err)
> > -		return err;
> > -
> >  	xe_vm_assert_held(vm);
> >  	xe_bo_assert_held(xe_vma_bo(vma));
> > 
> > @@ -2579,19 +2570,10 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
> >  static int __xe_vma_op_execute(struct xe_vm *vm, struct xe_vma *vma,
> >  			       struct xe_vma_op *op)
> >  {
> > -	struct drm_exec exec;
> >  	int err;
> > 
> >  retry_userptr:
> > -	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
> > -	drm_exec_until_all_locked(&exec) {
> > -		err = op_execute(&exec, vm, vma, op);
> > -		drm_exec_retry_on_contention(&exec);
> > -		if (err)
> > -			break;
> > -	}
> > -	drm_exec_fini(&exec);
> > -
> > +	err = op_execute(vm, vma, op);
> >  	if (err == -EAGAIN) {
> >  		lockdep_assert_held_write(&vm->lock);
> > 
> > @@ -2756,29 +2738,107 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
> >  	}
> >  }
> > 
> > +static int vma_lock(struct drm_exec *exec, struct xe_vma *vma, bool validate)
> > +{
> > +	struct xe_bo *bo = xe_vma_bo(vma);
> > +	int err = 0;
> > +
> > +	if (bo) {
> > +		if (!bo->vm)
> > +			err = drm_exec_prepare_obj(exec, &bo->ttm.base, 1);
> > +		if (!err && validate)
> > +			err = xe_bo_validate(bo, xe_vma_vm(vma), true);
> > +	}
> > +
> > +	return err;
> > +}
> > +
> > +static int op_lock(struct drm_exec *exec, struct xe_vm *vm,
> > +		   struct xe_vma_op *op)
> > +{
> > +	int err = 0;
> > +
> > +	switch (op->base.op) {
> > +	case DRM_GPUVA_OP_MAP:
> > +		err = vma_lock(exec, op->map.vma, !xe_vm_in_fault_mode(vm));
> > +		break;
> > +	case DRM_GPUVA_OP_REMAP:
> > +		err = vma_lock(exec, gpuva_to_vma(op->base.remap.unmap->va),
> > +			       false);
> > +		if (!err && op->remap.prev)
> > +			err = vma_lock(exec, op->remap.prev, true);
> > +		if (!err && op->remap.next)
> > +			err = vma_lock(exec, op->remap.next, true);
> > +		break;
> > +	case DRM_GPUVA_OP_UNMAP:
> > +		err = vma_lock(exec, gpuva_to_vma(op->base.unmap.va), false);
> > +		break;
> > +	case DRM_GPUVA_OP_PREFETCH:
> > +		err = vma_lock(exec, gpuva_to_vma(op->base.prefetch.va), true);
> > +		break;
> > +	default:
> > +		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> > +	}
> > +
> > +	return err;
> > +}
> > +
> > +static int vm_bind_ioctl_ops_lock(struct drm_exec *exec,
> > +				  struct xe_vm *vm,
> > +				  struct list_head *ops_list)
> > +{
> > +	struct xe_vma_op *op;
> > +	int err;
> > +
> > +	err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), 1);
> > +	if (err)
> > +		return err;
> > +
> > +	list_for_each_entry(op, ops_list, link) {
> > +		err = op_lock(exec, vm, op);
> > +		if (err)
> > +			return err;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> >  				     struct list_head *ops_list)
> >  {
> > +	struct drm_exec exec;
> >  	struct xe_vma_op *op, *next;
> >  	int err;
> > 
> >  	lockdep_assert_held_write(&vm->lock);
> > 
> > -	list_for_each_entry_safe(op, next, ops_list, link) {
> > -		err = xe_vma_op_execute(vm, op);
> > -		if (err) {
> > -			drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
> > -				 op->base.op, err);
> > -			/*
> > -			 * FIXME: Killing VM rather than proper error handling
> > -			 */
> > -			xe_vm_kill(vm);
> > -			return -ENOSPC;
> > +	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
> > +		      DRM_EXEC_IGNORE_DUPLICATES, 0);
> > +	drm_exec_until_all_locked(&exec) {
> > +		err = vm_bind_ioctl_ops_lock(&exec, vm, ops_list);
> > +		drm_exec_retry_on_contention(&exec);
> > +		if (err)
> > +			goto unlock;
> > +
> 
> Do you need the ops below inside the drm_exec_until_all_locked loop? After you have locked all objects, you can close the drm_exec_until_all_locked loop, then perform those ops outside of it.
> 

Yes, xe_vma_op_execute can allocate GPU memory. GPU memory allocations
can trigger evictions which try to grab more locks. Once the
drm_exec_until_all_locked loop is exited we are not allowed to grab
more locks. Also, eventually I think we will pass the drm_exec state
into memory allocations so the locking can hook into that.
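
Condensed, the structure of vm_bind_ioctl_ops_execute in this patch, with
that reasoning as comments (a sketch, error handling trimmed):

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
		      DRM_EXEC_IGNORE_DUPLICATES, 0);
	drm_exec_until_all_locked(&exec) {
		/* lock the VM dma-resv + all BOs used by the ops, validate */
		err = vm_bind_ioctl_ops_lock(&exec, vm, ops_list);
		drm_exec_retry_on_contention(&exec);
		...
		/*
		 * Op execution stays inside the transaction: it may allocate
		 * GPU memory, which may evict and thus need to lock more BOs,
		 * and that is only allowed while the drm_exec loop is open.
		 */
		err = xe_vma_op_execute(vm, op);
		...
	}
	drm_exec_fini(&exec);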

Matt

> Oak 
> 
> > +		list_for_each_entry_safe(op, next, ops_list, link) {
> > +			err = xe_vma_op_execute(vm, op);
> > +			if (err) {
> > +				drm_warn(&vm->xe->drm, "VM op(%d) failed with %d",
> > +					 op->base.op, err);
> > +				/*
> > +				 * FIXME: Killing VM rather than proper error handling
> > +				 */
> > +				xe_vm_kill(vm, false);
> > +				err = -ENOSPC;
> > +				goto unlock;
> > +			}
> > +			xe_vma_op_cleanup(vm, op);
> >  		}
> > -		xe_vma_op_cleanup(vm, op);
> >  	}
> > 
> > -	return 0;
> > +unlock:
> > +	drm_exec_fini(&exec);
> > +	return err;
> >  }
> > 
> >  #define SUPPORTED_FLAGS	(DRM_XE_VM_BIND_FLAG_NULL | \
> > --
> > 2.34.1
> 

