[Intel-xe] [PATCH 2/5] drm/xe: Reduce the number list links in xe_vma

Matthew Brost matthew.brost at intel.com
Wed Jul 19 22:19:47 UTC 2023


On Sat, Jul 15, 2023 at 03:11:59PM +0200, Thomas Hellström wrote:
> Hi, Matthew,
> 
> On 7/11/23 23:27, Matthew Brost wrote:
> > Combine the userptr, rebind, and destroy links into a union as
> > the lists these links belong to are mutually exclusive.
> > 
> > v2: Adjust which lists are combined (Thomas H)
> > 
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> >   drivers/gpu/drm/xe/xe_bo.c       |  6 +++--
> >   drivers/gpu/drm/xe/xe_exec.c     |  2 +-
> >   drivers/gpu/drm/xe/xe_pt.c       |  2 +-
> >   drivers/gpu/drm/xe/xe_vm.c       | 45 ++++++++++++++++----------------
> >   drivers/gpu/drm/xe/xe_vm_types.h | 28 +++++++++-----------
> >   5 files changed, 41 insertions(+), 42 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> > index 6353afa8d846..ca7f74ecb753 100644
> > --- a/drivers/gpu/drm/xe/xe_bo.c
> > +++ b/drivers/gpu/drm/xe/xe_bo.c
> > @@ -474,8 +474,10 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
> >   			}
> >   			xe_vm_assert_held(vm);
> > -			if (list_empty(&vma->rebind_link) && vma->tile_present)
> > -				list_add_tail(&vma->rebind_link, &vm->rebind_list);
> > +			if (list_empty(&vma->combined_links.rebind) &&
> > +			    vma->tile_present)
> > +				list_add_tail(&vma->combined_links.rebind,
> > +					      &vm->rebind_list);
> >   			if (vm_resv_locked)
> >   				dma_resv_unlock(&vm->resv);
> > diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> > index ba13d20ed348..bc5bd4bdc582 100644
> > --- a/drivers/gpu/drm/xe/xe_exec.c
> > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > @@ -120,7 +120,7 @@ static int xe_exec_begin(struct xe_engine *e, struct ww_acquire_ctx *ww,
> >   	 * BOs have valid placements possibly moving an evicted BO back
> >   	 * to a location where the GPU can access it).
> >   	 */
> > -	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
> > +	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
> >   		XE_WARN_ON(xe_vma_is_null(vma));
> >   		if (xe_vma_is_userptr(vma))
> > diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> > index a8d96cbd53e3..3b43b32f4a33 100644
> > --- a/drivers/gpu/drm/xe/xe_pt.c
> > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > @@ -1690,7 +1690,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
> >   	}
> >   	if (!vma->tile_present)
> > -		list_del_init(&vma->rebind_link);
> > +		list_del_init(&vma->combined_links.rebind);
> >   	if (unbind_pt_update.locked) {
> >   		XE_WARN_ON(!xe_vma_is_userptr(vma));
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 5f4f6dab270a..b2847be6de6a 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -467,7 +467,8 @@ int xe_vm_lock_dma_resv(struct xe_vm *vm, struct ww_acquire_ctx *ww,
> >   		list_del_init(&vma->notifier.rebind_link);
> >   		if (vma->tile_present && !(vma->gpuva.flags & XE_VMA_DESTROYED))
> > -			list_move_tail(&vma->rebind_link, &vm->rebind_list);
> > +			list_move_tail(&vma->combined_links.rebind,
> > +				       &vm->rebind_list);
> >   	}
> >   	spin_unlock(&vm->notifier.list_lock);
> > @@ -608,7 +609,7 @@ static void preempt_rebind_work_func(struct work_struct *w)
> >   	if (err)
> >   		goto out_unlock;
> > -	list_for_each_entry(vma, &vm->rebind_list, rebind_link) {
> > +	list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) {
> >   		if (xe_vma_has_no_bo(vma) ||
> >   		    vma->gpuva.flags & XE_VMA_DESTROYED)
> >   			continue;
> > @@ -780,17 +781,20 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
> >   	list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
> >   				 userptr.invalidate_link) {
> >   		list_del_init(&vma->userptr.invalidate_link);
> > -		list_move_tail(&vma->userptr_link, &vm->userptr.repin_list);
> > +		if (list_empty(&vma->combined_links.userptr))
> > +			list_move_tail(&vma->combined_links.userptr,
> > +				       &vm->userptr.repin_list);
> >   	}
> >   	spin_unlock(&vm->userptr.invalidated_lock);
> >   	/* Pin and move to temporary list */
> > -	list_for_each_entry_safe(vma, next, &vm->userptr.repin_list, userptr_link) {
> > +	list_for_each_entry_safe(vma, next, &vm->userptr.repin_list,
> > +				 combined_links.userptr) {
> >   		err = xe_vma_userptr_pin_pages(vma);
> >   		if (err < 0)
> >   			goto out_err;
> > -		list_move_tail(&vma->userptr_link, &tmp_evict);
> > +		list_move_tail(&vma->combined_links.userptr, &tmp_evict);
> >   	}
> >   	/* Take lock and move to rebind_list for rebinding. */
> > @@ -798,10 +802,8 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
> >   	if (err)
> >   		goto out_err;
> > -	list_for_each_entry_safe(vma, next, &tmp_evict, userptr_link) {
> > -		list_del_init(&vma->userptr_link);
> > -		list_move_tail(&vma->rebind_link, &vm->rebind_list);
> > -	}
> > +	list_for_each_entry_safe(vma, next, &tmp_evict, combined_links.userptr)
> > +		list_move_tail(&vma->combined_links.rebind, &vm->rebind_list);
> >   	dma_resv_unlock(&vm->resv);
> > @@ -845,10 +847,11 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
> >   		return NULL;
> >   	xe_vm_assert_held(vm);
> > -	list_for_each_entry_safe(vma, next, &vm->rebind_list, rebind_link) {
> > +	list_for_each_entry_safe(vma, next, &vm->rebind_list,
> > +				 combined_links.rebind) {
> >   		XE_WARN_ON(!vma->tile_present);
> > -		list_del_init(&vma->rebind_link);
> > +		list_del_init(&vma->combined_links.rebind);
> >   		dma_fence_put(fence);
> >   		if (rebind_worker)
> >   			trace_xe_vma_rebind_worker(vma);
> > @@ -883,9 +886,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
> >   		return vma;
> >   	}
> > -	INIT_LIST_HEAD(&vma->rebind_link);
> > -	INIT_LIST_HEAD(&vma->unbind_link);
> > -	INIT_LIST_HEAD(&vma->userptr_link);
> > +	INIT_LIST_HEAD(&vma->combined_links.rebind);
> >   	INIT_LIST_HEAD(&vma->userptr.invalidate_link);
> >   	INIT_LIST_HEAD(&vma->notifier.rebind_link);
> >   	INIT_LIST_HEAD(&vma->extobj.link);
> > @@ -1058,15 +1059,14 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
> >   	struct xe_vm *vm = xe_vma_vm(vma);
> >   	lockdep_assert_held_write(&vm->lock);
> > -	XE_BUG_ON(!list_empty(&vma->unbind_link));
> > +	XE_BUG_ON(!list_empty(&vma->combined_links.destroy));
> >   	if (xe_vma_is_userptr(vma)) {
> >   		XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED));
> >   		spin_lock(&vm->userptr.invalidated_lock);
> > -		list_del_init(&vma->userptr.invalidate_link);
> > +		list_del(&vma->userptr.invalidate_link);
> >   		spin_unlock(&vm->userptr.invalidated_lock);
> > -		list_del(&vma->userptr_link);
> >   	} else if (!xe_vma_is_null(vma)) {
> >   		xe_bo_assert_held(xe_vma_bo(vma));
> > @@ -1087,9 +1087,6 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
> >   	}
> >   	xe_vm_assert_held(vm);
> > -	if (!list_empty(&vma->rebind_link))
> > -		list_del(&vma->rebind_link);
> > -
> >   	if (fence) {
> >   		int ret = dma_fence_add_callback(fence, &vma->destroy_cb,
> >   						 vma_destroy_cb);
> > @@ -1443,11 +1440,12 @@ void xe_vm_close_and_put(struct xe_vm *vm)
> >   		/* easy case, remove from VMA? */
> >   		if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) {
> > +			list_del_init(&vma->combined_links.rebind);
> >   			xe_vma_destroy(vma, NULL);
> >   			continue;
> >   		}
> > -		list_add_tail(&vma->unbind_link, &contested);
> > +		list_move_tail(&vma->combined_links.destroy, &contested);
> >   	}
> >   	/*
> > @@ -1475,8 +1473,9 @@ void xe_vm_close_and_put(struct xe_vm *vm)
> >   	 * Since we hold a refcount to the bo, we can remove and free
> >   	 * the members safely without locking.
> >   	 */
> > -	list_for_each_entry_safe(vma, next_vma, &contested, unbind_link) {
> > -		list_del_init(&vma->unbind_link);
> > +	list_for_each_entry_safe(vma, next_vma, &contested,
> > +				 combined_links.destroy) {
> > +		list_del_init(&vma->combined_links.destroy);
> >   		xe_vma_destroy_unlocked(vma);
> >   	}
> > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> > index f17dc5d7370e..84bf1620214c 100644
> > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > @@ -49,21 +49,19 @@ struct xe_vma {
> >   	 */
> >   	u64 tile_present;
> > -	/** @userptr_link: link into VM repin list if userptr */
> > -	struct list_head userptr_link;
> > -
> > -	/**
> > -	 * @rebind_link: link into VM if this VMA needs rebinding, and
> > -	 * if it's a bo (not userptr) needs validation after a possible
> > -	 * eviction. Protected by the vm's resv lock.
> > -	 */
> > -	struct list_head rebind_link;
> 
> The rebind_link and the userptr_link are not combinable. To be combinable
> they need to either be protected by the same lock or be kept mutually
> exclusive by some other *simple* argument, like construction / destruction,
> used for different subclasses etc.
> 

This is definitely safe, I think you are getting confused as we have a
lot of lists.

> The userptr_link is protected by the userptr.invalidated_lock and the

No, userptr.invalidate_link is protected by userptr.invalidated_lock.

combined_links.userptr is protected by vm->lock in write mode. I'll add
comment for this.

Yes, the combined_links.userptr & combined_links.rebind links have
different locks but this is 100% safe.

See xe_vm_userptr_pin, we safely move from userptr.invalidate_link ->
combined_links.userptr -> combined_links.rebind with the correct locks
(vm->lock is held throughout, dma-resv lock grabbed when using
combined_links.rebind).

We add to the combined_links.rebind with the dma-resv lock in
xe_bo_trigger_rebind which has mutually exlusive execution with
xe_vm_userptr_pin as the former is for VMAs with a BO and the later is
for userptrs.

We remove from combined_links.rebind in combined_links.rebind which has
both vm->lock & dma-resv lock. This makes it safe with both
xe_vm_userptr_pin & xe_bo_trigger_rebind.

In xe_vm_lock_dma_resv we move from notifier.rebind_link ->
combined_links.rebind and we have both vm->lock and dma-resv lock. Again
safe with everything above.

xe_vm_rebind we remove from combined_links.rebind and we have both
vm->lock and dma-resv lock. Again safe with everything above.

Lastly in xe_vm_close_and_put we use combined_links.destroy but again
have vm->lock and dma-resv lock.

All safe.

> rebind_link by the vm resv, and no simple argument can be made. In fact
> let's say that a userptr sits on or is being moved to the rebind list, and
> then the invalidation notifier gets called, which will try to move it to the
> userptr list again using the userptr link. In the best case it will fail the
> move since the list is not empty, in the worst case list corruption due to
> the list head is currently being modified.
> 
> So IMO for any list head being combined like this, we need to kerneldoc why
> this is possible, using either the lock argument or another simple argument
> like detailed above.
> 

I'll update the kernel doc.

Matt

> I *think* the notifier.rebind_list and the userptr_list are combinable,
> though, using the different vma subclass (userptr/bo) argument.
> 
> Thanks,
> 
> Thomas
> 
> 
> > -
> > -	/**
> > -	 * @unbind_link: link or list head if an unbind of multiple VMAs, in
> > -	 * single unbind op, is being done.
> > -	 */
> > -	struct list_head unbind_link;
> > +	/** @combined_links: links into lists which are mutually exclusive */
> > +	union {
> > +		/** @userptr: link into VM repin list if userptr */
> > +		struct list_head userptr;
> > +		/**
> > +		 * @rebind: link into VM if this VMA needs rebinding, and
> > +		 * if it's a bo (not userptr) needs validation after a possible
> > +		 * eviction. Protected by the vm's resv lock.
> > +		 */
> > +		struct list_head rebind;
> > +		/** @destroy: link to contested list when VM is being closed */
> > +		struct list_head destroy;
> > +	} combined_links;
> >   	/** @destroy_cb: callback to destroy VMA when unbind job is done */
> >   	struct dma_fence_cb destroy_cb;


More information about the Intel-xe mailing list