[Intel-xe] [PATCH v2 19/31] drm/xe: Reduce the number of list links in xe_vma

Thomas Hellström thomas.hellstrom at linux.intel.com
Thu May 11 08:38:48 UTC 2023


On 5/2/23 02:17, Matthew Brost wrote:
> 5 list links can be squashed into a union in xe_vma as being on the
> various lists is mutually exclusive.
>
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_gt_pagefault.c |  2 +-
>   drivers/gpu/drm/xe/xe_pt.c           |  5 +-
>   drivers/gpu/drm/xe/xe_vm.c           | 29 ++++++------
>   drivers/gpu/drm/xe/xe_vm_types.h     | 71 +++++++++++++++-------------
>   4 files changed, 55 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index cfffe3398fe4..d7bf6b0a0697 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -157,7 +157,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
>   
>   	if (xe_vma_is_userptr(vma) && write_locked) {
>   		spin_lock(&vm->userptr.invalidated_lock);
> -		list_del_init(&vma->userptr.invalidate_link);
> +		list_del_init(&vma->invalidate_link);
>   		spin_unlock(&vm->userptr.invalidated_lock);
>   
>   		ret = xe_vma_userptr_pin_pages(vma);
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 010f44260cda..8eab8e1bbaf0 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -1116,8 +1116,7 @@ static int xe_pt_userptr_inject_eagain(struct xe_vma *vma)
>   
>   		vma->userptr.divisor = divisor << 1;
>   		spin_lock(&vm->userptr.invalidated_lock);
> -		list_move_tail(&vma->userptr.invalidate_link,
> -			       &vm->userptr.invalidated);
> +		list_move_tail(&vma->invalidate_link, &vm->userptr.invalidated);
>   		spin_unlock(&vm->userptr.invalidated_lock);
>   		return true;
>   	}
> @@ -1724,7 +1723,7 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
>   
>   		if (!vma->gt_present) {
>   			spin_lock(&vm->userptr.invalidated_lock);
> -			list_del_init(&vma->userptr.invalidate_link);
> +			list_del_init(&vma->invalidate_link);
>   			spin_unlock(&vm->userptr.invalidated_lock);
>   		}
>   		up_read(&vm->userptr.notifier_lock);
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index e0ed7201aeb0..e5f2fffb2aec 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -677,8 +677,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
>   	if (!xe_vm_in_fault_mode(vm) &&
>   	    !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->gt_present) {
>   		spin_lock(&vm->userptr.invalidated_lock);
> -		list_move_tail(&vma->userptr.invalidate_link,
> -			       &vm->userptr.invalidated);
> +		list_move_tail(&vma->invalidate_link, &vm->userptr.invalidated);
>   		spin_unlock(&vm->userptr.invalidated_lock);
>   	}
>   
> @@ -726,8 +725,8 @@ int xe_vm_userptr_pin(struct xe_vm *vm)
>   	/* Collect invalidated userptrs */
>   	spin_lock(&vm->userptr.invalidated_lock);
>   	list_for_each_entry_safe(vma, next, &vm->userptr.invalidated,
> -				 userptr.invalidate_link) {
> -		list_del_init(&vma->userptr.invalidate_link);
> +				 invalidate_link) {
> +		list_del_init(&vma->invalidate_link);
>   		list_move_tail(&vma->userptr_link, &vm->userptr.repin_list);
>   	}
>   	spin_unlock(&vm->userptr.invalidated_lock);
> @@ -830,12 +829,11 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>   		return vma;
>   	}
>   
> -	/* FIXME: Way to many lists, should be able to reduce this */
> +	/*
> +	 * userptr_link, destroy_link, notifier.rebind_link,
> +	 * invalidate_link
> +	 */
>   	INIT_LIST_HEAD(&vma->rebind_link);
> -	INIT_LIST_HEAD(&vma->unbind_link);
> -	INIT_LIST_HEAD(&vma->userptr_link);
> -	INIT_LIST_HEAD(&vma->userptr.invalidate_link);
> -	INIT_LIST_HEAD(&vma->notifier.rebind_link);
>   	INIT_LIST_HEAD(&vma->extobj.link);
>   
>   	INIT_LIST_HEAD(&vma->gpuva.gem.entry);
> @@ -953,15 +951,14 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
>   	struct xe_vm *vm = xe_vma_vm(vma);
>   
>   	lockdep_assert_held_write(&vm->lock);
> -	XE_BUG_ON(!list_empty(&vma->unbind_link));
>   
>   	if (xe_vma_is_userptr(vma)) {
>   		XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED));
>   
>   		spin_lock(&vm->userptr.invalidated_lock);
> -		list_del_init(&vma->userptr.invalidate_link);
> +		if (!list_empty(&vma->invalidate_link))
> +			list_del_init(&vma->invalidate_link);
>   		spin_unlock(&vm->userptr.invalidated_lock);
> -		list_del(&vma->userptr_link);
>   	} else if (!xe_vma_is_null(vma)) {
>   		xe_bo_assert_held(xe_vma_bo(vma));
>   		drm_gpuva_unlink(&vma->gpuva);
> @@ -1328,7 +1325,9 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>   			continue;
>   		}
>   
> -		list_add_tail(&vma->unbind_link, &contested);
> +		if (!list_empty(&vma->destroy_link))
> +			list_del_init(&vma->destroy_link);
> +		list_add_tail(&vma->destroy_link, &contested);
>   	}
>   
>   	/*
> @@ -1356,8 +1355,8 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>   	 * Since we hold a refcount to the bo, we can remove and free
>   	 * the members safely without locking.
>   	 */
> -	list_for_each_entry_safe(vma, next_vma, &contested, unbind_link) {
> -		list_del_init(&vma->unbind_link);
> +	list_for_each_entry_safe(vma, next_vma, &contested, destroy_link) {
> +		list_del_init(&vma->destroy_link);
>   		xe_vma_destroy_unlocked(vma);
>   	}
>   
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index d55ec8156caa..22def5483c12 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -50,21 +50,32 @@ struct xe_vma {
>   	 */
>   	u64 gt_present;
>   
> -	/** @userptr_link: link into VM repin list if userptr */
> -	struct list_head userptr_link;
> +	union {
> +		/** @userptr_link: link into VM repin list if userptr */
> +		struct list_head userptr_link;
>   
> -	/**
> -	 * @rebind_link: link into VM if this VMA needs rebinding, and
> -	 * if it's a bo (not userptr) needs validation after a possible
> -	 * eviction. Protected by the vm's resv lock.
> -	 */
> -	struct list_head rebind_link;
> +		/**
> +		 * @rebind_link: link into VM if this VMA needs rebinding, and
> +		 * if it's a bo (not userptr) needs validation after a possible
> +		 * eviction. Protected by the vm's resv lock.
> +		 */
> +		struct list_head rebind_link;

Since the different lists have very different locking protection, I'm 
pretty sure you can come up with a scenario where this is invalid, for 
example a userptr vma being on the vm->userptr.repin_list and the 
vm->rebind_list simultaneously: say we do a repin, hit an -EINTR during 
xe_vm_lock_dma_resv(), and then immediately take a new userptr 
invalidation (now the rebind_link and userptr.invalidate_link are used 
on different lists), followed by a new repin (now the rebind_link and 
userptr_link are used on different lists, simultaneously).
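
To make that concrete, here is a minimal, self-contained sketch 
(hypothetical names and stripped-down list primitives, not the actual 
xe code) of what goes wrong once the overlaid links end up live on two 
lists at once:

#include <stdio.h>

/* Stripped-down versions of the kernel list primitives. */
struct list_head {
	struct list_head *next, *prev;
};

static void INIT_LIST_HEAD(struct list_head *head)
{
	head->next = head->prev = head;
}

static void list_add_tail(struct list_head *entry, struct list_head *head)
{
	entry->prev = head->prev;
	entry->next = head;
	head->prev->next = entry;
	head->prev = entry;
}

/* Hypothetical stand-in for xe_vma with the unioned links. */
struct fake_vma {
	union {
		struct list_head userptr_link;	/* VM repin list */
		struct list_head rebind_link;	/* VM rebind list */
	};
};

int main(void)
{
	struct list_head repin_list, rebind_list;
	struct fake_vma vma;

	INIT_LIST_HEAD(&repin_list);
	INIT_LIST_HEAD(&rebind_list);

	/* Repin path: the vma goes on the repin list. */
	list_add_tail(&vma.userptr_link, &repin_list);

	/*
	 * Invalidation/rebind path, under a different lock: the same
	 * storage is reused while the vma is still on the repin list,
	 * silently rewriting its link pointers.
	 */
	list_add_tail(&vma.rebind_link, &rebind_list);

	/* Walking the repin list now crosses over into the rebind list. */
	printf("repin list walks into rebind list: %s\n",
	       repin_list.next->next == &rebind_list ?
	       "yes (corrupted)" : "no");

	return 0;
}

The second list_add_tail() rewrites the very pointers the repin list is 
threaded through, so the first list is corrupted without any warning.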

If you want to do a union like this, we either need to have exactly 
the same locking rules for the links, or the links should be used for 
completely different purposes that can never coincide, like the 
userptr.invalidate_link and the notifier.rebind_link: one is used for 
userptr vmas only and the other for bo-backed vmas only.
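
A sketch of that safe variant (again hypothetical names, not the actual 
xe_vma layout), where a discriminator that never changes over the vma's 
lifetime decides which overlaid link exists, so the two members can 
never be in use at the same time regardless of locking:

#include <stdbool.h>

struct list_head {
	struct list_head *next, *prev;
};

struct fake_vma {
	bool is_userptr;	/* fixed at creation, never changes */
	union {
		/*
		 * Valid only when is_userptr: link into
		 * vm->userptr.invalidated, protected by the
		 * invalidated_lock.
		 */
		struct list_head invalidate_link;
		/*
		 * Valid only when !is_userptr (bo-backed): link into
		 * the notifier rebind list, protected by the notifier
		 * lock.
		 */
		struct list_head notifier_rebind_link;
	};
};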

I'm also pretty sure that a vma can be on the rebind_list and the 
notifier.rebind_list simultaneously, due to the different locking, by 
reasoning similar to the above.

So without identical locking rules for all the overlaid links, it 
becomes practically impossible to reason about whether using a union 
is safe.

/Thomas



