[Intel-xe] [PATCH] drm/xe/vm: uncombine the combined_links.destroy

Matthew Auld matthew.auld at intel.com
Mon Aug 7 13:02:27 UTC 2023


It looks plausible for eviction to be running while we are closing the vm,
and in that case the rebind and destroy links are not mutually exclusive,
since eviction only needs the vm's dma-resv lock, which vm close drops when
accessing the contested list. If we race with eviction here it can run
rampant and corrupt the list, since the destroy and rebind links are the
same list entry underneath. Simplest is to just split these into separate
links.
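
To make the failure mode concrete, here is a minimal userspace sketch. The
names (fake_vma, combined_link, rebind_list, contested) and the simplified
list helpers are illustration-only stand-ins for the kernel's list.h, not
the driver's code: once a single embedded list_head ends up on two lists,
the first list is left pointing at a node that no longer points back at it,
which is the kind of corruption the racing eviction/close paths can produce.

  #include <stdio.h>

  struct list_head {
  	struct list_head *next, *prev;
  };

  static void INIT_LIST_HEAD(struct list_head *h)
  {
  	h->next = h;
  	h->prev = h;
  }

  /* Simplified stand-in for the kernel's list_add_tail(). */
  static void list_add_tail(struct list_head *new, struct list_head *head)
  {
  	new->prev = head->prev;
  	new->next = head;
  	head->prev->next = new;
  	head->prev = new;
  }

  /* One embedded link standing in for the old combined rebind/destroy entry. */
  struct fake_vma {
  	struct list_head combined_link;
  };

  int main(void)
  {
  	struct list_head rebind_list, contested;
  	struct fake_vma vma;

  	INIT_LIST_HEAD(&rebind_list);
  	INIT_LIST_HEAD(&contested);
  	INIT_LIST_HEAD(&vma.combined_link);

  	/* Eviction-style path: the vma goes on the rebind list. */
  	list_add_tail(&vma.combined_link, &rebind_list);

  	/*
  	 * Close-style path using the very same link: the shared entry is
  	 * pushed onto the contested list. rebind_list still points at the
  	 * node, but the node's prev now points into contested, so the
  	 * rebind list is corrupt. With separate links this cannot happen.
  	 */
  	list_add_tail(&vma.combined_link, &contested);

  	printf("rebind list consistent? %s\n",
  	       rebind_list.next->prev == &rebind_list ? "yes" : "no (corrupt)");
  	return 0;
  }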

References: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/514
Signed-off-by: Matthew Auld <matthew.auld at intel.com>
Cc: Matthew Brost <matthew.brost at intel.com>
---
 drivers/gpu/drm/xe/xe_vm.c       | 10 +++++-----
 drivers/gpu/drm/xe/xe_vm_types.h | 11 ++++++-----
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index cb28dbc2bdbb..aa13b2bcda86 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -891,6 +891,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	}
 
 	INIT_LIST_HEAD(&vma->combined_links.rebind);
+	INIT_LIST_HEAD(&vma->destroy_link);
 	INIT_LIST_HEAD(&vma->notifier.rebind_link);
 	INIT_LIST_HEAD(&vma->extobj.link);
 
@@ -1063,7 +1064,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 	struct xe_vm *vm = xe_vma_vm(vma);
 
 	lockdep_assert_held_write(&vm->lock);
-	XE_WARN_ON(!list_empty(&vma->combined_links.destroy));
+	XE_WARN_ON(!list_empty(&vma->destroy_link));
 
 	if (xe_vma_is_userptr(vma)) {
 		XE_WARN_ON(!(vma->gpuva.flags & XE_VMA_DESTROYED));
@@ -1449,7 +1450,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 			continue;
 		}
 
-		list_move_tail(&vma->combined_links.destroy, &contested);
+		list_add_tail(&vma->destroy_link, &contested);
 	}
 
 	/*
@@ -1477,9 +1478,8 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	 * Since we hold a refcount to the bo, we can remove and free
 	 * the members safely without locking.
 	 */
-	list_for_each_entry_safe(vma, next_vma, &contested,
-				 combined_links.destroy) {
-		list_del_init(&vma->combined_links.destroy);
+	list_for_each_entry_safe(vma, next_vma, &contested, destroy_link) {
+		list_del_init(&vma->destroy_link);
 		xe_vma_destroy_unlocked(vma);
 	}
 
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 3681a5ff588b..a01dde418803 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -79,13 +79,14 @@ struct xe_vma {
 		 * mutually exclusive execution with userptr.
 		 */
 		struct list_head rebind;
-		/**
-		 * @destroy: link to contested list when VM is being closed.
-		 * Protected by vm->lock in write mode and vm's resv lock.
-		 */
-		struct list_head destroy;
 	} combined_links;
 
+	/**
+	 * @destroy_link: link to contested list when VM is being closed.
+	 * Protected by vm->lock in write mode and vm's resv lock.
+	 */
+	struct list_head destroy_link;
+
 	union {
 		/** @destroy_cb: callback to destroy VMA when unbind job is done */
 		struct dma_fence_cb destroy_cb;
-- 
2.41.0
