[PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling

Mon Jul 25 10:32:04 UTC 2022

We have memory leak issue in current implenmention, i.e.,
the allocated struct kgd_mem memory is not handled properly.

The idea is always creating a buffer object when importing
a gfx BO based dmabuf.

Signed-off-by: Lang Yu <Lang.Yu at amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
 1 file changed, 70 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b3806ebe5ef7..c1855b72a3f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 	u32 alloc_flags = bo->kfd_bo->alloc_flags;
 	u64 size = amdgpu_bo_size(bo);
 
-	unreserve_mem_limit(adev, size, alloc_flags);
+	if (!bo->kfd_bo->is_imported)
+		unreserve_mem_limit(adev, size, alloc_flags);
 
 	kfree(bo->kfd_bo);
 }
@@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
 	}
 }
 
+static struct drm_gem_object*
+amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
+{
+	struct drm_gem_object *gobj;
+	struct amdgpu_bo *abo;
+
+	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+		gobj = dma_buf->priv;
+		abo = gem_to_amdgpu_bo(gobj);
+		if (gobj->dev == dev && abo->kfd_bo) {
+			drm_gem_object_get(gobj);
+			return gobj;
+		}
+	}
+
+	return amdgpu_dma_buf_create_obj_and_attach(dev, dma_buf);
+}
+
 static int
 kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
 		      struct amdgpu_bo **bo)
@@ -802,7 +821,7 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
 		}
 	}
 
-	gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
+	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), mem->dmabuf);
 	if (IS_ERR(gobj))
 		return PTR_ERR(gobj);
 
@@ -1805,12 +1824,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 {
 	struct amdkfd_process_info *process_info = mem->process_info;
 	unsigned long bo_size = mem->bo->tbo.base.size;
+	bool is_imported = false;
+	struct drm_gem_object *imported_gobj;
 	struct kfd_mem_attachment *entry, *tmp;
 	struct bo_vm_reservation_context ctx;
 	struct ttm_validate_buffer *bo_list_entry;
 	unsigned int mapped_to_gpu_memory;
 	int ret;
-	bool is_imported = false;
 
 	mutex_lock(&mem->lock);
 
@@ -1885,7 +1905,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	}
 
 	/* Free the BO*/
-	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
+	if (!is_imported) {
+		drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
+	} else {
+		imported_gobj = mem->dmabuf->priv;
+		drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
+	}
+
 	if (mem->dmabuf)
 		dma_buf_put(mem->dmabuf);
 	mutex_destroy(&mem->lock);
@@ -2249,62 +2275,77 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
 				      uint64_t *mmap_offset)
 {
 	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
-	struct drm_gem_object *obj;
-	struct amdgpu_bo *bo;
+	struct drm_gem_object *imported_gobj, *gobj;
+	struct amdgpu_bo *imported_bo, *bo;
 	int ret;
 
-	if (dma_buf->ops != &amdgpu_dmabuf_ops)
-		/* Can't handle non-graphics buffers */
+	/*
+	 * Can't handle non-graphics buffers and
+	 * buffers from other devices
+	 */
+	imported_gobj = dma_buf->priv;
+	if (dma_buf->ops != &amdgpu_dmabuf_ops ||
+	    drm_to_adev(imported_gobj->dev) != adev)
 		return -EINVAL;
 
-	obj = dma_buf->priv;
-	if (drm_to_adev(obj->dev) != adev)
-		/* Can't handle buffers from other devices */
+	/* Only VRAM and GTT BOs are supported */
+	imported_bo = gem_to_amdgpu_bo(imported_gobj);
+	if (!(imported_bo->preferred_domains &
+	    (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
 		return -EINVAL;
 
-	bo = gem_to_amdgpu_bo(obj);
-	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
-				    AMDGPU_GEM_DOMAIN_GTT)))
-		/* Only VRAM and GTT BOs are supported */
-		return -EINVAL;
+	ret = drm_vma_node_allow(&imported_gobj->vma_node, drm_priv);
+	if (ret)
+		return ret;
 
-	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-	if (!*mem)
-		return -ENOMEM;
+	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), dma_buf);
+	if (IS_ERR(gobj)) {
+		ret = PTR_ERR(gobj);
+		goto err_import;
+	}
 
-	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
-	if (ret) {
-		kfree(mem);
-		return ret;
+	bo = gem_to_amdgpu_bo(gobj);
+	bo->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
+
+	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if (!*mem) {
+		ret = -ENOMEM;
+		goto err_alloc_mem;
 	}
 
 	if (size)
-		*size = amdgpu_bo_size(bo);
+		*size = amdgpu_bo_size(imported_bo);
 
 	if (mmap_offset)
-		*mmap_offset = amdgpu_bo_mmap_offset(bo);
+		*mmap_offset = amdgpu_bo_mmap_offset(imported_bo);
 
 	INIT_LIST_HEAD(&(*mem)->attachments);
 	mutex_init(&(*mem)->lock);
 
 	(*mem)->alloc_flags =
-		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+		((imported_bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
 		KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
 		| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
 		| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
 
-	drm_gem_object_get(&bo->tbo.base);
+	get_dma_buf(dma_buf);
+	(*mem)->dmabuf = dma_buf;
 	(*mem)->bo = bo;
 	(*mem)->va = va;
-	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
-		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+	(*mem)->domain = AMDGPU_GEM_DOMAIN_GTT;
 	(*mem)->mapped_to_gpu_memory = 0;
 	(*mem)->process_info = avm->process_info;
 	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
 	amdgpu_sync_create(&(*mem)->sync);
 	(*mem)->is_imported = true;
+	bo->kfd_bo = *mem;
 
 	return 0;
+err_import:
+	drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
+err_alloc_mem:
+	drm_gem_object_put(gobj);
+	return ret;
 }
 
 /* Evict a userptr BO by stopping the queues if necessary
-- 
2.25.1