[PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling
Felix Kuehling
felix.kuehling at amd.com
Mon Jul 25 14:20:17 UTC 2022
Am 2022-07-25 um 06:32 schrieb Lang Yu:
> We have a memory leak issue in the current implementation, i.e.,
> the allocated struct kgd_mem memory is not handled properly.
>
> The idea is always creating a buffer object when importing
> a gfx BO based dmabuf.
>
> Signed-off-by: Lang Yu <Lang.Yu at amd.com>
> ---
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 99 +++++++++++++------
> 1 file changed, 70 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index b3806ebe5ef7..c1855b72a3f0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
> u32 alloc_flags = bo->kfd_bo->alloc_flags;
> u64 size = amdgpu_bo_size(bo);
>
> - unreserve_mem_limit(adev, size, alloc_flags);
> + if (!bo->kfd_bo->is_imported)
> + unreserve_mem_limit(adev, size, alloc_flags);
>
> kfree(bo->kfd_bo);
> }
> @@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
> }
> }
>
> +static struct drm_gem_object*
> +amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
> +{
> + struct drm_gem_object *gobj;
> + struct amdgpu_bo *abo;
> +
> + if (dma_buf->ops == &amdgpu_dmabuf_ops) {
I'd rather remove this limitation. We should be able to use any DMABuf
with KFD. This check was added when we basically sidestepped all the
dmabuf attachment code and just extracted the amdgpu_bo ourselves. I
don't think we want to keep doing that.
Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf
import" sent to amd-gfx on March 16. I'm about to send an updated
version of this as part of upstream RDMA support soon.
Regards,
Felix
> + gobj = dma_buf->priv;
> + abo = gem_to_amdgpu_bo(gobj);
> + if (gobj->dev == dev && abo->kfd_bo) {
> + drm_gem_object_get(gobj);
> + return gobj;
> + }
> + }
> +
> + return amdgpu_dma_buf_create_obj_and_attach(dev, dma_buf);
> +}
> +
> static int
> kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
> struct amdgpu_bo **bo)
> @@ -802,7 +821,7 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
> }
> }
>
> - gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
> + gobj = amdgpu_amdkfd_import(adev_to_drm(adev), mem->dmabuf);
> if (IS_ERR(gobj))
> return PTR_ERR(gobj);
>
> @@ -1805,12 +1824,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> {
> struct amdkfd_process_info *process_info = mem->process_info;
> unsigned long bo_size = mem->bo->tbo.base.size;
> + bool is_imported = false;
> + struct drm_gem_object *imported_gobj;
> struct kfd_mem_attachment *entry, *tmp;
> struct bo_vm_reservation_context ctx;
> struct ttm_validate_buffer *bo_list_entry;
> unsigned int mapped_to_gpu_memory;
> int ret;
> - bool is_imported = false;
>
> mutex_lock(&mem->lock);
>
> @@ -1885,7 +1905,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> }
>
> /* Free the BO*/
> - drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
> + if (!is_imported) {
> + drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
> + } else {
> + imported_gobj = mem->dmabuf->priv;
> + drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
> + }
> +
> if (mem->dmabuf)
> dma_buf_put(mem->dmabuf);
> mutex_destroy(&mem->lock);
> @@ -2249,62 +2275,77 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
> uint64_t *mmap_offset)
> {
> struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
> - struct drm_gem_object *obj;
> - struct amdgpu_bo *bo;
> + struct drm_gem_object *imported_gobj, *gobj;
> + struct amdgpu_bo *imported_bo, *bo;
> int ret;
>
> - if (dma_buf->ops != &amdgpu_dmabuf_ops)
> - /* Can't handle non-graphics buffers */
> + /*
> + * Can't handle non-graphics buffers and
> + * buffers from other devices
> + */
> + imported_gobj = dma_buf->priv;
> + if (dma_buf->ops != &amdgpu_dmabuf_ops ||
> + drm_to_adev(imported_gobj->dev) != adev)
> return -EINVAL;
>
> - obj = dma_buf->priv;
> - if (drm_to_adev(obj->dev) != adev)
> - /* Can't handle buffers from other devices */
> + /* Only VRAM and GTT BOs are supported */
> + imported_bo = gem_to_amdgpu_bo(imported_gobj);
> + if (!(imported_bo->preferred_domains &
> + (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
> return -EINVAL;
>
> - bo = gem_to_amdgpu_bo(obj);
> - if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
> - AMDGPU_GEM_DOMAIN_GTT)))
> - /* Only VRAM and GTT BOs are supported */
> - return -EINVAL;
> + ret = drm_vma_node_allow(&imported_gobj->vma_node, drm_priv);
> + if (ret)
> + return ret;
>
> - *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
> - if (!*mem)
> - return -ENOMEM;
> + gobj = amdgpu_amdkfd_import(adev_to_drm(adev), dma_buf);
> + if (IS_ERR(gobj)) {
> + ret = PTR_ERR(gobj);
> + goto err_import;
> + }
>
> - ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
> - if (ret) {
> - kfree(mem);
> - return ret;
> + bo = gem_to_amdgpu_bo(gobj);
> + bo->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
> +
> + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
> + if (!*mem) {
> + ret = -ENOMEM;
> + goto err_alloc_mem;
> }
>
> if (size)
> - *size = amdgpu_bo_size(bo);
> + *size = amdgpu_bo_size(imported_bo);
>
> if (mmap_offset)
> - *mmap_offset = amdgpu_bo_mmap_offset(bo);
> + *mmap_offset = amdgpu_bo_mmap_offset(imported_bo);
>
> INIT_LIST_HEAD(&(*mem)->attachments);
> mutex_init(&(*mem)->lock);
>
> (*mem)->alloc_flags =
> - ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> + ((imported_bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
> | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
> | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
>
> - drm_gem_object_get(&bo->tbo.base);
> + get_dma_buf(dma_buf);
> + (*mem)->dmabuf = dma_buf;
> (*mem)->bo = bo;
> (*mem)->va = va;
> - (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
> - AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
> + (*mem)->domain = AMDGPU_GEM_DOMAIN_GTT;
> (*mem)->mapped_to_gpu_memory = 0;
> (*mem)->process_info = avm->process_info;
> add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
> amdgpu_sync_create(&(*mem)->sync);
> (*mem)->is_imported = true;
> + bo->kfd_bo = *mem;
>
> return 0;
> +err_import:
> + drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
> +err_alloc_mem:
> + drm_gem_object_put(gobj);
> + return ret;
> }
>
> /* Evict a userptr BO by stopping the queues if necessary
More information about the amd-gfx
mailing list