[PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling

Felix Kuehling felix.kuehling at amd.com
Tue Jul 26 00:32:14 UTC 2022


Am 2022-07-25 um 20:15 schrieb Lang Yu:
> On 07/25/ , Felix Kuehling wrote:
>> Am 2022-07-25 um 06:32 schrieb Lang Yu:
>>> We have a memory leak issue in the current implementation, i.e.,
>>> the allocated struct kgd_mem memory is not handled properly.
>>>
>>> The idea is always creating a buffer object when importing
>>> a gfx BO based dmabuf.
>>>
>>> Signed-off-by: Lang Yu <Lang.Yu at amd.com>
>>> ---
>>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 99 +++++++++++++------
>>>    1 file changed, 70 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index b3806ebe5ef7..c1855b72a3f0 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>>>    	u32 alloc_flags = bo->kfd_bo->alloc_flags;
>>>    	u64 size = amdgpu_bo_size(bo);
>>> -	unreserve_mem_limit(adev, size, alloc_flags);
>>> +	if (!bo->kfd_bo->is_imported)
>>> +		unreserve_mem_limit(adev, size, alloc_flags);
>>>    	kfree(bo->kfd_bo);
>>>    }
>>> @@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
>>>    	}
>>>    }
>>> +static struct drm_gem_object*
>>> +amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
>>> +{
>>> +	struct drm_gem_object *gobj;
>>> +	struct amdgpu_bo *abo;
>>> +
>>> +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>> I'd rather remove this limitation. We should be able to use any DMABuf with
>> KFD. This check was added when we basically sidestepped all the dmabuf
>> attachment code and just extracted the amdgpu_bo ourselves. I don't think we
>> want to keep doing that.
> This limitation here is to just take a reference on the gobj if it is an amdgpu bo
> from the same device, instead of creating a new gobj when importing a dmabuf.
>
>> Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf
>> import" sent to amd-gfx on March 16. I'm about to send an updated version of
>> this as part of upstream RDMA support soon.
> The "drm/amdgpu: Generalize KFD dmabuf import" doesn't handle the struct kgd_mem
> memory leak issue. Looking forward to the updated version. Thanks!

Maybe we're trying to fix different problems. Can you give a more 
detailed explanation of the memory leak you're seeing? It's not obvious 
to me.

The problem I'm trying to solve is, to ensure that each exported BO only 
has a single dmabuf because we run into problems with GEM if we have 
multiple dmabufs pointing to the same GEM object. That also enables 
re-exporting dmabufs of imported BOs. At the same time I'm removing any 
limitations of the types of BOs we can import, and trying to eliminate 
any custom dmabuf handling in KFD.

Regards,
   Felix


>
> Regards,
> Lang
>
>> Regards,
>>    Felix
>>
>>
>>> +		gobj = dma_buf->priv;
>>> +		abo = gem_to_amdgpu_bo(gobj);
>>> +		if (gobj->dev == dev && abo->kfd_bo) {
>>> +			drm_gem_object_get(gobj);
>>> +			return gobj;
>>> +		}
>>> +	}
>>> +
>>> +	return amdgpu_dma_buf_create_obj_and_attach(dev, dma_buf);
>>> +}
>>> +
>>>    static int
>>>    kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
>>>    		      struct amdgpu_bo **bo)
>>> @@ -802,7 +821,7 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
>>>    		}
>>>    	}
>>> -	gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
>>> +	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), mem->dmabuf);
>>>    	if (IS_ERR(gobj))
>>>    		return PTR_ERR(gobj);
>>> @@ -1805,12 +1824,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>    {
>>>    	struct amdkfd_process_info *process_info = mem->process_info;
>>>    	unsigned long bo_size = mem->bo->tbo.base.size;
>>> +	bool is_imported = false;
>>> +	struct drm_gem_object *imported_gobj;
>>>    	struct kfd_mem_attachment *entry, *tmp;
>>>    	struct bo_vm_reservation_context ctx;
>>>    	struct ttm_validate_buffer *bo_list_entry;
>>>    	unsigned int mapped_to_gpu_memory;
>>>    	int ret;
>>> -	bool is_imported = false;
>>>    	mutex_lock(&mem->lock);
>>> @@ -1885,7 +1905,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>    	}
>>>    	/* Free the BO*/
>>> -	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
>>> +	if (!is_imported) {
>>> +		drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
>>> +	} else {
>>> +		imported_gobj = mem->dmabuf->priv;
>>> +		drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
>>> +	}
>>> +
>>>    	if (mem->dmabuf)
>>>    		dma_buf_put(mem->dmabuf);
>>>    	mutex_destroy(&mem->lock);
>>> @@ -2249,62 +2275,77 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
>>>    				      uint64_t *mmap_offset)
>>>    {
>>>    	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
>>> -	struct drm_gem_object *obj;
>>> -	struct amdgpu_bo *bo;
>>> +	struct drm_gem_object *imported_gobj, *gobj;
>>> +	struct amdgpu_bo *imported_bo, *bo;
>>>    	int ret;
>>> -	if (dma_buf->ops != &amdgpu_dmabuf_ops)
>>> -		/* Can't handle non-graphics buffers */
>>> +	/*
>>> +	 * Can't handle non-graphics buffers and
>>> +	 * buffers from other devices
>>> +	 */
>>> +	imported_gobj = dma_buf->priv;
>>> +	if (dma_buf->ops != &amdgpu_dmabuf_ops ||
>>> +	    drm_to_adev(imported_gobj->dev) != adev)
>>>    		return -EINVAL;
>>> -	obj = dma_buf->priv;
>>> -	if (drm_to_adev(obj->dev) != adev)
>>> -		/* Can't handle buffers from other devices */
>>> +	/* Only VRAM and GTT BOs are supported */
>>> +	imported_bo = gem_to_amdgpu_bo(imported_gobj);
>>> +	if (!(imported_bo->preferred_domains &
>>> +	    (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
>>>    		return -EINVAL;
>>> -	bo = gem_to_amdgpu_bo(obj);
>>> -	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
>>> -				    AMDGPU_GEM_DOMAIN_GTT)))
>>> -		/* Only VRAM and GTT BOs are supported */
>>> -		return -EINVAL;
>>> +	ret = drm_vma_node_allow(&imported_gobj->vma_node, drm_priv);
>>> +	if (ret)
>>> +		return ret;
>>> -	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
>>> -	if (!*mem)
>>> -		return -ENOMEM;
>>> +	gobj = amdgpu_amdkfd_import(adev_to_drm(adev), dma_buf);
>>> +	if (IS_ERR(gobj)) {
>>> +		ret = PTR_ERR(gobj);
>>> +		goto err_import;
>>> +	}
>>> -	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
>>> -	if (ret) {
>>> -		kfree(mem);
>>> -		return ret;
>>> +	bo = gem_to_amdgpu_bo(gobj);
>>> +	bo->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
>>> +
>>> +	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
>>> +	if (!*mem) {
>>> +		ret = -ENOMEM;
>>> +		goto err_alloc_mem;
>>>    	}
>>>    	if (size)
>>> -		*size = amdgpu_bo_size(bo);
>>> +		*size = amdgpu_bo_size(imported_bo);
>>>    	if (mmap_offset)
>>> -		*mmap_offset = amdgpu_bo_mmap_offset(bo);
>>> +		*mmap_offset = amdgpu_bo_mmap_offset(imported_bo);
>>>    	INIT_LIST_HEAD(&(*mem)->attachments);
>>>    	mutex_init(&(*mem)->lock);
>>>    	(*mem)->alloc_flags =
>>> -		((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
>>> +		((imported_bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
>>>    		KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
>>>    		| KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
>>>    		| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
>>> -	drm_gem_object_get(&bo->tbo.base);
>>> +	get_dma_buf(dma_buf);
>>> +	(*mem)->dmabuf = dma_buf;
>>>    	(*mem)->bo = bo;
>>>    	(*mem)->va = va;
>>> -	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
>>> -		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
>>> +	(*mem)->domain = AMDGPU_GEM_DOMAIN_GTT;
>>>    	(*mem)->mapped_to_gpu_memory = 0;
>>>    	(*mem)->process_info = avm->process_info;
>>>    	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
>>>    	amdgpu_sync_create(&(*mem)->sync);
>>>    	(*mem)->is_imported = true;
>>> +	bo->kfd_bo = *mem;
>>>    	return 0;
>>> +err_import:
>>> +	drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
>>> +err_alloc_mem:
>>> +	drm_gem_object_put(gobj);
>>> +	return ret;
>>>    }
>>>    /* Evict a userptr BO by stopping the queues if necessary


More information about the amd-gfx mailing list