[PATCH 2/3] drm/amdkfd: refine the gfx BO based dmabuf handling
Felix Kuehling
felix.kuehling at amd.com
Tue Jul 26 01:48:51 UTC 2022
On 2022-07-25 at 20:40, Lang Yu wrote:
> On 2022-07-25, Felix Kuehling wrote:
>> On 2022-07-25 at 20:15, Lang Yu wrote:
>>> On 2022-07-25, Felix Kuehling wrote:
>>>> On 2022-07-25 at 06:32, Lang Yu wrote:
>>>>> We have a memory leak issue in the current implementation:
>>>>> the allocated struct kgd_mem memory is not freed properly.
>>>>>
>>>>> The idea is to always create a buffer object when importing
>>>>> a gfx-BO-based dmabuf.
>>>>>
>>>>> Signed-off-by: Lang Yu <Lang.Yu at amd.com>
>>>>> ---
>>>>> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 99 +++++++++++++------
>>>>> 1 file changed, 70 insertions(+), 29 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> index b3806ebe5ef7..c1855b72a3f0 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> @@ -225,7 +225,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>>>>> u32 alloc_flags = bo->kfd_bo->alloc_flags;
>>>>> u64 size = amdgpu_bo_size(bo);
>>>>> - unreserve_mem_limit(adev, size, alloc_flags);
>>>>> + if (!bo->kfd_bo->is_imported)
>>>>> + unreserve_mem_limit(adev, size, alloc_flags);
>>>>> kfree(bo->kfd_bo);
>>>>> }
>>>>> @@ -784,6 +785,24 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
>>>>> }
>>>>> }
>>>>> +static struct drm_gem_object*
>>>>> +amdgpu_amdkfd_import(struct drm_device *dev, struct dma_buf *dma_buf)
>>>>> +{
>>>>> + struct drm_gem_object *gobj;
>>>>> + struct amdgpu_bo *abo;
>>>>> +
>>>>> + if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>>>> I'd rather remove this limitation. We should be able to use any DMABuf with
>>>> KFD. This check was added when we basically sidestepped all the dmabuf
>>>> attachment code and just extracted the amdgpu_bo ourselves. I don't think we
>>>> want to keep doing that.
>>> The limitation here is to just take a reference on the gobj if it is an
>>> amdgpu BO from the same device, instead of creating a new gobj when
>>> importing a dmabuf.
>>>
>>>> Please see my patch "[PATCH v2 1/2] drm/amdgpu: Generalize KFD dmabuf
>>>> import" sent to amd-gfx on March 16. I'm about to send an updated version of
>>>> this as part of upstream RDMA support soon.
>>> The "drm/amdgpu: Generalize KFD dmabuf import" doesn't handle the struct kgd_mem
>>> memory leak issue. Looking forward to the updated version. Thanks!
>> Maybe we're trying to fix different problems. Can you give a more detailed
>> explanation of the memory leak you're seeing? It's not obvious to me.
> The struct kgd_mem is allocated by "*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);",
> but it is never assigned to bo->kfd_bo (i.e., there is no "bo->kfd_bo = *mem;"),
> so *mem will never be freed.
True. With the current upstream driver there is no way this can happen,
because we don't have an API for KFD to export a dmabuf in a way that
could later be imported. But with the RDMA and IPC features we're
working on, this becomes a real possibility.
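To spell out the leak: the current import path does roughly this
(simplified sketch, error handling omitted):

    /* amdgpu_amdkfd_gpuvm_import_dmabuf(), abridged */
    *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
    ...
    drm_gem_object_get(&bo->tbo.base);
    (*mem)->bo = bo;
    /* bo->kfd_bo is never pointed at *mem, so when the BO is
     * released, amdgpu_amdkfd_release_notify() has no way to
     * kfree() this kgd_mem. */
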
Your solution is to ensure that the dmabuf import always creates a new
amdgpu_bo. But that can add a lot of overhead. How about this idea: In
amdgpu_amdkfd_gpuvm_free_memory_of_gpu we could add this after
drm_gem_object_put:
if (mem->bo->kfd_bo != mem)
kfree(mem);
That way amdgpu_amdkfd_release_notify would only be responsible for
freeing the original kgd_mem. Any imports will be freed explicitly.
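One subtlety: the comparison would have to be evaluated before the put,
since dropping the last reference frees the BO (and, via release_notify,
possibly mem itself). Something like this untested sketch at the end of
the function (the local variable name is just for illustration):

    /* Cache this before dropping our reference */
    bool use_release_notifier = (mem->bo->kfd_bo == mem);

    drm_gem_object_put(&mem->bo->tbo.base);

    /* kgd_mem structs allocated for dmabuf imports are unknown to
     * amdgpu_amdkfd_release_notify(), so free them explicitly here. */
    if (!use_release_notifier)
        kfree(mem);
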
Regards,
Felix
>
> Regards,
> Lang
>
>> The problem I'm trying to solve is to ensure that each exported BO has only
>> a single dmabuf, because we run into problems with GEM if multiple dmabufs
>> point to the same GEM object. That also enables re-exporting dmabufs of
>> imported BOs. At the same time I'm removing any limitations on the types of
>> BOs we can import, and trying to eliminate any custom dmabuf handling in
>> KFD.
>>
>> Regards,
>> Felix
>>
>>
>>> Regards,
>>> Lang
>>>
>>>> Regards,
>>>> Felix
>>>>
>>>>
>>>>> + gobj = dma_buf->priv;
>>>>> + abo = gem_to_amdgpu_bo(gobj);
>>>>> + if (gobj->dev == dev && abo->kfd_bo) {
>>>>> + drm_gem_object_get(gobj);
>>>>> + return gobj;
>>>>> + }
>>>>> + }
>>>>> +
>>>>> + return amdgpu_dma_buf_create_obj_and_attach(dev, dma_buf);
>>>>> +}
>>>>> +
>>>>> static int
>>>>> kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
>>>>> struct amdgpu_bo **bo)
>>>>> @@ -802,7 +821,7 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
>>>>> }
>>>>> }
>>>>> - gobj = amdgpu_gem_prime_import(adev_to_drm(adev), mem->dmabuf);
>>>>> + gobj = amdgpu_amdkfd_import(adev_to_drm(adev), mem->dmabuf);
>>>>> if (IS_ERR(gobj))
>>>>> return PTR_ERR(gobj);
>>>>> @@ -1805,12 +1824,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>>> {
>>>>> struct amdkfd_process_info *process_info = mem->process_info;
>>>>> unsigned long bo_size = mem->bo->tbo.base.size;
>>>>> + bool is_imported = false;
>>>>> + struct drm_gem_object *imported_gobj;
>>>>> struct kfd_mem_attachment *entry, *tmp;
>>>>> struct bo_vm_reservation_context ctx;
>>>>> struct ttm_validate_buffer *bo_list_entry;
>>>>> unsigned int mapped_to_gpu_memory;
>>>>> int ret;
>>>>> - bool is_imported = false;
>>>>> mutex_lock(&mem->lock);
>>>>> @@ -1885,7 +1905,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>>> }
>>>>> /* Free the BO*/
>>>>> - drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
>>>>> + if (!is_imported) {
>>>>> + drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
>>>>> + } else {
>>>>> + imported_gobj = mem->dmabuf->priv;
>>>>> + drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
>>>>> + }
>>>>> +
>>>>> if (mem->dmabuf)
>>>>> dma_buf_put(mem->dmabuf);
>>>>> mutex_destroy(&mem->lock);
>>>>> @@ -2249,62 +2275,77 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
>>>>> uint64_t *mmap_offset)
>>>>> {
>>>>> struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
>>>>> - struct drm_gem_object *obj;
>>>>> - struct amdgpu_bo *bo;
>>>>> + struct drm_gem_object *imported_gobj, *gobj;
>>>>> + struct amdgpu_bo *imported_bo, *bo;
>>>>> int ret;
>>>>> - if (dma_buf->ops != &amdgpu_dmabuf_ops)
>>>>> - /* Can't handle non-graphics buffers */
>>>>> + /*
>>>>> + * Can't handle non-graphics buffers and
>>>>> + * buffers from other devices
>>>>> + */
>>>>> + imported_gobj = dma_buf->priv;
>>>>> + if (dma_buf->ops != &amdgpu_dmabuf_ops ||
>>>>> + drm_to_adev(imported_gobj->dev) != adev)
>>>>> return -EINVAL;
>>>>> - obj = dma_buf->priv;
>>>>> - if (drm_to_adev(obj->dev) != adev)
>>>>> - /* Can't handle buffers from other devices */
>>>>> + /* Only VRAM and GTT BOs are supported */
>>>>> + imported_bo = gem_to_amdgpu_bo(imported_gobj);
>>>>> + if (!(imported_bo->preferred_domains &
>>>>> + (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)))
>>>>> return -EINVAL;
>>>>> - bo = gem_to_amdgpu_bo(obj);
>>>>> - if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
>>>>> - AMDGPU_GEM_DOMAIN_GTT)))
>>>>> - /* Only VRAM and GTT BOs are supported */
>>>>> - return -EINVAL;
>>>>> + ret = drm_vma_node_allow(&imported_gobj->vma_node, drm_priv);
>>>>> + if (ret)
>>>>> + return ret;
>>>>> - *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
>>>>> - if (!*mem)
>>>>> - return -ENOMEM;
>>>>> + gobj = amdgpu_amdkfd_import(adev_to_drm(adev), dma_buf);
>>>>> + if (IS_ERR(gobj)) {
>>>>> + ret = PTR_ERR(gobj);
>>>>> + goto err_import;
>>>>> + }
>>>>> - ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
>>>>> - if (ret) {
>>>>> - kfree(mem);
>>>>> - return ret;
>>>>> + bo = gem_to_amdgpu_bo(gobj);
>>>>> + bo->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
>>>>> +
>>>>> + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
>>>>> + if (!*mem) {
>>>>> + ret = -ENOMEM;
>>>>> + goto err_alloc_mem;
>>>>> }
>>>>> if (size)
>>>>> - *size = amdgpu_bo_size(bo);
>>>>> + *size = amdgpu_bo_size(imported_bo);
>>>>> if (mmap_offset)
>>>>> - *mmap_offset = amdgpu_bo_mmap_offset(bo);
>>>>> + *mmap_offset = amdgpu_bo_mmap_offset(imported_bo);
>>>>> INIT_LIST_HEAD(&(*mem)->attachments);
>>>>> mutex_init(&(*mem)->lock);
>>>>> (*mem)->alloc_flags =
>>>>> - ((bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
>>>>> + ((imported_bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
>>>>> KFD_IOC_ALLOC_MEM_FLAGS_VRAM : KFD_IOC_ALLOC_MEM_FLAGS_GTT)
>>>>> | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE
>>>>> | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
>>>>> - drm_gem_object_get(&bo->tbo.base);
>>>>> + get_dma_buf(dma_buf);
>>>>> + (*mem)->dmabuf = dma_buf;
>>>>> (*mem)->bo = bo;
>>>>> (*mem)->va = va;
>>>>> - (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
>>>>> - AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
>>>>> + (*mem)->domain = AMDGPU_GEM_DOMAIN_GTT;
>>>>> (*mem)->mapped_to_gpu_memory = 0;
>>>>> (*mem)->process_info = avm->process_info;
>>>>> add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
>>>>> amdgpu_sync_create(&(*mem)->sync);
>>>>> (*mem)->is_imported = true;
>>>>> + bo->kfd_bo = *mem;
>>>>> return 0;
>>>>> +err_alloc_mem:
>>>>> +	drm_gem_object_put(gobj);
>>>>> +err_import:
>>>>> +	drm_vma_node_revoke(&imported_gobj->vma_node, drm_priv);
>>>>> +	return ret;
>>>>> }
>>>>> /* Evict a userptr BO by stopping the queues if necessary