[PATCH 1/1] drm/amdgpu: Fix dangling kfd_bo pointer for shared BOs
Christian König
ckoenig.leichtzumerken at gmail.com
Fri Nov 5 07:11:55 UTC 2021
Am 05.11.21 um 00:05 schrieb Felix Kuehling:
> If a kfd_bo was shared (e.g. a dmabuf export), the original kfd_bo may be
> freed while the amdgpu_bo still lives on. Free the kfd_bo struct in the
> release_notify callback when the amdgpu_bo is freed.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 +++++++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
> 3 files changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 4accd584886b..5f658823a637 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -307,7 +307,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev);
> void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
> void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
> struct amdgpu_vm *vm);
> -void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
> +void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
> void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
> #else
> static inline
> @@ -322,7 +322,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
> }
>
> static inline
> -void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
> +void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
> {
> }
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 5174762f0b46..94fccf0b47ad 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -201,7 +201,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
> spin_unlock(&kfd_mem_limit.mem_limit_lock);
> }
>
> -void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
> +void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
> {
> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> u32 domain = bo->preferred_domains;
> @@ -213,6 +213,8 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
> }
>
> unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
> +
> + kfree(bo->kfd_bo);
> }
>
>
> @@ -1599,9 +1601,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
> if (mem->dmabuf)
> dma_buf_put(mem->dmabuf);
> - drm_gem_object_put(&mem->bo->tbo.base);
> mutex_destroy(&mem->lock);
> - kfree(mem);
> +
> + /* If this releases the last reference, it will end up calling
> + * amdgpu_amdkfd_release_notify and kfree the mem struct. That's why
> + * this needs to be the last call here.
> + */
> + drm_gem_object_put(&mem->bo->tbo.base);
>
> return ret;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6b25982a9077..156002db24e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1279,7 +1279,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
> abo = ttm_to_amdgpu_bo(bo);
>
> if (abo->kfd_bo)
> - amdgpu_amdkfd_unreserve_memory_limit(abo);
> + amdgpu_amdkfd_release_notify(abo);
>
> /* We only remove the fence if the resv has individualized. */
> WARN_ON_ONCE(bo->type == ttm_bo_type_kernel
More information about the amd-gfx mailing list