[PATCH 4/6] drm/amdkfd: Export DMABufs from KFD using GEM handles

Errabolu, Ramesh Ramesh.Errabolu at amd.com
Wed Nov 8 02:08:57 UTC 2023


[AMD Official Use Only - General]

Reviewed-by: Ramesh Errabolu <Ramesh.Errabolu at amd.com>

-----Original Message-----
From: Kuehling, Felix <Felix.Kuehling at amd.com>
Sent: Wednesday, November 8, 2023 1:27 AM
To: Errabolu, Ramesh <Ramesh.Errabolu at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Koenig, Christian <Christian.Koenig at amd.com>; Chen, Xiaogang <Xiaogang.Chen at amd.com>
Subject: Re: [PATCH 4/6] drm/amdkfd: Export DMABufs from KFD using GEM handles


On 2023-11-07 14:44, Errabolu, Ramesh wrote:
> [AMD Official Use Only - General]
>
> My queries inline.
>
> Regards,
> Ramesh
>
> -----Original Message-----
> From: Kuehling, Felix <Felix.Kuehling at amd.com>
> Sent: Tuesday, November 7, 2023 10:28 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Koenig, Christian <Christian.Koenig at amd.com>; Chen, Xiaogang
> <Xiaogang.Chen at amd.com>; Errabolu, Ramesh <Ramesh.Errabolu at amd.com>
> Subject: [PATCH 4/6] drm/amdkfd: Export DMABufs from KFD using GEM
> handles
>
> Create GEM handles for exporting DMABufs using GEM-Prime APIs. The GEM handles are created in a drm_client_dev context to avoid exposing them in user mode contexts through a DMABuf import.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 11 +++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  5 +++
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 33 +++++++++++++++----
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  4 +--
>   4 files changed, 44 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 6ab17330a6ed..b0a67f16540a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -142,6 +142,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)  {
>          int i;
>          int last_valid_bit;
> +       int ret;
>
>          amdgpu_amdkfd_gpuvm_init_mem_limits();
>
> @@ -160,6 +161,12 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>                          .enable_mes = adev->enable_mes,
>                  };
>
> +               ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", NULL);
> +               if (ret) {
> +                       dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
> +                       return;
> +               }
> +
>                  /* this is going to have a few of the MSBs set that we need to
>                   * clear
>                   */
> @@ -198,6 +205,10 @@ void amdgpu_amdkfd_device_init(struct
> amdgpu_device *adev)
>
>                  adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
>
> &gpu_resources);
> +               if (adev->kfd.init_complete)
> +                       drm_client_register(&adev->kfd.client);
> +               else
> +                       drm_client_release(&adev->kfd.client);
>
>                  amdgpu_amdkfd_total_mem_size +=
> adev->gmc.real_vram_size;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 68d534a89942..4caf8cece028 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -32,6 +32,7 @@
>   #include <linux/mmu_notifier.h>
>   #include <linux/memremap.h>
>   #include <kgd_kfd_interface.h>
> +#include <drm/drm_client.h>
>   #include <drm/ttm/ttm_execbuf_util.h>
>   #include "amdgpu_sync.h"
>   #include "amdgpu_vm.h"
> @@ -84,6 +85,7 @@ struct kgd_mem {
>
>          struct amdgpu_sync sync;
>
> +       uint32_t gem_handle;
>          bool aql_queue;
>          bool is_imported;
>   };
> @@ -106,6 +108,9 @@ struct amdgpu_kfd_dev {
>
>          /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
>          struct dev_pagemap pgmap;
> +
> +       /* Client for KFD BO GEM handle allocations */
> +       struct drm_client_dev client;
>   };
>
>   enum kgd_engine_type {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 0c1cb6048259..4bb8b5fd7598 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -25,6 +25,7 @@
>   #include <linux/pagemap.h>
>   #include <linux/sched/mm.h>
>   #include <linux/sched/task.h>
> +#include <linux/fdtable.h>
>   #include <drm/ttm/ttm_tt.h>
>
>   #include "amdgpu_object.h"
> @@ -804,13 +805,22 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,  static int kfd_mem_export_dmabuf(struct kgd_mem *mem)  {
>          if (!mem->dmabuf) {
> -               struct dma_buf *ret = amdgpu_gem_prime_export(
> -                       &mem->bo->tbo.base,
> +               struct amdgpu_device *bo_adev;
> +               struct dma_buf *dmabuf;
> +               int r, fd;
> +
> +               bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
> +               r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file,
> +                                              mem->gem_handle,
>                          mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
> -                               DRM_RDWR : 0);
> -               if (IS_ERR(ret))
> -                       return PTR_ERR(ret);
> -               mem->dmabuf = ret;
> +                                              DRM_RDWR : 0, &fd);
> +               if (r)
> +                       return r;
> +               dmabuf = dma_buf_get(fd);
> +               close_fd(fd);
> +               if (WARN_ON_ONCE(IS_ERR(dmabuf)))
> +                       return PTR_ERR(dmabuf);
> +               mem->dmabuf = dmabuf;
>          }
>
>          return 0;
> @@ -1826,6 +1836,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>                  pr_debug("Failed to allow vma node access. ret %d\n", ret);
>                  goto err_node_allow;
>          }
> +       ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle);
> +       if (ret)
> +               goto err_gem_handle_create;
>          bo = gem_to_amdgpu_bo(gobj);
>          if (bo_type == ttm_bo_type_sg) {
>                  bo->tbo.sg = sg;
> @@ -1877,6 +1890,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   err_pin_bo:
>   err_validate_bo:
>          remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
> +       drm_gem_handle_delete(adev->kfd.client.file,
> +(*mem)->gem_handle);
> +err_gem_handle_create:
>          drm_vma_node_revoke(&gobj->vma_node, drm_priv);
>   err_node_allow:
>          /* Don't unreserve system mem limit twice */ @@ -1991,8
> +2006,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>
>          /* Free the BO*/
>          drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
> -       if (mem->dmabuf)
> +       if (!mem->is_imported)
> +               drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle);
> +       if (mem->dmabuf) {
>                  dma_buf_put(mem->dmabuf);
> +               mem->dmabuf = NULL;
> +       }
>          mutex_destroy(&mem->lock);
>
> Ramesh: What happens if user invokes "free" using KFD IOCTL while the BO is imported and mapped on the device using DRM IOCTLs. Could it lead to inconsistent state?. For example the call drm_gem_handle_delete() will remove Dmabuf, close the GEM object. If user invokes free() using KFD Apis then the underlying object could be removed. It is not clear if this impacts devices that have just imported.

Reference counting should prevent any such problems. Both the dmabuf itself or the import in a DRM device hold a reference to the underlying GEM object. Using the KFD free ioctl only drops the KFD reference to the GEM object and cleans up the handle and kgd_mem object in KFD.

When the BO gets imported in a DRM render node, that creates a new GEM handle in that context.

Regards,
   Felix


>
>          /* If this releases the last reference, it will end up
> calling diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 06988cf1db51..4417a9863cd0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1855,8 +1855,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
>          return num_of_bos;
>   }
>
> -static int criu_get_prime_handle(struct kgd_mem *mem, int flags,
> -                                     u32 *shared_fd)
> +static int criu_get_prime_handle(struct kgd_mem *mem,
> +                                int flags, u32 *shared_fd)
>   {
>          struct dma_buf *dmabuf;
>          int ret;
> --
> 2.34.1
>


More information about the amd-gfx mailing list