[PATCH 3/4] drm/amdkfd: Add DMABuf import functionality
Kuehling, Felix
Felix.Kuehling at amd.com
Wed Nov 21 03:07:27 UTC 2018
This is used for interoperability between ROCm compute and graphics
APIs. It allows importing graphics driver BOs into the ROCm SVM
address space for zero-copy GPU access.
The API is split into two steps (query and import) to allow user mode
to manage the virtual address space allocation for the imported buffer.
Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 57 +++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 11 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 55 +++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h | 2 +
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 4 +-
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 118 ++++++++++++++++++++++-
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 18 ++++
include/uapi/linux/kfd_ioctl.h | 26 ++++-
9 files changed, 287 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 274099a..44b8a12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -26,6 +26,7 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include <linux/module.h>
+#include <linux/dma-buf.h>
const struct kgd2kfd_calls *kgd2kfd;
@@ -444,6 +445,62 @@ void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
cu_info->lds_size = acu_info.lds_size;
}
+/*
+ * amdgpu_amdkfd_get_dmabuf_info - report properties of an amdgpu DMABuf
+ *
+ * Resolves @dma_buf_fd to a DMABuf and, if it wraps an amdgpu BO that
+ * prefers VRAM or GTT, fills in whichever output pointers are non-NULL:
+ * the owning device (@dma_buf_kgd), the BO size (@bo_size), the metadata
+ * size (@metadata_size), the metadata itself (@metadata_buffer, holding up
+ * to @buffer_size bytes) and the matching ALLOC_MEM_FLAGS_* (@flags).
+ *
+ * Returns 0 on success, the error encoded by dma_buf_get() if the FD
+ * cannot be resolved, -EINVAL if the buffer was not exported by amdgpu,
+ * belongs to a different driver or prefers neither VRAM nor GTT, or
+ * whatever amdgpu_bo_get_metadata() returns. The DMABuf reference taken
+ * here is dropped again before returning.
+ */
+int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
+				  struct kgd_dev **dma_buf_kgd,
+				  uint64_t *bo_size, void *metadata_buffer,
+				  size_t buffer_size, uint32_t *metadata_size,
+				  uint32_t *flags)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct drm_gem_object *gobj;
+	struct amdgpu_bo *abo;
+	struct dma_buf *buf;
+	uint64_t md_flags;
+	int ret = 0;
+
+	buf = dma_buf_get(dma_buf_fd);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	/* Can't handle non-graphics buffers */
+	if (buf->ops != &amdgpu_dmabuf_ops) {
+		ret = -EINVAL;
+		goto out_put;
+	}
+
+	/* Can't handle buffers from different drivers */
+	gobj = buf->priv;
+	if (gobj->dev->driver != adev->ddev->driver) {
+		ret = -EINVAL;
+		goto out_put;
+	}
+
+	/* From here on, report against the device that owns the BO */
+	adev = gobj->dev->dev_private;
+	abo = gem_to_amdgpu_bo(gobj);
+
+	/* Only VRAM and GTT BOs are supported */
+	if ((abo->preferred_domains &
+	     (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) == 0) {
+		ret = -EINVAL;
+		goto out_put;
+	}
+
+	if (dma_buf_kgd)
+		*dma_buf_kgd = (struct kgd_dev *)adev;
+	if (bo_size)
+		*bo_size = amdgpu_bo_size(abo);
+	if (metadata_size)
+		*metadata_size = abo->metadata_size;
+	if (metadata_buffer)
+		ret = amdgpu_bo_get_metadata(abo, metadata_buffer,
+					     buffer_size, metadata_size,
+					     &md_flags);
+	if (flags) {
+		if (abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
+			*flags = ALLOC_MEM_FLAGS_VRAM;
+		else
+			*flags = ALLOC_MEM_FLAGS_GTT;
+
+		if (abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+			*flags |= ALLOC_MEM_FLAGS_PUBLIC;
+	}
+
+out_put:
+	dma_buf_put(buf);
+	return ret;
+}
+
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4d766cb..1a84fe2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -150,6 +150,11 @@ uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd);
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
+int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
+ struct kgd_dev **dmabuf_kgd,
+ uint64_t *bo_size, void *metadata_buffer,
+ size_t buffer_size, uint32_t *metadata_size,
+ uint32_t *flags);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
@@ -201,6 +206,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
+ struct dma_buf *dmabuf,
+ uint64_t va, void *vm,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset);
+
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index fca8788..14f76765 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -25,6 +25,7 @@
#include <linux/list.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
+#include <linux/dma-buf.h>
#include <drm/drmP.h>
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -1664,6 +1665,60 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
return 0;
}
+/*
+ * amdgpu_amdkfd_gpuvm_import_dmabuf - wrap an amdgpu DMABuf in a kgd_mem
+ *
+ * @kgd: device the buffer must belong to
+ * @dma_buf: buffer to import; it is only inspected here and is not stored
+ * in the resulting kgd_mem
+ * @va: GPU virtual address recorded in the new kgd_mem
+ * @vm: struct amdgpu_vm whose process_info the kgd_mem is attached to
+ * @mem: receives the newly allocated kgd_mem (NULL if allocation fails)
+ * @size: if non-NULL, receives the BO size
+ * @mmap_offset: if non-NULL, receives the BO's mmap offset
+ *
+ * Returns 0 on success, -EINVAL if the buffer was not exported by amdgpu,
+ * belongs to another device or prefers neither VRAM nor GTT, and -ENOMEM
+ * if the kgd_mem cannot be allocated. *mem is left untouched on -EINVAL.
+ */
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
+ struct dma_buf *dma_buf,
+ uint64_t va, void *vm,
+ struct kgd_mem **mem, uint64_t *size,
+ uint64_t *mmap_offset)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ struct drm_gem_object *obj;
+ struct amdgpu_bo *bo;
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+ if (dma_buf->ops != &amdgpu_dmabuf_ops)
+ /* Can't handle non-graphics buffers */
+ return -EINVAL;
+
+ obj = dma_buf->priv;
+ if (obj->dev->dev_private != adev)
+ /* Can't handle buffers from other devices */
+ return -EINVAL;
+
+ bo = gem_to_amdgpu_bo(obj);
+ if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT)))
+ /* Only VRAM and GTT BOs are supported */
+ return -EINVAL;
+
+ /* All validation is done; from here on the only failure is -ENOMEM */
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if (!*mem)
+ return -ENOMEM;
+
+ if (size)
+ *size = amdgpu_bo_size(bo);
+
+ if (mmap_offset)
+ *mmap_offset = amdgpu_bo_mmap_offset(bo);
+
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
+ mutex_init(&(*mem)->lock);
+ /* Imported buffers always get read/write/execute, MTYPE_NC mappings */
+ (*mem)->mapping_flags =
+ AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
+ AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC;
+
+ /* The kgd_mem takes its own reference on the BO */
+ (*mem)->bo = amdgpu_bo_ref(bo);
+ (*mem)->va = va;
+ /* VRAM wins when the BO lists both VRAM and GTT as preferred */
+ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+ (*mem)->mapped_to_gpu_memory = 0;
+ (*mem)->process_info = avm->process_info;
+ /* NOTE(review): 'false' presumably means "not a userptr BO" - confirm */
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
+ amdgpu_sync_create(&(*mem)->sync);
+
+ return 0;
+}
+
/* Evict a userptr BO by stopping the queues if necessary
*
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index d63daba..f1ddfc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -54,6 +54,8 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+extern const struct dma_buf_ops amdgpu_dmabuf_ops;
+
/*
* GEM objects.
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index e45e929..2b31f1b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -39,8 +39,6 @@
#include <drm/amdgpu_drm.h>
#include <linux/dma-buf.h>
-static const struct dma_buf_ops amdgpu_dmabuf_ops;
-
/**
* amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
* implementation
@@ -332,7 +330,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
return ret;
}
-static const struct dma_buf_ops amdgpu_dmabuf_ops = {
+const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_gem_map_attach,
.detach = amdgpu_gem_map_detach,
.map_dma_buf = drm_gem_map_dma_buf,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 5f4062b..ae3ae0f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -33,6 +33,7 @@
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
+#include <linux/dma-buf.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
@@ -1550,6 +1551,115 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
return err;
}
+/*
+ * AMDKFD_IOC_GET_DMABUF_INFO handler.
+ *
+ * Queries size, owning GPU, allocation flags and (optionally) metadata of
+ * the DMABuf named by args->dmabuf_fd. Metadata is only fetched and copied
+ * to user space when args->metadata_ptr is non-zero; args->metadata_size
+ * gives the space available on entry and is overwritten by the query.
+ *
+ * Returns 0 on success, -EINVAL if no KFD GPU exists or the buffer's
+ * device is not a KFD device, -ENOMEM if the bounce buffer cannot be
+ * allocated, -EFAULT if the copy to user space fails, or the error from
+ * amdgpu_amdkfd_get_dmabuf_info().
+ */
+static int kfd_ioctl_get_dmabuf_info(struct file *filep,
+				     struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_dmabuf_info_args *args = data;
+	struct kgd_dev *owner_kgd;
+	struct kfd_dev *kdev = NULL;
+	void *md_buf = NULL;
+	uint32_t mem_flags;
+	unsigned int idx = 0;
+	int ret;
+
+	/* Find a KFD GPU device that supports the get_dmabuf_info query */
+	while (kfd_topology_enum_kfd_devices(idx, &kdev) == 0 && !kdev)
+		idx++;
+	if (!kdev)
+		return -EINVAL;
+
+	/* Kernel-side bounce buffer, sized by what user space offered */
+	if (args->metadata_ptr) {
+		md_buf = kzalloc(args->metadata_size, GFP_KERNEL);
+		if (!md_buf)
+			return -ENOMEM;
+	}
+
+	/* Get dmabuf info from KGD */
+	ret = amdgpu_amdkfd_get_dmabuf_info(kdev->kgd, args->dmabuf_fd,
+					    &owner_kgd, &args->size,
+					    md_buf, args->metadata_size,
+					    &args->metadata_size, &mem_flags);
+	if (ret)
+		goto exit;
+
+	/* Reverse-lookup gpu_id from kgd pointer */
+	kdev = kfd_device_by_kgd(owner_kgd);
+	if (!kdev) {
+		ret = -EINVAL;
+		goto exit;
+	}
+	args->gpu_id = kdev->id;
+	args->flags = mem_flags;
+
+	/* Copy metadata buffer to user mode */
+	if (md_buf && copy_to_user((void __user *)args->metadata_ptr,
+				   md_buf, args->metadata_size))
+		ret = -EFAULT;
+
+exit:
+	kfree(md_buf);
+
+	return ret;
+}
+
+/*
+ * AMDKFD_IOC_IMPORT_DMABUF handler.
+ *
+ * Imports the DMABuf named by args->dmabuf_fd into the calling process's
+ * GPU VM on the GPU selected by args->gpu_id, recording args->va_addr as
+ * its virtual address, and returns a KFD buffer handle in args->handle.
+ *
+ * The DMABuf reference obtained here is only needed for the duration of
+ * the call and is released on every path: the imported kgd_mem holds its
+ * own reference on the underlying BO.
+ *
+ * Returns 0 on success, -EINVAL for an unknown gpu_id, the error encoded
+ * by dma_buf_get() for a bad FD, the error from binding the process to
+ * the device or from the import itself, or -EFAULT if no handle could be
+ * created.
+ */
+static int kfd_ioctl_import_dmabuf(struct file *filep,
+				   struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_import_dmabuf_args *args = data;
+	struct kfd_process_device *pdd;
+	struct dma_buf *dmabuf;
+	struct kfd_dev *dev;
+	int idr_handle;
+	uint64_t size;
+	void *mem;
+	int r;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	/* dma_buf_get() signals failure with an ERR_PTR, never with NULL */
+	dmabuf = dma_buf_get(args->dmabuf_fd);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		r = PTR_ERR(pdd);
+		goto err_unlock;
+	}
+
+	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
+					      args->va_addr, pdd->vm,
+					      (struct kgd_mem **)&mem, &size,
+					      NULL);
+	if (r)
+		goto err_unlock;
+
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+	if (idr_handle < 0) {
+		r = -EFAULT;
+		goto err_free;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	/* The kgd_mem keeps the BO alive; the dmabuf ref is no longer needed */
+	dma_buf_put(dmabuf);
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+
+	return 0;
+
+err_free:
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+err_unlock:
+	mutex_unlock(&p->mutex);
+	dma_buf_put(dmabuf);
+	return r;
+}
+
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -1635,7 +1745,13 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
kfd_ioctl_set_cu_mask, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
- kfd_ioctl_get_queue_wave_state, 0)
+ kfd_ioctl_get_queue_wave_state, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
+ kfd_ioctl_get_dmabuf_info, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
+ kfd_ioctl_import_dmabuf, 0),
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index dec8e64..0689d4c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -793,6 +793,7 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
+struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c5ed21e..5f5b2ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -111,6 +111,24 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
return device;
}
+/*
+ * Find the KFD device whose kgd pointer equals @kgd.
+ *
+ * Walks topology_device_list under the read side of topology_lock and
+ * returns the first matching GPU, or NULL if none matches.
+ */
+struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
+{
+	struct kfd_topology_device *node;
+	struct kfd_dev *found = NULL;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(node, &topology_device_list, list) {
+		/* Skip topology nodes without a GPU or with a different one */
+		if (!node->gpu || node->gpu->kgd != kgd)
+			continue;
+
+		found = node->gpu;
+		break;
+	}
+
+	up_read(&topology_lock);
+
+	return found;
+}
+
/* Called with write topology_lock acquired */
static void kfd_release_topology_device(struct kfd_topology_device *dev)
{
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index b01eb50..e622fd1 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -398,6 +398,24 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
__u32 n_success; /* to/from KFD */
};
+/*
+ * Arguments for AMDKFD_IOC_GET_DMABUF_INFO (query step of a DMABuf import).
+ *
+ * If metadata_ptr is 0 no metadata is copied back; metadata_size is still
+ * updated by the query. gpu_id identifies the KFD device that owns the
+ * buffer and is the value to pass to AMDKFD_IOC_IMPORT_DMABUF.
+ */
+struct kfd_ioctl_get_dmabuf_info_args {
+ __u64 size; /* from KFD */
+ __u64 metadata_ptr; /* to KFD */
+ __u32 metadata_size; /* to KFD (space allocated by user)
+ * from KFD (actual metadata size)
+ */
+ __u32 gpu_id; /* from KFD */
+ __u32 flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */
+ __u32 dmabuf_fd; /* to KFD */
+};
+
+/*
+ * Arguments for AMDKFD_IOC_IMPORT_DMABUF (import step of a DMABuf import).
+ *
+ * va_addr is the GPU virtual address chosen by user mode for the buffer;
+ * handle is the KFD buffer handle built from gpu_id and a per-process ID.
+ */
+struct kfd_ioctl_import_dmabuf_args {
+ __u64 va_addr; /* to KFD */
+ __u64 handle; /* from KFD */
+ __u32 gpu_id; /* to KFD */
+ __u32 dmabuf_fd; /* to KFD */
+};
+
#define AMDKFD_IOCTL_BASE 'K'
#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -486,7 +504,13 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \
AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args)
+#define AMDKFD_IOC_GET_DMABUF_INFO \
+ AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args)
+
+#define AMDKFD_IOC_IMPORT_DMABUF \
+ AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
+
#define AMDKFD_COMMAND_START 0x01
-#define AMDKFD_COMMAND_END 0x1C
+#define AMDKFD_COMMAND_END 0x1E
#endif
--
2.7.4
More information about the amd-gfx mailing list