[RFC PATCH] drm/prime: introduce DRM_PRIME_FD_TO_HANDLE_NO_MOVE

Simon Ser contact at emersion.fr
Sun Oct 13 13:34:39 UTC 2024


This is a flag to opt-out of the automagic buffer migration to
system memory when importing a DMA-BUF.

In multi-GPU scenarii, a Wayland client might allocate on any
device. The Wayland compositor receiving the DMA-BUF has no clue
where the buffer has been allocated from. The compositor will
typically try to import the buffer into its "primary" device,
although it would be capable of importing into any DRM device.

This causes issues in case buffer imports implicitly result in
the buffer being moved to system memory. For instance, on a
system with an Intel iGPU and an AMD dGPU, a client rendering
with the dGPU and whose window is displayed on a screen
connected to the dGPU would ideally not need any roundtrip
to the iGPU. However, any attempt at figuring out where the
DMA-BUF could be accessed from will move the buffer into system
memory, degrading performance for the rest of the lifetime of the
buffer.

Describing on which device the buffer has been allocated on is
not enough: on some setups the buffer may have been allocated on
one device but may still be directly accessible without any move
on another device. For instance, on a split render/display system,
a buffer allocated on the display device can be directly rendered
to from the render device.

With this new flag, a compositor can try to import on all DRM
devices without any side effects. If it finds a device which can
access the buffer without a move, it can use that device to render
the buffer. If it doesn't, it can fallback to the previous
behavior: try to import without the flag to the "primary" device,
knowing this could result in a move to system memory.

This is a very incomplete implementation: it just checks whether
the buffer was allocated from the main device it's imported to,
and if not, passes a flag to dma_buf_attach() checked by i915
only. All other drivers need to be wired up, and the DMA-BUF API
changes are just the first thing I could think of and unlikely
to be the best design.

The goal of this RFC is to gather feedback: would the general
idea of the new uAPI addition make sense to you? If so, do you
see a better way to plumb it in the DMA-BUF framework?

Signed-off-by: Simon Ser <contact at emersion.fr>
Cc: Sumit Semwal <sumit.semwal at linaro.org>
Cc: Christian König <christian.koenig at amd.com>
Cc: Daniel Stone <daniel at fooishbar.org>
Cc: Victoria Brekenfeld <victoria at system76.com>
Cc: Michel Dänzer <mdaenzer at redhat.com>
Cc: Xaver Hugl <xaver.hugl at gmail.com>
Cc: Simona Vetter <simona.vetter at ffwll.ch>
Cc: Austin Shafer <ashafer at nvidia.com>
---
 drivers/dma-buf/dma-buf.c                     | 11 ++++++--
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c   |  2 +-
 drivers/gpu/drm/drm_prime.c                   | 25 ++++++++++++++-----
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c    |  9 +++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |  7 ++++++
 drivers/gpu/drm/xe/xe_dma_buf.c               |  2 +-
 drivers/infiniband/core/umem_dmabuf.c         |  3 ++-
 include/drm/drm_prime.h                       |  9 ++++---
 include/linux/dma-buf.h                       |  6 ++++-
 include/uapi/drm/drm.h                        |  3 ++-
 11 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index fbbb750bc5b1..98ff03067fe7 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -902,7 +902,7 @@ static struct sg_table * __map_dma_buf(struct dma_buf_attachment *attach,
 struct dma_buf_attachment *
 dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
 		       const struct dma_buf_attach_ops *importer_ops,
-		       void *importer_priv)
+		       void *importer_priv, int flags)
 {
 	struct dma_buf_attachment *attach;
 	int ret;
@@ -925,6 +925,13 @@ dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
 	attach->importer_priv = importer_priv;
 
 	if (dmabuf->ops->attach) {
+		if (DMA_BUF_ATTACH_NO_MOVE &&
+		    (!dmabuf->ops->attach_needs_move ||
+		      dmabuf->ops->attach_needs_move(dmabuf, attach))) {
+			ret = -EINVAL;
+			goto err_attach;
+		}
+
 		ret = dmabuf->ops->attach(dmabuf, attach);
 		if (ret)
 			goto err_attach;
@@ -989,7 +996,7 @@ EXPORT_SYMBOL_NS_GPL(dma_buf_dynamic_attach, DMA_BUF);
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 					  struct device *dev)
 {
-	return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL);
+	return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL, 0);
 }
 EXPORT_SYMBOL_NS_GPL(dma_buf_attach, DMA_BUF);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ce5ca304dba9..4059c9e114e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2452,7 +2452,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd,
 	int ret;
 
 	ret = drm_gem_prime_fd_to_handle(&adev->ddev, adev->kfd.client.file, fd,
-					 &handle);
+					 &handle, 0);
 	if (ret)
 		return ret;
 	obj = drm_gem_object_lookup(adev->kfd.client.file, handle);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 8e81a83d37d8..ccda002b98f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -430,7 +430,7 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
 		return obj;
 
 	attach = dma_buf_dynamic_attach(dma_buf, dev->dev,
-					&amdgpu_dma_buf_attach_ops, obj);
+					&amdgpu_dma_buf_attach_ops, obj, 0);
 	if (IS_ERR(attach)) {
 		drm_gem_object_put(obj);
 		return ERR_CAST(attach);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 0e3f8adf162f..7207273c04fa 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -294,7 +294,7 @@ EXPORT_SYMBOL(drm_gem_dmabuf_release);
  */
 int drm_gem_prime_fd_to_handle(struct drm_device *dev,
 			       struct drm_file *file_priv, int prime_fd,
-			       uint32_t *handle)
+			       uint32_t *handle, uint32_t flags)
 {
 	struct dma_buf *dma_buf;
 	struct drm_gem_object *obj;
@@ -314,9 +314,10 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev,
 	/* never seen this one, need to import */
 	mutex_lock(&dev->object_name_lock);
 	if (dev->driver->gem_prime_import)
+		/* TODO: pass flags around */
 		obj = dev->driver->gem_prime_import(dev, dma_buf);
 	else
-		obj = drm_gem_prime_import(dev, dma_buf);
+		obj = drm_gem_prime_import(dev, dma_buf, flags);
 	if (IS_ERR(obj)) {
 		ret = PTR_ERR(obj);
 		goto out_unlock;
@@ -368,11 +369,13 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 	struct drm_prime_handle *args = data;
 
 	if (dev->driver->prime_fd_to_handle) {
+		/* TODO: pass around flags */
 		return dev->driver->prime_fd_to_handle(dev, file_priv, args->fd,
 						       &args->handle);
 	}
 
-	return drm_gem_prime_fd_to_handle(dev, file_priv, args->fd, &args->handle);
+	return drm_gem_prime_fd_to_handle(dev, file_priv, args->fd,
+					  &args->handle, args->flags);
 }
 
 static struct dma_buf *export_and_register_object(struct drm_device *dev,
@@ -636,6 +639,13 @@ void drm_gem_map_detach(struct dma_buf *dma_buf,
 }
 EXPORT_SYMBOL(drm_gem_map_detach);
 
+static int drm_gem_attach_needs_move(struct dma_buf *dma_buf,
+				     struct dma_buf_attachment *attach)
+{
+	/* TODO: is this correct for all drm_gem_prime_dmabuf_ops users? */
+	return 0;
+}
+
 /**
  * drm_gem_map_dma_buf - map_dma_buf implementation for GEM
  * @attach: attachment whose scatterlist is to be returned
@@ -813,6 +823,7 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops =  {
 	.cache_sgt_mapping = true,
 	.attach = drm_gem_map_attach,
 	.detach = drm_gem_map_detach,
+	.attach_needs_move = drm_gem_attach_needs_move,
 	.map_dma_buf = drm_gem_map_dma_buf,
 	.unmap_dma_buf = drm_gem_unmap_dma_buf,
 	.release = drm_gem_dmabuf_release,
@@ -933,7 +944,8 @@ EXPORT_SYMBOL(drm_gem_prime_export);
  */
 struct drm_gem_object *drm_gem_prime_import_dev(struct drm_device *dev,
 					    struct dma_buf *dma_buf,
-					    struct device *attach_dev)
+					    struct device *attach_dev,
+					    uint32_t flags)
 {
 	struct dma_buf_attachment *attach;
 	struct sg_table *sgt;
@@ -1002,9 +1014,10 @@ EXPORT_SYMBOL(drm_gem_prime_import_dev);
  * &drm_gem_object_funcs.free hook when using this function.
  */
 struct drm_gem_object *drm_gem_prime_import(struct drm_device *dev,
-					    struct dma_buf *dma_buf)
+					    struct dma_buf *dma_buf,
+					    uint32_t flags)
 {
-	return drm_gem_prime_import_dev(dev, dma_buf, dev->dev);
+	return drm_gem_prime_import_dev(dev, dma_buf, dev->dev, flags);
 }
 EXPORT_SYMBOL(drm_gem_prime_import);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1df74f7aa3dc..d88a307016a6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -201,9 +201,18 @@ static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf,
 	i915_gem_object_unpin_pages(obj);
 }
 
+static int i915_gem_dmabuf_attach_needs_move(struct dma_buf *dmabuf,
+					     struct dma_buf_attachment *attach)
+{
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
+
+	return i915_gem_object_get_region_id(obj) != INTEL_REGION_SMEM;
+}
+
 static const struct dma_buf_ops i915_dmabuf_ops =  {
 	.attach = i915_gem_dmabuf_attach,
 	.detach = i915_gem_dmabuf_detach,
+	.attach_needs_move = i915_gem_dmabuf_attach_needs_move,
 	.map_dma_buf = i915_gem_map_dma_buf,
 	.unmap_dma_buf = drm_gem_unmap_dma_buf,
 	.release = drm_gem_dmabuf_release,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 3dc61cbd2e11..f61637ada47a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -312,6 +312,13 @@ i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
 	return READ_ONCE(obj->frontbuffer) || obj->is_dpt;
 }
 
+static inline enum intel_region_id
+i915_gem_object_get_region_id(const struct drm_i915_gem_object *obj)
+{
+	struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
+	return mr->id;
+}
+
 static inline unsigned int
 i915_gem_object_get_tiling(const struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 68f309f5e981..73bdc804ce0e 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -296,7 +296,7 @@ struct drm_gem_object *xe_gem_prime_import(struct drm_device *dev,
 		attach_ops = test->attach_ops;
 #endif
 
-	attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base);
+	attach = dma_buf_dynamic_attach(dma_buf, dev->dev, attach_ops, &bo->ttm.base, 0);
 	if (IS_ERR(attach)) {
 		obj = ERR_CAST(attach);
 		goto out_err;
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
index 9fcd37761264..5ec4e90c80fd 100644
--- a/drivers/infiniband/core/umem_dmabuf.c
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -159,7 +159,8 @@ ib_umem_dmabuf_get_with_dma_device(struct ib_device *device,
 					dmabuf,
 					dma_device,
 					ops,
-					umem_dmabuf);
+					umem_dmabuf,
+					0);
 	if (IS_ERR(umem_dmabuf->attach)) {
 		ret = ERR_CAST(umem_dmabuf->attach);
 		goto out_free_umem;
diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h
index fa085c44d4ca..0dbf41564fdc 100644
--- a/include/drm/drm_prime.h
+++ b/include/drm/drm_prime.h
@@ -68,7 +68,8 @@ struct dma_buf *drm_gem_dmabuf_export(struct drm_device *dev,
 void drm_gem_dmabuf_release(struct dma_buf *dma_buf);
 
 int drm_gem_prime_fd_to_handle(struct drm_device *dev,
-			       struct drm_file *file_priv, int prime_fd, uint32_t *handle);
+			       struct drm_file *file_priv, int prime_fd,
+			       uint32_t *handle, uint32_t flags);
 struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev,
 			       struct drm_file *file_priv, uint32_t handle,
 			       uint32_t flags);
@@ -102,9 +103,11 @@ unsigned long drm_prime_get_contiguous_size(struct sg_table *sgt);
 /* helper functions for importing */
 struct drm_gem_object *drm_gem_prime_import_dev(struct drm_device *dev,
 						struct dma_buf *dma_buf,
-						struct device *attach_dev);
+						struct device *attach_dev,
+						uint32_t flags);
 struct drm_gem_object *drm_gem_prime_import(struct drm_device *dev,
-					    struct dma_buf *dma_buf);
+					    struct dma_buf *dma_buf,
+					    uint32_t flags);
 
 void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg);
 
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 36216d28d8bd..0fc7e059c934 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -82,6 +82,8 @@ struct dma_buf_ops {
 	 */
 	void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
 
+	int (*attach_needs_move)(struct dma_buf *, struct dma_buf_attachment *);
+
 	/**
 	 * @pin:
 	 *
@@ -597,12 +599,14 @@ dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach)
 	return !!attach->importer_ops;
 }
 
+#define DMA_BUF_ATTACH_NO_MOVE (1 << 0)
+
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 					  struct device *dev);
 struct dma_buf_attachment *
 dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
 		       const struct dma_buf_attach_ops *importer_ops,
-		       void *importer_priv);
+		       void *importer_priv, int flags);
 void dma_buf_detach(struct dma_buf *dmabuf,
 		    struct dma_buf_attachment *attach);
 int dma_buf_pin(struct dma_buf_attachment *attach);
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 7fba37b94401..9eec1d1c5a14 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -883,10 +883,11 @@ struct drm_set_client_cap {
 
 #define DRM_RDWR O_RDWR
 #define DRM_CLOEXEC O_CLOEXEC
+#define DRM_PRIME_FD_TO_HANDLE_NO_MOVE (1 << 0)
 struct drm_prime_handle {
 	__u32 handle;
 
-	/** Flags.. only applicable for handle->fd */
+	/** Flags */
 	__u32 flags;
 
 	/** Returned dmabuf file descriptor */
-- 
2.47.0




More information about the dri-devel mailing list