[PATCH v4] dma-buf: Add ioctls to allow userspace to flush

Tiago Vignatti tiago.vignatti at intel.com
Tue Aug 25 17:02:55 PDT 2015


From: Daniel Vetter <daniel.vetter at ffwll.ch>

The userspace might need some sort of cache coherency management e.g. when CPU
and GPU domains are being accessed through dma-buf at the same time. To
circumvent this problem there are begin/end coherency markers, that forward
directly to existing dma-buf device drivers vfunc hooks. Userspace can make use
of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence would be
used as follows:

  - mmap dma-buf fd
  - for each drawing/upload cycle in CPU
    1. SYNC_START ioctl
    2. read/write to mmap area or a 2d sub-region of it
    3. SYNC_END ioctl.
  - munmap once you don't need the buffer any more

v2 (Tiago): Fix header file type names (u64 -> __u64)
v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags to the begin/end
dma-buf functions. Check for overflows in start/length.
v4 (Tiago): use 2d regions for sync.

Cc: Sumit Semwal <sumit.semwal at linaro.org>
Signed-off-by: Daniel Vetter <daniel.vetter at intel.com>
Signed-off-by: Tiago Vignatti <tiago.vignatti at intel.com>
---

I'm unable to test the 2d sync properly, because the begin/end access hooks in
i915 don't track the mapped range at all.

 Documentation/dma-buf-sharing.txt      | 13 ++++++
 drivers/dma-buf/dma-buf.c              | 77 ++++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_dmabuf.c |  6 ++-
 include/linux/dma-buf.h                | 20 +++++----
 include/uapi/linux/dma-buf.h           | 57 +++++++++++++++++++++++++
 5 files changed, 150 insertions(+), 23 deletions(-)
 create mode 100644 include/uapi/linux/dma-buf.h

diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
index 480c8de..8061ac0 100644
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt
@@ -355,6 +355,19 @@ Being able to mmap an export dma-buf buffer object has 2 main use-cases:
 
    No special interfaces, userspace simply calls mmap on the dma-buf fd.
 
+   Also, the userspace might need some sort of cache coherency management e.g.
+   when CPU and GPU domains are being accessed through dma-buf at the same
+   time. To circumvent this problem there are begin/end coherency markers, that
+   forward directly to existing dma-buf device drivers vfunc hooks. Userspace
+   can make use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The
+   sequence would be used as follows:
+     - mmap dma-buf fd
+     - for each drawing/upload cycle in CPU
+       1. SYNC_START ioctl
+       2. read/write to mmap area or a 2d sub-region of it
+       3. SYNC_END ioctl.
+     - munmap once you don't need the buffer any more
+
 2. Supporting existing mmap interfaces in importers
 
    Similar to the motivation for kernel cpu access it is again important that
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 155c146..b6a4a06 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -251,11 +251,55 @@ out:
 	return events;
 }
 
+/*
+ * Handle DMA_BUF_IOCTL_SYNC: copy a struct dma_buf_sync from userspace, map
+ * its flags to a DMA direction and call the begin/end cpu access helper.
+ * Returns 0, -EINVAL, -ENOTTY, -EFAULT or dma_buf_begin_cpu_access()'s error.
+ */
+static long dma_buf_ioctl(struct file *file,
+			  unsigned int cmd, unsigned long arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_sync sync;
+	enum dma_data_direction direction;
+
+	if (!is_dma_buf_file(file))
+		return -EINVAL;
+	if (cmd != DMA_BUF_IOCTL_SYNC)
+		return -ENOTTY;
+	if (copy_from_user(&sync, (void __user *) arg, sizeof(sync)))
+		return -EFAULT;
+	if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK)
+		return -EINVAL;
+
+	/* DMA_BUF_SYNC_RW is READ | WRITE, so require both bits for it. */
+	if ((sync.flags & DMA_BUF_SYNC_RW) == DMA_BUF_SYNC_RW)
+		direction = DMA_BIDIRECTIONAL;
+	else if (sync.flags & DMA_BUF_SYNC_READ)
+		direction = DMA_FROM_DEVICE;
+	else if (sync.flags & DMA_BUF_SYNC_WRITE)
+		direction = DMA_TO_DEVICE;
+	else
+		return -EINVAL;
+
+	/* FIXME: sizeof(sync) excludes regions[], so they were never copied in;
+	 * copy num_regions entries and bounds-check them before drivers use them. */
+	dmabuf = file->private_data;
+	if (!(sync.flags & DMA_BUF_SYNC_END))
+		return dma_buf_begin_cpu_access(dmabuf, sync.stride_bytes,
+				sync.bytes_per_pixel, sync.num_regions,
+				sync.regions, direction);
+	dma_buf_end_cpu_access(dmabuf, sync.stride_bytes, sync.bytes_per_pixel,
+			sync.num_regions, sync.regions, direction);
+	return 0;
+}
+
 static const struct file_operations dma_buf_fops = {
 	.release	= dma_buf_release,
 	.mmap		= dma_buf_mmap_internal,
 	.llseek		= dma_buf_llseek,
 	.poll		= dma_buf_poll,
+	.unlocked_ioctl	= dma_buf_ioctl,
 };
 
 /*
@@ -539,14 +583,17 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
  * preparations. Coherency is only guaranteed in the specified range for the
  * specified access direction.
  * @dmabuf:	[in]	buffer to prepare cpu access for.
- * @start:	[in]	start of range for cpu access.
- * @len:	[in]	length of range for cpu access.
- * @direction:	[in]	length of range for cpu access.
+ * @stride_bytes:	[in]	stride in bytes for cpu access.
+ * @bytes_per_pixel:	[in]	bytes per pixel of the region for cpu access.
+ * @num_regions:   [in]  number of regions.
+ * @regions:   [in] vector of 2-dimensional regions for cpu access.
+ * @direction:	[in]	direction of range for cpu access.
  *
  * Can return negative error values, returns 0 on success.
  */
-int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
-			     enum dma_data_direction direction)
+int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t stride_bytes,
+	size_t bytes_per_pixel, size_t num_regions, struct dma_buf_sync_region regions[],
+	enum dma_data_direction direction)
 {
 	int ret = 0;
 
@@ -554,8 +601,8 @@ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
 		return -EINVAL;
 
 	if (dmabuf->ops->begin_cpu_access)
-		ret = dmabuf->ops->begin_cpu_access(dmabuf, start,
-							len, direction);
+		ret = dmabuf->ops->begin_cpu_access(dmabuf, stride_bytes, bytes_per_pixel,
+							num_regions, regions, direction);
 
 	return ret;
 }
@@ -567,19 +614,23 @@ EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access);
  * actions. Coherency is only guaranteed in the specified range for the
  * specified access direction.
  * @dmabuf:	[in]	buffer to complete cpu access for.
- * @start:	[in]	start of range for cpu access.
- * @len:	[in]	length of range for cpu access.
- * @direction:	[in]	length of range for cpu access.
+ * @stride_bytes:	[in]	stride in bytes for cpu access.
+ * @bytes_per_pixel:	[in]	bytes per pixel of the region for cpu access.
+ * @num_regions:   [in]  number of regions.
+ * @regions:   [in]  vector of 2-dimensional regions for cpu access.
+ * @direction:	[in]	direction of range for cpu access.
  *
  * This call must always succeed.
  */
-void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
-			    enum dma_data_direction direction)
+void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t stride_bytes,
+	size_t bytes_per_pixel, size_t num_regions, struct dma_buf_sync_region regions[],
+	enum dma_data_direction direction)
 {
 	WARN_ON(!dmabuf);
 
 	if (dmabuf->ops->end_cpu_access)
-		dmabuf->ops->end_cpu_access(dmabuf, start, len, direction);
+		dmabuf->ops->end_cpu_access(dmabuf, stride_bytes, bytes_per_pixel,
+			num_regions, regions, direction);
 }
 EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 95cbfff..e5bb7a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -212,7 +212,8 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
 	return 0;
 }
 
-static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction)
+static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes, size_t bytes_per_pixel,
+		size_t num_regions, struct dma_buf_sync_region regions[], enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
@@ -228,7 +229,8 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size
 	return ret;
 }
 
-static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction)
+static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes, size_t bytes_per_pixel,
+		size_t num_regions, struct dma_buf_sync_region regions[], enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index f98bd70..ed457cb 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -33,6 +33,8 @@
 #include <linux/fence.h>
 #include <linux/wait.h>
 
+#include <uapi/linux/dma-buf.h>
+
 struct device;
 struct dma_buf;
 struct dma_buf_attachment;
@@ -93,10 +95,10 @@ struct dma_buf_ops {
 	/* after final dma_buf_put() */
 	void (*release)(struct dma_buf *);
 
-	int (*begin_cpu_access)(struct dma_buf *, size_t, size_t,
-				enum dma_data_direction);
-	void (*end_cpu_access)(struct dma_buf *, size_t, size_t,
-			       enum dma_data_direction);
+	int (*begin_cpu_access)(struct dma_buf *, size_t, size_t, size_t,
+				struct dma_buf_sync_region [], enum dma_data_direction);
+	void (*end_cpu_access)(struct dma_buf *, size_t, size_t, size_t,
+			       struct dma_buf_sync_region [], enum dma_data_direction);
 	void *(*kmap_atomic)(struct dma_buf *, unsigned long);
 	void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *);
 	void *(*kmap)(struct dma_buf *, unsigned long);
@@ -224,10 +226,12 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
 					enum dma_data_direction);
 void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
 				enum dma_data_direction);
-int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t len,
-			     enum dma_data_direction dir);
-void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t start, size_t len,
-			    enum dma_data_direction dir);
+int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes,
+			     size_t bytes_per_pixel, size_t num_regions,
+			     struct dma_buf_sync_region regions[], enum dma_data_direction dir);
+void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes,
+			     size_t bytes_per_pixel, size_t num_regions,
+			     struct dma_buf_sync_region regions[], enum dma_data_direction dir);
 void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long);
 void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *);
 void *dma_buf_kmap(struct dma_buf *, unsigned long);
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
new file mode 100644
index 0000000..c63b578
--- /dev/null
+++ b/include/uapi/linux/dma-buf.h
@@ -0,0 +1,57 @@
+/*
+ * Framework for buffer objects that can be shared across devices/subsystems.
+ *
+ * Copyright(C) 2015 Intel Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _DMA_BUF_UAPI_H_
+#define _DMA_BUF_UAPI_H_
+
+/* Flags for dma_buf_sync.flags: an access direction OR'ed with START or END. */
+enum dma_buf_sync_flags {
+	DMA_BUF_SYNC_READ  = (1 << 0),
+	DMA_BUF_SYNC_WRITE = (1 << 1),
+	DMA_BUF_SYNC_RW    = (DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE),
+	DMA_BUF_SYNC_START = (0 << 2),
+	DMA_BUF_SYNC_END   = (1 << 2),
+	/* Every bit userspace may set in the flags field. */
+	DMA_BUF_SYNC_VALID_FLAGS_MASK = (DMA_BUF_SYNC_RW | DMA_BUF_SYNC_END)
+};
+
+/* 2-dimensional sub-region of a mapped dma-buf. Several of these, each smaller
+ * than the buffer, can be batched in dma_buf_sync.regions[] so that one sync
+ * ioctl covers them all at once. */
+struct dma_buf_sync_region {
+	__u64 x;	/* horizontal origin; presumably in pixels, TODO confirm */
+	__u64 y;	/* vertical origin; presumably in rows, TODO confirm */
+	__u64 width;	/* horizontal extent; presumably same unit as x */
+	__u64 height;	/* vertical extent; presumably same unit as y */
+};
+
+/* Argument of DMA_BUF_IOCTL_SYNC: brackets CPU access to an mmap'ed dma-buf. */
+struct dma_buf_sync {
+	__u32 flags;		/* bitmask of enum dma_buf_sync_flags values */
+	__u32 pad;		/* explicit hole: same layout on 32- and 64-bit ABIs */
+	__u64 stride_bytes;
+	__u32 bytes_per_pixel;
+	__u32 num_regions;	/* number of entries in regions[] */
+
+	struct dma_buf_sync_region regions[];
+};
+
+#define DMA_BUF_BASE		'b'
+#define DMA_BUF_IOCTL_SYNC	_IOWR(DMA_BUF_BASE, 0, struct dma_buf_sync)
+
+#endif
-- 
2.1.0



More information about the dri-devel mailing list