[PATCH v4] dma-buf: Add ioctls to allow userspace to flush
Tiago Vignatti
tiago.vignatti at intel.com
Tue Aug 25 17:02:55 PDT 2015
From: Daniel Vetter <daniel.vetter at ffwll.ch>
Userspace might need some sort of cache coherency management, e.g. when CPU
and GPU domains are accessed through dma-buf at the same time. To circumvent
this problem there are begin/end coherency markers, which forward directly
to the existing vfunc hooks of the dma-buf device drivers. Userspace can make
use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence is
used as follows:
- mmap the dma-buf fd
- for each drawing/upload cycle in the CPU:
  1. SYNC_START ioctl
  2. read from / write to the mmap'ed area, or a 2d sub-region of it
  3. SYNC_END ioctl
- munmap once you don't need the buffer any more
v2 (Tiago): Fix header file type names (u64 -> __u64).
v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags in the begin/end
dma-buf functions. Check for overflows in start/length.
v4 (Tiago): Use 2d regions for sync.
Cc: Sumit Semwal <sumit.semwal at linaro.org>
Signed-off-by: Daniel Vetter <daniel.vetter at intel.com>
Signed-off-by: Tiago Vignatti <tiago.vignatti at intel.com>
---
I'm unable to test the 2d sync properly, because begin/end access in i915
doesn't track the mapped range at all. For reference, a minimal userspace
usage sketch and a possible shape for the region bounds check follow below.
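A minimal userspace sketch of the intended usage. Error handling is omitted
and the geometry (stride, bpp, region size) is made up for illustration;
only the DMA_BUF_* names come from this patch:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/dma-buf.h>

static void draw_cycle(int dmabuf_fd, size_t buf_size)
{
	/* sync header plus one trailing region, as the ioctl expects */
	struct dma_buf_sync *sync =
		malloc(sizeof(*sync) + sizeof(sync->regions[0]));
	uint8_t *ptr = mmap(NULL, buf_size, PROT_READ | PROT_WRITE,
			    MAP_SHARED, dmabuf_fd, 0);

	sync->stride_bytes = 4096;
	sync->bytes_per_pixel = 4;
	sync->num_regions = 1;
	sync->regions[0] = (struct dma_buf_sync_region)
		{ .x = 0, .y = 0, .width = 64, .height = 64 };

	sync->flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, sync);

	memset(ptr, 0xff, 64 * 4);	/* write the first row of the region */

	sync->flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, sync);

	munmap(ptr, buf_size);
	free(sync);
}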
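One possible shape for the per-region bounds check that the TODO in
dma_buf_ioctl() asks about. This is only a sketch: it assumes a region must
lie entirely inside the buffer, and it leaves out hardening the arithmetic
itself against overflow:

/* Hypothetical helper, not part of this patch: reject a 2d region whose
 * last byte would land outside the dma-buf. */
static bool sync_region_in_bounds(const struct dma_buf_sync *sync,
				  const struct dma_buf_sync_region *r,
				  size_t buf_size)
{
	/* offset one past the last byte the region touches */
	__u64 end = (r->y + r->height - 1) * sync->stride_bytes +
		    (r->x + r->width) * sync->bytes_per_pixel;

	return r->width && r->height && end <= buf_size;
}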
Documentation/dma-buf-sharing.txt | 13 ++++++
drivers/dma-buf/dma-buf.c | 77 ++++++++++++++++++++++++++++------
drivers/gpu/drm/i915/i915_gem_dmabuf.c | 6 ++-
include/linux/dma-buf.h | 20 +++++----
include/uapi/linux/dma-buf.h | 57 +++++++++++++++++++++++++
5 files changed, 150 insertions(+), 23 deletions(-)
create mode 100644 include/uapi/linux/dma-buf.h
diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
index 480c8de..8061ac0 100644
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt
@@ -355,6 +355,19 @@ Being able to mmap an export dma-buf buffer object has 2 main use-cases:
No special interfaces, userspace simply calls mmap on the dma-buf fd.
+   Also, userspace might need some sort of cache coherency management, e.g.
+   when CPU and GPU domains are accessed through dma-buf at the same time. To
+   circumvent this problem there are begin/end coherency markers, which
+   forward directly to the existing vfunc hooks of the dma-buf device
+   drivers. Userspace can make use of those markers through the
+   DMA_BUF_IOCTL_SYNC ioctl. The sequence is used as follows:
+     - mmap the dma-buf fd
+     - for each drawing/upload cycle in the CPU:
+       1. SYNC_START ioctl
+       2. read from / write to the mmap'ed area, or a 2d sub-region of it
+       3. SYNC_END ioctl
+     - munmap once you don't need the buffer any more
+
2. Supporting existing mmap interfaces in importers
Similar to the motivation for kernel cpu access it is again important that
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 155c146..b6a4a06 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -251,11 +251,55 @@ out:
return events;
}
+static long dma_buf_ioctl(struct file *file,
+			  unsigned int cmd, unsigned long arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_sync sync;
+	struct dma_buf_sync_region *regions;
+	enum dma_data_direction direction;
+	int ret = 0;
+
+	if (!is_dma_buf_file(file))
+		return -EINVAL;
+
+	dmabuf = file->private_data;
+
+	if (cmd != DMA_BUF_IOCTL_SYNC)
+		return -ENOTTY;
+
+	if (copy_from_user(&sync, (void __user *) arg, sizeof(sync)))
+		return -EFAULT;
+
+	if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK)
+		return -EINVAL;
+
+	switch (sync.flags & DMA_BUF_SYNC_RW) {
+	case DMA_BUF_SYNC_READ:
+		direction = DMA_FROM_DEVICE;
+		break;
+	case DMA_BUF_SYNC_WRITE:
+		direction = DMA_TO_DEVICE;
+		break;
+	case DMA_BUF_SYNC_RW:
+		direction = DMA_BIDIRECTIONAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* The regions follow the fixed-size header in the userspace buffer
+	 * and are not covered by the copy_from_user() above. */
+	if (sync.num_regions > SIZE_MAX / sizeof(*regions))
+		return -EINVAL;
+	regions = memdup_user((void __user *) (arg + sizeof(sync)),
+			      sync.num_regions * sizeof(*regions));
+	if (IS_ERR(regions))
+		return PTR_ERR(regions);
+
+	/* TODO: check region by region that, given stride_bytes and
+	 * bytes_per_pixel, no region reaches past the end of the buffer,
+	 * taking care that the check itself cannot overflow. */
+
+	if (sync.flags & DMA_BUF_SYNC_END)
+		dma_buf_end_cpu_access(dmabuf, sync.stride_bytes,
+				       sync.bytes_per_pixel, sync.num_regions,
+				       regions, direction);
+	else
+		ret = dma_buf_begin_cpu_access(dmabuf, sync.stride_bytes,
+					       sync.bytes_per_pixel,
+					       sync.num_regions, regions,
+					       direction);
+
+	kfree(regions);
+	return ret;
+}
+
static const struct file_operations dma_buf_fops = {
.release = dma_buf_release,
.mmap = dma_buf_mmap_internal,
.llseek = dma_buf_llseek,
.poll = dma_buf_poll,
+ .unlocked_ioctl = dma_buf_ioctl,
};
/*
@@ -539,14 +583,17 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
* preparations. Coherency is only guaranteed in the specified range for the
* specified access direction.
* @dmabuf: [in] buffer to prepare cpu access for.
- * @start: [in] start of range for cpu access.
- * @len: [in] length of range for cpu access.
- * @direction: [in] length of range for cpu access.
+ * @stride_bytes: [in] stride in bytes for cpu access.
+ * @bytes_per_pixel: [in] bytes per pixel of the region for cpu access.
+ * @num_regions: [in] number of regions.
+ * @regions: [in] vector of 2-dimensional regions for cpu access.
+ * @direction: [in] direction of range for cpu access.
*
* Can return negative error values, returns 0 on success.
*/
-int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
- enum dma_data_direction direction)
+int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t stride_bytes,
+ size_t bytes_per_pixel, size_t num_regions, struct dma_buf_sync_region regions[],
+ enum dma_data_direction direction)
{
int ret = 0;
@@ -554,8 +601,8 @@ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
return -EINVAL;
if (dmabuf->ops->begin_cpu_access)
- ret = dmabuf->ops->begin_cpu_access(dmabuf, start,
- len, direction);
+ ret = dmabuf->ops->begin_cpu_access(dmabuf, stride_bytes, bytes_per_pixel,
+ num_regions, regions, direction);
return ret;
}
@@ -567,19 +614,23 @@ EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access);
* actions. Coherency is only guaranteed in the specified range for the
* specified access direction.
* @dmabuf: [in] buffer to complete cpu access for.
- * @start: [in] start of range for cpu access.
- * @len: [in] length of range for cpu access.
- * @direction: [in] length of range for cpu access.
+ * @stride_bytes: [in] stride in bytes for cpu access.
+ * @bytes_per_pixel: [in] bytes per pixel of the region for cpu access.
+ * @num_regions: [in] number of regions.
+ * @regions: [in] vector of 2-dimensional regions for cpu access.
+ * @direction: [in] direction of range for cpu access.
*
* This call must always succeed.
*/
-void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
- enum dma_data_direction direction)
+void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t stride_bytes,
+ size_t bytes_per_pixel, size_t num_regions, struct dma_buf_sync_region regions[],
+ enum dma_data_direction direction)
{
WARN_ON(!dmabuf);
if (dmabuf->ops->end_cpu_access)
- dmabuf->ops->end_cpu_access(dmabuf, start, len, direction);
+ dmabuf->ops->end_cpu_access(dmabuf, stride_bytes, bytes_per_pixel,
+ num_regions, regions, direction);
}
EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 95cbfff..e5bb7a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -212,7 +212,8 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
return 0;
}
-static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction)
+static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes, size_t bytes_per_pixel,
+ size_t num_regions, struct dma_buf_sync_region regions[], enum dma_data_direction direction)
{
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
struct drm_device *dev = obj->base.dev;
@@ -228,7 +229,8 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size
return ret;
}
-static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction)
+static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes, size_t bytes_per_pixel,
+ size_t num_regions, struct dma_buf_sync_region regions[], enum dma_data_direction direction)
{
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
struct drm_device *dev = obj->base.dev;
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index f98bd70..ed457cb 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -33,6 +33,8 @@
#include <linux/fence.h>
#include <linux/wait.h>
+#include <uapi/linux/dma-buf.h>
+
struct device;
struct dma_buf;
struct dma_buf_attachment;
@@ -93,10 +95,10 @@ struct dma_buf_ops {
/* after final dma_buf_put() */
void (*release)(struct dma_buf *);
- int (*begin_cpu_access)(struct dma_buf *, size_t, size_t,
- enum dma_data_direction);
- void (*end_cpu_access)(struct dma_buf *, size_t, size_t,
- enum dma_data_direction);
+ int (*begin_cpu_access)(struct dma_buf *, size_t, size_t, size_t,
+ struct dma_buf_sync_region [], enum dma_data_direction);
+ void (*end_cpu_access)(struct dma_buf *, size_t, size_t, size_t,
+ struct dma_buf_sync_region [], enum dma_data_direction);
void *(*kmap_atomic)(struct dma_buf *, unsigned long);
void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *);
void *(*kmap)(struct dma_buf *, unsigned long);
@@ -224,10 +226,12 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
enum dma_data_direction);
void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
enum dma_data_direction);
-int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t len,
- enum dma_data_direction dir);
-void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t start, size_t len,
- enum dma_data_direction dir);
+int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes,
+ size_t bytes_per_pixel, size_t num_regions,
+ struct dma_buf_sync_region regions[], enum dma_data_direction dir);
+void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes,
+ size_t bytes_per_pixel, size_t num_regions,
+ struct dma_buf_sync_region regions[], enum dma_data_direction dir);
void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long);
void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *);
void *dma_buf_kmap(struct dma_buf *, unsigned long);
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
new file mode 100644
index 0000000..c63b578
--- /dev/null
+++ b/include/uapi/linux/dma-buf.h
@@ -0,0 +1,57 @@
+/*
+ * Framework for buffer objects that can be shared across devices/subsystems.
+ *
+ * Copyright (C) 2015 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _DMA_BUF_UAPI_H_
+#define _DMA_BUF_UAPI_H_
+
+enum dma_buf_sync_flags {
+ DMA_BUF_SYNC_READ = (1 << 0),
+ DMA_BUF_SYNC_WRITE = (2 << 0),
+	DMA_BUF_SYNC_RW = (DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE),
+ DMA_BUF_SYNC_START = (0 << 2),
+ DMA_BUF_SYNC_END = (1 << 2),
+
+ DMA_BUF_SYNC_VALID_FLAGS_MASK = DMA_BUF_SYNC_RW |
+ DMA_BUF_SYNC_END
+};
+
+/* 2-dimensional region, used for multi-range flush: several sub-regions,
+ * smaller than the mapped dma-buf, can be synchronized in a single
+ * batch. */
+struct dma_buf_sync_region {
+ __u64 x;
+ __u64 y;
+ __u64 width;
+ __u64 height;
+};
+
+/* Argument for DMA_BUF_IOCTL_SYNC: brackets CPU access to a userspace mmap
+ * of a dma-buf. */
+struct dma_buf_sync {
+	__u64 flags; /* enum dma_buf_sync_flags values; fixed-width so the
+		      * struct layout is the same for 32- and 64-bit */
+
+ __u64 stride_bytes;
+ __u32 bytes_per_pixel;
+ __u32 num_regions;
+
+ struct dma_buf_sync_region regions[];
+};
+
+#define DMA_BUF_BASE 'b'
+#define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
+
+#endif
--
2.1.0