[PATCH v4] dma-buf: Add ioctls to allow userspace to flush

Thomas Hellstrom thellstrom at vmware.com
Tue Aug 25 23:49:00 PDT 2015


Hi, Tiago.

On 08/26/2015 02:02 AM, Tiago Vignatti wrote:
> From: Daniel Vetter <daniel.vetter at ffwll.ch>
>
> The userspace might need some sort of cache coherency management e.g. when CPU
> and GPU domains are being accessed through dma-buf at the same time. To
> circumvent this problem there are begin/end coherency markers, that forward
> directly to existing dma-buf device drivers vfunc hooks. Userspace can make use
> of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence would be
> used like following:
>
>   - mmap dma-buf fd
>   - for each drawing/upload cycle in CPU
>     1. SYNC_START ioctl
>     2. read/write to mmap area or a 2d sub-region of it
>     3. SYNC_END ioctl.
>   - munmap once you don't need the buffer any more
>
> v2 (Tiago): Fix header file type names (u64 -> __u64)
> v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags to the begin/end
> dma-buf functions. Check for overflows in start/length.
> v4 (Tiago): use 2d regions for sync.

Daniel V had issues with the sync argument proposed by Daniel S. I'm
fine with that argument or an argument containing only a single sync
rect. I'm not sure whether Daniel V will find it easier to accept only a
single sync rect...

Also please see below.

>
> Cc: Sumit Semwal <sumit.semwal at linaro.org>
> Signed-off-by: Daniel Vetter <daniel.vetter at intel.com>
> Signed-off-by: Tiago Vignatti <tiago.vignatti at intel.com>
> ---
>
> I'm unable to test the 2d sync properly, because begin/end access in i915
> doesn't track the mapped range at all.
>
>  Documentation/dma-buf-sharing.txt      | 13 ++++++
>  drivers/dma-buf/dma-buf.c              | 77 ++++++++++++++++++++++++++++------
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |  6 ++-
>  include/linux/dma-buf.h                | 20 +++++----
>  include/uapi/linux/dma-buf.h           | 57 +++++++++++++++++++++++++
>  5 files changed, 150 insertions(+), 23 deletions(-)
>  create mode 100644 include/uapi/linux/dma-buf.h
>
> diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
> index 480c8de..8061ac0 100644
> --- a/Documentation/dma-buf-sharing.txt
> +++ b/Documentation/dma-buf-sharing.txt
> @@ -355,6 +355,19 @@ Being able to mmap an export dma-buf buffer object has 2 main use-cases:
>  
>     No special interfaces, userspace simply calls mmap on the dma-buf fd.
>  
> +   Also, the userspace might need some sort of cache coherency management e.g.
> +   when CPU and GPU domains are being accessed through dma-buf at the same
> +   time. To circumvent this problem there are begin/end coherency markers, that
> +   forward directly to existing dma-buf device drivers vfunc hooks. Userspace
> +   can make use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The
> +   sequence would be used like following:
> +     - mmap dma-buf fd
> +     - for each drawing/upload cycle in CPU
> +       1. SYNC_START ioctl
> +       2. read/write to mmap area or a 2d sub-region of it
> +       3. SYNC_END ioctl.
> +     - munmap once you don't need the buffer any more
> +
>  2. Supporting existing mmap interfaces in importers
>  
>     Similar to the motivation for kernel cpu access it is again important that
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 155c146..b6a4a06 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -251,11 +251,55 @@ out:
>  	return events;
>  }
>  
> +static long dma_buf_ioctl(struct file *file,
> +			  unsigned int cmd, unsigned long arg)
> +{
> +	struct dma_buf *dmabuf;
> +	struct dma_buf_sync sync;
> +	enum dma_data_direction direction;
> +
> +	dmabuf = file->private_data;
> +
> +	if (!is_dma_buf_file(file))
> +		return -EINVAL;
> +
> +	if (cmd != DMA_BUF_IOCTL_SYNC)
> +		return -ENOTTY;
> +
> +	if (copy_from_user(&sync, (void __user *) arg, sizeof(sync)))
> +		return -EFAULT;

Do we actually copy all sync regions here?

> +
> +	if (sync.flags & DMA_BUF_SYNC_RW)
> +		direction = DMA_BIDIRECTIONAL;
> +	else if (sync.flags & DMA_BUF_SYNC_READ)
> +		direction = DMA_FROM_DEVICE;
> +	else if (sync.flags & DMA_BUF_SYNC_WRITE)
> +		direction = DMA_TO_DEVICE;
> +	else
> +		return -EINVAL;
> +
> +	if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK)
> +		return -EINVAL;
> +
> +	/* TODO: check for overflowing the buffer's size - how so, checking region by
> +	 * region here? Maybe need to check for the other parameters as well. */
> +
> +	if (sync.flags & DMA_BUF_SYNC_END)
> +		dma_buf_end_cpu_access(dmabuf, sync.stride_bytes, sync.bytes_per_pixel,
> +				sync.num_regions, sync.regions, direction);
> +	else
> +		dma_buf_begin_cpu_access(dmabuf, sync.stride_bytes, sync.bytes_per_pixel,
> +				sync.num_regions, sync.regions, direction);
> +
> +	return 0;
> +}
> +
>  static const struct file_operations dma_buf_fops = {
>  	.release	= dma_buf_release,
>  	.mmap		= dma_buf_mmap_internal,
>  	.llseek		= dma_buf_llseek,
>  	.poll		= dma_buf_poll,
> +	.unlocked_ioctl	= dma_buf_ioctl,
>  };
>  
>  /*
> @@ -539,14 +583,17 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
>   * preparations. Coherency is only guaranteed in the specified range for the
>   * specified access direction.
>   * @dmabuf:	[in]	buffer to prepare cpu access for.
> - * @start:	[in]	start of range for cpu access.
> - * @len:	[in]	length of range for cpu access.
> - * @direction:	[in]	length of range for cpu access.
> + * @stride_bytes:	[in]	stride in bytes for cpu access.
> + * @bytes_per_pixel:	[in]	bytes per pixel of the region for cpu access.
> + * @num_regions:   [in]  number of regions.
> + * @region:   [in] vector of 2-dimensional regions for cpu access.
> + * @direction:	[in]	direction of range for cpu access.
>   *
>   * Can return negative error values, returns 0 on success.
>   */
> -int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
> -			     enum dma_data_direction direction)
> +int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t stride_bytes,
> +	size_t bytes_per_pixel, size_t num_regions, struct dma_buf_sync_region regions[],
> +	enum dma_data_direction direction)
>  {
>  	int ret = 0;
>  
> @@ -554,8 +601,8 @@ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
>  		return -EINVAL;
>  
>  	if (dmabuf->ops->begin_cpu_access)
> -		ret = dmabuf->ops->begin_cpu_access(dmabuf, start,
> -							len, direction);
> +		ret = dmabuf->ops->begin_cpu_access(dmabuf, stride_bytes, bytes_per_pixel,
> +							num_regions, regions, direction);
>  
>  	return ret;
>  }
> @@ -567,19 +614,23 @@ EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access);
>   * actions. Coherency is only guaranteed in the specified range for the
>   * specified access direction.
>   * @dmabuf:	[in]	buffer to complete cpu access for.
> - * @start:	[in]	start of range for cpu access.
> - * @len:	[in]	length of range for cpu access.
> - * @direction:	[in]	length of range for cpu access.
> + * @stride_bytes:	[in]	stride in bytes for cpu access.
> + * @bytes_per_pixel:	[in]	bytes per pixel of the region for cpu access.
> + * @num_regions:   [in]  number of regions.
> + * @regions:   [in]  vector of 2-dimensional regions for cpu access.
> + * @direction:	[in]	direction of range for cpu access.
>   *
>   * This call must always succeed.
>   */
> -void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
> -			    enum dma_data_direction direction)
> +void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t stride_bytes,
> +	size_t bytes_per_pixel, size_t num_regions, struct dma_buf_sync_region regions[],
> +	enum dma_data_direction direction)
>  {
>  	WARN_ON(!dmabuf);
>  
>  	if (dmabuf->ops->end_cpu_access)
> -		dmabuf->ops->end_cpu_access(dmabuf, start, len, direction);
> +		dmabuf->ops->end_cpu_access(dmabuf, stride_bytes, bytes_per_pixel,
> +			num_regions, regions, direction);
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 95cbfff..e5bb7a3 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -212,7 +212,8 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
>  	return 0;
>  }
>  
> -static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction)
> +static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes, size_t bytes_per_pixel,
> +		size_t num_regions, struct dma_buf_sync_region regions[], enum dma_data_direction direction)
>  {
>  	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
>  	struct drm_device *dev = obj->base.dev;
> @@ -228,7 +229,8 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size
>  	return ret;
>  }
>  
> -static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction)
> +static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes, size_t bytes_per_pixel,
> +		size_t num_regions, struct dma_buf_sync_region regions[], enum dma_data_direction direction)
>  {
>  	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
>  	struct drm_device *dev = obj->base.dev;
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index f98bd70..ed457cb 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -33,6 +33,8 @@
>  #include <linux/fence.h>
>  #include <linux/wait.h>
>  
> +#include <uapi/linux/dma-buf.h>
> +
>  struct device;
>  struct dma_buf;
>  struct dma_buf_attachment;
> @@ -93,10 +95,10 @@ struct dma_buf_ops {
>  	/* after final dma_buf_put() */
>  	void (*release)(struct dma_buf *);
>  
> -	int (*begin_cpu_access)(struct dma_buf *, size_t, size_t,
> -				enum dma_data_direction);
> -	void (*end_cpu_access)(struct dma_buf *, size_t, size_t,
> -			       enum dma_data_direction);
> +	int (*begin_cpu_access)(struct dma_buf *, size_t, size_t, size_t,
> +				struct dma_buf_sync_region [], enum dma_data_direction);
> +	void (*end_cpu_access)(struct dma_buf *, size_t, size_t, size_t,
> +			       struct dma_buf_sync_region [], enum dma_data_direction);
>  	void *(*kmap_atomic)(struct dma_buf *, unsigned long);
>  	void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *);
>  	void *(*kmap)(struct dma_buf *, unsigned long);
> @@ -224,10 +226,12 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
>  					enum dma_data_direction);
>  void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
>  				enum dma_data_direction);
> -int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t len,
> -			     enum dma_data_direction dir);
> -void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t start, size_t len,
> -			    enum dma_data_direction dir);
> +int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes,
> +			     size_t bytes_per_pixel, size_t num_regions,
> +			     struct dma_buf_sync_region regions[], enum dma_data_direction dir);
> +void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes,
> +			     size_t bytes_per_pixel, size_t num_regions,
> +			     struct dma_buf_sync_region regions[], enum dma_data_direction dir);
>  void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long);
>  void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *);
>  void *dma_buf_kmap(struct dma_buf *, unsigned long);
> diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
> new file mode 100644
> index 0000000..c63b578
> --- /dev/null
> +++ b/include/uapi/linux/dma-buf.h
> @@ -0,0 +1,57 @@
> +/*
> + * Framework for buffer objects that can be shared across devices/subsystems.
> + *
> + * Copyright(C) 2015 Intel Ltd
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef _DMA_BUF_UAPI_H_
> +#define _DMA_BUF_UAPI_H_
> +
> +enum dma_buf_sync_flags {
> +	DMA_BUF_SYNC_READ = (1 << 0),
> +	DMA_BUF_SYNC_WRITE = (2 << 0),
> +	DMA_BUF_SYNC_RW = (3 << 0),
> +	DMA_BUF_SYNC_START = (0 << 2),
> +	DMA_BUF_SYNC_END = (1 << 2),
> +
> +	DMA_BUF_SYNC_VALID_FLAGS_MASK = DMA_BUF_SYNC_RW |
> +		DMA_BUF_SYNC_END
> +};
> +
> +/* 2-dimensional region, used for multi-range flush. This can be used to
> + * synchronize the CPU by batching several sub-regions, smaller than the
> + * mapped dma-buf, all at once. */
> +struct dma_buf_sync_region {
> +	__u64 x;
> +	__u64 y;
> +	__u64 width;
> +	__u64 height;
> +};
> +
> +/* begin/end dma-buf functions used for userspace mmap. */
> +struct dma_buf_sync {
> +	enum dma_buf_sync_flags flags;
> +
> +	__u64 stride_bytes;
> +	__u32 bytes_per_pixel;
> +	__u32 num_regions;
> +
> +	struct dma_buf_sync_region regions[];
> +};
> +
> +#define DMA_BUF_BASE		'b'
> +#define DMA_BUF_IOCTL_SYNC	_IOWR(DMA_BUF_BASE, 0, struct dma_buf_sync)

Should be _IOW(  ?


> +
> +#endif

Thomas


More information about the dri-devel mailing list