[Intel-gfx] [PATCH 2/3] drm/i915: Android sync points for i915 v3

Chris Wilson chris at chris-wilson.co.uk
Thu Dec 4 00:10:22 PST 2014


On Wed, Dec 03, 2014 at 11:49:06AM -0800, Jesse Barnes wrote:
> Expose an ioctl to create Android fences based on the Android sync point
> infrastructure (which in turn is based on DMA-buf fences).  Just a
> sketch at this point, no testing has been done.
> 
> There are a couple of goals here:
>   1) allow applications and libraries to create fences without an
>      associated buffer
>   2) re-use a common API so userspace doesn't have to impedance mismatch
>      between different driver implementations too much
>   3) allow applications and libraries to use explicit synchronization if
>      they choose by exposing fences directly
> 
> v2: use struct fence directly using Maarten's new interface
> v3: use new i915 request structure (Jesse)
>     make i915 fences a compile time option since Android support is still
>     in staging (Jesse)
>     check for request complete after arming IRQs (Chris)
>     add timeline id to ioctl (Tvrtko)
> 
> Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>
> ---
>  drivers/gpu/drm/i915/Kconfig     |  14 ++
>  drivers/gpu/drm/i915/Makefile    |   1 +
>  drivers/gpu/drm/i915/i915_dma.c  |   5 +
>  drivers/gpu/drm/i915/i915_drv.h  |  20 +++
>  drivers/gpu/drm/i915/i915_gem.c  |   5 +
>  drivers/gpu/drm/i915/i915_irq.c  |   4 +-
>  drivers/gpu/drm/i915/i915_sync.c | 325 +++++++++++++++++++++++++++++++++++++++
>  include/uapi/drm/i915_drm.h      |  23 +++
>  8 files changed, 396 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_sync.c
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 4e39ab3..6b23d17 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -43,6 +43,20 @@ config DRM_I915_KMS
>  
>  	  If in doubt, say "Y".
>  
> +config DRM_I915_SYNC
> +	bool "Enable explicit sync support"
> +	depends on DRM_I915
> +	default y
> +	select STAGING
> +	select ANDROID
> +	select SYNC
> +	help
> +	  Choose this option to enable Android native sync support and the
> +	  corresponding i915 driver code to expose it.  Slightly increases
> +	  driver size and pulls in sync support from staging.
> +
> +	  If in doubt, say "Y".
> +
>  config DRM_I915_FBDEV
>  	bool "Enable legacy fbdev support for the modesetting intel driver"
>  	depends on DRM_I915
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index e4083e4..45e155f 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -56,6 +56,7 @@ i915-y += intel_audio.o \
>  	  intel_sprite.o
>  i915-$(CONFIG_ACPI)		+= intel_acpi.o intel_opregion.o
>  i915-$(CONFIG_DRM_I915_FBDEV)	+= intel_fbdev.o
> +i915-$(CONFIG_DRM_I915_SYNC)	+= i915_sync.o
>  
>  # modesetting output/encoder code
>  i915-y += dvo_ch7017.o \
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 887d88f..621bd7f 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1062,6 +1062,11 @@ const struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +#ifdef CONFIG_DRM_I915_SYNC
> +	DRM_IOCTL_DEF_DRV(I915_GEM_FENCE, i915_sync_create_fence_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +#else
> +	DRM_IOCTL_DEF_DRV(I915_GEM_FENCE, drm_noop, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +#endif

This can be done via the ifdef trickery in the header.

> +struct i915_sync_timeline;
> +
> +
>  struct drm_i915_private {
>  	struct drm_device *dev;
>  	struct kmem_cache *slab;
> @@ -1560,6 +1563,9 @@ struct drm_i915_private {
>  	uint32_t last_seqno, next_seqno;
>  
>  	struct drm_dma_handle *status_page_dmah;
> +
> +	struct i915_sync_timeline *sync_tl[I915_NUM_RINGS];

If I understand correctly, the fence timeline are indeed per-ring, but
here you set them up (and name them) on a per-engine basis.
> +
>  	struct resource mch_res;
>  
>  	/* protects the irq masks */
> @@ -2509,6 +2515,20 @@ void i915_init_vm(struct drm_i915_private *dev_priv,
>  void i915_gem_free_object(struct drm_gem_object *obj);
>  void i915_gem_vma_destroy(struct i915_vma *vma);
>  
> +/* i915_sync.c */
> +#ifdef CONFIG_DRM_I915_SYNC
> +int i915_sync_init(struct drm_i915_private *dev_priv);
> +void i915_sync_fini(struct drm_i915_private *dev_priv);
> +int i915_sync_create_fence_ioctl(struct drm_device *dev, void *data,
> +				 struct drm_file *file);
> +#else
> +static inline int i915_sync_init(struct drm_i915_private *dev_priv)
> +{
> +	return 0;
> +}
> +static inline void i915_sync_fini(struct drm_i915_private *dev_priv) { }
> +#endif
> +
>  #define PIN_MAPPABLE 0x1
>  #define PIN_NONBLOCK 0x2
>  #define PIN_GLOBAL 0x4
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7a83a9f..39407b4 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4887,6 +4887,9 @@ int i915_gem_init(struct drm_device *dev)
>  		atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
>  		ret = 0;
>  	}
> +
> +	i915_sync_init(dev_priv);
> +
>  	mutex_unlock(&dev->struct_mutex);
>  
>  	return ret;
> @@ -5011,6 +5014,8 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
>  		request->file_priv = NULL;
>  	}
>  	spin_unlock(&file_priv->mm.lock);
> +
> +	i915_sync_fini(dev->dev_private);
>  }
>  
>  static void
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 7913a72..d4ede83 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -33,6 +33,7 @@
>  #include <linux/circ_buf.h>
>  #include <drm/drmP.h>
>  #include <drm/i915_drm.h>
> +#include "../../../staging/android/sync.h"
>  #include "i915_drv.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
> @@ -2378,8 +2379,9 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv,
>  	 */
>  
>  	/* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
> -	for_each_ring(ring, dev_priv, i)
> +	for_each_ring(ring, dev_priv, i) {
>  		wake_up_all(&ring->irq_queue);
> +	}
>  
>  	/* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
>  	wake_up_all(&dev_priv->pending_flip_queue);
> diff --git a/drivers/gpu/drm/i915/i915_sync.c b/drivers/gpu/drm/i915/i915_sync.c
> new file mode 100644
> index 0000000..4741b0c
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_sync.c
> @@ -0,0 +1,325 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Jesse Barnes <jbarnes at virtuousgeek.org>
> + *
> + */
> +
> +#include <drm/drmP.h>
> +#include <drm/drm_vma_manager.h>
> +#include <drm/i915_drm.h>
> +#include "i915_drv.h"
> +#include "i915_trace.h"
> +#include "intel_drv.h"
> +#include <linux/oom.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/slab.h>
> +#include <linux/swap.h>
> +#include <linux/pci.h>
> +#include <linux/dma-buf.h>
> +#include "../../../staging/android/sync.h"
> +
> +/* Nothing really to protect here... */
> +static spinlock_t fence_lock;
> +
> +/*
> + * i915 fences on sync timelines
> + *
> + * We implement sync points in terms of i915 seqnos.  They're exposed
> + * through the new DRM_I915_GEM_FENCE ioctl, and can be mixed and matched
> + * with other Android timelines and aggregated into sync_fences, etc.
> + *
> + * TODO:
> + *   rebase on top of Chris's seqno/request stuff and use requests
> + *   allow non-RCS fences (need ring/context association)
> + */
> +
> +struct i915_fence {
> +	struct fence base;
> +	struct intel_engine_cs *ring;
> +	struct intel_context *ctx;
> +	wait_queue_t wait;
> +	struct drm_i915_gem_request *request;
> +};
> +
> +#define to_intel_fence(x) container_of(x, struct i915_fence, base)
> +
> +int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
> +		 unsigned reset_counter,
> +		 bool interruptible,
> +		 struct timespec *timeout,
> +		 struct drm_i915_file_private *file_priv);
> +
> +static const char *i915_fence_get_driver_name(struct fence *fence)
> +{
> +	return "i915";
> +}
> +
> +static const char *i915_fence_get_timeline_name(struct fence *fence)
> +{
> +	struct i915_fence *intel_fence = to_intel_fence(fence);
> +
> +	return intel_fence->ring->name;
> +}
> +
> +static int i915_fence_check(wait_queue_t *wait, unsigned mode, int flags,
> +			    void *key)
> +{
> +	struct i915_fence *intel_fence = wait->private;
> +	struct intel_engine_cs *ring = intel_fence->ring;
> +
> +	if (!i915_gem_request_completed(intel_fence->request, false))
> +		return 0;
> +
> +	fence_signal_locked(&intel_fence->base);
> +
> +	__remove_wait_queue(&ring->irq_queue, wait);
> +	fence_put(&intel_fence->base);
> +	ring->irq_put(ring);
> +
> +	return 0;
> +}
> +
> +static bool i915_fence_enable_signaling(struct fence *fence)
> +{
> +	struct i915_fence *intel_fence = to_intel_fence(fence);
> +	struct intel_engine_cs *ring = intel_fence->ring;
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	wait_queue_t *wait = &intel_fence->wait;
> +
> +	/* queue fence wait queue on irq queue and get fence */
> +	if (i915_gem_request_completed(intel_fence->request, false) ||
> +	    i915_terminally_wedged(&dev_priv->gpu_error))
> +		return false;
> +
> +	if (!ring->irq_get(ring))
> +		return false;
> +
> +	if (i915_gem_request_completed(intel_fence->request, false)) {
> +		ring->irq_put(ring);
> +		return true;
> +	}
> +
> +	wait->flags = 0;
> +	wait->private = intel_fence;
> +	wait->func = i915_fence_check;
> +
> +	__add_wait_queue(&ring->irq_queue, wait);
> +	fence_get(fence);
> +
> +	return true;
> +}
> +
> +static bool i915_fence_signaled(struct fence *fence)
> +{
> +	struct i915_fence *intel_fence = to_intel_fence(fence);
> +
> +	return i915_gem_request_completed(intel_fence->request, false);
> +}
> +
> +static signed long i915_fence_wait(struct fence *fence, bool intr,
> +				   signed long timeout_js)
> +{
> +	struct i915_fence *intel_fence = to_intel_fence(fence);
> +	struct drm_i915_private *dev_priv = intel_fence->ring->dev->dev_private;
> +	int ret;
> +	s64 timeout;
> +
> +	timeout = jiffies_to_nsecs(timeout_js);
> +
> +	ret = __i915_wait_request(intel_fence->request,
> +				  atomic_read(&dev_priv->gpu_error.reset_counter),
> +				  intr, &timeout, NULL);
> +	if (ret == -ETIME)
> +		return nsecs_to_jiffies(timeout);
> +
> +	return ret;
> +}
> +
> +static int i915_fence_fill_driver_data(struct fence *fence, void *data,
> +				      int size)
> +{
> +	struct i915_fence *intel_fence = to_intel_fence(fence);
> +
> +	if (size < sizeof(intel_fence->request->seqno))
> +		return -ENOMEM;
> +
> +	memcpy(data, &intel_fence->request->seqno,
> +	       sizeof(intel_fence->request->seqno));
> +
> +	return sizeof(intel_fence->request->seqno);
> +}
> +
> +static void i915_fence_value_str(struct fence *fence, char *str, int size)
> +{
> +	struct i915_fence *intel_fence = to_intel_fence(fence);
> +
> +	snprintf(str, size, "%u", intel_fence->request->seqno);
> +}
> +
> +static void i915_fence_timeline_value_str(struct fence *fence, char *str,
> +					  int size)
> +{
> +	struct i915_fence *intel_fence = to_intel_fence(fence);
> +	struct intel_engine_cs *ring = intel_fence->ring;
> +
> +	snprintf(str, size, "%u", ring->get_seqno(ring, false));
> +}
> +
> +static struct fence_ops i915_fence_ops = {
> +	.get_driver_name = 	i915_fence_get_driver_name,
> +	.get_timeline_name =	i915_fence_get_timeline_name,
> +	.enable_signaling =	i915_fence_enable_signaling,
> +	.signaled =		i915_fence_signaled,
> +	.wait =			i915_fence_wait,
> +	.fill_driver_data =	i915_fence_fill_driver_data,
> +	.fence_value_str =	i915_fence_value_str,
> +	.timeline_value_str =	i915_fence_timeline_value_str,
> +};
> +
> +static struct fence *i915_fence_create(struct intel_engine_cs *ring,
> +				       struct intel_context *ctx)
> +{
> +	struct i915_fence *fence;
> +	int ret;
> +
> +	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
> +	if (!fence)
> +		return NULL;
> +
> +	ret = ring->add_request(ring);
> +	if (ret) {
> +		DRM_ERROR("add_request failed\n");
> +		fence_free((struct fence *)fence);
> +		return NULL;
> +	}
> +
> +	fence->ring = ring;
> +	fence->ctx = ctx;
> +	fence->request = ring->outstanding_lazy_request;

Pardon? This is horribly, horribly broken. (Emitting the
same breadcrumb/interrupt multiple times without coordination.)

What are the fence semantics with flushing top/bottom of pipe?

I think you what something along the lines of:

rq = i915_request_create(ctx, engine);
if (IS_ERR(rq)) {
	free(fence);
	return ERR_CAST(rq);
}

/* breadcrumb will insert all necessary flushes and barriers for fence
 * completion, or if you want to be explicit:
 i915_request_emit_flush(rq, I915_COMMAND_BARRIER | I915_FLUSH_CACHES);
 */
ret = i915_request_emit_breadcrumb(rq);
if (ret == 0)
	ret = i915_request_commit(rq);
if (ret) {
	i915_request_put(rq);
	free(fence);
	return ERR_PTR(ret);
}

fence->rq = rq;

and rq->ctx, rq->engine, rq->ring already exist and so the redundant
information can be removed from i915_fence.


> +	fence_init(&fence->base, &i915_fence_ops, &fence_lock, ctx->user_handle,
> +		   fence->request->seqno);
> +
> +	return &fence->base;
> +}
> +
> +/**
> + * i915_sync_create_fence_ioctl - fence creation function
> + * @dev: drm device
> + * @data: ioctl data
> + * @file: file struct
> + *
> + * This function creates a fence given a context and ring, and returns
> + * it to the caller in the form of a file descriptor.
> + *
> + * The returned descriptor is a sync fence fd, and can be used with all
> + * the usual sync fence operations (poll, ioctl, etc).
> + *
> + * The process fd limit should prevent an overallocation of fence objects,
> + * which need to be destroyed manually with a close() call.
> + */
> +int i915_sync_create_fence_ioctl(struct drm_device *dev, void *data,
> +				 struct drm_file *file)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_fence *fdata = data;
> +	struct fence *fence;
> +	struct sync_fence *sfence;
> +	struct intel_engine_cs *ring;
> +	struct intel_context *ctx;
> +	u32 ctx_id = fdata->ctx_id;
> +	int fd = get_unused_fd_flags(O_CLOEXEC);
> +	int ret = 0;
> +
> +	if (file == NULL) {
> +		DRM_ERROR("no file priv?\n");
> +		return -EINVAL;
> +	}
> +
> +	ret = i915_mutex_lock_interruptible(dev);
> +	if (ret) {
> +		DRM_ERROR("mutex interrupted\n");
> +		goto out;
> +	}
> +
> +	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
> +	if (ctx == NULL) {
> +		DRM_ERROR("context lookup failed\n");
> +		ret = -ENOENT;
> +		goto err;
> +	}
> +
> +	ring = &dev_priv->ring[RCS];

Hmm. This looks unfortunately restrictive. Especially for an i915 custom
ioctl.

> +
> +	if (!intel_ring_initialized(ring)) {
> +		DRM_ERROR("ring not ready\n");
> +		ret = -EIO;

ret = -EINVAL;
The ring is invalid, not just "not ready".

> +		goto err;
> +	}
> +
> +	fence = i915_fence_create(ring, ctx);
> +	if (!fence) {

IS_ERR(fence) itym. Especially as fence creation could fail with
-ERESTARTSYS.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre



More information about the Intel-gfx mailing list