[Intel-gfx] [RFC] drm/i915: Add sync framework support to execbuff IOCTL

Daniel Vetter daniel at ffwll.ch
Mon Jul 6 02:29:58 PDT 2015


On Fri, Jul 03, 2015 at 12:17:33PM +0100, Tvrtko Ursulin wrote:
> 
> On 07/02/2015 04:55 PM, Chris Wilson wrote:
> > It would be nice if we could reuse one seqno both for internal/external
> > fences. If you need to expose a fence ordering within a timeline that is
> > based on the creation stamp rather than execution stamp, it seems like
> > we could just add such a stamp when creating the sync_pt and not worry
> > about its relationship to the execution seqno.
> > 
> > Doing so does expose that requests are reordered to userspace since the
> > signalling timeline is not the same as userspace's ordered timeline. Not
> > sure if that is a problem or not.
> > 
> > Afaict the sync uapi is based on waiting for all of a set of fences to
> > retire. It doesn't seem to rely on fence ordering (that is knowing that
> > fence A will signal before fence B so it need only wait on fence B).
> > 
> > Here's hoping that we can have both simplicity and efficiency...
> 
> Jumping in with not even perfect understanding of everything here - but
> timeline business has always been confusing me. There is nothing in the 
> uapi which needs it afaics and iirc there was some discussion at the time
> Jesse floated his patches that it can be removed. Based on that when I
> squashed his patches and ported them on top of John's request to fence
> conversion it ended up something like the below (manually edited a bit to
> be less noisy and some prep patches omitted):
> 
> This implements the ioctl based uapi and indeed seqnos are not actually
> used in waits. So is this insufficient for some reason? (Other that it
> does not implement the input fence side of things.)

Yeah android syncpt on top of struct fence embedded int i915 request is
what I'd have expected.
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 74acca9..07f6ad9 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -71,3 +71,17 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
>  	  option changes the default for that module option.
>  
>  	  If in doubt, say "N".
> +
> +config DRM_I915_SYNC
> +	bool "Enable explicit sync support"
> +	depends on DRM_I915
> +	default y if STAGING
> +	depends on STAGING
> +	select ANDROID
> +	select SYNC
> +	help

No Kconfig for userspace ABI please. Yes this means we need to destage
android syncpts first. The problem I see there is that apparently google
is still changing the uabi a lot, and that's a no-go for upstream. And it
needs to be cleaned up to work more seamlessly with struct fence (i.e.
anything that's missing there should be moved to struct fence, drivers
should only use fd_to_fence and fenct_to_fd functions similar to dma-buf).

And we don't have anyone except android using syncpts, so a bit a trouble
with finding userspace vehicles for this. We probably need agreement from
google to be happy with a frozen abi for syncpts first ...
-Daniel

> +	  Choose this option to enable Android native sync support and the
> +	  corresponding i915 driver code to expose it.  Slightly increases
> +	  driver size and pulls in sync support from staging.
> +
> +	  If in doubt, say "Y".
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index db21c93..93a3bc0 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -91,6 +91,9 @@ i915-y += i915_vgpu.o
>  # legacy horrors
>  i915-y += i915_dma.o
>  
> +# sync points
> +i915-$(CONFIG_DRM_I915_SYNC)	+= i915_sync.o
> +
>  obj-$(CONFIG_DRM_I915)  += i915.o
>  
>  CFLAGS_i915_trace_points.o := -I$(src)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3ef3997..2cf4d3f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2753,6 +2753,26 @@ void i915_init_vm(struct drm_i915_private *dev_priv,
>  void i915_gem_free_object(struct drm_gem_object *obj);
>  void i915_gem_vma_destroy(struct i915_vma *vma);
>  
> +/* i915_sync.c */
> +struct sync_fence;
> +
> +#ifdef CONFIG_DRM_I915_SYNC
> +int i915_fence_ring_fill_driver_data(struct fence *fence, void *data, int size);
> +void i915_fence_ring_value_str(struct fence *fence, char *str, int size);
> +void i915_fence_ring_timeline_value_str(struct fence *fence, char *str,
> +					int size);
> +
> +int i915_create_sync_fence_ring(struct drm_i915_gem_request *req,
> +				struct sync_fence **sync_fence, int *fence_fd);
> +#else
> +static inline
> +int i915_create_sync_fence_ring(struct drm_i915_gem_request *req,
> +				struct sync_fence **sync_fence, int *fence_fd)
> +{
> +	return -ENODEV;
> +}
> +#endif
> +
>  #define PIN_MAPPABLE 0x1
>  #define PIN_NONBLOCK 0x2
>  #define PIN_GLOBAL 0x4
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 560d244..a04853c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2633,7 +2633,7 @@ static const char *i915_gem_request_get_timeline_name(struct fence *req_fence)
>  	return req->ring->name;
>  }
>  
> -#if 0
> +#if CONFIG_DRM_I915_SYNC
>  static bool i915_gem_request_is_signaled(struct fence *req_fence)
>  {
>  	struct drm_i915_gem_request *req = container_of(req_fence,
> @@ -2770,12 +2770,14 @@ static const struct fence_ops i915_gem_request_fops = {
>  	.get_driver_name	= i915_gem_request_get_driver_name,
>  	.get_timeline_name	= i915_gem_request_get_timeline_name,
>  	.enable_signaling	= i915_gem_request_enable_signaling,
>  	.wait			= fence_default_wait,
>  	.release		= i915_gem_request_free,
> +#if CONFIG_DRM_I915_SYNC
> +	.signaled		= i915_gem_request_is_signaled,
> +	.fill_driver_data	= i915_fence_ring_fill_driver_data,
> +	.fence_value_str	= i915_fence_ring_value_str,
> +	.timeline_value_str	= i915_fence_ring_timeline_value_str
> +#endif
>  };
>  
>  int _i915_gem_request_alloc(struct intel_engine_cs *ring,
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 182c730..e6342ac 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -32,6 +32,7 @@
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  #include <linux/dma_remapping.h>
> +#include "../../../staging/android/sync.h"
>  
>  #define  __EXEC_OBJECT_HAS_PIN (1<<31)
>  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> @@ -1417,6 +1418,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	u32 dispatch_flags;
>  	int ret;
>  	bool need_relocs;
> +	struct sync_fence *sync_fence = NULL;
> +	int fence_fd = -1;
>  
>  	if (!i915_gem_check_execbuffer(args))
>  		return -EINVAL;
> @@ -1610,6 +1613,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	if (ret)
>  		goto err_batch_unpin;
>  
> +	if (args->flags & I915_EXEC_FENCE_OUT) {
> +		ret = i915_create_sync_fence_ring(params->request, &sync_fence,
> +						  &fence_fd);
> +		if (ret)
> +			goto err_batch_unpin;
> +	}
> +
>  	ret = i915_gem_request_add_to_client(params->request, file);
>  	if (ret)
>  		goto err_batch_unpin;
> @@ -1628,6 +1638,26 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	params->ctx                     = ctx;
>  
>  	ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
> +	if (ret)
> +		goto err_submit;
> +
> +	if (sync_fence) {
> +		sync_fence_install(sync_fence, fence_fd);
> +		args->rsvd2 = fence_fd;
> +		sync_fence = NULL;
> +	}
> +
> +err_submit:
> +	if (sync_fence) {
> +		/*
> +		 * We are under the struct mutex here and sync fence we
> +		 * created will attempt to grab it in its destructor.
> +		 * Therefore remove the lock before unreferencing.
> +		 */
> +		sync_fence->lock = NULL;
> +		fput(sync_fence->file);
> +		put_unused_fd(fence_fd);
> +	}
>  
>  err_batch_unpin:
>  	/*
> diff --git a/drivers/gpu/drm/i915/i915_sync.c b/drivers/gpu/drm/i915/i915_sync.c
> new file mode 100644
> index 0000000..1a50610
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_sync.c
> @@ -0,0 +1,106 @@
> +/*
> + * Copyright © 2013-2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Jesse Barnes <jbarnes at virtuousgeek.org>
> + *    Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> + *
> + */
> +
> +#include <linux/fs.h>
> +#include <linux/fence.h>
> +#include "../../../staging/android/sync.h"
> +#include "i915_drv.h"
> +
> +/*
> + * i915 Android native sync fences.
> + *
> + * We implement sync points in terms of i915 seqnos. They're exposed through
> + * the DRM_IOCTL_I915_GEM_EXECBUFFER2 ioctl, and can be mixed and matched
> + * with other Android timelines and aggregated into sync_fences, etc.
> + *
> + * TODO:
> + *   * Display engine fences.
> + *   * Extend driver data with context id / ring id.
> + */
> +
> +int i915_fence_ring_fill_driver_data(struct fence *fence, void *data, int size)
> +{
> +	struct drm_i915_gem_request *req = container_of(fence, typeof(*req),
> +							fence);
> +
> +	if (size < sizeof(req->seqno))
> +		return -ENOMEM;
> +
> +	memcpy(data, &req->seqno, sizeof(req->seqno));
> +
> +	return sizeof(req->seqno);
> +}
> +
> +void i915_fence_ring_value_str(struct fence *fence, char *str, int size)
> +{
> +	struct drm_i915_gem_request *req = container_of(fence, typeof(*req),
> +							fence);
> +
> +	snprintf(str, size, "%u", req->seqno);
> +}
> +
> +void i915_fence_ring_timeline_value_str(struct fence *fence, char *str,
> +					int size)
> +{
> +	struct drm_i915_gem_request *req = container_of(fence, typeof(*req),
> +							fence);
> +	struct intel_engine_cs *ring = req->ring;
> +
> +	snprintf(str, size, "%u", ring->get_seqno(ring, false));
> +}
> +
> +int i915_create_sync_fence_ring(struct drm_i915_gem_request *req,
> +				struct sync_fence **sync_fence, int *fence_fd)
> +{
> +	struct drm_device *dev = req->i915->dev;
> +	struct intel_engine_cs *ring = req->ring;
> +	struct sync_fence *sfence;
> +	char ring_name[6] = "ring0";
> +	int fd;
> +
> +	fd = get_unused_fd_flags(O_CLOEXEC);
> +	if (fd < 0) {
> +		DRM_DEBUG("No available file descriptors!\n");
> +		return fd;
> +	}
> +
> +	ring_name[4] += ring->id;
> +	sfence = sync_fence_create_dma(ring_name, &req->fence,
> +				       &dev->struct_mutex);
> +	if (!sfence) {
> +		put_unused_fd(fd);
> +		return -ENOMEM;
> +	}
> +
> +	*sync_fence = sfence;
> +	*fence_fd = fd;
> +
> +	fence_get(&req->fence);
> +
> +	return 0;
> +}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 4851d66..2522f78 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -246,7 +246,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_HWS_ADDR		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
>  #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
>  #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
> -#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
> +#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
>  #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
>  #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
>  #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
> @@ -722,7 +722,7 @@ struct drm_i915_gem_execbuffer2 {
>  #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
>  	__u64 flags;
>  	__u64 rsvd1; /* now used for context info */
> -	__u64 rsvd2;
> +	__u64 rsvd2; /* now used for fence fd */
>  };
>  
>  /** Resets the SO write offset registers for transform feedback on gen7. */
> @@ -760,7 +760,9 @@ struct drm_i915_gem_execbuffer2 {
>  #define I915_EXEC_BSD_RING1		(1<<13)
>  #define I915_EXEC_BSD_RING2		(2<<13)
>  
> -#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
> +#define I915_EXEC_FENCE_OUT		(1<<15)
> +
> +#define __I915_EXEC_UNKNOWN_FLAGS -(1<<16)
>  
>  #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
>  #define i915_execbuffer2_set_context_id(eb2, context) \
> -- 
> 2.4.0
> 
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the Intel-gfx mailing list