[Intel-gfx] [PATCH 13/13] drm/i915: Allow specification of parallel execbuf

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Mar 11 13:40:52 UTC 2019


On 08/03/2019 14:12, Chris Wilson wrote:
> There is a desire to split a task onto two engines and have them run at
> the same time, e.g. scanline interleaving to spread the workload evenly.
> Through the use of the out-fence from the first execbuf, we can
> coordinate the secondary execbuf to only become ready simultaneously with
> the first, so that with all things idle the secondary execbuf is executed
> in parallel with the first. The key difference here between the new
> EXEC_FENCE_SUBMIT and the existing EXEC_FENCE_IN is that the in-fence
> waits for the completion of the first request (so that all of its
> rendering results are visible to the second execbuf, the more common
> userspace fence requirement), whereas the submit-fence only waits for
> the first request to become ready for execution.
> 
> Since we only have a single input fence slot, userspace cannot mix an
> in-fence and a submit-fence. It has to use one or the other! This is not
> such a harsh requirement, since by virtue of the submit-fence, the
> secondary execbuf inherits all of the dependencies from the first
> request, and for the application the dependencies should be common
> between the primary and secondary execbuf.
> 
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Testcase: igt/gem_exec_fence/parallel
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.c            |  1 +
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++++++++++++-
>   include/uapi/drm/i915_drm.h                | 17 ++++++++++++++-
>   3 files changed, 41 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 93e41c937d96..afdfced262e6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -421,6 +421,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
>   	case I915_PARAM_HAS_EXEC_CAPTURE:
>   	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
>   	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
> +	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
>   		/* For the time being all of these are always true;
>   		 * if some supported hardware does not have one of these
>   		 * features this value needs to be provided from
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 67e4a0c2ebff..8f14ea41d4e7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2285,6 +2285,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   {
>   	struct i915_execbuffer eb;
>   	struct dma_fence *in_fence = NULL;
> +	struct dma_fence *exec_fence = NULL;
>   	struct sync_file *out_fence = NULL;
>   	intel_wakeref_t wakeref;
>   	int out_fence_fd = -1;
> @@ -2328,11 +2329,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   			return -EINVAL;
>   	}
>   
> +	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
> +		if (in_fence) {
> +			err = -EINVAL;
> +			goto err_in_fence;
> +		}
> +
> +		exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
> +		if (!exec_fence) {
> +			err = -EINVAL;
> +			goto err_in_fence;
> +		}
> +	}
> +
>   	if (args->flags & I915_EXEC_FENCE_OUT) {
>   		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
>   		if (out_fence_fd < 0) {
>   			err = out_fence_fd;
> -			goto err_in_fence;
> +			goto err_exec_fence;
>   		}
>   	}
>   
> @@ -2464,6 +2478,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   			goto err_request;
>   	}
>   
> +	if (exec_fence) {
> +		err = i915_request_await_execution(eb.request, exec_fence,
> +						   eb.engine->bond_execute);
> +		if (err < 0)
> +			goto err_request;
> +	}
> +
>   	if (fences) {
>   		err = await_fence_array(&eb, fences);
>   		if (err)
> @@ -2524,6 +2545,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   err_out_fence:
>   	if (out_fence_fd != -1)
>   		put_unused_fd(out_fence_fd);
> +err_exec_fence:
> +	dma_fence_put(exec_fence);
>   err_in_fence:
>   	dma_fence_put(in_fence);
>   	return err;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 94e72ae954a0..a6cfd1232537 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -591,6 +591,12 @@ typedef struct drm_i915_irq_wait {
>    */
>   #define I915_PARAM_MMAP_GTT_COHERENT	52
>   
> +/*
> + * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
> + * execution through use of explicit fence support.
> + * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
> + */
> +#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
>   /* Must be kept compact -- no holes and well documented */
>   
>   typedef struct drm_i915_getparam {
> @@ -1113,7 +1119,16 @@ struct drm_i915_gem_execbuffer2 {
>    */
>   #define I915_EXEC_FENCE_ARRAY   (1<<19)
>   
> -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
> +/*
> + * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent
> + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
> + * the batch.
> + *
> + * Returns -EINVAL if the sync_file fd cannot be found.
> + */
> +#define I915_EXEC_FENCE_SUBMIT		(1 << 20)
> +
> +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
>   
>   #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
>   #define i915_execbuffer2_set_context_id(eb2, context) \
> 

Simple enough, LGTM.

Regards,

Tvrtko


More information about the Intel-gfx mailing list