[Intel-gfx] [PATCH 17/39] drm/i915: Allow specification of parallel execbuf
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Mar 14 17:27:40 UTC 2019
On 13/03/2019 14:43, Chris Wilson wrote:
> There is a desire to split a task onto two engines and have them run at
> the same time, e.g. scanline interleaving to spread the workload evenly.
> Through the use of the out-fence from the first execbuf, we can
> coordinate the secondary execbufs to only become ready simultaneously with
> the first, so that with all things idle the secondary execbufs are executed
> in parallel with the first. The key difference here between the new
> EXEC_FENCE_SUBMIT and the existing EXEC_FENCE_IN is that the in-fence
> waits for the completion of the first request (so that all of its
> rendering results are visible to the second execbuf, the more common
> userspace fence requirement), whereas the submit-fence only holds the
> second execbuf back until the first request itself becomes ready to
> execute.
>
> Since we only have a single input fence slot, userspace cannot mix an
> in-fence and a submit-fence. It has to use one or the other! This is not
> such a harsh requirement, since by virtue of the submit-fence, the
> secondary execbuf inherits all of the dependencies from the first
> request, and for the application the dependencies should be common
> between the primary and secondary execbuf.
>
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Testcase: igt/gem_exec_fence/parallel
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.c | 1 +
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++++++++++++-
> include/uapi/drm/i915_drm.h | 17 ++++++++++++++-
> 3 files changed, 41 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 93e41c937d96..afdfced262e6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -421,6 +421,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
> case I915_PARAM_HAS_EXEC_CAPTURE:
> case I915_PARAM_HAS_EXEC_BATCH_FIRST:
> case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
> + case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
> /* For the time being all of these are always true;
> * if some supported hardware does not have one of these
> * features this value needs to be provided from
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 70a26f0a9f1e..064c649f3f46 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2282,6 +2282,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> {
> struct i915_execbuffer eb;
> struct dma_fence *in_fence = NULL;
> + struct dma_fence *exec_fence = NULL;
> struct sync_file *out_fence = NULL;
> intel_wakeref_t wakeref;
> int out_fence_fd = -1;
> @@ -2325,11 +2326,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> return -EINVAL;
> }
>
> + if (args->flags & I915_EXEC_FENCE_SUBMIT) {
> + if (in_fence) {
> + err = -EINVAL;
> + goto err_in_fence;
> + }
> +
> + exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
> + if (!exec_fence) {
> + err = -EINVAL;
> + goto err_in_fence;
> + }
> + }
> +
> if (args->flags & I915_EXEC_FENCE_OUT) {
> out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
> if (out_fence_fd < 0) {
> err = out_fence_fd;
> - goto err_in_fence;
> + goto err_exec_fence;
> }
> }
>
> @@ -2461,6 +2475,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> goto err_request;
> }
>
> + if (exec_fence) {
> + err = i915_request_await_execution(eb.request, exec_fence,
> + eb.engine->bond_execute);
> + if (err < 0)
> + goto err_request;
> + }
> +
> if (fences) {
> err = await_fence_array(&eb, fences);
> if (err)
> @@ -2521,6 +2542,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> err_out_fence:
> if (out_fence_fd != -1)
> put_unused_fd(out_fence_fd);
> +err_exec_fence:
> + dma_fence_put(exec_fence);
> err_in_fence:
> dma_fence_put(in_fence);
> return err;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index ed33b8af8692..dbab4d365a6d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -593,6 +593,12 @@ typedef struct drm_i915_irq_wait {
> */
> #define I915_PARAM_MMAP_GTT_COHERENT 52
>
> +/*
> + * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
> + * execution through use of explicit fence support.
> + * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
> + */
> +#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
> /* Must be kept compact -- no holes and well documented */
>
> typedef struct drm_i915_getparam {
> @@ -1115,7 +1121,16 @@ struct drm_i915_gem_execbuffer2 {
> */
> #define I915_EXEC_FENCE_ARRAY (1<<19)
>
> -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
> +/*
> + * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represents
> + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
> + * the batch.
> + *
> + * Returns -EINVAL if the sync_file fd cannot be found.
> + */
> +#define I915_EXEC_FENCE_SUBMIT (1 << 20)
> +
> +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
>
> #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
> #define i915_execbuffer2_set_context_id(eb2, context) \
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list