[Intel-gfx] [PATCH 34/40] drm/i915: Allow contexts to share a single timeline across all engines

Tue Sep 25 08:45:17 UTC 2018

On 19/09/2018 20:55, Chris Wilson wrote:
> Previously, our view has been always to run the engines independently
> within a context. (Multiple engines happened before we had contexts and
> timelines, so they always operated independently and that behaviour
> persisted into contexts.) However, at the user level the context often
> represents a single timeline (e.g. GL contexts) and userspace must
> ensure that the individual engines are serialised to present that
> ordering to the client (or forget about this detail entirely and hope no
> one notices - a fair ploy if the client can only directly control one
> engine themselves ;)
> 
> In the next patch, we will want to construct a set of engines that
> operate as one, that have a single timeline interwoven between them, to
> present a single virtual engine to the user. (They submit to the virtual
> engine, then we decide which engine to execute on.)
> 
> To that end, we want to be able to create contexts which have a single
> timeline (fence context) shared between all engines, rather than multiple
> timelines.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c | 26 ++++++++++++++++-
>   drivers/gpu/drm/i915/i915_gem_context.h |  3 ++
>   drivers/gpu/drm/i915/i915_request.c     | 10 +++++--
>   drivers/gpu/drm/i915/i915_request.h     |  5 +++-
>   drivers/gpu/drm/i915/i915_sw_fence.c    | 39 +++++++++++++++++++++----
>   drivers/gpu/drm/i915/i915_sw_fence.h    | 13 +++++++--
>   drivers/gpu/drm/i915/intel_lrc.c        |  5 +++-
>   include/uapi/drm/i915_drm.h             |  3 +-
>   8 files changed, 91 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index da2ac10f8e8a..a8570a07b3b7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -223,6 +223,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
>   			ce->ops->destroy(ce);
>   	}
>   
> +	if (ctx->timeline)
> +		i915_timeline_put(ctx->timeline);
> +
>   	kfree(ctx->name);
>   	put_pid(ctx->pid);
>   
> @@ -402,6 +405,7 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
>   }
>   
>   #define CREATE_VM BIT(0)
> +#define CREATE_TIMELINE BIT(1)
>   
>   static struct i915_gem_context *
>   i915_gem_create_context(struct drm_i915_private *dev_priv,
> @@ -412,6 +416,9 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>   
>   	lockdep_assert_held(&dev_priv->drm.struct_mutex);
>   
> +	if (flags & CREATE_TIMELINE && !HAS_EXECLISTS(dev_priv))
> +		return ERR_PTR(-EINVAL);
> +
>   	/* Reap the most stale context */
>   	contexts_free_first(dev_priv);
>   
> @@ -434,6 +441,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>   		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
>   	}
>   
> +	if (flags & CREATE_TIMELINE) {
> +		struct i915_timeline *timeline;
> +
> +		timeline = i915_timeline_create(dev_priv, ctx->name);
> +		if (IS_ERR(timeline)) {
> +			__destroy_hw_context(ctx, file_priv);
> +			return ERR_CAST(timeline);
> +		}
> +
> +		ctx->timeline = timeline;
> +	}
> +
>   	trace_i915_context_create(ctx);
>   
>   	return ctx;
> @@ -796,7 +815,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>   	if (args->pad != 0)
>   		return -EINVAL;
>   
> -	if (args->flags & ~I915_GEM_CONTEXT_SHARE_GTT)
> +	if (args->flags &
> +	    ~(I915_GEM_CONTEXT_SHARE_GTT |
> +	      I915_GEM_CONTEXT_SINGLE_TIMELINE))
>   		return -EINVAL;
>   
>   	if (client_is_banned(file_priv)) {
> @@ -820,6 +841,9 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>   		flags &= ~CREATE_VM;
>   	}
>   
> +	if (args->flags & I915_GEM_CONTEXT_SINGLE_TIMELINE)
> +		flags |= CREATE_TIMELINE;
> +
>   	err = i915_mutex_lock_interruptible(dev);
>   	if (err)
>   		goto out;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 08165f6a0a84..770043449ba6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -41,6 +41,7 @@ struct drm_i915_private;
>   struct drm_i915_file_private;
>   struct i915_hw_ppgtt;
>   struct i915_request;
> +struct i915_timeline;
>   struct i915_vma;
>   struct intel_ring;
>   
> @@ -66,6 +67,8 @@ struct i915_gem_context {
>   	/** file_priv: owning file descriptor */
>   	struct drm_i915_file_private *file_priv;
>   
> +	struct i915_timeline *timeline;

Put a short comment please.

> +
>   	/**
>   	 * @ppgtt: unique address space (GTT)
>   	 *
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index fc7ad8dbc36e..34d410cfa577 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1007,8 +1007,14 @@ void i915_request_add(struct i915_request *request)
>   	prev = i915_gem_active_raw(&timeline->last_request,
>   				   &request->i915->drm.struct_mutex);
>   	if (prev && !i915_request_completed(prev)) {
> -		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
> -					     &request->submitq);
> +		if (prev->engine == engine)
> +			i915_sw_fence_await_sw_fence(&request->submit,
> +						     &prev->submit,
> +						     &request->submitq);
> +		else
> +			__i915_sw_fence_await_dma_fence(&request->submit,
> +							&prev->fence,
> +							&request->dmaq);

Could be worth a comment on the conditional block to help future readers.

>   		if (engine->schedule)
>   			__i915_sched_node_add_dependency(&request->sched,
>   							 &prev->sched,
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 90e9d170a0cd..68d36eeb5edb 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -116,7 +116,10 @@ struct i915_request {
>   	 * It is used by the driver to then queue the request for execution.
>   	 */
>   	struct i915_sw_fence submit;
> -	wait_queue_entry_t submitq;
> +	union {
> +		wait_queue_entry_t submitq;
> +		struct i915_sw_dma_fence_cb dmaq;

Drop a comment here as well explaining the duality.

> +	};
>   	wait_queue_head_t execute;
>   
>   	/*
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> index 6dbeed079ae5..b68883fe73c5 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -362,11 +362,6 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
>   	return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
>   }
>   
> -struct i915_sw_dma_fence_cb {
> -	struct dma_fence_cb base;
> -	struct i915_sw_fence *fence;
> -};
> -
>   struct i915_sw_dma_fence_cb_timer {
>   	struct i915_sw_dma_fence_cb base;
>   	struct dma_fence *dma;
> @@ -483,6 +478,40 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
>   	return ret;
>   }
>   
> +static void __dma_i915_sw_fence_wake(struct dma_fence *dma,
> +				     struct dma_fence_cb *data)
> +{
> +	struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
> +
> +	i915_sw_fence_complete(cb->fence);
> +}
> +
> +int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
> +				    struct dma_fence *dma,
> +				    struct i915_sw_dma_fence_cb *cb)
> +{
> +	int ret;
> +
> +	debug_fence_assert(fence);
> +
> +	if (dma_fence_is_signaled(dma))
> +		return 0;
> +
> +	cb->fence = fence;
> +	i915_sw_fence_await(fence);
> +
> +	ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
> +	if (ret == 0) {
> +		ret = 1;
> +	} else {
> +		i915_sw_fence_complete(fence);
> +		if (ret == -ENOENT) /* fence already signaled */
> +			ret = 0;
> +	}
> +
> +	return ret;
> +}

AFAICS there is scope to extract commonality between this and 
i915_sw_fence_await_dma_fence by perhaps having a common static function 
which takes the _cb and func pointers. Or something like:

static __i915_sw_fence_await_dma_fence(..., *cb, *func)
{
	...
}

i915_sw_fence_await_dma_fence(..)
{
	... alloc ..

	__i915_sw_fence_await_dma_fence(..., cb, i915_sw_fence_await);

	...
}

i915_sw_fence_await_dma_fence_builtin(...)
{
	__i915_sw_fence_await_dma_fence(..., &dma->cb, __dma_i915_sw_fence_wake);
}

Thoughts?

Regards,

Tvrtko

> +
>   int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
>   				    struct reservation_object *resv,
>   				    const struct dma_fence_ops *exclude,
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
> index fe2ef4dadfc6..914a734d49bc 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.h
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.h
> @@ -10,14 +10,13 @@
>   #ifndef _I915_SW_FENCE_H_
>   #define _I915_SW_FENCE_H_
>   
> +#include <linux/dma-fence.h>
>   #include <linux/gfp.h>
>   #include <linux/kref.h>
>   #include <linux/notifier.h> /* for NOTIFY_DONE */
>   #include <linux/wait.h>
>   
>   struct completion;
> -struct dma_fence;
> -struct dma_fence_ops;
>   struct reservation_object;
>   
>   struct i915_sw_fence {
> @@ -69,10 +68,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
>   int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
>   				     struct i915_sw_fence *after,
>   				     gfp_t gfp);
> +
> +struct i915_sw_dma_fence_cb {
> +	struct dma_fence_cb base;
> +	struct i915_sw_fence *fence;
> +};
> +
> +int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
> +				    struct dma_fence *dma,
> +				    struct i915_sw_dma_fence_cb *cb);
>   int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
>   				  struct dma_fence *dma,
>   				  unsigned long timeout,
>   				  gfp_t gfp);
> +
>   int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
>   				    struct reservation_object *resv,
>   				    const struct dma_fence_ops *exclude,
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 74be9a49ef9e..48a2bca7fec3 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2626,7 +2626,10 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
>   		goto error_deref_obj;
>   	}
>   
> -	timeline = i915_timeline_create(ctx->i915, ctx->name);
> +	if (ctx->timeline)
> +		timeline = i915_timeline_get(ctx->timeline);
> +	else
> +		timeline = i915_timeline_create(ctx->i915, ctx->name);
>   	if (IS_ERR(timeline)) {
>   		ret = PTR_ERR(timeline);
>   		goto error_deref_obj;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index dc1c52f95cab..adb9fed86ef7 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1433,7 +1433,8 @@ struct drm_i915_gem_context_create_v2 {
>   	/*  output: id of new context*/
>   	__u32 ctx_id;
>   	__u32 flags;
> -#define I915_GEM_CONTEXT_SHARE_GTT 0x1
> +#define I915_GEM_CONTEXT_SHARE_GTT		0x1
> +#define I915_GEM_CONTEXT_SINGLE_TIMELINE	0x2
>   	__u32 share_ctx;
>   	__u32 pad;
>   };
>