[Intel-gfx] [PATCH 31/38] drm/i915: Allow contexts to share a single timeline across all engines
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jan 24 17:35:28 UTC 2019
On 18/01/2019 14:01, Chris Wilson wrote:
> Previously, our view has been always to run the engines independently
> within a context. (Multiple engines happened before we had contexts and
> timelines, so they always operated independently and that behaviour
> persisted into contexts.) However, at the user level the context often
> represents a single timeline (e.g. GL contexts) and userspace must
> ensure that the individual engines are serialised to present that
> ordering to the client (or forgot about this detail entirely and hope no
> one notices - a fair ploy if the client can only directly control one
> engine themselves ;)
>
> In the next patch, we will want to construct a set of engines that
> operate as one, that have a single timeline interwoven between them, to
> present a single virtual engine to the user. (They submit to the virtual
> engine, then we decide which engine to execute on based.)
>
> To that end, we want to be able to create contexts which have a single
> timeline (fence context) shared between all engines, rather than multiple
> timelines.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem_context.c | 33 +++++++++++++---
> drivers/gpu/drm/i915/i915_gem_context.h | 3 ++
> drivers/gpu/drm/i915/i915_request.c | 10 ++++-
> drivers/gpu/drm/i915/i915_request.h | 5 ++-
> drivers/gpu/drm/i915/i915_sw_fence.c | 39 ++++++++++++++++---
> drivers/gpu/drm/i915/i915_sw_fence.h | 13 ++++++-
> drivers/gpu/drm/i915/intel_lrc.c | 5 ++-
> .../gpu/drm/i915/selftests/i915_gem_context.c | 17 ++++----
> drivers/gpu/drm/i915/selftests/mock_context.c | 2 +-
> include/uapi/drm/i915_drm.h | 1 +
> 10 files changed, 103 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index ec5e3e1c6402..e28be242399d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -225,6 +225,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
> ce->ops->destroy(ce);
> }
>
> + if (ctx->timeline)
> + i915_timeline_put(ctx->timeline);
> +
> kfree(ctx->name);
> put_pid(ctx->pid);
>
> @@ -425,12 +428,17 @@ static void __set_ppgtt(struct i915_gem_context *ctx,
>
> static struct i915_gem_context *
> i915_gem_create_context(struct drm_i915_private *dev_priv,
> - struct drm_i915_file_private *file_priv)
> + struct drm_i915_file_private *file_priv,
> + unsigned int flags)
> {
> struct i915_gem_context *ctx;
>
> lockdep_assert_held(&dev_priv->drm.struct_mutex);
>
> + if (flags & I915_GEM_CONTEXT_SINGLE_TIMELINE &&
> + !HAS_EXECLISTS(dev_priv))
> + return ERR_PTR(-EINVAL);
> +
> /* Reap the most stale context */
> contexts_free_first(dev_priv);
>
> @@ -453,6 +461,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
> i915_ppgtt_put(ppgtt);
> }
>
> + if (flags & I915_GEM_CONTEXT_SINGLE_TIMELINE) {
> + struct i915_timeline *timeline;
> +
> + timeline = i915_timeline_create(dev_priv, ctx->name, NULL);
> + if (IS_ERR(timeline)) {
> + __destroy_hw_context(ctx, file_priv);
> + return ERR_CAST(timeline);
> + }
> +
> + ctx->timeline = timeline;
> + }
> +
> trace_i915_context_create(ctx);
>
> return ctx;
> @@ -481,7 +501,7 @@ i915_gem_context_create_gvt(struct drm_device *dev)
> if (ret)
> return ERR_PTR(ret);
>
> - ctx = i915_gem_create_context(to_i915(dev), NULL);
> + ctx = i915_gem_create_context(to_i915(dev), NULL, 0);
> if (IS_ERR(ctx))
> goto out;
>
> @@ -517,7 +537,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
> struct i915_gem_context *ctx;
> int err;
>
> - ctx = i915_gem_create_context(i915, NULL);
> + ctx = i915_gem_create_context(i915, NULL, 0);
> if (IS_ERR(ctx))
> return ctx;
>
> @@ -638,7 +658,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
> idr_init_base(&file_priv->vm_idr, 1);
>
> mutex_lock(&i915->drm.struct_mutex);
> - ctx = i915_gem_create_context(i915, file_priv);
> + ctx = i915_gem_create_context(i915, file_priv, 0);
> mutex_unlock(&i915->drm.struct_mutex);
> if (IS_ERR(ctx)) {
> idr_destroy(&file_priv->context_idr);
> @@ -992,7 +1012,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
> if (!DRIVER_CAPS(dev_priv)->has_logical_contexts)
> return -ENODEV;
>
> - if (args->flags)
> + if (args->flags &
> + ~(I915_GEM_CONTEXT_SINGLE_TIMELINE))
> return -EINVAL;
>
> if (client_is_banned(file_priv)) {
> @@ -1007,7 +1028,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
> if (ret)
> return ret;
>
> - ctx = i915_gem_create_context(dev_priv, file_priv);
> + ctx = i915_gem_create_context(dev_priv, file_priv, args->flags);
> mutex_unlock(&dev->struct_mutex);
> if (IS_ERR(ctx))
> return PTR_ERR(ctx);
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 9786f86b659d..b3a840747330 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -41,6 +41,7 @@ struct drm_i915_private;
> struct drm_i915_file_private;
> struct i915_hw_ppgtt;
> struct i915_request;
> +struct i915_timeline;
> struct i915_vma;
> struct intel_ring;
>
> @@ -66,6 +67,8 @@ struct i915_gem_context {
> /** file_priv: owning file descriptor */
> struct drm_i915_file_private *file_priv;
>
> + struct i915_timeline *timeline;
> +
> /**
> * @ppgtt: unique address space (GTT)
> *
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 7bccf578cd65..ca432d3d8211 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -860,8 +860,14 @@ void i915_request_add(struct i915_request *request)
> prev = i915_gem_active_raw(&timeline->last_request,
> &request->i915->drm.struct_mutex);
> if (prev && !i915_request_completed(prev)) {
> - i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
> - &request->submitq);
> + if (prev->engine == engine)
> + i915_sw_fence_await_sw_fence(&request->submit,
> + &prev->submit,
> + &request->submitq);
> + else
> + __i915_sw_fence_await_dma_fence(&request->submit,
> + &prev->fence,
> + &request->dmaq);
Drop a comment here explaining this trick, at least I always forget the
many different await flavours.
The subtlety of why we need a special flavour here of await helpers
which use the builtin call back storage, vs using the existing ones
which allocate that, totally escapes me at the moment.
It's probably a good idea to put a paragraph in the commit message
explaining what new sw fence facility needs to be added to implement
this and why.
> if (engine->schedule)
> __i915_sched_node_add_dependency(&request->sched,
> &prev->sched,
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 679b4663f774..f715384ff485 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -108,7 +108,10 @@ struct i915_request {
> * It is used by the driver to then queue the request for execution.
> */
> struct i915_sw_fence submit;
> - wait_queue_entry_t submitq;
> + union {
> + wait_queue_entry_t submitq;
> + struct i915_sw_dma_fence_cb dmaq;
> + };
Union deserves a comment as well I think, like this is for that and that
is for this, and only one can be in use at a time because of the third
thing.
>
> /*
> * A list of everyone we wait upon, and everyone who waits upon us.
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> index 7c58b049ecb5..7bb64437dbbe 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -359,11 +359,6 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
> return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
> }
>
> -struct i915_sw_dma_fence_cb {
> - struct dma_fence_cb base;
> - struct i915_sw_fence *fence;
> -};
> -
> struct i915_sw_dma_fence_cb_timer {
> struct i915_sw_dma_fence_cb base;
> struct dma_fence *dma;
> @@ -480,6 +475,40 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
> return ret;
> }
>
> +static void __dma_i915_sw_fence_wake(struct dma_fence *dma,
> + struct dma_fence_cb *data)
> +{
> + struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
> +
> + i915_sw_fence_complete(cb->fence);
> +}
> +
> +int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
> + struct dma_fence *dma,
> + struct i915_sw_dma_fence_cb *cb)
> +{
> + int ret;
> +
> + debug_fence_assert(fence);
> +
> + if (dma_fence_is_signaled(dma))
> + return 0;
> +
> + cb->fence = fence;
> + i915_sw_fence_await(fence);
> +
> + ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
> + if (ret == 0) {
> + ret = 1;
> + } else {
> + i915_sw_fence_complete(fence);
> + if (ret == -ENOENT) /* fence already signaled */
> + ret = 0;
> + }
> +
> + return ret;
> +}
> +
> int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
> struct reservation_object *resv,
> const struct dma_fence_ops *exclude,
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
> index 0e055ea0179f..b420ceadb813 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.h
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.h
> @@ -9,14 +9,13 @@
> #ifndef _I915_SW_FENCE_H_
> #define _I915_SW_FENCE_H_
>
> +#include <linux/dma-fence.h>
> #include <linux/gfp.h>
> #include <linux/kref.h>
> #include <linux/notifier.h> /* for NOTIFY_DONE */
> #include <linux/wait.h>
>
> struct completion;
> -struct dma_fence;
> -struct dma_fence_ops;
> struct reservation_object;
>
> struct i915_sw_fence {
> @@ -68,10 +67,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
> int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
> struct i915_sw_fence *after,
> gfp_t gfp);
> +
> +struct i915_sw_dma_fence_cb {
> + struct dma_fence_cb base;
> + struct i915_sw_fence *fence;
> +};
> +
> +int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
> + struct dma_fence *dma,
> + struct i915_sw_dma_fence_cb *cb);
> int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
> struct dma_fence *dma,
> unsigned long timeout,
> gfp_t gfp);
> +
> int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
> struct reservation_object *resv,
> const struct dma_fence_ops *exclude,
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 323341e9bf2d..10c42820bb46 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2644,7 +2644,10 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
> goto error_deref_obj;
> }
>
> - timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
> + if (ctx->timeline)
> + timeline = i915_timeline_get(ctx->timeline);
> + else
> + timeline = i915_timeline_create(ctx->i915, ctx->name, NULL);
> if (IS_ERR(timeline)) {
> ret = PTR_ERR(timeline);
> goto error_deref_obj;
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 2864cfb82325..3e68c2888b9c 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -143,7 +143,7 @@ static int live_nop_switch(void *arg)
> }
>
> for (n = 0; n < nctx; n++) {
> - ctx[n] = i915_gem_create_context(i915, file->driver_priv);
> + ctx[n] = i915_gem_create_context(i915, file->driver_priv, 0);
> if (IS_ERR(ctx[n])) {
> err = PTR_ERR(ctx[n]);
> goto out_unlock;
> @@ -601,7 +601,8 @@ static int igt_ctx_exec(void *arg)
> struct i915_gem_context *ctx;
> intel_wakeref_t wakeref;
>
> - ctx = i915_gem_create_context(i915, file->driver_priv);
> + ctx = i915_gem_create_context(i915,
> + file->driver_priv, 0);
> if (IS_ERR(ctx)) {
> err = PTR_ERR(ctx);
> goto out_unlock;
> @@ -698,7 +699,8 @@ static int igt_shared_ctx_exec(void *arg)
> if (err)
> goto out_unlock;
>
> - parent = i915_gem_create_context(i915, file->driver_priv);
> + parent = i915_gem_create_context(i915,
> + file->driver_priv, 0);
> if (IS_ERR(parent)) {
> err = PTR_ERR(parent);
> if (err == -ENODEV) /* no logical ctx support */
> @@ -720,7 +722,8 @@ static int igt_shared_ctx_exec(void *arg)
> if (ctx)
> __destroy_hw_context(ctx, file->driver_priv);
>
> - ctx = i915_gem_create_context(i915, file->driver_priv);
> + ctx = i915_gem_create_context(i915,
> + file->driver_priv, 0);
> if (IS_ERR(ctx)) {
> err = PTR_ERR(ctx);
> goto out_unlock;
> @@ -813,7 +816,7 @@ static int igt_ctx_readonly(void *arg)
> if (err)
> goto out_unlock;
>
> - ctx = i915_gem_create_context(i915, file->driver_priv);
> + ctx = i915_gem_create_context(i915, file->driver_priv, 0);
> if (IS_ERR(ctx)) {
> err = PTR_ERR(ctx);
> goto out_unlock;
> @@ -1139,13 +1142,13 @@ static int igt_vm_isolation(void *arg)
> if (err)
> goto out_unlock;
>
> - ctx_a = i915_gem_create_context(i915, file->driver_priv);
> + ctx_a = i915_gem_create_context(i915, file->driver_priv, 0);
> if (IS_ERR(ctx_a)) {
> err = PTR_ERR(ctx_a);
> goto out_unlock;
> }
>
> - ctx_b = i915_gem_create_context(i915, file->driver_priv);
> + ctx_b = i915_gem_create_context(i915, file->driver_priv, 0);
> if (IS_ERR(ctx_b)) {
> err = PTR_ERR(ctx_b);
> goto out_unlock;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index b9fd2a4b95e9..f13f9c726034 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -99,7 +99,7 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
> {
> lockdep_assert_held(&i915->drm.struct_mutex);
>
> - return i915_gem_create_context(i915, file->driver_priv);
> + return i915_gem_create_context(i915, file->driver_priv, 0);
> }
>
> struct i915_gem_context *
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 704e9d2fe2d6..72749dc9801e 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1444,6 +1444,7 @@ struct drm_i915_gem_context_create {
> struct drm_i915_gem_context_create_ext {
> __u32 ctx_id; /* output: id of new context*/
> __u32 flags;
> +#define I915_GEM_CONTEXT_SINGLE_TIMELINE 0x1
And some kernel doc please.
> __u64 extensions;
> };
>
>
Look fine, no complaints, apart from needing help to remind me how some
things work.
Regards,
Tvrtko
More information about the Intel-gfx
mailing list