[Intel-gfx] [PATCH 21/29] drm/i915: Switch back to an array of logical per-engine HW contexts
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Apr 10 15:32:18 UTC 2019
On 08/04/2019 10:17, Chris Wilson wrote:
> We switched to a tree of per-engine HW contexts to accommodate the
> introduction of virtual engines. However, we plan to also support
> multiple instances of the same engine within the GEM context, defeating
> our use of the engine as a key to looking up the HW context. Just
> allocate a logical per-engine instance and always use an index into the
> ctx->engines[]. Later on, this ctx->engines[] may be replaced by a user
> specified map.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/gt/intel_context.c | 97 ++-----------------
> drivers/gpu/drm/i915/gt/intel_context.h | 25 +----
> drivers/gpu/drm/i915/gt/intel_context_types.h | 2 -
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +-
> drivers/gpu/drm/i915/gt/mock_engine.c | 3 +-
> drivers/gpu/drm/i915/gvt/scheduler.c | 2 +-
> drivers/gpu/drm/i915/i915_gem.c | 29 +++---
> drivers/gpu/drm/i915/i915_gem_context.c | 92 +++++++++++++++---
> drivers/gpu/drm/i915/i915_gem_context.h | 42 ++++++++
> drivers/gpu/drm/i915/i915_gem_context_types.h | 26 ++++-
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 70 ++++++-------
> drivers/gpu/drm/i915/i915_perf.c | 81 +++++++++-------
> drivers/gpu/drm/i915/i915_request.c | 2 +-
> drivers/gpu/drm/i915/intel_guc_submission.c | 24 +++--
> .../gpu/drm/i915/selftests/i915_gem_context.c | 2 +-
> drivers/gpu/drm/i915/selftests/mock_context.c | 14 ++-
> 16 files changed, 283 insertions(+), 230 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index 46bf8d8fb7e4..803ac9a1dd15 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -17,7 +17,7 @@ static struct i915_global_context {
> struct kmem_cache *slab_ce;
> } global;
>
> -struct intel_context *intel_context_alloc(void)
> +static struct intel_context *intel_context_alloc(void)
> {
> return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
> }
> @@ -28,104 +28,17 @@ void intel_context_free(struct intel_context *ce)
> }
>
> struct intel_context *
> -intel_context_lookup(struct i915_gem_context *ctx,
> +intel_context_create(struct i915_gem_context *ctx,
> struct intel_engine_cs *engine)
> {
> - struct intel_context *ce = NULL;
> - struct rb_node *p;
> -
> - spin_lock(&ctx->hw_contexts_lock);
> - p = ctx->hw_contexts.rb_node;
> - while (p) {
> - struct intel_context *this =
> - rb_entry(p, struct intel_context, node);
> -
> - if (this->engine == engine) {
> - GEM_BUG_ON(this->gem_context != ctx);
> - ce = this;
> - break;
> - }
> -
> - if (this->engine < engine)
> - p = p->rb_right;
> - else
> - p = p->rb_left;
> - }
> - spin_unlock(&ctx->hw_contexts_lock);
> -
> - return ce;
> -}
> -
> -struct intel_context *
> -__intel_context_insert(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine,
> - struct intel_context *ce)
> -{
> - struct rb_node **p, *parent;
> - int err = 0;
> -
> - spin_lock(&ctx->hw_contexts_lock);
> -
> - parent = NULL;
> - p = &ctx->hw_contexts.rb_node;
> - while (*p) {
> - struct intel_context *this;
> -
> - parent = *p;
> - this = rb_entry(parent, struct intel_context, node);
> -
> - if (this->engine == engine) {
> - err = -EEXIST;
> - ce = this;
> - break;
> - }
> -
> - if (this->engine < engine)
> - p = &parent->rb_right;
> - else
> - p = &parent->rb_left;
> - }
> - if (!err) {
> - rb_link_node(&ce->node, parent, p);
> - rb_insert_color(&ce->node, &ctx->hw_contexts);
> - }
> -
> - spin_unlock(&ctx->hw_contexts_lock);
> -
> - return ce;
> -}
> -
> -void __intel_context_remove(struct intel_context *ce)
> -{
> - struct i915_gem_context *ctx = ce->gem_context;
> -
> - spin_lock(&ctx->hw_contexts_lock);
> - rb_erase(&ce->node, &ctx->hw_contexts);
> - spin_unlock(&ctx->hw_contexts_lock);
> -}
> -
> -struct intel_context *
> -intel_context_instance(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine)
> -{
> - struct intel_context *ce, *pos;
> -
> - ce = intel_context_lookup(ctx, engine);
> - if (likely(ce))
> - return intel_context_get(ce);
> + struct intel_context *ce;
>
> ce = intel_context_alloc();
> if (!ce)
> return ERR_PTR(-ENOMEM);
>
> intel_context_init(ce, ctx, engine);
> -
> - pos = __intel_context_insert(ctx, engine, ce);
> - if (unlikely(pos != ce)) /* Beaten! Use their HW context instead */
> - intel_context_free(ce);
> -
> - GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos);
> - return intel_context_get(pos);
> + return ce;
> }
>
> int __intel_context_do_pin(struct intel_context *ce)
> @@ -204,6 +117,8 @@ intel_context_init(struct intel_context *ce,
> struct i915_gem_context *ctx,
> struct intel_engine_cs *engine)
> {
> + GEM_BUG_ON(!engine->cops);
> +
> kref_init(&ce->ref);
>
> ce->gem_context = ctx;
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index 4f8ef45e6344..567a415be513 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -12,24 +12,16 @@
> #include "intel_context_types.h"
> #include "intel_engine_types.h"
>
> -struct intel_context *intel_context_alloc(void);
> -void intel_context_free(struct intel_context *ce);
> -
> void intel_context_init(struct intel_context *ce,
> struct i915_gem_context *ctx,
> struct intel_engine_cs *engine);
>
> -/**
> - * intel_context_lookup - Find the matching HW context for this (ctx, engine)
> - * @ctx - the parent GEM context
> - * @engine - the target HW engine
> - *
> - * May return NULL if the HW context hasn't been instantiated (i.e. unused).
> - */
> struct intel_context *
> -intel_context_lookup(struct i915_gem_context *ctx,
> +intel_context_create(struct i915_gem_context *ctx,
> struct intel_engine_cs *engine);
>
> +void intel_context_free(struct intel_context *ce);
> +
> /**
> * intel_context_pin_lock - Stablises the 'pinned' status of the HW context
> * @ctx - the parent GEM context
> @@ -59,17 +51,6 @@ intel_context_is_pinned(struct intel_context *ce)
>
> void intel_context_pin_unlock(struct intel_context *ce);
>
> -struct intel_context *
> -__intel_context_insert(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine,
> - struct intel_context *ce);
> -void
> -__intel_context_remove(struct intel_context *ce);
> -
> -struct intel_context *
> -intel_context_instance(struct i915_gem_context *ctx,
> - struct intel_engine_cs *engine);
> -
> int __intel_context_do_pin(struct intel_context *ce);
>
> static inline int intel_context_pin(struct intel_context *ce)
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index f02d27734e3b..3579c2708321 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -10,7 +10,6 @@
> #include <linux/kref.h>
> #include <linux/list.h>
> #include <linux/mutex.h>
> -#include <linux/rbtree.h>
> #include <linux/types.h>
>
> #include "i915_active_types.h"
> @@ -61,7 +60,6 @@ struct intel_context {
> struct i915_active_request active_tracker;
>
> const struct intel_context_ops *ops;
> - struct rb_node node;
>
> /** sseu: Control eu/slice partitioning */
> struct intel_sseu sseu;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 3f794bc71958..0df3c5238c04 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -716,7 +716,7 @@ static int pin_context(struct i915_gem_context *ctx,
> struct intel_context *ce;
> int err;
>
> - ce = intel_context_instance(ctx, engine);
> + ce = i915_gem_context_get_engine(ctx, engine->id);
Wouldn't staying with the intel_context_instance name help reduce the churn?
> if (IS_ERR(ce))
> return PTR_ERR(ce);
>
> diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
> index 3b672e011cf0..87e538825fee 100644
> --- a/drivers/gpu/drm/i915/gt/mock_engine.c
> +++ b/drivers/gpu/drm/i915/gt/mock_engine.c
> @@ -23,6 +23,7 @@
> */
>
> #include "i915_drv.h"
> +#include "i915_gem_context.h"
> #include "intel_context.h"
> #include "intel_engine_pm.h"
>
> @@ -286,7 +287,7 @@ int mock_engine_init(struct intel_engine_cs *engine)
> i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
>
> engine->kernel_context =
> - intel_context_instance(i915->kernel_context, engine);
> + i915_gem_context_get_engine(i915->kernel_context, engine->id);
> if (IS_ERR(engine->kernel_context))
> goto err_timeline;
>
> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
> index 606fc2713240..8b6574e1b495 100644
> --- a/drivers/gpu/drm/i915/gvt/scheduler.c
> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c
> @@ -1188,7 +1188,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
> INIT_LIST_HEAD(&s->workload_q_head[i]);
> s->shadow[i] = ERR_PTR(-EINVAL);
>
> - ce = intel_context_instance(ctx, engine);
> + ce = i915_gem_context_get_engine(ctx, i);
> if (IS_ERR(ce)) {
> ret = PTR_ERR(ce);
> goto out_shadow_ctx;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 50266e87c225..21b4a04c424b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4312,8 +4312,9 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
>
> static int __intel_engines_record_defaults(struct drm_i915_private *i915)
> {
> - struct i915_gem_context *ctx;
> struct intel_engine_cs *engine;
> + struct i915_gem_context *ctx;
> + struct i915_gem_engines *e;
> enum intel_engine_id id;
> int err = 0;
>
> @@ -4330,18 +4331,26 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
> if (IS_ERR(ctx))
> return PTR_ERR(ctx);
>
> + e = i915_gem_context_engine_list_lock(ctx);
> +
> for_each_engine(engine, i915, id) {
Do we need an i915 version of for_each_context_engine? If all call sites
will be doing this "lock ctx" -> "walk physical engines" -> "lookup in
context" dance then it seems a bit disconnected.
> + struct intel_context *ce = e->engines[id];
How will indexing by engine->id work with a user defined engine map?
> struct i915_request *rq;
>
> - rq = i915_request_alloc(engine, ctx);
> + err = intel_context_pin(ce);
> + if (err)
> + goto err_active;
> +
> + rq = i915_request_create(ce);
> + intel_context_unpin(ce);
Kind of verbose, no? Do you want to add some
middle-ground-between-request-alloc-and-create helper?
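For instance (untested sketch, name invented):

static struct i915_request *
intel_context_create_request(struct intel_context *ce)
{
	struct i915_request *rq;
	int err;

	/* Pin only around request construction, as open-coded above. */
	err = intel_context_pin(ce);
	if (err)
		return ERR_PTR(err);

	rq = i915_request_create(ce);
	intel_context_unpin(ce);

	return rq;
}

That would also tidy context_barrier_task below.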
> if (IS_ERR(rq)) {
> err = PTR_ERR(rq);
> - goto out_ctx;
> + goto err_active;
> }
>
> err = 0;
> - if (engine->init_context)
> - err = engine->init_context(rq);
> + if (rq->engine->init_context)
> + err = rq->engine->init_context(rq);
>
> i915_request_add(rq);
> if (err)
> @@ -4355,15 +4364,10 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
> }
>
> for_each_engine(engine, i915, id) {
> - struct intel_context *ce;
> - struct i915_vma *state;
> + struct intel_context *ce = e->engines[id];
> + struct i915_vma *state = ce->state;
> void *vaddr;
>
> - ce = intel_context_lookup(ctx, engine);
> - if (!ce)
> - continue;
> -
> - state = ce->state;
> if (!state)
> continue;
>
> @@ -4419,6 +4423,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
> }
>
> out_ctx:
> + i915_gem_context_engine_list_unlock(ctx);
> i915_gem_context_set_closed(ctx);
> i915_gem_context_put(ctx);
> return err;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 1e84fe0a4c55..517816558c85 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -150,7 +150,7 @@ lookup_user_engine(struct i915_gem_context *ctx, u16 class, u16 instance)
> if (!engine)
> return ERR_PTR(-EINVAL);
>
> - return intel_context_instance(ctx, engine);
> + return i915_gem_context_get_engine(ctx, engine->id);
> }
>
> static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
> @@ -242,10 +242,54 @@ static void release_hw_id(struct i915_gem_context *ctx)
> mutex_unlock(&i915->contexts.mutex);
> }
>
> -static void i915_gem_context_free(struct i915_gem_context *ctx)
> +static void free_engines(struct i915_gem_engines *e)
> +{
> + unsigned int n;
> +
> + for (n = 0; n < e->num_engines; n++) {
> + if (!e->engines[n])
> + continue;
> +
> + intel_context_put(e->engines[n]);
> + }
> + kfree(e);
> +}
> +
> +static void free_engines_n(struct i915_gem_engines *e, unsigned long n)
> {
> - struct intel_context *it, *n;
> + e->num_engines = n;
> + free_engines(e);
> +}
> +
> +static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
> +{
> + struct intel_engine_cs *engine;
> + struct i915_gem_engines *e;
> + enum intel_engine_id id;
> +
> + e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL);
> + if (!e)
> + return ERR_PTR(-ENOMEM);
> +
> + e->i915 = ctx->i915;
> + for_each_engine(engine, ctx->i915, id) {
> + struct intel_context *ce;
>
> + ce = intel_context_create(ctx, engine);
> + if (IS_ERR(ce)) {
> + free_engines_n(e, id);
I dislike piggy-backing n into e. How about (body lifted from your
free_engines above):

static void __free_engines(struct i915_gem_engines *e, unsigned int n)
{
	while (n--) {
		if (e->engines[n])
			intel_context_put(e->engines[n]);
	}
	kfree(e);
}

static void free_engines(struct i915_gem_engines *e)
{
	__free_engines(e, e->num_engines);
}

?

Or you could even do e->num_engines++ in the allocation loop above and just
have a single free_engines.
> + return ERR_CAST(ce);
> + }
> +
> + e->engines[id] = ce;
Each context would have a sparse array of engines on most platforms.
Would it be workable to instead create a compact array per context, and
just have a per-device translation table from idx to engine->id? Or
vice versa; I can't figure out straight off the bat which one you would
need.
I guess in this scheme you would need some translation as well to
support customised engine maps... will see if I spot that later in the
patch.
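Roughly this shape is what I have in mind (hypothetical, all names made
up):

	/* In struct drm_i915_private, built once at engine init: */
	u8 engine_to_slot[I915_NUM_ENGINES]; /* engine->id -> compact index */

	/* Lookup by physical engine then becomes: */
	ce = e->engines[i915->engine_to_slot[engine->id]];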
> + }
> + e->num_engines = id;
> +
> + return e;
> +}
> +
> +static void i915_gem_context_free(struct i915_gem_context *ctx)
> +{
> lockdep_assert_held(&ctx->i915->drm.struct_mutex);
> GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
> GEM_BUG_ON(!list_empty(&ctx->active_engines));
> @@ -253,8 +297,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
> release_hw_id(ctx);
> i915_ppgtt_put(ctx->ppgtt);
>
> - rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
> - intel_context_put(it);
> + free_engines(rcu_access_pointer(ctx->engines));
> + mutex_destroy(&ctx->engines_mutex);
>
> if (ctx->timeline)
> i915_timeline_put(ctx->timeline);
> @@ -363,6 +407,8 @@ static struct i915_gem_context *
> __create_context(struct drm_i915_private *dev_priv)
> {
> struct i915_gem_context *ctx;
> + struct i915_gem_engines *e;
> + int err;
> int i;
>
> ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> @@ -376,8 +422,13 @@ __create_context(struct drm_i915_private *dev_priv)
> INIT_LIST_HEAD(&ctx->active_engines);
> mutex_init(&ctx->mutex);
>
> - ctx->hw_contexts = RB_ROOT;
> - spin_lock_init(&ctx->hw_contexts_lock);
> + mutex_init(&ctx->engines_mutex);
> + e = default_engines(ctx);
> + if (IS_ERR(e)) {
> + err = PTR_ERR(e);
> + goto err_free;
> + }
> + RCU_INIT_POINTER(ctx->engines, e);
>
> INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> INIT_LIST_HEAD(&ctx->handles_list);
> @@ -399,6 +450,10 @@ __create_context(struct drm_i915_private *dev_priv)
> ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
>
> return ctx;
> +
> +err_free:
> + kfree(ctx);
> + return ERR_PTR(err);
> }
>
> static struct i915_hw_ppgtt *
> @@ -862,7 +917,8 @@ static int context_barrier_task(struct i915_gem_context *ctx,
> {
> struct drm_i915_private *i915 = ctx->i915;
> struct context_barrier_task *cb;
> - struct intel_context *ce, *next;
> + struct i915_gem_engines *e;
> + unsigned int i;
> int err = 0;
>
> lockdep_assert_held(&i915->drm.struct_mutex);
> @@ -875,20 +931,29 @@ static int context_barrier_task(struct i915_gem_context *ctx,
> i915_active_init(i915, &cb->base, cb_retire);
> i915_active_acquire(&cb->base);
>
> - rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) {
> - struct intel_engine_cs *engine = ce->engine;
> + e = i915_gem_context_engine_list_lock(ctx);
> + for (i = 0; i < e->num_engines; i++) {
> + struct intel_context *ce = e->engines[i];
> struct i915_request *rq;
>
> - if (!(engine->mask & engines))
> + if (!ce || !(ce->engine->mask & engines))
Definitely looks like a case for the for_each_context_engine sketched above.
> + continue;
> +
> + if (!intel_context_is_pinned(ce))
> continue;
>
> if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
> - engine->mask)) {
> + ce->engine->mask)) {
> err = -ENXIO;
> break;
> }
>
> - rq = i915_request_alloc(engine, ctx);
> + err = intel_context_pin(ce);
> + if (err)
> + break;
> +
> + rq = i915_request_create(ce);
> + intel_context_unpin(ce);
Yep, as mentioned above - see the sketch there. I definitely think another
helper would help codebase readability. Even if called unimaginatively as
__i915_request_add.
> if (IS_ERR(rq)) {
> err = PTR_ERR(rq);
> break;
> @@ -904,6 +969,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
> if (err)
> break;
> }
> + i915_gem_context_engine_list_unlock(ctx);
>
> cb->task = err ? NULL : task; /* caller needs to unwind instead */
> cb->data = data;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index 5a8e080499fb..d94fc4c1907c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -176,6 +176,48 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx)
> kref_put(&ctx->ref, i915_gem_context_release);
> }
>
> +static inline struct i915_gem_engines *
> +i915_gem_context_engine_list(struct i915_gem_context *ctx)
> +{
> + return rcu_dereference_protected(ctx->engines,
> + lockdep_is_held(&ctx->engines_mutex));
> +}
> +
> +static inline struct i915_gem_engines *
> +i915_gem_context_engine_list_lock(struct i915_gem_context *ctx)
> + __acquires(&ctx->engines_mutex)
> +{
> + mutex_lock(&ctx->engines_mutex);
> + return i915_gem_context_engine_list(ctx);
> +}
> +
> +static inline void
> +i915_gem_context_engine_list_unlock(struct i915_gem_context *ctx)
> + __releases(&ctx->engines_mutex)
> +{
> + mutex_unlock(&ctx->engines_mutex);
> +}
> +
> +static inline struct intel_context *
> +i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned long idx)
> +{
> + return i915_gem_context_engine_list(ctx)->engines[idx];
> +}
> +
> +static inline struct intel_context *
> +i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned long idx)
> +{
> + struct intel_context *ce = ERR_PTR(-EINVAL);
> +
> + rcu_read_lock(); {
> + struct i915_gem_engines *e = rcu_dereference(ctx->engines);
> + if (likely(idx < e->num_engines && e->engines[idx]))
> + ce = intel_context_get(e->engines[idx]);
> + } rcu_read_unlock();
> +
> + return ce;
> +}
> +
> struct i915_lut_handle *i915_lut_handle_alloc(void);
> void i915_lut_handle_free(struct i915_lut_handle *lut);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
> index d282a6ab3b9f..1610a74b0283 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context_types.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
> @@ -29,6 +29,13 @@ struct i915_hw_ppgtt;
> struct i915_timeline;
> struct intel_ring;
>
> +struct i915_gem_engines {
> + struct rcu_work rcu;
> + struct drm_i915_private *i915;
> + unsigned long num_engines;
unsigned int?
> + struct intel_context *engines[];
> +};
> +
> /**
> * struct i915_gem_context - client state
> *
> @@ -42,6 +49,21 @@ struct i915_gem_context {
> /** file_priv: owning file descriptor */
> struct drm_i915_file_private *file_priv;
>
> + /**
> + * @engines: User defined engines for this context
> + *
> + * NULL means to use legacy definitions (including random meaning of
> + * I915_EXEC_BSD with I915_EXEC_BSD_SELECTOR overrides).
> + *
> + * If defined, execbuf uses the I915_EXEC_MASK as an index into the
> + * array, and various other uAPI have the ability to look up an
> + * index from this array to select an engine to operate on.
> + *
> + * User defined by I915_CONTEXT_PARAM_ENGINE.
> + */
> + struct i915_gem_engines __rcu *engines;
> + struct mutex engines_mutex; /* guards writes to engines */
> +
> struct i915_timeline *timeline;
>
> /**
> @@ -134,10 +156,6 @@ struct i915_gem_context {
>
> struct i915_sched_attr sched;
>
> - /** hw_contexts: per-engine logical HW state */
> - struct rb_root hw_contexts;
> - spinlock_t hw_contexts_lock;
> -
> /** ring_size: size for allocating the per-engine ring buffer */
> u32 ring_size;
> /** desc_template: invariant fields for the HW context descriptor */
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index c700cbc2f594..941192c60832 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -2077,9 +2077,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
> return file_priv->bsd_engine;
> }
>
> -#define I915_USER_RINGS (4)
> -
> -static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
> +static const enum intel_engine_id user_ring_map[] = {
> [I915_EXEC_DEFAULT] = RCS0,
> [I915_EXEC_RENDER] = RCS0,
> [I915_EXEC_BLT] = BCS0,
> @@ -2087,10 +2085,8 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
> [I915_EXEC_VEBOX] = VECS0
> };
>
> -static int eb_pin_context(struct i915_execbuffer *eb,
> - struct intel_engine_cs *engine)
> +static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
> {
> - struct intel_context *ce;
> int err;
>
> /*
> @@ -2101,21 +2097,16 @@ static int eb_pin_context(struct i915_execbuffer *eb,
> if (err)
> return err;
>
> - ce = intel_context_instance(eb->gem_context, engine);
> - if (IS_ERR(ce))
> - return PTR_ERR(ce);
> -
> /*
> * Pinning the contexts may generate requests in order to acquire
> * GGTT space, so do this first before we reserve a seqno for
> * ourselves.
> */
> err = intel_context_pin(ce);
> - intel_context_put(ce);
> if (err)
> return err;
>
> - eb->engine = engine;
> + eb->engine = ce->engine;
> eb->context = ce;
> return 0;
> }
> @@ -2125,24 +2116,18 @@ static void eb_unpin_context(struct i915_execbuffer *eb)
> intel_context_unpin(eb->context);
> }
>
> -static int
> -eb_select_engine(struct i915_execbuffer *eb,
> - struct drm_file *file,
> - struct drm_i915_gem_execbuffer2 *args)
> +static unsigned int
> +eb_select_legacy_ring(struct i915_execbuffer *eb,
> + struct drm_file *file,
> + struct drm_i915_gem_execbuffer2 *args)
> {
> unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
> - struct intel_engine_cs *engine;
> -
> - if (user_ring_id > I915_USER_RINGS) {
> - DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
> - return -EINVAL;
> - }
>
> - if ((user_ring_id != I915_EXEC_BSD) &&
> - ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
> + if (user_ring_id != I915_EXEC_BSD &&
> + (args->flags & I915_EXEC_BSD_MASK)) {
> DRM_DEBUG("execbuf with non bsd ring but with invalid "
> "bsd dispatch flags: %d\n", (int)(args->flags));
> - return -EINVAL;
> + return -1;
> }
>
> if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(eb->i915, VCS1)) {
> @@ -2157,20 +2142,39 @@ eb_select_engine(struct i915_execbuffer *eb,
> } else {
> DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
> bsd_idx);
> - return -EINVAL;
> + return -1;
> }
>
> - engine = eb->i915->engine[_VCS(bsd_idx)];
> - } else {
> - engine = eb->i915->engine[user_ring_map[user_ring_id]];
> + return _VCS(bsd_idx);
> }
>
> - if (!engine) {
> - DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
> - return -EINVAL;
> + if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
> + DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
> + return -1;
> }
>
> - return eb_pin_context(eb, engine);
> + return user_ring_map[user_ring_id];
> +}
> +
> +static int
> +eb_select_engine(struct i915_execbuffer *eb,
> + struct drm_file *file,
> + struct drm_i915_gem_execbuffer2 *args)
> +{
> + struct intel_context *ce;
> + unsigned int idx;
> + int err;
> +
> + idx = eb_select_legacy_ring(eb, file, args);
> +
> + ce = i915_gem_context_get_engine(eb->gem_context, idx);
> + if (IS_ERR(ce))
> + return PTR_ERR(ce);
> +
> + err = eb_pin_context(eb, ce);
> + intel_context_put(ce);
> +
> + return err;
> }
>
> static void
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index afaeabe5e531..304e597e19e6 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
I leave the rest for a second pass.
Regards,
Tvrtko
> @@ -1203,35 +1203,37 @@ static int i915_oa_read(struct i915_perf_stream *stream,
> static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
> struct i915_gem_context *ctx)
> {
> - struct intel_engine_cs *engine = i915->engine[RCS0];
> - struct intel_context *ce;
> + struct i915_gem_engines *e;
> + unsigned int i;
> int err;
>
> - ce = intel_context_instance(ctx, engine);
> - if (IS_ERR(ce))
> - return ce;
> -
> err = i915_mutex_lock_interruptible(&i915->drm);
> - if (err) {
> - intel_context_put(ce);
> + if (err)
> return ERR_PTR(err);
> - }
>
> - /*
> - * As the ID is the gtt offset of the context's vma we
> - * pin the vma to ensure the ID remains fixed.
> - *
> - * NB: implied RCS engine...
> - */
> - err = intel_context_pin(ce);
> + e = i915_gem_context_engine_list_lock(ctx);
> + for (i = 0; i < e->num_engines; i++) {
> + struct intel_context *ce = e->engines[i];
> +
> + if (ce->engine->class != RENDER_CLASS)
> + continue;
> +
> + /*
> + * As the ID is the gtt offset of the context's vma we
> + * pin the vma to ensure the ID remains fixed.
> + */
> + err = intel_context_pin(ce);
> + if (err == 0) {
> + i915->perf.oa.pinned_ctx = ce;
> + break;
> + }
> + }
> + i915_gem_context_engine_list_unlock(ctx);
> mutex_unlock(&i915->drm.struct_mutex);
> - intel_context_put(ce);
> if (err)
> return ERR_PTR(err);
>
> - i915->perf.oa.pinned_ctx = ce;
> -
> - return ce;
> + return i915->perf.oa.pinned_ctx;
> }
>
> /**
> @@ -1717,10 +1719,10 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
> static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
> const struct i915_oa_config *oa_config)
> {
> - struct intel_engine_cs *engine = dev_priv->engine[RCS0];
> unsigned int map_type = i915_coherent_map_type(dev_priv);
> struct i915_gem_context *ctx;
> struct i915_request *rq;
> + unsigned int i;
> int ret;
>
> lockdep_assert_held(&dev_priv->drm.struct_mutex);
> @@ -1746,30 +1748,41 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
>
> /* Update all contexts now that we've stalled the submission. */
> list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
> - struct intel_context *ce = intel_context_lookup(ctx, engine);
> - u32 *regs;
> + struct i915_gem_engines *e =
> + i915_gem_context_engine_list_lock(ctx);
>
> - /* OA settings will be set upon first use */
> - if (!ce || !ce->state)
> - continue;
> + for (i = 0; i < e->num_engines; i++) {
> + struct intel_context *ce = e->engines[i];
> + u32 *regs;
> +
> + if (!ce || ce->engine->class != RENDER_CLASS)
> + continue;
> +
> + /* OA settings will be set upon first use */
> + if (!ce->state)
> + continue;
>
> - regs = i915_gem_object_pin_map(ce->state->obj, map_type);
> - if (IS_ERR(regs))
> - return PTR_ERR(regs);
> + regs = i915_gem_object_pin_map(ce->state->obj,
> + map_type);
> + if (IS_ERR(regs))
> + return PTR_ERR(regs);
>
> - ce->state->obj->mm.dirty = true;
> - regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
> + ce->state->obj->mm.dirty = true;
> + regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
>
> - gen8_update_reg_state_unlocked(ce, regs, oa_config);
> + gen8_update_reg_state_unlocked(ce, regs, oa_config);
> +
> + i915_gem_object_unpin_map(ce->state->obj);
> + }
>
> - i915_gem_object_unpin_map(ce->state->obj);
> + i915_gem_context_engine_list_unlock(ctx);
> }
>
> /*
> * Apply the configuration by doing one context restore of the edited
> * context image.
> */
> - rq = i915_request_create(engine->kernel_context);
> + rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
> if (IS_ERR(rq))
> return PTR_ERR(rq);
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 8abd891d9287..5e52c1623c17 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -767,7 +767,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
> * GGTT space, so do this first before we reserve a seqno for
> * ourselves.
> */
> - ce = intel_context_instance(ctx, engine);
> + ce = i915_gem_context_get_engine(ctx, engine->id);
> if (IS_ERR(ce))
> return ERR_CAST(ce);
>
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 7fbfcb3d63e0..ae230b38138a 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -364,11 +364,10 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
> static void guc_stage_desc_init(struct intel_guc_client *client)
> {
> struct intel_guc *guc = client->guc;
> - struct drm_i915_private *dev_priv = guc_to_i915(guc);
> - struct intel_engine_cs *engine;
> struct i915_gem_context *ctx = client->owner;
> + struct i915_gem_engines *e;
> struct guc_stage_desc *desc;
> - unsigned int tmp;
> + unsigned int i;
> u32 gfx_addr;
>
> desc = __get_stage_desc(client);
> @@ -382,10 +381,13 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
> desc->priority = client->priority;
> desc->db_id = client->doorbell_id;
>
> - for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
> - struct intel_context *ce = intel_context_lookup(ctx, engine);
> - u32 guc_engine_id = engine->guc_id;
> - struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
> + e = i915_gem_context_engine_list_lock(ctx);
> + for (i = 0; i < e->num_engines; i++) {
> + struct intel_context *ce = e->engines[i];
> + struct guc_execlist_context *lrc;
> +
> + if (!ce || !(ce->engine->mask & client->engines))
> + continue;
>
> /* TODO: We have a design issue to be solved here. Only when we
> * receive the first batch, we know which engine is used by the
> @@ -394,7 +396,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
> * for now who owns a GuC client. But for future owner of GuC
> * client, need to make sure lrc is pinned prior to enter here.
> */
> - if (!ce || !ce->state)
> + if (!ce->state)
> break; /* XXX: continue? */
>
> /*
> @@ -404,6 +406,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
> * Instead, the GuC uses the LRCA of the user mode context (see
> * guc_add_request below).
> */
> + lrc = &desc->lrc[ce->engine->guc_id];
> lrc->context_desc = lower_32_bits(ce->lrc_desc);
>
> /* The state page is after PPHWSP */
> @@ -414,15 +417,16 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
> * here. In proxy submission, it wants the stage id
> */
> lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) |
> - (guc_engine_id << GUC_ELC_ENGINE_OFFSET);
> + (ce->engine->guc_id << GUC_ELC_ENGINE_OFFSET);
>
> lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma);
> lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
> lrc->ring_next_free_location = lrc->ring_begin;
> lrc->ring_current_tail_pointer_value = 0;
>
> - desc->engines_used |= (1 << guc_engine_id);
> + desc->engines_used |= BIT(ce->engine->guc_id);
> }
> + i915_gem_context_engine_list_unlock(ctx);
>
> DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
> client->engines, desc->engines_used);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 214d1fd2f4dc..7fd224a4ca4c 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -1094,7 +1094,7 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
>
> wakeref = intel_runtime_pm_get(i915);
>
> - ce = intel_context_instance(ctx, i915->engine[RCS0]);
> + ce = i915_gem_context_get_engine(ctx, RCS0);
> if (IS_ERR(ce)) {
> ret = PTR_ERR(ce);
> goto out_rpm;
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index 0426093bf1d9..71c750693585 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -30,6 +30,7 @@ mock_context(struct drm_i915_private *i915,
> const char *name)
> {
> struct i915_gem_context *ctx;
> + struct i915_gem_engines *e;
> int ret;
>
> ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> @@ -40,8 +41,11 @@ mock_context(struct drm_i915_private *i915,
> INIT_LIST_HEAD(&ctx->link);
> ctx->i915 = i915;
>
> - ctx->hw_contexts = RB_ROOT;
> - spin_lock_init(&ctx->hw_contexts_lock);
> + mutex_init(&ctx->engines_mutex);
> + e = default_engines(ctx);
> + if (IS_ERR(e))
> + goto err_free;
> + RCU_INIT_POINTER(ctx->engines, e);
>
> INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
> INIT_LIST_HEAD(&ctx->handles_list);
> @@ -51,7 +55,7 @@ mock_context(struct drm_i915_private *i915,
>
> ret = i915_gem_context_pin_hw_id(ctx);
> if (ret < 0)
> - goto err_handles;
> + goto err_engines;
>
> if (name) {
> struct i915_hw_ppgtt *ppgtt;
> @@ -69,7 +73,9 @@ mock_context(struct drm_i915_private *i915,
>
> return ctx;
>
> -err_handles:
> +err_engines:
> + free_engines(rcu_access_pointer(ctx->engines));
> +err_free:
> kfree(ctx);
> return NULL;
>
>