[Intel-gfx] [igt-dev] [RFC 2/7] drm/i915: Track per-context engine busyness

Tue Apr 17 12:58:01 UTC 2018

Quoting Tvrtko Ursulin (2018-04-17 13:27:31)
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> 
> Some customers want to know how much of the GPU time are their clients
> using in order to make dynamic load balancing decisions.
> 
> With the hooks already in place which track the overall engine busyness,
> we can extend that slightly to split that time between contexts.
> 
> v2: Fix accounting for tail updates.
> v3: Rebase.
> v4: Mark currently running contexts as active on stats enable.
> v5: Include some headers to fix the build.
> v6: Added fine grained lock.
> v7: Convert to seqlock. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: gordon.kelly at intel.com
> ---
>  drivers/gpu/drm/i915/i915_gem_context.c |  8 +++++
>  drivers/gpu/drm/i915/i915_gem_context.h |  7 +++++
>  drivers/gpu/drm/i915/intel_engine_cs.c  | 31 +++++++++++++++++++
>  drivers/gpu/drm/i915/intel_lrc.c        | 14 ++++++---
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 55 +++++++++++++++++++++++++++++----
>  5 files changed, 104 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 5cfac0255758..85627fa4565b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -349,7 +349,9 @@ static struct i915_gem_context *
>  i915_gem_create_context(struct drm_i915_private *dev_priv,
>                         struct drm_i915_file_private *file_priv)
>  {
> +       struct intel_engine_cs *engine;
>         struct i915_gem_context *ctx;
> +       enum intel_engine_id id;
>  
>         lockdep_assert_held(&dev_priv->drm.struct_mutex);
>  
> @@ -375,6 +377,12 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
>                 ctx->desc_template = default_desc_template(dev_priv, ppgtt);
>         }
>  
> +       /* Initialize the context stats lock. */
> +       for_each_engine(engine, dev_priv, id) {
> +               if (intel_engine_supports_stats(engine))
> +                       seqlock_init(&ctx->engine[id].stats.lock);

Keep it simple, always init the substruct.

> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index de61d3d1653d..e66166466d6d 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -2078,6 +2078,16 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>  
>                 engine->stats.enabled_at = ktime_get();
>  
> +               /* Mark currently running context as active. */
> +               if (port_isset(port)) {
> +                       struct i915_request *req = port_request(port);
> +                       struct intel_context *ce =
> +                                       &req->ctx->engine[engine->id];

Bleh, better break out the
	struct i915_request {
		struct intel_context *ce;
	}
patch, as this is getting repetitive ;)
> +
> +                       ce->stats.start = engine->stats.enabled_at;
> +                       ce->stats.active = true;
> +               }
> +
>                 /* XXX submission method oblivious? */
>                 while (num_ports-- && port_isset(port)) {
>                         engine->stats.active++;
> @@ -2151,6 +2161,27 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
>         write_sequnlock_irqrestore(&engine->stats.lock, flags);
>  }
>  
> +ktime_t intel_context_engine_get_busy_time(struct i915_gem_context *ctx,
> +                                          struct intel_engine_cs *engine)
> +{
> +       struct intel_context *ce = &ctx->engine[engine->id];
> +       unsigned int seq;
> +       ktime_t total;
> +
> +       do {
> +               seq = read_seqbegin(&ce->stats.lock);
> +
> +               total = ce->stats.total;
> +
> +               if (ce->stats.active)
> +                       total = ktime_add(total,
> +                                         ktime_sub(ktime_get(),
> +                                                   ce->stats.start));
> +       } while (read_seqretry(&ce->stats.lock, seq));
> +
> +       return total;
> +}
> +
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
>  #include "selftests/mock_engine.c"
>  #include "selftests/intel_engine_cs.c"
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ddd14e30be6c..930fcf0b1e86 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -389,16 +389,18 @@ execlists_user_end(struct intel_engine_execlists *execlists)
>  }
>  
>  static inline void
> -execlists_context_schedule_in(struct i915_request *rq)
> +execlists_context_schedule_in(struct i915_request *rq, unsigned int port)
>  {
>         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
> -       intel_engine_context_in(rq->engine);
> +       intel_engine_context_in(rq->engine,
> +                               &rq->ctx->engine[rq->engine->id],
> +                               port == 0);
>  }
>  
>  static inline void
>  execlists_context_schedule_out(struct i915_request *rq)
>  {
> -       intel_engine_context_out(rq->engine);
> +       intel_engine_context_out(rq->engine, &rq->ctx->engine[rq->engine->id]);
>         execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
>  }
>  
> @@ -463,7 +465,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>                 if (rq) {
>                         GEM_BUG_ON(count > !n);
>                         if (!count++)
> -                               execlists_context_schedule_in(rq);
> +                               execlists_context_schedule_in(rq, n);
>                         port_set(&port[n], port_pack(rq, count));
>                         desc = execlists_update_context(rq);
>                         GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
> @@ -751,7 +753,9 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
>                           intel_engine_get_seqno(rq->engine));
>  
>                 GEM_BUG_ON(!execlists->active);
> -               intel_engine_context_out(rq->engine);
> +
> +               intel_engine_context_out(rq->engine,
> +                                        &rq->ctx->engine[rq->engine->id]);
>  
>                 execlists_context_status_change(rq,
>                                                 i915_request_completed(rq) ?
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index f24ea9826037..4d122ba1cd30 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -6,6 +6,7 @@
>  #include <linux/seqlock.h>
>  
>  #include "i915_gem_batch_pool.h"
> +#include "i915_gem_context.h"
>  #include "i915_gem_timeline.h"
>  
>  #include "i915_reg.h"
> @@ -1076,25 +1077,44 @@ void intel_engine_dump(struct intel_engine_cs *engine,
>  struct intel_engine_cs *
>  intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
>  
> -static inline void intel_engine_context_in(struct intel_engine_cs *engine)
> +static inline void
> +intel_engine_context_in(struct intel_engine_cs *engine,
> +                       struct intel_context *ce,
> +                       bool submit)
>  {
>         unsigned long flags;
> +       ktime_t now;
>  
>         if (READ_ONCE(engine->stats.enabled) == 0)
>                 return;
>  
>         write_seqlock_irqsave(&engine->stats.lock, flags);
>  
> +       if (submit) {
> +               now = ktime_get();
> +               write_seqlock(&ce->stats.lock);
> +               ce->stats.start = now;
> +               ce->stats.active = true;
> +               write_sequnlock(&ce->stats.lock);
> +       } else {
> +               now = 0;
> +       }

So "submit" means that this is the first request in a sequence. But you
only then update ce->stats.start/active when the context is submitted
the first time in port 0. If it is pulled in via port N, when it is
promoted to port 0, we don't start the client timer.

I must have misunderstood.

> +
>         if (engine->stats.enabled > 0) {
> -               if (engine->stats.active++ == 0)
> -                       engine->stats.start = ktime_get();
> +               if (engine->stats.active++ == 0) {
> +                       if (!now)
> +                               now = ktime_get();
> +                       engine->stats.start = now;
> +               }
>                 GEM_BUG_ON(engine->stats.active == 0);
>         }
>  
>         write_sequnlock_irqrestore(&engine->stats.lock, flags);
>  }
>  
> -static inline void intel_engine_context_out(struct intel_engine_cs *engine)
> +static inline void
> +intel_engine_context_out(struct intel_engine_cs *engine,
> +                        struct intel_context *ce)
>  {
>         unsigned long flags;
>  
> @@ -1104,14 +1124,34 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine)
>         write_seqlock_irqsave(&engine->stats.lock, flags);
>  
>         if (engine->stats.enabled > 0) {
> +               struct execlist_port *next_port = &engine->execlists.port[1];

Yikes, we had better pass in the next port. There's a similarity here
with execlists_begin_user, maybe we should refactor that to help in this
case (single hook?).

Yeah, I'm thinking that we now have 3/4 different cases that try to
update bookkeeping between context switches, that all want the same
hooks.
-Chris