[Intel-gfx] [RFC 5/8] drm/i915: Track per-context engine busyness
Chris Wilson
chris at chris-wilson.co.uk
Thu Dec 19 20:51:54 UTC 2019
Quoting Tvrtko Ursulin (2019-12-19 18:00:16)
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
> Some customers want to know how much GPU time their clients are using
> in order to make dynamic load balancing decisions.
>
> With the hooks already in place which track the overall engine busyness,
> we can extend that slightly to split that time between contexts.
>
> v2: Fix accounting for tail updates.
> v3: Rebase.
> v4: Mark currently running contexts as active on stats enable.
> v5: Include some headers to fix the build.
> v6: Added fine grained lock.
> v7: Convert to seqlock. (Chris Wilson)
> v8: Rebase and tidy with helpers.
> v9: Refactor.
> v10: Move recording start to promotion. (Chris)
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_context.c | 20 +++++++
> drivers/gpu/drm/i915/gt/intel_context.h | 11 ++++
> drivers/gpu/drm/i915/gt/intel_context_types.h | 9 ++++
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 16 +++++-
> drivers/gpu/drm/i915/gt/intel_lrc.c | 52 ++++++++++++++++---
> 5 files changed, 100 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index b1e346d2d35f..b211b48d6cae 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -243,6 +243,7 @@ intel_context_init(struct intel_context *ce,
> INIT_LIST_HEAD(&ce->signals);
>
> mutex_init(&ce->pin_mutex);
> + seqlock_init(&ce->stats.lock);
>
> i915_active_init(&ce->active,
> __intel_context_active, __intel_context_retire);
> @@ -337,6 +338,25 @@ struct i915_request *intel_context_create_request(struct intel_context *ce)
> return rq;
> }
>
> +ktime_t intel_context_get_busy_time(struct intel_context *ce)
> +{
> + unsigned int seq;
> + ktime_t total;
> +
> + do {
> + seq = read_seqbegin(&ce->stats.lock);
> +
> + total = ce->stats.total;
> +
> + if (ce->stats.active)
> + total = ktime_add(total,
> + ktime_sub(ktime_get(),
> + ce->stats.start));
> + } while (read_seqretry(&ce->stats.lock, seq));
> +
> + return total;
> +}
> +
> #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> #include "selftest_context.c"
> #endif
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index b39eb1fcfbca..3a15cf32f0a3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -160,4 +160,15 @@ static inline struct intel_ring *__intel_context_ring_size(u64 sz)
> return u64_to_ptr(struct intel_ring, sz);
> }
>
> +static inline void
> +__intel_context_stats_start(struct intel_context_stats *stats, ktime_t now)
> +{
> + if (!stats->active) {
> + stats->start = now;
> + stats->active = true;
> + }
> +}
> +
> +ktime_t intel_context_get_busy_time(struct intel_context *ce);
> +
> #endif /* __INTEL_CONTEXT_H__ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index d1204cc899a3..12cbad0798cb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -11,6 +11,7 @@
> #include <linux/list.h>
> #include <linux/mutex.h>
> #include <linux/types.h>
> +#include <linux/seqlock.h>
>
> #include "i915_active_types.h"
> #include "i915_utils.h"
> @@ -76,6 +77,14 @@ struct intel_context {
>
> /** sseu: Control eu/slice partitioning */
> struct intel_sseu sseu;
> +
> + /** stats: Context GPU engine busyness tracking. */
> + struct intel_context_stats {
> + seqlock_t lock;
> + bool active;
> + ktime_t start;
> + ktime_t total;
> + } stats;
> };
>
> #endif /* __INTEL_CONTEXT_TYPES__ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 3d1d48bf90cf..ac08781c8b24 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1577,8 +1577,20 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
>
> engine->stats.enabled_at = ktime_get();
>
> - /* XXX submission method oblivious? */
> - for (port = execlists->active; (rq = *port); port++)
> + /*
> + * Mark currently running context as active.
> + * XXX submission method oblivious?
> + */
> +
> + rq = NULL;
> + port = execlists->active;
> + if (port)
> + rq = *port;
> + if (rq)
> + __intel_context_stats_start(&rq->hw_context->stats,
> + engine->stats.enabled_at);
> +
> + for (; (rq = *port); port++)
> engine->stats.active++;
>
> for (port = execlists->pending; (rq = *port); port++) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 4db54fd6a2fe..b186f06e508d 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -940,6 +940,7 @@ static void intel_engine_context_in(struct intel_engine_cs *engine)
> if (engine->stats.enabled > 0) {
> if (engine->stats.active++ == 0)
> engine->stats.start = ktime_get();
> +
> GEM_BUG_ON(engine->stats.active == 0);
> }
>
> @@ -1088,6 +1089,32 @@ static void reset_active(struct i915_request *rq,
> ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
> }
>
> +static void
> +intel_context_stats_start(struct intel_context_stats *stats)
> +{
> + unsigned long flags;
> +
> + write_seqlock_irqsave(&stats->lock, flags);
> + __intel_context_stats_start(stats, ktime_get());
> + write_sequnlock_irqrestore(&stats->lock, flags);
> +}
> +
> +static void
> +intel_context_stats_stop(struct intel_context_stats *stats)
> +{
> + unsigned long flags;
> +
> + if (!READ_ONCE(stats->active))
> + return;
> +
> + write_seqlock_irqsave(&stats->lock, flags);
> + GEM_BUG_ON(!READ_ONCE(stats->active));
> + stats->total = ktime_add(stats->total,
> + ktime_sub(ktime_get(), stats->start));
> + stats->active = false;
> + write_sequnlock_irqrestore(&stats->lock, flags);
> +}
> +
> static inline struct intel_engine_cs *
> __execlists_schedule_in(struct i915_request *rq)
> {
> @@ -1155,7 +1182,7 @@ static inline void
> __execlists_schedule_out(struct i915_request *rq,
> struct intel_engine_cs * const engine)
> {
> - struct intel_context * const ce = rq->hw_context;
> + struct intel_context *ce = rq->hw_context;
>
> /*
> * NB process_csb() is not under the engine->active.lock and hence
> @@ -1172,6 +1199,7 @@ __execlists_schedule_out(struct i915_request *rq,
> intel_engine_add_retire(engine, ce->timeline);
>
> intel_engine_context_out(engine);
> + intel_context_stats_stop(&ce->stats);
> execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> intel_gt_pm_put_async(engine->gt);
>
> @@ -2174,9 +2202,11 @@ static void process_csb(struct intel_engine_cs *engine)
> promote = gen8_csb_parse(execlists, buf + 2 * head);
> if (promote) {
> struct i915_request * const *old = execlists->active;
> + struct i915_request *rq;
>
> /* Point active to the new ELSP; prevent overwriting */
> WRITE_ONCE(execlists->active, execlists->pending);
> +
> set_timeslice(engine);
>
> if (!inject_preempt_hang(execlists))
> @@ -2196,8 +2226,16 @@ static void process_csb(struct intel_engine_cs *engine)
> sizeof(*execlists->pending)));
>
> WRITE_ONCE(execlists->pending[0], NULL);
> +
> + rq = *execlists->active;
> + if (rq)
> + intel_context_stats_start(&rq->hw_context->stats);
Code duplication :) Both branches are doing
rq = *execlists->active;
if (rq)
intel_context_stats_start(&rq->hw_context->stats);
as their final step; just move it to after the if.
Cost of maintaining stats was unnoticeable in profiles and did not
affect wsim, so lgtm.
-Chris
More information about the Intel-gfx
mailing list