[Intel-gfx] [CI] drm/i915: Track hw reported context runtime
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Sun Feb 16 09:34:03 UTC 2020
On 16/02/2020 00:06, Chris Wilson wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
> GPU saves accumulated context runtime (in CS timestamp units) in PPHWSP
> which will be useful for us in cases when we are not able to track context
> busyness ourselves (like with GuC). Keep a copy of this in struct
> intel_context from where it can be easily read even if the context is not
> pinned.
>
> QQQ: Do we want to make this accounting conditional / able to turn on/off?
>
> v2:
> (Chris)
> * Do not store pphwsp address in intel_context.
> * Log CS wrap-around.
> * Simplify calculation by relying on integer wraparound.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_context.c | 6 +-
> drivers/gpu/drm/i915/gt/intel_context.h | 18 ++++
> drivers/gpu/drm/i915/gt/intel_context_types.h | 11 +++
> drivers/gpu/drm/i915/gt/intel_lrc.c | 42 ++++++++-
> drivers/gpu/drm/i915/gt/selftest_lrc.c | 87 +++++++++++++++++++
> drivers/gpu/drm/i915/i915_gpu_error.c | 11 ++-
> drivers/gpu/drm/i915/i915_gpu_error.h | 4 +
> drivers/gpu/drm/i915/intel_device_info.c | 6 ++
> drivers/gpu/drm/i915/intel_device_info.h | 1 +
> 9 files changed, 181 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
> index e4f89341d17c..8bb444cda14f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> @@ -220,7 +220,9 @@ static void __intel_context_retire(struct i915_active *active)
> {
> struct intel_context *ce = container_of(active, typeof(*ce), active);
>
> - CE_TRACE(ce, "retire\n");
> + CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
> + intel_context_get_total_runtime_ns(ce),
> + intel_context_get_avg_runtime_ns(ce));
>
> set_bit(CONTEXT_VALID_BIT, &ce->flags);
> if (ce->state)
> @@ -281,6 +283,8 @@ intel_context_init(struct intel_context *ce,
> ce->sseu = engine->sseu;
> ce->ring = __intel_context_ring_size(SZ_4K);
>
> + ewma_runtime_init(&ce->runtime.avg);
> +
> ce->vm = i915_vm_get(engine->gt->vm);
>
> INIT_LIST_HEAD(&ce->signal_link);
> diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
> index 604d5cfc46ba..e58da0938dcb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context.h
> @@ -12,6 +12,7 @@
> #include <linux/types.h>
>
> #include "i915_active.h"
> +#include "i915_drv.h"
> #include "intel_context_types.h"
> #include "intel_engine_types.h"
> #include "intel_ring_types.h"
> @@ -227,4 +228,21 @@ intel_context_clear_nopreempt(struct intel_context *ce)
> clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
> }
>
> +static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
> +{
> + const u32 period =
> + RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
> +
> + return ce->runtime.total * period;
> +}
> +
> +static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
> +{
> + const u32 period =
> + RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
> +
> + return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
> +}
> +
> +
> #endif /* __INTEL_CONTEXT_H__ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
> index ca1420fb8b53..6112ec97fbdf 100644
> --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> @@ -7,6 +7,7 @@
> #ifndef __INTEL_CONTEXT_TYPES__
> #define __INTEL_CONTEXT_TYPES__
>
> +#include <linux/average.h>
> #include <linux/kref.h>
> #include <linux/list.h>
> #include <linux/mutex.h>
> @@ -19,6 +20,8 @@
>
> #define CONTEXT_REDZONE POISON_INUSE
>
> +DECLARE_EWMA(runtime, 3, 4);
> +
> struct i915_gem_context;
> struct i915_vma;
> struct intel_context;
> @@ -68,6 +71,14 @@ struct intel_context {
> u64 lrc_desc;
> u32 tag; /* cookie passed to HW to track this context on submission */
>
> + /* Time on GPU as tracked by the hw. */
> + struct {
> + struct ewma_runtime avg;
> + u64 total;
> + u32 last;
> + I915_SELFTEST_DECLARE(u32 underflow);
> + } runtime;
> +
> unsigned int active_count; /* protected by timeline->mutex */
>
> atomic_t pin_count;
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index c3d7727021db..c09079c93345 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1195,6 +1195,38 @@ static void reset_active(struct i915_request *rq,
> ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
> }
>
> +static u32 intel_context_get_runtime(const struct intel_context *ce)
> +{
> + /*
> + * PPHWSP is one page before the lrc state page and in it at
> + * dword 16 we have cumulative context runtime in CS timestamp ticks.
> + */
I read somewhere that this accumulated runtime counter is incremented
only once every 8th CS timestamp tick.
-Lionel
> + BUILD_BUG_ON((LRC_STATE_PN - LRC_PPHWSP_PN) != 1);
> + return ce->lrc_reg_state[-1024 + 16];
> +}
> +
> +static void intel_context_update_runtime(struct intel_context *ce)
> +{
> + u32 new, old;
> +
> + if (intel_context_is_barrier(ce))
> + return;
> +
> + old = ce->runtime.last;
> + new = intel_context_get_runtime(ce);
> + if ((s32)(new - old) <= 0) {
> + CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
> + old, new, new - old);
> + I915_SELFTEST_ONLY(ce->runtime.underflow++);
> + GEM_TRACE_DUMP();
> + return;
> + }
> +
> + ewma_runtime_add(&ce->runtime.avg, new - old);
> + ce->runtime.total += new - old;
> + ce->runtime.last = new;
> +}
> +
> static inline struct intel_engine_cs *
> __execlists_schedule_in(struct i915_request *rq)
> {
> @@ -1278,6 +1310,7 @@ __execlists_schedule_out(struct i915_request *rq,
> i915_request_completed(rq))
> intel_engine_add_retire(engine, ce->timeline);
>
> + intel_context_update_runtime(ce);
> intel_engine_context_out(engine);
> execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
> intel_gt_pm_put_async(engine->gt);
> @@ -4607,8 +4640,13 @@ populate_lr_context(struct intel_context *ce,
> inhibit = false;
> }
>
> - /* The second page of the context object contains some fields which must
> - * be set up prior to the first execution. */
> + /* Clear the ppHWSP (inc. per-context counters) */
> + memset(vaddr, 0, PAGE_SIZE);
> +
> + /*
> + * The second page of the context object contains some registers which
> + * must be set up prior to the first execution.
> + */
> execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
> ce, engine, ring, inhibit);
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 64761e619876..8c6edb6904cd 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -4450,6 +4450,92 @@ static int live_gpr_clear(void *arg)
> return err;
> }
>
> +static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
> +{
> + struct intel_context *ce;
> + struct i915_request *rq;
> + IGT_TIMEOUT(end_time);
> + bool timeout = false;
> + int err;
> +
> + ce = intel_context_create(engine);
> + if (IS_ERR(ce))
> + return PTR_ERR(ce);
> +
> + ce->runtime.underflow = 0;
> +
> + while (!timeout) {
> + unsigned int loop = 1024;
> +
> + while (loop) {
> + rq = intel_context_create_request(ce);
> + if (IS_ERR(rq)) {
> + err = PTR_ERR(rq);
> + goto err_rq;
> + }
> +
> + if (--loop == 0)
> + i915_request_get(rq);
> +
> + i915_request_add(rq);
> + }
> + timeout = __igt_timeout(end_time, NULL);
> + if (!timeout)
> + i915_request_put(rq);
> + }
> +
> + err = i915_request_wait(rq, 0, HZ / 5);
> + if (err < 0) {
> + pr_err("%s: request not completed! (err=%d)\n",
> + engine->name, err);
> + goto err_wait;
> + }
> +
> + igt_flush_test(engine->i915);
> +
> + pr_info("%s: pphwsp runtime %lluns, average %llu\n",
> + engine->name,
> + intel_context_get_total_runtime_ns(ce),
> + intel_context_get_avg_runtime_ns(ce));
> +
> + err = 0;
> + if (ce->runtime.underflow) {
> + pr_err("%s: pphwsp underflow %u time(s)!\n",
> + engine->name, ce->runtime.underflow);
> + err = -EOVERFLOW;
> + }
> +
> +err_wait:
> + i915_request_put(rq);
> +err_rq:
> + intel_context_put(ce);
> + return err;
> +}
> +
> +static int live_pphwsp_runtime(void *arg)
> +{
> + struct intel_gt *gt = arg;
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + int err = 0;
> +
> + /*
> + * Check that cumulative context runtime as stored in the pphwsp[16]
> + * is monotonic.
> + */
> +
> + for_each_engine(engine, gt, id) {
> + err = __live_pphwsp_runtime(engine);
> + if (err)
> + break;
> + }
> +
> + if (igt_flush_test(gt->i915))
> + err = -EIO;
> +
> + return err;
> +}
> +
> int intel_lrc_live_selftests(struct drm_i915_private *i915)
> {
> static const struct i915_subtest tests[] = {
> @@ -4457,6 +4543,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
> SUBTEST(live_lrc_fixed),
> SUBTEST(live_lrc_state),
> SUBTEST(live_gpr_clear),
> + SUBTEST(live_pphwsp_runtime),
> };
>
> if (!HAS_LOGICAL_RING_CONTEXTS(i915))
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b2ed977ed971..3052c4eaf9f6 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -481,9 +481,13 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
> const char *header,
> const struct i915_gem_context_coredump *ctx)
> {
> - err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
> + const u32 period = RUNTIME_INFO(m->i915)->cs_timestamp_period_ns;
> +
> + err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n",
> header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
> - ctx->guilty, ctx->active);
> + ctx->guilty, ctx->active,
> + ctx->total_runtime * period,
> + mul_u32_u32(ctx->avg_runtime, period));
> }
>
> static struct i915_vma_coredump *
> @@ -1260,6 +1264,9 @@ static bool record_context(struct i915_gem_context_coredump *e,
> e->guilty = atomic_read(&ctx->guilty_count);
> e->active = atomic_read(&ctx->active_count);
>
> + e->total_runtime = rq->context->runtime.total;
> + e->avg_runtime = ewma_runtime_read(&rq->context->runtime.avg);
> +
> simulated = i915_gem_context_no_error_capture(ctx);
>
> i915_gem_context_put(ctx);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index b35bc9edd733..0d1f6c8ff355 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -88,6 +88,10 @@ struct intel_engine_coredump {
>
> struct i915_gem_context_coredump {
> char comm[TASK_COMM_LEN];
> +
> + u64 total_runtime;
> + u32 avg_runtime;
> +
> pid_t pid;
> int active;
> int guilty;
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index fcdacd6d4aa5..113decd59b40 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -1045,6 +1045,12 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>
> /* Initialize command stream timestamp frequency */
> runtime->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
> + runtime->cs_timestamp_period_ns =
> + div_u64(1e6, runtime->cs_timestamp_frequency_khz);
> + drm_dbg(&dev_priv->drm,
> + "CS timestamp wraparound in %lldms\n",
> + div_u64(mul_u32_u32(runtime->cs_timestamp_period_ns, U32_MAX),
> + USEC_PER_SEC));
> }
>
> void intel_driver_caps_print(const struct intel_driver_caps *caps,
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 7d4d122d2182..f8bfa26388c1 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -217,6 +217,7 @@ struct intel_runtime_info {
> struct sseu_dev_info sseu;
>
> u32 cs_timestamp_frequency_khz;
> + u32 cs_timestamp_period_ns;
>
> /* Media engine access to SFC per instance */
> u8 vdbox_sfc_access;
More information about the Intel-gfx
mailing list