[RFC 1/3] drm/i915: Track per context/class rq completion frequency
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Jan 10 10:59:44 UTC 2020
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
* With some sort of sliding exponential average window.
* Sketch only.
* Execlists only.
...
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_context.c | 12 +++++
.../gpu/drm/i915/gem/i915_gem_context_types.h | 4 ++
drivers/gpu/drm/i915/gt/intel_context.c | 53 +++++++++++++++++++
drivers/gpu/drm/i915/gt/intel_context_types.h | 3 ++
drivers/gpu/drm/i915/gt/intel_lrc.c | 17 ++++--
5 files changed, 86 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index a2e57e62af30..b952fc1c7c8b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -576,6 +576,7 @@ __create_context(struct drm_i915_private *i915)
{
struct i915_gem_context *ctx;
struct i915_gem_engines *e;
+ ktime_t now;
int err;
int i;
@@ -598,6 +599,17 @@ __create_context(struct drm_i915_private *i915)
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+ BUILD_BUG_ON(ARRAY_SIZE(ctx->rq_retired) !=
+ ARRAY_SIZE(ctx->rq_retire_freq));
+ BUILD_BUG_ON(ARRAY_SIZE(ctx->rq_retired) !=
+ ARRAY_SIZE(ctx->last_update));
+ now = ktime_get();
+ for (i = 0; i < ARRAY_SIZE(ctx->rq_retired); i++) {
+ atomic_set(&ctx->rq_retired[i], 0);
+// ctx->rq_retire_freq[i] = 1;
+ ctx->last_update[i] = ktime_get();
+ }
+
/* NB: Mark all slices as needing a remap so that when the context first
* loads it will restore whatever remap state already exists. If there
* is no remap info, it will be a NOP. */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 017ca803ab47..a05709122cb7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -120,6 +120,10 @@ struct i915_gem_context {
*/
struct rcu_head rcu;
+ atomic_t rq_retired[MAX_ENGINE_CLASS + 1];
+ int rq_retire_freq[MAX_ENGINE_CLASS + 1];
+ ktime_t last_update[MAX_ENGINE_CLASS + 1];
+
/**
* @user_flags: small set of booleans controlled by the user
*/
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 9796a54b4f47..5c272c5d5d85 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -105,6 +105,8 @@ int __intel_context_do_pin(struct intel_context *ce)
return -EINTR;
if (likely(!atomic_read(&ce->pin_count))) {
+ struct i915_gem_context *ctx;
+
err = intel_context_active_acquire(ce);
if (unlikely(err))
goto err;
@@ -116,6 +118,15 @@ int __intel_context_do_pin(struct intel_context *ce)
CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n",
ce->ring->head, ce->ring->tail);
+ ctx = rcu_dereference_protected(ce->gem_context, true);
+ if (ctx) {
+ GEM_BUG_ON(ce->engine->class >=
+ ARRAY_SIZE(ctx->rq_retired));
+ ce->ctx_rq_retired_at_pin =
+ atomic_read(&ctx->rq_retired[ce->engine->class]);
+ ce->pin_time = ktime_get();
+ }
+
smp_mb__before_atomic(); /* flush pin before it is visible */
}
@@ -134,9 +145,51 @@ int __intel_context_do_pin(struct intel_context *ce)
void intel_context_unpin(struct intel_context *ce)
{
+ struct i915_gem_context *ctx;
+
if (!atomic_dec_and_test(&ce->pin_count))
return;
+ ctx = rcu_dereference_protected(ce->gem_context, true);
+ if (ctx) {
+ int rq_retired, pinned_time, a;
+ unsigned int class = ce->engine->class;
+ long prev, retire_freq, freq;
+ ktime_t now;
+
+ GEM_BUG_ON(class >= ARRAY_SIZE(ctx->rq_retired));
+ rq_retired = atomic_read(&ctx->rq_retired[class]);
+
+ if (rq_retired >= ce->ctx_rq_retired_at_pin)
+ rq_retired -= ce->ctx_rq_retired_at_pin;
+ else
+ rq_retired += INT_MAX - ce->ctx_rq_retired_at_pin + 1;
+
+ now = ktime_get();
+ pinned_time = ktime_to_ms(now - ce->pin_time);
+ if (pinned_time)
+ retire_freq = rq_retired * 1000 / pinned_time;
+ else
+ retire_freq = rq_retired ? 1 : 0;
+ freq = retire_freq;
+
+ // FIXME needs a lock to be atomic
+ prev = ctx->rq_retire_freq[class];
+ a = ktime_to_ms(ktime_sub(now, ctx->last_update[class]));
+ // 5 second window before weight goes to zero
+ if (a > 5000)
+ a = 5000;
+ a = (5000 - a) / 5; // a=0..1, but keep in ms for headroom
+ retire_freq = prev * 1000 + a * (retire_freq - prev);
+ retire_freq /= 1000;
+ ctx->rq_retire_freq[class] = retire_freq;
+ ctx->last_update[class] = now;
+#if 0
+ printk("unpin ce=%p rq=%4d time=%4dms freq/avg=%5ld/%5ld (prev=%5ld a=%3d)\n",
+ ce, rq_retired, pinned_time, freq, retire_freq, prev, a);
+#endif
+ }
+
CE_TRACE(ce, "unpin\n");
ce->ops->unpin(ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ca1420fb8b53..d678615a3c74 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -83,6 +83,9 @@ struct intel_context {
/** sseu: Control eu/slice partitioning */
struct intel_sseu sseu;
+
+ unsigned int ctx_rq_retired_at_pin;
+ ktime_t pin_time;
};
#endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9af1b2b493f4..439cd46d6156 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2305,7 +2305,10 @@ static void process_csb(struct intel_engine_cs *engine)
WRITE_ONCE(execlists->pending[0], NULL);
} else {
- GEM_BUG_ON(!*execlists->active);
+ struct i915_gem_context *ctx;
+ struct i915_request *rq = *execlists->active;
+
+ GEM_BUG_ON(!rq);
/* port0 completed, advanced to port1 */
trace_ports(execlists, "completed", execlists->active);
@@ -2316,9 +2319,17 @@ static void process_csb(struct intel_engine_cs *engine)
* coherent (visible from the CPU) before the
* user interrupt and CSB is processed.
*/
- GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
+ GEM_BUG_ON(!i915_request_completed(rq) &&
!reset_in_progress(execlists));
- execlists_schedule_out(*execlists->active++);
+ execlists_schedule_out(rq);
+ execlists->active++;
+
+ ctx = i915_request_gem_context(rq);
+ if (ctx) {
+ GEM_BUG_ON(rq->engine->class >=
+ ARRAY_SIZE(ctx->rq_retired));
+ atomic_inc(&ctx->rq_retired[engine->class]);
+ }
GEM_BUG_ON(execlists->active - execlists->inflight >
execlists_num_ports(execlists));
--
2.20.1
More information about the Intel-gfx-trybot
mailing list