[Intel-gfx] [RFC 1/5] drm/i915: Track per-context engine busyness

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Oct 25 14:21:27 UTC 2019


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Some customers want to know how much of the GPU time are their clients
using in order to make dynamic load balancing decisions.

With the hooks already in place which track the overall engine busyness,
we can extend that slightly to split that time between contexts.

v2: Fix accounting for tail updates.
v3: Rebase.
v4: Mark currently running contexts as active on stats enable.
v5: Include some headers to fix the build.
v6: Added fine grained lock.
v7: Convert to seqlock. (Chris Wilson)
v8: Rebase and tidy with helpers.
v9: Rebase.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 20 ++++++
 drivers/gpu/drm/i915/gt/intel_context.h       |  9 +++
 drivers/gpu/drm/i915/gt/intel_context_types.h |  9 +++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     | 16 ++++-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 66 ++++++++++++++++---
 5 files changed, 108 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ee9d2bcd2c13..3d68720df512 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -248,6 +248,7 @@ intel_context_init(struct intel_context *ce,
 	INIT_LIST_HEAD(&ce->signals);
 
 	mutex_init(&ce->pin_mutex);
+	seqlock_init(&ce->stats.lock);
 
 	i915_active_init(&ce->active,
 			 __intel_context_active, __intel_context_retire);
@@ -348,6 +349,25 @@ struct i915_request *intel_context_create_request(struct intel_context *ce)
 	return rq;
 }
 
+ktime_t intel_context_get_busy_time(struct intel_context *ce)
+{
+	unsigned int seq;
+	ktime_t total;
+
+	do {
+		seq = read_seqbegin(&ce->stats.lock);
+
+		total = ce->stats.total;
+
+		if (ce->stats.active)
+			total = ktime_add(total,
+					  ktime_sub(ktime_get(),
+						    ce->stats.start));
+	} while (read_seqretry(&ce->stats.lock, seq));
+
+	return total;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_context.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 68b3d317d959..c7ab8efa3573 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -153,4 +153,13 @@ static inline struct intel_ring *__intel_context_ring_size(u64 sz)
 	return u64_to_ptr(struct intel_ring, sz);
 }
 
+static inline void
+__intel_context_stats_start(struct intel_context_stats *stats, ktime_t now)
+{
+	stats->start = now;
+	stats->active = true;
+}
+
+ktime_t intel_context_get_busy_time(struct intel_context *ce);
+
 #endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 6959b05ae5f8..b3384e01d033 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -11,6 +11,7 @@
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/types.h>
+#include <linux/seqlock.h>
 
 #include "i915_active_types.h"
 #include "i915_utils.h"
@@ -75,6 +76,14 @@ struct intel_context {
 
 	/** sseu: Control eu/slice partitioning */
 	struct intel_sseu sseu;
+
+	/** stats: Context GPU engine busyness tracking. */
+	struct intel_context_stats {
+		seqlock_t lock;
+		bool active;
+		ktime_t start;
+		ktime_t total;
+	} stats;
 };
 
 #endif /* __INTEL_CONTEXT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 9cc1ea6519ec..6792ec01f3f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1568,8 +1568,20 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 		engine->stats.enabled_at = ktime_get();
 
-		/* XXX submission method oblivious? */
-		for (port = execlists->active; (rq = *port); port++)
+		/*
+		 * Mark currently running context as active.
+		 * XXX submission method oblivious?
+		 */
+
+		rq = NULL;
+		port = execlists->active;
+		if (port)
+			rq = *port;
+		if (rq)
+			__intel_context_stats_start(&rq->hw_context->stats,
+						    engine->stats.enabled_at);
+
+		for (; (rq = *port); port++)
 			engine->stats.active++;
 
 		for (port = execlists->pending; (rq = *port); port++) {
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 523de1fd4452..2305d7a7ac68 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -944,26 +944,60 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
 				   status, rq);
 }
 
-static void intel_engine_context_in(struct intel_engine_cs *engine)
+static void
+intel_context_stats_start(struct intel_context_stats *stats, ktime_t now)
+{
+	write_seqlock(&stats->lock);
+	__intel_context_stats_start(stats, now);
+	write_sequnlock(&stats->lock);
+}
+
+static void
+intel_context_stats_stop(struct intel_context_stats *stats, ktime_t now)
+{
+	write_seqlock(&stats->lock);
+	if (stats->active) {
+		stats->total = ktime_add(stats->total,
+					 ktime_sub(now, stats->start));
+		stats->active = false;
+	}
+	write_sequnlock(&stats->lock);
+}
+
+static void intel_context_in(struct intel_context *ce, bool submit)
 {
+	struct intel_engine_cs *engine = ce->engine;
 	unsigned long flags;
+	ktime_t now;
 
 	if (READ_ONCE(engine->stats.enabled) == 0)
 		return;
 
 	write_seqlock_irqsave(&engine->stats.lock, flags);
 
+	if (submit) {
+		now = ktime_get();
+		intel_context_stats_start(&ce->stats, now);
+	} else {
+		now = 0;
+	}
+
 	if (engine->stats.enabled > 0) {
-		if (engine->stats.active++ == 0)
-			engine->stats.start = ktime_get();
+		if (engine->stats.active++ == 0) {
+			if (!now)
+				now = ktime_get();
+			engine->stats.start = now;
+		}
+
 		GEM_BUG_ON(engine->stats.active == 0);
 	}
 
 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
 }
 
-static void intel_engine_context_out(struct intel_engine_cs *engine)
+static void intel_context_out(struct intel_context *ce)
 {
+	struct intel_engine_cs *engine = ce->engine;
 	unsigned long flags;
 
 	if (READ_ONCE(engine->stats.enabled) == 0)
@@ -972,14 +1006,25 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
 	write_seqlock_irqsave(&engine->stats.lock, flags);
 
 	if (engine->stats.enabled > 0) {
+		ktime_t now = ktime_get();
+		struct i915_request *next;
 		ktime_t last;
 
+		intel_context_stats_stop(&ce->stats, now);
+
+		next = NULL;
+		if (engine->execlists.active)
+			next = *engine->execlists.active;
+		if (next)
+			intel_context_stats_start(&next->hw_context->stats,
+						  now);
+
 		if (engine->stats.active && --engine->stats.active == 0) {
 			/*
 			 * Decrement the active context count and in case GPU
 			 * is now idle add up to the running total.
 			 */
-			last = ktime_sub(ktime_get(), engine->stats.start);
+			last = ktime_sub(now, engine->stats.start);
 
 			engine->stats.total = ktime_add(engine->stats.total,
 							last);
@@ -989,7 +1034,7 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
 			 * the first event in which case we account from the
 			 * time stats gathering was turned on.
 			 */
-			last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+			last = ktime_sub(now, engine->stats.enabled_at);
 
 			engine->stats.total = ktime_add(engine->stats.total,
 							last);
@@ -1000,7 +1045,7 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
 }
 
 static inline struct intel_engine_cs *
-__execlists_schedule_in(struct i915_request *rq)
+__execlists_schedule_in(struct i915_request *rq, int idx)
 {
 	struct intel_engine_cs * const engine = rq->engine;
 	struct intel_context * const ce = rq->hw_context;
@@ -1021,7 +1066,7 @@ __execlists_schedule_in(struct i915_request *rq)
 
 	intel_gt_pm_get(engine->gt);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
-	intel_engine_context_in(engine);
+	intel_context_in(rq->hw_context, idx == 0);
 
 	return engine;
 }
@@ -1038,7 +1083,8 @@ execlists_schedule_in(struct i915_request *rq, int idx)
 	old = READ_ONCE(ce->inflight);
 	do {
 		if (!old) {
-			WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
+			WRITE_ONCE(ce->inflight,
+				   __execlists_schedule_in(rq, idx));
 			break;
 		}
 	} while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
@@ -1114,7 +1160,7 @@ __execlists_schedule_out(struct i915_request *rq,
 {
 	struct intel_context * const ce = rq->hw_context;
 
-	intel_engine_context_out(engine);
+	intel_context_out(rq->hw_context);
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
 	intel_gt_pm_put(engine->gt);
 
-- 
2.20.1



More information about the Intel-gfx mailing list