[PATCH 1/2] drm/i915/gt: Reduce engine runtime stats from seqlock to a latch

Chris Wilson chris at chris-wilson.co.uk
Sun Jan 17 09:56:47 UTC 2021


Since we can compute the elapsed time to add to the total, during the
PMU sample we only need to have a consistent view of the (start, total,
active) tuple to be able to locally determine the runtime. That can be
arranged by a pair of memory barriers and careful sequencing of the
writes and reads.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Andi Shyti <andi.shyti at intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    | 37 +++++++------------
 drivers/gpu/drm/i915/gt/intel_engine_stats.h | 39 +++++---------------
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  5 ---
 3 files changed, 24 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index fb1b1d096975..21488b8572de 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -342,7 +342,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->schedule = NULL;
 
 	ewma__engine_latency_init(&engine->latency);
-	seqcount_init(&engine->stats.lock);
 
 	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
 
@@ -1744,22 +1743,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	intel_engine_print_breadcrumbs(engine, m);
 }
 
-static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
-					    ktime_t *now)
-{
-	ktime_t total = engine->stats.total;
-
-	/*
-	 * If the engine is executing something at the moment
-	 * add it to the total.
-	 */
-	*now = ktime_get();
-	if (READ_ONCE(engine->stats.active))
-		total = ktime_add(total, ktime_sub(*now, engine->stats.start));
-
-	return total;
-}
-
 /**
  * intel_engine_get_busy_time() - Return current accumulated engine busyness
  * @engine: engine to report on
@@ -1769,15 +1752,23 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
  */
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
 {
-	unsigned int seq;
 	ktime_t total;
+	ktime_t start;
 
-	do {
-		seq = read_seqcount_begin(&engine->stats.lock);
-		total = __intel_engine_get_busy_time(engine, now);
-	} while (read_seqcount_retry(&engine->stats.lock, seq));
+	/*
+	 * If the engine is executing something at the moment
+	 * add it to the total.
+	 */
+	*now = ktime_get();
 
-	return total;
+	total = engine->stats.total;
+	start = READ_ONCE(engine->stats.start);
+	if (start) {
+		smp_rmb(); /* pairs with intel_engine_context_in/out */
+		start = ktime_sub(*now, start);
+	}
+
+	return ktime_add(total, start);
 }
 
 static bool match_ring(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h b/drivers/gpu/drm/i915/gt/intel_engine_stats.h
index 24fbdd94351a..e94d23242093 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_stats.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h
@@ -15,46 +15,27 @@
 
 static inline void intel_engine_context_in(struct intel_engine_cs *engine)
 {
-	unsigned long flags;
-
-	if (engine->stats.active) {
-		engine->stats.active++;
+	if (engine->stats.active++)
 		return;
-	}
 
-	/* The writer is serialised; but the pmu reader may be from hardirq */
-	local_irq_save(flags);
-	write_seqcount_begin(&engine->stats.lock);
-
-	engine->stats.start = ktime_get();
-	engine->stats.active++;
-
-	write_seqcount_end(&engine->stats.lock);
-	local_irq_restore(flags);
-
-	GEM_BUG_ON(!engine->stats.active);
+	smp_wmb(); /* pairs with intel_engine_get_busy_time() */
+	WRITE_ONCE(engine->stats.start, ktime_get());
 }
 
 static inline void intel_engine_context_out(struct intel_engine_cs *engine)
 {
-	unsigned long flags;
+	ktime_t total;
 
 	GEM_BUG_ON(!engine->stats.active);
-	if (engine->stats.active > 1) {
-		engine->stats.active--;
+	if (--engine->stats.active)
 		return;
-	}
 
-	local_irq_save(flags);
-	write_seqcount_begin(&engine->stats.lock);
+	total = ktime_sub(ktime_get(), engine->stats.start);
+	total = ktime_add(engine->stats.total, total);
 
-	engine->stats.active--;
-	engine->stats.total =
-		ktime_add(engine->stats.total,
-			  ktime_sub(ktime_get(), engine->stats.start));
-
-	write_seqcount_end(&engine->stats.lock);
-	local_irq_restore(flags);
+	WRITE_ONCE(engine->stats.start, 0);
+	smp_wmb(); /* pairs with intel_engine_get_busy_time() */
+	engine->stats.total = total;
 }
 
 #endif /* __INTEL_ENGINE_STATS_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index d2346b425547..ed13012b9338 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -518,11 +518,6 @@ struct intel_engine_cs {
 		 */
 		unsigned int active;
 
-		/**
-		 * @lock: Lock protecting the below fields.
-		 */
-		seqcount_t lock;
-
 		/**
 		 * @total: Total time this engine was busy.
 		 *
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list