[PATCH 42/42] drm/i915/gt: Reduce engine runtime stats from seqlock to a latch
Chris Wilson
chris at chris-wilson.co.uk
Thu Jan 28 14:55:28 UTC 2021
Since we can compute the elapsed time to add to the total during the
PMU sample, we only need to have a consistent view of the (start, total,
active) tuple to be able to locally determine the runtime. That can be
arranged by a pair of memory barriers and careful sequencing of the
writes and reads.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Andi Shyti <andi.shyti at intel.com>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 37 +++++++------------
drivers/gpu/drm/i915/gt/intel_engine_stats.h | 39 +++++---------------
drivers/gpu/drm/i915/gt/intel_engine_types.h | 5 ---
3 files changed, 24 insertions(+), 57 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index e4ab914a6077..f56f9803ac46 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -322,7 +322,6 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
DRIVER_CAPS(i915)->has_logical_contexts = true;
ewma__engine_latency_init(&engine->latency);
- seqcount_init(&engine->stats.lock);
ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
@@ -1622,22 +1621,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
intel_engine_print_breadcrumbs(engine, m);
}
-static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
- ktime_t *now)
-{
- ktime_t total = engine->stats.total;
-
- /*
- * If the engine is executing something at the moment
- * add it to the total.
- */
- *now = ktime_get();
- if (READ_ONCE(engine->stats.active))
- total = ktime_add(total, ktime_sub(*now, engine->stats.start));
-
- return total;
-}
-
/**
* intel_engine_get_busy_time() - Return current accumulated engine busyness
* @engine: engine to report on
@@ -1647,15 +1630,23 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
*/
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
{
- unsigned int seq;
ktime_t total;
+ ktime_t start;
- do {
- seq = read_seqcount_begin(&engine->stats.lock);
- total = __intel_engine_get_busy_time(engine, now);
- } while (read_seqcount_retry(&engine->stats.lock, seq));
+ /*
+ * If the engine is executing something at the moment
+ * add it to the total.
+ */
+ *now = ktime_get();
- return total;
+ total = engine->stats.total;
+ start = READ_ONCE(engine->stats.start);
+ if (start) {
+ smp_rmb(); /* pairs with intel_engine_context_in/out */
+ start = ktime_sub(*now, start);
+ }
+
+ return ktime_add(total, start);
}
struct i915_request *
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h b/drivers/gpu/drm/i915/gt/intel_engine_stats.h
index 24fbdd94351a..e94d23242093 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_stats.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h
@@ -15,46 +15,27 @@
static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
- unsigned long flags;
-
- if (engine->stats.active) {
- engine->stats.active++;
+ if (engine->stats.active++)
return;
- }
- /* The writer is serialised; but the pmu reader may be from hardirq */
- local_irq_save(flags);
- write_seqcount_begin(&engine->stats.lock);
-
- engine->stats.start = ktime_get();
- engine->stats.active++;
-
- write_seqcount_end(&engine->stats.lock);
- local_irq_restore(flags);
-
- GEM_BUG_ON(!engine->stats.active);
+ smp_wmb(); /* pairs with intel_engine_get_busy_time() */
+ WRITE_ONCE(engine->stats.start, ktime_get());
}
static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
- unsigned long flags;
+ ktime_t total;
GEM_BUG_ON(!engine->stats.active);
- if (engine->stats.active > 1) {
- engine->stats.active--;
+ if (--engine->stats.active)
return;
- }
- local_irq_save(flags);
- write_seqcount_begin(&engine->stats.lock);
+ total = ktime_sub(ktime_get(), engine->stats.start);
+ total = ktime_add(engine->stats.total, total);
- engine->stats.active--;
- engine->stats.total =
- ktime_add(engine->stats.total,
- ktime_sub(ktime_get(), engine->stats.start));
-
- write_seqcount_end(&engine->stats.lock);
- local_irq_restore(flags);
+ WRITE_ONCE(engine->stats.start, 0);
+ smp_wmb(); /* pairs with intel_engine_get_busy_time() */
+ engine->stats.total = total;
}
#endif /* __INTEL_ENGINE_STATS_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 4174158c5870..711092cd61c7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -473,11 +473,6 @@ struct intel_engine_cs {
*/
unsigned int active;
- /**
- * @lock: Lock protecting the below fields.
- */
- seqcount_t lock;
-
/**
* @total: Total time this engine was busy.
*
--
2.20.1
More information about the Intel-gfx-trybot
mailing list