[Intel-gfx] [PATCH 8/8] drm/i915: Gate engine stats collection with a static key

Tvrtko Ursulin tursulin at ursulin.net
Mon Sep 18 11:38:14 UTC 2017


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

This reduces the cost of the software engine busyness tracking
to a single no-op instruction when there are no listeners.

v2: Rebase and some comments.
v3: Rebase.
v4: Checkpatch fixes.
v5: Rebase.
v6: Use system_long_wq to avoid being blocked by struct_mutex
    users.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c         |  54 +++++++++++++++--
 drivers/gpu/drm/i915/intel_engine_cs.c  |  17 ++++++
 drivers/gpu/drm/i915/intel_ringbuffer.h | 101 ++++++++++++++++++++------------
 3 files changed, 130 insertions(+), 42 deletions(-)
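
Not part of the patch, just an illustration for reviewers: the sketch below
boils the refcounted static key pattern used by the intel_engine_cs.c and
intel_ringbuffer.h hunks down to a self-contained dummy module. All names in
it (sample_key, sample_get() and so on) are invented for the example. With
CONFIG_JUMP_LABEL the disabled fast path compiles down to a patched no-op;
without jump label support it degrades to an ordinary conditional on the key
value. Note that static_branch_enable()/_disable() can sleep, which is also
why the PMU side defers the actual enable to a worker rather than flipping
things directly from the perf callback.

/* static-key-sketch.c - hypothetical example, not for merging */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/static_key.h>

static DEFINE_STATIC_KEY_FALSE(sample_key);
static DEFINE_MUTEX(sample_ref_lock);
static int sample_ref;

/* Hot path: a single patched no-op while the key is disabled. */
static void sample_hot_path(void)
{
	if (static_branch_unlikely(&sample_key))
		pr_info("listener present - do the expensive accounting\n");
}

/* Slow path: first user enables the key, last user disables it. May sleep. */
static void sample_get(void)
{
	mutex_lock(&sample_ref_lock);
	if (sample_ref++ == 0)
		static_branch_enable(&sample_key);
	mutex_unlock(&sample_ref_lock);
}

static void sample_put(void)
{
	mutex_lock(&sample_ref_lock);
	if (--sample_ref == 0)
		static_branch_disable(&sample_key);
	mutex_unlock(&sample_ref_lock);
}

static int __init sample_init(void)
{
	sample_hot_path();	/* key disabled - nothing happens */
	sample_get();
	sample_hot_path();	/* key enabled - prints */
	sample_put();
	return 0;
}

static void __exit sample_exit(void)
{
}

module_init(sample_init);
module_exit(sample_exit);
MODULE_LICENSE("GPL");

The busy_stats bookkeeping in i915_pmu_enable()/i915_pmu_disable() sits on
top of this: cancelling a still-pending delayed disable stands in for a new
enable, so a rapid disable/enable pair (see the comment in i915_pmu_disable())
never reaches intel_disable_engine_stats() and the key is left alone.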

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index ffba21eeb5d0..6d22172b8fb0 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -499,11 +499,17 @@ static void i915_pmu_enable(struct perf_event *event)
 		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
 		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
 		if (engine->pmu.enable_count[sample]++ == 0) {
+			/*
+			 * Enable engine busy stats tracking if needed, or
+			 * cancel the delayed disable which a previous event
+			 * may have scheduled.
+			 */
 			if (engine_needs_busy_stats(engine) &&
 			    !engine->pmu.busy_stats) {
-				engine->pmu.busy_stats =
-					intel_enable_engine_stats(engine) == 0;
-				WARN_ON_ONCE(!engine->pmu.busy_stats);
+				engine->pmu.busy_stats = true;
+				if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+					queue_work(system_long_wq,
+						   &engine->pmu.enable_busy_stats);
 			}
 		}
 	}
@@ -546,7 +552,15 @@ static void i915_pmu_disable(struct perf_event *event)
 			if (!engine_needs_busy_stats(engine) &&
 			    engine->pmu.busy_stats) {
 				engine->pmu.busy_stats = false;
-				intel_disable_engine_stats(engine);
+				/*
+				 * Disable busy stats with a delay so that
+				 * rapid on/off cycles on events, which can
+				 * happen when tools like perf stat start up,
+				 * are handled more gracefully.
+				 */
+				queue_delayed_work(system_long_wq,
+						   &engine->pmu.disable_busy_stats,
+						   round_jiffies_up_relative(HZ));
 			}
 		}
 	}
@@ -737,9 +751,27 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
 	return 0;
 }
 
+static void __enable_busy_stats(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+		container_of(work, typeof(*engine), pmu.enable_busy_stats);
+
+	WARN_ON_ONCE(intel_enable_engine_stats(engine));
+}
+
+static void __disable_busy_stats(struct work_struct *work)
+{
+	struct intel_engine_cs *engine =
+	       container_of(work, typeof(*engine), pmu.disable_busy_stats.work);
+
+	intel_disable_engine_stats(engine);
+}
+
 void i915_pmu_register(struct drm_i915_private *i915)
 {
 	int ret;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 
 	if (INTEL_GEN(i915) <= 2) {
 		DRM_INFO("PMU not supported for this GPU.");
@@ -773,6 +805,12 @@ void i915_pmu_register(struct drm_i915_private *i915)
 	i915->pmu.timer.function = i915_sample;
 	i915->pmu.enable = 0;
 
+	for_each_engine(engine, i915, id) {
+		INIT_WORK(&engine->pmu.enable_busy_stats, __enable_busy_stats);
+		INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
+				  __disable_busy_stats);
+	}
+
 	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
 	if (ret == 0)
 		return;
@@ -791,6 +829,9 @@ void i915_pmu_register(struct drm_i915_private *i915)
 
 void i915_pmu_unregister(struct drm_i915_private *i915)
 {
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
 	if (!i915->pmu.base.event_init)
 		return;
 
@@ -802,6 +843,11 @@ void i915_pmu_unregister(struct drm_i915_private *i915)
 
 	hrtimer_cancel(&i915->pmu.timer);
 
+	for_each_engine(engine, i915, id) {
+		flush_work(&engine->pmu.enable_busy_stats);
+		flush_delayed_work(&engine->pmu.disable_busy_stats);
+	}
+
 	perf_pmu_unregister(&i915->pmu.base);
 	i915->pmu.base.event_init = NULL;
 }
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index cbf978d39052..490b5254f2c0 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -21,6 +21,7 @@
  * IN THE SOFTWARE.
  *
  */
+#include <linux/static_key.h>
 
 #include "i915_drv.h"
 #include "intel_ringbuffer.h"
@@ -1558,6 +1559,10 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 	}
 }
 
+DEFINE_STATIC_KEY_FALSE(i915_engine_stats_key);
+static DEFINE_MUTEX(i915_engine_stats_mutex);
+static int i915_engine_stats_ref;
+
 /**
  * intel_enable_engine_stats() - Enable engine busy tracking on engine
  * @engine: engine to enable stats collection
@@ -1573,6 +1578,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	if (!i915.enable_execlists)
 		return -ENODEV;
 
+	mutex_lock(&i915_engine_stats_mutex);
+
 	spin_lock_irqsave(&engine->stats.lock, flags);
 	if (engine->stats.enabled == ~0)
 		goto busy;
@@ -1580,10 +1587,16 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 		engine->stats.enabled_at = ktime_get();
 	spin_unlock_irqrestore(&engine->stats.lock, flags);
 
+	if (i915_engine_stats_ref++ == 0)
+		static_branch_enable(&i915_engine_stats_key);
+
+	mutex_unlock(&i915_engine_stats_mutex);
+
 	return 0;
 
 busy:
 	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	mutex_unlock(&i915_engine_stats_mutex);
 
 	return -EBUSY;
 }
@@ -1601,6 +1614,7 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
 	if (!i915.enable_execlists)
 		return;
 
+	mutex_lock(&i915_engine_stats_mutex);
 	spin_lock_irqsave(&engine->stats.lock, flags);
 	WARN_ON_ONCE(engine->stats.enabled == 0);
 	if (--engine->stats.enabled == 0) {
@@ -1610,6 +1624,9 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
 		engine->stats.total = 0;
 	}
 	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	if (--i915_engine_stats_ref == 0)
+		static_branch_disable(&i915_engine_stats_key);
+	mutex_unlock(&i915_engine_stats_mutex);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index cb58ce34ab13..2c3ddf862bde 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -270,6 +270,22 @@ struct intel_engine_cs {
 		 * 		requested.
 		 */
 		bool busy_stats;
+		/**
+		 * @enable_busy_stats: Work item for engine busy stats enabling.
+		 *
+		 * Since the action can sleep it needs to be decoupled from the
+		 * perf API callback.
+		 */
+		struct work_struct enable_busy_stats;
+		/**
+		 * @disable_busy_stats: Work item for busy stats disabling.
+		 *
+		 * Same as the @enable_busy_stats action, with the difference
+		 * that it is delayed in order to absorb rapid enable-disable
+		 * cycles, which can happen during tool startup (like perf
+		 * stat).
+		 */
+		struct delayed_work disable_busy_stats;
 	} pmu;
 
 	/*
@@ -805,59 +821,68 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
 struct intel_engine_cs *
 intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
 
+DECLARE_STATIC_KEY_FALSE(i915_engine_stats_key);
+
 static inline void intel_engine_context_in(struct intel_engine_cs *engine)
 {
 	unsigned long flags;
 
-	if (READ_ONCE(engine->stats.enabled) == 0)
-		return;
+	if (static_branch_unlikely(&i915_engine_stats_key)) {
+		if (READ_ONCE(engine->stats.enabled) == 0)
+			return;
 
-	spin_lock_irqsave(&engine->stats.lock, flags);
+		spin_lock_irqsave(&engine->stats.lock, flags);
 
-	if (engine->stats.enabled > 0) {
-		if (engine->stats.active++ == 0)
-			engine->stats.start = ktime_get();
-		GEM_BUG_ON(engine->stats.active == 0);
-	}
+		if (engine->stats.enabled > 0) {
+			if (engine->stats.active++ == 0)
+				engine->stats.start = ktime_get();
+			GEM_BUG_ON(engine->stats.active == 0);
+		}
 
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+		spin_unlock_irqrestore(&engine->stats.lock, flags);
+	}
 }
 
 static inline void intel_engine_context_out(struct intel_engine_cs *engine)
 {
 	unsigned long flags;
 
-	if (READ_ONCE(engine->stats.enabled) == 0)
-		return;
-
-	spin_lock_irqsave(&engine->stats.lock, flags);
-
-	if (engine->stats.enabled > 0) {
-		ktime_t last, now = ktime_get();
-
-		if (engine->stats.active && --engine->stats.active == 0) {
-			/*
-			 * Decrement the active context count and in case GPU
-			 * is now idle add up to the running total.
-			 */
-			last = ktime_sub(now, engine->stats.start);
-
-			engine->stats.total = ktime_add(engine->stats.total,
-							last);
-		} else if (engine->stats.active == 0) {
-			/*
-			 * After turning on engine stats, context out might be
-			 * the first event in which case we account from the
-			 * time stats gathering was turned on.
-			 */
-			last = ktime_sub(now, engine->stats.enabled_at);
-
-			engine->stats.total = ktime_add(engine->stats.total,
-							last);
+	if (static_branch_unlikely(&i915_engine_stats_key)) {
+		if (READ_ONCE(engine->stats.enabled) == 0)
+			return;
+
+		spin_lock_irqsave(&engine->stats.lock, flags);
+
+		if (engine->stats.enabled > 0) {
+			ktime_t last, now = ktime_get();
+
+			if (engine->stats.active &&
+			    --engine->stats.active == 0) {
+				/*
+				 * Decrement the active context count and in
+				 * case GPU is now idle add up to the running
+				 * total.
+				 */
+				last = ktime_sub(now, engine->stats.start);
+
+				engine->stats.total =
+					ktime_add(engine->stats.total, last);
+			} else if (engine->stats.active == 0) {
+				/*
+				 * After turning on engine stats, context out
+				 * might be the first event in which case we
+				 * account from the time stats gathering was
+				 * turned on.
+				 */
+				last = ktime_sub(now, engine->stats.enabled_at);
+
+				engine->stats.total =
+					ktime_add(engine->stats.total, last);
+			}
 		}
+
+		spin_unlock_irqrestore(&engine->stats.lock, flags);
 	}
-
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
 }
 
 int intel_enable_engine_stats(struct intel_engine_cs *engine);
-- 
2.9.5


