[Intel-gfx] [RFC 2/2] drm/i915/pmu: serve global events and support perf stat

Dmitry Rogozhkin dmitry.v.rogozhkin at intel.com
Tue Aug 15 17:56:45 UTC 2017


This patch should probably be squashed with Tvrtko's PMU enabling
patch...

i915 events don't have a CPU context to work with, so the i915
PMU should operate in global mode, i.e. expose perf_invalid_context.
With that, the following perf invocation:
   perf stat workload.sh
errors out with "failed to read counter". The correct usage is:
   perf stat -a -C0 workload.sh
which produces the expected output.
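
Concretely, the registration-time change (shown in full in the diff
below) is a one-liner:

   i915->pmu.base.task_ctx_nr = perf_invalid_context;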

Another change required to make perf stat happy is to properly support
pmu->del(): the comment on the del() declaration says that del() is
required to act as a call to stop(event, PERF_EF_UPDATE), and perf stat
implicitly obtains the event count through this.
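
For reference, a minimal sketch of that contract (the example_pmu_*
helpers are placeholders; the i915_pmu_event_stop()/del() hunks below
implement exactly this shape):

   /* stop() with PERF_EF_UPDATE must fold the current counter value
    * into event->count before the event is disabled; perf stat picks
    * the final count up from event->count rather than issuing an
    * explicit read.
    */
   static void example_pmu_event_stop(struct perf_event *event, int flags)
   {
           if (flags & PERF_EF_UPDATE)
                   example_pmu_event_read(event);
           example_pmu_disable(event);
   }

   /* del() is required to behave as stop(event, PERF_EF_UPDATE). */
   static void example_pmu_event_del(struct perf_event *event, int flags)
   {
           example_pmu_event_stop(event, PERF_EF_UPDATE);
   }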

Finally, if perf stat is run as follows:
   perf stat -a workload.sh
i.e. without the '-C0' option, the event will be initialized as many
times as there are CPUs. Thus, the patch adds PMU refcounting support
to avoid multiple init/close of the internal machinery. The issue I did
not overcome is that in this case the counter values are multiplied by
the number of CPUs. I am not sure whether it is possible to expose some
event-enabling CPU mask, or whether I need to simply error out; a
sketch of the cpumask idea follows below.
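
One possible answer to the multiplication problem, sketched here under
the assumption that the approach used by uncore PMUs applies (this is
not part of the patch), is to advertise a "cpumask" sysfs attribute so
that perf opens the event on a single CPU only:

   /* Advertise a single CPU to perf; "perf stat -a" then opens the
    * event once instead of once per online CPU.
    */
   static ssize_t i915_pmu_cpumask_show(struct device *dev,
                                        struct device_attribute *attr,
                                        char *buf)
   {
           return cpumap_print_to_pagebuf(true, buf, cpumask_of(0));
   }

   static DEVICE_ATTR(cpumask, 0444, i915_pmu_cpumask_show, NULL);

The attribute would still need wiring into i915_pmu_attr_groups, and
the choice of CPU0 here is arbitrary, mirroring the '-C0' usage above.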

Change-Id: I7d1abe747a4399196e72253f7b66441a6528dbee
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Peter Zijlstra <peterz at infradead.org>
---
 drivers/gpu/drm/i915/i915_drv.h |   1 +
 drivers/gpu/drm/i915/i915_pmu.c | 106 ++++++++++++++++++++++------------------
 2 files changed, 60 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b59da2c..215d47b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2663,6 +2663,7 @@ struct drm_i915_private {
 	struct {
 		struct pmu base;
 		spinlock_t lock;
+		unsigned int ref;
 		struct hrtimer timer;
 		bool timer_enabled;
 		u64 enable;
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index bcdf2bc..0972203 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -451,34 +451,39 @@ static void i915_pmu_enable(struct perf_event *event)
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	struct intel_engine_cs *engine = NULL;
 	unsigned long flags;
+	bool start_timer = false;
 
 	spin_lock_irqsave(&i915->pmu.lock, flags);
 
-	if (is_engine_event(event)) {
-		engine = intel_engine_lookup_user(i915,
-						  engine_event_class(event),
-						  engine_event_instance(event));
-		GEM_BUG_ON(!engine);
-		engine->pmu.enable |= BIT(engine_event_sample(event));
-	}
-
-	i915->pmu.enable |= event_enabled_mask(event);
+	if (i915->pmu.ref++ == 0) {
+		if (is_engine_event(event)) {
+			engine = intel_engine_lookup_user(i915,
+							engine_event_class(event),
+							engine_event_instance(event));
+			GEM_BUG_ON(!engine);
+			engine->pmu.enable |= BIT(engine_event_sample(event));
+		}
 
-	if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
-		hrtimer_start_range_ns(&i915->pmu.timer,
-				       ns_to_ktime(PERIOD), 0,
-				       HRTIMER_MODE_REL_PINNED);
-		i915->pmu.timer_enabled = true;
-	} else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
-		   !engine->pmu.busy_stats) {
-		engine->pmu.busy_stats = true;
-		if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-			queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+		i915->pmu.enable |= event_enabled_mask(event);
+
+		if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
+			hrtimer_start_range_ns(&i915->pmu.timer,
+						ns_to_ktime(PERIOD), 0,
+						HRTIMER_MODE_REL_PINNED);
+			i915->pmu.timer_enabled = true;
+		} else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
+			!engine->pmu.busy_stats) {
+			engine->pmu.busy_stats = true;
+			if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+				queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+		}
+		start_timer = true;
 	}
 
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
 
-	i915_pmu_timer_start(event);
+	if (start_timer)
+		i915_pmu_timer_start(event);
 }
 
 static void i915_pmu_disable(struct perf_event *event)
@@ -486,40 +491,45 @@ static void i915_pmu_disable(struct perf_event *event)
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
 	unsigned long flags;
+	bool timer_cancel = false;
 	u64 mask;
 
 	spin_lock_irqsave(&i915->pmu.lock, flags);
 
-	if (is_engine_event(event)) {
-		struct intel_engine_cs *engine;
-		enum intel_engine_id id;
-
-		engine = intel_engine_lookup_user(i915,
-						  engine_event_class(event),
-						  engine_event_instance(event));
-		GEM_BUG_ON(!engine);
-		engine->pmu.enable &= ~BIT(engine_event_sample(event));
-		if (engine->pmu.busy_stats &&
-		    !engine_needs_busy_stats(engine)) {
-			engine->pmu.busy_stats = false;
-			queue_delayed_work(i915->wq,
-					   &engine->pmu.disable_busy_stats,
-					   round_jiffies_up_relative(2 * HZ));
+	if (i915->pmu.ref && --i915->pmu.ref == 0) {
+		if (is_engine_event(event)) {
+			struct intel_engine_cs *engine;
+			enum intel_engine_id id;
+
+			engine = intel_engine_lookup_user(i915,
+							engine_event_class(event),
+							engine_event_instance(event));
+			GEM_BUG_ON(!engine);
+			engine->pmu.enable &= ~BIT(engine_event_sample(event));
+			if (engine->pmu.busy_stats &&
+				!engine_needs_busy_stats(engine)) {
+				engine->pmu.busy_stats = false;
+				queue_delayed_work(i915->wq,
+						&engine->pmu.disable_busy_stats,
+						round_jiffies_up_relative(2 * HZ));
+			}
+			mask = 0;
+			for_each_engine(engine, i915, id)
+				mask |= engine->pmu.enable;
+			mask = (~mask) & ENGINE_SAMPLE_MASK;
+		} else {
+			mask = event_enabled_mask(event);
 		}
-		mask = 0;
-		for_each_engine(engine, i915, id)
-			mask |= engine->pmu.enable;
-		mask = (~mask) & ENGINE_SAMPLE_MASK;
-	} else {
-		mask = event_enabled_mask(event);
-	}
 
-	i915->pmu.enable &= ~mask;
-	i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
+		i915->pmu.enable &= ~mask;
+		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
+		timer_cancel = true;
+	}
 
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
 
-	i915_pmu_timer_cancel(event);
+	if (timer_cancel)
+		i915_pmu_timer_cancel(event);
 }
 
 static void i915_pmu_event_start(struct perf_event *event, int flags)
@@ -529,6 +539,8 @@ static void i915_pmu_event_start(struct perf_event *event, int flags)
 
 static void i915_pmu_event_stop(struct perf_event *event, int flags)
 {
+	if (flags & PERF_EF_UPDATE)
+		i915_pmu_event_read(event);
 	i915_pmu_disable(event);
 }
 
@@ -546,7 +558,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags)
 
 static void i915_pmu_event_del(struct perf_event *event, int flags)
 {
-	i915_pmu_disable(event);
+	i915_pmu_event_stop(event, PERF_EF_UPDATE);
 }
 
 static int i915_pmu_event_event_idx(struct perf_event *event)
@@ -687,7 +699,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
 		return;
 
 	i915->pmu.base.attr_groups	= i915_pmu_attr_groups;
-	i915->pmu.base.task_ctx_nr	= perf_sw_context;
+	i915->pmu.base.task_ctx_nr	= perf_invalid_context;
 	i915->pmu.base.event_init	= i915_pmu_event_init;
 	i915->pmu.base.add		= i915_pmu_event_add;
 	i915->pmu.base.del		= i915_pmu_event_del;
-- 
1.8.3.1


