[Intel-gfx] [RFC v1 2/2] drm/i915/pmu: serve global events and support perf stat

Dmitry Rogozhkin dmitry.v.rogozhkin at intel.com
Thu Aug 17 16:14:52 UTC 2017


This patch should probably be squashed with Tvrtko's PMU enabling
patch...

i915 events don't have a CPU context to work with, so the i915
PMU should operate in global mode, i.e. expose perf_invalid_context.
This will make the following call to perf:
   perf stat -e i915/rcs0-busy/ workload.sh
to error out with "failed to read counter". Correct usage would be:
   perf stat -e i915/rcs0-busy/ -a -C0 workload.sh
With that invocation we get the expected counter output.

Another change required to make perf-stat happy is to properly support
pmu->del(): the comment on the del() declaration says that del() is
required to call stop(event, PERF_EF_UPDATE), and perf stat implicitly
reads the event count through this.

Finally, if perf stat is run as follows:
   perf stat -e i915/rcs0-busy/ -a workload.sh
i.e. without the '-C0' option, then the event will be initialized as
many times as there are CPUs. Thus, the patch adds PMU refcounting
support to avoid multiple init/close of internal state. The issue which
I did not overcome is that in this case the counters will be multiplied
by the number of CPUs. I am not sure whether it is possible to expose
some CPU mask for event enabling, or whether I need to simply error out.

v1: Make pmu.busy_stats a refcount so that busy stats do not go away
    when one of the events is deleted.

Change-Id: I7d1abe747a4399196e72253f7b66441a6528dbee
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Peter Zijlstra <peterz at infradead.org>
---
 drivers/gpu/drm/i915/i915_pmu.c         | 37 ++++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index bcdf2bc..fc29c75 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -469,11 +469,16 @@ static void i915_pmu_enable(struct perf_event *event)
 				       ns_to_ktime(PERIOD), 0,
 				       HRTIMER_MODE_REL_PINNED);
 		i915->pmu.timer_enabled = true;
-	} else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
-		   !engine->pmu.busy_stats) {
-		engine->pmu.busy_stats = true;
-		if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-			queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+	} else if (is_engine_event(event) && engine_needs_busy_stats(engine)) {
+		/* Enable busy stats for the first event demanding them;
+		 * subsequent events just take a reference. So, if some
+		 * events go away, busy stats stay enabled for as long as
+		 * the remaining events still use them.
+		 */
+		if (engine->pmu.busy_stats++ == 0) {
+			if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+				queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+		}
 	}
 
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
@@ -495,16 +500,16 @@ static void i915_pmu_disable(struct perf_event *event)
 		enum intel_engine_id id;
 
 		engine = intel_engine_lookup_user(i915,
-						  engine_event_class(event),
-						  engine_event_instance(event));
+						engine_event_class(event),
+						engine_event_instance(event));
 		GEM_BUG_ON(!engine);
 		engine->pmu.enable &= ~BIT(engine_event_sample(event));
-		if (engine->pmu.busy_stats &&
-		    !engine_needs_busy_stats(engine)) {
-			engine->pmu.busy_stats = false;
-			queue_delayed_work(i915->wq,
-					   &engine->pmu.disable_busy_stats,
-					   round_jiffies_up_relative(2 * HZ));
+		if (!engine_needs_busy_stats(engine)) {
+			if (engine->pmu.busy_stats && --engine->pmu.busy_stats == 0) {
+				queue_delayed_work(i915->wq,
+						&engine->pmu.disable_busy_stats,
+						round_jiffies_up_relative(2 * HZ));
+			}
 		}
 		mask = 0;
 		for_each_engine(engine, i915, id)
@@ -529,6 +534,8 @@ static void i915_pmu_event_start(struct perf_event *event, int flags)
 
 static void i915_pmu_event_stop(struct perf_event *event, int flags)
 {
+	if (flags & PERF_EF_UPDATE)
+		i915_pmu_event_read(event);
 	i915_pmu_disable(event);
 }
 
@@ -546,7 +553,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags)
 
 static void i915_pmu_event_del(struct perf_event *event, int flags)
 {
-	i915_pmu_disable(event);
+	i915_pmu_event_stop(event, PERF_EF_UPDATE);
 }
 
 static int i915_pmu_event_event_idx(struct perf_event *event)
@@ -687,7 +694,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
 		return;
 
 	i915->pmu.base.attr_groups	= i915_pmu_attr_groups;
-	i915->pmu.base.task_ctx_nr	= perf_sw_context;
+	i915->pmu.base.task_ctx_nr	= perf_invalid_context;
 	i915->pmu.base.event_init	= i915_pmu_event_init;
 	i915->pmu.base.add		= i915_pmu_event_add;
 	i915->pmu.base.del		= i915_pmu_event_del;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index fd5d838..0530e4e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -248,7 +248,7 @@ struct intel_engine_cs {
 	struct {
 		u32 enable;
 		u64 sample[4];
-		bool busy_stats;
+		unsigned int busy_stats;
 		struct work_struct enable_busy_stats;
 		struct delayed_work disable_busy_stats;
 	} pmu;
-- 
1.8.3.1



More information about the Intel-gfx mailing list