[Intel-gfx] [RFC v2 2/3] drm/i915/pmu: serve global events and support perf stat

Dmitry Rogozhkin dmitry.v.rogozhkin at intel.com
Wed Aug 23 15:26:02 UTC 2017


This patch should probably be squashed with Tvrtko's PMU enabling
patch...

Making perf-stat workable with i915 PMU. The major point is that
current implementation of i915 PMU exposes global counter rather
thatn per-task counters. Thus, required changes are:
* Register PMU with .task_ctx_nr=perf_invalid_context
* Expose cpumask for the PMU with the single CPU in the mask
* Properly support pmu->stop(): it should call pmu->read()
* Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE)

Examples:
perf stat -e i915/rcs0-busy/ workload.sh
   This should error out with "failed to read counter" because the
requested counter can't be queried in per-task mode (which is the case).

perf stat -e i915/rcs0-busy/ -a workload.sh
perf stat -e i915/rcs0-busy/ -a -C0 workload.sh
   These 2 commands should give the same result with the correct counter
values. Counter value will be queried once in the end of the wrokload.
The example output would be:

 Performance counter stats for 'system wide':
       649,547,987      i915/rcs0-busy/

       1.895530680 seconds time elapsed

perf stat -e i915/rcs0-busy/ -a -I 100 workload.sh
   This will query counter perdiodically (each 100ms) and dump output:

     0.100108369          4,137,438      i915/rcs0-busy/
i915/rcs0-busy/: 37037414 100149071 100149071
     0.200249024         37,037,414      i915/rcs0-busy/
i915/rcs0-busy/: 36935429 100145077 100145077
     0.300391916         36,935,429      i915/rcs0-busy/
i915/rcs0-busy/: 34262017 100126136 100126136
     0.400518037         34,262,017      i915/rcs0-busy/
i915/rcs0-busy/: 34539960 100126217 100126217

v1: Make pmu.busy_stats a refcounter to avoid busy stats going away
    with some deleted event.

v2: Expose cpumask for i915 PMU to avoid multiple events creation of
    the same type followed by counter aggregation by perf-stat.

Change-Id: I7d1abe747a4399196e72253f7b66441a6528dbee
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Peter Zijlstra <peterz at infradead.org>
---
 drivers/gpu/drm/i915/i915_pmu.c         | 71 +++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 2 files changed, 55 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index bcdf2bc..c551d64 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -15,6 +15,8 @@
 
 #define ENGINE_SAMPLE_BITS (16)
 
+static cpumask_t i915_pmu_cpumask = CPU_MASK_CPU0;
+
 static u8 engine_config_sample(u64 config)
 {
 	return config & I915_PMU_SAMPLE_MASK;
@@ -285,16 +287,24 @@ static int i915_pmu_event_init(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
+	int cpu;
 	int ret;
 
-	/* XXX ideally only want pid == -1 && cpu == -1 */
-
 	if (event->attr.type != event->pmu->type)
 		return -ENOENT;
 
 	if (has_branch_stack(event))
 		return -EOPNOTSUPP;
 
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	cpu = cpumask_any_and(&i915_pmu_cpumask,
+		topology_sibling_cpumask(event->cpu));
+
+	if (cpu >= nr_cpu_ids)
+		return -ENODEV;
+
 	ret = 0;
 	if (is_engine_event(event)) {
 		ret = engine_event_init(event);
@@ -314,6 +324,7 @@ static int i915_pmu_event_init(struct perf_event *event)
 	if (ret)
 		return ret;
 
+	event->cpu = cpu;
 	if (!event->parent)
 		event->destroy = i915_pmu_event_destroy;
 
@@ -469,11 +480,16 @@ static void i915_pmu_enable(struct perf_event *event)
 				       ns_to_ktime(PERIOD), 0,
 				       HRTIMER_MODE_REL_PINNED);
 		i915->pmu.timer_enabled = true;
-	} else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
-		   !engine->pmu.busy_stats) {
-		engine->pmu.busy_stats = true;
-		if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
-			queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+	} else if (is_engine_event(event) && engine_needs_busy_stats(engine)) {
+		/* Enable busy stats for the first event demanding it, next
+		 * events just reference counter. So, if some events will go
+		 * away, we will still have busy stats enabled till remaining
+		 * events still use them.
+		 */
+		if (engine->pmu.busy_stats++ == 0) {
+			if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+				queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+		}
 	}
 
 	spin_unlock_irqrestore(&i915->pmu.lock, flags);
@@ -495,16 +511,16 @@ static void i915_pmu_disable(struct perf_event *event)
 		enum intel_engine_id id;
 
 		engine = intel_engine_lookup_user(i915,
-						  engine_event_class(event),
-						  engine_event_instance(event));
+						engine_event_class(event),
+						engine_event_instance(event));
 		GEM_BUG_ON(!engine);
 		engine->pmu.enable &= ~BIT(engine_event_sample(event));
-		if (engine->pmu.busy_stats &&
-		    !engine_needs_busy_stats(engine)) {
-			engine->pmu.busy_stats = false;
-			queue_delayed_work(i915->wq,
-					   &engine->pmu.disable_busy_stats,
-					   round_jiffies_up_relative(2 * HZ));
+		if (!engine_needs_busy_stats(engine)) {
+			if (engine->pmu.busy_stats && --engine->pmu.busy_stats == 0) {
+				queue_delayed_work(i915->wq,
+						&engine->pmu.disable_busy_stats,
+						round_jiffies_up_relative(2 * HZ));
+			}
 		}
 		mask = 0;
 		for_each_engine(engine, i915, id)
@@ -529,6 +545,8 @@ static void i915_pmu_event_start(struct perf_event *event, int flags)
 
 static void i915_pmu_event_stop(struct perf_event *event, int flags)
 {
+	if (flags & PERF_EF_UPDATE)
+		i915_pmu_event_read(event);
 	i915_pmu_disable(event);
 }
 
@@ -546,7 +564,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags)
 
 static void i915_pmu_event_del(struct perf_event *event, int flags)
 {
-	i915_pmu_disable(event);
+	i915_pmu_event_stop(event, PERF_EF_UPDATE);
 }
 
 static int i915_pmu_event_event_idx(struct perf_event *event)
@@ -656,9 +674,28 @@ static ssize_t i915_pmu_event_show(struct device *dev,
         .attrs = i915_pmu_events_attrs,
 };
 
+static ssize_t i915_pmu_get_attr_cpumask(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, i915_pmu_get_attr_cpumask, NULL);
+
+static struct attribute *i915_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group i915_pmu_cpumask_attr_group = {
+	.attrs = i915_cpumask_attrs,
+};
+
 static const struct attribute_group *i915_pmu_attr_groups[] = {
         &i915_pmu_format_attr_group,
         &i915_pmu_events_attr_group,
+        &i915_pmu_cpumask_attr_group,
         NULL
 };
 
@@ -687,7 +724,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
 		return;
 
 	i915->pmu.base.attr_groups	= i915_pmu_attr_groups;
-	i915->pmu.base.task_ctx_nr	= perf_sw_context;
+	i915->pmu.base.task_ctx_nr	= perf_invalid_context;
 	i915->pmu.base.event_init	= i915_pmu_event_init;
 	i915->pmu.base.add		= i915_pmu_event_add;
 	i915->pmu.base.del		= i915_pmu_event_del;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index fd5d838..0530e4e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -248,7 +248,7 @@ struct intel_engine_cs {
 	struct {
 		u32 enable;
 		u64 sample[4];
-		bool busy_stats;
+		unsigned int busy_stats;
 		struct work_struct enable_busy_stats;
 		struct delayed_work disable_busy_stats;
 	} pmu;
-- 
1.8.3.1



More information about the Intel-gfx mailing list