[Intel-gfx] [PATCH i-g-t 5/9] intel-gpu-overlay: Catch-up to new i915 PMU

Tue Oct 10 09:30:04 UTC 2017

From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

v2: Update for i915 changes.
v3: Use 1eN for large numbers. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 lib/igt_perf.h           | 89 +++++++++++++++++++++++++++++++++---------------
 overlay/gem-interrupts.c |  2 +-
 overlay/gpu-freq.c       |  8 ++---
 overlay/gpu-top.c        | 68 ++++++++++++++++++++----------------
 overlay/power.c          |  4 +--
 overlay/rc6.c            | 20 +++++------
 6 files changed, 116 insertions(+), 75 deletions(-)

diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index cc10cb300aaf..285823786324 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -1,3 +1,27 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
 #ifndef I915_PERF_H
 #define I915_PERF_H
 
@@ -5,41 +29,52 @@
 
 #include <linux/perf_event.h>
 
-#define I915_SAMPLE_BUSY	0
-#define I915_SAMPLE_WAIT	1
-#define I915_SAMPLE_SEMA	2
+enum drm_i915_gem_engine_class {
+	I915_ENGINE_CLASS_OTHER = 0,
+	I915_ENGINE_CLASS_RENDER = 1,
+	I915_ENGINE_CLASS_COPY = 2,
+	I915_ENGINE_CLASS_VIDEO = 3,
+	I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+	I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+enum drm_i915_pmu_engine_sample {
+	I915_SAMPLE_BUSY = 0,
+	I915_SAMPLE_WAIT = 1,
+	I915_SAMPLE_SEMA = 2,
+	I915_ENGINE_SAMPLE_MAX /* non-ABI */
+};
 
-#define I915_SAMPLE_RCS		0
-#define I915_SAMPLE_VCS		1
-#define I915_SAMPLE_BCS		2
-#define I915_SAMPLE_VECS	3
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+	(I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
 
-#define __I915_PERF_COUNT(ring, id) ((ring) << 4 | (id))
+#define __I915_PMU_ENGINE(class, instance, sample) \
+	((class) << I915_PMU_CLASS_SHIFT | \
+	(instance) << I915_PMU_SAMPLE_BITS | \
+	(sample))
 
-#define I915_PERF_COUNT_RCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_RCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_RCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
 
-#define I915_PERF_COUNT_VCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_VCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_VCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
 
-#define I915_PERF_COUNT_BCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_BCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_BCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
 
-#define I915_PERF_COUNT_VECS_BUSY __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_VECS_WAIT __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_VECS_SEMA __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_SEMA)
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
 
-#define I915_PERF_ACTUAL_FREQUENCY 32
-#define I915_PERF_REQUESTED_FREQUENCY 33
-#define I915_PERF_ENERGY 34
-#define I915_PERF_INTERRUPTS 35
+#define I915_PMU_ACTUAL_FREQUENCY	__I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
+#define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(2)
+#define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(3)
+#define I915_PMU_RC6p_RESIDENCY		__I915_PMU_OTHER(4)
+#define I915_PMU_RC6pp_RESIDENCY	__I915_PMU_OTHER(5)
 
-#define I915_PERF_RC6_RESIDENCY		40
-#define I915_PERF_RC6p_RESIDENCY	41
-#define I915_PERF_RC6pp_RESIDENCY	42
+#define I915_PMU_LAST I915_PMU_RC6pp_RESIDENCY
 
 static inline int
 perf_event_open(struct perf_event_attr *attr,
diff --git a/overlay/gem-interrupts.c b/overlay/gem-interrupts.c
index 3eda24f4d7eb..add4a9dfd725 100644
--- a/overlay/gem-interrupts.c
+++ b/overlay/gem-interrupts.c
@@ -113,7 +113,7 @@ int gem_interrupts_init(struct gem_interrupts *irqs)
 {
 	memset(irqs, 0, sizeof(*irqs));
 
-	irqs->fd = perf_i915_open(I915_PERF_INTERRUPTS);
+	irqs->fd = perf_i915_open(I915_PMU_INTERRUPTS);
 	if (irqs->fd < 0 && interrupts_read() < 0)
 		irqs->error = ENODEV;
 
diff --git a/overlay/gpu-freq.c b/overlay/gpu-freq.c
index 76c5ed9acfd1..0d8032592ef5 100644
--- a/overlay/gpu-freq.c
+++ b/overlay/gpu-freq.c
@@ -37,8 +37,8 @@ static int perf_open(void)
 {
 	int fd;
 
-	fd = perf_i915_open_group(I915_PERF_ACTUAL_FREQUENCY, -1);
-	if (perf_i915_open_group(I915_PERF_REQUESTED_FREQUENCY, fd) < 0) {
+	fd = perf_i915_open_group(I915_PMU_ACTUAL_FREQUENCY, -1);
+	if (perf_i915_open_group(I915_PMU_REQUESTED_FREQUENCY, fd) < 0) {
 		close(fd);
 		fd = -1;
 	}
@@ -176,8 +176,8 @@ int gpu_freq_update(struct gpu_freq *gf)
 			return EAGAIN;
 		}
 
-		gf->current = (s->act - d->act) / d_time;
-		gf->request = (s->req - d->req) / d_time;
+		gf->current = (s->act - d->act) * 1e9 / d_time;
+		gf->request = (s->req - d->req) * 1e9 / d_time;
 	}
 
 	return 0;
diff --git a/overlay/gpu-top.c b/overlay/gpu-top.c
index 812f47d5aced..61b8f62fd78c 100644
--- a/overlay/gpu-top.c
+++ b/overlay/gpu-top.c
@@ -43,49 +43,57 @@
 #define   RING_WAIT		(1<<11)
 #define   RING_WAIT_SEMAPHORE	(1<<10)
 
-#define __I915_PERF_RING(n) (4*n)
-#define I915_PERF_RING_BUSY(n) (__I915_PERF_RING(n) + 0)
-#define I915_PERF_RING_WAIT(n) (__I915_PERF_RING(n) + 1)
-#define I915_PERF_RING_SEMA(n) (__I915_PERF_RING(n) + 2)
-
 static int perf_init(struct gpu_top *gt)
 {
-	const char *names[] = {
-		"RCS",
-		"BCS",
-		"VCS0",
-		"VCS1",
-		NULL,
+	struct engine_desc {
+		unsigned class, inst;
+		const char *name;
+	} *d, engines[] = {
+		{ I915_ENGINE_CLASS_RENDER, 0, "rcs0" },
+		{ I915_ENGINE_CLASS_COPY, 0, "bcs0" },
+		{ I915_ENGINE_CLASS_VIDEO, 0, "vcs0" },
+		{ I915_ENGINE_CLASS_VIDEO, 1, "vcs1" },
+		{ I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "vecs0" },
+		{ 0, 0, NULL }
 	};
-	int n;
 
-	gt->fd = perf_i915_open_group(I915_PERF_RING_BUSY(0), -1);
+	d = &engines[0];
+
+	gt->fd = perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class, d->inst),
+				      -1);
 	if (gt->fd < 0)
 		return -1;
 
-	if (perf_i915_open_group(I915_PERF_RING_WAIT(0), gt->fd) >= 0)
+	if (perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class, d->inst),
+				 gt->fd) >= 0)
 		gt->have_wait = 1;
 
-	if (perf_i915_open_group(I915_PERF_RING_SEMA(0), gt->fd) >= 0)
+	if (perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class, d->inst),
+				 gt->fd) >= 0)
 		gt->have_sema = 1;
 
-	gt->ring[0].name = names[0];
+	gt->ring[0].name = d->name;
 	gt->num_rings = 1;
 
-	for (n = 1; names[n]; n++) {
-		if (perf_i915_open_group(I915_PERF_RING_BUSY(n), gt->fd) >= 0) {
-			if (gt->have_wait &&
-			    perf_i915_open_group(I915_PERF_RING_WAIT(n),
-						 gt->fd) < 0)
-				return -1;
-
-			if (gt->have_sema &&
-			    perf_i915_open_group(I915_PERF_RING_SEMA(n),
-						 gt->fd) < 0)
-				return -1;
-
-			gt->ring[gt->num_rings++].name = names[n];
-		}
+	for (d++; d->name; d++) {
+		if (perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class,
+							      d->inst),
+					gt->fd) < 0)
+			continue;
+
+		if (gt->have_wait &&
+		    perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class,
+							      d->inst),
+					 gt->fd) < 0)
+			return -1;
+
+		if (gt->have_sema &&
+		    perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class,
+							      d->inst),
+				   gt->fd) < 0)
+			return -1;
+
+		gt->ring[gt->num_rings++].name = d->name;
 	}
 
 	return 0;
diff --git a/overlay/power.c b/overlay/power.c
index dd4aec6bffd9..805f4ca7805c 100644
--- a/overlay/power.c
+++ b/overlay/power.c
@@ -45,9 +45,7 @@ int power_init(struct power *power)
 
 	memset(power, 0, sizeof(*power));
 
-	power->fd = perf_i915_open(I915_PERF_ENERGY);
-	if (power->fd != -1)
-		return 0;
+	power->fd = -1;
 
 	sprintf(buf, "%s/i915_energy_uJ", debugfs_dri_path);
 	fd = open(buf, 0);
diff --git a/overlay/rc6.c b/overlay/rc6.c
index 46c975a557ff..8977f0993095 100644
--- a/overlay/rc6.c
+++ b/overlay/rc6.c
@@ -43,15 +43,15 @@ static int perf_open(unsigned *flags)
 {
 	int fd;
 
-	fd = perf_i915_open_group(I915_PERF_RC6_RESIDENCY, -1);
+	fd = perf_i915_open_group(I915_PMU_RC6_RESIDENCY, -1);
 	if (fd < 0)
 		return -1;
 
 	*flags |= RC6;
-	if (perf_i915_open_group(I915_PERF_RC6p_RESIDENCY, fd) >= 0)
+	if (perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd) >= 0)
 		*flags |= RC6p;
 
-	if (perf_i915_open_group(I915_PERF_RC6pp_RESIDENCY, fd) >= 0)
+	if (perf_i915_open_group(I915_PMU_RC6pp_RESIDENCY, fd) >= 0)
 		*flags |= RC6pp;
 
 	return fd;
@@ -132,11 +132,11 @@ int rc6_update(struct rc6 *rc6)
 
 		len = 2;
 		if (rc6->flags & RC6)
-			s->rc6_residency = data[len++];
+			s->rc6_residency = data[len++] / 1e6;
 		if (rc6->flags & RC6p)
-			s->rc6p_residency = data[len++];
+			s->rc6p_residency = data[len++] / 1e6;
 		if (rc6->flags & RC6pp)
-			s->rc6pp_residency = data[len++];
+			s->rc6pp_residency = data[len++] / 1e6;
 	}
 
 	if (rc6->count == 1)
@@ -149,14 +149,14 @@ int rc6_update(struct rc6 *rc6)
 	}
 
 	d_rc6 = s->rc6_residency - d->rc6_residency;
-	rc6->rc6 = (100 * d_rc6 + d_time/2) / d_time;
+	rc6->rc6 = 100 * d_rc6 / d_time;
 
 	d_rc6p = s->rc6p_residency - d->rc6p_residency;
-	rc6->rc6p = (100 * d_rc6p + d_time/2) / d_time;
+	rc6->rc6p = 100 * d_rc6p / d_time;
 
 	d_rc6pp = s->rc6pp_residency - d->rc6pp_residency;
-	rc6->rc6pp = (100 * d_rc6pp + d_time/2) / d_time;
+	rc6->rc6pp = 100 * d_rc6pp / d_time;
 
-	rc6->rc6_combined = (100 * (d_rc6 + d_rc6p + d_rc6pp) + d_time/2) / d_time;
+	rc6->rc6_combined = 100 * (d_rc6 + d_rc6p + d_rc6pp) / d_time;
 	return 0;
 }
-- 
2.9.5