[Intel-gfx] [PATCH 2/2] drm/i915/pmu: Add queued counter

Tvrtko Ursulin tursulin at ursulin.net
Wed Nov 22 12:46:22 UTC 2017


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Add a per-engine PMU counter which exposes the number of requests currently
submitted to the GPU plus the number of runnable requests waiting for GPU time.

This is useful for analyzing the overall load of the system.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c | 30 +++++++++++++++++++++++++-----
 include/uapi/drm/i915_drm.h     |  6 ++++++
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 112243720ff3..b2b4b32af35f 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -36,7 +36,8 @@
 #define ENGINE_SAMPLE_MASK \
 	(BIT(I915_SAMPLE_BUSY) | \
 	 BIT(I915_SAMPLE_WAIT) | \
-	 BIT(I915_SAMPLE_SEMA))
+	 BIT(I915_SAMPLE_SEMA) | \
+	 BIT(I915_SAMPLE_QUEUED))
 
 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
 
@@ -223,6 +224,12 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 
 		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
 			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
+
+		if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
+			update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED],
+				      1 / I915_SAMPLE_QUEUED_SCALE,
+				      engine->queued +
+				      (last_seqno - current_seqno));
 	}
 
 	if (fw)
@@ -310,6 +317,10 @@ static int engine_event_init(struct perf_event *event)
 		if (INTEL_GEN(i915) < 6)
 			return -ENODEV;
 		break;
+	case I915_SAMPLE_QUEUED:
+		if (INTEL_GEN(i915) < 8)
+			return -ENODEV;
+		break;
 	default:
 		return -ENOENT;
 	}
@@ -399,6 +410,10 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 		} else if (sample == I915_SAMPLE_BUSY &&
 			   engine->pmu.busy_stats) {
 			val = ktime_to_ns(intel_engine_get_busy_time(engine));
+		} else if (sample == I915_SAMPLE_QUEUED) {
+			val =
+			   div_u64(engine->pmu.sample[I915_SAMPLE_QUEUED].cur,
+				   FREQUENCY);
 		} else {
 			val = engine->pmu.sample[sample].cur;
 		}
@@ -679,13 +694,18 @@ static ssize_t i915_pmu_event_show(struct device *dev,
 	I915_EVENT_STR(_name.unit, _unit)
 
 #define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \
-	I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \
+	I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample))
+
+#define I915_ENGINE_EVENT_NS(_name, _class, _instance, _sample) \
+	I915_ENGINE_EVENT(_name, _class, _instance, _sample), \
 	I915_EVENT_STR(_name.unit, "ns")
 
 #define I915_ENGINE_EVENTS(_name, _class, _instance) \
-	I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \
-	I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \
-	I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT)
+	I915_ENGINE_EVENT_NS(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \
+	I915_ENGINE_EVENT_NS(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \
+	I915_ENGINE_EVENT_NS(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT), \
+	I915_ENGINE_EVENT(_name##_instance-queued, _class, _instance, I915_SAMPLE_QUEUED), \
+	I915_EVENT_STR(_name##_instance-queued.scale, __stringify(I915_SAMPLE_QUEUED_SCALE))
 
 static struct attribute *i915_pmu_events_attrs[] = {
 	I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0),
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 915a6e85a855..20ee668d1428 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -111,9 +111,12 @@ enum drm_i915_pmu_engine_sample {
 	I915_SAMPLE_BUSY = 0,
 	I915_SAMPLE_WAIT = 1,
 	I915_SAMPLE_SEMA = 2,
+	I915_SAMPLE_QUEUED = 3,
 	I915_ENGINE_SAMPLE_MAX /* non-ABI */
 };
 
+#define I915_SAMPLE_QUEUED_SCALE 1e-2 /* No parentheses please - value is expanded via __stringify(). */
+
 #define I915_PMU_SAMPLE_BITS (4)
 #define I915_PMU_SAMPLE_MASK (0xf)
 #define I915_PMU_SAMPLE_INSTANCE_BITS (8)
@@ -134,6 +137,9 @@ enum drm_i915_pmu_engine_sample {
 #define I915_PMU_ENGINE_SEMA(class, instance) \
 	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
 
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
 #define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
 
 #define I915_PMU_ACTUAL_FREQUENCY	__I915_PMU_OTHER(0)
-- 
2.14.1



More information about the Intel-gfx mailing list