[PATCH 05/10] drm/i915/pmu: Add queued counter

Chris Wilson chris at chris-wilson.co.uk
Tue Feb 13 13:22:51 UTC 2018


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

We add a PMU counter to expose the number of requests which have been
submitted from userspace but are not yet runnable due to dependencies and
unsignaled fences.

This is useful to analyze the overall load of the system.

v2:
 * Rebase for name change and re-order.
 * Drop floating point constant. (Chris Wilson)

v3:
 * Change scale to 1024 for faster arithmetic. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c         | 40 +++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 include/uapi/drm/i915_drm.h             |  9 +++++++-
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index a0ae7ca220fd..7a8e02824021 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -36,7 +36,8 @@
 #define ENGINE_SAMPLE_MASK \
 	(BIT(I915_SAMPLE_BUSY) | \
 	 BIT(I915_SAMPLE_WAIT) | \
-	 BIT(I915_SAMPLE_SEMA))
+	 BIT(I915_SAMPLE_SEMA) | \
+	 BIT(I915_SAMPLE_QUEUED))
 
 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
 
@@ -220,6 +221,11 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 
 		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
 			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
+
+		if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
+			update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED],
+				      I915_SAMPLE_QUEUED_DIVISOR,
+				      atomic_read(&engine->request_stats.queued));
 	}
 
 	if (fw)
@@ -317,6 +323,7 @@ engine_event_status(struct intel_engine_cs *engine,
 	switch (sample) {
 	case I915_SAMPLE_BUSY:
 	case I915_SAMPLE_WAIT:
+	case I915_SAMPLE_QUEUED:
 		break;
 	case I915_SAMPLE_SEMA:
 		if (INTEL_GEN(engine->i915) < 6)
@@ -524,6 +531,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
 		} else {
 			val = engine->pmu.sample[sample].cur;
 		}
+
+		if (sample == I915_SAMPLE_QUEUED)
+			val = div_u64(val, FREQUENCY);
 	} else {
 		switch (event->attr.config) {
 		case I915_PMU_ACTUAL_FREQUENCY:
@@ -779,6 +789,16 @@ static const struct attribute_group *i915_pmu_attr_groups[] = {
 { \
 	.sample = (__sample), \
 	.name = (__name), \
+	.suffix = "unit", \
+	.value = "ns", \
+}
+
+#define __engine_event_scale(__sample, __name, __scale) \
+{ \
+	.sample = (__sample), \
+	.name = (__name), \
+	.suffix = "scale", \
+	.value = (__scale), \
 }
 
 static struct i915_ext_attribute *
@@ -806,6 +826,9 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
 	return ++attr;
 }
 
+/* Fed to __stringify() below: no brackets or quotes please. */
+#define I915_SAMPLE_QUEUED_SCALE 0.0009765625
+
 static struct attribute **
 create_event_attributes(struct drm_i915_private *i915)
 {
@@ -822,10 +845,14 @@ create_event_attributes(struct drm_i915_private *i915)
 	static const struct {
 		enum drm_i915_pmu_engine_sample sample;
 		char *name;
+		char *suffix;
+		char *value;
 	} engine_events[] = {
 		__engine_event(I915_SAMPLE_BUSY, "busy"),
 		__engine_event(I915_SAMPLE_SEMA, "sema"),
 		__engine_event(I915_SAMPLE_WAIT, "wait"),
+		__engine_event_scale(I915_SAMPLE_QUEUED, "queued",
+				     __stringify(I915_SAMPLE_QUEUED_SCALE)),
 	};
 	unsigned int count = 0;
 	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -835,6 +862,9 @@ create_event_attributes(struct drm_i915_private *i915)
 	enum intel_engine_id id;
 	unsigned int i;
 
+	BUILD_BUG_ON(I915_SAMPLE_QUEUED_DIVISOR !=
+		     (1 / I915_SAMPLE_QUEUED_SCALE));
+
 	/* Count how many counters we will be exposing. */
 	for (i = 0; i < ARRAY_SIZE(events); i++) {
 		if (!config_status(i915, events[i].config))
@@ -912,13 +942,15 @@ create_event_attributes(struct drm_i915_private *i915)
 								engine->instance,
 								engine_events[i].sample));
 
-			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
-					engine->name, engine_events[i].name);
+			str = kasprintf(GFP_KERNEL, "%s-%s.%s",
+					engine->name, engine_events[i].name,
+					engine_events[i].suffix);
 			if (!str)
 				goto err;
 
 			*attr_iter++ = &pmu_iter->attr.attr;
-			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
+			pmu_iter = add_pmu_attr(pmu_iter, str,
+						engine_events[i].value);
 		}
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4d0995470809..01464b68031c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -386,7 +386,7 @@ struct intel_engine_cs {
 		 *
 		 * Our internal timer stores the current counters in this field.
 		 */
-#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_QUEUED + 1)
 		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
 	} pmu;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 29fa48e4755d..b65c80b39e8b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -110,9 +110,13 @@ enum drm_i915_gem_engine_class {
 enum drm_i915_pmu_engine_sample {
 	I915_SAMPLE_BUSY = 0,
 	I915_SAMPLE_WAIT = 1,
-	I915_SAMPLE_SEMA = 2
+	I915_SAMPLE_SEMA = 2,
+	I915_SAMPLE_QUEUED = 3
 };
 
+ /* Divide counter value by divisor to get the real value. */
+#define I915_SAMPLE_QUEUED_DIVISOR (1024)
+
 #define I915_PMU_SAMPLE_BITS (4)
 #define I915_PMU_SAMPLE_MASK (0xf)
 #define I915_PMU_SAMPLE_INSTANCE_BITS (8)
@@ -133,6 +137,9 @@ enum drm_i915_pmu_engine_sample {
 #define I915_PMU_ENGINE_SEMA(class, instance) \
 	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
 
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+	__I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
 #define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
 
 #define I915_PMU_ACTUAL_FREQUENCY	__I915_PMU_OTHER(0)
-- 
2.16.1



More information about the Intel-gfx-trybot mailing list