[Intel-gfx] [RFC v4 4/5] drm/i915/pmu: introduce refcounting of event subscriptions

Dmitry Rogozhkin dmitry.v.rogozhkin at intel.com
Fri Sep 1 15:56:31 UTC 2017


As 'enable' i915 PMU IGT test will show, current PMU implementation
does not properly handle multiple parallel consumers. This patch
introduces refcounting of event subscriptions meaning that even if
one consumer will disable its event PMU event mask will not fly away
if there are other subscribers for this event.

Change-Id: I0d787f5feab988ad3c8af6d7ad70fb26d5839263
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  1 +
 drivers/gpu/drm/i915/i915_pmu.c         | 38 ++++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 ++-
 include/uapi/drm/i915_drm.h             | 28 +++++++++++++++++-------
 4 files changed, 58 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e629e5e..8bb967a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2160,6 +2160,7 @@ struct i915_pmu {
 	struct hrtimer timer;
 	bool timer_enabled;
 	u64 enable;
+	unsigned int enable_count[I915_SAMPLE_MAX];
 	u64 sample[__I915_NUM_PMU_SAMPLERS];
 };
 
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 7bfedb7..5f32356 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -17,6 +17,16 @@
 
 static cpumask_t i915_pmu_cpumask = CPU_MASK_NONE;
 
+static u8 i915_config_sample(u64 config)
+{
+	return config - __I915_PMU_OTHER(0);
+}
+
+static u8 i915_event_sample(struct perf_event *event)
+{
+	return i915_config_sample(event->attr.config);
+}
+
 static u8 engine_config_sample(u64 config)
 {
 	return config & I915_PMU_SAMPLE_MASK;
@@ -56,6 +66,11 @@ static bool is_engine_event(struct perf_event *event)
 	return is_engine_config(event->attr.config);
 }
 
+static bool is_event_sample_busy(struct perf_event *event)
+{
+	return is_engine_event(event) && BIT(engine_event_sample(event));
+}
+
 static u64 event_enabled_mask(struct perf_event *event)
 {
 	return config_enabled_mask(event->attr.config);
@@ -470,7 +485,10 @@ static void i915_pmu_enable(struct perf_event *event)
 						  engine_event_class(event),
 						  engine_event_instance(event));
 		GEM_BUG_ON(!engine);
+		++engine->pmu.enable_count[engine_event_sample(event)];
 		engine->pmu.enable |= BIT(engine_event_sample(event));
+	} else {
+		++i915->pmu.enable_count[i915_event_sample(event)];
 	}
 
 	i915->pmu.enable |= event_enabled_mask(event);
@@ -480,7 +498,8 @@ static void i915_pmu_enable(struct perf_event *event)
 				       ns_to_ktime(PERIOD), 0,
 				       HRTIMER_MODE_REL_PINNED);
 		i915->pmu.timer_enabled = true;
-	} else if (is_engine_event(event) && engine_needs_busy_stats(engine)) {
+	}
+	if (is_event_sample_busy(event) && supports_busy_stats()) {
 		/* Enable busy stats for the first event demanding it, next
 		 * events just reference counter. So, if some events will go
 		 * away, we will still have busy stats enabled till remaining
@@ -514,7 +533,13 @@ static void i915_pmu_disable(struct perf_event *event)
 						engine_event_class(event),
 						engine_event_instance(event));
 		GEM_BUG_ON(!engine);
-		engine->pmu.enable &= ~BIT(engine_event_sample(event));
+
+		GEM_BUG_ON(engine->pmu.enable_count[engine_event_sample(event)] == 0);
+
+		if (engine->pmu.enable_count[engine_event_sample(event)] &&
+			--engine->pmu.enable_count[engine_event_sample(event)] == 0) {
+			engine->pmu.enable &= ~BIT(engine_event_sample(event));
+		}
 		if (!engine_needs_busy_stats(engine)) {
 			if (engine->pmu.busy_stats && --engine->pmu.busy_stats == 0) {
 				queue_delayed_work(i915->wq,
@@ -527,7 +552,14 @@ static void i915_pmu_disable(struct perf_event *event)
 			mask |= engine->pmu.enable;
 		mask = (~mask) & ENGINE_SAMPLE_MASK;
 	} else {
-		mask = event_enabled_mask(event);
+		mask = i915->pmu.enable;
+		GEM_BUG_ON(i915->pmu.enable_count[i915_event_sample(event)]);
+
+		if (i915->pmu.enable_count[i915_event_sample(event)] &&
+			--i915->pmu.enable_count[i915_event_sample(event)] == 0) {
+			mask &= ~event_enabled_mask(event);
+		}
+		mask = ~mask;
 	}
 
 	i915->pmu.enable &= ~mask;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0530e4e..1841155 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -247,7 +247,8 @@ struct intel_engine_cs {
 
 	struct {
 		u32 enable;
-		u64 sample[4];
+		u32 enable_count[I915_ENGINE_SAMPLE_MAX];
+		u64 sample[I915_ENGINE_SAMPLE_MAX];
 		unsigned int busy_stats;
 		struct work_struct enable_busy_stats;
 		struct delayed_work disable_busy_stats;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 28bff96..7fa22aa 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -104,7 +104,19 @@ enum drm_i915_pmu_engine_sample {
 	I915_SAMPLE_QUEUED = 0,
 	I915_SAMPLE_BUSY = 1,
 	I915_SAMPLE_WAIT = 2,
-	I915_SAMPLE_SEMA = 3
+	I915_SAMPLE_SEMA = 3,
+	I915_ENGINE_SAMPLE_MAX /* non-ABI*/
+};
+
+enum drm_i915_pmu_sample {
+	I915_SAMPLE_ACTUAL_FREQUENCY = 0,
+	I915_SAMPLE_REQUESTED_FREQUENCY = 1,
+	I915_SAMPLE_ENERGY = 2,
+	I915_SAMPLE_INTERRUPTS = 3,
+	I915_SAMPLE_RC6_RESIDENCY = 4,
+	I915_SAMPLE_RC6p_RESIDENCY = 5,
+	I915_SAMPLE_RC6pp_RESIDENCY = 6,
+	I915_SAMPLE_MAX /* non-ABI*/
 };
 
 #define I915_PMU_SAMPLE_BITS (4)
@@ -132,14 +144,14 @@ enum drm_i915_pmu_engine_sample {
 
 #define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
 
-#define I915_PMU_ACTUAL_FREQUENCY 	__I915_PMU_OTHER(0)
-#define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
-#define I915_PMU_ENERGY			__I915_PMU_OTHER(2)
-#define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(3)
+#define I915_PMU_ACTUAL_FREQUENCY 	__I915_PMU_OTHER(I915_SAMPLE_ACTUAL_FREQUENCY)
+#define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(I915_SAMPLE_REQUESTED_FREQUENCY)
+#define I915_PMU_ENERGY			__I915_PMU_OTHER(I915_SAMPLE_ENERGY)
+#define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(I915_SAMPLE_INTERRUPTS)
 
-#define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(4)
-#define I915_PMU_RC6p_RESIDENCY		__I915_PMU_OTHER(5)
-#define I915_PMU_RC6pp_RESIDENCY	__I915_PMU_OTHER(6)
+#define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(I915_SAMPLE_RC6_RESIDENCY)
+#define I915_PMU_RC6p_RESIDENCY		__I915_PMU_OTHER(I915_SAMPLE_RC6p_RESIDENCY)
+#define I915_PMU_RC6pp_RESIDENCY	__I915_PMU_OTHER(I915_SAMPLE_RC6pp_RESIDENCY)
 
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
-- 
1.8.3.1



More information about the Intel-gfx mailing list