[Intel-gfx] [PATCH] drm/i915/pmu: Do not assume fixed hrtimer period

Tvrtko Ursulin tursulin at ursulin.net
Mon Jun 4 12:52:39 UTC 2018


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

As Chris has discovered on his Ivybridge, and later automated test runs
have confirmed, on most of our platforms hrtimer faced with heavy GPU load
ca occasionally become sufficiently imprecise to affect PMU sampling
calculations.

This means we cannot assume sampling frequency is what we asked for, but
we need to measure the interval ourselves.

This patch is similar to Chris' original proposal for per-engine counters,
but instead of introducing a new set to work around the problem with
frequency sampling, it swaps around the way internal frequency accounting
is done. Instead of accumulating current frequency and dividing by
sampling frequency on readout, it accumulates frequency scaled by each
period.

Testcase: igt/perf_pmu/*busy* # snb, ivb, hsw
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Suggested-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_pmu.c | 61 ++++++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_pmu.h |  8 +++++
 2 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index dc87797db500..ff3e3f865567 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -127,6 +127,7 @@ static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
 {
 	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
 		i915->pmu.timer_enabled = true;
+		i915->pmu.timer_last = ktime_get();
 		hrtimer_start_range_ns(&i915->pmu.timer,
 				       ns_to_ktime(PERIOD), 0,
 				       HRTIMER_MODE_REL_PINNED);
@@ -155,12 +156,13 @@ static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
 }
 
 static void
-update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
+add_sample(struct i915_pmu_sample *sample, u32 val)
 {
-	sample->cur += mul_u32_u32(val, unit);
+	sample->cur += val;
 }
 
-static void engines_sample(struct drm_i915_private *dev_priv)
+static void
+engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
@@ -182,8 +184,9 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 
 		val = !i915_seqno_passed(current_seqno, last_seqno);
 
-		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
-			      PERIOD, val);
+		if (val)
+			add_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
+				   period_ns);
 
 		if (val && (engine->pmu.enable &
 		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
@@ -194,11 +197,13 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 			val = 0;
 		}
 
-		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
-			      PERIOD, !!(val & RING_WAIT));
+		if (val & RING_WAIT)
+			add_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
+				   period_ns);
 
-		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
-			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
+		if (val & RING_WAIT_SEMAPHORE)
+			add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
+				   period_ns);
 	}
 
 	if (fw)
@@ -207,7 +212,14 @@ static void engines_sample(struct drm_i915_private *dev_priv)
 	intel_runtime_pm_put(dev_priv);
 }
 
-static void frequency_sample(struct drm_i915_private *dev_priv)
+static void
+add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
+{
+	sample->cur += mul_u32_u32(val, mul);
+}
+
+static void
+frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 {
 	if (dev_priv->pmu.enable &
 	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
@@ -221,15 +233,17 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
 			intel_runtime_pm_put(dev_priv);
 		}
 
-		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
-			      1, intel_gpu_freq(dev_priv, val));
+		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
+			        intel_gpu_freq(dev_priv, val),
+				period_ns / 1000);
 	}
 
 	if (dev_priv->pmu.enable &
 	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
-		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
-			      intel_gpu_freq(dev_priv,
-					     dev_priv->gt_pm.rps.cur_freq));
+		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ],
+				intel_gpu_freq(dev_priv,
+					       dev_priv->gt_pm.rps.cur_freq),
+				period_ns / 1000);
 	}
 }
 
@@ -237,14 +251,21 @@ static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 {
 	struct drm_i915_private *i915 =
 		container_of(hrtimer, struct drm_i915_private, pmu.timer);
+	unsigned int period_ns;
+	ktime_t now;
 
 	if (!READ_ONCE(i915->pmu.timer_enabled))
 		return HRTIMER_NORESTART;
 
-	engines_sample(i915);
-	frequency_sample(i915);
+	now = ktime_get();
+	period_ns = ktime_to_ns(ktime_sub(now, i915->pmu.timer_last));
+	i915->pmu.timer_last = now;
+
+	engines_sample(i915, period_ns);
+	frequency_sample(i915, period_ns);
+
+	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));
 
-	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
 	return HRTIMER_RESTART;
 }
 
@@ -519,12 +540,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
 		case I915_PMU_ACTUAL_FREQUENCY:
 			val =
 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
-				   FREQUENCY);
+				   1000000 /* to MHz */);
 			break;
 		case I915_PMU_REQUESTED_FREQUENCY:
 			val =
 			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
-				   FREQUENCY);
+				   1000000 /* to MHz */);
 			break;
 		case I915_PMU_INTERRUPTS:
 			val = count_interrupts(i915);
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 2ba735299f7c..7f164ca3db12 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -65,6 +65,14 @@ struct i915_pmu {
 	 * event types.
 	 */
 	u64 enable;
+
+	/**
+	 * @timer_last:
+	 *
+	 * Timestmap of the previous timer invocation.
+	 */
+	ktime_t timer_last;
+
 	/**
 	 * @enable_count: Reference counts for the enabled events.
 	 *
-- 
2.17.0



More information about the Intel-gfx mailing list