[Intel-gfx] [PATCH igt 3/3] igt/perf_pmu: Idle the GPU before starting to measure busyness

Fri Nov 24 07:32:30 UTC 2017

On 23/11/17 16:07, Chris Wilson wrote:
> Quoting Chris Wilson (2017-11-23 16:02:20)
>> Quoting Tvrtko Ursulin (2017-11-23 13:37:30)
>>>
>>> On 23/11/2017 08:22, Chris Wilson wrote:
>>>> Make sure the HW is idle before we start sampling the GPU for busyness.
>>>> If we do not rest for long enough between tests, we may carry the
>>>> sampling over.
>>>
>>> I'd rather not have this since as I said yesterday each opened PMU event
>>> is supposed to record the initial counter value as reference. If that is
>>> failing or not good enough on some tests/platforms I would rather first
>>> understand why and how.
>>
>> All legacy sampling fails... :| I'm putting it back!
> 
> So presumably it is coupling with the parking.

Or more precisely averaging with the previous sample. Every time one PMU
client signals the timer to self-disarm, but the new PMU client enables
the timer again in this window, the previous sample value does not get
cleared.

I tried this for instance:

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 650089b30d46..a5ca27eeef40 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -147,9 +147,26 @@ void i915_pmu_gt_parked(struct drm_i915_private *i915)
        spin_unlock_irq(&i915->pmu.lock);
 }
 
+static void pmu_init_previous_samples(struct drm_i915_private *i915)
+{
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       unsigned int i;
+
+       for_each_engine(engine, i915, id) {
+               for (i = 0; i < ARRAY_SIZE(engine->pmu.sample); i++)
+                       engine->pmu.sample[i].prev = 0;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(i915->pmu.sample); i++)
+               i915->pmu.sample[i].prev = i915->gt_pm.rps.idle_freq;
+}
+
 static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
 {
        if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
+               hrtimer_cancel(&i915->pmu.timer);
+               pmu_init_previous_samples(i915);
                i915->pmu.timer_enabled = true;
                hrtimer_start_range_ns(&i915->pmu.timer,
                                       ns_to_ktime(PERIOD), 0,
@@ -262,31 +279,13 @@ static void frequency_sample(struct drm_i915_private *dev_priv)
        }
 }
 
-static void pmu_init_previous_samples(struct drm_i915_private *i915)
-{
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-       unsigned int i;
-
-       for_each_engine(engine, i915, id) {
-               for (i = 0; i < ARRAY_SIZE(engine->pmu.sample); i++)
-                       engine->pmu.sample[i].prev = 0;
-       }
-
-       for (i = 0; i < ARRAY_SIZE(i915->pmu.sample); i++)
-               i915->pmu.sample[i].prev = i915->gt_pm.rps.idle_freq;
-}
-
 static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
 {
        struct drm_i915_private *i915 =
                container_of(hrtimer, struct drm_i915_private, pmu.timer);
 
-       if (!READ_ONCE(i915->pmu.timer_enabled)) {
-               pmu_init_previous_samples(i915);
-
+       if (!READ_ONCE(i915->pmu.timer_enabled))
                return HRTIMER_NORESTART;
-       }
 
        engines_sample(i915);
        frequency_sample(i915);
@@ -847,8 +846,6 @@ void i915_pmu_register(struct drm_i915_private *i915)
        hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        i915->pmu.timer.function = i915_sample;
 
-       pmu_init_previous_samples(i915);
-
        for_each_engine(engine, i915, id)
                INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats,
                                  __disable_busy_stats);
---

But it is too evil to wait for the sampling timer in case of MMIO in the
irq disabled section. Even though it would happen only on disable-enable
transitions quicker than sampling period. It is not an use case, but still..

Or we could drop sample averaging.. ?

Regards,

Tvrtko