[Intel-gfx] [PATCH] drm/i915/pmu: Deprecate I915_PMU_LAST and optimize state tracking

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Dec 4 10:57:52 UTC 2020


On 03/12/2020 22:33, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2020-11-26 16:47:03)
>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>
>> Adding any kinds of "last" abi markers is usually a mistake which I
>> repeated when implementing the PMU because it felt convenient at the time.
>>
>> This patch marks I915_PMU_LAST as deprecated and stops the internal
>> implementation using it for sizing the event status bitmask and array.
>>
>> New way of sizing the fields is a bit less elegant, but it omits reserving
>> slots for tracking events we are not interested in, and as such saves some
>> runtime space. Adding sampling events is likely to be a special event and
>> the new plumbing needed will be easily detected in testing. Existing
>> asserts against the bitfield and array sizes are keeping the code safe.
>>
>> First event which gets the new treatment in this new scheme are the
>> interrupts - which neither needs any tracking in i915 pmu nor needs
>> waking up the GPU to read it.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_pmu.c | 64 +++++++++++++++++++++++++++------
>>   drivers/gpu/drm/i915/i915_pmu.h | 35 ++++++++++++------
>>   include/uapi/drm/i915_drm.h     |  2 +-
>>   3 files changed, 78 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>> index cd786ad12be7..cd564c709115 100644
>> --- a/drivers/gpu/drm/i915/i915_pmu.c
>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>> @@ -27,8 +27,6 @@
>>           BIT(I915_SAMPLE_WAIT) | \
>>           BIT(I915_SAMPLE_SEMA))
>>   
>> -#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
>> -
>>   static cpumask_t i915_pmu_cpumask;
>>   static unsigned int i915_pmu_target_cpu = -1;
>>   
>> @@ -57,12 +55,39 @@ static bool is_engine_config(u64 config)
>>          return config < __I915_PMU_OTHER(0);
>>   }
>>   
>> -static unsigned int config_enabled_bit(u64 config)
>> +static unsigned int is_tracked_config(const u64 config)
>>   {
>> -       if (is_engine_config(config))
>> +       unsigned int val;
>> +
>> +       switch (config) {
>> +       case I915_PMU_ACTUAL_FREQUENCY:
>> +               val =  __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
>> +               break;
>> +       case I915_PMU_REQUESTED_FREQUENCY:
>> +               val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
>> +               break;
>> +       case I915_PMU_RC6_RESIDENCY:
>> +               val = __I915_PMU_RC6_RESIDENCY_ENABLED;
>> +               break;
>> +       default:
>> +               return 0;
>> +       }
>> +
>> +       return val + 1;
>> +}
>> +
>> +static unsigned int config_enabled_bit(const u64 config)
>> +{
>> +       if (is_engine_config(config)) {
>>                  return engine_config_sample(config);
>> -       else
>> -               return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
>> +       } else {
>> +               unsigned int bit = is_tracked_config(config);
>> +
>> +               if (bit)
>> +                       return I915_ENGINE_SAMPLE_COUNT + bit - 1;
>> +               else
>> +                       return -1;
>> +       }
>>   }
>>   
>>   static u64 config_enabled_mask(u64 config)
>> @@ -80,10 +105,15 @@ static unsigned int event_enabled_bit(struct perf_event *event)
>>          return config_enabled_bit(event->attr.config);
>>   }
>>   
>> +static bool event_read_needs_wakeref(const struct perf_event *event)
>> +{
>> +       return event->attr.config == I915_PMU_RC6_RESIDENCY;
>> +}
>> +
>>   static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
>>   {
>>          struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
>> -       u64 enable;
>> +       u32 enable;
>>   
>>          /*
>>           * Only some counters need the sampling timer.
>> @@ -627,12 +657,19 @@ static void i915_pmu_enable(struct perf_event *event)
>>   {
>>          struct drm_i915_private *i915 =
>>                  container_of(event->pmu, typeof(*i915), pmu.base);
>> -       unsigned int bit = event_enabled_bit(event);
>> +       bool need_wakeref = event_read_needs_wakeref(event);
>>          struct i915_pmu *pmu = &i915->pmu;
>> -       intel_wakeref_t wakeref;
>> +       intel_wakeref_t wakeref = 0;
>>          unsigned long flags;
>> +       unsigned int bit;
>> +
>> +       if (need_wakeref)
>> +               wakeref = intel_runtime_pm_get(&i915->runtime_pm);
>> +
>> +       bit = event_enabled_bit(event);
>> +       if (bit == -1)
>> +               goto update;
>>   
>> -       wakeref = intel_runtime_pm_get(&i915->runtime_pm);
>>          spin_lock_irqsave(&pmu->lock, flags);
> 
> What are we taking a wakeref here for?
> 
> Looks just to be __get_rc6. Do we need to update the sample at all?

Yes, so __get_rc6 can record the start state. But if you have seen it 
called from irq context then obviously it is not safe.. Just no idea why 
we haven't hit it so far. Does perf_pmu need a more evil subtest?

But the part of needing wakeref for read makes no sense, 
i915_pmu_event_read itself is not taking it.. nor does event disable. 
Looks like I need to re-visit this whole topic to remind myself what 
gets called from irq context and what does not.

Regards,

Tvrtko


More information about the Intel-gfx mailing list