[Intel-gfx] [PATCH 2/2] drm/i915/pmu: Make PMU sample array two-dimensional

Andrzej Hajda andrzej.hajda at intel.com
Wed May 24 09:14:59 UTC 2023


On 23.05.2023 17:19, Ashutosh Dixit wrote:
> No functional changes but we can remove some unsightly index computation
> and read/write functions if we convert the PMU sample array from a
> one-dimensional to a two-dimensional array.
> 
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>

Reviewed-by: Andrzej Hajda <andrzej.hajda at intel.com>

Regards
Andrzej

> ---
>   drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
>   drivers/gpu/drm/i915/i915_pmu.h |  2 +-
>   2 files changed, 19 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index b47d890d4ada1..137e0df9573ee 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
>   	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
>   }
>   
> -static unsigned int
> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> -{
> -	unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> -
> -	GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> -
> -	return idx;
> -}
> -
> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> -{
> -	return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> -}
> -
> -static void
> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> -{
> -	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> -}
> -
> -static void
> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> -{
> -	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> -}
> -
>   static u64 get_rc6(struct intel_gt *gt)
>   {
>   	struct drm_i915_private *i915 = gt->i915;
> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
>   	spin_lock_irqsave(&pmu->lock, flags);
>   
>   	if (awake) {
> -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> +		pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
>   	} else {
>   		/*
>   		 * We think we are runtime suspended.
> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
>   		 * counter value.
>   		 */
>   		val = ktime_since_raw(pmu->sleep_last[gt_id]);
> -		val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> +		val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
>   	}
>   
> -	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> -		val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> +	if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> +		val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
>   	else
> -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> +		pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>   
>   	spin_unlock_irqrestore(&pmu->lock, flags);
>   
> @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
>   		with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
>   			u64 val = __get_rc6(gt);
>   
> -			store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> -			store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> -				     val);
> +			pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> +			pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>   			pmu->sleep_last[i] = ktime_get_raw();
>   		}
>   	}
> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
>   {
>   	struct i915_pmu *pmu = &gt->i915->pmu;
>   
> -	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> +	pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
>   	pmu->sleep_last[gt->info.id] = ktime_get_raw();
>   }
>   
> @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
>   	}
>   }
>   
> +static void
> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> +{
> +	sample->cur += mul_u32_u32(val, mul);
> +}
> +
>   static bool
>   frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
>   {
> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
>   		if (!val)
>   			val = intel_gpu_freq(rps, rps->cur_freq);
>   
> -		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> +		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
>   				val, period_ns / 1000);
>   	}
>   
>   	if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> -		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> +		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
>   				intel_rps_get_requested_frequency(rps),
>   				period_ns / 1000);
>   	}
> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
>   		switch (config) {
>   		case I915_PMU_ACTUAL_FREQUENCY:
>   			val =
> -			   div_u64(read_sample(pmu, gt_id,
> -					       __I915_SAMPLE_FREQ_ACT),
> +			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
>   				   USEC_PER_SEC /* to MHz */);
>   			break;
>   		case I915_PMU_REQUESTED_FREQUENCY:
>   			val =
> -			   div_u64(read_sample(pmu, gt_id,
> -					       __I915_SAMPLE_FREQ_REQ),
> +			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
>   				   USEC_PER_SEC /* to MHz */);
>   			break;
>   		case I915_PMU_INTERRUPTS:
> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> index 33d80fbaab8bc..d20592e7db999 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -127,7 +127,7 @@ struct i915_pmu {
>   	 * Only global counters are held here, while the per-engine ones are in
>   	 * struct intel_engine_cs.
>   	 */
> -	struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> +	struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
>   	/**
>   	 * @sleep_last: Last time GT parked for RC6 estimation.
>   	 */



More information about the dri-devel mailing list