[Intel-gfx] [PATCH 2/3] drm/i915/gt: Compare average group occupancy for RPS evaluation

Belgaumkar, Vinay vinay.belgaumkar at intel.com
Tue Nov 23 17:35:26 UTC 2021



On 11/17/2021 2:49 PM, Vinay Belgaumkar wrote:
> From: Chris Wilson <chris.p.wilson at intel.com>
> 
> Currently, we inspect each engine individually and measure the occupancy
> of that engine over the last evaluation interval. If that exceeds our
> busyness thresholds, we decide to increase the GPU frequency. However,
> under a load balancer, we should consider the occupancy of entire engine
> groups, as work may be spread out across the group. In doing so, we
> prefer wide over fast, as power consumption is approximately proportional to
> the square of the frequency. However, since the load balancer is greedy,
> the first idle engine gets all the work, and the balancer preferentially
> reuses the last active engine; under light loads all work is assigned to one
> engine, and so that engine appears very busy. But if the work happened
> to overlap slightly, the workload would spread across multiple engines,
> reducing each individual engine's runtime, and so reducing the rps
> contribution, keeping the frequency low. Instead, when considering the
> contribution, consider the contribution over the entire engine group
> (capacity).
> 
> Signed-off-by: Chris Wilson <chris.p.wilson at intel.com>
> Cc: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>

Reviewed-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>

> ---
>   drivers/gpu/drm/i915/gt/intel_rps.c | 48 ++++++++++++++++++++---------
>   1 file changed, 34 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
> index 07ff7ba7b2b7..3675ac93ded0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rps.c
> +++ b/drivers/gpu/drm/i915/gt/intel_rps.c
> @@ -7,6 +7,7 @@
>   
>   #include "i915_drv.h"
>   #include "intel_breadcrumbs.h"
> +#include "intel_engine_pm.h"
>   #include "intel_gt.h"
>   #include "intel_gt_clock_utils.h"
>   #include "intel_gt_irq.h"
> @@ -65,26 +66,45 @@ static void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
>   static void rps_timer(struct timer_list *t)
>   {
>   	struct intel_rps *rps = from_timer(rps, t, timer);
> -	struct intel_engine_cs *engine;
> -	ktime_t dt, last, timestamp;
> -	enum intel_engine_id id;
> +	struct intel_gt *gt = rps_to_gt(rps);
> +	ktime_t dt, last, timestamp = 0;
>   	s64 max_busy[3] = {};
> +	int i, j;
>   
> -	timestamp = 0;
> -	for_each_engine(engine, rps_to_gt(rps), id) {
> -		s64 busy;
> -		int i;
> +	/* Compare average occupancy over each engine group */
> +	for (i = 0; i < ARRAY_SIZE(gt->engine_class); i++) {
> +		s64 busy = 0;
> +		int count = 0;
> +
> +		for (j = 0; j < ARRAY_SIZE(gt->engine_class[i]); j++) {
> +			struct intel_engine_cs *engine;
>   
> -		dt = intel_engine_get_busy_time(engine, &timestamp);
> -		last = engine->stats.rps;
> -		engine->stats.rps = dt;
> +			engine = gt->engine_class[i][j];
> +			if (!engine)
> +				continue;
>   
> -		busy = ktime_to_ns(ktime_sub(dt, last));
> -		for (i = 0; i < ARRAY_SIZE(max_busy); i++) {
> -			if (busy > max_busy[i])
> -				swap(busy, max_busy[i]);
> +			dt = intel_engine_get_busy_time(engine, &timestamp);
> +			last = engine->stats.rps;
> +			engine->stats.rps = dt;
> +
> +			if (!intel_engine_pm_is_awake(engine))
> +				continue;
> +
> +			busy += ktime_to_ns(ktime_sub(dt, last));
> +			count++;
> +		}
> +
> +		if (count > 1)
> +			busy = div_u64(busy, count);
> +		if (busy <= max_busy[ARRAY_SIZE(max_busy) - 1])
> +			continue;
> +
> +		for (j = 0; j < ARRAY_SIZE(max_busy); j++) {
> +			if (busy > max_busy[j])
> +				swap(busy, max_busy[j]);
>   		}
>   	}
> +
>   	last = rps->pm_timestamp;
>   	rps->pm_timestamp = timestamp;
>   
> 


More information about the Intel-gfx mailing list