[PATCH 11/11] drm/i915/gt: Try to smooth RPS spikes

Mon Apr 20 01:23:20 UTC 2020

By the time we respond to the RPS interrupt [inside a worker], the GPU
may be running a different workload. As we look to make the evaluation
intervals shorter, these spikes are more likely to occur. Let's try to
smooth over the spikes in the workload by comparing the EI interrupt
[up/down events] with the most recently completed EI; if both say up,
then increase the clocks, if they disagree stay the same. In principle,
this means we now take 2 up EI to increase into the next bin, and
similarly 2 down EI to decrease. However, if the worker runs fast enough,
the previous EI in the registers will be the same one that triggered the
interrupt, so responsiveness remains unaffected. [Under the current scheme
where EI are on the order of 10ms, it is likely that this is true and we
compare the interrupt with the EI that caused it.]

As usual, Valleyview just likes to be different; there, since we are
manually evaluating the threshold, we cannot sample the previous EI
registers.

References: https://gitlab.freedesktop.org/drm/intel/-/issues/1698
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Cc: Andi Shyti <andi.shyti at intel.com>
---
 drivers/gpu/drm/i915/gt/intel_rps.c | 54 +++++++++++++++++++++++++----
 1 file changed, 47 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 9ab90bf12ee6..34ba971d59e8 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1461,6 +1461,11 @@ static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
 	ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
 }
 
+static bool vlv_manual_ei(u32 pm_iir)
+{
+	return pm_iir & GEN6_PM_RP_UP_EI_EXPIRED;
+}
+
 static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
 {
 	struct intel_uncore *uncore = rps_to_uncore(rps);
@@ -1468,7 +1473,7 @@ static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
 	struct intel_rps_ei now;
 	u32 events = 0;
 
-	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+	if (!vlv_manual_ei(pm_iir))
 		return 0;
 
 	vlv_c0_read(uncore, &now);
@@ -1501,6 +1506,37 @@ static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
 	return events;
 }
 
+static bool __confirm_ei(struct intel_rps *rps,
+			 i915_reg_t ei_sample,
+			 i915_reg_t ei_threshold)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	u32 threshold, sample;
+
+	sample = intel_uncore_read(uncore, ei_sample);
+	threshold = intel_uncore_read(uncore, ei_threshold);
+
+	sample &= GEN6_CURBSYTAVG_MASK;
+
+	return sample > threshold;
+}
+
+static bool confirm_up(struct intel_rps *rps, u32 pm_iir)
+{
+	if (vlv_manual_ei(pm_iir))
+		return true;
+
+	return __confirm_ei(rps, GEN6_RP_PREV_UP, GEN6_RP_UP_THRESHOLD);
+}
+
+static bool confirm_down(struct intel_rps *rps, u32 pm_iir)
+{
+	if (vlv_manual_ei(pm_iir))
+		return true;
+	/* Confirmed if the last down EI did not exceed the down threshold */
+	return !__confirm_ei(rps, GEN6_RP_PREV_DOWN, GEN6_RP_DOWN_THRESHOLD);
+}
+
 static void rps_work(struct work_struct *work)
 {
 	struct intel_rps *rps = container_of(work, typeof(*rps), work);
@@ -1535,10 +1571,11 @@ static void rps_work(struct work_struct *work)
 		 pm_iir, yesno(client_boost),
 		 adj, new_freq, min, max);
 
-	if (client_boost && new_freq < rps->boost_freq) {
+	if (client_boost && new_freq <= rps->boost_freq) {
 		new_freq = rps->boost_freq;
 		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
+	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD &&
+		   confirm_up(rps, pm_iir)) {
 		if (adj > 0)
 			adj *= 2;
 		else /* CHV needs even encode values */
@@ -1548,13 +1585,15 @@ static void rps_work(struct work_struct *work)
 			adj = 0;
 	} else if (client_boost) {
 		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
+	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT &&
+		   confirm_down(rps, pm_iir)) {
 		if (rps->cur_freq > rps->efficient_freq)
 			new_freq = rps->efficient_freq;
 		else if (rps->cur_freq > rps->min_freq_softlimit)
 			new_freq = rps->min_freq_softlimit;
 		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
+	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD &&
+		   confirm_down(rps, pm_iir)) {
 		if (adj < 0)
 			adj *= 2;
 		else /* CHV needs even encode values */
@@ -1562,8 +1601,8 @@ static void rps_work(struct work_struct *work)
 
 		if (new_freq <= rps->min_freq_softlimit)
 			adj = 0;
-	} else { /* unknown event */
-		adj = 0;
+	} else { /* unknown event, or unwanted */
+		goto unlock;
 	}
 
 	/*
@@ -1579,6 +1618,7 @@ static void rps_work(struct work_struct *work)
 	}
 	rps->last_adj = adj;
 
+unlock:
 	mutex_unlock(&rps->lock);
 
 out:
-- 
2.20.1