[PATCH 31/39] drm/i915/gt: Enable ring scheduling for gen5-7

Tue Feb 9 13:08:27 UTC 2021

Switch over from FIFO global submission to the priority-sorted
topographical scheduler. At the cost of more busy work on the CPU to
keep the GPU supplied with the next packet of requests, this allows us
to reorder requests around submission stalls and so allow low latency
under load while maintaining fairness between clients.

The downside is that we enable interrupts on all requests (unlike with
execlists where we have an interrupt for context switches). This means
that instead of receiving an interrupt for when we are waitng for
completion, we are processing them all the time, with noticeable
overhead of cpu time absorbed by the interrupt handler. The effect is
most pronounced on CPU-throughput limited renderers like uxa, where
performance can be degraded by 20% in the worst case. Nevertheless, this
is a pathological example of an obsolete userspace driver. (There are
also cases where uxa performs better by 20%, which is an interesting
quirk...) The glxgears-not-a-benchmark (cpu throughtput bound) is one
such example of a performance hit, only affecting uxa.

The expectation is that allowing request reordering will allow much
smoother UX that greatly compensates for reduced throughput under high
submission load (but low GPU load).

This also enables the timer based RPS for better powersaving, with the
exception of Valleyview whose PCU doesn't take kindly to our
interference.

References: 0f46832fab77 ("drm/i915: Mask USER interrupts on gen6 (until required)")
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 2 +-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c             | 5 ++++-
 drivers/gpu/drm/i915/gt/intel_gt_types.h              | 1 +
 drivers/gpu/drm/i915/gt/intel_rps.c                   | 6 ++----
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index df949320f2b5..1ecd362b131a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -94,7 +94,7 @@ static int live_nop_switch(void *arg)
 			rq = i915_request_get(this);
 			i915_request_add(this);
 		}
-		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		if (i915_request_wait(rq, 0, HZ) < 0) {
 			pr_err("Failed to populated %d contexts\n", nctx);
 			intel_gt_set_wedged(&i915->gt);
 			i915_request_put(rq);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 7d34bf03670b..9c731338837d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -882,8 +882,11 @@ int intel_engines_init(struct intel_gt *gt)
 	} else if (HAS_EXECLISTS(gt->i915)) {
 		gt->submission_method = INTEL_SUBMISSION_ELSP;
 		setup = intel_execlists_submission_setup;
-	} else {
+	} else if (INTEL_GEN(gt->i915) >= 5) {
 		gt->submission_method = INTEL_SUBMISSION_RING;
+		setup = intel_ring_scheduler_setup;
+	} else {
+		gt->submission_method = INTEL_SUBMISSION_LEGACY;
 		setup = intel_ring_submission_setup;
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 626af37c7790..125b40f62644 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -30,6 +30,7 @@ struct intel_engine_cs;
 struct intel_uncore;
 
 enum intel_submission_method {
+	INTEL_SUBMISSION_LEGACY,
 	INTEL_SUBMISSION_RING,
 	INTEL_SUBMISSION_ELSP,
 	INTEL_SUBMISSION_GUC,
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 97cab1b99871..80044ece2b12 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1081,9 +1081,7 @@ static bool gen6_rps_enable(struct intel_rps *rps)
 	intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
 	intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
 
-	rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
-			  GEN6_PM_RP_DOWN_THRESHOLD |
-			  GEN6_PM_RP_DOWN_TIMEOUT);
+	rps->pm_events = GEN6_PM_RP_UP_THRESHOLD | GEN6_PM_RP_DOWN_THRESHOLD;
 
 	return rps_reset(rps);
 }
@@ -1391,7 +1389,7 @@ void intel_rps_enable(struct intel_rps *rps)
 	GEM_BUG_ON(rps->efficient_freq < rps->min_freq);
 	GEM_BUG_ON(rps->efficient_freq > rps->max_freq);
 
-	if (has_busy_stats(rps))
+	if (has_busy_stats(rps) && !IS_VALLEYVIEW(i915))
 		intel_rps_set_timer(rps);
 	else if (INTEL_GEN(i915) >= 6)
 		intel_rps_set_interrupts(rps);
-- 
2.20.1