[PATCH 20/23] drm/i915: Prioritise non-busywait semaphore workloads

Chris Wilson <chris@chris-wilson.co.uk>
Sat Feb 2 22:44:47 UTC 2019


We don't want to busywait on the GPU if we have other work to do. If we
give non-busywaiting workloads a higher (initial) priority than workloads
that require a busywait, we will prioritise work that is ready to run
immediately. We must then also be careful not to give earlier semaphores
an accidental boost simply because later work does not wait on other
rings, hence we keep a history of semaphore usage along the dependency
chain.

Testcase: igt/gem_exec_schedule/semaphore
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
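[Review aside, not part of the patch: a minimal compilable sketch of the
priority packing this patch produces. The defines are copied from the
i915_scheduler.h hunk below; main() and its values are invented purely
for illustration.]

#include <stdio.h>

#define BIT(n)				(1u << (n))

#define I915_USER_PRIORITY_SHIFT	3	/* bumped from 2 by this patch */
#define I915_USER_PRIORITY(x)		((x) << I915_USER_PRIORITY_SHIFT)

#define I915_PRIORITY_WAIT		BIT(0)
#define I915_PRIORITY_NEWCLIENT		BIT(1)
#define I915_PRIORITY_NOSEMAPHORE	BIT(2)

int main(void)
{
	/* Two requests at the same user priority... */
	unsigned int busywaiter = I915_USER_PRIORITY(1);
	unsigned int ready = I915_USER_PRIORITY(1) | I915_PRIORITY_NOSEMAPHORE;

	/*
	 * ...the one that is ready to run immediately sorts higher; the
	 * boost bits sit below the user priority, so NOSEMAPHORE only
	 * breaks ties and never overrides a user priority difference.
	 */
	printf("ready=%u > busywaiter=%u\n", ready, busywaiter);
	return 0;
}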
 drivers/gpu/drm/i915/i915_request.c   | 17 ++++++++++++++++-
 drivers/gpu/drm/i915/i915_scheduler.c | 10 +++++++++-
 drivers/gpu/drm/i915/i915_scheduler.h | 10 ++++++----
 drivers/gpu/drm/i915/intel_lrc.c      |  5 +++--
 4 files changed, 34 insertions(+), 8 deletions(-)
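[A second review aside: a toy model of the semaphore-history bookkeeping
added to i915_scheduler.c below. The struct and helper names here are
invented; the real state is i915_sched_node.semaphore and the shift
lives in __i915_sched_node_add_dependency().]

#include <stdbool.h>
#include <stdio.h>

struct toy_node {
	unsigned long semaphore;	/* bit 0 == I915_SCHED_HAS_SEMAPHORE */
	bool started;
};

static void toy_add_dependency(struct toy_node *node,
			       const struct toy_node *signal)
{
	/* Mirrors the new check in __i915_sched_node_add_dependency(). */
	if (signal->semaphore && !signal->started)
		node->semaphore |= signal->semaphore << 1;
}

int main(void)
{
	struct toy_node a = { .semaphore = 1 };	/* emits a busywait itself */
	struct toy_node b = { 0 }, c = { 0 };

	toy_add_dependency(&b, &a);	/* b inherits a's history at bit 1 */
	toy_add_dependency(&c, &b);	/* c sees it at bit 2 */

	/*
	 * Any non-zero history denies the NOSEMAPHORE boost in
	 * i915_request_add(), so b and c cannot drag the distant
	 * semaphore past other useful work via PI; only bit 0 (a
	 * direct busywait) blocks request merging in can_merge_rq().
	 */
	printf("a=%#lx b=%#lx c=%#lx\n",	/* a=0x1 b=0x2 c=0x4 */
	       a.semaphore, b.semaphore, c.semaphore);
	return 0;
}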

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 2789530b00f8..c15d52b61baa 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -821,7 +821,7 @@ emit_semaphore_wait(struct i915_request *to,
 
 	intel_ring_advance(to, cs);
 
-	to->sched.semaphore = true;
+	to->sched.semaphore |= I915_SCHED_HAS_SEMAPHORE;
 	return 0;
 }
 
@@ -1092,6 +1092,21 @@ void i915_request_add(struct i915_request *request)
 	if (engine->schedule) {
 		struct i915_sched_attr attr = request->gem_context->sched;
 
+		/*
+		 * Boost actual workloads past semaphores!
+		 *
+		 * With semaphores we spin on one engine waiting for another,
+		 * simply to reduce the latency of starting our work when
+		 * the signaler completes. However, if there is any other
+		 * work that we could be doing on this engine instead, that
+		 * is better utilisation and will reduce the overall duration
+		 * of the current work. To avoid PI boosting a distant
+		 * semaphore over more useful work, we keep a history of
+		 * any semaphore use along our dependency chain.
+		 */
+		if (!request->sched.semaphore)
+			attr.priority |= I915_PRIORITY_NOSEMAPHORE;
+
 		/*
 		 * Boost priorities to new clients (new request flows).
 		 *
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index dee81d1e618b..99a698c28877 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -28,13 +28,18 @@ static inline bool node_signaled(const struct i915_sched_node *node)
 	return i915_request_completed(node_to_request(node));
 }
 
+static inline bool node_started(const struct i915_sched_node *node)
+{
+	return i915_request_started(node_to_request(node));
+}
+
 void i915_sched_node_init(struct i915_sched_node *node)
 {
 	INIT_LIST_HEAD(&node->signalers_list);
 	INIT_LIST_HEAD(&node->waiters_list);
 	INIT_LIST_HEAD(&node->link);
 	node->attr.priority = I915_PRIORITY_INVALID;
-	node->semaphore = false;
+	node->semaphore = 0;
 }
 
 static struct i915_dependency *
@@ -65,6 +70,9 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
 		dep->signaler = signal;
 		dep->flags = flags;
 
+		if (signal->semaphore && !node_started(signal))
+			node->semaphore |= signal->semaphore << 1;
+
 		ret = true;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 3de5c5c57375..4f33ebee8d7a 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -24,14 +24,15 @@ enum {
 	I915_PRIORITY_INVALID = INT_MIN
 };
 
-#define I915_USER_PRIORITY_SHIFT 2
+#define I915_USER_PRIORITY_SHIFT 3
 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
 
 #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
 #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
 
-#define I915_PRIORITY_WAIT	((u8)BIT(0))
-#define I915_PRIORITY_NEWCLIENT	((u8)BIT(1))
+#define I915_PRIORITY_WAIT		((u8)BIT(0))
+#define I915_PRIORITY_NEWCLIENT		((u8)BIT(1))
+#define I915_PRIORITY_NOSEMAPHORE	((u8)BIT(2))
 
 struct i915_sched_attr {
 	/**
@@ -72,7 +73,8 @@ struct i915_sched_node {
 	struct list_head waiters_list; /* those after us, they depend upon us */
 	struct list_head link;
 	struct i915_sched_attr attr;
-	bool semaphore;
+	unsigned long semaphore;
+#define I915_SCHED_HAS_SEMAPHORE	BIT(0)
 };
 
 struct i915_dependency {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 12cf133d58de..c18c53b69241 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -164,7 +164,7 @@
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
-#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT)
+#define ACTIVE_PRIORITY (I915_PRIORITY_NEWCLIENT | I915_PRIORITY_NOSEMAPHORE)
 
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_engine_cs *engine,
@@ -615,7 +615,8 @@ static bool can_merge_rq(const struct i915_request *prev,
 	 * rules should mean that if this semaphore is preempted, its
 	 * dependency chain is preserved and suitably promoted via PI.
 	 */
-	if (prev->sched.semaphore && !i915_request_started(prev))
+	if (prev->sched.semaphore & I915_SCHED_HAS_SEMAPHORE &&
+	    !i915_request_started(prev))
 		return false;
 
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
-- 
2.20.1


