[PATCH 6/7] drm/i915/execlists: Force single submission for sentinels

Chris Wilson <chris@chris-wilson.co.uk>
Thu Mar 19 17:30:58 UTC 2020


Currently, we only combine a sentinel request with a max-priority
barrier, such that a sentinel request is always in ELSP[0] with nothing
following it. However, we will want to create similar ELSP[] submissions
that provide a full barrier in the submission queue, but without forcing
maximum priority. As such, I915_FENCE_FLAG_SENTINEL takes on the
single-submission property, and so we can remove the GVT special casing.

v2: Do not coalesce sentinels within the same context (this preserves
GVT's requirement).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
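For anyone skimming the dequeue changes, a rough standalone sketch of the
coalescing rule the patch ends up with: two requests may share an ELSP
port only if they come from the same context, neither carries the
sentinel flag, and they agree on the no-preempt flag. The names below
(mock_request, FLAG_*, can_coalesce) are simplified stand-ins for
illustration only, not the driver's actual types.

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for I915_FENCE_FLAG_NOPREEMPT / I915_FENCE_FLAG_SENTINEL */
#define FLAG_NOPREEMPT	(1u << 0)
#define FLAG_SENTINEL	(1u << 1)

struct mock_request {
	const void *context;	/* identity of the submitting context */
	unsigned int flags;	/* FLAG_* bits */
};

static bool can_coalesce(const struct mock_request *prev,
			 const struct mock_request *next)
{
	/* Only requests from the same context may share a port. */
	if (prev->context != next->context)
		return false;

	/* A sentinel must sit alone in ELSP[0]; never merge around one. */
	if ((prev->flags | next->flags) & FLAG_SENTINEL)
		return false;

	/* Do not hide a change of preempt status behind the last request. */
	if ((prev->flags ^ next->flags) & FLAG_NOPREEMPT)
		return false;

	return true;
}

int main(void)
{
	int ctx_a = 0;
	struct mock_request normal = { &ctx_a, 0 };
	struct mock_request sentinel = { &ctx_a, FLAG_SENTINEL };

	printf("normal + normal:   %d\n", can_coalesce(&normal, &normal));
	printf("normal + sentinel: %d\n", can_coalesce(&normal, &sentinel));
	return 0;
}

Testing the sentinel bit as an OR over both requests is what keeps a
sentinel solitary in its port, regardless of which side of the
comparison it lands on.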
 drivers/gpu/drm/i915/gt/intel_context.h       | 24 ++++----
 drivers/gpu/drm/i915/gt/intel_context_types.h |  4 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 60 +++++++++----------
 drivers/gpu/drm/i915/gvt/scheduler.c          |  7 ++-
 4 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 6f4e6c75fbd5..d75716839817 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -216,18 +216,6 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
 	return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
 }
 
-static inline bool
-intel_context_force_single_submission(const struct intel_context *ce)
-{
-	return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
-}
-
-static inline void
-intel_context_set_single_submission(struct intel_context *ce)
-{
-	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
-}
-
 static inline bool
 intel_context_nopreempt(const struct intel_context *ce)
 {
@@ -246,6 +234,18 @@ intel_context_clear_nopreempt(struct intel_context *ce)
 	clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
 }
 
+static inline bool
+intel_context_is_gvt(const struct intel_context *ce)
+{
+	return test_bit(CONTEXT_GVT, &ce->flags);
+}
+
+static inline void
+intel_context_set_gvt(struct intel_context *ce)
+{
+	set_bit(CONTEXT_GVT, &ce->flags);
+}
+
 static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
 {
 	const u32 period =
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 07cb83a0d017..418516fd9b9e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -65,8 +65,8 @@ struct intel_context {
 #define CONTEXT_CLOSED_BIT		3
 #define CONTEXT_USE_SEMAPHORES		4
 #define CONTEXT_BANNED			5
-#define CONTEXT_FORCE_SINGLE_SUBMISSION	6
-#define CONTEXT_NOPREEMPT		7
+#define CONTEXT_NOPREEMPT		6
+#define CONTEXT_GVT			7
 
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index ac7ad5f7bceb..5cf36813d353 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1579,27 +1579,42 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
 }
 
-static bool ctx_single_port_submission(const struct intel_context *ce)
+static bool can_merge_ctx(const struct intel_context *prev,
+			  const struct intel_context *next)
 {
-	return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
-		intel_context_force_single_submission(ce));
+	return prev == next;
 }
 
-static bool can_merge_ctx(const struct intel_context *prev,
-			  const struct intel_context *next)
+static unsigned long i915_request_flags(const struct i915_request *rq)
 {
-	if (prev != next)
-		return false;
+	return READ_ONCE(rq->fence.flags);
+}
 
-	if (ctx_single_port_submission(prev))
-		return false;
+static bool has_sentinel(const struct i915_request *prev,
+			 const struct i915_request *next)
+{
+	unsigned int p_flags = i915_request_flags(prev);
+	unsigned int n_flags = i915_request_flags(next);
 
-	return true;
+	return (p_flags | n_flags) & BIT(I915_FENCE_FLAG_SENTINEL);
 }
 
-static unsigned long i915_request_flags(const struct i915_request *rq)
+static bool
+can_merge_flags(const struct i915_request *prev,
+		const struct i915_request *next)
 {
-	return READ_ONCE(rq->fence.flags);
+	unsigned int p_flags = i915_request_flags(prev);
+	unsigned int n_flags = i915_request_flags(next);
+
+	/* If either request is a sentinel, we do not allow ctx coalescing */
+	if ((p_flags | n_flags) & BIT(I915_FENCE_FLAG_SENTINEL))
+		return false;
+
+	/* Do not hide changes of preempt status as we only check the last rq */
+	if ((p_flags ^ n_flags) & BIT(I915_FENCE_FLAG_NOPREEMPT))
+		return false;
+
+	return true;
 }
 
 static bool can_merge_rq(const struct i915_request *prev,
@@ -1619,16 +1634,11 @@ static bool can_merge_rq(const struct i915_request *prev,
 	if (i915_request_completed(next))
 		return true;
 
-	if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) &
-		     (BIT(I915_FENCE_FLAG_NOPREEMPT) |
-		      BIT(I915_FENCE_FLAG_SENTINEL))))
-		return false;
-
 	if (!can_merge_ctx(prev->context, next->context))
 		return false;
 
 	GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno));
-	return true;
+	return likely(can_merge_flags(prev, next));
 }
 
 static void virtual_update_register_offsets(u32 *regs,
@@ -2125,18 +2135,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				if (last->context == rq->context)
 					goto done;
 
-				if (i915_request_has_sentinel(last))
-					goto done;
-
-				/*
-				 * If GVT overrides us we only ever submit
-				 * port[0], leaving port[1] empty. Note that we
-				 * also have to be careful that we don't queue
-				 * the same context (even though a different
-				 * request) to the second port.
-				 */
-				if (ctx_single_port_submission(last->context) ||
-				    ctx_single_port_submission(rq->context))
+				/* Sentinels are solitary in ELSP[0]. */
+				if (has_sentinel(last, rq))
 					goto done;
 
 				merge = false;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 1c95bf8cbed0..4fccf4b194b0 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -204,9 +204,9 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	return 0;
 }
 
-static inline bool is_gvt_request(struct i915_request *rq)
+static inline bool is_gvt_request(const struct i915_request *rq)
 {
-	return intel_context_force_single_submission(rq->context);
+	return intel_context_is_gvt(rq->context);
 }
 
 static void save_ring_hw_state(struct intel_vgpu *vgpu,
@@ -401,6 +401,7 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
 		return PTR_ERR(rq);
 	}
 
+	__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
 	workload->req = i915_request_get(rq);
 	return 0;
 }
@@ -1226,7 +1227,7 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 
 		i915_vm_put(ce->vm);
 		ce->vm = i915_vm_get(&ppgtt->vm);
-		intel_context_set_single_submission(ce);
+		intel_context_set_gvt(ce);
 
 		/* Max ring buffer size */
 		if (!intel_uc_wants_guc_submission(&engine->gt->uc)) {
-- 
2.20.1


