[PATCH 15/45] drm/i915: Keep a global seqno per-engine

Chris Wilson chris at chris-wilson.co.uk
Tue Dec 6 00:12:29 UTC 2016


Replace the global device seqno with one for each engine, and account
for in-flight seqno on each separately. This is consistent with
dma-fence as each timeline has separate fence-contexts for each engine
and a seqno is only ordered within a fence-context (i.e.  seqno do not
need to be ordered wrt to other engines, just ordered within a single
engine). This is required to enable request rewinding for preemption on
individual engines (we have to rewind the global seqno to avoid
overflow, and we do not have to rewind all engines just to preempt one.)

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  5 +--
 drivers/gpu/drm/i915/i915_gem_request.c  | 62 +++++++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_request.h  |  8 +----
 drivers/gpu/drm/i915/i915_gem_timeline.h |  4 +--
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 33 ++++++++---------
 drivers/gpu/drm/i915/intel_engine_cs.c   |  2 --
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  4 +--
 7 files changed, 48 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1b59d1251da8..a3582eee235a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1024,10 +1024,7 @@ static const struct file_operations i915_error_state_fops = {
 static int
 i915_next_seqno_get(void *data, u64 *val)
 {
-	struct drm_i915_private *dev_priv = data;
-
-	*val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
-	return 0;
+	return -ENODEV;
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 59c2a1f276ba..eb64738bf3f9 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -213,7 +213,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute));
 	GEM_BUG_ON(!i915_gem_request_completed(request));
+
 	GEM_BUG_ON(!request->i915->gt.active_requests);
+	GEM_BUG_ON(!request->engine->timeline->active_seqno);
 
 	trace_i915_gem_request_retire(request);
 
@@ -237,6 +239,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 				 &request->i915->gt.idle_work,
 				 msecs_to_jiffies(100));
 	}
+	request->engine->timeline->active_seqno--;
 
 	/* Walk through the active list, calling retire on each. This allows
 	 * objects to track their GPU activity and mark themselves as idle
@@ -345,15 +348,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
 	GEM_BUG_ON(i915->gt.active_requests > 1);
 
 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-	if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) {
-		while (intel_breadcrumbs_busy(i915))
-			cond_resched(); /* spin until threads are complete */
-	}
-	atomic_set(&timeline->seqno, seqno);
+	for_each_engine(engine, i915, id) {
+		struct intel_timeline *tl = &timeline->engine[id];
 
-	/* Finally reset hw state */
-	for_each_engine(engine, i915, id)
+		if (!i915_seqno_passed(seqno, tl->seqno)) {
+			/* spin until threads are complete */
+			while (intel_breadcrumbs_busy(engine))
+				cond_resched();
+		}
+
+		/* Finally reset hw state */
+		tl->seqno = seqno;
 		intel_engine_init_global_seqno(engine, seqno);
+	}
 
 	list_for_each_entry(timeline, &i915->gt.timelines, link) {
 		for_each_engine(engine, i915, id) {
@@ -381,34 +388,28 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
 	return i915_gem_init_global_seqno(dev_priv, seqno - 1);
 }
 
-static int reserve_global_seqno(struct drm_i915_private *i915)
+static int reserve_global_seqno(struct intel_engine_cs *engine)
 {
-	u32 active_requests = ++i915->gt.active_requests;
-	u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
+	u32 active = ++engine->timeline->active_seqno;
+	u32 seqno = engine->timeline->seqno;
 	int ret;
 
 	/* Reservation is fine until we need to wrap around */
-	if (likely(seqno + active_requests > seqno))
+	if (likely(seqno + active > seqno))
 		return 0;
 
-	ret = i915_gem_init_global_seqno(i915, 0);
+	ret = i915_gem_init_global_seqno(engine->i915, 0);
 	if (ret) {
-		i915->gt.active_requests--;
+		engine->timeline->active_seqno--;
 		return ret;
 	}
 
 	return 0;
 }
 
-static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
+static u32 timeline_get_seqno(struct intel_timeline *tl)
 {
-	/* seqno only incremented under a mutex */
-	return ++tl->seqno.counter;
-}
-
-static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
-{
-	return atomic_inc_return(&tl->seqno);
+	return ++tl->seqno;
 }
 
 void __i915_gem_request_submit(struct drm_i915_gem_request *request)
@@ -422,14 +423,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 	GEM_BUG_ON(timeline == request->timeline);
 	assert_spin_locked(&timeline->lock);
 
-	seqno = timeline_get_seqno(timeline->common);
+	seqno = timeline_get_seqno(timeline);
 	GEM_BUG_ON(!seqno);
 	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
 
-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
-	request->previous_seqno = timeline->last_submitted_seqno;
-	timeline->last_submitted_seqno = seqno;
-
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 	request->global_seqno = seqno;
@@ -536,7 +533,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = reserve_global_seqno(dev_priv);
+	ret = reserve_global_seqno(engine);
 	if (ret)
 		goto err_unpin;
 
@@ -588,7 +585,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		       &i915_fence_ops,
 		       &req->lock,
 		       req->timeline->fence_context,
-		       __timeline_get_seqno(req->timeline->common));
+		       timeline_get_seqno(req->timeline));
 
 	/* We bump the ref for the fence chain */
 	i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
@@ -643,7 +640,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 	kmem_cache_free(dev_priv->requests, req);
 err_unreserve:
-	dev_priv->gt.active_requests--;
+	engine->timeline->active_seqno--;
 err_unpin:
 	engine->context_unpin(engine, ctx);
 	return ERR_PTR(ret);
@@ -904,16 +901,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	list_add_tail(&request->link, &timeline->requests);
 	spin_unlock_irq(&timeline->lock);
 
-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
-				     request->fence.seqno));
-
-	timeline->last_submitted_seqno = request->fence.seqno;
 	i915_gem_active_set(&timeline->last_request, request);
 
 	list_add_tail(&request->ring_link, &ring->request_list);
 	request->emitted_jiffies = jiffies;
 
-	i915_gem_mark_busy(engine);
+	if (!request->i915->gt.active_requests++)
+		i915_gem_mark_busy(engine);
 
 	/* Let the backend know a new request has arrived that may need
 	 * to adjust the existing execution schedule due to a high priority
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 8569b35a332a..e74513c27dc1 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -145,12 +145,6 @@ struct drm_i915_gem_request {
 
 	u32 global_seqno;
 
-	/** GEM sequence number associated with the previous request,
-	 * when the HWS breadcrumb is equal to this the GPU is processing
-	 * this request.
-	 */
-	u32 previous_seqno;
-
 	/** Position in the ring of the start of the request */
 	u32 head;
 
@@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
 	GEM_BUG_ON(!req->global_seqno);
 	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->previous_seqno);
+				 req->global_seqno - 1);
 }
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index f2e51f42cc2f..bc101e644143 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -33,7 +33,8 @@ struct i915_gem_timeline;
 
 struct intel_timeline {
 	u64 fence_context;
-	u32 last_submitted_seqno;
+	u32 seqno;
+	u32 active_seqno;
 
 	spinlock_t lock;
 
@@ -56,7 +57,6 @@ struct intel_timeline {
 
 struct i915_gem_timeline {
 	struct list_head link;
-	atomic_t seqno;
 
 	struct drm_i915_private *i915;
 	const char *name;
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index eef32fa733b6..0e8bb8cf0905 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -645,31 +645,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 	cancel_fake_irq(engine);
 }
 
-unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915)
+bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	unsigned int mask = 0;
-
-	for_each_engine(engine, i915, id) {
-		struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-		spin_lock_irq(&b->lock);
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	bool busy = false;
 
-		if (b->first_wait) {
-			wake_up_process(b->first_wait->tsk);
-			mask |= intel_engine_flag(engine);
-		}
+	spin_lock_irq(&b->lock);
 
-		if (b->first_signal) {
-			wake_up_process(b->signaler);
-			mask |= intel_engine_flag(engine);
-		}
+	if (b->first_wait) {
+		wake_up_process(b->first_wait->tsk);
+		busy |= intel_engine_flag(engine);
+	}
 
-		spin_unlock_irq(&b->lock);
+	if (b->first_signal) {
+		wake_up_process(b->signaler);
+		busy |= intel_engine_flag(engine);
 	}
 
-	return mask;
+	spin_unlock_irq(&b->lock);
+
+	return busy;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 97bbbc3d6aa8..19f63c1b7a55 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -212,8 +212,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 		engine->irq_seqno_barrier(engine);
 
 	GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
-	engine->timeline->last_submitted_seqno = seqno;
-
 	engine->hangcheck.seqno = seqno;
 
 	/* After manually advancing the seqno, fake the interrupt in case
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4208715425eb..58a0c4ed1a22 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -542,7 +542,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
 	 * wtih serialising this hint with anything, so document it as
 	 * a hint and nothing more.
 	 */
-	return READ_ONCE(engine->timeline->last_submitted_seqno);
+	return READ_ONCE(engine->timeline->seqno);
 }
 
 int init_workarounds_ring(struct intel_engine_cs *engine);
@@ -615,6 +615,6 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
 
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
+bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
 
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.11.0



More information about the Intel-gfx-trybot mailing list