[Intel-gfx] [RFC 3/4] drm/i915: Interrupt driven fences

John.C.Harrison at Intel.com
Fri Mar 20 10:48:36 PDT 2015


From: John Harrison <John.C.Harrison at Intel.com>

The intended usage model for struct fence is that the signalled status should be
set on demand rather than polled. That is, there should not be a need for a
'signaled' callback to be invoked every time the status is queried. Instead,
'something' should be done to enable a signal callback from the hardware which
will update the state directly. In the case of requests, that 'something' is the
seqno update interrupt. The idea is that the callback will only be enabled on
demand, when something actually tries to wait on the fence.
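
For reference, the fence core only invokes a driver's ->enable_signaling()
callback at most once per fence, and always under fence->lock (which for these
fences is the ring's fence_lock). Roughly, the core helper looks like this
(simplified from drivers/dma-buf/fence.c of this era, with tracing elided):

	void fence_enable_sw_signaling(struct fence *fence)
	{
		unsigned long flags;

		if (!test_and_set_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags) &&
		    !test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
			spin_lock_irqsave(fence->lock, flags);

			/* Returning false means 'already signalled', in which
			 * case the core signals the fence itself. */
			if (!fence->ops->enable_signaling(fence))
				fence_signal_locked(fence);

			spin_unlock_irqrestore(fence->lock, flags);
		}
	}

This is why the i915 callback in the patch can rely on being called at most
once, with the ring's fence_lock already held.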

This change removes the polling test and replaces it with the callback scheme.
To avoid race conditions where a signal can be sent before anyone is waiting
for it, the callback-on-demand feature is not implemented: signalling is
enabled for every request. When the GPU scheduler arrives, it will need to know
about the completion of every single request anyway, so it is far simpler not
to add complex and messy anti-race code in the first place, given that it would
not be needed in the future.

Instead, each fence is added to a 'please poke me' list at the start of
i915_add_request(). This happens before the commands to generate the seqno
interrupt are added to the ring, and is thus guaranteed to be race free. The
interrupt handler then scans through the 'poke me' list when a new seqno pops
out and signals any matching fence/request. The fence is then removed from the
list so that the entire request list does not need to be scanned every time.
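
In outline, the submission-side ordering that makes this race free is (a
condensed sketch of the __i915_add_request() change below, not the verbatim
code):

	/* The request joins the 'poke me' list first... */
	fence_enable_sw_signaling(&request->fence);

	/*
	 * ...and only then are the commands that will write the new seqno
	 * and raise the user interrupt emitted to the ring, so the
	 * interrupt for this request cannot fire before the request is on
	 * the list.
	 */
	ret = ring->emit_request(request);	/* or ring->add_request() */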

The only complication here is that the 'poke me' system requires holding a
reference count on the request to guarantee that it won't be freed prematurely.
Unfortunately, it is unsafe to decrement the reference count from the interrupt
handler: if that is the last reference, the clean-up code gets run, and the
clean-up code is not IRQ friendly. Hence, the request is instead added to a
'please clean me' list that gets processed at retire time. Any request in this
list simply has its count decremented and is then removed from that list.
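
Schematically, the life cycle of that extra reference is (condensed from the
code below; locking and error handling elided):

	/* enable_signaling - process context, fence_lock held: */
	i915_gem_request_reference(req);
	list_add_tail(&req->signal_list, &ring->fence_signal_list);

	/* interrupt handler - must not drop what may be the last
	 * reference, so just move the request to the clean-up list: */
	list_del(&req->signal_list);
	list_add_tail(&req->unsignal_list, &ring->fence_unsignal_list);

	/* retire - process context again, now safe to release: */
	list_del(&request->unsignal_list);
	i915_gem_request_unreference(request);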

Lastly, the ring clean-up code may need to cancel outstanding requests (e.g.
because TDR has reset the ring). These requests will never get signalled and so
must be removed from the signal list manually. This is done by setting a
'cancelled' flag and then calling the regular notify/retire code path rather
than attempting to duplicate the list manipulation and clean-up code in
multiple places.
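
In other words, cancellation reuses the normal path with only the actual
signalling skipped; roughly (condensed from the reset clean-up below):

	/* ring reset / TDR clean-up: */
	request->cancelled = true;

	/* unlinks the request without signalling it... */
	i915_gem_request_notify(ring);

	/* ...and drops the signal list's reference */
	i915_gem_retire_requests_ring(ring);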

For: VIZ-5190
Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |    5 ++
 drivers/gpu/drm/i915/i915_gem.c         |   84 ++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_irq.c         |    3 ++
 drivers/gpu/drm/i915/intel_lrc.c        |    2 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |    2 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |    2 +
 6 files changed, 90 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 28b3c3c..ff662c9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2054,6 +2054,9 @@ struct drm_i915_gem_request {
 	  * re-ordering, pre-emption, etc., there is no guarantee at all
 	  * about the validity or sequentiality of the fence's seqno! */
 	struct fence fence;
+	struct list_head signal_list;
+	struct list_head unsignal_list;
+	bool cancelled;
 
 	/** On which ring this request was generated */
 	struct intel_engine_cs *ring;
@@ -2132,6 +2135,8 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
 			   struct drm_i915_gem_request **req_out);
 void i915_gem_request_cancel(struct drm_i915_gem_request *req);
 
+void i915_gem_request_notify(struct intel_engine_cs *ring);
+
 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
 {
 	return fence_is_signaled(&req->fence);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b1cde7d..27b8893 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2364,6 +2364,12 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	 */
 	request->postfix = intel_ring_get_tail(ringbuf);
 
+	/*
+	 * Add the fence to the pending list before emitting the commands to
+	 * generate a seqno notification interrupt.
+	 */
+	fence_enable_sw_signaling(&request->fence);
+
 	if (i915.enable_execlists)
 		ret = ring->emit_request(request);
 	else
@@ -2492,6 +2498,10 @@ static void i915_gem_free_request(struct drm_i915_gem_request *request)
 
 	put_pid(request->pid);
 
+	/* In case the request is still in the signal pending list */
+	if (!list_empty(&request->signal_list))
+		request->cancelled = true;
+
 	i915_gem_request_unreference(request);
 }
 
@@ -2532,28 +2542,62 @@ static const char *i915_gem_request_get_timeline_name(struct fence *req_fence)
 
 static bool i915_gem_request_enable_signaling(struct fence *req_fence)
 {
-	WARN(true, "Is this required?");
+	struct drm_i915_gem_request *req = container_of(req_fence,
+						 typeof(*req), fence);
+	bool was_empty;
+
+	was_empty = list_empty(&req->ring->fence_signal_list);
+	if (was_empty)
+		WARN_ON(!req->ring->irq_get(req->ring));
+
+	i915_gem_request_reference(req);
+	list_add_tail(&req->signal_list, &req->ring->fence_signal_list);
+
+	/*
+	 * Note that signalling is always enabled for every request before
+	 * that request is submitted to the hardware. Therefore there is
+	 * no race condition whereby the signal could pop out before the
+	 * request has been added to the list. Hence there is no need to
+	 * check for completion, undo the list add and return false.
+	 */
+
 	return true;
 }
 
-static bool i915_gem_request_is_completed(struct fence *req_fence)
+void i915_gem_request_notify(struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_request *req = container_of(req_fence,
-						 typeof(*req), fence);
+	struct drm_i915_gem_request *req, *req_next;
+	unsigned long flags;
 	u32 seqno;
 
-	BUG_ON(req == NULL);
+	if (list_empty(&ring->fence_signal_list))
+		return;
+
+	seqno = ring->get_seqno(ring, false);
+
+	spin_lock_irqsave(&ring->fence_lock, flags);
+	list_for_each_entry_safe(req, req_next, &ring->fence_signal_list, signal_list) {
+		if (!req->cancelled) {
+			if (!i915_seqno_passed(seqno, req->seqno))
+				continue;
 
-	seqno = req->ring->get_seqno(req->ring, false/*lazy_coherency*/);
+			fence_signal_locked(&req->fence);
+		}
+
+		list_del(&req->signal_list);
+		INIT_LIST_HEAD(&req->signal_list);
+		if (list_empty(&req->ring->fence_signal_list))
+			req->ring->irq_put(req->ring);
 
-	return i915_seqno_passed(seqno, req->seqno);
+		list_add_tail(&req->unsignal_list, &req->ring->fence_unsignal_list);
+	}
+	spin_unlock_irqrestore(&ring->fence_lock, flags);
 }
 
 static const struct fence_ops i915_gem_request_fops = {
 	.get_driver_name	= i915_gem_request_get_driver_name,
 	.get_timeline_name	= i915_gem_request_get_timeline_name,
 	.enable_signaling	= i915_gem_request_enable_signaling,
-	.signaled		= i915_gem_request_is_completed,
 	.wait			= fence_default_wait,
 	.release		= i915_gem_request_free,
 };
@@ -2596,6 +2640,7 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
 		return ret;
 	}
 
+	INIT_LIST_HEAD(&request->signal_list);
 	fence_init(&request->fence, &i915_gem_request_fops, &ring->fence_lock, ring->fence_context, request->seqno);
 
 	/*
@@ -2714,6 +2759,13 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 
 		i915_gem_free_request(request);
 	}
+
+	/*
+	 * Make sure any requests that were on the signal pending list get
+	 * cleaned up.
+	 */
+	i915_gem_request_notify(ring);
+	i915_gem_retire_requests_ring(ring);
 }
 
 void i915_gem_restore_fences(struct drm_device *dev)
@@ -2816,6 +2868,20 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 		i915_gem_request_assign(&ring->trace_irq_req, NULL);
 	}
 
+	while (!list_empty(&ring->fence_unsignal_list)) {
+		struct drm_i915_gem_request *request;
+		unsigned long flags;
+
+		spin_lock_irqsave(&ring->fence_lock, flags);
+		request = list_first_entry(&ring->fence_unsignal_list,
+					   struct drm_i915_gem_request,
+					   unsignal_list);
+		list_del(&request->unsignal_list);
+		spin_unlock_irqrestore(&ring->fence_lock, flags);
+
+		i915_gem_request_unreference(request);
+	}
+
 	WARN_ON(i915_verify_lists(ring->dev));
 }
 
@@ -5049,6 +5115,8 @@ init_ring_lists(struct intel_engine_cs *ring)
 {
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
+	INIT_LIST_HEAD(&ring->fence_signal_list);
+	INIT_LIST_HEAD(&ring->fence_unsignal_list);
 }
 
 void i915_init_vm(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index cc2796b..d1cf226 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -994,6 +994,8 @@ static void notify_ring(struct drm_device *dev,
 
 	trace_i915_gem_request_notify(ring);
 
+	i915_gem_request_notify(ring);
+
 	wake_up_all(&ring->irq_queue);
 }
 
@@ -2959,6 +2961,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 			DRM_INFO("%s on %s\n",
 				 stuck[i] ? "stuck" : "no progress",
 				 ring->name);
+			trace_printk("%s:%d> \x1B[31;1m<%s> Borked: %s @ %d!\x1B[0m\n", __func__, __LINE__, ring->name, stuck[i] ? "stuck" : "no progress", ring->hangcheck.seqno);
 			rings_hung++;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c1072b1..d87126e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1337,6 +1337,8 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
+	INIT_LIST_HEAD(&ring->fence_signal_list);
+	INIT_LIST_HEAD(&ring->fence_unsignal_list);
 	spin_lock_init(&ring->fence_lock);
 	init_waitqueue_head(&ring->irq_queue);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index fd65c0d..9d7ad51 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1981,6 +1981,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	INIT_LIST_HEAD(&ring->execlist_queue);
+	INIT_LIST_HEAD(&ring->fence_signal_list);
+	INIT_LIST_HEAD(&ring->fence_unsignal_list);
 	spin_lock_init(&ring->fence_lock);
 	ringbuf->size = 32 * PAGE_SIZE;
 	ringbuf->ring = ring;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a0ce08e..7412fe4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -311,6 +311,8 @@ struct  intel_engine_cs {
 
 	unsigned fence_context;
 	spinlock_t fence_lock;
+	struct list_head fence_signal_list;
+	struct list_head fence_unsignal_list;
 };
 
 bool intel_ring_initialized(struct intel_engine_cs *ring);
-- 
1.7.9.5


