[Intel-gfx] [PATCH 09/13] drm/i915: Interrupt driven fences

John.C.Harrison at Intel.com John.C.Harrison at Intel.com
Fri Dec 11 05:11:57 PST 2015


From: John Harrison <John.C.Harrison at Intel.com>

The intended usage model for struct fence is that the signalled status
should be set on demand rather than polled. That is, there should not
be a need for a 'signaled' function to be called every time the status
is queried. Instead, 'something' should be done to enable a signal
callback from the hardware which will update the state directly. In
the case of requests, this is the seqno update interrupt. The idea is
that this callback will only be enabled on demand when something
actually tries to wait on the fence.

This change removes the polling test and replaces it with the callback
scheme. Each fence is added to a 'please poke me' list at the start of
i915_add_request(). The interrupt handler then scans through the 'poke
me' list when a new seqno pops out and signals any matching
fence/request. The fence is then removed from the list so the entire
request stack does not need to be scanned every time. Note that the
fence is added to the list before the commands to generate the seqno
interrupt are added to the ring. Thus the sequence is guaranteed to be
race free if the interrupt is already enabled.

Note that the interrupt is only enabled on demand (i.e. when
__wait_request() is called). Thus there is still a potential race when
enabling the interrupt as the request may already have completed.
However, this is simply solved by calling the interrupt processing
code immediately after enabling the interrupt and thereby checking for
already completed requests.

Lastly, the ring clean up code has the possibility to cancel
outstanding requests (e.g. because TDR has reset the ring). These
requests will never get signalled and so must be removed from the
signal list manually. This is done by setting a 'cancelled' flag and
then calling the regular notify/retire code path rather than
attempting to duplicate the list manipulation and clean up code in
multiple places. This also avoids any race condition where the
cancellation request might occur after/during the completion interrupt
actually arriving.

v2: Updated to take advantage of the request unreference no longer
requiring the mutex lock.

v3: Move the signal list processing around to prevent unsubmitted
requests being added to the list. This was occurring on Android
because the native sync implementation calls the
fence->enable_signalling API immediately on fence creation.

Updated after review comments by Tvrtko Ursulin. Renamed list nodes to
'link' instead of 'list'. Added support for returning an error code on
a cancelled fence. Update list processing to be more efficient/safer
with respect to spinlocks.

For: VIZ-5190
Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  10 ++
 drivers/gpu/drm/i915/i915_gem.c         | 188 ++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_irq.c         |   2 +
 drivers/gpu/drm/i915/intel_lrc.c        |   2 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |   2 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |   2 +
 6 files changed, 197 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fbf591f..d013c6d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2187,7 +2187,12 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 struct drm_i915_gem_request {
 	/** Underlying object for implementing the signal/wait stuff. */
 	struct fence fence;
+	struct list_head signal_link;
+	struct list_head unsignal_link;
 	struct list_head delayed_free_link;
+	bool cancelled;
+	bool irq_enabled;
+	bool signal_requested;
 
 	/** On Which ring this request was generated */
 	struct drm_i915_private *i915;
@@ -2265,6 +2270,11 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
 			   struct drm_i915_gem_request **req_out);
 void i915_gem_request_cancel(struct drm_i915_gem_request *req);
 
+void i915_gem_request_submit(struct drm_i915_gem_request *req);
+void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req,
+				       bool fence_locked);
+void i915_gem_request_notify(struct intel_engine_cs *ring, bool fence_locked);
+
 int i915_create_fence_timeline(struct drm_device *dev,
 			       struct intel_context *ctx,
 			       struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f6c3e96..f71215f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1165,6 +1165,8 @@ static int __i915_spin_request(struct drm_i915_gem_request *req)
 
 	timeout = jiffies + 1;
 	while (!need_resched()) {
+		i915_gem_request_notify(req->ring, false);
+
 		if (i915_gem_request_completed(req))
 			return 0;
 
@@ -1173,6 +1175,9 @@ static int __i915_spin_request(struct drm_i915_gem_request *req)
 
 		cpu_relax_lowlatency();
 	}
+
+	i915_gem_request_notify(req->ring, false);
+
 	if (i915_gem_request_completed(req))
 		return 0;
 
@@ -1214,9 +1219,14 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 
 	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
 
-	if (list_empty(&req->list))
+	if (i915_gem_request_completed(req))
 		return 0;
 
+	/*
+	 * Enable interrupt completion of the request.
+	 */
+	fence_enable_sw_signaling(&req->fence);
+
 	if (i915_gem_request_completed(req))
 		return 0;
 
@@ -1377,6 +1387,19 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	list_del_init(&request->list);
 	i915_gem_request_remove_from_client(request);
 
+	/* In case the request is still in the signal pending list */
+	if (!list_empty(&request->signal_link)) {
+		/*
+		 * The request must be marked as cancelled and the underlying
+		 * fence as both failed and signalled. NB: There is no explicit
+		 * fence fail API, there is only a manual poke and signal.
+		 */
+		request->cancelled = true;
+		/* How to propagate to any associated sync_fence??? */
+		request->fence.status = -EIO;
+		fence_signal_locked(&request->fence);
+	}
+
 	i915_gem_request_unreference(request);
 }
 
@@ -2535,6 +2558,12 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	 */
 	request->postfix = intel_ring_get_tail(ringbuf);
 
+	/*
+	 * Add the fence to the pending list before emitting the commands to
+	 * generate a seqno notification interrupt.
+	 */
+	i915_gem_request_submit(request);
+
 	if (i915.enable_execlists)
 		ret = ring->emit_request(request);
 	else {
@@ -2653,25 +2682,135 @@ static void i915_gem_request_free(struct drm_i915_gem_request *req)
 		i915_gem_context_unreference(ctx);
 	}
 
+	if (req->irq_enabled)
+		req->ring->irq_put(req->ring);
+
 	kmem_cache_free(req->i915->requests, req);
 }
 
-static bool i915_gem_request_enable_signaling(struct fence *req_fence)
+/*
+ * The request is about to be submitted to the hardware so add the fence to
+ * the list of signalable fences.
+ *
+ * NB: This does not necessarily enable interrupts yet. That only occurs on
+ * demand when the request is actually waited on. However, adding it to the
+ * list early ensures that there is no race condition where the interrupt
+ * could pop out prematurely and thus be completely lost. The race is merely
+ * that the interrupt must be manually checked for after being enabled.
+ */
+void i915_gem_request_submit(struct drm_i915_gem_request *req)
 {
-	/* Interrupt driven fences are not implemented yet.*/
-	WARN(true, "This should not be called!");
-	return true;
+	unsigned long flags;
+
+	/*
+	 * Always enable signal processing for the request's fence object
+	 * before that request is submitted to the hardware. Thus there is no
+	 * race condition whereby the interrupt could pop out before the
+	 * request has been added to the signal list. Hence no need to check
+	 * for completion, undo the list add and return false.
+	 */
+	i915_gem_request_reference(req);
+	spin_lock_irqsave(&req->ring->fence_lock, flags);
+	WARN_ON(!list_empty(&req->signal_link));
+	list_add_tail(&req->signal_link, &req->ring->fence_signal_list);
+	spin_unlock_irqrestore(&req->ring->fence_lock, flags);
+
+	/*
+	 * NB: Interrupts are only enabled on demand. Thus there is still a
+	 * race where the request could complete before the interrupt has
+	 * been enabled. Thus care must be taken at that point.
+	 */
+
+	 /* Have interrupts already been requested? */
+	 if (req->signal_requested)
+		i915_gem_request_enable_interrupt(req, false);
+}
+
+/*
+ * The request is being actively waited on, so enable interrupt based
+ * completion signalling.
+ */
+void i915_gem_request_enable_interrupt(struct drm_i915_gem_request *req,
+				       bool fence_locked)
+{
+	if (req->irq_enabled)
+		return;
+
+	WARN_ON(!req->ring->irq_get(req->ring));
+	req->irq_enabled = true;
+
+	/*
+	 * Because the interrupt is only enabled on demand, there is a race
+	 * where the interrupt can fire before anyone is looking for it. So
+	 * do an explicit check for missed interrupts.
+	 */
+	i915_gem_request_notify(req->ring, fence_locked);
 }
 
-static bool i915_gem_request_is_completed(struct fence *req_fence)
+static bool i915_gem_request_enable_signaling(struct fence *req_fence)
 {
 	struct drm_i915_gem_request *req = container_of(req_fence,
 						 typeof(*req), fence);
+
+	/*
+	 * No need to actually enable interrupt based processing until the
+	 * request has been submitted to the hardware. At which point
+	 * 'i915_gem_request_submit()' is called. So only really enable
+	 * signalling in there. Just set a flag to say that interrupts are
+	 * wanted when the request is eventually submitted. On the other hand
+	 * if the request has already been submitted then interrupts do need
+	 * to be enabled now.
+	 */
+
+	req->signal_requested = true;
+
+	if (!list_empty(&req->signal_link))
+		i915_gem_request_enable_interrupt(req, true);
+
+	return true;
+}
+
+void i915_gem_request_notify(struct intel_engine_cs *ring, bool fence_locked)
+{
+	struct drm_i915_gem_request *req, *req_next;
+	unsigned long flags;
 	u32 seqno;
 
-	seqno = req->ring->get_seqno(req->ring, false/*lazy_coherency*/);
+	if (list_empty(&ring->fence_signal_list))
+		return;
+
+	if (!fence_locked)
+		spin_lock_irqsave(&ring->fence_lock, flags);
+
+	seqno = ring->get_seqno(ring, false);
+
+	list_for_each_entry_safe(req, req_next, &ring->fence_signal_list, signal_link) {
+		if (!req->cancelled) {
+			if (!i915_seqno_passed(seqno, req->seqno))
+				break;
+		}
 
-	return i915_seqno_passed(seqno, req->seqno);
+		/*
+		 * Start by removing the fence from the signal list otherwise
+		 * the retire code can run concurrently and get confused.
+		 */
+		list_del_init(&req->signal_link);
+
+		if (!req->cancelled) {
+			fence_signal_locked(&req->fence);
+		}
+
+		if (req->irq_enabled) {
+			req->ring->irq_put(req->ring);
+			req->irq_enabled = false;
+		}
+
+		/* Can't unreference here because that might grab fence_lock */
+		list_add_tail(&req->unsignal_link, &ring->fence_unsignal_list);
+	}
+
+	if (!fence_locked)
+		spin_unlock_irqrestore(&ring->fence_lock, flags);
 }
 
 static const char *i915_gem_request_get_driver_name(struct fence *req_fence)
@@ -2711,7 +2850,6 @@ static void i915_gem_request_fence_value_str(struct fence *req_fence, char *str,
 
 static const struct fence_ops i915_gem_request_fops = {
 	.enable_signaling	= i915_gem_request_enable_signaling,
-	.signaled		= i915_gem_request_is_completed,
 	.wait			= fence_default_wait,
 	.release		= i915_gem_request_release,
 	.get_driver_name	= i915_gem_request_get_driver_name,
@@ -2794,6 +2932,7 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
 		goto err;
 	}
 
+	INIT_LIST_HEAD(&req->signal_link);
 	fence_init(&req->fence, &i915_gem_request_fops, &ring->fence_lock,
 		   ctx->engine[ring->id].fence_timeline.fence_context,
 		   i915_fence_timeline_get_next_seqno(&ctx->engine[ring->id].fence_timeline));
@@ -2831,6 +2970,11 @@ void i915_gem_request_cancel(struct drm_i915_gem_request *req)
 {
 	intel_ring_reserved_space_cancel(req->ringbuf);
 
+	req->cancelled = true;
+	/* How to propagate to any associated sync_fence??? */
+	req->fence.status = -EINVAL;
+	fence_signal_locked(&req->fence);
+
 	i915_gem_request_unreference(req);
 }
 
@@ -2924,6 +3068,13 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 		i915_gem_request_retire(request);
 	}
 
+	/*
+	 * Tidy up anything left over. This includes a call to
+	 * i915_gem_request_notify() which will make sure that any requests
+	 * that were on the signal pending list get also cleaned up.
+	 */
+	i915_gem_retire_requests_ring(ring);
+
 	/* Having flushed all requests from all queues, we know that all
 	 * ringbuffers must now be empty. However, since we do not reclaim
 	 * all space when retiring the request (to prevent HEADs colliding
@@ -2969,9 +3120,17 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_request *req, *req_next;
 	LIST_HEAD(list_head);
+	unsigned long flags;
 
 	WARN_ON(i915_verify_lists(ring->dev));
 
+	/*
+	 * If no-one has waited on a request recently then interrupts will
+	 * not have been enabled and thus no requests will ever be marked as
+	 * completed. So do an interrupt check now.
+	 */
+	i915_gem_request_notify(ring, false);
+
 	/* Retire requests first as we use it above for the early return.
 	 * If we retire requests last, we may use a later seqno and so clear
 	 * the requests lists without clearing the active list, leading to
@@ -3013,6 +3172,15 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 		i915_gem_request_assign(&ring->trace_irq_req, NULL);
 	}
 
+	/* Tidy up any requests that were recently signalled */
+	spin_lock_irqsave(&ring->fence_lock, flags);
+	list_splice_init(&ring->fence_unsignal_list, &list_head);
+	spin_unlock_irqrestore(&ring->fence_lock, flags);
+	list_for_each_entry_safe(req, req_next, &list_head, unsignal_link) {
+		list_del(&req->unsignal_link);
+		i915_gem_request_unreference(req);
+	}
+
 	/* Really free any requests that were recently unreferenced */
 	spin_lock(&ring->delayed_free_lock);
 	list_splice_init(&ring->delayed_free_list, &list_head);
@@ -5064,6 +5232,8 @@ init_ring_lists(struct intel_engine_cs *ring)
 {
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
+	INIT_LIST_HEAD(&ring->fence_signal_list);
+	INIT_LIST_HEAD(&ring->fence_unsignal_list);
 	INIT_LIST_HEAD(&ring->delayed_free_list);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 68b094b..74f8552 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -981,6 +981,8 @@ static void notify_ring(struct intel_engine_cs *ring)
 
 	trace_i915_gem_request_notify(ring);
 
+	i915_gem_request_notify(ring, false);
+
 	wake_up_all(&ring->irq_queue);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 06a398a..76fc245 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1920,6 +1920,8 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
+	INIT_LIST_HEAD(&ring->fence_signal_list);
+	INIT_LIST_HEAD(&ring->fence_unsignal_list);
 	INIT_LIST_HEAD(&ring->delayed_free_list);
 	spin_lock_init(&ring->fence_lock);
 	spin_lock_init(&ring->delayed_free_lock);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e5573e7..1dec252 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2158,6 +2158,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	INIT_LIST_HEAD(&ring->request_list);
 	INIT_LIST_HEAD(&ring->execlist_queue);
 	INIT_LIST_HEAD(&ring->buffers);
+	INIT_LIST_HEAD(&ring->fence_signal_list);
+	INIT_LIST_HEAD(&ring->fence_unsignal_list);
 	INIT_LIST_HEAD(&ring->delayed_free_list);
 	spin_lock_init(&ring->fence_lock);
 	spin_lock_init(&ring->delayed_free_lock);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 77384ed..9d09edb 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -354,6 +354,8 @@ struct  intel_engine_cs {
 	u32 (*get_cmd_length_mask)(u32 cmd_header);
 
 	spinlock_t fence_lock;
+	struct list_head fence_signal_list;
+	struct list_head fence_unsignal_list;
 };
 
 bool intel_ring_initialized(struct intel_engine_cs *ring);
-- 
1.9.1



More information about the Intel-gfx mailing list