[PATCH 3/3] drm/i915: Wait for *all* requests before unwedging

Chris Wilson <chris at chris-wilson.co.uk>
Mon Mar 5 10:36:22 UTC 2018


During wedging, we may push the requests onto the execution queue
out-of-order, and so by simply waiting for the last request in each
timeline we may leave a request unsignaled and floating in the void,
ready to incorrectly resume during the reset procedure.
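
As a minimal illustration (not the patch itself: timeline_fully_signaled()
is a hypothetical helper, and the timeline->lock taken by the real code
below is omitted for brevity), once requests can be signalled out-of-order
the tail of a timeline proves nothing and every request must be checked:

	static bool timeline_fully_signaled(struct intel_timeline *tl)
	{
		struct i915_request *rq;

		/*
		 * The last request being signalled does not imply the
		 * rest of the timeline is: an earlier request may still
		 * be pending, so walk them all.
		 */
		list_for_each_entry(rq, &tl->requests, link)
			if (!dma_fence_is_signaled(&rq->fence))
				return false;

		return true;
	}

The patch below implements this walk under timeline->lock and, on finding
an unsignaled request, waits for it before rescanning from the start.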

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 67 +++++++++++++++++++++++++++++------------
 1 file changed, 48 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8d913d833ab9..5b83bda52859 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3265,6 +3265,53 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 	wake_up_all(&i915->gpu_error.reset_queue);
 }
 
+static bool __wait_for_wedge(struct intel_timeline *timeline)
+{
+	do {
+		struct i915_request *rq;
+		unsigned long flags;
+		bool err = false;
+
+		/*
+		 * During wedging we cannot rely on the timelines being
+		 * monotonic; i915_gem_set_wedged() may result in the
+		 * requests being added to the execution timeline in a
+		 * different order than on the client's timeline, and so
+		 * they may be signalled out-of-order. To compensate, we
+		 * check every request on the timeline and wait for them
+		 * all to be signalled.
+		 */
+		spin_lock_irqsave(&timeline->lock, flags);
+		list_for_each_entry(rq, &timeline->requests, link) {
+			if (!dma_fence_is_signaled(&rq->fence)) {
+				i915_request_get(rq);
+				err = true;
+				break;
+			}
+		}
+		spin_unlock_irqrestore(&timeline->lock, flags);
+		if (!err)
+			return true;
+
+		/*
+		 * We can't use our normal waiter as we want to
+		 * avoid recursively trying to handle the current
+		 * reset. The basic dma_fence_default_wait() installs
+		 * a callback for dma_fence_signal(), which is
+		 * triggered by our nop handler (indirectly: the
+		 * callback enables the signaler thread, which is
+		 * woken by nop_submit_request() advancing the seqno;
+		 * when the seqno passes the fence, the signaler
+		 * then signals the fence, waking us up).
+		 */
+		err = dma_fence_default_wait(&rq->fence, true,
+					     MAX_SCHEDULE_TIMEOUT) < 0;
+		i915_request_put(rq);
+		if (err)
+			return false;
+	} while (1);
+}
+
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
 	struct i915_gem_timeline *tl;
@@ -3285,25 +3332,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	 */
 	list_for_each_entry(tl, &i915->gt.timelines, link) {
 		for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
-			struct i915_request *rq;
-
-			rq = i915_gem_active_peek(&tl->engine[i].last_request,
-						  &i915->drm.struct_mutex);
-			if (!rq)
-				continue;
-
-			/* We can't use our normal waiter as we want to
-			 * avoid recursively trying to handle the current
-			 * reset. The basic dma_fence_default_wait() installs
-			 * a callback for dma_fence_signal(), which is
-			 * triggered by our nop handler (indirectly, the
-			 * callback enables the signaler thread which is
-			 * woken by the nop_submit_request() advancing the seqno
-			 * and when the seqno passes the fence, the signaler
-			 * then signals the fence waking us up).
-			 */
-			if (dma_fence_default_wait(&rq->fence, true,
-						   MAX_SCHEDULE_TIMEOUT) < 0)
+			if (!__wait_for_wedge(&tl->engine[i]))
 				return false;
 		}
 	}
-- 
2.16.2


