[PATCH 3/3] drm/i915: Wait for *all* requests before unwedging
Chris Wilson
chris at chris-wilson.co.uk
Mon Mar 5 10:36:22 UTC 2018
During wedging, we may push the requests onto the execution queue out of
order, and so by simply waiting for the last request on each timeline we
may leave an earlier request unsignaled and floating in the void, ready
to be incorrectly resumed during the reset procedure.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem.c | 67 +++++++++++++++++++++++++++++------------
1 file changed, 48 insertions(+), 19 deletions(-)
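
Not part of the patch itself -- a minimal sketch of the idea, assuming the
timeline->requests list and the dma-fence helpers used in the diff below.
The helper name wait_for_all_requests is invented for the sketch, and the
timeline locking plus the request reference taken across the wait are
deliberately elided; see __wait_for_wedge() in the hunk for the full
version.

	/* Sketch only: walk every request on the timeline, not just the
	 * last one, and wait until all of them have been signalled. */
	static bool wait_for_all_requests(struct intel_timeline *tl)
	{
		struct i915_request *rq;

	restart:
		list_for_each_entry(rq, &tl->requests, link) {
			if (!dma_fence_is_signaled(&rq->fence)) {
				/* Use dma_fence_default_wait() to avoid
				 * recursing into our own reset handling,
				 * then rescan from the head as requests
				 * may be signalled out of order. */
				if (dma_fence_default_wait(&rq->fence, true,
							   MAX_SCHEDULE_TIMEOUT) < 0)
					return false;
				goto restart;
			}
		}
		return true;
	}
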
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8d913d833ab9..5b83bda52859 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3265,6 +3265,53 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 	wake_up_all(&i915->gpu_error.reset_queue);
 }
+static bool __wait_for_wedge(struct intel_timeline *timeline)
+{
+	do {
+		struct i915_request *rq;
+		unsigned long flags;
+		bool err = false;
+
+		/*
+		 * During wedging we cannot rely on the timelines being
+		 * monotonic; i915_gem_set_wedged() may result in the
+		 * requests being added to the execution timeline in a
+		 * different order than on the client's timeline and so
+		 * be signalled out-of-order. To compensate, we check every
+		 * request on the timeline and wait for them all to be
+		 * signalled.
+		 */
+		spin_lock_irqsave(&timeline->lock, flags);
+		list_for_each_entry(rq, &timeline->requests, link) {
+			if (!dma_fence_is_signaled(&rq->fence)) {
+				i915_request_get(rq);
+				err = true;
+				break;
+			}
+		}
+		spin_unlock_irqrestore(&timeline->lock, flags);
+		if (!err)
+			return true;
+
+		/*
+		 * We can't use our normal waiter as we want to
+		 * avoid recursively trying to handle the current
+		 * reset. The basic dma_fence_default_wait() installs
+		 * a callback for dma_fence_signal(), which is
+		 * triggered by our nop handler (indirectly, the
+		 * callback enables the signaler thread which is
+		 * woken by the nop_submit_request() advancing the seqno
+		 * and when the seqno passes the fence, the signaler
+		 * then signals the fence waking us up).
+		 */
+		err = dma_fence_default_wait(&rq->fence, true,
+					     MAX_SCHEDULE_TIMEOUT) < 0;
+		i915_request_put(rq);
+		if (err)
+			return false;
+	} while (1);
+}
+
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
 	struct i915_gem_timeline *tl;
@@ -3285,25 +3332,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 	 */
 	list_for_each_entry(tl, &i915->gt.timelines, link) {
 		for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
-			struct i915_request *rq;
-
-			rq = i915_gem_active_peek(&tl->engine[i].last_request,
-						  &i915->drm.struct_mutex);
-			if (!rq)
-				continue;
-
-			/* We can't use our normal waiter as we want to
-			 * avoid recursively trying to handle the current
-			 * reset. The basic dma_fence_default_wait() installs
-			 * a callback for dma_fence_signal(), which is
-			 * triggered by our nop handler (indirectly, the
-			 * callback enables the signaler thread which is
-			 * woken by the nop_submit_request() advancing the seqno
-			 * and when the seqno passes the fence, the signaler
-			 * then signals the fence waking us up).
-			 */
-			if (dma_fence_default_wait(&rq->fence, true,
-						   MAX_SCHEDULE_TIMEOUT) < 0)
+			if (!__wait_for_wedge(&tl->engine[i]))
 				return false;
 		}
 	}
--
2.16.2