[PATCH 05/91] drm/i915: Only skip requests once a context is banned

Thu Jan 5 13:32:21 UTC 2017

On Thu, Jan 05, 2017 at 01:25:23PM +0000, Tvrtko Ursulin wrote:
> 
> On 05/01/2017 10:34, Chris Wilson wrote:
> >If we skip before banning, we have an inconsistent interface: execbuf
> >is still queueing valid requests, yet those requests already queued are
> >being cancelled. If we only cancel the pending requests once we stop
> >accepting new requests, the interface is more consistent.
> >
> >Reported-by: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> >Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
> >Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> >Cc: Mika Kuoppala <mika.kuoppala at intel.com>
> >---
> > drivers/gpu/drm/i915/i915_gem.c | 19 ++++++++++++-------
> > 1 file changed, 12 insertions(+), 7 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >index 1fcce2db86f2..e7a7f404fd85 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -2736,7 +2736,7 @@ void i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
> > static void i915_gem_reset_engine(struct intel_engine_cs *engine)
> > {
> > 	struct drm_i915_gem_request *request;
> >-	struct i915_gem_context *incomplete_ctx;
> >+	struct i915_gem_context *hung_ctx;
> > 	struct intel_timeline *timeline;
> > 	unsigned long flags;
> > 	bool ring_hung;
> >@@ -2748,6 +2748,8 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
> > 	if (!request)
> > 		return;
> >
> >+	hung_ctx = request->ctx;
> >+
> > 	ring_hung = engine->hangcheck.stalled;
> > 	if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) {
> > 		DRM_DEBUG_DRIVER("%s pardoned, was guilty? %s\n",
> >@@ -2757,10 +2759,10 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
> > 	}
> >
> > 	if (ring_hung) {
> >-		i915_gem_context_mark_guilty(request->ctx);
> >+		i915_gem_context_mark_guilty(hung_ctx);
> > 		dma_fence_set_error(&request->fence, -EIO);
> > 	} else {
> >-		i915_gem_context_mark_innocent(request->ctx);
> >+		i915_gem_context_mark_innocent(hung_ctx);
> > 	}
> >
> > 	if (!ring_hung)
> >@@ -2772,6 +2774,10 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
> > 	/* Setup the CS to resume from the breadcrumb of the hung request */
> > 	engine->reset_hw(engine, request);
> >
> >+	/* If this context is now banned, skip all of its pending requests. */
> >+	if (!i915_gem_context_is_banned(hung_ctx))
> >+		return;
> >+
> > 	/* Users of the default context do not rely on logical state
> > 	 * preserved between batches. They have to emit full state on
> > 	 * every batch and so it is safe to execute queued requests following
> >@@ -2780,17 +2786,16 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
> > 	 * Other contexts preserve state, now corrupt. We want to skip all
> > 	 * queued requests that reference the corrupt context.
> > 	 */
> >-	incomplete_ctx = request->ctx;
> >-	if (i915_gem_context_is_default(incomplete_ctx))
> >+	if (i915_gem_context_is_default(hung_ctx))
> > 		return;
> >
> >-	timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
> >+	timeline = i915_gem_context_lookup_timeline(hung_ctx, engine);
> >
> > 	spin_lock_irqsave(&engine->timeline->lock, flags);
> > 	spin_lock(&timeline->lock);
> >
> > 	list_for_each_entry_continue(request, &engine->timeline->requests, link)
> >-		if (request->ctx == incomplete_ctx)
> >+		if (request->ctx == hung_ctx)
> > 			reset_request(request);
> >
> > 	list_for_each_entry(request, &timeline->requests, link)
> >
> 
> Thought I r-b'ed it already, anyway:

You had. I applied the r-b this morning, but only after I had already
tried to get this series to trybot.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre