[Intel-gfx] [PATCH v2] drm/i915: Restore engine->submit_request before unwedging
Chris Wilson
chris at chris-wilson.co.uk
Fri Mar 10 19:01:25 UTC 2017
When we wedge the device, we override engine->submit_request with a nop
to ensure that all in-flight requests are marked in error. However, igt
would like to unwedge the device to test -EIO handling. This requires us
to flush those in-flight requests and restore the original
engine->submit_request.
v2: Use a vfunc to unify enabling request submission to engines
Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
Testcase: igt/gem_eio
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
---
drivers/gpu/drm/i915/i915_drv.c | 2 +-
drivers/gpu/drm/i915/i915_drv.h | 1 +
drivers/gpu/drm/i915/i915_gem.c | 45 ++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_guc_submission.c | 2 +-
drivers/gpu/drm/i915/intel_engine_cs.c | 10 +++++++
drivers/gpu/drm/i915/intel_lrc.c | 15 ++++------
drivers/gpu/drm/i915/intel_lrc.h | 1 -
drivers/gpu/drm/i915/intel_ringbuffer.c | 15 ++++++++--
drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +++
9 files changed, 80 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index b1e9027a4f80..576b03b0048c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1825,7 +1825,7 @@ void i915_reset(struct drm_i915_private *dev_priv)
return;
/* Clear any previous failed attempts at recovery. Time to try again. */
- __clear_bit(I915_WEDGED, &error->flags);
+ i915_gem_unset_wedged(dev_priv);
error->reset_count++;
pr_notice("drm/i915: Resetting chip after gpu hang\n");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eef096566b81..71f07f45d5fe 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3409,6 +3409,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
void i915_gem_reset(struct drm_i915_private *dev_priv);
void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
+void i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
void i915_gem_init_mmio(struct drm_i915_private *i915);
int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 202bb850f260..aae491076390 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3000,6 +3000,51 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
}
+void i915_gem_unset_wedged(struct drm_i915_private *i915)
+{
+ struct i915_gem_timeline *tl;
+ int i;
+
+ lockdep_assert_held(&i915->drm.struct_mutex);
+ if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
+ return;
+
+ /* Before unwedging, make sure that all pending operations
+ * are flushed and errored out. No more can be submitted until
+ * we reset the wedged bit.
+ */
+ list_for_each_entry(tl, &i915->gt.timelines, link) {
+ for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
+ struct drm_i915_gem_request *rq;
+
+ rq = i915_gem_active_peek(&tl->engine[i].last_request,
+ &i915->drm.struct_mutex);
+ if (!rq)
+ continue;
+
+ /* We can't use our normal waiter as we want to
+ * avoid recursively trying to handle the current
+ * reset.
+ */
+ dma_fence_default_wait(&rq->fence, false,
+ MAX_SCHEDULE_TIMEOUT);
+ }
+ }
+
+ /* Undo nop_submit_request. We prevent all new i915 requests from
+ * being queued (by disallowing execbuf whilst wedged) so having
+ * waited for all active requests above, we know the system is idle
+ * and do not have to worry about a thread being inside
+ * engine->submit_request() as we swap over. So unlike installing
+ * the nop_submit_request on reset, we can do this from normal
+ * context and do not require stop_machine().
+ */
+ intel_engines_enable_submission(i915);
+
+ smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
+ clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+}
+
static void
i915_gem_retire_work_handler(struct work_struct *work)
{
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 41f2dd87b413..28289627a4ca 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -1003,7 +1003,7 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
return;
/* Revert back to manual ELSP submission */
- intel_execlists_enable_submission(dev_priv);
+ intel_engines_enable_submission(dev_priv);
}
void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 73fe718516a5..5663ebab851f 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -191,6 +191,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
goto cleanup;
}
+ engine->enable_submission(engine);
mask |= ENGINE_MASK(id);
}
@@ -1115,6 +1116,15 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
return true;
}
+void intel_engines_enable_submission(struct drm_i915_private *i915)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, i915, id)
+ engine->enable_submission(engine);
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_engine.c"
#endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 89f38e7def9f..f79df7a51e60 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1560,15 +1560,10 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
kfree(engine);
}
-void intel_execlists_enable_submission(struct drm_i915_private *dev_priv)
+static void logical_ring_enable_submission(struct intel_engine_cs *engine)
{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine(engine, dev_priv, id) {
- engine->submit_request = execlists_submit_request;
- engine->schedule = execlists_schedule;
- }
+ engine->submit_request = execlists_submit_request;
+ engine->schedule = execlists_schedule;
}
static void
@@ -1586,8 +1581,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_flush = gen8_emit_flush;
engine->emit_breadcrumb = gen8_emit_breadcrumb;
engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
- engine->submit_request = execlists_submit_request;
- engine->schedule = execlists_schedule;
+
+ engine->enable_submission = logical_ring_enable_submission;
engine->irq_enable = gen8_logical_ring_enable_irq;
engine->irq_disable = gen8_logical_ring_disable_irq;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 5fc07761caff..e8015e7bf4e9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -87,6 +87,5 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
/* Execlists */
int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
int enable_execlists);
-void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
#endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 4a864f8c9387..5b141f6639b6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2050,6 +2050,16 @@ static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
}
}
+static void i9xx_enable_submission(struct intel_engine_cs *engine)
+{
+ engine->submit_request = i9xx_submit_request;
+}
+
+static void gen6_bsd_enable_submission(struct intel_engine_cs *engine)
+{
+ engine->submit_request = gen6_bsd_submit_request;
+}
+
static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
struct intel_engine_cs *engine)
{
@@ -2080,7 +2090,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
engine->emit_breadcrumb_sz++;
}
}
- engine->submit_request = i9xx_submit_request;
+
+ engine->enable_submission = i9xx_enable_submission;
if (INTEL_GEN(dev_priv) >= 8)
engine->emit_bb_start = gen8_emit_bb_start;
@@ -2165,7 +2176,7 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
if (INTEL_GEN(dev_priv) >= 6) {
/* gen6 bsd needs a special wa for tail updates */
if (IS_GEN6(dev_priv))
- engine->submit_request = gen6_bsd_submit_request;
+ engine->enable_submission = gen6_bsd_enable_submission;
engine->emit_flush = gen6_bsd_ring_flush;
if (INTEL_GEN(dev_priv) < 8)
engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0ef491df5b4e..30d9820d978c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -273,6 +273,8 @@ struct intel_engine_cs {
void (*reset_hw)(struct intel_engine_cs *engine,
struct drm_i915_gem_request *req);
+ void (*enable_submission)(struct intel_engine_cs *engine);
+
int (*context_pin)(struct intel_engine_cs *engine,
struct i915_gem_context *ctx);
void (*context_unpin)(struct intel_engine_cs *engine,
@@ -669,4 +671,6 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
+void intel_engines_enable_submission(struct drm_i915_private *i915);
+
#endif /* _INTEL_RINGBUFFER_H_ */
--
2.11.0
More information about the Intel-gfx mailing list