[Intel-gfx] [PATCH 3/3] reset-request-recovery
Chris Wilson
chris at chris-wilson.co.uk
Wed Jul 27 21:36:00 UTC 2016
---
drivers/gpu/drm/i915/i915_drv.c | 4 +-
drivers/gpu/drm/i915/i915_drv.h | 2 +-
drivers/gpu/drm/i915/i915_gem.c | 66 +++++++++++++++++++++------------
drivers/gpu/drm/i915/i915_gem_context.c | 16 --------
drivers/gpu/drm/i915/intel_lrc.c | 34 ++++++++++++++++-
drivers/gpu/drm/i915/intel_ringbuffer.c | 32 ++++++++++------
drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +
7 files changed, 100 insertions(+), 56 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 869baa6a5196..d250dce0c8fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1574,7 +1574,7 @@ static int i915_drm_resume(struct drm_device *dev)
mutex_lock(&dev->struct_mutex);
if (i915_gem_init_hw(dev)) {
DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
- atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+ i915_gem_set_wedged(dev_priv);
}
mutex_unlock(&dev->struct_mutex);
@@ -1798,7 +1798,7 @@ int i915_reset(struct drm_i915_private *dev_priv)
return 0;
error:
- atomic_or(I915_WEDGED, &error->reset_counter);
+ i915_gem_set_wedged(dev_priv);
mutex_unlock(&dev->struct_mutex);
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index aacce6625f84..08c212a5f1a8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3224,6 +3224,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
}
void i915_gem_reset(struct drm_device *dev);
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
int __must_check i915_gem_init(struct drm_device *dev);
int __must_check i915_gem_init_hw(struct drm_device *dev);
@@ -3343,7 +3344,6 @@ void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
int __must_check i915_gem_context_init(struct drm_device *dev);
void i915_gem_context_lost(struct drm_i915_private *dev_priv);
void i915_gem_context_fini(struct drm_device *dev);
-void i915_gem_context_reset(struct drm_device *dev);
int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
int i915_switch_context(struct drm_i915_gem_request *req);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f56fa8ac30bb..1e5c62476585 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2426,23 +2426,57 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
return NULL;
}
-static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
+static void i915_gem_reset_engine(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request;
+ struct i915_gem_context *incomplete_ctx;
bool ring_hung;
+ /* Ensure irq handler finishes or is cancelled. */
+ tasklet_kill(&engine->irq_tasklet);
+
request = i915_gem_find_active_request(engine);
- if (request == NULL)
+ if (!request)
return;
ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
-
i915_set_reset_status(request->ctx, ring_hung);
- list_for_each_entry_continue(request, &engine->request_list, link)
+ engine->reset_hw(engine, request);
+
+ incomplete_ctx = request->ctx;
+ if (i915_gem_context_is_default(incomplete_ctx))
+ incomplete_ctx = NULL;
+ list_for_each_entry_continue(request, &engine->request_list, link) {
+ void *vaddr = request->ring->vaddr;
+ u32 head;
+
+ if (request->ctx != incomplete_ctx)
+ continue;
+
+ head = request->head;
+ if (request->postfix < head) {
+ memset(vaddr + head, 0, request->ring->size - head);
+ head = 0;
+ }
+ memset(vaddr + head, 0, request->postfix - head);
+
i915_set_reset_status(request->ctx, false);
+ }
+}
+
+void i915_gem_reset(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ struct intel_engine_cs *engine;
+
+ for_each_engine(engine, dev_priv)
+ i915_gem_reset_engine(engine);
+
+ i915_gem_context_lost(dev_priv);
+ i915_gem_restore_fences(dev);
}
-static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
+static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
{
struct intel_ring *ring;
@@ -2459,9 +2493,6 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
*/
if (i915.enable_execlists) {
- /* Ensure irq handler finishes or is cancelled. */
- tasklet_kill(&engine->irq_tasklet);
-
INIT_LIST_HEAD(&engine->execlist_queue);
i915_gem_request_assign(&engine->execlist_port[0].request,
NULL);
@@ -2501,26 +2532,13 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
}
-void i915_gem_reset(struct drm_device *dev)
+void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
{
- struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_engine_cs *engine;
- /*
- * Before we free the objects from the requests, we need to inspect
- * them for finding the guilty party. As the requests only borrow
- * their reference to the objects, the inspection must be done first.
- */
- for_each_engine(engine, dev_priv)
- i915_gem_reset_engine_status(engine);
-
for_each_engine(engine, dev_priv)
- i915_gem_reset_engine_cleanup(engine);
+ i915_gem_cleanup_engine(engine);
mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
-
- i915_gem_context_reset(dev);
-
- i915_gem_restore_fences(dev);
}
static void
@@ -4346,7 +4364,7 @@ int i915_gem_init(struct drm_device *dev)
* for all other failure, such as an allocation failure, bail.
*/
DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
- atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+ i915_gem_set_wedged(dev_priv);
ret = 0;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index bb72af5320b0..a5b3ba4cca4a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -405,22 +405,6 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx,
}
}
-void i915_gem_context_reset(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = to_i915(dev);
-
- lockdep_assert_held(&dev->struct_mutex);
-
- if (i915.enable_execlists) {
- struct i915_gem_context *ctx;
-
- list_for_each_entry(ctx, &dev_priv->context_list, link)
- intel_lr_context_reset(dev_priv, ctx);
- }
-
- i915_gem_context_lost(dev_priv);
-}
-
int i915_gem_context_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 9be71a8a1d63..e24ce2d5cf90 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1193,6 +1193,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
+ intel_mocs_init_engine(engine);
lrc_init_hws(engine);
I915_WRITE_IMR(engine,
@@ -1208,7 +1209,10 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
intel_engine_init_hangcheck(engine);
- return intel_mocs_init_engine(engine);
+ if (engine->execlist_port[0].request)
+ execlists_submit_ports(engine);
+
+ return 0;
}
static int gen8_init_render_ring(struct intel_engine_cs *engine)
@@ -1244,6 +1248,33 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
return init_workarounds_ring(engine);
}
+static void reset_common_ring(struct intel_engine_cs *engine,
+ struct drm_i915_gem_request *request)
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+ struct i915_gem_context *ctx = request->ctx;
+ struct intel_context *ce = &ctx->engine[engine->id];
+ u32 *reg_state;
+
+ reg_state = ce->lrc_reg_state;
+ reg_state[CTX_RING_HEAD+1] = request->postfix;
+
+ request->ring->head = request->postfix;
+ request->ring->last_retired_head = -1;
+ intel_ring_update_space(request->ring);
+
+ if (request == engine->execlist_port[1].request) {
+ i915_gem_request_put(engine->execlist_port[0].request);
+ engine->execlist_port[0] = engine->execlist_port[1];
+ memset(&engine->execlist_port[1], 0,
+ sizeof(engine->execlist_port[1]));
+ }
+
+ engine->execlist_port[0].count = 0;
+
+ I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0));
+}
+
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
{
struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
@@ -1604,6 +1635,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
{
/* Default vfuncs which can be overriden by each engine. */
engine->init_hw = gen8_init_common_ring;
+ engine->reset_hw = reset_common_ring;
engine->emit_flush = gen8_emit_flush;
engine->emit_request = gen8_emit_request;
engine->submit_request = execlists_submit_request;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 54ec2faa0bf2..4730b1f178fd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -577,34 +577,31 @@ static int init_ring_common(struct intel_engine_cs *engine)
if (I915_READ_HEAD(engine))
DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
engine->name, I915_READ_HEAD(engine));
- I915_WRITE_HEAD(engine, 0);
- (void)I915_READ_HEAD(engine);
+ I915_WRITE_HEAD(engine, ring->head);
+ I915_WRITE_TAIL(engine, ring->tail);
+ (void)I915_READ_TAIL(engine);
I915_WRITE_CTL(engine,
((ring->size - PAGE_SIZE) & RING_NR_PAGES)
| RING_VALID);
/* If the head is still not zero, the ring is dead */
- if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 &&
- I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) &&
- (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) {
+ if (intel_wait_for_register_fw(dev_priv, RING_CTL(engine->mmio_base),
+ RING_VALID, RING_VALID,
+ 50)) {
DRM_ERROR("%s initialization failed "
- "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
+ "ctl %08x (valid? %d) head %08x [%08x] tail %08x [%08x] start %08x [expected %08lx]\n",
engine->name,
I915_READ_CTL(engine),
I915_READ_CTL(engine) & RING_VALID,
- I915_READ_HEAD(engine), I915_READ_TAIL(engine),
+ I915_READ_HEAD(engine), ring->head,
+ I915_READ_TAIL(engine), ring->tail,
I915_READ_START(engine),
(unsigned long)i915_gem_obj_ggtt_offset(obj));
ret = -EIO;
goto out;
}
- ring->last_retired_head = -1;
- ring->head = I915_READ_HEAD(engine);
- ring->tail = I915_READ_TAIL(engine) & TAIL_ADDR;
- intel_ring_update_space(ring);
-
intel_engine_init_hangcheck(engine);
out:
@@ -613,6 +610,16 @@ out:
return ret;
}
+static void reset_ring_common(struct intel_engine_cs *engine,
+ struct drm_i915_gem_request *request)
+{
+ struct intel_ring *ring = request->ring;
+
+ ring->head = request->postfix;
+ ring->last_retired_head = -1;
+ intel_ring_update_space(ring);
+}
+
void intel_fini_pipe_control(struct intel_engine_cs *engine)
{
if (engine->scratch.obj == NULL)
@@ -2767,6 +2774,7 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
intel_ring_init_semaphores(dev_priv, engine);
engine->init_hw = init_ring_common;
+ engine->reset_hw = reset_ring_common;
engine->emit_request = i9xx_emit_request;
if (i915.semaphores)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a5cee335881a..092e5b27652f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -203,6 +203,8 @@ struct intel_engine_cs {
void (*irq_disable)(struct intel_engine_cs *engine);
int (*init_hw)(struct intel_engine_cs *engine);
+ void (*reset_hw)(struct intel_engine_cs *engine,
+ struct drm_i915_gem_request *req);
int (*init_context)(struct drm_i915_gem_request *req);
--
2.8.1
More information about the Intel-gfx mailing list