[Intel-gfx] [PATCH v2] drm/i915: Restore context and pd for ringbuffer submission after reset
Chris Wilson
chris at chris-wilson.co.uk
Tue Feb 7 09:58:24 UTC 2017
Following a reset, the context and page directory registers are lost.
However, the queue of requests that we resubmit after the reset may
depend upon them - the registers are restored from a context image, but
that restore may be inhibited and may simply be absent from the request
if it was in the middle of a sequence using the same context. If we
prime the CCID/PD registers with the first request in the queue (even
for the hung request), we prevent invalid memory access for the
following requests (and continually hung engines).
v2: Magic BIT(8), reserved for future use but still appears unused.
Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 18 +++++++---------
drivers/gpu/drm/i915/i915_reg.h | 6 ++++--
drivers/gpu/drm/i915/intel_lrc.c | 6 +++++-
drivers/gpu/drm/i915/intel_ringbuffer.c | 37 ++++++++++++++++++++++++++++++---
4 files changed, 50 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fe32b1edbda2..7a16c859c875 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2735,21 +2735,17 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
engine->irq_seqno_barrier(engine);
request = i915_gem_find_active_request(engine);
- if (!request)
- return;
-
- if (!i915_gem_reset_request(request))
- return;
+ if (request && i915_gem_reset_request(request)) {
+ DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
+ engine->name, request->global_seqno);
- DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
- engine->name, request->global_seqno);
+ /* If this context is now banned, skip all pending requests. */
+ if (i915_gem_context_is_banned(request->ctx))
+ engine_skip_context(request);
+ }
/* Setup the CS to resume from the breadcrumb of the hung request */
engine->reset_hw(engine, request);
-
- /* If this context is now banned, skip all of its pending requests. */
- if (i915_gem_context_is_banned(request->ctx))
- engine_skip_context(request);
}
void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5bf36bdc5926..4a59091c0152 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3323,8 +3323,10 @@ enum skl_disp_power_wells {
/*
* Logical Context regs
*/
-#define CCID _MMIO(0x2180)
-#define CCID_EN (1<<0)
+#define CCID _MMIO(0x2180)
+#define CCID_EN BIT(0)
+#define CCID_EXTENDED_STATE_RESTORE BIT(2)
+#define CCID_EXTENDED_STATE_SAVE BIT(3)
/*
* Notes on SNB/IVB/VLV context size:
* - Power context is saved elsewhere (LLC or stolen)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index df8e6f74dc7e..8a63a1bd0eb6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1352,7 +1352,10 @@ static void reset_common_ring(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request)
{
struct execlist_port *port = engine->execlist_port;
- struct intel_context *ce = &request->ctx->engine[engine->id];
+ struct intel_context *ce;
+
+ if (!request || request->fence.error != -EIO)
+ return;
/* We want a simple context + ring to execute the breadcrumb update.
* We cannot rely on the context being intact across the GPU hang,
@@ -1361,6 +1364,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
+ ce = &request->ctx->engine[engine->id];
execlists_init_reg_state(ce->lrc_reg_state,
request->ctx, engine, ce->ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 383083ef2210..1c4136cc3a84 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -599,10 +599,41 @@ static int init_ring_common(struct intel_engine_cs *engine)
static void reset_ring_common(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request)
{
- struct intel_ring *ring = request->ring;
+ if (request) {
+ struct drm_i915_private *dev_priv = request->i915;
+ struct intel_context *ce = &request->ctx->engine[engine->id];
+ struct i915_hw_ppgtt *ppgtt;
+
+ /* FIXME consider gen8 reset */
+
+ if (ce->state) {
+ I915_WRITE(CCID,
+ i915_ggtt_offset(ce->state) |
+ BIT(8) /* must be set! */ |
+ CCID_EXTENDED_STATE_SAVE |
+ CCID_EXTENDED_STATE_RESTORE |
+ CCID_EN);
+ }
- ring->head = request->postfix;
- ring->last_retired_head = -1;
+ ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
+ if (ppgtt) {
+ u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10;
+
+ I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
+ I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset);
+ ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
+ }
+
+ /* If the rq hung, jump to its breadcrumb and skip the batch */
+ if (request->fence.error == -EIO) {
+ struct intel_ring *ring = request->ring;
+
+ ring->head = request->postfix;
+ ring->last_retired_head = -1;
+ }
+ } else {
+ engine->legacy_active_context = NULL;
+ }
}
static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
--
2.11.0
More information about the Intel-gfx
mailing list