[Intel-gfx] [PATCH v3] drm/i915: Rework GPU reset sequence to match driver load & thaw
alistair.mcaulay at intel.com
alistair.mcaulay at intel.com
Fri Aug 15 19:51:35 CEST 2014
From: "McAulay, Alistair" <alistair.mcaulay at intel.com>
This patch is to address Daniels concerns over different code during reset:
http://lists.freedesktop.org/archives/intel-gfx/2014-June/047758.html
"The reason for aiming as hard as possible to use the exact same code for
driver load, gpu reset and runtime pm/system resume is that we've simply
seen too many bugs due to slight variations and unintended omissions."
Tested using igt drv_hangman.
V2: Cleaner way of preventing check_wedge returning -EAGAIN
V3: Clean the last_context during reset, to ensure do_switch() does the MI_SET_CONTEXT. As per review.
Signed-off-by: McAulay, Alistair <alistair.mcaulay at intel.com>
---
drivers/gpu/drm/i915/i915_drv.c | 6 +++
drivers/gpu/drm/i915/i915_drv.h | 3 ++
drivers/gpu/drm/i915/i915_gem.c | 4 +-
drivers/gpu/drm/i915/i915_gem_context.c | 33 +++-------------
drivers/gpu/drm/i915/i915_gem_gtt.c | 67 +++++----------------------------
drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +-
6 files changed, 28 insertions(+), 88 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5e4fefd..3bfafe6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -806,7 +806,13 @@ int i915_reset(struct drm_device *dev)
!dev_priv->ums.mm_suspended) {
dev_priv->ums.mm_suspended = 0;
+ /* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset */
+ dev_priv->gpu_error.reload_in_reset = true;
+
ret = i915_gem_init_hw(dev);
+
+ dev_priv->gpu_error.reload_in_reset = false;
+
mutex_unlock(&dev->struct_mutex);
if (ret) {
DRM_ERROR("Failed hw init on reset %d\n", ret);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 991b663..116daff 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1217,6 +1217,9 @@ struct i915_gpu_error {
/* For missed irq/seqno simulation. */
unsigned int test_irq_rings;
+
+ /* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset */
+ bool reload_in_reset;
};
enum modeset_restore {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ef047bc..e7396eb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1085,7 +1085,9 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
if (i915_terminally_wedged(error))
return -EIO;
- return -EAGAIN;
+ /* Check if GPU Reset is in progress */
+ if (!error->reload_in_reset)
+ return -EAGAIN;
}
return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index de72a28..9378ad8 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -377,34 +377,17 @@ void i915_gem_context_reset(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
int i;
- /* Prevent the hardware from restoring the last context (which hung) on
- * the next switch */
for (i = 0; i < I915_NUM_RINGS; i++) {
struct intel_engine_cs *ring = &dev_priv->ring[i];
- struct intel_context *dctx = ring->default_context;
struct intel_context *lctx = ring->last_context;
- /* Do a fake switch to the default context */
- if (lctx == dctx)
- continue;
-
- if (!lctx)
- continue;
+ if (lctx) {
+ if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
+ i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
- if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
- WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
- get_context_alignment(dev), 0));
- /* Fake a finish/inactive */
- dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
- dctx->legacy_hw_ctx.rcs_state->active = 0;
+ i915_gem_context_unreference(lctx);
+ ring->last_context = NULL;
}
-
- if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
- i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
-
- i915_gem_context_unreference(lctx);
- i915_gem_context_reference(dctx);
- ring->last_context = dctx;
}
}
@@ -498,10 +481,6 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv)
ppgtt->enable(ppgtt);
}
- /* FIXME: We should make this work, even in reset */
- if (i915_reset_in_progress(&dev_priv->gpu_error))
- return 0;
-
BUG_ON(!dev_priv->ring[RCS].default_context);
for_each_ring(ring, dev_priv, i) {
@@ -645,7 +624,7 @@ static int do_switch(struct intel_engine_cs *ring,
from = ring->last_context;
if (USES_FULL_PPGTT(ring->dev)) {
- ret = ppgtt->switch_mm(ppgtt, ring, false);
+ ret = ppgtt->switch_mm(ppgtt, ring);
if (ret)
goto unpin_out;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5188936..450c8a9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -216,19 +216,12 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
- uint64_t val, bool synchronous)
+ uint64_t val)
{
- struct drm_i915_private *dev_priv = ring->dev->dev_private;
int ret;
BUG_ON(entry >= 4);
- if (synchronous) {
- I915_WRITE(GEN8_RING_PDP_UDW(ring, entry), val >> 32);
- I915_WRITE(GEN8_RING_PDP_LDW(ring, entry), (u32)val);
- return 0;
- }
-
ret = intel_ring_begin(ring, 6);
if (ret)
return ret;
@@ -245,8 +238,7 @@ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
}
static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
- struct intel_engine_cs *ring,
- bool synchronous)
+ struct intel_engine_cs *ring)
{
int i, ret;
@@ -255,7 +247,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
for (i = used_pd - 1; i >= 0; i--) {
dma_addr_t addr = ppgtt->pd_dma_addr[i];
- ret = gen8_write_pdp(ring, i, addr, synchronous);
+ ret = gen8_write_pdp(ring, i, addr);
if (ret)
return ret;
}
@@ -724,29 +716,10 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
}
static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
- struct intel_engine_cs *ring,
- bool synchronous)
+ struct intel_engine_cs *ring)
{
- struct drm_device *dev = ppgtt->base.dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
int ret;
- /* If we're in reset, we can assume the GPU is sufficiently idle to
- * manually frob these bits. Ideally we could use the ring functions,
- * except our error handling makes it quite difficult (can't use
- * intel_ring_begin, ring->flush, or intel_ring_advance)
- *
- * FIXME: We should try not to special case reset
- */
- if (synchronous ||
- i915_reset_in_progress(&dev_priv->gpu_error)) {
- WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
- I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
- I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
- POSTING_READ(RING_PP_DIR_BASE(ring));
- return 0;
- }
-
/* NB: TLBs must be flushed and invalidated before a switch */
ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
if (ret)
@@ -768,29 +741,10 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
}
static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
- struct intel_engine_cs *ring,
- bool synchronous)
+ struct intel_engine_cs *ring)
{
- struct drm_device *dev = ppgtt->base.dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
int ret;
- /* If we're in reset, we can assume the GPU is sufficiently idle to
- * manually frob these bits. Ideally we could use the ring functions,
- * except our error handling makes it quite difficult (can't use
- * intel_ring_begin, ring->flush, or intel_ring_advance)
- *
- * FIXME: We should try not to special case reset
- */
- if (synchronous ||
- i915_reset_in_progress(&dev_priv->gpu_error)) {
- WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
- I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
- I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
- POSTING_READ(RING_PP_DIR_BASE(ring));
- return 0;
- }
-
/* NB: TLBs must be flushed and invalidated before a switch */
ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
if (ret)
@@ -819,14 +773,11 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
}
static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
- struct intel_engine_cs *ring,
- bool synchronous)
+ struct intel_engine_cs *ring)
{
struct drm_device *dev = ppgtt->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- if (!synchronous)
- return 0;
I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
@@ -852,7 +803,7 @@ static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
if (USES_FULL_PPGTT(dev))
continue;
- ret = ppgtt->switch_mm(ppgtt, ring, true);
+ ret = ppgtt->switch_mm(ppgtt, ring);
if (ret)
goto err_out;
}
@@ -897,7 +848,7 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
if (USES_FULL_PPGTT(dev))
continue;
- ret = ppgtt->switch_mm(ppgtt, ring, true);
+ ret = ppgtt->switch_mm(ppgtt, ring);
if (ret)
return ret;
}
@@ -926,7 +877,7 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
for_each_ring(ring, dev_priv, i) {
- int ret = ppgtt->switch_mm(ppgtt, ring, true);
+ int ret = ppgtt->switch_mm(ppgtt, ring);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8d6f7c1..bf1e4fc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -262,8 +262,7 @@ struct i915_hw_ppgtt {
int (*enable)(struct i915_hw_ppgtt *ppgtt);
int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
- struct intel_engine_cs *ring,
- bool synchronous);
+ struct intel_engine_cs *ring);
void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
};
--
2.0.4
More information about the Intel-gfx
mailing list