[Intel-gfx] [PATCH] drm/i915: Look for active requests earlier in the reset path
Michel Thierry
michel.thierry at intel.com
Thu May 18 18:22:57 UTC 2017
And store the active request so that we only search for it once; this
applies for reset-engine and full reset.
v2: Check for request completion inside _prepare_engine, don't use
ECANCELED, remove unnecessary null checks (Chris).
v3: Capture active requests during reset_prepare and store it the
engine hangcheck obj.
v4: Rename commit, change i915_gem_reset_request to just confirm the
active_request is still incomplete, instead of engine_stalled (Chris).
Suggested-by: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Michel Thierry <michel.thierry at intel.com>
fixes
Signed-off-by: Michel Thierry <michel.thierry at intel.com>
---
drivers/gpu/drm/i915/i915_drv.c | 11 ++++++-----
drivers/gpu/drm/i915/i915_drv.h | 3 ++-
drivers/gpu/drm/i915/i915_gem.c | 34 +++++++++++++++++++++------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
4 files changed, 31 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d62793805794..ec719376fc24 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1900,15 +1900,16 @@ int i915_reset_engine(struct intel_engine_cs *engine)
DRM_DEBUG_DRIVER("resetting %s\n", engine->name);
- ret = i915_gem_reset_prepare_engine(engine);
- if (ret) {
- DRM_ERROR("Previous reset failed - promote to full reset\n");
+ engine->hangcheck.active_request = i915_gem_reset_prepare_engine(engine);
+ if (IS_ERR(engine->hangcheck.active_request)) {
+ DRM_DEBUG_DRIVER("Previous reset failed, promote to full reset\n");
+ ret = PTR_ERR(engine->hangcheck.active_request);
goto out;
}
/*
- * the request that caused the hang is stuck on elsp, identify the
- * active request and drop it, adjust head to skip the offending
+ * the request that caused the hang is stuck on elsp, we know the
+ * active request and can drop it, adjust head to skip the offending
* request to resume executing remaining requests in the queue.
*/
i915_gem_reset_engine(engine);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a5b9c666b3bf..6cbfeaa02246 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3370,7 +3370,8 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
return READ_ONCE(error->reset_count);
}
-int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
+struct drm_i915_gem_request *
+i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
void i915_gem_reset(struct drm_i915_private *dev_priv);
void i915_gem_reset_finish_engine(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b5dc073a5ddc..c9f139b322d2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2793,12 +2793,14 @@ static bool engine_stalled(struct intel_engine_cs *engine)
return true;
}
-/* Ensure irq handler finishes, and not run again. */
-int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
+/*
+ * Ensure irq handler finishes, and not run again.
+ * Also store the active request so that we only search for it once.
+ */
+struct drm_i915_gem_request *
+i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *request;
- int err = 0;
-
+ struct drm_i915_gem_request *request = NULL;
/* Prevent the signaler thread from updating the request
* state (by calling dma_fence_signal) as we are processing
@@ -2827,21 +2829,30 @@ int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
if (engine_stalled(engine)) {
request = i915_gem_find_active_request(engine);
+
if (request && request->fence.error == -EIO)
- err = -EIO; /* Previous reset failed! */
+ return ERR_PTR(-EIO); /* Previous reset failed! */
}
- return err;
+ return request;
}
int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
+ struct drm_i915_gem_request *request;
enum intel_engine_id id;
int err = 0;
- for_each_engine(engine, dev_priv, id)
- err = i915_gem_reset_prepare_engine(engine);
+ for_each_engine(engine, dev_priv, id) {
+ request = i915_gem_reset_prepare_engine(engine);
+ if (IS_ERR(request)) {
+ err = PTR_ERR(request);
+ break;
+ }
+
+ engine->hangcheck.active_request = request;
+ }
i915_gem_revoke_fences(dev_priv);
@@ -2894,7 +2905,7 @@ static void engine_skip_context(struct drm_i915_gem_request *request)
static bool i915_gem_reset_request(struct drm_i915_gem_request *request)
{
/* Read once and return the resolution */
- const bool guilty = engine_stalled(request->engine);
+ const bool guilty = !i915_gem_request_completed(request);
/* The guilty request will get skipped on a hung engine.
*
@@ -2930,9 +2941,8 @@ static bool i915_gem_reset_request(struct drm_i915_gem_request *request)
void i915_gem_reset_engine(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *request;
+ struct drm_i915_gem_request *request = engine->hangcheck.active_request;
- request = i915_gem_find_active_request(engine);
if (request && i915_gem_reset_request(request)) {
DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
engine->name, request->global_seqno);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ec16fb6fde62..f850c4b12337 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -121,6 +121,7 @@ struct intel_engine_hangcheck {
unsigned long action_timestamp;
int deadlock;
struct intel_instdone instdone;
+ struct drm_i915_gem_request *active_request;
bool stalled;
};
--
2.11.0
More information about the Intel-gfx
mailing list