[Intel-gfx] [PATCH] drm/i915: Decouple GPU error reporting from ring initialisation

Chris Wilson chris at chris-wilson.co.uk
Thu Jan 23 22:49:43 CET 2014


Currently we report through our error state only the rings that have
been initialised (as detected by ring->obj). This check is done after
the GPU reset and ring re-initialisation, which means that the software
state may not be the same as when we captured the hardware error and we
may not print out any of the vital information for debugging the hang.

This (and the implied object leak) is a regression from

commit 3d57e5bd1284f44e325f3a52d966259ed42f9e05
Author: Ben Widawsky <ben at bwidawsk.net>
Date:   Mon Oct 14 10:01:36 2013 -0700

    drm/i915: Do a fuller init after reset

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Ben Widawsky <ben at bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h       |  1 +
 drivers/gpu/drm/i915/i915_gpu_error.c | 19 +++++++++++++------
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c45cbbecd66a..64a1aca7804d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -334,6 +334,7 @@ struct drm_i915_error_state {
 	struct timeval time;
 
 	struct drm_i915_error_ring {
+		int valid;
 		struct drm_i915_error_object {
 			int page_count;
 			u32 gtt_offset;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 260a215e3619..e2af1d490f8d 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -240,6 +240,9 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 				  unsigned ring)
 {
 	BUG_ON(ring >= I915_NUM_RINGS); /* shut up confused gcc */
+	if (!error->ring[ring].valid)
+		return;
+
 	err_printf(m, "%s command stream:\n", ring_str(ring));
 	err_printf(m, "  HEAD: 0x%08x\n", error->head[ring]);
 	err_printf(m, "  TAIL: 0x%08x\n", error->tail[ring]);
@@ -294,7 +297,6 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	struct drm_device *dev = error_priv->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_error_state *error = error_priv->error;
-	struct intel_ring_buffer *ring;
 	int i, j, page, offset, elt;
 
 	if (!error) {
@@ -329,7 +331,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	if (INTEL_INFO(dev)->gen == 7)
 		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
 
-	for_each_ring(ring, dev_priv, i)
+	for (i = 0; i < ARRAY_SIZE(error->ring); i++)
 		i915_ring_error_state(m, dev, error, i);
 
 	for (i = 0; i < error->vm_count; i++) {
@@ -388,8 +390,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			}
 		}
 
-		obj = error->ring[i].ctx;
-		if (obj) {
+		if ((obj = error->ring[i].ctx)) {
 			err_printf(m, "%s --- HW Context = 0x%08x\n",
 				   dev_priv->ring[i].name,
 				   obj->gtt_offset);
@@ -826,11 +827,17 @@ static void i915_gem_record_rings(struct drm_device *dev,
 				  struct drm_i915_error_state *error)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
 	struct drm_i915_gem_request *request;
 	int i, count;
 
-	for_each_ring(ring, dev_priv, i) {
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct intel_ring_buffer *ring = &dev_priv->ring[i];
+
+		if (ring->dev == NULL)
+			continue;
+
+		error->ring[i].valid = true;
+
 		i915_record_ring_state(dev, error, ring);
 
 		error->ring[i].batchbuffer =
-- 
1.8.5.3




More information about the Intel-gfx mailing list