[Intel-gfx] [PATCH 13/13] drm/i915: Compress GPU objects in error state

Chris Wilson chris at chris-wilson.co.uk
Fri Aug 5 09:06:04 UTC 2016


Our error states are quickly growing, pinning kernel memory with them.
The majority of the space is taken up by the error objects, which
compress well using zlib. Since those objects are mostly meaningless
without decoding anyway, compressing and ascii85-encoding them does
not hinder quickly scanning the error state for familiar patterns.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
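For reference (not part of the patch itself), pulling an object back out
of the new dump is straightforward from userspace. Below is a rough
sketch only, with made-up helper names, assuming a little-endian host
and the ordinary userspace zlib (link with -lz): each object is emitted
as one line, ":<ascii85 words>\n", where every 5 characters encode one
u32 of the zlib stream and a lone 'z' stands for an all-zero word.

/*
 * Illustrative decoder sketch only; not part of this series.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>

/* Decode the characters following the leading ':' into raw bytes;
 * returns the number of bytes written into out (which the caller must
 * size generously, e.g. 4 bytes per encoded word). */
static size_t ascii85_decode(const char *in, void *out)
{
	uint32_t *dst = out;
	size_t count = 0;

	while (*in && *in != '\n') {
		uint32_t v = 0;

		if (*in == 'z') {
			in++;
		} else {
			int i;

			for (i = 0; i < 5; i++)
				v = v * 85 + (uint32_t)(*in++ - '!');
		}
		dst[count++] = v; /* native endian, as the kernel wrote it */
	}

	return count * sizeof(uint32_t);
}

/* Inflate the recovered zlib stream; returns bytes produced or -1. */
static long inflate_object(const void *src, size_t len,
			   void *dst, size_t dst_len)
{
	z_stream zs;
	int ret;

	memset(&zs, 0, sizeof(zs));
	if (inflateInit(&zs) != Z_OK)
		return -1;

	zs.next_in = (void *)src;
	zs.avail_in = len;
	zs.next_out = dst;
	zs.avail_out = dst_len;

	ret = inflate(&zs, Z_FINISH);
	inflateEnd(&zs);

	/* Z_STREAM_END is the normal case; tolerate a missing end marker
	 * if the kernel ran out of space while finishing the stream. */
	if (ret != Z_STREAM_END && ret != Z_BUF_ERROR)
		return -1;

	return zs.total_out;
}

Sizing dst to the size of the captured object (e.g. from the matching
buffer listing in the error state) should be enough, since the inflated
output is a copy of the object's pages.
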
 drivers/gpu/drm/i915/Kconfig          |   1 +
 drivers/gpu/drm/i915/i915_drv.h       |   4 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  10 ++
 drivers/gpu/drm/i915/i915_gem_gtt.h   |   2 +
 drivers/gpu/drm/i915/i915_gpu_error.c | 271 +++++++++++++++++-----------------
 5 files changed, 148 insertions(+), 140 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7badcee88ebf..c8ea20526aef 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -5,6 +5,7 @@ config DRM_I915
 	select INTEL_GTT
 	select INTERVAL_TREE
 	select STOP_MACHINE
+	select ZLIB_DEFLATE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 370a4c9eea70..edc1e6d6be0d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -534,6 +534,7 @@ struct drm_i915_error_state {
 		u32 tail;
 		u32 head;
 		u32 ctl;
+		u32 mode;
 		u32 hws;
 		u32 ipeir;
 		u32 ipehr;
@@ -550,9 +551,10 @@ struct drm_i915_error_state {
 		u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
 
 		struct drm_i915_error_object {
-			int page_count;
 			u64 gtt_offset;
 			u64 gtt_size;
+			int page_count;
+			int unused;
 			u32 *pages[0];
 		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7c9f654d515a..c45e7f456cea 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2737,6 +2737,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 	if (ret)
 		return ret;
 
+	/* Reserve a mappable slot for our lockless error capture */
+	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
+						  &ggtt->gpu_error,
+						  4096, 0, -1,
+						  0, ggtt->mappable_end,
+						  0, 0);
+	if (ret)
+		return ret;
+
 	/* Clear any non-preallocated blocks */
 	drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
@@ -2804,6 +2813,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 	if (drm_mm_initialized(&ggtt->base.mm)) {
 		intel_vgt_deballoon(dev_priv);
 
+		drm_mm_remove_node(&ggtt->gpu_error);
 		drm_mm_takedown(&ggtt->base.mm);
 		list_del(&ggtt->base.global_link);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c54ae2323df3..010e10c0b62b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -428,6 +428,8 @@ struct i915_ggtt {
 	bool do_idle_maps;
 
 	int mtrr;
+
+	struct drm_mm_node gpu_error;
 };
 
 struct i915_hw_ppgtt {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6078f47d4bc0..f036584e55e3 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -29,6 +29,7 @@
 
 #include <generated/utsrelease.h>
 #include <linux/stop_machine.h>
+#include <linux/zlib.h>
 #include "i915_drv.h"
 
 static const char *engine_str(int engine)
@@ -237,6 +238,8 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  HEAD:  0x%08x\n", ee->head);
 	err_printf(m, "  TAIL:  0x%08x\n", ee->tail);
 	err_printf(m, "  CTL:   0x%08x\n", ee->ctl);
+	err_printf(m, "  MODE:  0x%08x [idle? %d]\n",
+		   ee->mode, !!(ee->mode & MODE_IDLE));
 	err_printf(m, "  HWS:   0x%08x\n", ee->hws);
 	err_printf(m, "  ACTHD: 0x%08x %08x\n",
 		   (u32)(ee->acthd>>32), (u32)ee->acthd);
@@ -307,18 +310,46 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 	va_end(args);
 }
 
+static bool
+ascii85_encode(u32 in, char *out)
+{
+	int i;
+
+	if (in == 0)
+		return false;
+
+	out[5] = '\0';
+	for (i = 5; i--; ) {
+		out[i] = '!' + in % 85;
+		in /= 85;
+	}
+
+	return true;
+}
+
 static void print_error_obj(struct drm_i915_error_state_buf *m,
 			    struct drm_i915_error_object *obj)
 {
-	int page, offset, elt;
+	char out[6];
+	int page;
+
+	err_puts(m, ":"); /* indicate compressed data */
+	for (page = 0; page < obj->page_count; page++) {
+		int i, len;
+
+		len = PAGE_SIZE;
+		if (page == obj->page_count - 1)
+			len -= obj->unused;
+		len = (len + 3) / 4;
 
-	for (page = offset = 0; page < obj->page_count; page++) {
-		for (elt = 0; elt < PAGE_SIZE/4; elt++) {
-			err_printf(m, "%08x :  %08x\n", offset,
-				   obj->pages[page][elt]);
-			offset += 4;
+		for (i = 0; i < len; i++) {
+			if (ascii85_encode(obj->pages[page][i], out))
+				err_puts(m, out);
+			else
+				err_puts(m, "z");
 		}
 	}
+	err_puts(m, "\n");
 }
 
 int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
@@ -328,8 +359,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_error_state *error = error_priv->error;
 	struct drm_i915_error_object *obj;
-	int i, j, offset, elt;
 	int max_hangcheck_score;
+	int i, j;
 
 	if (!error) {
 		err_printf(m, "no error state collected\n");
@@ -481,75 +512,33 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 		}
 
 		if ((obj = ee->ringbuffer)) {
-			err_printf(m, "%s --- ringbuffer = 0x%08x\n",
-				   dev_priv->engine[i].name,
-				   lower_32_bits(obj->gtt_offset));
+			err_printf(m, "%s --- ringbuffer = 0x%08llx\n",
+				   dev_priv->engine[i].name, obj->gtt_offset);
 			print_error_obj(m, obj);
 		}
 
-		if ((obj = ee->hws_page)) {
-			u64 hws_offset = obj->gtt_offset;
-			u32 *hws_page = &obj->pages[0][0];
-
-			if (i915.enable_execlists) {
-				hws_offset += LRC_PPHWSP_PN * PAGE_SIZE;
-				hws_page = &obj->pages[LRC_PPHWSP_PN][0];
-			}
-			err_printf(m, "%s --- HW Status = 0x%08llx\n",
-				   dev_priv->engine[i].name, hws_offset);
-			offset = 0;
-			for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
-				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-					   offset,
-					   hws_page[elt],
-					   hws_page[elt+1],
-					   hws_page[elt+2],
-					   hws_page[elt+3]);
-				offset += 16;
-			}
+		if ((obj = ee->wa_ctx)) {
+			err_printf(m, "%s --- WA Context = 0x%08llx\n",
+				   dev_priv->engine[i].name, obj->gtt_offset);
+			print_error_obj(m, obj);
 		}
 
-		obj = ee->wa_ctx;
-		if (obj) {
-			u64 wa_ctx_offset = obj->gtt_offset;
-			u32 *wa_ctx_page = &obj->pages[0][0];
-			struct intel_engine_cs *engine = &dev_priv->engine[RCS];
-			u32 wa_ctx_size = (engine->wa_ctx.indirect_ctx.size +
-					   engine->wa_ctx.per_ctx.size);
-
-			err_printf(m, "%s --- WA ctx batch buffer = 0x%08llx\n",
-				   dev_priv->engine[i].name, wa_ctx_offset);
-			offset = 0;
-			for (elt = 0; elt < wa_ctx_size; elt += 4) {
-				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-					   offset,
-					   wa_ctx_page[elt + 0],
-					   wa_ctx_page[elt + 1],
-					   wa_ctx_page[elt + 2],
-					   wa_ctx_page[elt + 3]);
-				offset += 16;
-			}
+		if ((obj = ee->hws_page)) {
+			err_printf(m, "%s --- HW Status = 0x%08llx\n",
+				   dev_priv->engine[i].name, obj->gtt_offset);
+			print_error_obj(m, obj);
 		}
 
 		if ((obj = ee->ctx)) {
-			err_printf(m, "%s --- HW Context = 0x%08x\n",
-				   dev_priv->engine[i].name,
-				   lower_32_bits(obj->gtt_offset));
+			err_printf(m, "%s --- HW Context = 0x%08llx\n",
+				   dev_priv->engine[i].name, obj->gtt_offset);
 			print_error_obj(m, obj);
 		}
 	}
 
 	if ((obj = error->semaphore_obj)) {
-		err_printf(m, "Semaphore page = 0x%08x\n",
-			   lower_32_bits(obj->gtt_offset));
-		for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
-			err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-				   elt * 4,
-				   obj->pages[0][elt],
-				   obj->pages[0][elt+1],
-				   obj->pages[0][elt+2],
-				   obj->pages[0][elt+3]);
-		}
+		err_printf(m, "Semaphore page = 0x%08llx\n", obj->gtt_offset);
+		print_error_obj(m, obj);
 	}
 
 	if (error->overlay)
@@ -605,7 +594,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj)
 		return;
 
 	for (page = 0; page < obj->page_count; page++)
-		kfree(obj->pages[page]);
+		free_page((unsigned long)obj->pages[page]);
 
 	kfree(obj);
 }
@@ -641,98 +630,107 @@ static void i915_error_state_free(struct kref *error_ref)
 	kfree(error);
 }
 
+static int compress_page(struct z_stream_s *zstream,
+			 void *src,
+			 struct drm_i915_error_object *dst)
+{
+	zstream->next_in = src;
+	zstream->avail_in = PAGE_SIZE;
+
+	do {
+		if (zstream->avail_out == 0) {
+			unsigned long page;
+
+			page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+			if (!page)
+				return -ENOMEM;
+
+			dst->pages[dst->page_count++] = (void *)page;
+
+			zstream->next_out = (void *)page;
+			zstream->avail_out = PAGE_SIZE;
+		}
+
+		if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
+			return -EIO;
+
+#if 0
+		if (zstream->total_out > zstream->total_in)
+			return -E2BIG;
+#endif
+	} while (zstream->avail_in);
+
+	return 0;
+}
+
 static struct drm_i915_error_object *
-i915_error_object_create(struct drm_i915_private *dev_priv,
+i915_error_object_create(struct drm_i915_private *i915,
 			 struct i915_vma *vma)
 {
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	struct drm_i915_gem_object *src;
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	const u64 slot = ggtt->gpu_error.start;
 	struct drm_i915_error_object *dst;
-	int num_pages;
-	bool use_ggtt;
-	int i = 0;
-	u64 reloc_offset;
+	struct z_stream_s zstream;
+	unsigned long num_pages;
+	struct sgt_iter iter;
+	dma_addr_t dma;
 
 	if (!vma)
 		return NULL;
 
-	src = vma->obj;
-	if (!src->pages)
-		return NULL;
-
-	num_pages = src->base.size >> PAGE_SHIFT;
-
-	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
+	num_pages = vma->size >> PAGE_SHIFT;
+	num_pages = 10 * num_pages * sizeof(u32 *) >> 3;
+	dst = kmalloc(sizeof(*dst) + num_pages, GFP_ATOMIC | __GFP_NOWARN);
 	if (!dst)
 		return NULL;
 
 	dst->gtt_offset = vma->node.start;
-	dst->gtt_size = vma->node.size;
+	dst->page_count = 0;
+	dst->unused = 0;
+
+	memset(&zstream, 0, sizeof(zstream));
+	zstream.workspace = kmalloc(zlib_deflate_workspacesize(MAX_WBITS,
+							       MAX_MEM_LEVEL),
+				    GFP_ATOMIC | __GFP_NOWARN);
+	if (!zstream.workspace ||
+	    zlib_deflateInit(&zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
+		kfree(zstream.workspace);
+		kfree(dst);
+		return NULL;
+	}
 
-	reloc_offset = dst->gtt_offset;
-	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
-		   (vma->flags & I915_VMA_GLOBAL_BIND) &&
-		   reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
+	for_each_sgt_dma(dma, iter,
+			 vma->ggtt_view.pages ?: vma->obj->pages) {
+		int ret;
+		void *s;
 
-	/* Cannot access stolen address directly, try to use the aperture */
-	if (src->stolen) {
-		use_ggtt = true;
+		ggtt->base.insert_page(&ggtt->base, dma, slot,
+				       I915_CACHE_NONE, 0);
 
-		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
-			goto unwind;
+		s = (void *__force)
+			io_mapping_map_atomic_wc(ggtt->mappable, slot);
+		ret = compress_page(&zstream, s, dst);
+		io_mapping_unmap_atomic(s);
 
-		reloc_offset = vma->node.start;
-		if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
+		if (ret)
 			goto unwind;
 	}
+	zlib_deflate(&zstream, Z_FINISH);
+	dst->unused = zstream.avail_out;
+out:
+	zlib_deflateEnd(&zstream);
+	kfree(zstream.workspace);
 
-	/* Cannot access snooped pages through the aperture */
-	if (use_ggtt && src->cache_level != I915_CACHE_NONE &&
-	    !HAS_LLC(dev_priv))
-		goto unwind;
-
-	dst->page_count = num_pages;
-	while (num_pages--) {
-		void *d;
-
-		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
-		if (d == NULL)
-			goto unwind;
-
-		if (use_ggtt) {
-			void __iomem *s;
-
-			/* Simply ignore tiling or any overlapping fence.
-			 * It's part of the error state, and this hopefully
-			 * captures what the GPU read.
-			 */
-
-			s = io_mapping_map_atomic_wc(ggtt->mappable,
-						     reloc_offset);
-			memcpy_fromio(d, s, PAGE_SIZE);
-			io_mapping_unmap_atomic(s);
-		} else {
-			struct page *page;
-			void *s;
-
-			page = i915_gem_object_get_page(src, i);
-
-			s = kmap_atomic(page);
-			memcpy(d, s, PAGE_SIZE);
-			kunmap_atomic(s);
-		}
-
-		dst->pages[i++] = d;
-		reloc_offset += PAGE_SIZE;
-	}
+	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
 
 	return dst;
 
 unwind:
-	while (i--)
-		kfree(dst->pages[i]);
+	while (dst->page_count--)
+		free_page((unsigned long)dst->pages[dst->page_count]);
 	kfree(dst);
-	return NULL;
+	dst = NULL;
+	goto out;
 }
 
 /* The error capture is special as tries to run underneath the normal
@@ -979,6 +977,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
 	ee->head = I915_READ_HEAD(engine);
 	ee->tail = I915_READ_TAIL(engine);
 	ee->ctl = I915_READ_CTL(engine);
+	if (INTEL_GEN(dev_priv) > 2)
+		ee->mode = I915_READ_MODE(engine);
 
 	if (I915_NEED_GFX_HWS(dev_priv)) {
 		i915_reg_t mmio;
@@ -1367,9 +1367,6 @@ static int capture(void *data)
 {
 	struct drm_i915_error_state *error = data;
 
-	/* Ensure that what we readback from memory matches what the GPU sees */
-	wbinvd();
-
 	i915_capture_gen_state(error->i915, error);
 	i915_capture_reg_state(error->i915, error);
 	i915_gem_record_fences(error->i915, error);
@@ -1383,9 +1380,6 @@ static int capture(void *data)
 	error->overlay = intel_overlay_capture_error_state(error->i915);
 	error->display = intel_display_capture_error_state(error->i915);
 
-	/* And make sure we don't leave trash in the CPU cache */
-	wbinvd();
-
 	return 0;
 }
 
@@ -1459,7 +1453,6 @@ void i915_error_state_get(struct drm_device *dev,
 	if (error_priv->error)
 		kref_get(&error_priv->error->ref);
 	spin_unlock_irq(&dev_priv->gpu_error.lock);
-
 }
 
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
-- 
2.8.1
