[Intel-gfx] [PATCH 13/13] drm/i915: Compress GPU objects in error state

Daniel Vetter daniel at ffwll.ch
Fri Aug 5 18:56:57 UTC 2016


On Fri, Aug 05, 2016 at 10:06:04AM +0100, Chris Wilson wrote:
> Our error states are quickly growing, pinning kernel memory with them.
> The majority of the space is taken up by the error objects. These
> compress well using zlib and without decode are mostly meaningless, so
> encoding them does not hinder quickly parsing the error state for
> familiarity.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

Seems to also contain a wholesale rework of the capture logic using a ggtt
mappable entry. Would explain why the missing clflush isn't an issue for
you. IMO best if that part is reordered as the first patch (or at least
before stop_machine, which requires the removal of clflush), and then the
zlib on top.

On the idea itself, since I have no clue: How do we uncompress these
again? Patched intel_error_decode, or can zlib deal with in-line streams?
-Daniel

> ---
>  drivers/gpu/drm/i915/Kconfig          |   1 +
>  drivers/gpu/drm/i915/i915_drv.h       |   4 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c   |  10 ++
>  drivers/gpu/drm/i915/i915_gem_gtt.h   |   2 +
>  drivers/gpu/drm/i915/i915_gpu_error.c | 271 +++++++++++++++++-----------------
>  5 files changed, 148 insertions(+), 140 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 7badcee88ebf..c8ea20526aef 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -5,6 +5,7 @@ config DRM_I915
>  	select INTEL_GTT
>  	select INTERVAL_TREE
>  	select STOP_MACHINE
> +	select ZLIB_DEFLATE
>  	# we need shmfs for the swappable backing store, and in particular
>  	# the shmem_readpage() which depends upon tmpfs
>  	select SHMEM
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 370a4c9eea70..edc1e6d6be0d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -534,6 +534,7 @@ struct drm_i915_error_state {
>  		u32 tail;
>  		u32 head;
>  		u32 ctl;
> +		u32 mode;
>  		u32 hws;
>  		u32 ipeir;
>  		u32 ipehr;
> @@ -550,9 +551,10 @@ struct drm_i915_error_state {
>  		u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
>  
>  		struct drm_i915_error_object {
> -			int page_count;
>  			u64 gtt_offset;
>  			u64 gtt_size;
> +			int page_count;
> +			int unused;
>  			u32 *pages[0];
>  		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7c9f654d515a..c45e7f456cea 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2737,6 +2737,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
>  	if (ret)
>  		return ret;
>  
> +	/* Reserve a mappable slot for our lockless error capture */
> +	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
> +						  &ggtt->gpu_error,
> +						  4096, 0, -1,
> +						  0, ggtt->mappable_end,
> +						  0, 0);
> +	if (ret)
> +		return ret;
> +
>  	/* Clear any non-preallocated blocks */
>  	drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
>  		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
> @@ -2804,6 +2813,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
>  	if (drm_mm_initialized(&ggtt->base.mm)) {
>  		intel_vgt_deballoon(dev_priv);
>  
> +		drm_mm_remove_node(&ggtt->gpu_error);
>  		drm_mm_takedown(&ggtt->base.mm);
>  		list_del(&ggtt->base.global_link);
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index c54ae2323df3..010e10c0b62b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -428,6 +428,8 @@ struct i915_ggtt {
>  	bool do_idle_maps;
>  
>  	int mtrr;
> +
> +	struct drm_mm_node gpu_error;
>  };
>  
>  struct i915_hw_ppgtt {
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 6078f47d4bc0..f036584e55e3 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -29,6 +29,7 @@
>  
>  #include <generated/utsrelease.h>
>  #include <linux/stop_machine.h>
> +#include <linux/zlib.h>
>  #include "i915_drv.h"
>  
>  static const char *engine_str(int engine)
> @@ -237,6 +238,8 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
>  	err_printf(m, "  HEAD:  0x%08x\n", ee->head);
>  	err_printf(m, "  TAIL:  0x%08x\n", ee->tail);
>  	err_printf(m, "  CTL:   0x%08x\n", ee->ctl);
> +	err_printf(m, "  MODE:  0x%08x [idle? %d]\n",
> +		   ee->mode, !!(ee->mode & MODE_IDLE));
>  	err_printf(m, "  HWS:   0x%08x\n", ee->hws);
>  	err_printf(m, "  ACTHD: 0x%08x %08x\n",
>  		   (u32)(ee->acthd>>32), (u32)ee->acthd);
> @@ -307,18 +310,46 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
>  	va_end(args);
>  }
>  
> +static bool
> +ascii85_encode(u32 in, char *out)
> +{
> +	int i;
> +
> +	if (in == 0)
> +		return false;
> +
> +	out[5] = '\0';
> +	for (i = 5; i--; ) {
> +		out[i] = '!' + in % 85;
> +		in /= 85;
> +	}
> +
> +	return true;
> +}
> +
>  static void print_error_obj(struct drm_i915_error_state_buf *m,
>  			    struct drm_i915_error_object *obj)
>  {
> -	int page, offset, elt;
> +	char out[6];
> +	int page;
> +
> +	err_puts(m, ":"); /* indicate compressed data */
> +	for (page = 0; page < obj->page_count; page++) {
> +		int i, len;
> +
> +		len = PAGE_SIZE;
> +		if (page == obj->page_count - 1)
> +			len -= obj->unused;
> +		len = (len + 3) / 4;
>  
> -	for (page = offset = 0; page < obj->page_count; page++) {
> -		for (elt = 0; elt < PAGE_SIZE/4; elt++) {
> -			err_printf(m, "%08x :  %08x\n", offset,
> -				   obj->pages[page][elt]);
> -			offset += 4;
> +		for (i = 0; i < len; i++) {
> +			if (ascii85_encode(obj->pages[page][i], out))
> +				err_puts(m, out);
> +			else
> +				err_puts(m, "z");
>  		}
>  	}
> +	err_puts(m, "\n");
>  }
>  
>  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> @@ -328,8 +359,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_i915_error_state *error = error_priv->error;
>  	struct drm_i915_error_object *obj;
> -	int i, j, offset, elt;
>  	int max_hangcheck_score;
> +	int i, j;
>  
>  	if (!error) {
>  		err_printf(m, "no error state collected\n");
> @@ -481,75 +512,33 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>  		}
>  
>  		if ((obj = ee->ringbuffer)) {
> -			err_printf(m, "%s --- ringbuffer = 0x%08x\n",
> -				   dev_priv->engine[i].name,
> -				   lower_32_bits(obj->gtt_offset));
> +			err_printf(m, "%s --- ringbuffer = 0x%08llx\n",
> +				   dev_priv->engine[i].name, obj->gtt_offset);
>  			print_error_obj(m, obj);
>  		}
>  
> -		if ((obj = ee->hws_page)) {
> -			u64 hws_offset = obj->gtt_offset;
> -			u32 *hws_page = &obj->pages[0][0];
> -
> -			if (i915.enable_execlists) {
> -				hws_offset += LRC_PPHWSP_PN * PAGE_SIZE;
> -				hws_page = &obj->pages[LRC_PPHWSP_PN][0];
> -			}
> -			err_printf(m, "%s --- HW Status = 0x%08llx\n",
> -				   dev_priv->engine[i].name, hws_offset);
> -			offset = 0;
> -			for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
> -				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> -					   offset,
> -					   hws_page[elt],
> -					   hws_page[elt+1],
> -					   hws_page[elt+2],
> -					   hws_page[elt+3]);
> -				offset += 16;
> -			}
> +		if ((obj = ee->wa_ctx)) {
> +			err_printf(m, "%s --- WA Context = 0x%08llx\n",
> +				   dev_priv->engine[i].name, obj->gtt_offset);
> +			print_error_obj(m, obj);
>  		}
>  
> -		obj = ee->wa_ctx;
> -		if (obj) {
> -			u64 wa_ctx_offset = obj->gtt_offset;
> -			u32 *wa_ctx_page = &obj->pages[0][0];
> -			struct intel_engine_cs *engine = &dev_priv->engine[RCS];
> -			u32 wa_ctx_size = (engine->wa_ctx.indirect_ctx.size +
> -					   engine->wa_ctx.per_ctx.size);
> -
> -			err_printf(m, "%s --- WA ctx batch buffer = 0x%08llx\n",
> -				   dev_priv->engine[i].name, wa_ctx_offset);
> -			offset = 0;
> -			for (elt = 0; elt < wa_ctx_size; elt += 4) {
> -				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> -					   offset,
> -					   wa_ctx_page[elt + 0],
> -					   wa_ctx_page[elt + 1],
> -					   wa_ctx_page[elt + 2],
> -					   wa_ctx_page[elt + 3]);
> -				offset += 16;
> -			}
> +		if ((obj = ee->hws_page)) {
> +			err_printf(m, "%s --- HW Status = 0x%08llx\n",
> +				   dev_priv->engine[i].name, obj->gtt_offset);
> +			print_error_obj(m, obj);
>  		}
>  
>  		if ((obj = ee->ctx)) {
> -			err_printf(m, "%s --- HW Context = 0x%08x\n",
> -				   dev_priv->engine[i].name,
> -				   lower_32_bits(obj->gtt_offset));
> +			err_printf(m, "%s --- HW Context = 0x%08llx\n",
> +				   dev_priv->engine[i].name, obj->gtt_offset);
>  			print_error_obj(m, obj);
>  		}
>  	}
>  
>  	if ((obj = error->semaphore_obj)) {
> -		err_printf(m, "Semaphore page = 0x%08x\n",
> -			   lower_32_bits(obj->gtt_offset));
> -		for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
> -			err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> -				   elt * 4,
> -				   obj->pages[0][elt],
> -				   obj->pages[0][elt+1],
> -				   obj->pages[0][elt+2],
> -				   obj->pages[0][elt+3]);
> -		}
> +		err_printf(m, "Semaphore page = 0x%08llx\n", obj->gtt_offset);
> +		print_error_obj(m, obj);
>  	}
>  
>  	if (error->overlay)
> @@ -605,7 +594,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj)
>  		return;
>  
>  	for (page = 0; page < obj->page_count; page++)
> -		kfree(obj->pages[page]);
> +		free_page((unsigned long)obj->pages[page]);
>  
>  	kfree(obj);
>  }
> @@ -641,98 +630,107 @@ static void i915_error_state_free(struct kref *error_ref)
>  	kfree(error);
>  }
>  
> +static int compress_page(struct z_stream_s *zstream,
> +			 void *src,
> +			 struct drm_i915_error_object *dst)
> +{
> +	zstream->next_in = src;
> +	zstream->avail_in = PAGE_SIZE;
> +
> +	do {
> +		if (zstream->avail_out == 0) {
> +			unsigned long page;
> +
> +			page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
> +			if (!page)
> +				return -ENOMEM;
> +
> +			dst->pages[dst->page_count++] = (void *)page;
> +
> +			zstream->next_out = (void *)page;
> +			zstream->avail_out = PAGE_SIZE;
> +		}
> +
> +		if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
> +			return -EIO;
> +
> +#if 0
> +		if (zstream->total_out > zstream->total_in)
> +			return -E2BIG;
> +#endif
> +	} while (zstream->avail_in);
> +
> +	return 0;
> +}
> +
>  static struct drm_i915_error_object *
> -i915_error_object_create(struct drm_i915_private *dev_priv,
> +i915_error_object_create(struct drm_i915_private *i915,
>  			 struct i915_vma *vma)
>  {
> -	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> -	struct drm_i915_gem_object *src;
> +	struct i915_ggtt *ggtt = &i915->ggtt;
> +	const u64 slot = ggtt->gpu_error.start;
>  	struct drm_i915_error_object *dst;
> -	int num_pages;
> -	bool use_ggtt;
> -	int i = 0;
> -	u64 reloc_offset;
> +	struct z_stream_s zstream;
> +	unsigned long num_pages;
> +	struct sgt_iter iter;
> +	dma_addr_t dma;
>  
>  	if (!vma)
>  		return NULL;
>  
> -	src = vma->obj;
> -	if (!src->pages)
> -		return NULL;
> -
> -	num_pages = src->base.size >> PAGE_SHIFT;
> -
> -	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
> +	num_pages = vma->size >> PAGE_SHIFT;
> +	num_pages = 10 * num_pages * sizeof(u32 *) >> 3;
> +	dst = kmalloc(sizeof(*dst) + num_pages, GFP_ATOMIC | __GFP_NOWARN);
>  	if (!dst)
>  		return NULL;
>  
>  	dst->gtt_offset = vma->node.start;
> -	dst->gtt_size = vma->node.size;
> +	dst->page_count = 0;
> +	dst->unused = 0;
> +
> +	memset(&zstream, 0, sizeof(zstream));
> +	zstream.workspace = kmalloc(zlib_deflate_workspacesize(MAX_WBITS,
> +							       MAX_MEM_LEVEL),
> +				    GFP_ATOMIC | __GFP_NOWARN);
> +	if (!zstream.workspace ||
> +	    zlib_deflateInit(&zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
> +		kfree(zstream.workspace);
> +		kfree(dst);
> +		return NULL;
> +	}
>  
> -	reloc_offset = dst->gtt_offset;
> -	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> -		   (vma->flags & I915_VMA_GLOBAL_BIND) &&
> -		   reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
> +	for_each_sgt_dma(dma, iter,
> +			 vma->ggtt_view.pages ?: vma->obj->pages) {
> +		int ret;
> +		void *s;
>  
> -	/* Cannot access stolen address directly, try to use the aperture */
> -	if (src->stolen) {
> -		use_ggtt = true;
> +		ggtt->base.insert_page(&ggtt->base, dma, slot,
> +				       I915_CACHE_NONE, 0);
>  
> -		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
> -			goto unwind;
> +		s = (void *__force)
> +			io_mapping_map_atomic_wc(ggtt->mappable, slot);
> +		ret = compress_page(&zstream, s, dst);
> +		io_mapping_unmap_atomic(s);
>  
> -		reloc_offset = vma->node.start;
> -		if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
> +		if (ret)
>  			goto unwind;
>  	}
> +	zlib_deflate(&zstream, Z_FINISH);
> +	dst->unused = zstream.avail_out;
> +out:
> +	zlib_deflateEnd(&zstream);
> +	kfree(zstream.workspace);
>  
> -	/* Cannot access snooped pages through the aperture */
> -	if (use_ggtt && src->cache_level != I915_CACHE_NONE &&
> -	    !HAS_LLC(dev_priv))
> -		goto unwind;
> -
> -	dst->page_count = num_pages;
> -	while (num_pages--) {
> -		void *d;
> -
> -		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
> -		if (d == NULL)
> -			goto unwind;
> -
> -		if (use_ggtt) {
> -			void __iomem *s;
> -
> -			/* Simply ignore tiling or any overlapping fence.
> -			 * It's part of the error state, and this hopefully
> -			 * captures what the GPU read.
> -			 */
> -
> -			s = io_mapping_map_atomic_wc(ggtt->mappable,
> -						     reloc_offset);
> -			memcpy_fromio(d, s, PAGE_SIZE);
> -			io_mapping_unmap_atomic(s);
> -		} else {
> -			struct page *page;
> -			void *s;
> -
> -			page = i915_gem_object_get_page(src, i);
> -
> -			s = kmap_atomic(page);
> -			memcpy(d, s, PAGE_SIZE);
> -			kunmap_atomic(s);
> -		}
> -
> -		dst->pages[i++] = d;
> -		reloc_offset += PAGE_SIZE;
> -	}
> +	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
>  
>  	return dst;
>  
>  unwind:
> -	while (i--)
> -		kfree(dst->pages[i]);
> +	while (dst->page_count--)
> +		free_page((unsigned long)dst->pages[dst->page_count]);
>  	kfree(dst);
> -	return NULL;
> +	dst = NULL;
> +	goto out;
>  }
>  
>  /* The error capture is special as tries to run underneath the normal
> @@ -979,6 +977,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
>  	ee->head = I915_READ_HEAD(engine);
>  	ee->tail = I915_READ_TAIL(engine);
>  	ee->ctl = I915_READ_CTL(engine);
> +	if (INTEL_GEN(dev_priv) > 2)
> +		ee->mode = I915_READ_MODE(engine);
>  
>  	if (I915_NEED_GFX_HWS(dev_priv)) {
>  		i915_reg_t mmio;
> @@ -1367,9 +1367,6 @@ static int capture(void *data)
>  {
>  	struct drm_i915_error_state *error = data;
>  
> -	/* Ensure that what we readback from memory matches what the GPU sees */
> -	wbinvd();
> -
>  	i915_capture_gen_state(error->i915, error);
>  	i915_capture_reg_state(error->i915, error);
>  	i915_gem_record_fences(error->i915, error);
> @@ -1383,9 +1380,6 @@ static int capture(void *data)
>  	error->overlay = intel_overlay_capture_error_state(error->i915);
>  	error->display = intel_display_capture_error_state(error->i915);
>  
> -	/* And make sure we don't leave trash in the CPU cache */
> -	wbinvd();
> -
>  	return 0;
>  }
>  
> @@ -1459,7 +1453,6 @@ void i915_error_state_get(struct drm_device *dev,
>  	if (error_priv->error)
>  		kref_get(&error_priv->error->ref);
>  	spin_unlock_irq(&dev_priv->gpu_error.lock);
> -
>  }
>  
>  void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the Intel-gfx mailing list