[Intel-gfx] [PATCH 13/13] drm/i915: Compress GPU objects in error state
Daniel Vetter
daniel at ffwll.ch
Fri Aug 5 18:56:57 UTC 2016
On Fri, Aug 05, 2016 at 10:06:04AM +0100, Chris Wilson wrote:
> Our error states are quickly growing, pinning kernel memory with them.
> The majority of the space is taken up by the error objects. These
> compress well using zlib and without decode are mostly meaningless, so
> encoding them does not hinder quickly parsing the error state for
> familiarity.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Seems to also contain a wholesale rework of the capture logic using a ggtt
mappable entry. Would explain why the missing clflush isn't an issue for
you. Imo best if that part is reordered as the first patch (or at least
before stop_machine, which requires the removal of clflush), and then the
zlib on top.
On the idea itself, since I have no clue: How do we uncompress these
again? Patched intel_error_decode, or can zlib deal with in-line streams?
-Daniel
> ---
> drivers/gpu/drm/i915/Kconfig | 1 +
> drivers/gpu/drm/i915/i915_drv.h | 4 +-
> drivers/gpu/drm/i915/i915_gem_gtt.c | 10 ++
> drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +
> drivers/gpu/drm/i915/i915_gpu_error.c | 271 +++++++++++++++++-----------------
> 5 files changed, 148 insertions(+), 140 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 7badcee88ebf..c8ea20526aef 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -5,6 +5,7 @@ config DRM_I915
> select INTEL_GTT
> select INTERVAL_TREE
> select STOP_MACHINE
> + select ZLIB_DEFLATE
> # we need shmfs for the swappable backing store, and in particular
> # the shmem_readpage() which depends upon tmpfs
> select SHMEM
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 370a4c9eea70..edc1e6d6be0d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -534,6 +534,7 @@ struct drm_i915_error_state {
> u32 tail;
> u32 head;
> u32 ctl;
> + u32 mode;
> u32 hws;
> u32 ipeir;
> u32 ipehr;
> @@ -550,9 +551,10 @@ struct drm_i915_error_state {
> u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
>
> struct drm_i915_error_object {
> - int page_count;
> u64 gtt_offset;
> u64 gtt_size;
> + int page_count;
> + int unused;
> u32 *pages[0];
> } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7c9f654d515a..c45e7f456cea 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2737,6 +2737,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
> if (ret)
> return ret;
>
> + /* Reserve a mappable slot for our lockless error capture */
> + ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
> + &ggtt->gpu_error,
> + 4096, 0, -1,
> + 0, ggtt->mappable_end,
> + 0, 0);
> + if (ret)
> + return ret;
> +
> /* Clear any non-preallocated blocks */
> drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
> DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
> @@ -2804,6 +2813,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
> if (drm_mm_initialized(&ggtt->base.mm)) {
> intel_vgt_deballoon(dev_priv);
>
> + drm_mm_remove_node(&ggtt->gpu_error);
> drm_mm_takedown(&ggtt->base.mm);
> list_del(&ggtt->base.global_link);
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index c54ae2323df3..010e10c0b62b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -428,6 +428,8 @@ struct i915_ggtt {
> bool do_idle_maps;
>
> int mtrr;
> +
> + struct drm_mm_node gpu_error;
> };
>
> struct i915_hw_ppgtt {
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 6078f47d4bc0..f036584e55e3 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -29,6 +29,7 @@
>
> #include <generated/utsrelease.h>
> #include <linux/stop_machine.h>
> +#include <linux/zlib.h>
> #include "i915_drv.h"
>
> static const char *engine_str(int engine)
> @@ -237,6 +238,8 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
> err_printf(m, " HEAD: 0x%08x\n", ee->head);
> err_printf(m, " TAIL: 0x%08x\n", ee->tail);
> err_printf(m, " CTL: 0x%08x\n", ee->ctl);
> + err_printf(m, " MODE: 0x%08x [idle? %d]\n",
> + ee->mode, !!(ee->mode & MODE_IDLE));
> err_printf(m, " HWS: 0x%08x\n", ee->hws);
> err_printf(m, " ACTHD: 0x%08x %08x\n",
> (u32)(ee->acthd>>32), (u32)ee->acthd);
> @@ -307,18 +310,46 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
> va_end(args);
> }
>
> +static bool
> +ascii85_encode(u32 in, char *out)
> +{
> + int i;
> +
> + if (in == 0)
> + return false;
> +
> + out[5] = '\0';
> + for (i = 5; i--; ) {
> + out[i] = '!' + in % 85;
> + in /= 85;
> + }
> +
> + return true;
> +}
> +
> static void print_error_obj(struct drm_i915_error_state_buf *m,
> struct drm_i915_error_object *obj)
> {
> - int page, offset, elt;
> + char out[6];
> + int page;
> +
> + err_puts(m, ":"); /* indicate compressed data */
> + for (page = 0; page < obj->page_count; page++) {
> + int i, len;
> +
> + len = PAGE_SIZE;
> + if (page == obj->page_count - 1)
> + len -= obj->unused;
> + len = (len + 3) / 4;
>
> - for (page = offset = 0; page < obj->page_count; page++) {
> - for (elt = 0; elt < PAGE_SIZE/4; elt++) {
> - err_printf(m, "%08x : %08x\n", offset,
> - obj->pages[page][elt]);
> - offset += 4;
> + for (i = 0; i < len; i++) {
> + if (ascii85_encode(obj->pages[page][i], out))
> + err_puts(m, out);
> + else
> + err_puts(m, "z");
> }
> }
> + err_puts(m, "\n");
> }
>
> int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> @@ -328,8 +359,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> struct drm_i915_private *dev_priv = to_i915(dev);
> struct drm_i915_error_state *error = error_priv->error;
> struct drm_i915_error_object *obj;
> - int i, j, offset, elt;
> int max_hangcheck_score;
> + int i, j;
>
> if (!error) {
> err_printf(m, "no error state collected\n");
> @@ -481,75 +512,33 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> }
>
> if ((obj = ee->ringbuffer)) {
> - err_printf(m, "%s --- ringbuffer = 0x%08x\n",
> - dev_priv->engine[i].name,
> - lower_32_bits(obj->gtt_offset));
> + err_printf(m, "%s --- ringbuffer = 0x%08llx\n",
> + dev_priv->engine[i].name, obj->gtt_offset);
> print_error_obj(m, obj);
> }
>
> - if ((obj = ee->hws_page)) {
> - u64 hws_offset = obj->gtt_offset;
> - u32 *hws_page = &obj->pages[0][0];
> -
> - if (i915.enable_execlists) {
> - hws_offset += LRC_PPHWSP_PN * PAGE_SIZE;
> - hws_page = &obj->pages[LRC_PPHWSP_PN][0];
> - }
> - err_printf(m, "%s --- HW Status = 0x%08llx\n",
> - dev_priv->engine[i].name, hws_offset);
> - offset = 0;
> - for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
> - err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> - offset,
> - hws_page[elt],
> - hws_page[elt+1],
> - hws_page[elt+2],
> - hws_page[elt+3]);
> - offset += 16;
> - }
> + if ((obj = ee->wa_ctx)) {
> + err_printf(m, "%s --- WA Context = 0x%08llx\n",
> + dev_priv->engine[i].name, obj->gtt_offset);
> + print_error_obj(m, obj);
> }
>
> - obj = ee->wa_ctx;
> - if (obj) {
> - u64 wa_ctx_offset = obj->gtt_offset;
> - u32 *wa_ctx_page = &obj->pages[0][0];
> - struct intel_engine_cs *engine = &dev_priv->engine[RCS];
> - u32 wa_ctx_size = (engine->wa_ctx.indirect_ctx.size +
> - engine->wa_ctx.per_ctx.size);
> -
> - err_printf(m, "%s --- WA ctx batch buffer = 0x%08llx\n",
> - dev_priv->engine[i].name, wa_ctx_offset);
> - offset = 0;
> - for (elt = 0; elt < wa_ctx_size; elt += 4) {
> - err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> - offset,
> - wa_ctx_page[elt + 0],
> - wa_ctx_page[elt + 1],
> - wa_ctx_page[elt + 2],
> - wa_ctx_page[elt + 3]);
> - offset += 16;
> - }
> + if ((obj = ee->hws_page)) {
> + err_printf(m, "%s --- HW Status = 0x%08llx\n",
> + dev_priv->engine[i].name, obj->gtt_offset);
> + print_error_obj(m, obj);
> }
>
> if ((obj = ee->ctx)) {
> - err_printf(m, "%s --- HW Context = 0x%08x\n",
> - dev_priv->engine[i].name,
> - lower_32_bits(obj->gtt_offset));
> + err_printf(m, "%s --- HW Context = 0x%08llx\n",
> + dev_priv->engine[i].name, obj->gtt_offset);
> print_error_obj(m, obj);
> }
> }
>
> if ((obj = error->semaphore_obj)) {
> - err_printf(m, "Semaphore page = 0x%08x\n",
> - lower_32_bits(obj->gtt_offset));
> - for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
> - err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> - elt * 4,
> - obj->pages[0][elt],
> - obj->pages[0][elt+1],
> - obj->pages[0][elt+2],
> - obj->pages[0][elt+3]);
> - }
> + err_printf(m, "Semaphore page = 0x%08llx\n", obj->gtt_offset);
> + print_error_obj(m, obj);
> }
>
> if (error->overlay)
> @@ -605,7 +594,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj)
> return;
>
> for (page = 0; page < obj->page_count; page++)
> - kfree(obj->pages[page]);
> + free_page((unsigned long)obj->pages[page]);
>
> kfree(obj);
> }
> @@ -641,98 +630,107 @@ static void i915_error_state_free(struct kref *error_ref)
> kfree(error);
> }
>
> +static int compress_page(struct z_stream_s *zstream,
> + void *src,
> + struct drm_i915_error_object *dst)
> +{
> + zstream->next_in = src;
> + zstream->avail_in = PAGE_SIZE;
> +
> + do {
> + if (zstream->avail_out == 0) {
> + unsigned long page;
> +
> + page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
> + if (!page)
> + return -ENOMEM;
> +
> + dst->pages[dst->page_count++] = (void *)page;
> +
> + zstream->next_out = (void *)page;
> + zstream->avail_out = PAGE_SIZE;
> + }
> +
> + if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
> + return -EIO;
> +
> +#if 0
> + if (zstream->total_out > zstream->total_in)
> + return -E2BIG;
> +#endif
> + } while (zstream->avail_in);
> +
> + return 0;
> +}
> +
> static struct drm_i915_error_object *
> -i915_error_object_create(struct drm_i915_private *dev_priv,
> +i915_error_object_create(struct drm_i915_private *i915,
> struct i915_vma *vma)
> {
> - struct i915_ggtt *ggtt = &dev_priv->ggtt;
> - struct drm_i915_gem_object *src;
> + struct i915_ggtt *ggtt = &i915->ggtt;
> + const u64 slot = ggtt->gpu_error.start;
> struct drm_i915_error_object *dst;
> - int num_pages;
> - bool use_ggtt;
> - int i = 0;
> - u64 reloc_offset;
> + struct z_stream_s zstream;
> + unsigned long num_pages;
> + struct sgt_iter iter;
> + dma_addr_t dma;
>
> if (!vma)
> return NULL;
>
> - src = vma->obj;
> - if (!src->pages)
> - return NULL;
> -
> - num_pages = src->base.size >> PAGE_SHIFT;
> -
> - dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
> + num_pages = vma->size >> PAGE_SHIFT;
> + num_pages = 10 * num_pages * sizeof(u32 *) >> 3;
> + dst = kmalloc(sizeof(*dst) + num_pages, GFP_ATOMIC | __GFP_NOWARN);
> if (!dst)
> return NULL;
>
> dst->gtt_offset = vma->node.start;
> - dst->gtt_size = vma->node.size;
> + dst->page_count = 0;
> + dst->unused = 0;
> +
> + memset(&zstream, 0, sizeof(zstream));
> + zstream.workspace = kmalloc(zlib_deflate_workspacesize(MAX_WBITS,
> + MAX_MEM_LEVEL),
> + GFP_ATOMIC | __GFP_NOWARN);
> + if (!zstream.workspace ||
> + zlib_deflateInit(&zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
> + kfree(zstream.workspace);
> + kfree(dst);
> + return NULL;
> + }
>
> - reloc_offset = dst->gtt_offset;
> - use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> - (vma->flags & I915_VMA_GLOBAL_BIND) &&
> - reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
> + for_each_sgt_dma(dma, iter,
> + vma->ggtt_view.pages ?: vma->obj->pages) {
> + int ret;
> + void *s;
>
> - /* Cannot access stolen address directly, try to use the aperture */
> - if (src->stolen) {
> - use_ggtt = true;
> + ggtt->base.insert_page(&ggtt->base, dma, slot,
> + I915_CACHE_NONE, 0);
>
> - if (!(vma->flags & I915_VMA_GLOBAL_BIND))
> - goto unwind;
> + s = (void *__force)
> + io_mapping_map_atomic_wc(ggtt->mappable, slot);
> + ret = compress_page(&zstream, s, dst);
> + io_mapping_unmap_atomic(s);
>
> - reloc_offset = vma->node.start;
> - if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
> + if (ret)
> goto unwind;
> }
> + zlib_deflate(&zstream, Z_FINISH);
> + dst->unused = zstream.avail_out;
> +out:
> + zlib_deflateEnd(&zstream);
> + kfree(zstream.workspace);
>
> - /* Cannot access snooped pages through the aperture */
> - if (use_ggtt && src->cache_level != I915_CACHE_NONE &&
> - !HAS_LLC(dev_priv))
> - goto unwind;
> -
> - dst->page_count = num_pages;
> - while (num_pages--) {
> - void *d;
> -
> - d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
> - if (d == NULL)
> - goto unwind;
> -
> - if (use_ggtt) {
> - void __iomem *s;
> -
> - /* Simply ignore tiling or any overlapping fence.
> - * It's part of the error state, and this hopefully
> - * captures what the GPU read.
> - */
> -
> - s = io_mapping_map_atomic_wc(ggtt->mappable,
> - reloc_offset);
> - memcpy_fromio(d, s, PAGE_SIZE);
> - io_mapping_unmap_atomic(s);
> - } else {
> - struct page *page;
> - void *s;
> -
> - page = i915_gem_object_get_page(src, i);
> -
> - s = kmap_atomic(page);
> - memcpy(d, s, PAGE_SIZE);
> - kunmap_atomic(s);
> - }
> -
> - dst->pages[i++] = d;
> - reloc_offset += PAGE_SIZE;
> - }
> + ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
>
> return dst;
>
> unwind:
> - while (i--)
> - kfree(dst->pages[i]);
> + while (dst->page_count--)
> + free_page((unsigned long)dst->pages[dst->page_count]);
> kfree(dst);
> - return NULL;
> + dst = NULL;
> + goto out;
> }
>
> /* The error capture is special as tries to run underneath the normal
> @@ -979,6 +977,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
> ee->head = I915_READ_HEAD(engine);
> ee->tail = I915_READ_TAIL(engine);
> ee->ctl = I915_READ_CTL(engine);
> + if (INTEL_GEN(dev_priv) > 2)
> + ee->mode = I915_READ_MODE(engine);
>
> if (I915_NEED_GFX_HWS(dev_priv)) {
> i915_reg_t mmio;
> @@ -1367,9 +1367,6 @@ static int capture(void *data)
> {
> struct drm_i915_error_state *error = data;
>
> - /* Ensure that what we readback from memory matches what the GPU sees */
> - wbinvd();
> -
> i915_capture_gen_state(error->i915, error);
> i915_capture_reg_state(error->i915, error);
> i915_gem_record_fences(error->i915, error);
> @@ -1383,9 +1380,6 @@ static int capture(void *data)
> error->overlay = intel_overlay_capture_error_state(error->i915);
> error->display = intel_display_capture_error_state(error->i915);
>
> - /* And make sure we don't leave trash in the CPU cache */
> - wbinvd();
> -
> return 0;
> }
>
> @@ -1459,7 +1453,6 @@ void i915_error_state_get(struct drm_device *dev,
> if (error_priv->error)
> kref_get(&error_priv->error->ref);
> spin_unlock_irq(&dev_priv->gpu_error.lock);
> -
> }
>
> void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
> --
> 2.8.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
More information about the Intel-gfx
mailing list