[Intel-gfx] [CI 4/6] drm/i915: Always use the GTT for error capture
Daniel Vetter
daniel at ffwll.ch
Thu Oct 13 15:02:50 UTC 2016
On Wed, Oct 12, 2016 at 10:05:20AM +0100, Chris Wilson wrote:
> Since the GTT provides universal access to any GPU page, we can use it
> to reduce our plethora of read methods to just one. It also has the
> important characteristic of being exactly what the GPU sees - if there
> are incoherency problems, seeing the batch as executed (rather than as
> trapped inside the cpu cache) is important.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Meh, so much for me reading patches in order ;-)
Please reorder this one before the previous one and I'll be happy too.
> ---
> drivers/gpu/drm/i915/i915_gem_gtt.c | 43 ++++++++----
> drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +
> drivers/gpu/drm/i915/i915_gpu_error.c | 120 ++++++++++++----------------------
> 3 files changed, 74 insertions(+), 91 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 0bb4232f66bc..2d846aa39ca5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2717,6 +2717,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
> */
> struct i915_ggtt *ggtt = &dev_priv->ggtt;
> unsigned long hole_start, hole_end;
> + struct i915_hw_ppgtt *ppgtt;
> struct drm_mm_node *entry;
> int ret;
>
> @@ -2724,6 +2725,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
> if (ret)
> return ret;
>
> + /* Reserve a mappable slot for our lockless error capture */
> + ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
> + &ggtt->error_capture,
> + 4096, 0, -1,
> + 0, ggtt->mappable_end,
> + 0, 0);
> + if (ret)
> + return ret;
> +
> /* Clear any non-preallocated blocks */
> drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
> DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
> @@ -2738,25 +2748,21 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
> true);
>
> if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
> - struct i915_hw_ppgtt *ppgtt;
> -
> ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
> - if (!ppgtt)
> - return -ENOMEM;
> + if (!ppgtt) {
> + ret = -ENOMEM;
> + goto err;
> + }
>
> ret = __hw_ppgtt_init(ppgtt, dev_priv);
> - if (ret) {
> - kfree(ppgtt);
> - return ret;
> - }
> + if (ret)
> + goto err_ppgtt;
>
> - if (ppgtt->base.allocate_va_range)
> + if (ppgtt->base.allocate_va_range) {
> ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
> ppgtt->base.total);
> - if (ret) {
> - ppgtt->base.cleanup(&ppgtt->base);
> - kfree(ppgtt);
> - return ret;
> + if (ret)
> + goto err_ppgtt_cleanup;
> }
>
> ppgtt->base.clear_range(&ppgtt->base,
> @@ -2770,6 +2776,14 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
> }
>
> return 0;
> +
> +err_ppgtt_cleanup:
> + ppgtt->base.cleanup(&ppgtt->base);
> +err_ppgtt:
> + kfree(ppgtt);
> +err:
> + drm_mm_remove_node(&ggtt->error_capture);
> + return ret;
> }
>
> /**
> @@ -2788,6 +2802,9 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
>
> i915_gem_cleanup_stolen(&dev_priv->drm);
>
> + if (drm_mm_node_allocated(&ggtt->error_capture))
> + drm_mm_remove_node(&ggtt->error_capture);
> +
> if (drm_mm_initialized(&ggtt->base.mm)) {
> intel_vgt_deballoon(dev_priv);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index ec78be2f8c77..bd93fb8f99d2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -450,6 +450,8 @@ struct i915_ggtt {
> bool do_idle_maps;
>
> int mtrr;
> +
> + struct drm_mm_node error_capture;
> };
>
> struct i915_hw_ppgtt {
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 159d6d7e0cee..b3b2e6c1c6c6 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -656,7 +656,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj)
> return;
>
> for (page = 0; page < obj->page_count; page++)
> - kfree(obj->pages[page]);
> + free_page((unsigned long)obj->pages[page]);
>
> kfree(obj);
> }
> @@ -693,98 +693,69 @@ static void i915_error_state_free(struct kref *error_ref)
> kfree(error);
> }
>
> +static int compress_page(void *src, struct drm_i915_error_object *dst)
Jumping ahead a bit with the name, but imo ok ...
> +{
> + unsigned long page;
> +
> + page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
> + if (!page)
> + return -ENOMEM;
> +
> + dst->pages[dst->page_count++] = (void *)page;
> +
> + memcpy((void *)page, src, PAGE_SIZE);
> + return 0;
> +}
> +
> static struct drm_i915_error_object *
> -i915_error_object_create(struct drm_i915_private *dev_priv,
> +i915_error_object_create(struct drm_i915_private *i915,
> struct i915_vma *vma)
> {
> - struct i915_ggtt *ggtt = &dev_priv->ggtt;
> - struct drm_i915_gem_object *src;
> + struct i915_ggtt *ggtt = &i915->ggtt;
> + const u64 slot = ggtt->error_capture.start;
> struct drm_i915_error_object *dst;
> - int num_pages;
> - bool use_ggtt;
> - int i = 0;
> - u64 reloc_offset;
> + unsigned long num_pages;
> + struct sgt_iter iter;
> + dma_addr_t dma;
>
> if (!vma)
> return NULL;
>
> - src = vma->obj;
> - if (!src->pages)
> - return NULL;
> -
> - num_pages = src->base.size >> PAGE_SHIFT;
> -
> - dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
> + num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
> + dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
> + GFP_ATOMIC | __GFP_NOWARN);
> if (!dst)
> return NULL;
>
> dst->gtt_offset = vma->node.start;
> dst->gtt_size = vma->node.size;
> + dst->page_count = 0;
>
> - reloc_offset = dst->gtt_offset;
> - use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> - (vma->flags & I915_VMA_GLOBAL_BIND) &&
> - reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
> -
> - /* Cannot access stolen address directly, try to use the aperture */
> - if (src->stolen) {
> - use_ggtt = true;
> -
> - if (!(vma->flags & I915_VMA_GLOBAL_BIND))
> - goto unwind;
> -
> - reloc_offset = vma->node.start;
> - if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
> - goto unwind;
> - }
> + for_each_sgt_dma(dma, iter, vma->pages) {
> + void __iomem *s;
> + int ret;
>
> - /* Cannot access snooped pages through the aperture */
> - if (use_ggtt && src->cache_level != I915_CACHE_NONE &&
> - !HAS_LLC(dev_priv))
> - goto unwind;
> + ggtt->base.insert_page(&ggtt->base, dma, slot,
> + I915_CACHE_NONE, 0);
>
> - dst->page_count = num_pages;
> - while (num_pages--) {
> - void *d;
> + s = io_mapping_map_atomic_wc(&ggtt->mappable, slot);
> + ret = compress_page((void * __force)s, dst);
> + io_mapping_unmap_atomic(s);
>
> - d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
> - if (d == NULL)
> + if (ret)
> goto unwind;
> -
> - if (use_ggtt) {
> - void __iomem *s;
> -
> - /* Simply ignore tiling or any overlapping fence.
> - * It's part of the error state, and this hopefully
> - * captures what the GPU read.
> - */
> -
> - s = io_mapping_map_atomic_wc(&ggtt->mappable,
> - reloc_offset);
> - memcpy_fromio(d, s, PAGE_SIZE);
> - io_mapping_unmap_atomic(s);
> - } else {
> - struct page *page;
> - void *s;
> -
> - page = i915_gem_object_get_page(src, i);
> -
> - s = kmap_atomic(page);
> - memcpy(d, s, PAGE_SIZE);
> - kunmap_atomic(s);
> - }
> -
> - dst->pages[i++] = d;
> - reloc_offset += PAGE_SIZE;
> }
> -
> - return dst;
> + goto out;
>
> unwind:
> - while (i--)
> - kfree(dst->pages[i]);
> + while (dst->page_count--)
> + free_page((unsigned long)dst->pages[dst->page_count]);
> kfree(dst);
> - return NULL;
> + dst = NULL;
> +
> +out:
> + ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
> + return dst;
> }
>
> /* The error capture is special as tries to run underneath the normal
> @@ -1445,9 +1416,6 @@ static int capture(void *data)
> {
> struct drm_i915_error_state *error = data;
>
> - /* Ensure that what we readback from memory matches what the GPU sees */
> - wbinvd();
> -
> i915_capture_gen_state(error->i915, error);
> i915_capture_reg_state(error->i915, error);
> i915_gem_record_fences(error->i915, error);
> @@ -1460,9 +1428,6 @@ static int capture(void *data)
> error->overlay = intel_overlay_capture_error_state(error->i915);
> error->display = intel_display_capture_error_state(error->i915);
>
> - /* And make sure we don't leave trash in the CPU cache */
> - wbinvd();
> -
> return 0;
> }
>
> @@ -1539,7 +1504,6 @@ void i915_error_state_get(struct drm_device *dev,
> if (error_priv->error)
> kref_get(&error_priv->error->ref);
> spin_unlock_irq(&dev_priv->gpu_error.lock);
> -
> }
Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
More information about the Intel-gfx
mailing list