[Intel-gfx] [PATCH v3] drm/i915: Use memcpy_from_wc for GPU error capture
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Dec 6 13:29:35 UTC 2016
On 06/12/2016 12:40, Chris Wilson wrote:
> On all platforms we now always read the contents of buffers via the GTT,
> i.e. using WC CPU access. Reads are slow, but they can be accelerated
> with an internal read buffer using SSE4.1 (movntdqa). This is our
> i915_memcpy_from_wc() routine, which also checks for SSE4.1 support, so
> we can fall back to using a regular slow memcpy if we need to.
>
> When compressing the pages, the reads are currently done inside zlib's
> fill_window() routine, so we must first copy the page into a temporary
> buffer, which is then already inside the CPU cache and fast for zlib's
> compression. When not compressing the pages, we don't need a temporary
> and can just use the accelerated read from WC into the destination.
>
> v2: Use zstream locals to reduce diff and allocate the additional
> temporary storage only if sse4.1 is supported.
> v3: Use length=0 for the sse4.1 support check
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_gpu_error.c | 50 ++++++++++++++++++++++++++---------
> 1 file changed, 37 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index a14f7badc337..307999b852fd 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -176,9 +176,14 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e,
>
> #ifdef CONFIG_DRM_I915_COMPRESS_ERROR
>
> -static bool compress_init(struct z_stream_s *zstream)
> +struct compress {
> + struct z_stream_s zstream;
> + void *tmp;
> +};
> +
> +static bool compress_init(struct compress *c)
> {
> - memset(zstream, 0, sizeof(*zstream));
> + struct z_stream_s *zstream = memset(&c->zstream, 0, sizeof(c->zstream));
>
> zstream->workspace =
> kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
> @@ -191,14 +196,22 @@ static bool compress_init(struct z_stream_s *zstream)
> return false;
> }
>
> + c->tmp = NULL;
> + if (i915_memcpy_from_wc(NULL, 0, 0))
> + c->tmp = (void *)__get_free_page(GFP_ATOMIC | __GFP_NOWARN);
> +
> return true;
> }
>
> -static int compress_page(struct z_stream_s *zstream,
> +static int compress_page(struct compress *c,
> void *src,
> struct drm_i915_error_object *dst)
> {
> + struct z_stream_s *zstream = &c->zstream;
> +
> zstream->next_in = src;
> + if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
> + zstream->next_in = c->tmp;
> zstream->avail_in = PAGE_SIZE;
>
> do {
> @@ -226,9 +239,11 @@ static int compress_page(struct z_stream_s *zstream,
> return 0;
> }
>
> -static void compress_fini(struct z_stream_s *zstream,
> +static void compress_fini(struct compress *c,
> struct drm_i915_error_object *dst)
> {
> + struct z_stream_s *zstream = &c->zstream;
> +
> if (dst) {
> zlib_deflate(zstream, Z_FINISH);
> dst->unused = zstream->avail_out;
> @@ -236,6 +251,9 @@ static void compress_fini(struct z_stream_s *zstream,
>
> zlib_deflateEnd(zstream);
> kfree(zstream->workspace);
> +
> + if (c->tmp)
> + free_page((unsigned long)c->tmp);
> }
>
> static void err_compression_marker(struct drm_i915_error_state_buf *m)
> @@ -245,28 +263,34 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)
>
> #else
>
> -static bool compress_init(struct z_stream_s *zstream)
> +struct compress {
> +};
> +
> +static bool compress_init(struct compress *c)
> {
> return true;
> }
>
> -static int compress_page(struct z_stream_s *zstream,
> +static int compress_page(struct compress *c,
> void *src,
> struct drm_i915_error_object *dst)
> {
> unsigned long page;
> + void *ptr;
>
> page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
> if (!page)
> return -ENOMEM;
>
> - dst->pages[dst->page_count++] =
> - memcpy((void *)page, src, PAGE_SIZE);
> + ptr = (void *)page;
> + if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE))
> + memcpy(ptr, src, PAGE_SIZE);
> + dst->pages[dst->page_count++] = ptr;
>
> return 0;
> }
>
> -static void compress_fini(struct z_stream_s *zstream,
> +static void compress_fini(struct compress *c,
> struct drm_i915_error_object *dst)
> {
> }
> @@ -784,7 +808,7 @@ i915_error_object_create(struct drm_i915_private *i915,
> struct i915_ggtt *ggtt = &i915->ggtt;
> const u64 slot = ggtt->error_capture.start;
> struct drm_i915_error_object *dst;
> - struct z_stream_s zstream;
> + struct compress compress;
> unsigned long num_pages;
> struct sgt_iter iter;
> dma_addr_t dma;
> @@ -804,7 +828,7 @@ i915_error_object_create(struct drm_i915_private *i915,
> dst->page_count = 0;
> dst->unused = 0;
>
> - if (!compress_init(&zstream)) {
> + if (!compress_init(&compress)) {
> kfree(dst);
> return NULL;
> }
> @@ -817,7 +841,7 @@ i915_error_object_create(struct drm_i915_private *i915,
> I915_CACHE_NONE, 0);
>
> s = io_mapping_map_atomic_wc(&ggtt->mappable, slot);
> - ret = compress_page(&zstream, (void __force *)s, dst);
> + ret = compress_page(&compress, (void __force *)s, dst);
> io_mapping_unmap_atomic(s);
>
> if (ret)
> @@ -832,7 +856,7 @@ i915_error_object_create(struct drm_i915_private *i915,
> dst = NULL;
>
> out:
> - compress_fini(&zstream, dst);
> + compress_fini(&compress, dst);
> ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE);
> return dst;
> }
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx mailing list