[PATCH 01/37] drm/i915: Use memcpy_from_wc for GPU error capture

Chris Wilson chris at chris-wilson.co.uk
Sat Dec 3 20:45:26 UTC 2016


On all platforms we now always read the contents of buffers via the GTT,
i.e. using WC CPU access. Reads are slow, but they can be accelerated
with an internal read buffer using SSE4.1 (movntdqa). This is what our
i915_memcpy_from_wc() routine does; it also checks for SSE4.1 support,
so we can fall back to a regular slow memcpy if we need to.

When compressing the pages, the reads are currently done inside zlib's
fill_window() routine, so we must first copy the page into a temporary
page, which is then already inside the CPU cache and fast for zlib's
compression. For simplicity, this temporary page is also allocated, but
not used, on the uncompressed path.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a14f7badc337..2a00100732d8 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -195,10 +195,13 @@ static bool compress_init(struct z_stream_s *zstream)
 }
 
 static int compress_page(struct z_stream_s *zstream,
-			 void *src,
+			 void *src, void *tmp,
 			 struct drm_i915_error_object *dst)
 {
-	zstream->next_in = src;
+	if (!i915_memcpy_from_wc(tmp, src, PAGE_SIZE))
+		memcpy(tmp, src, PAGE_SIZE);
+
+	zstream->next_in = tmp;
 	zstream->avail_in = PAGE_SIZE;
 
 	do {
@@ -251,17 +254,20 @@ static bool compress_init(struct z_stream_s *zstream)
 }
 
 static int compress_page(struct z_stream_s *zstream,
-			 void *src,
+			 void *src, void *tmp,
 			 struct drm_i915_error_object *dst)
 {
 	unsigned long page;
+	void *ptr;
 
 	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
 	if (!page)
 		return -ENOMEM;
 
-	dst->pages[dst->page_count++] =
-		memcpy((void *)page, src, PAGE_SIZE);
+	ptr = (void *)page;
+	if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE))
+		memcpy(ptr, src, PAGE_SIZE);
+	dst->pages[dst->page_count++] = ptr;
 
 	return 0;
 }
@@ -788,16 +794,23 @@ i915_error_object_create(struct drm_i915_private *i915,
 	unsigned long num_pages;
 	struct sgt_iter iter;
 	dma_addr_t dma;
+	void *tmp;
 
 	if (!vma)
 		return NULL;
 
+	tmp = (void *)__get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+	if (!tmp)
+		return NULL;
+
 	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
 	num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
 	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
 		      GFP_ATOMIC | __GFP_NOWARN);
-	if (!dst)
+	if (!dst) {
+		free_page((unsigned long)tmp);
 		return NULL;
+	}
 
 	dst->gtt_offset = vma->node.start;
 	dst->gtt_size = vma->node.size;
@@ -806,6 +819,7 @@ i915_error_object_create(struct drm_i915_private *i915,
 
 	if (!compress_init(&zstream)) {
 		kfree(dst);
+		free_page((unsigned long)tmp);
 		return NULL;
 	}
 
@@ -817,7 +831,7 @@ i915_error_object_create(struct drm_i915_private *i915,
 				       I915_CACHE_NONE, 0);
 
 		s = io_mapping_map_atomic_wc(&ggtt->mappable, slot);
-		ret = compress_page(&zstream, (void  __force *)s, dst);
+		ret = compress_page(&zstream, (void  __force *)s, tmp, dst);
 		io_mapping_unmap_atomic(s);
 
 		if (ret)
@@ -834,6 +848,7 @@ i915_error_object_create(struct drm_i915_private *i915,
 out:
 	compress_fini(&zstream, dst);
 	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE);
+	free_page((unsigned long)tmp);
 	return dst;
 }
 
-- 
2.10.2



More information about the Intel-gfx-trybot mailing list