[Intel-gfx] [PATCH 1/2] drm/i915: fixup in-line clflushing on bit17 swizzled bos

Daniel Vetter daniel.vetter at ffwll.ch
Thu Mar 1 20:36:42 CET 2012


The issue is that with inline clflushing, the clflush range isn't
properly swizzled, so the wrong cachelines can get flushed. Fix this by
- always clflushing entire 128-byte chunks and
- checking against 128-byte chunks for partial cachelines when
  swizzling is required (see the sketch below).
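
For illustration, a rough standalone sketch (not part of the patch) of
why whole 128-byte chunks are the right granularity: bit17 swizzling
swaps the two 64-byte cachelines within each 128-byte chunk (the
swizzled copy helpers XOR the page offset with 64), so rounding a flush
range out to 128-byte boundaries covers a cacheline at both of its
possible locations. rnd_down/rnd_up and the constants here are local
stand-ins for the kernel's round_down()/round_up() and
boot_cpu_data.x86_clflush_size:

/* Hypothetical userspace sketch, not part of the patch. */
#include <stdio.h>

#define SWIZZLED_CHUNK 128	/* two 64-byte cachelines that trade places */

static unsigned long rnd_down(unsigned long x, unsigned long a)
{
	return x & ~(a - 1);
}

static unsigned long rnd_up(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	unsigned long offset = 70, length = 20;	/* a partial write */
	unsigned long clflush_size = 64;	/* typical x86 cacheline */

	/* Swizzling may relocate data at offset o to o ^ 64, so flush
	 * the whole 128-byte chunk to hit both candidate locations. */
	unsigned long start = rnd_down(offset, SWIZZLED_CHUNK);
	unsigned long end = rnd_up(offset + length, SWIZZLED_CHUNK);

	printf("flush [%lu, %lu) for a write at [%lu, %lu)\n",
	       start, end, offset, offset + length);

	/* Same test as partial_cacheline_write in the patch below: the
	 * OR of offset and length has a bit set below the cacheline
	 * size iff either is misaligned, i.e. iff some cacheline is
	 * only partially overwritten and must be flushed first. */
	if ((offset | length) & (clflush_size - 1))
		printf("partial cacheline write: clflush before writing\n");

	return 0;
}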

Now the usual approach is to fold this into the original patch series, but
I've opted against this because
- this fixes a corner case only very old userspace relies on and
- I'd like not to invalidate all the testing the pwrite rewrite has gotten.

This fixes the regression noticed by tests/gem_tiled_partial_pwrite_pread
from i-g-t. Unfortunately it doesn't fix the issues with partial pwrites to
tiled buffers on bit17 swizzling machines. But that is also broken without
the pwrite patches, so it's likely a different issue (or a problem with the
testcase).

Signed-off-by: Daniel Vetter <daniel.vetter at ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem.c |   59 +++++++++++++++++++++++++++++---------
 1 file changed, 45 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9b200f4e..4219bd1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -333,6 +333,27 @@ shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
 	return ret;
 }
 
+static void
+shmem_clflush_swizzled_range(char *addr, unsigned long length,
+			     bool swizzled)
+{
+	if (swizzled) {
+		unsigned long start = (unsigned long) addr;
+		unsigned long end = (unsigned long) addr + length;
+
+		/* For swizzling simply ensure that we always flush both
+		 * channels. Lame, but simple and it works. Swizzled
+		 * pwrite/pread is far from a hotpath - current userspace
+		 * doesn't use it at all. */
+		start = round_down(start, 128);
+		end = round_up(end, 128);
+
+		drm_clflush_virt_range((void *)start, end - start);
+	} else {
+		drm_clflush_virt_range(addr, length);
+	}
+}
+
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
@@ -345,8 +367,9 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
 
 	vaddr = kmap(page);
 	if (needs_clflush)
-		drm_clflush_virt_range(vaddr + shmem_page_offset,
-				       page_length);
+		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
+					     page_length,
+					     page_do_bit17_swizzling);
 
 	if (page_do_bit17_swizzling)
 		ret = __copy_to_user_swizzled(user_data,
@@ -655,9 +678,10 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
 	int ret;
 
 	vaddr = kmap(page);
-	if (needs_clflush_before)
-		drm_clflush_virt_range(vaddr + shmem_page_offset,
-				       page_length);
+	if (needs_clflush_before || page_do_bit17_swizzling)
+		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
+					     page_length,
+					     page_do_bit17_swizzling);
 	if (page_do_bit17_swizzling)
 		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
 						user_data,
@@ -667,8 +691,9 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
 				       user_data,
 				       page_length);
 	if (needs_clflush)
-		drm_clflush_virt_range(vaddr + shmem_page_offset,
-				       page_length);
+		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
+					     page_length,
+					     page_do_bit17_swizzling);
 	kunmap(page);
 
 	return ret;
@@ -716,6 +741,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	while (remain > 0) {
 		struct page *page;
 		int partial_cacheline_write;
+		unsigned clflush_size;
 
 		/* Operation in this page
 		 *
@@ -728,13 +754,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
 			page_length = PAGE_SIZE - shmem_page_offset;
 
-		/* If we don't overwrite a cacheline completely we need to be
-		 * careful to have up-to-date data by first clflushing. Don't
-		 * overcomplicate things and flush the entire patch. */
-		partial_cacheline_write = needs_clflush_before &&
-			((shmem_page_offset | page_length)
-				& (boot_cpu_data.x86_clflush_size - 1));
-
 		if (obj->pages) {
 			page = obj->pages[offset >> PAGE_SHIFT];
 			release_page = 0;
@@ -750,6 +769,19 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
 			(page_to_phys(page) & (1 << 17)) != 0;
 
+		/* If we need swizzling, ensure that we always flush both
+		 * channels (otherwise we'd need to swizzle the clflushing,
+		 * which is a pain). */
+		clflush_size = page_do_bit17_swizzling ?
+			128 : boot_cpu_data.x86_clflush_size;
+
+		/* If we don't overwrite a cacheline completely we need to be
+		 * careful to have up-to-date data by first clflushing. Don't
+		 * overcomplicate things and flush the entire page. */
+		partial_cacheline_write = needs_clflush_before &&
+			((shmem_page_offset | page_length)
+				& (clflush_size - 1));
+
 		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
 					user_data, page_do_bit17_swizzling,
 					partial_cacheline_write, needs_clflush);
-- 
1.7.8.3