[PATCH 5/7] drm/i915: Align start for memcpy_from_wc

Chris Wilson chris at chris-wilson.co.uk
Sat Dec 7 10:37:15 UTC 2019


The movntdqa instruction requires 16-byte alignment for the source
pointer. Avoid falling back to clflush if the source pointer is misaligned
by first doing a small uncached memcpy to fix up the alignment.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 30 +++++++++++++++++---------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 6cf4e336461b..2977316d64ae 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1132,8 +1132,8 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 {
 	unsigned int src_needs_clflush;
 	unsigned int dst_needs_clflush;
-	void *dst, *src;
-	int ret;
+	void *dst, *src, *ptr;
+	int ret, len;
 
 	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
 	if (ret)
@@ -1150,19 +1150,30 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		return ERR_PTR(ret);
 	}
 
+	ptr = dst;
 	src = ERR_PTR(-ENODEV);
-	if (src_needs_clflush &&
-	    i915_can_memcpy_from_wc(NULL, offset, 0)) {
+	if (src_needs_clflush && i915_has_memcpy_from_wc()) {
 		src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
 		if (!IS_ERR(src)) {
-			i915_memcpy_from_wc(dst,
-					    src + offset,
-					    ALIGN(length, 16));
+			src += offset;
+
+			if (!IS_ALIGNED(offset, 16)) {
+				len = min(ALIGN(offset, 16) - offset, length);
+
+				memcpy(ptr, src, len);
+
+				offset += len;
+				length -= len;
+				ptr += len;
+				src += len;
+			}
+			GEM_BUG_ON(!IS_ALIGNED((unsigned long)src, 16));
+
+			i915_memcpy_from_wc(ptr, src, ALIGN(length, 16));
 			i915_gem_object_unpin_map(src_obj);
 		}
 	}
 	if (IS_ERR(src)) {
-		void *ptr;
 		int x, n;
 
 		/*
@@ -1177,10 +1188,9 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 			length = round_up(length,
 					  boot_cpu_data.x86_clflush_size);
 
-		ptr = dst;
 		x = offset_in_page(offset);
 		for (n = offset >> PAGE_SHIFT; length; n++) {
-			int len = min_t(int, length, PAGE_SIZE - x);
+			len = min_t(int, length, PAGE_SIZE - x);
 
 			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
 			if (src_needs_clflush)
-- 
2.24.0



More information about the Intel-gfx-trybot mailing list