[Intel-gfx] [PATCH v2] drm/i915: Move to CPU domain in pwrite/pread

ville.syrjala at linux.intel.com
Wed Nov 12 15:57:10 CET 2014


From: Ville Syrjälä <ville.syrjala at linux.intel.com>

Currently it's possible to get visible cache dirt on scanout on LLC
machines when using pwrite on the future scanout bo if its cache_level
is already NONE.

pwrite's "does this need clflush?" checks would decide that no clflush
is necessary since the bo isn't currently pinned to the display and LLC
makes everything else coherent. The subsequent set_cache_level(NONE)
would also do nothing since cache_level is already correct. Hence no
clflush is performed, and we flip to a bo which may still have dirty
data in the caches.
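
Roughly, the failure is two independent checks each concluding
"nothing to do". A simplified sketch of that logic (hypothetical
helper name bo_pinned_to_display(), not the exact code):

  /* pwrite: bo not pinned to display + LLC => skip the clflush */
  if (!bo_pinned_to_display(obj) && HAS_LLC(dev))
          needs_clflush_after = 0;

  /* set_cache_level(NONE): early return, cache_level unchanged */
  if (obj->cache_level == I915_CACHE_NONE)
          return 0;

  /* net result: dirty LLC cachelines reach the scanout bo */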

To correctly track the cache dirtiness, move the object to the CPU
write domain in pwrite. This cures the cache dirt since we explicitly
flush the CPU write domain in the pin_to_display path.
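
In condensed form, once pwrite records where the dirt lives, the
existing flush in the pin-to-display path picks it up (a sketch of
the interaction, assuming that path still clflushes via
i915_gem_clflush_object(); not verbatim code):

  /* pwrite: record the pending CPU write */
  obj->base.write_domain = I915_GEM_DOMAIN_CPU;

  /* pin-to-display flush, in spirit: the CPU write domain is no
   * longer clean, so the bo really gets clflushed for scanout */
  if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
          i915_gem_clflush_object(obj, true);
          obj->base.write_domain = 0;
  }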

Give pread the same treatment simply in the name of symmetry.

v2: Use trace_i915_gem_object_change_domain() and provide some kind
    of commit message

Cc: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
---
v1 was floated on IRC.

 drivers/gpu/drm/i915/i915_gem.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3e0cabe..12a3c16 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -456,6 +456,7 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
 int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 				    int *needs_clflush)
 {
+	u32 old_read_domains;
 	int ret;
 
 	*needs_clflush = 0;
@@ -475,6 +476,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 			return ret;
 
 		i915_gem_object_retire(obj);
+		i915_gem_object_flush_gtt_write_domain(obj);
 	}
 
 	ret = i915_gem_object_get_pages(obj);
@@ -483,6 +485,13 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_pin_pages(obj);
 
+	old_read_domains = obj->base.read_domains;
+	obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
+
+	trace_i915_gem_object_change_domain(obj,
+					    old_read_domains,
+					    obj->base.write_domain);
+
 	return ret;
 }
 
@@ -875,6 +884,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	int needs_clflush_after = 0;
 	int needs_clflush_before = 0;
 	struct sg_page_iter sg_iter;
+	u32 old_write_domain, old_read_domains;
 
 	user_data = to_user_ptr(args->data_ptr);
 	remain = args->size;
@@ -892,6 +902,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 			return ret;
 
 		i915_gem_object_retire(obj);
+		i915_gem_object_flush_gtt_write_domain(obj);
 	}
 	/* Same trick applies to invalidate partially written cachelines read
 	 * before writing. */
@@ -908,6 +919,15 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	offset = args->offset;
 	obj->dirty = 1;
 
+	old_read_domains = obj->base.read_domains;
+	old_write_domain = obj->base.write_domain;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+
+	trace_i915_gem_object_change_domain(obj,
+					    old_read_domains,
+					    old_write_domain);
+
 	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
 			 offset >> PAGE_SHIFT) {
 		struct page *page = sg_page_iter_page(&sg_iter);
@@ -978,9 +998,19 @@ out:
 		}
 	}
 
-	if (needs_clflush_after)
+	if (needs_clflush_after) {
 		i915_gem_chipset_flush(dev);
 
+		if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
+			old_write_domain = obj->base.write_domain;
+			obj->base.write_domain = 0;
+
+			trace_i915_gem_object_change_domain(obj,
+							    obj->base.read_domains,
+							    old_write_domain);
+		}
+	}
+
 	return ret;
 }
 
-- 
2.0.4
