[Intel-gfx] [PATCH 3/4] drm/i915: Update rules for writing through the LLC with the cpu

Chris Wilson chris at chris-wilson.co.uk
Tue Aug 6 14:17:04 CEST 2013


As mentioned in the previous commit, reads and writes from both the CPU
and GPU go through the LLC. This gives us coherency between the CPU and
GPU irrespective of the attribute settings either device sets. We can
use this to avoid having to clflush even uncached memory.

Except for the scanout.

The scanout resides within another functional block that does not use
the LLC but reads directly from main memory. So in order to maintain
coherency with the scanout, writes to uncached memory must be flushed.
In order to optimize writes elsewhere, we start tracking whether a
framebuffer is attached to an object.
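
For clarity, the write-path decision introduced below can be sketched as a
small stand-alone illustration. The struct and helper names here are made up
for the example; the real logic lives in cpu_write_needs_clflush() in the
diff, where "has_llc" corresponds to HAS_LLC(), "uncached" to
I915_CACHE_NONE, and "fb_count" to the new obj->fb_count.

/* Illustrative sketch only, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct sketch_obj {
	bool has_llc;   /* CPU and GPU share the LLC on this platform */
	bool uncached;  /* object uses I915_CACHE_NONE */
	int  fb_count;  /* framebuffers (i.e. scanouts) using this object */
};

/* Writes need a clflush if they are not coherent through the LLC, or if
 * the scanout, which bypasses the LLC, may read these pages. */
static bool sketch_cpu_write_needs_clflush(const struct sketch_obj *obj)
{
	if (!obj->has_llc && obj->uncached)
		return true;

	return obj->fb_count > 0;
}

int main(void)
{
	struct sketch_obj plain   = { .has_llc = true, .uncached = true, .fb_count = 0 };
	struct sketch_obj scanout = { .has_llc = true, .uncached = true, .fb_count = 1 };

	printf("plain object needs clflush:   %d\n", sketch_cpu_write_needs_clflush(&plain));
	printf("scanout object needs clflush: %d\n", sketch_cpu_write_needs_clflush(&scanout));
	return 0;
}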

Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h      |  3 +++
 drivers/gpu/drm/i915/i915_gem.c      | 27 +++++++++++++++++----------
 drivers/gpu/drm/i915/intel_display.c |  3 +++
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index aa11731..93c4789 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1407,6 +1407,9 @@ struct drm_i915_gem_object {
 	/** for phy allocated objects */
 	struct drm_i915_gem_phys_object *phys_obj;
 
+	/** Track framebuffers associated with this object */
+	atomic_t fb_count;
+
 	union {
 		struct i915_gem_userptr {
 			uintptr_t ptr;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5671dab..9805693 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -66,6 +66,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev,
 	return HAS_LLC(dev) || level != I915_CACHE_NONE;
 }
 
+static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+		return true;
+
+	return atomic_read(&obj->fb_count) > 0;
+}
+
 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
 {
 	if (obj->tiling_mode)
@@ -832,8 +840,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		 * write domain and manually flush cachelines (if required). This
 		 * optimizes for the case when the gpu will use the data
 		 * right away and we therefore have to clflush anyway. */
-		if (obj->cache_level == I915_CACHE_NONE)
-			needs_clflush_after = 1;
+		needs_clflush_after = cpu_write_needs_clflush(obj);
 		if (i915_gem_obj_ggtt_bound(obj)) {
 			ret = i915_gem_object_set_to_gtt_domain(obj, true);
 			if (ret)
@@ -1001,9 +1008,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	if (obj->cache_level == I915_CACHE_NONE &&
-	    obj->tiling_mode == I915_TILING_NONE &&
-	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+	if (obj->tiling_mode == I915_TILING_NONE &&
+	    !cpu_write_needs_clflush(obj)) {
 		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
 		/* Note that the gtt paths might fail with non-page-backed user
 		 * pointers (e.g. gtt mappings when moving data between
@@ -3299,7 +3305,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
 	 * snooping behaviour occurs naturally as the result of our domain
 	 * tracking.
 	 */
-	if (obj->cache_level != I915_CACHE_NONE)
+	if (!cpu_write_needs_clflush(obj))
 		return;
 
 	trace_i915_gem_object_clflush(obj);
@@ -3462,7 +3468,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 					       obj, cache_level);
 	}
 
-	if (cache_level == I915_CACHE_NONE) {
+	list_for_each_entry(vma, &obj->vma_list, vma_link)
+		vma->node.color = cache_level;
+	obj->cache_level = cache_level;
+
+	if (cpu_write_needs_clflush(obj)) {
 		u32 old_read_domains, old_write_domain;
 
 		/* If we're coming from LLC cached, then we haven't
@@ -3485,9 +3495,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 						    old_write_domain);
 	}
 
-	list_for_each_entry(vma, &obj->vma_list, vma_link)
-		vma->node.color = cache_level;
-	obj->cache_level = cache_level;
 	i915_gem_verify_gtt(dev);
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 0584480..cc5eaba 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9443,6 +9443,8 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
 
 	drm_framebuffer_cleanup(fb);
+
+	atomic_dec(&intel_fb->obj->fb_count);
 	drm_gem_object_unreference_unlocked(&intel_fb->obj->base);
 
 	kfree(intel_fb);
@@ -9568,6 +9570,7 @@ int intel_framebuffer_init(struct drm_device *dev,
 		return ret;
 	}
 
+	atomic_inc(&obj->fb_count);
 	return 0;
 }
 
-- 
1.8.4.rc1



