[PATCH 15/15] flush-writes

Chris Wilson chris at chris-wilson.co.uk
Thu Dec 31 23:01:21 UTC 2020


---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 281 +++++++++++++--------
 1 file changed, 169 insertions(+), 112 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index f0379b550dfc..430b3f6fd715 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -23,16 +23,43 @@ static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 }
 
 static void
+__flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush)
+{
+	if (!flush)
+		return;
+
+	/*
+	 * @flush is a mask of I915_GEM_DOMAIN_* bits, not a single domain:
+	 * callers OR the previous write domain with the read domains being
+	 * invalidated (the display path always adds I915_GEM_DOMAIN_CPU).
+	 * Test each bit so that a combined mask is not silently ignored, as
+	 * it would be by a switch on the exact value.
+	 *
+	 * GTT and RENDER require no additional work here.
+	 */
+	if (flush & I915_GEM_DOMAIN_WC)
+		wmb();
+
+	if (flush & I915_GEM_DOMAIN_CPU)
+		drm_clflush_sg(obj->mm.pages);
+
+	/* Every non-empty flush also flushes the frontbuffer as ORIGIN_CPU. */
+	i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
+}
+
+static unsigned int
 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 {
 	struct i915_vma *vma;
+	unsigned int flush;
 
 	assert_object_held(obj);
 
 	if (!(obj->write_domain & flush_domains))
-		return;
+		return 0;
 
-	switch (obj->write_domain) {
+	flush = fetch_and_zero(&obj->write_domain);
+	switch (flush) {
 	case I915_GEM_DOMAIN_GTT:
 		spin_lock(&obj->vma.lock);
 		for_each_ggtt_vma(vma, obj) {
@@ -40,8 +67,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 				intel_gt_flush_ggtt_writes(vma->vm->gt);
 		}
 		spin_unlock(&obj->vma.lock);
-
-		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
 		break;
 
 	case I915_GEM_DOMAIN_WC:
@@ -49,7 +74,6 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 		break;
 
 	case I915_GEM_DOMAIN_CPU:
-		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 		break;
 
 	case I915_GEM_DOMAIN_RENDER:
@@ -58,7 +82,7 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 		break;
 	}
 
-	obj->write_domain = 0;
+	return flush;
 }
 
 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
@@ -67,10 +91,9 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 	 * We manually flush the CPU domain so that we can override and
 	 * force the flush for the display, and perform it asyncrhonously.
 	 */
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-	if (obj->cache_dirty)
-		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
-	obj->write_domain = 0;
+	__flush_write_domain(obj,
+			     flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU) |
+			     I915_GEM_DOMAIN_CPU);
 }
 
 void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
@@ -97,36 +120,20 @@ void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
  * This function returns when the move is complete, including waiting on
  * flushes to occur.
  */
-int
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+static int
+set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 {
-	int ret;
+	unsigned int flush;
 
 	assert_object_held(obj);
 
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
 	if (obj->write_domain == I915_GEM_DOMAIN_WC)
 		return 0;
 
-	/* Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+	flush = flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+	if (write)
+		flush |= obj->read_domains & ~I915_GEM_DOMAIN_WC;
+	obj->write_domain &= ~flush;
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -146,25 +153,21 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->mm.dirty = true;
 	}
 
-	i915_gem_object_unpin_pages(obj);
-	return 0;
+	return flush;
 }
 
-/**
- * Moves a single object to the GTT read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
 int
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 {
+	unsigned int flush;
 	int ret;
 
 	assert_object_held(obj);
 
+	flush = set_to_wc_domain(obj, write);
+	if (!flush)
+		return 0;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   (write ? I915_WAIT_ALL : 0),
@@ -172,22 +175,32 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;
 
+	__flush_write_domain(obj, flush);
+	return 0;
+}
+
+/**
+ * Moves a single object to the GTT read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+static unsigned int
+set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	unsigned int flush;
+
+	assert_object_held(obj);
+
 	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
 		return 0;
 
-	/* Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+	flush = flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+	if (write)
+		flush |= obj->read_domains & ~I915_GEM_DOMAIN_GTT;
+	obj->write_domain &= ~flush;
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -202,20 +215,97 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
 	obj->read_domains |= I915_GEM_DOMAIN_GTT;
 	if (write) {
-		struct i915_vma *vma;
-
 		obj->read_domains = I915_GEM_DOMAIN_GTT;
 		obj->write_domain = I915_GEM_DOMAIN_GTT;
 		obj->mm.dirty = true;
+	}
 
-		spin_lock(&obj->vma.lock);
-		for_each_ggtt_vma(vma, obj)
-			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
-				i915_vma_set_ggtt_write(vma);
-		spin_unlock(&obj->vma.lock);
+	return flush;
+}
+
+int
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	unsigned int flush;
+	int ret;
+
+	assert_object_held(obj);
+	/* Update domain tracking; a zero mask means nothing is left to flush. */
+	flush = set_to_gtt_domain(obj, write);
+	if (!flush)
+		return 0;
+	/* NOTE(review): tracking was already updated above if this wait fails. */
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+	/* Perform the flushes that set_to_gtt_domain() left to its caller. */
+	__flush_write_domain(obj, flush);
+	return 0;
+}
+
+static unsigned int
+set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	unsigned int flush;
+
+	assert_object_held(obj);
+	/* Build the mask of domains the caller must flush (returned below). */
+	flush = flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	if (write)
+		flush |= obj->read_domains & ~I915_GEM_DOMAIN_CPU;
+	obj->write_domain &= ~flush;
+
+	/* Ask the caller to flush the CPU cache if it's still invalid. */
+	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
+		obj->read_domains |= I915_GEM_DOMAIN_CPU;
+		flush |= I915_GEM_DOMAIN_CPU;
 	}
 
-	i915_gem_object_unpin_pages(obj);
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're writing through the CPU, then the GPU read domains will
+	 * need to be invalidated at next use.
+	 */
+	if (write)
+		__start_cpu_write(obj);
+
+	return flush;
+}
+
+/**
+ * Moves a single object to the CPU read, and possibly write domain.
+ * @obj: object to act on
+ * @write: requesting write or read-only access
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	unsigned int flush;
+	int ret;
+
+	assert_object_held(obj);
+	/* Update domain tracking; a zero mask means nothing is left to flush. */
+	flush = set_to_cpu_domain(obj, write);
+	if (!flush)
+		return 0;
+	/* NOTE(review): tracking was already updated above if this wait fails. */
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+	/* Perform the flushes that set_to_cpu_domain() left to its caller. */
+	__flush_write_domain(obj, flush);
 	return 0;
 }
 
@@ -475,50 +565,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 	i915_vma_unpin(vma);
 }
 
-/**
- * Moves a single object to the CPU read, and possibly write domain.
- * @obj: object to act on
- * @write: requesting write or read-only access
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	assert_object_held(obj);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* Flush the CPU cache if it's still invalid. */
-	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-		obj->read_domains |= I915_GEM_DOMAIN_CPU;
-	}
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're writing through the CPU, then the GPU read domains will
-	 * need to be invalidated at next use.
-	 */
-	if (write)
-		__start_cpu_write(obj);
-
-	return 0;
-}
-
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -534,6 +580,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_object *obj;
 	u32 read_domains = args->read_domains;
 	u32 write_domain = args->write_domain;
+	unsigned int flush;
 	int err;
 
 	/* Only handle setting domains to types used by the CPU. */
@@ -609,17 +656,27 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		goto out_unpin;
 
 	if (read_domains & I915_GEM_DOMAIN_WC)
-		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+		flush = set_to_wc_domain(obj, write_domain);
 	else if (read_domains & I915_GEM_DOMAIN_GTT)
-		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
+		flush = set_to_gtt_domain(obj, write_domain);
 	else
-		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
+		flush = set_to_cpu_domain(obj, write_domain);
 
 	/* And bump the LRU for this access */
 	i915_gem_object_bump_inactive_ggtt(obj);
 
 	i915_gem_object_unlock(obj);
 
+	if (flush) {
+		err = i915_gem_object_wait(obj,
+					   I915_WAIT_INTERRUPTIBLE,
+					   MAX_SCHEDULE_TIMEOUT);
+		if (err)
+			goto out_unpin;
+
+		__flush_write_domain(obj, flush);
+	}
+
 	if (write_domain)
 		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
 
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list