[Intel-gfx] [PATCH v2] drm/i915: Combine write_domain flushes to a single function
Chris Wilson
chris at chris-wilson.co.uk
Wed Apr 12 09:42:53 UTC 2017
In the next patch, we will introduce a new cache domain for
differentiating between GTT access and direct WC access. This will
require us to include WC in our write_domain flushes. Rather than
add a third, nearly identical flush function, combine the existing two
into one so that flushing WC writes is then handled automatically as well.
v2: Be smarter and clearer by passing in the write domains to flush (Joonas)
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 125 ++++++++++-----------
.../gpu/drm/i915/selftests/i915_gem_coherency.c | 4 +-
drivers/gpu/drm/i915/selftests/i915_gem_object.c | 2 +-
3 files changed, 64 insertions(+), 67 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cb8c6a94ba4e..f1c28668edbf 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,8 +46,6 @@
#include <linux/dma-buf.h>
static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
-static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
@@ -705,6 +703,61 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
args->size, &args->handle);
}
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+ return (domain == I915_GEM_DOMAIN_GTT ?
+ obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+static void
+flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+{
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+
+ if (!(obj->base.write_domain & flush_domains))
+ return;
+
+ /* No actual flushing is required for the GTT write domain. Writes
+ * to it "immediately" go to main memory as far as we know, so there's
+ * no chipset flush. It also doesn't land in render cache.
+ *
+ * However, we do have to enforce the order so that all writes through
+ * the GTT land before any writes to the device, such as updates to
+ * the GATT itself.
+ *
+ * We also have to wait a bit for the writes to land from the GTT.
+ * An uncached read (i.e. mmio) seems to be ideal for the round-trip
+ * timing. This issue has only been observed when switching quickly
+ * between GTT writes and CPU reads from inside the kernel on recent hw,
+ * and it appears to only affect discrete GTT blocks (i.e. on LLC
+ * system agents we cannot reproduce this behaviour).
+ */
+ wmb();
+
+ switch (obj->base.write_domain) {
+ case I915_GEM_DOMAIN_GTT:
+ if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+ if (intel_runtime_pm_get_if_in_use(dev_priv)) {
+ spin_lock_irq(&dev_priv->uncore.lock);
+ POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+ spin_unlock_irq(&dev_priv->uncore.lock);
+ intel_runtime_pm_put(dev_priv);
+ }
+ }
+
+ intel_fb_obj_flush(obj,
+ fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+ break;
+
+ case I915_GEM_DOMAIN_CPU:
+ i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+ break;
+ }
+
+ obj->base.write_domain = 0;
+}
+
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
const char *gpu_vaddr, int gpu_offset,
@@ -794,7 +847,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
goto out;
}
- i915_gem_object_flush_gtt_write_domain(obj);
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
/* If we're not in the cpu read domain, set ourself into the gtt
* read domain and manually flush cachelines (if required). This
@@ -846,7 +899,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
goto out;
}
- i915_gem_object_flush_gtt_write_domain(obj);
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
/* If we're not in the cpu write domain, set ourself into the
* gtt write domain and manually flush cachelines (as required).
@@ -1501,13 +1554,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
return ret;
}
-static inline enum fb_op_origin
-write_origin(struct drm_i915_gem_object *obj, unsigned domain)
-{
- return (domain == I915_GEM_DOMAIN_GTT ?
- obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915;
@@ -1602,7 +1648,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
mutex_unlock(&dev->struct_mutex);
if (write_domain != 0)
- intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
+ intel_fb_obj_invalidate(obj,
+ fb_write_origin(obj, write_domain));
out_unpin:
i915_gem_object_unpin_pages(obj);
@@ -3320,56 +3367,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
return ret;
}
-/** Flushes the GTT write domain for the object if it's dirty. */
-static void
-i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
-{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-
- if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
- return;
-
- /* No actual flushing is required for the GTT write domain. Writes
- * to it "immediately" go to main memory as far as we know, so there's
- * no chipset flush. It also doesn't land in render cache.
- *
- * However, we do have to enforce the order so that all writes through
- * the GTT land before any writes to the device, such as updates to
- * the GATT itself.
- *
- * We also have to wait a bit for the writes to land from the GTT.
- * An uncached read (i.e. mmio) seems to be ideal for the round-trip
- * timing. This issue has only been observed when switching quickly
- * between GTT writes and CPU reads from inside the kernel on recent hw,
- * and it appears to only affect discrete GTT blocks (i.e. on LLC
- * system agents we cannot reproduce this behaviour).
- */
- wmb();
- if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
- if (intel_runtime_pm_get_if_in_use(dev_priv)) {
- spin_lock_irq(&dev_priv->uncore.lock);
- POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
- spin_unlock_irq(&dev_priv->uncore.lock);
- intel_runtime_pm_put(dev_priv);
- }
- }
-
- intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));
-
- obj->base.write_domain = 0;
-}
-
-/** Flushes the CPU write domain for the object if it's dirty. */
-static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
-{
- if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
- return;
-
- i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
- obj->base.write_domain = 0;
-}
-
static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
@@ -3428,7 +3425,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
if (ret)
return ret;
- i915_gem_object_flush_cpu_write_domain(obj);
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
/* Serialise direct access to this object with the barriers for
* coherent writes from the GPU, by effectively invalidating the
@@ -3802,7 +3799,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
return 0;
- i915_gem_object_flush_gtt_write_domain(obj);
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
/* Flush the CPU cache if it's still invalid. */
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
index f08d0179b3df..c61d0ef2118c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
@@ -139,7 +139,7 @@ static int wc_set(struct drm_i915_gem_object *obj,
int err;
/* XXX GTT write followed by WC write go missing */
- i915_gem_object_flush_gtt_write_domain(obj);
+ flush_write_domain(obj, ~0);
err = i915_gem_object_set_to_gtt_domain(obj, true);
if (err)
@@ -163,7 +163,7 @@ static int wc_get(struct drm_i915_gem_object *obj,
int err;
/* XXX WC write followed by GTT write go missing */
- i915_gem_object_flush_gtt_write_domain(obj);
+ flush_write_domain(obj, ~0);
err = i915_gem_object_set_to_gtt_domain(obj, false);
if (err)
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
index 33cc514f9fe8..c54a296f47b1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
@@ -266,7 +266,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
if (offset >= obj->base.size)
continue;
- i915_gem_object_flush_gtt_write_domain(obj);
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
cpu = kmap(p) + offset_in_page(offset);
--
2.11.0
More information about the Intel-gfx
mailing list