[Intel-gfx] [PATCH] drm/i915: Move whole object to CPU domain for coherent shmem access
Chris Wilson
chris at chris-wilson.co.uk
Fri Mar 10 00:09:42 UTC 2017
If the object is coherent, we can simply update the cache domain on the
whole object rather than calculate the before/after clflushes. The
advantage is that we then get correct tracking of ellided flushes when
changing coherency later.
Testcase: igt/gem_pwrite_snooped
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 6 +++---
drivers/gpu/drm/i915/i915_gem.c | 45 +++++++++++++++++++++--------------------
2 files changed, 26 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3002996ddbed..8de104b63209 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3353,9 +3353,9 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
unsigned int *needs_clflush);
int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
unsigned int *needs_clflush);
-#define CLFLUSH_BEFORE 0x1
-#define CLFLUSH_AFTER 0x2
-#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
+#define CLFLUSH_BEFORE BIT(0)
+#define CLFLUSH_AFTER BIT(1)
+#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
static inline void
i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index aca1eaddafb4..202bb850f260 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -788,6 +788,15 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
if (ret)
return ret;
+ if (i915_gem_object_is_coherent(obj) ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, false);
+ if (ret)
+ goto err_unpin;
+ else
+ goto out;
+ }
+
i915_gem_object_flush_gtt_write_domain(obj);
/* If we're not in the cpu read domain, set ourself into the gtt
@@ -796,16 +805,9 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
* anyway again before the next pread happens.
*/
if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
- *needs_clflush = !i915_gem_object_is_coherent(obj);
-
- if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
- ret = i915_gem_object_set_to_cpu_domain(obj, false);
- if (ret)
- goto err_unpin;
-
- *needs_clflush = 0;
- }
+ *needs_clflush = CLFLUSH_BEFORE;
+out:
/* return with the pages pinned */
return 0;
@@ -838,6 +840,15 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
if (ret)
return ret;
+ if (i915_gem_object_is_coherent(obj) ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, true);
+ if (ret)
+ goto err_unpin;
+ else
+ goto out;
+ }
+
i915_gem_object_flush_gtt_write_domain(obj);
/* If we're not in the cpu write domain, set ourself into the
@@ -846,25 +857,15 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
* right away and we therefore have to clflush anyway.
*/
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
- *needs_clflush |= cpu_write_needs_clflush(obj) << 1;
+ *needs_clflush |= CLFLUSH_AFTER;
/* Same trick applies to invalidate partially written cachelines read
* before writing.
*/
if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
- *needs_clflush |= !i915_gem_object_is_coherent(obj);
-
- if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) {
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (ret)
- goto err_unpin;
-
- *needs_clflush = 0;
- }
-
- if ((*needs_clflush & CLFLUSH_AFTER) == 0)
- obj->cache_dirty = true;
+ *needs_clflush |= CLFLUSH_BEFORE;
+out:
intel_fb_obj_invalidate(obj, ORIGIN_CPU);
obj->mm.dirty = true;
/* return with the pages pinned */
--
2.11.0
More information about the Intel-gfx
mailing list