[Intel-gfx] [PATCH 2/2] drm/i915: Store i915_gem_object_is_coherent() as a bit next to cache-dirty
Chris Wilson
chris at chris-wilson.co.uk
Thu Apr 27 14:46:43 UTC 2017
For ease of use (i.e. avoiding a few checks and function calls), store
the object's cache coherency as a bit next to the cache-dirty bit.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Dongwon Kim <dongwon.kim at intel.com>
Cc: Matt Roper <matthew.d.roper at intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 14 +++++++-------
drivers/gpu/drm/i915/i915_gem_clflush.c | 2 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +-
drivers/gpu/drm/i915/i915_gem_internal.c | 3 ++-
drivers/gpu/drm/i915/i915_gem_object.h | 1 +
drivers/gpu/drm/i915/i915_gem_stolen.c | 1 +
drivers/gpu/drm/i915/i915_gem_userptr.c | 3 ++-
drivers/gpu/drm/i915/selftests/huge_gem_object.c | 3 ++-
8 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 488ca7733c1e..56f70fd3c345 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
if (obj->cache_dirty)
return false;
- if (!i915_gem_object_is_coherent(obj))
+ if (!obj->cache_coherent)
return true;
return obj->pin_display;
@@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
if (needs_clflush &&
(obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
- !i915_gem_object_is_coherent(obj))
+ !obj->cache_coherent)
drm_clflush_sg(pages);
__start_cpu_write(obj);
@@ -856,8 +856,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (i915_gem_object_is_coherent(obj) ||
- !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
goto err_unpin;
@@ -909,8 +908,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- if (i915_gem_object_is_coherent(obj) ||
- !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
ret = i915_gem_object_set_to_cpu_domain(obj, true);
if (ret)
goto err_unpin;
@@ -3664,6 +3662,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
list_for_each_entry(vma, &obj->vma_list, obj_link)
vma->node.color = cache_level;
obj->cache_level = cache_level;
+ obj->cache_coherent = i915_gem_object_is_coherent(obj);
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU &&
cpu_write_needs_clflush(obj))
@@ -4326,7 +4325,8 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
} else
obj->cache_level = I915_CACHE_NONE;
- obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+ obj->cache_coherent = i915_gem_object_is_coherent(obj);
+ obj->cache_dirty = !obj->cache_coherent;
trace_i915_gem_object_create(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index a895643c4dc4..c4190b04f7f0 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -140,7 +140,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
- if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
+ if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
return;
trace_i915_gem_object_clflush(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0b8ae0f56675..6e77003d7f0f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1129,7 +1129,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
continue;
- if (obj->cache_dirty)
+ if (obj->cache_dirty & !obj->cache_coherent)
i915_gem_clflush_object(obj, 0);
ret = i915_gem_request_await_object
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index 58e93e87d573..568bf83af1f5 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -191,7 +191,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+ obj->cache_coherent = i915_gem_object_is_coherent(obj);
+ obj->cache_dirty = !obj->cache_coherent;
return obj;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 174cf923c236..dca15adc91de 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -106,6 +106,7 @@ struct drm_i915_gem_object {
unsigned long gt_ro:1;
unsigned int cache_level:3;
unsigned int cache_dirty:1;
+ unsigned int cache_coherent:1;
atomic_t frontbuffer_bits;
unsigned int frontbuffer_ggtt_origin; /* write once */
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index f3abdc27c5dd..68af4a39973d 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -592,6 +592,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
obj->stolen = stolen;
obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ obj->cache_coherent = true; /* assumptions! more like cache_oblivious */
if (i915_gem_object_pin_pages(obj))
goto cleanup;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 9f84be171ad2..4ec9a04aa165 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -805,7 +805,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->cache_level = I915_CACHE_LLC;
- obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+ obj->cache_coherent = i915_gem_object_is_coherent(obj);
+ obj->cache_dirty = !obj->cache_coherent;
obj->userptr.ptr = args->user_ptr;
obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
index 0ca867a877b6..caf76af36aba 100644
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
@@ -129,7 +129,8 @@ huge_gem_object(struct drm_i915_private *i915,
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
- obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+ obj->cache_coherent = i915_gem_object_is_coherent(obj);
+ obj->cache_dirty = !obj->cache_coherent;
obj->scratch = phys_size;
return obj;
--
2.11.0
More information about the Intel-gfx
mailing list