[PATCH 29/29] drm/i915: Mark CPU cache as dirty on every transition for CPU writes
Chris Wilson
chris at chris-wilson.co.uk
Thu Apr 13 00:29:49 UTC 2017
Currently, we only mark the CPU cache as dirty if we skip a clflush.
This leads to some confusion where we have to ask if the object is in
the write domain or missed a clflush. If we always mark the cache as
dirty, this becomes a much simpler question to answer.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem.c | 37 +++++++++++++++---------
drivers/gpu/drm/i915/i915_gem_clflush.c | 9 ++++--
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 20 +++----------
drivers/gpu/drm/i915/i915_gem_internal.c | 3 +-
drivers/gpu/drm/i915/i915_gem_userptr.c | 5 ++--
drivers/gpu/drm/i915/selftests/huge_gem_object.c | 3 +-
6 files changed, 40 insertions(+), 37 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 174844249d6e..e7035c5eb2a9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,7 +49,7 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ if (obj->cache_dirty)
return false;
if (!i915_gem_object_is_coherent(obj))
@@ -250,6 +250,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ obj->cache_dirty = true;
}
static void
@@ -684,6 +685,12 @@ i915_gem_dumb_create(struct drm_file *file,
args->size, &args->handle);
}
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ return !(obj->cache_level == I915_CACHE_NONE ||
+ obj->cache_level == I915_CACHE_WT);
+}
+
/**
* Creates a new mm object and returns a handle to it.
* @dev: drm device pointer
@@ -753,6 +760,11 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
case I915_GEM_DOMAIN_CPU:
i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
break;
+
+ case I915_GEM_DOMAIN_RENDER:
+ if (!obj->cache_dirty && gpu_write_needs_clflush(obj))
+ obj->cache_dirty = true;
+ break;
}
obj->base.write_domain = 0;
@@ -906,7 +918,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
* This optimizes for the case when the gpu will use the data
* right away and we therefore have to clflush anyway.
*/
- if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
+ if (!obj->cache_dirty)
*needs_clflush |= CLFLUSH_AFTER;
/* Same trick applies to invalidate partially written cachelines read
@@ -3380,11 +3392,9 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
- if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
- return;
-
- i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
- obj->base.write_domain = 0;
+ flush_write_domain(obj, ~0);
+ if (obj->cache_dirty)
+ i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
}
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
@@ -3642,9 +3652,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
}
}
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU &&
- i915_gem_object_is_coherent(obj))
- obj->cache_dirty = true;
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
list_for_each_entry(vma, &obj->vma_list, obj_link)
vma->node.color = cache_level;
@@ -3870,9 +3878,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
if (ret)
return ret;
- if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
- return 0;
-
flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
/* Flush the CPU cache if it's still invalid. */
@@ -3884,7 +3889,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
/* It should now be out of any other write domains, and we can update
* the domain values for our changes.
*/
- GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+ GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
/* If we're writing through the CPU, then the GPU read domains will
* need to be invalidated at next use.
@@ -3892,6 +3897,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
if (write) {
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ obj->cache_dirty = true;
}
return 0;
@@ -4293,6 +4299,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->cache_dirty = true;
if (HAS_LLC(dev_priv)) {
/* On some devices, we can have the GPU use the LLC (the CPU
@@ -4976,6 +4983,7 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
list_for_each_entry(obj, *p, global_link) {
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ obj->cache_dirty = true;
}
}
mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -5080,6 +5088,7 @@ i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
return obj;
GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
+ GEM_BUG_ON(!obj->cache_dirty);
file = obj->base.filp;
offset = 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index ffac7a1f0caf..317e46d25049 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -124,6 +124,9 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
{
struct clflush *clflush;
+ if (!obj->cache_dirty)
+ return;
+
/*
* Stolen memory is always coherent with the GPU as it is explicitly
* marked as wc by the system, or the system is cache-coherent.
@@ -131,10 +134,10 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
* anything not backed by physical memory we consider to be always
* coherent and not need clflushing.
*/
- if (!i915_gem_object_has_struct_page(obj))
+ if (!i915_gem_object_has_struct_page(obj)) {
+ obj->cache_dirty = false;
return;
-
- obj->cache_dirty = true;
+ }
/* If the GPU is snooping the contents of the CPU cache,
* we do not need to manually clear the CPU cache lines. However,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bc639f671338..a535eac4a25a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -504,7 +504,7 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
return false;
return (cache->has_llc ||
- obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+ obj->cache_dirty ||
obj->cache_level != I915_CACHE_NONE);
}
@@ -1767,12 +1767,6 @@ static void eb_export_fence(struct drm_i915_gem_object *obj,
reservation_object_unlock(resv);
}
-static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
- return !(obj->cache_level == I915_CACHE_NONE ||
- obj->cache_level == I915_CACHE_WT);
-}
-
static int
eb_move_to_gpu(struct i915_execbuffer *eb)
{
@@ -1800,10 +1794,8 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
if (entry->flags & EXEC_OBJECT_ASYNC)
goto skip_flushes;
- if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
+ if (obj->cache_dirty)
i915_gem_clflush_object(obj, 0);
- obj->base.write_domain = 0;
- }
err = i915_gem_request_await_object
(eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
@@ -1813,10 +1805,9 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
skip_flushes:
obj->base.write_domain = 0;
if (entry->flags & EXEC_OBJECT_WRITE) {
- obj->base.read_domains = 0;
- if (!obj->cache_dirty && gpu_write_needs_clflush(obj))
- obj->cache_dirty = true;
+ obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
intel_fb_obj_invalidate(obj, ORIGIN_CS);
+ obj->base.read_domains = 0;
}
obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
@@ -1891,9 +1882,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
i915_gem_active_set(&obj->frontbuffer_write, req);
-
- if (!obj->cache_dirty && gpu_write_needs_clflush(obj))
- obj->cache_dirty = true;
}
if (flags & EXEC_OBJECT_NEEDS_FENCE)
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index fc950abbe400..aaa8ed33d69c 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -188,9 +188,10 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
drm_gem_private_object_init(&i915->drm, &obj->base, size);
i915_gem_object_init(obj, &i915_gem_object_internal_ops);
- obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ obj->cache_dirty = true;
return obj;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 66b09163bfba..eb20a1809734 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -802,9 +802,10 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
drm_gem_private_object_init(dev, &obj->base, args->user_size);
i915_gem_object_init(obj, &i915_gem_userptr_ops);
- obj->cache_level = I915_CACHE_LLC;
- obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ obj->cache_level = I915_CACHE_LLC;
+ obj->cache_dirty = true;
obj->userptr.ptr = args->user_ptr;
obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
index 4e681fc13be4..98de7ec245a1 100644
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
@@ -126,8 +126,9 @@ huge_gem_object(struct drm_i915_private *i915,
drm_gem_private_object_init(&i915->drm, &obj->base, dma_size);
i915_gem_object_init(obj, &huge_ops);
- obj->base.write_domain = I915_GEM_DOMAIN_CPU;
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ obj->cache_dirty = true;
obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
obj->scratch = phys_size;
--
2.11.0
More information about the Intel-gfx-trybot
mailing list