[Intel-gfx] [passive aggressive RESEND 2/2] drm/i915: Store i915_gem_object_is_coherent() as a bit next to cache-dirty
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jun 15 14:41:08 UTC 2017
On 15/06/2017 13:38, Chris Wilson wrote:
> For ease of use (i.e. avoiding a few checks and function calls), store
> the object's cache coherency next to the cache is dirty bit.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Dongwon Kim <dongwon.kim at intel.com>
> Cc: Matt Roper <matthew.d.roper at intel.com>
> Tested-by: Dongwon Kim <dongwon.kim at intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 14 +++++++-------
> drivers/gpu/drm/i915/i915_gem_clflush.c | 2 +-
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +-
> drivers/gpu/drm/i915/i915_gem_internal.c | 3 ++-
> drivers/gpu/drm/i915/i915_gem_object.h | 1 +
> drivers/gpu/drm/i915/i915_gem_stolen.c | 1 +
> drivers/gpu/drm/i915/i915_gem_userptr.c | 3 ++-
> drivers/gpu/drm/i915/selftests/huge_gem_object.c | 3 ++-
> 8 files changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b1504a829c6a..4ae30f74c475 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
> if (obj->cache_dirty)
> return false;
>
> - if (!i915_gem_object_is_coherent(obj))
> + if (!obj->cache_coherent)
> return true;
>
> return obj->pin_display;
> @@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
>
> if (needs_clflush &&
> (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
> - !i915_gem_object_is_coherent(obj))
> + !obj->cache_coherent)
> drm_clflush_sg(pages);
>
> __start_cpu_write(obj);
> @@ -856,8 +856,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
> if (ret)
> return ret;
>
> - if (i915_gem_object_is_coherent(obj) ||
> - !static_cpu_has(X86_FEATURE_CLFLUSH)) {
> + if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
> ret = i915_gem_object_set_to_cpu_domain(obj, false);
> if (ret)
> goto err_unpin;
> @@ -909,8 +908,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
> if (ret)
> return ret;
>
> - if (i915_gem_object_is_coherent(obj) ||
> - !static_cpu_has(X86_FEATURE_CLFLUSH)) {
> + if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
> ret = i915_gem_object_set_to_cpu_domain(obj, true);
> if (ret)
> goto err_unpin;
> @@ -3684,6 +3682,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> list_for_each_entry(vma, &obj->vma_list, obj_link)
> vma->node.color = cache_level;
> obj->cache_level = cache_level;
> + obj->cache_coherent = i915_gem_object_is_coherent(obj);
> obj->cache_dirty = true; /* Always invalidate stale cachelines */
>
> return 0;
> @@ -4344,7 +4343,8 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
> } else
> obj->cache_level = I915_CACHE_NONE;
>
> - obj->cache_dirty = !i915_gem_object_is_coherent(obj);
> + obj->cache_coherent = i915_gem_object_is_coherent(obj);
> + obj->cache_dirty = !obj->cache_coherent;
>
> trace_i915_gem_object_create(obj);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
> index 17b207e963c2..152f16c11878 100644
> --- a/drivers/gpu/drm/i915/i915_gem_clflush.c
> +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
> @@ -139,7 +139,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
> * snooping behaviour occurs naturally as the result of our domain
> * tracking.
> */
> - if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
> + if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
> return;
>
> trace_i915_gem_object_clflush(obj);
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 2a9aed5640e2..20933a15be46 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1110,7 +1110,7 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
> if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
> continue;
>
> - if (obj->cache_dirty)
> + if (obj->cache_dirty & ~obj->cache_coherent)
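What is the explanation for this change? The commit message only talks about caching the coherency state, but this hunk also alters the condition under which eb_move_to_gpu() flushes the object.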
> i915_gem_clflush_object(obj, 0);
>
> ret = i915_gem_request_await_object
> diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
> index 58e93e87d573..568bf83af1f5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_internal.c
> +++ b/drivers/gpu/drm/i915/i915_gem_internal.c
> @@ -191,7 +191,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
> obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
> - obj->cache_dirty = !i915_gem_object_is_coherent(obj);
> + obj->cache_coherent = i915_gem_object_is_coherent(obj);
> + obj->cache_dirty = !obj->cache_coherent;
>
> return obj;
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index 915057824284..adb482b00271 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -121,6 +121,7 @@ struct drm_i915_gem_object {
> unsigned long gt_ro:1;
> unsigned int cache_level:3;
> unsigned int cache_dirty:1;
> + unsigned int cache_coherent:1;
>
> atomic_t frontbuffer_bits;
> unsigned int frontbuffer_ggtt_origin; /* write once */
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 681db6083f4d..a817b3e0b17e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -590,6 +590,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
> obj->stolen = stolen;
> obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
> obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
> + obj->cache_coherent = true; /* assumptions! more like cache_oblivious */
>
> if (i915_gem_object_pin_pages(obj))
> goto cleanup;
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 34461e1928bc..05c36f663550 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -805,7 +805,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
> obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> obj->cache_level = I915_CACHE_LLC;
> - obj->cache_dirty = !i915_gem_object_is_coherent(obj);
> + obj->cache_coherent = i915_gem_object_is_coherent(obj);
> + obj->cache_dirty = !obj->cache_coherent;
>
> obj->userptr.ptr = args->user_ptr;
> obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
> diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
> index 0ca867a877b6..caf76af36aba 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c
> @@ -129,7 +129,8 @@ huge_gem_object(struct drm_i915_private *i915,
> obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
> - obj->cache_dirty = !i915_gem_object_is_coherent(obj);
> + obj->cache_coherent = i915_gem_object_is_coherent(obj);
> + obj->cache_dirty = !obj->cache_coherent;
> obj->scratch = phys_size;
>
> return obj;
>
Would it be an option to convert i915_gem_object_is_coherent() to simply
return obj->cache_coherent, for less churn? (And perhaps to add an
i915_gem_object_set_coherent() helper, or something similar, if there are enough call sites to justify it?)
Regards,
Tvrtko
More information about the Intel-gfx mailing list