[RFC] drm/i915: Refactor PAT/cache handling
Yang, Fei
fei.yang at intel.com
Thu Jun 29 05:39:50 UTC 2023
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
> Informal commit message for now.
>
> I got a bit impatient and curious to see if the idea we discussed would
> work so sketched something out. I think it is what I was describing back
> then..
Oops, you beat me on this, shame on me.
> So high level idea is to teach the driver what caching modes are hidden
> behind PAT indices. Given you already had that in static tables, if we
> just turn the tables a bit around and add a driver abstraction of caching
> modes this is what happens:
>
> * We can lose the ugly runtime i915_gem_get_pat_index.
>
> * We can have a smarter i915_gem_object_has_cache_level, which now can
> use the above mentioned table to understand the caching modes and so
> does not have to pessimistically return true for _any_ input when user
> has set the PAT index. This may improve things even for MTL.
>
> * We can simplify the debugfs printout to be platform agnostic.
>
> * We are perhaps opening the door to un-regress the dodgy addition
> made to i915_gem_object_can_bypass_llc? See QQQ/FIXME in the patch.
>
> I hope I did not forget anything, but anyway, please have a read and see
> what you think. I think it has potential.
>
> Proper commit message can come later.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Fei Yang <fei.yang at intel.com>
> ---
> drivers/gpu/drm/i915/Makefile | 1 +
> drivers/gpu/drm/i915/gem/i915_gem_domain.c | 34 ++---
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 13 +-
> drivers/gpu/drm/i915/gem/i915_gem_mman.c | 10 +-
> drivers/gpu/drm/i915/gem/i915_gem_object.c | 78 ++++-------
> drivers/gpu/drm/i915/gem/i915_gem_object.h | 18 ++-
> .../gpu/drm/i915/gem/i915_gem_object_types.h | 99 +-------------
> drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 7 +-
> drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 26 ++--
> .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +-
> drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 4 +-
> drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 13 +-
> drivers/gpu/drm/i915/gt/intel_ggtt.c | 9 +-
> drivers/gpu/drm/i915/gt/intel_migrate.c | 11 +-
> drivers/gpu/drm/i915/gt/selftest_migrate.c | 9 +-
> drivers/gpu/drm/i915/gt/selftest_reset.c | 14 +-
> drivers/gpu/drm/i915/gt/selftest_timeline.c | 5 +-
> drivers/gpu/drm/i915/gt/selftest_tlb.c | 5 +-
> drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 8 +-
> drivers/gpu/drm/i915/i915_cache.c | 59 ++++++++
> drivers/gpu/drm/i915/i915_cache.h | 129 ++++++++++++++++++
> drivers/gpu/drm/i915/i915_debugfs.c | 83 ++++++-----
> drivers/gpu/drm/i915/i915_driver.c | 3 +
> drivers/gpu/drm/i915/i915_drv.h | 3 +
> drivers/gpu/drm/i915/i915_gem.c | 21 +--
> drivers/gpu/drm/i915/i915_gpu_error.c | 7 +-
> drivers/gpu/drm/i915/i915_pci.c | 83 +++++------
> drivers/gpu/drm/i915/intel_device_info.h | 6 +-
> drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +-
> .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +-
> drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 13 +-
> .../drm/i915/selftests/intel_memory_region.c | 4 +-
> .../gpu/drm/i915/selftests/mock_gem_device.c | 10 +-
> 33 files changed, 415 insertions(+), 381 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/i915_cache.c
> create mode 100644 drivers/gpu/drm/i915/i915_cache.h
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 2be9dd960540..2c3da8f0c78e 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -30,6 +30,7 @@ subdir-ccflags-y += -I$(srctree)/$(src)
> # core driver code
> i915-y += i915_driver.o \
> i915_drm_client.o \
> + i915_cache.o \
> i915_config.o \
> i915_getparam.o \
> i915_ioctl.o \
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index dfaaa8b66ac3..49bfae45390f 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -8,6 +8,7 @@
> #include "display/intel_frontbuffer.h"
> #include "gt/intel_gt.h"
>
> +#include "i915_cache.h"
> #include "i915_drv.h"
> #include "i915_gem_clflush.h"
> #include "i915_gem_domain.h"
> @@ -27,15 +28,8 @@ static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
> if (IS_DGFX(i915))
> return false;
>
> - /*
> - * For objects created by userspace through GEM_CREATE with pat_index
> - * set by set_pat extension, i915_gem_object_has_cache_level() will
> - * always return true, because the coherency of such object is managed
> - * by userspace. Othereise the call here would fall back to checking
> - * whether the object is un-cached or write-through.
> - */
> - return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
> - i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
> + return i915_gem_object_has_cache_mode(obj, I915_CACHE_MODE_UC) != 1 &&
> + i915_gem_object_has_cache_mode(obj, I915_CACHE_MODE_WT) != 1;
Why is it necessary to define I915_CACHE_MODE's while there is already i915_cache_level?
I thought we wanted to get rid of such abstractions instead of adding more.
This patch also introduced INTEL_INFO(i915)->cache_modes, why don't we directly add the
platform specific PAT there? For example, add the following for MTL,
INTEL_INFO(i915)->pat[] = {
[0] = MTL_PPAT_L4_0_WB, \
[1] = MTL_PPAT_L4_1_WT, \
[2] = MTL_PPAT_L4_3_UC, \
[3] = MTL_PPAT_L4_0_WB | MTL_2_COH_1W, \
[4] = MTL_PPAT_L4_0_WB | MTL_3_COH_2W, \
}
Everything here has already been defined, no need to introduce new macros.
This can also be used to initialize the PAT index registers, like in
xelpg_setup_private_ppat() and xelpmp_setup_private_ppat().
> }
>
> bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
> @@ -272,15 +266,18 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> enum i915_cache_level cache_level)
s/enum i915_cache_level cache_level/unsigned int pat_index
This function is for KMD objects only, I don't think we even need to keep
the i915_cache_level, simply passing in INTEL_INFO(i915)->pat_uc/wb/wt is
good enough.
> {
> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
> + i915_cache_t mode;
> int ret;
>
> - /*
> - * For objects created by userspace through GEM_CREATE with pat_index
> - * set by set_pat extension, simply return 0 here without touching
> - * the cache setting, because such objects should have an immutable
> - * cache setting by desgin and always managed by userspace.
> - */
> - if (i915_gem_object_has_cache_level(obj, cache_level))
> + if (obj->pat_set_by_user)
> + return -EINVAL;
I don't think this condition would ever be true, but okay to keep it.
> + ret = i915_cache_level_to_pat_and_mode(i915, cache_level, &mode);
> + if (ret < 0)
> + return -EINVAL;
> +
> + if (mode == obj->cache_mode)
The above lines can be just one line,
if (pat_index == obj->pat_index)
> return 0;
>
> ret = i915_gem_object_wait(obj,
> @@ -326,10 +323,9 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
> goto out;
> }
>
> - if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
> - i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
> + if (i915_gem_object_has_cache_mode(obj, I915_CACHE_MODE_WB))
> args->caching = I915_CACHING_CACHED;
> - else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
> + else if (i915_gem_object_has_cache_mode(obj, I915_CACHE_MODE_WT))
> args->caching = I915_CACHING_DISPLAY;
> else
> args->caching = I915_CACHING_NONE;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index d3208a325614..ee85221fa6eb 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -640,15 +640,9 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
> if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
> return false;
>
> - /*
> - * For objects created by userspace through GEM_CREATE with pat_index
> - * set by set_pat extension, i915_gem_object_has_cache_level() always
> - * return true, otherwise the call would fall back to checking whether
> - * the object is un-cached.
> - */
> return (cache->has_llc ||
> obj->cache_dirty ||
> - !i915_gem_object_has_cache_level(obj, I915_CACHE_NONE));
> + i915_gem_object_has_cache_mode(obj, I915_CACHE_MODE_UC) != 1);
> }
>
> static int eb_reserve_vma(struct i915_execbuffer *eb,
> @@ -1329,10 +1323,7 @@ static void *reloc_iomap(struct i915_vma *batch,
> if (drm_mm_node_allocated(&cache->node)) {
> ggtt->vm.insert_page(&ggtt->vm,
> i915_gem_object_get_dma_address(obj, page),
> - offset,
> - i915_gem_get_pat_index(ggtt->vm.i915,
> - I915_CACHE_NONE),
> - 0);
> + offset, eb->i915->pat_uc, 0);
> } else {
> offset += page << PAGE_SHIFT;
> }
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> index aa4d842d4c5a..5e21aedb02d2 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> @@ -386,13 +386,11 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
> /*
> * For objects created by userspace through GEM_CREATE with pat_index
> * set by set_pat extension, coherency is managed by userspace, make
> - * sure we don't fail handling the vm fault by calling
> - * i915_gem_object_has_cache_level() which always return true for such
> - * objects. Otherwise this helper function would fall back to checking
> - * whether the object is un-cached.
> + * sure we don't fail handling the vm fault by making sure that we
> + * know the object is uncached or that we have LLC.
> */
> - if (!(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
> - HAS_LLC(i915))) {
> + if (i915_gem_object_has_cache_mode(obj, I915_CACHE_MODE_UC) != 1 &&
> + !HAS_LLC(i915)) {
> ret = -EFAULT;
> goto err_unpin;
> }
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index 0004d5fa7cc2..52c6c5f09bdd 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -45,33 +45,6 @@ static struct kmem_cache *slab_objects;
>
> static const struct drm_gem_object_funcs i915_gem_object_funcs;
>
> -unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
> - enum i915_cache_level level)
> -{
> - if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
> - return 0;
> -
> - return INTEL_INFO(i915)->cachelevel_to_pat[level];
> -}
Yes, this can be removed. INTEL_INFO(i915)->pat_uc/wb/wt should be sufficient,
> -
> -bool i915_gem_object_has_cache_level(const struct drm_i915_gem_object *obj,
> - enum i915_cache_level lvl)
> -{
If we had INTEL_INFO(i915)->pat[] setup, it would be easier just to keep this
function, because we can simply check the cache policy bit field in
INTEL_INFO(i915)->pat[obj->pat_index] to see whether it is cached, uncached,
or write-through.
See Bspec https://gfxspecs.intel.com/Predator/Home/Index/44235
For MTL check bit[3:2], for other gen12 platforms check bit[1:0]
> - /*
> - * In case the pat_index is set by user space, this kernel mode
> - * driver should leave the coherency to be managed by user space,
> - * simply return true here.
> - */
> - if (obj->pat_set_by_user)
> - return true;
> -
> - /*
> - * Otherwise the pat_index should have been converted from cache_level
> - * so that the following comparison is valid.
> - */
> - return obj->pat_index == i915_gem_get_pat_index(obj_to_i915(obj), lvl);
> -}
> -
> struct drm_i915_gem_object *i915_gem_object_alloc(void)
> {
> struct drm_i915_gem_object *obj;
> @@ -144,6 +117,24 @@ void __i915_gem_object_fini(struct drm_i915_gem_object *obj)
> dma_resv_fini(&obj->base._resv);
> }
>
> +void __i915_gem_object_update_coherency(struct drm_i915_gem_object *obj)
> +{
> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
> + const unsigned int mode = I915_CACHE_MODE(obj->cache_mode);
obj->cache_mode seems to be redundant if we have INTEL_INFO(i915)->pat[] and
obj->pat_index.
> +
> + if (!(mode == I915_CACHE_MODE_UNKNOWN || mode == I915_CACHE_MODE_UC))
> + obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
> + I915_BO_CACHE_COHERENT_FOR_WRITE);
> + else if (mode != I915_CACHE_MODE_UNKNOWN && HAS_LLC(i915))
> + obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
> + else
> + obj->cache_coherent = 0;
> +
> + obj->cache_dirty =
> + !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) &&
> + !IS_DGFX(i915);
> +}
> +
> /**
> * i915_gem_object_set_cache_coherency - Mark up the object's coherency levels
> * for a given cache_level
> @@ -154,20 +145,15 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
> unsigned int cache_level)
> {
> struct drm_i915_private *i915 = to_i915(obj->base.dev);
> + i915_cache_t mode;
> + int found;
>
> - obj->pat_index = i915_gem_get_pat_index(i915, cache_level);
> + found = i915_cache_level_to_pat_and_mode(i915, cache_level, &mode);
> + GEM_WARN_ON(found < 0);
> + obj->pat_index = found;
> + obj->cache_mode = mode;
>
> - if (cache_level != I915_CACHE_NONE)
> - obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
> - I915_BO_CACHE_COHERENT_FOR_WRITE);
> - else if (HAS_LLC(i915))
> - obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
> - else
> - obj->cache_coherent = 0;
> -
> - obj->cache_dirty =
> - !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) &&
> - !IS_DGFX(i915);
> + __i915_gem_object_update_coherency(obj);
> }
>
> /**
> @@ -187,18 +173,9 @@ void i915_gem_object_set_pat_index(struct drm_i915_gem_object *obj,
> return;
>
> obj->pat_index = pat_index;
> + obj->cache_mode = INTEL_INFO(i915)->cache_modes[pat_index];
>
> - if (pat_index != i915_gem_get_pat_index(i915, I915_CACHE_NONE))
> - obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
> - I915_BO_CACHE_COHERENT_FOR_WRITE);
> - else if (HAS_LLC(i915))
> - obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
> - else
> - obj->cache_coherent = 0;
> -
> - obj->cache_dirty =
> - !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) &&
> - !IS_DGFX(i915);
> + __i915_gem_object_update_coherency(obj);
> }
>
> bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
> @@ -215,6 +192,7 @@ bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
> /*
> * Always flush cache for UMD objects at creation time.
> */
> + /* QQQ/FIXME why? avoidable performance penalty? */
> if (obj->pat_set_by_user)
> return true;
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index 884a17275b3a..f84f41e9f81f 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -13,6 +13,7 @@
>
> #include "display/intel_frontbuffer.h"
> #include "intel_memory_region.h"
> +#include "i915_cache.h"
> #include "i915_gem_object_types.h"
> #include "i915_gem_gtt.h"
> #include "i915_gem_ww.h"
> @@ -32,10 +33,18 @@ static inline bool i915_gem_object_size_2big(u64 size)
> return false;
> }
>
> -unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
> - enum i915_cache_level level);
> -bool i915_gem_object_has_cache_level(const struct drm_i915_gem_object *obj,
> - enum i915_cache_level lvl);
> +static inline int
> +i915_gem_object_has_cache_mode(const struct drm_i915_gem_object *obj,
> + unsigned int mode)
> +{
> + if (I915_CACHE_MODE(obj->cache_mode) == mode)
> + return 1;
> + else if (obj->pat_set_by_user)
> + return -1; /* Unknown, callers should assume no. */
> + else
> + return 0;
> +}
> +
> void i915_gem_init__objects(struct drm_i915_private *i915);
>
> void i915_objects_module_exit(void);
> @@ -764,6 +773,7 @@ int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
> bool intr);
> bool i915_gem_object_has_unknown_state(struct drm_i915_gem_object *obj);
>
> +void __i915_gem_object_update_coherency(struct drm_i915_gem_object *obj);
> void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
> unsigned int cache_level);
> void i915_gem_object_set_pat_index(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> index 8de2b91b3edf..1f9fa28d07df 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
> @@ -14,6 +14,7 @@
> #include <uapi/drm/i915_drm.h>
>
> #include "i915_active.h"
> +#include "i915_cache.h"
> #include "i915_selftest.h"
> #include "i915_vma_resource.h"
>
> @@ -116,93 +117,6 @@ struct drm_i915_gem_object_ops {
> const char *name; /* friendly name for debug, e.g. lockdep classes */
> };
>
> -/**
> - * enum i915_cache_level - The supported GTT caching values for system memory
> - * pages.
> - *
> - * These translate to some special GTT PTE bits when binding pages into some
> - * address space. It also determines whether an object, or rather its pages are
> - * coherent with the GPU, when also reading or writing through the CPU cache
> - * with those pages.
> - *
> - * Userspace can also control this through struct drm_i915_gem_caching.
> - */
> -enum i915_cache_level {
> - /**
> - * @I915_CACHE_NONE:
> - *
> - * GPU access is not coherent with the CPU cache. If the cache is dirty
> - * and we need the underlying pages to be coherent with some later GPU
> - * access then we need to manually flush the pages.
> - *
> - * On shared LLC platforms reads and writes through the CPU cache are
> - * still coherent even with this setting. See also
> - * &drm_i915_gem_object.cache_coherent for more details. Due to this we
> - * should only ever use uncached for scanout surfaces, otherwise we end
> - * up over-flushing in some places.
> - *
> - * This is the default on non-LLC platforms.
> - */
> - I915_CACHE_NONE = 0,
> - /**
> - * @I915_CACHE_LLC:
> - *
> - * GPU access is coherent with the CPU cache. If the cache is dirty,
> - * then the GPU will ensure that access remains coherent, when both
> - * reading and writing through the CPU cache. GPU writes can dirty the
> - * CPU cache.
> - *
> - * Not used for scanout surfaces.
> - *
> - * Applies to both platforms with shared LLC(HAS_LLC), and snooping
> - * based platforms(HAS_SNOOP).
> - *
> - * This is the default on shared LLC platforms. The only exception is
> - * scanout objects, where the display engine is not coherent with the
> - * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is
> - * automatically applied by the kernel in pin_for_display, if userspace
> - * has not done so already.
> - */
> - I915_CACHE_LLC,
> - /**
> - * @I915_CACHE_L3_LLC:
> - *
> - * Explicitly enable the Gfx L3 cache, with coherent LLC.
> - *
> - * The Gfx L3 sits between the domain specific caches, e.g
> - * sampler/render caches, and the larger LLC. LLC is coherent with the
> - * GPU, but L3 is only visible to the GPU, so likely needs to be flushed
> - * when the workload completes.
> - *
> - * Not used for scanout surfaces.
> - *
> - * Only exposed on some gen7 + GGTT. More recent hardware has dropped
> - * this explicit setting, where it should now be enabled by default.
> - */
> - I915_CACHE_L3_LLC,
> - /**
> - * @I915_CACHE_WT:
> - *
> - * Write-through. Used for scanout surfaces.
> - *
> - * The GPU can utilise the caches, while still having the display engine
> - * be coherent with GPU writes, as a result we don't need to flush the
> - * CPU caches when moving out of the render domain. This is the default
> - * setting chosen by the kernel, if supported by the HW, otherwise we
> - * fallback to I915_CACHE_NONE. On the CPU side writes through the CPU
> - * cache still need to be flushed, to remain coherent with the display
> - * engine.
> - */
> - I915_CACHE_WT,
> - /**
> - * @I915_MAX_CACHE_LEVEL:
> - *
> - * Mark the last entry in the enum. Used for defining cachelevel_to_pat
> - * array for cache_level to pat translation table.
> - */
> - I915_MAX_CACHE_LEVEL,
> -};
> -
> enum i915_map_type {
> I915_MAP_WB = 0,
> I915_MAP_WC,
> @@ -375,6 +289,9 @@ struct drm_i915_gem_object {
> unsigned int mem_flags;
> #define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
> #define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */
> +
> + i915_cache_t cache_mode;
> +
> /**
> * @pat_index: The desired PAT index.
> *
> @@ -409,9 +326,7 @@ struct drm_i915_gem_object {
> * Check for @pat_set_by_user to find out if an object has pat index set
> * by userspace. The ioctl's to change cache settings have also been
> * disabled for the objects with pat index set by userspace. Please don't
> - * assume @cache_coherent having the flags set as describe here. A helper
> - * function i915_gem_object_has_cache_level() provides one way to bypass
> - * the use of this field.
> + * assume @cache_coherent having the flags set as describe here.
> *
> * Track whether the pages are coherent with the GPU if reading or
> * writing through the CPU caches. The largely depends on the
> @@ -492,9 +407,7 @@ struct drm_i915_gem_object {
> * Check for @pat_set_by_user to find out if an object has pat index set
> * by userspace. The ioctl's to change cache settings have also been
> * disabled for the objects with pat_index set by userspace. Please don't
> - * assume @cache_dirty is set as describe here. Also see helper function
> - * i915_gem_object_has_cache_level() for possible ways to bypass the use
> - * of this field.
> + * assume @cache_dirty is set as describe here.
> *
> * Track if we are we dirty with writes through the CPU cache for this
> * object. As a result reading directly from main memory might yield
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index 3b094d36a0b0..a7012f1a9c70 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -563,11 +563,8 @@ static void dbg_poison(struct i915_ggtt *ggtt,
> while (size) {
> void __iomem *s;
>
> - ggtt->vm.insert_page(&ggtt->vm, addr,
> - ggtt->error_capture.start,
> - i915_gem_get_pat_index(ggtt->vm.i915,
> - I915_CACHE_NONE),
> - 0);
> + ggtt->vm.insert_page(&ggtt->vm, addr, ggtt->error_capture.start,
> + ggtt->vm.i915->pat_uc, 0);
> mb();
>
> s = io_mapping_map_wc(&ggtt->iomap,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> index 7078af2f8f79..e794bd2a7ccb 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> @@ -58,6 +58,16 @@ i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
> I915_CACHE_NONE;
> }
>
> +static unsigned int
> +i915_ttm_cache_pat(struct drm_i915_private *i915, struct ttm_resource *res,
> + struct ttm_tt *ttm)
> +{
> + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
> + !i915_ttm_gtt_binds_lmem(res) &&
> + ttm->caching == ttm_cached) ? i915->pat_wb :
> + i915->pat_uc;
> +}
> +
> static struct intel_memory_region *
> i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
> {
> @@ -196,7 +206,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
> struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> struct i915_request *rq;
> struct ttm_tt *src_ttm = bo->ttm;
> - enum i915_cache_level src_level, dst_level;
> + unsigned int src_pat, dst_pat;
> int ret;
>
> if (!to_gt(i915)->migrate.context || intel_gt_is_wedged(to_gt(i915)))
> @@ -206,16 +216,15 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
> if (I915_SELFTEST_ONLY(fail_gpu_migration))
> clear = true;
>
> - dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
> + dst_pat = i915_ttm_cache_pat(i915, dst_mem, dst_ttm);
> if (clear) {
> if (bo->type == ttm_bo_type_kernel &&
> !I915_SELFTEST_ONLY(fail_gpu_migration))
> return ERR_PTR(-EINVAL);
>
> intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
> - ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps,
> - dst_st->sgl,
> - i915_gem_get_pat_index(i915, dst_level),
> + ret = intel_context_migrate_clear(to_gt(i915)->migrate.context,
> + deps, dst_st->sgl, dst_pat,
> i915_ttm_gtt_binds_lmem(dst_mem),
> 0, &rq);
> } else {
> @@ -225,14 +234,13 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
> if (IS_ERR(src_rsgt))
> return ERR_CAST(src_rsgt);
>
> - src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
> + src_pat = i915_ttm_cache_pat(i915, bo->resource, src_ttm);
> intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
> ret = intel_context_migrate_copy(to_gt(i915)->migrate.context,
> deps, src_rsgt->table.sgl,
> - i915_gem_get_pat_index(i915, src_level),
> + src_pat,
> i915_ttm_gtt_binds_lmem(bo->resource),
> - dst_st->sgl,
> - i915_gem_get_pat_index(i915, dst_level),
> + dst_st->sgl, dst_pat,
> i915_ttm_gtt_binds_lmem(dst_mem),
> &rq);
>
> diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> index df6c9a84252c..c8925918784e 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
> @@ -354,7 +354,7 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
>
> obj->write_domain = I915_GEM_DOMAIN_CPU;
> obj->read_domains = I915_GEM_DOMAIN_CPU;
> - obj->pat_index = i915_gem_get_pat_index(i915, I915_CACHE_NONE);
> + i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
>
> return obj;
> }
> diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> index c2bdc133c89a..fb69f667652a 100644
> --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> @@ -226,9 +226,7 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
> return ret;
>
> vm->scratch[0]->encode =
> - vm->pte_encode(px_dma(vm->scratch[0]),
> - i915_gem_get_pat_index(vm->i915,
> - I915_CACHE_NONE),
> + vm->pte_encode(px_dma(vm->scratch[0]), vm->i915->pat_uc,
> PTE_READ_ONLY);
>
> vm->scratch[1] = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index f948d33e5ec5..a6692ea1a91e 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -40,16 +40,11 @@ static u64 gen8_pte_encode(dma_addr_t addr,
> if (flags & PTE_LM)
> pte |= GEN12_PPGTT_PTE_LM;
>
> - /*
> - * For pre-gen12 platforms pat_index is the same as enum
> - * i915_cache_level, so the switch-case here is still valid.
> - * See translation table defined by LEGACY_CACHELEVEL.
> - */
> switch (pat_index) {
> - case I915_CACHE_NONE:
> + case 0:
> pte |= PPAT_UNCACHED;
> break;
> - case I915_CACHE_WT:
> + case 3:
> pte |= PPAT_DISPLAY_ELLC;
> break;
> default:
> @@ -853,9 +848,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
> pte_flags |= PTE_LM;
>
> vm->scratch[0]->encode =
> - vm->pte_encode(px_dma(vm->scratch[0]),
> - i915_gem_get_pat_index(vm->i915,
> - I915_CACHE_NONE),
> + vm->pte_encode(px_dma(vm->scratch[0]), vm->i915->pat_uc,
> pte_flags);
>
> for (i = 1; i <= vm->top; i++) {
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index dd0ed941441a..c97379cf8241 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -921,9 +921,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
> pte_flags |= PTE_LM;
>
> ggtt->vm.scratch[0]->encode =
> - ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
> - i915_gem_get_pat_index(i915,
> - I915_CACHE_NONE),
> + ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]), i915->pat_uc,
> pte_flags);
>
> return 0;
> @@ -1297,10 +1295,7 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
> * ptes to be repopulated.
> */
> vma->resource->bound_flags = 0;
> - vma->ops->bind_vma(vm, NULL, vma->resource,
> - obj ? obj->pat_index :
> - i915_gem_get_pat_index(vm->i915,
> - I915_CACHE_NONE),
> + vma->ops->bind_vma(vm, NULL, vma->resource, obj->cache_mode,
> was_bound);
>
> if (obj) { /* only used during resume => exclusive access */
> diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
> index 6023288b0e2d..81f7834cc2db 100644
> --- a/drivers/gpu/drm/i915/gt/intel_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
> @@ -45,9 +45,7 @@ static void xehpsdv_toggle_pdes(struct i915_address_space *vm,
> * Insert a dummy PTE into every PT that will map to LMEM to ensure
> * we have a correctly setup PDE structure for later use.
> */
> - vm->insert_page(vm, 0, d->offset,
> - i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE),
> - PTE_LM);
> + vm->insert_page(vm, 0, d->offset, vm->i915->pat_uc, PTE_LM);
> GEM_BUG_ON(!pt->is_compact);
> d->offset += SZ_2M;
> }
> @@ -65,9 +63,7 @@ static void xehpsdv_insert_pte(struct i915_address_space *vm,
> * alignment is 64K underneath for the pt, and we are careful
> * not to access the space in the void.
> */
> - vm->insert_page(vm, px_dma(pt), d->offset,
> - i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE),
> - PTE_LM);
> + vm->insert_page(vm, px_dma(pt), d->offset, vm->i915->pat_uc, PTE_LM);
> d->offset += SZ_64K;
> }
>
> @@ -77,8 +73,7 @@ static void insert_pte(struct i915_address_space *vm,
> {
> struct insert_pte_data *d = data;
>
> - vm->insert_page(vm, px_dma(pt), d->offset,
> - i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE),
> + vm->insert_page(vm, px_dma(pt), d->offset, vm->i915->pat_uc,
> i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0);
> d->offset += PAGE_SIZE;
> }
> diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
> index 3def5ca72dec..a67ede65d816 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
> @@ -904,8 +904,7 @@ static int perf_clear_blt(void *arg)
>
> err = __perf_clear_blt(gt->migrate.context,
> dst->mm.pages->sgl,
> - i915_gem_get_pat_index(gt->i915,
> - I915_CACHE_NONE),
> + gt->i915->pat_uc,
> i915_gem_object_is_lmem(dst),
> sizes[i]);
>
> @@ -995,12 +994,10 @@ static int perf_copy_blt(void *arg)
>
> err = __perf_copy_blt(gt->migrate.context,
> src->mm.pages->sgl,
> - i915_gem_get_pat_index(gt->i915,
> - I915_CACHE_NONE),
> + gt->i915->pat_uc,
> i915_gem_object_is_lmem(src),
> dst->mm.pages->sgl,
> - i915_gem_get_pat_index(gt->i915,
> - I915_CACHE_NONE),
> + gt->i915->pat_uc,
> i915_gem_object_is_lmem(dst),
> sz);
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
> index 79aa6ac66ad2..327dc9294e0f 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_reset.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
> @@ -84,11 +84,8 @@ __igt_reset_stolen(struct intel_gt *gt,
> void __iomem *s;
> void *in;
>
> - ggtt->vm.insert_page(&ggtt->vm, dma,
> - ggtt->error_capture.start,
> - i915_gem_get_pat_index(gt->i915,
> - I915_CACHE_NONE),
> - 0);
> + ggtt->vm.insert_page(&ggtt->vm, dma, ggtt->error_capture.start,
> + gt->i915->pat_uc, 0);
> mb();
>
> s = io_mapping_map_wc(&ggtt->iomap,
> @@ -127,11 +124,8 @@ __igt_reset_stolen(struct intel_gt *gt,
> void *in;
> u32 x;
>
> - ggtt->vm.insert_page(&ggtt->vm, dma,
> - ggtt->error_capture.start,
> - i915_gem_get_pat_index(gt->i915,
> - I915_CACHE_NONE),
> - 0);
> + ggtt->vm.insert_page(&ggtt->vm, dma, ggtt->error_capture.start,
> + gt->i915->pat_uc, 0);
> mb();
>
> s = io_mapping_map_wc(&ggtt->iomap,
> diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> index 39c3ec12df1a..db64dc7d3fce 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
> @@ -836,7 +836,10 @@ static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt,
> return PTR_ERR(obj);
>
> /* keep the same cache settings as timeline */
> - i915_gem_object_set_pat_index(obj, tl->hwsp_ggtt->obj->pat_index);
> + obj->pat_index = tl->hwsp_ggtt->obj->pat_index;
> + obj->cache_mode = tl->hwsp_ggtt->obj->cache_mode;
> + __i915_gem_object_update_coherency(obj);
> +
> w->map = i915_gem_object_pin_map_unlocked(obj,
> page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping));
> if (IS_ERR(w->map)) {
> diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
> index 3bd6b540257b..6049f01be219 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_tlb.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c
> @@ -36,8 +36,6 @@ pte_tlbinv(struct intel_context *ce,
> u64 length,
> struct rnd_state *prng)
> {
> - const unsigned int pat_index =
> - i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE);
> struct drm_i915_gem_object *batch;
> struct drm_mm_node vb_node;
> struct i915_request *rq;
> @@ -157,7 +155,8 @@ pte_tlbinv(struct intel_context *ce,
> /* Flip the PTE between A and B */
> if (i915_gem_object_is_lmem(vb->obj))
> pte_flags |= PTE_LM;
> - ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);
> + ce->vm->insert_entries(ce->vm, &vb_res, ce->vm->i915->pat_uc,
> + pte_flags);
>
> /* Flush the PTE update to concurrent HW */
> tlbinv(ce->vm, addr & -length, length);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> index d408856ae4c0..e099414d624d 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> @@ -991,14 +991,10 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
>
> if (ggtt->vm.raw_insert_entries)
> ggtt->vm.raw_insert_entries(&ggtt->vm, vma_res,
> - i915_gem_get_pat_index(ggtt->vm.i915,
> - I915_CACHE_NONE),
> - pte_flags);
> + ggtt->vm.i915->pat_uc, pte_flags);
> else
> ggtt->vm.insert_entries(&ggtt->vm, vma_res,
> - i915_gem_get_pat_index(ggtt->vm.i915,
> - I915_CACHE_NONE),
> - pte_flags);
> + ggtt->vm.i915->pat_uc, pte_flags);
> }
>
> static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
> diff --git a/drivers/gpu/drm/i915/i915_cache.c b/drivers/gpu/drm/i915/i915_cache.c
> new file mode 100644
> index 000000000000..7a8002ebd2ec
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_cache.c
> @@ -0,0 +1,59 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +#include "i915_cache.h"
> +#include "i915_drv.h"
> +
> +static int find_pat(const struct intel_device_info *info, i915_cache_t mode)
> +{
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(info->cache_modes); i++) {
> + if (info->cache_modes[i] == mode)
> + return i;
> + }
> +
> + return -1;
> +}
> +
> +void i915_cache_init(struct drm_i915_private *i915)
> +{
> + int ret;
> +
> + ret = find_pat(INTEL_INFO(i915), I915_CACHE(UC));
> + WARN_ON(ret < 0);
> + drm_info(&i915->drm, "Using PAT index %u for uncached access\n", ret);
> + i915->pat_uc = ret;
> +
> + ret = find_pat(INTEL_INFO(i915), I915_CACHE(WB));
> + WARN_ON(ret < 0);
> + drm_info(&i915->drm, "Using PAT index %u for write-back access\n", ret);
> + i915->pat_wb = ret;
> +}
I don't think we need the above two functions. Why don't we just hard code
pat_uc and pat_wb in device_info? plus pat_wt too? These are predetermined,
and used by KMD only.
> +int i915_cache_level_to_pat_and_mode(struct drm_i915_private *i915,
> + unsigned int cache_level,
> + i915_cache_t *mode)
> +{
> + const struct intel_device_info *info = INTEL_INFO(i915);
> + i915_cache_t level_to_mode[] = {
> + [I915_CACHE_NONE] = I915_CACHE(UC),
> + [I915_CACHE_WT] = I915_CACHE(WT),
> + [I915_CACHE_LLC] = I915_CACHE(WB),
> + };
> + int ret;
> +
> + if (GRAPHICS_VER(i915) >= 12)
> + level_to_mode[I915_CACHE_L3_LLC] = I915_CACHE(WB);
> + else
> + level_to_mode[I915_CACHE_L3_LLC] = _I915_CACHE(WB, LLC);
> +
> + ret = find_pat(info, level_to_mode[cache_level]);
> + if (ret >= 0 && mode)
> + *mode = info->cache_modes[ret];
> +
> + return ret;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_cache.h b/drivers/gpu/drm/i915/i915_cache.h
> new file mode 100644
> index 000000000000..0df03f1f01ef
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_cache.h
> @@ -0,0 +1,129 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2023 Intel Corporation
> + */
> +
> +#ifndef __I915_CACHE_H__
> +#define __I915_CACHE_H__
> +
> +#include <linux/types.h>
> +
> +struct drm_i915_private;
> +
> +typedef u16 i915_cache_t;
> +
> +#define I915_CACHE(mode) \
> + (i915_cache_t)(I915_CACHE_MODE_##mode)
> +
> +#define _I915_CACHE(mode, flag) \
> + (i915_cache_t)((I915_CACHE_MODE_##mode) | ( BIT(8 + I915_CACHE_##flag)))
> +
> +#define I915_CACHE_MODE(cache) \
> + (unsigned int)(((i915_cache_t)(cache)) & 0xff)
> +#define I915_CACHE_FLAGS(cache) \
> + (unsigned int)((((i915_cache_t)(cache) & 0xff00)) >> 16)
> +
> +/* Cache mode values */
> +#define I915_CACHE_MODE_UNKNOWN (0)
> +#define I915_CACHE_MODE_UC (1)
> +#define I915_CACHE_MODE_WB (2)
> +#define I915_CACHE_MODE_WT (3)
> +#define I915_CACHE_MODE_WC (4)
Why do you need these CACHE_MODE's? Aren't they the same as i915_cache_level, which
need some sort of translation?
> +
> +/* Mode flag bits */
> +#define I915_CACHE_L3 (0)
> +#define I915_CACHE_COH1W (1)
> +#define I915_CACHE_COH2W (2)
> +#define I915_CACHE_CLOS1 (3)
> +#define I915_CACHE_CLOS2 (4)
These had been defined in drivers/gpu/drm/i915/gt/intel_gtt.h already, why add new ones?
The CLOS ones are not needed in upstream unless we want to support PVC here.
> +
> +void i915_cache_init(struct drm_i915_private *i915);
> +int i915_cache_level_to_pat_and_mode(struct drm_i915_private *i915,
> + unsigned int cache_level,
> + i915_cache_t *mode);
> +
> +/*
> + * Legacy/kernel internal interface below:
> + */
> +
> +/**
> + * enum i915_cache_level - The supported GTT caching values for system memory
> + * pages.
> + *
> + * These translate to some special GTT PTE bits when binding pages into some
> + * address space. It also determines whether an object, or rather its pages are
> + * coherent with the GPU, when also reading or writing through the CPU cache
> + * with those pages.
> + *
> + * Userspace can also control this through struct drm_i915_gem_caching.
> + */
> +enum i915_cache_level {
Shouldn't we completely get rid of this enum now? It should be replaced by
INTEL_INFO(i915)->pat_uc/wb/wt.
> + /**
> + * @I915_CACHE_NONE:
> + *
> + * GPU access is not coherent with the CPU cache. If the cache is dirty
> + * and we need the underlying pages to be coherent with some later GPU
> + * access then we need to manually flush the pages.
> + *
> + * On shared LLC platforms reads and writes through the CPU cache are
> + * still coherent even with this setting. See also
> + * &drm_i915_gem_object.cache_coherent for more details. Due to this we
> + * should only ever use uncached for scanout surfaces, otherwise we end
> + * up over-flushing in some places.
> + *
> + * This is the default on non-LLC platforms.
> + */
> + I915_CACHE_NONE = 0,
> + /**
> + * @I915_CACHE_LLC:
> + *
> + * GPU access is coherent with the CPU cache. If the cache is dirty,
> + * then the GPU will ensure that access remains coherent, when both
> + * reading and writing through the CPU cache. GPU writes can dirty the
> + * CPU cache.
> + *
> + * Not used for scanout surfaces.
> + *
> + * Applies to both platforms with shared LLC(HAS_LLC), and snooping
> + * based platforms(HAS_SNOOP).
> + *
> + * This is the default on shared LLC platforms. The only exception is
> + * scanout objects, where the display engine is not coherent with the
> + * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is
> + * automatically applied by the kernel in pin_for_display, if userspace
> + * has not done so already.
> + */
> + I915_CACHE_LLC,
> + /**
> + * @I915_CACHE_L3_LLC:
> + *
> + * Explicitly enable the Gfx L3 cache, with coherent LLC.
> + *
> + * The Gfx L3 sits between the domain specific caches, e.g
> + * sampler/render caches, and the larger LLC. LLC is coherent with the
> + * GPU, but L3 is only visible to the GPU, so likely needs to be flushed
> + * when the workload completes.
> + *
> + * Not used for scanout surfaces.
> + *
> + * Only exposed on some gen7 + GGTT. More recent hardware has dropped
> + * this explicit setting, where it should now be enabled by default.
> + */
> + I915_CACHE_L3_LLC,
> + /**
> + * @I915_CACHE_WT:
> + *
> + * Write-through. Used for scanout surfaces.
> + *
> + * The GPU can utilise the caches, while still having the display engine
> + * be coherent with GPU writes, as a result we don't need to flush the
> + * CPU caches when moving out of the render domain. This is the default
> + * setting chosen by the kernel, if supported by the HW, otherwise we
> + * fallback to I915_CACHE_NONE. On the CPU side writes through the CPU
> + * cache still need to be flushed, to remain coherent with the display
> + * engine.
> + */
> + I915_CACHE_WT,
> +};
> +
> +#endif /* __I915_CACHE_H__ */
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 76ccd4e03e31..e2da57397770 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -139,48 +139,37 @@ static const char *stringify_vma_type(const struct i915_vma *vma)
> return "ppgtt";
> }
>
> -static const char *i915_cache_level_str(struct drm_i915_gem_object *obj)
> +static void obj_cache_str(struct seq_file *m, struct drm_i915_gem_object *obj)
> {
> - struct drm_i915_private *i915 = obj_to_i915(obj);
> + const i915_cache_t cache = obj->cache_mode;
> + const unsigned int mode = I915_CACHE_MODE(cache);
> + const unsigned long flags = I915_CACHE_FLAGS(cache);
> + static const char *mode_str[] = {
> + [I915_CACHE_MODE_UC] = "UC",
> + [I915_CACHE_MODE_WB] = "WB",
> + [I915_CACHE_MODE_WT] = "WT",
> + [I915_CACHE_MODE_WC] = "WC",
> + };
> + static const char *flag_str[] = {
> + [I915_CACHE_L3] = "L3",
> + [I915_CACHE_COH1W] = "1-Way-Coherent",
> + [I915_CACHE_COH2W] = "2-Way-Coherent",
> + [I915_CACHE_CLOS1] = "CLOS1",
> + [I915_CACHE_CLOS2] = "CLOS2",
> + };
>
> - if (IS_METEORLAKE(i915)) {
> - switch (obj->pat_index) {
> - case 0: return " WB";
> - case 1: return " WT";
> - case 2: return " UC";
> - case 3: return " WB (1-Way Coh)";
> - case 4: return " WB (2-Way Coh)";
> - default: return " not defined";
> - }
> - } else if (IS_PONTEVECCHIO(i915)) {
> - switch (obj->pat_index) {
> - case 0: return " UC";
> - case 1: return " WC";
> - case 2: return " WT";
> - case 3: return " WB";
> - case 4: return " WT (CLOS1)";
> - case 5: return " WB (CLOS1)";
> - case 6: return " WT (CLOS2)";
> - case 7: return " WT (CLOS2)";
> - default: return " not defined";
> - }
> - } else if (GRAPHICS_VER(i915) >= 12) {
> - switch (obj->pat_index) {
> - case 0: return " WB";
> - case 1: return " WC";
> - case 2: return " WT";
> - case 3: return " UC";
> - default: return " not defined";
> - }
> + if (mode == I915_CACHE_MODE_UNKNOWN || mode > ARRAY_SIZE(mode_str)) {
> + if (obj->pat_set_by_user)
> + seq_printf(m, " PAT-%u", obj->pat_index);
> + else
> + seq_printf(m, " PAT-%u-???", obj->pat_index);
> } else {
> - switch (obj->pat_index) {
> - case 0: return " UC";
> - case 1: return HAS_LLC(i915) ?
> - " LLC" : " snooped";
> - case 2: return " L3+LLC";
> - case 3: return " WT";
> - default: return " not defined";
> - }
> + unsigned long bit;
> +
> + seq_printf(m, " %s", mode_str[mode]);
> +
> + for_each_set_bit(bit, &flags, sizeof(i915_cache_t))
> + seq_printf(m, "-%s", flag_str[bit]);
> }
> }
>
> @@ -190,17 +179,23 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> struct i915_vma *vma;
> int pin_count = 0;
>
> - seq_printf(m, "%pK: %c%c%c %8zdKiB %02x %02x %s%s%s",
> + seq_printf(m, "%pK: %c%c%c %8zdKiB %02x %02x",
> &obj->base,
> get_tiling_flag(obj),
> get_global_flag(obj),
> get_pin_mapped_flag(obj),
> obj->base.size / 1024,
> obj->read_domains,
> - obj->write_domain,
> - i915_cache_level_str(obj),
> - obj->mm.dirty ? " dirty" : "",
> - obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
> + obj->write_domain);
> +
> + obj_cache_str(m, obj);
> +
> + if (obj->mm.dirty)
> + seq_puts(m, " dirty");
> +
> + if (obj->mm.madv == I915_MADV_DONTNEED)
> + seq_puts(m, " purgeable");
> +
> if (obj->base.name)
> seq_printf(m, " (name: %d)", obj->base.name);
>
> diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
> index 222d0a1f3b55..deab26752ba4 100644
> --- a/drivers/gpu/drm/i915/i915_driver.c
> +++ b/drivers/gpu/drm/i915/i915_driver.c
> @@ -80,6 +80,7 @@
> #include "soc/intel_dram.h"
> #include "soc/intel_gmch.h"
>
> +#include "i915_cache.h"
> #include "i915_debugfs.h"
> #include "i915_driver.h"
> #include "i915_drm_client.h"
> @@ -267,6 +268,8 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
>
> intel_detect_preproduction_hw(dev_priv);
>
> + i915_cache_init(dev_priv);
> +
> return 0;
>
> err_rootgt:
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b4cf6f0f636d..cb1c0c9d98ef 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -251,6 +251,9 @@ struct drm_i915_private {
> unsigned int hpll_freq;
> unsigned int czclk_freq;
>
> + unsigned int pat_uc;
> + unsigned int pat_wb;
How about making these part of INTEL_INFO(i915)? They are predetermined, no need to be
dynamic.
> +
> /**
> * wq - Driver workqueue for GEM.
> *
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7ae42f746cc2..9aae75862e6f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -422,9 +422,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
> ggtt->vm.insert_page(&ggtt->vm,
> i915_gem_object_get_dma_address(obj,
> offset >> PAGE_SHIFT),
> - node.start,
> - i915_gem_get_pat_index(i915,
> - I915_CACHE_NONE), 0);
> + node.start, i915->pat_uc, 0);
> } else {
> page_base += offset & PAGE_MASK;
> }
> @@ -603,9 +601,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
> ggtt->vm.insert_page(&ggtt->vm,
> i915_gem_object_get_dma_address(obj,
> offset >> PAGE_SHIFT),
> - node.start,
> - i915_gem_get_pat_index(i915,
> - I915_CACHE_NONE), 0);
> + node.start, i915->pat_uc, 0);
> wmb(); /* flush modifications to the GGTT (insert_page) */
> } else {
> page_base += offset & PAGE_MASK;
> @@ -1148,19 +1144,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
> unsigned int i;
> int ret;
>
> - /*
> - * In the proccess of replacing cache_level with pat_index a tricky
> - * dependency is created on the definition of the enum i915_cache_level.
> - * in case this enum is changed, PTE encode would be broken.
> - * Add a WARNING here. And remove when we completely quit using this
> - * enum
> - */
> - BUILD_BUG_ON(I915_CACHE_NONE != 0 ||
> - I915_CACHE_LLC != 1 ||
> - I915_CACHE_L3_LLC != 2 ||
> - I915_CACHE_WT != 3 ||
> - I915_MAX_CACHE_LEVEL != 4);
> -
> /* We need to fallback to 4K pages if host doesn't support huge gtt. */
> if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
> RUNTIME_INFO(dev_priv)->page_sizes = I915_GTT_PAGE_SIZE_4K;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 4749f99e6320..fad336a45699 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1122,14 +1122,11 @@ i915_vma_coredump_create(const struct intel_gt *gt,
> mutex_lock(&ggtt->error_mutex);
> if (ggtt->vm.raw_insert_page)
> ggtt->vm.raw_insert_page(&ggtt->vm, dma, slot,
> - i915_gem_get_pat_index(gt->i915,
> - I915_CACHE_NONE),
> + ggtt->vm.i915->pat_uc,
> 0);
> else
> ggtt->vm.insert_page(&ggtt->vm, dma, slot,
> - i915_gem_get_pat_index(gt->i915,
> - I915_CACHE_NONE),
> - 0);
> + ggtt->vm.i915->pat_uc, 0);
> mb();
>
> s = io_mapping_map_wc(&ggtt->iomap, slot, PAGE_SIZE);
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 3d7a5db9833b..fbdce31afeb1 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -32,6 +32,7 @@
> #include "gt/intel_sa_media.h"
> #include "gem/i915_gem_object_types.h"
>
> +#include "i915_cache.h"
> #include "i915_driver.h"
> #include "i915_drv.h"
> #include "i915_pci.h"
> @@ -46,36 +47,42 @@ __diag_ignore_all("-Woverride-init", "Allow overriding inherited members");
> .__runtime.graphics.ip.ver = (x), \
> .__runtime.media.ip.ver = (x)
>
> -#define LEGACY_CACHELEVEL \
> - .cachelevel_to_pat = { \
> - [I915_CACHE_NONE] = 0, \
> - [I915_CACHE_LLC] = 1, \
> - [I915_CACHE_L3_LLC] = 2, \
> - [I915_CACHE_WT] = 3, \
> +/* TODO/QQQ index 1 & 2 */
> +#define LEGACY_CACHE_MODES \
I was thinking to just put the PAT settings here, instead of cache_modes, simply
.pat = {\
GEN8_PPAT_WB, \
GEN8_PPAT_WC, \
GEN8_PPAT_WT, \
GEN8_PPAT_UC,
}
> + .cache_modes = { \
> + [0] = I915_CACHE(UC), \
> + [1] = I915_CACHE(WB), \
> + [2] = _I915_CACHE(WB, L3), \
> + [3] = I915_CACHE(WT), \
> }
>
> -#define TGL_CACHELEVEL \
> - .cachelevel_to_pat = { \
> - [I915_CACHE_NONE] = 3, \
> - [I915_CACHE_LLC] = 0, \
> - [I915_CACHE_L3_LLC] = 0, \
> - [I915_CACHE_WT] = 2, \
> +#define GEN12_CACHE_MODES \
> + .cache_modes = { \
> + [0] = I915_CACHE(WB), \
> + [1] = I915_CACHE(WC), \
> + [2] = I915_CACHE(WT), \
> + [3] = I915_CACHE(UC), \
> }
>
> -#define PVC_CACHELEVEL \
> - .cachelevel_to_pat = { \
> - [I915_CACHE_NONE] = 0, \
> - [I915_CACHE_LLC] = 3, \
> - [I915_CACHE_L3_LLC] = 3, \
> - [I915_CACHE_WT] = 2, \
> +#define PVC_CACHE_MODES \
> + .cache_modes = { \
> + [0] = I915_CACHE(UC), \
> + [1] = I915_CACHE(WC), \
> + [2] = I915_CACHE(WT), \
> + [3] = I915_CACHE(WB), \
> + [4] = _I915_CACHE(WT, CLOS1), \
> + [5] = _I915_CACHE(WB, CLOS1), \
> + [6] = _I915_CACHE(WT, CLOS2), \
> + [7] = _I915_CACHE(WB, CLOS2), \
> }
.pat = {\
GEN8_PPAT_UC, \
GEN8_PPAT_WC, \
GEN8_PPAT_WT, \
GEN8_PPAT_WB, \
GEN12_PPAT_CLOS(1) | GEN8_PPAT_WT, \
GEN12_PPAT_CLOS(1) | GEN8_PPAT_WB, \
GEN12_PPAT_CLOS(2) | GEN8_PPAT_WT, \
GEN12_PPAT_CLOS(2) | GEN8_PPAT_WB, \
}
>
> -#define MTL_CACHELEVEL \
> - .cachelevel_to_pat = { \
> - [I915_CACHE_NONE] = 2, \
> - [I915_CACHE_LLC] = 3, \
> - [I915_CACHE_L3_LLC] = 3, \
> - [I915_CACHE_WT] = 1, \
> +#define MTL_CACHE_MODES \
> + .cache_modes = { \
> + [0] = I915_CACHE(WB), \
> + [1] = I915_CACHE(WT), \
> + [2] = I915_CACHE(UC), \
> + [3] = _I915_CACHE(WB, COH1W), \
> + [4] = _I915_CACHE(WB, COH2W), \
> }
.pat = {\
MTL_PPAT_L4_0_WB, \
MTL_PPAT_L4_1_WT, \
MTL_PPAT_L4_3_UC, \
MTL_PPAT_L4_0_WB | MTL_2_COH_1W, \
MTL_PPAT_L4_0_WB | MTL_3_COH_2W, \
}
> /* Keep in gen based order, and chronological order within a gen */
> @@ -100,7 +107,7 @@ __diag_ignore_all("-Woverride-init", "Allow overriding inherited members");
> .max_pat_index = 3, \
> GEN_DEFAULT_PAGE_SIZES, \
> GEN_DEFAULT_REGIONS, \
> - LEGACY_CACHELEVEL
> + LEGACY_CACHE_MODES
>
> #define I845_FEATURES \
> GEN(2), \
> @@ -115,7 +122,7 @@ __diag_ignore_all("-Woverride-init", "Allow overriding inherited members");
> .max_pat_index = 3, \
> GEN_DEFAULT_PAGE_SIZES, \
> GEN_DEFAULT_REGIONS, \
> - LEGACY_CACHELEVEL
> + LEGACY_CACHE_MODES
>
> static const struct intel_device_info i830_info = {
> I830_FEATURES,
> @@ -148,7 +155,7 @@ static const struct intel_device_info i865g_info = {
> .max_pat_index = 3, \
> GEN_DEFAULT_PAGE_SIZES, \
> GEN_DEFAULT_REGIONS, \
> - LEGACY_CACHELEVEL
> + LEGACY_CACHE_MODES
>
> static const struct intel_device_info i915g_info = {
> GEN3_FEATURES,
> @@ -211,7 +218,7 @@ static const struct intel_device_info pnv_m_info = {
> .max_pat_index = 3, \
> GEN_DEFAULT_PAGE_SIZES, \
> GEN_DEFAULT_REGIONS, \
> - LEGACY_CACHELEVEL
> + LEGACY_CACHE_MODES
>
> static const struct intel_device_info i965g_info = {
> GEN4_FEATURES,
> @@ -255,7 +262,7 @@ static const struct intel_device_info gm45_info = {
> .max_pat_index = 3, \
> GEN_DEFAULT_PAGE_SIZES, \
> GEN_DEFAULT_REGIONS, \
> - LEGACY_CACHELEVEL
> + LEGACY_CACHE_MODES
>
> static const struct intel_device_info ilk_d_info = {
> GEN5_FEATURES,
> @@ -285,7 +292,7 @@ static const struct intel_device_info ilk_m_info = {
> .__runtime.ppgtt_size = 31, \
> GEN_DEFAULT_PAGE_SIZES, \
> GEN_DEFAULT_REGIONS, \
> - LEGACY_CACHELEVEL
> + LEGACY_CACHE_MODES
>
> #define SNB_D_PLATFORM \
> GEN6_FEATURES, \
> @@ -333,7 +340,7 @@ static const struct intel_device_info snb_m_gt2_info = {
> .__runtime.ppgtt_size = 31, \
> GEN_DEFAULT_PAGE_SIZES, \
> GEN_DEFAULT_REGIONS, \
> - LEGACY_CACHELEVEL
> + LEGACY_CACHE_MODES
>
> #define IVB_D_PLATFORM \
> GEN7_FEATURES, \
> @@ -390,7 +397,7 @@ static const struct intel_device_info vlv_info = {
> .__runtime.platform_engine_mask = BIT(RCS0) | BIT(VCS0) | BIT(BCS0),
> GEN_DEFAULT_PAGE_SIZES,
> GEN_DEFAULT_REGIONS,
> - LEGACY_CACHELEVEL,
> + LEGACY_CACHE_MODES
> };
>
> #define G75_FEATURES \
> @@ -476,7 +483,7 @@ static const struct intel_device_info chv_info = {
> .has_coherent_ggtt = false,
> GEN_DEFAULT_PAGE_SIZES,
> GEN_DEFAULT_REGIONS,
> - LEGACY_CACHELEVEL,
> + LEGACY_CACHE_MODES
> };
>
> #define GEN9_DEFAULT_PAGE_SIZES \
> @@ -539,7 +546,7 @@ static const struct intel_device_info skl_gt4_info = {
> .max_pat_index = 3, \
> GEN9_DEFAULT_PAGE_SIZES, \
> GEN_DEFAULT_REGIONS, \
> - LEGACY_CACHELEVEL
> + LEGACY_CACHE_MODES
>
> static const struct intel_device_info bxt_info = {
> GEN9_LP_FEATURES,
> @@ -643,7 +650,7 @@ static const struct intel_device_info jsl_info = {
> #define GEN12_FEATURES \
> GEN11_FEATURES, \
> GEN(12), \
> - TGL_CACHELEVEL, \
> + GEN12_CACHE_MODES, \
> .has_global_mocs = 1, \
> .has_pxp = 1, \
> .max_pat_index = 3
> @@ -711,7 +718,7 @@ static const struct intel_device_info adl_p_info = {
> .__runtime.graphics.ip.ver = 12, \
> .__runtime.graphics.ip.rel = 50, \
> XE_HP_PAGE_SIZES, \
> - TGL_CACHELEVEL, \
> + GEN12_CACHE_MODES, \
> .dma_mask_size = 46, \
> .has_3d_pipeline = 1, \
> .has_64bit_reloc = 1, \
> @@ -806,7 +813,7 @@ static const struct intel_device_info pvc_info = {
> BIT(VCS0) |
> BIT(CCS0) | BIT(CCS1) | BIT(CCS2) | BIT(CCS3),
> .require_force_probe = 1,
> - PVC_CACHELEVEL,
> + PVC_CACHE_MODES
> };
>
> static const struct intel_gt_definition xelpmp_extra_gt[] = {
> @@ -841,7 +848,7 @@ static const struct intel_device_info mtl_info = {
> .__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,
> .__runtime.platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(CCS0),
> .require_force_probe = 1,
> - MTL_CACHELEVEL,
> + MTL_CACHE_MODES
> };
>
> #undef PLATFORM
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 069291b3bd37..5cbae7c2ee30 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -27,6 +27,8 @@
>
> #include <uapi/drm/i915_drm.h>
>
> +#include "i915_cache.h"
> +
> #include "intel_step.h"
>
> #include "display/intel_display_device.h"
> @@ -248,8 +250,8 @@ struct intel_device_info {
> */
> const struct intel_runtime_info __runtime;
>
> - u32 cachelevel_to_pat[I915_MAX_CACHE_LEVEL];
> - u32 max_pat_index;
> + i915_cache_t cache_modes[9];
u32 pat[16];
See https://gfxspecs.intel.com/Predator/Home/Index/63019, there are PAT[3..0]
-Fei
> + unsigned int max_pat_index;
> };
>
> struct intel_driver_caps {
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
> index 61da4ed9d521..e620f73793a5 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
> @@ -57,10 +57,7 @@ static void trash_stolen(struct drm_i915_private *i915)
> u32 __iomem *s;
> int x;
>
> - ggtt->vm.insert_page(&ggtt->vm, dma, slot,
> - i915_gem_get_pat_index(i915,
> - I915_CACHE_NONE),
> - 0);
> + ggtt->vm.insert_page(&ggtt->vm, dma, slot, i915->pat_uc, 0);
>
> s = io_mapping_map_atomic_wc(&ggtt->iomap, slot);
> for (x = 0; x < PAGE_SIZE / sizeof(u32); x++) {
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> index f8fe3681c3dc..658a5b59545e 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
> @@ -246,7 +246,7 @@ static int igt_evict_for_cache_color(void *arg)
> struct drm_mm_node target = {
> .start = I915_GTT_PAGE_SIZE * 2,
> .size = I915_GTT_PAGE_SIZE,
> - .color = i915_gem_get_pat_index(gt->i915, I915_CACHE_LLC),
> + .color = I915_CACHE(WB),
> };
> struct drm_i915_gem_object *obj;
> struct i915_vma *vma;
> @@ -309,7 +309,7 @@ static int igt_evict_for_cache_color(void *arg)
> /* Attempt to remove the first *pinned* vma, by removing the (empty)
> * neighbour -- this should fail.
> */
> - target.color = i915_gem_get_pat_index(gt->i915, I915_CACHE_L3_LLC);
> + target.color = _I915_CACHE(WB, LLC);
>
> mutex_lock(&ggtt->vm.mutex);
> err = i915_gem_evict_for_node(&ggtt->vm, NULL, &target, 0);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 5c397a2df70e..a24585784f75 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -135,7 +135,7 @@ fake_dma_object(struct drm_i915_private *i915, u64 size)
>
> obj->write_domain = I915_GEM_DOMAIN_CPU;
> obj->read_domains = I915_GEM_DOMAIN_CPU;
> - obj->pat_index = i915_gem_get_pat_index(i915, I915_CACHE_NONE);
> + obj->pat_index = i915->pat_uc;
>
> /* Preallocate the "backing storage" */
> if (i915_gem_object_pin_pages_unlocked(obj))
> @@ -358,10 +358,8 @@ static int lowlevel_hole(struct i915_address_space *vm,
> mock_vma_res->start = addr;
>
> with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
> - vm->insert_entries(vm, mock_vma_res,
> - i915_gem_get_pat_index(vm->i915,
> - I915_CACHE_NONE),
> - 0);
> + vm->insert_entries(vm, mock_vma_res,
> + vm->i915->pat_uc, 0);
> }
> count = n;
>
> @@ -1379,10 +1377,7 @@ static int igt_ggtt_page(void *arg)
>
> ggtt->vm.insert_page(&ggtt->vm,
> i915_gem_object_get_dma_address(obj, 0),
> - offset,
> - i915_gem_get_pat_index(i915,
> - I915_CACHE_NONE),
> - 0);
> + offset, ggtt->vm.i915->pat_uc, 0);
> }
>
> order = i915_random_order(count, &prng);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> index d985d9bae2e8..b82fe0ef8cd7 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> @@ -1070,9 +1070,7 @@ static int igt_lmem_write_cpu(void *arg)
> /* Put the pages into a known state -- from the gpu for added fun */
> intel_engine_pm_get(engine);
> err = intel_context_migrate_clear(engine->gt->migrate.context, NULL,
> - obj->mm.pages->sgl,
> - i915_gem_get_pat_index(i915,
> - I915_CACHE_NONE),
> + obj->mm.pages->sgl, i915->pat_uc,
> true, 0xdeadbeaf, &rq);
> if (rq) {
> dma_resv_add_fence(obj->base.resv, &rq->fence,
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 09d4bbcdcdbf..ad778842cba2 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -126,7 +126,12 @@ struct drm_i915_private *mock_gem_device(void)
> struct drm_i915_private *i915;
> struct intel_device_info *i915_info;
> struct pci_dev *pdev;
> - unsigned int i;
> + static const i915_cache_t legacy_cache_modes[] = {
> + [0] = I915_CACHE(UC),
> + [1] = I915_CACHE(WB),
> + [2] = _I915_CACHE(WB, L3),
> + [3] = I915_CACHE(WT),
> + };
> int ret;
>
> pdev = kzalloc(sizeof(*pdev), GFP_KERNEL);
> @@ -187,8 +192,7 @@ struct drm_i915_private *mock_gem_device(void)
> /* simply use legacy cache level for mock device */
> i915_info = (struct intel_device_info *)INTEL_INFO(i915);
> i915_info->max_pat_index = 3;
> - for (i = 0; i < I915_MAX_CACHE_LEVEL; i++)
> - i915_info->cachelevel_to_pat[i] = i;
> + memcpy(i915_info->cache_modes, legacy_cache_modes, sizeof(legacy_cache_modes));
>
> intel_memory_regions_hw_probe(i915);
>
> --
> 2.39.2
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/dri-devel/attachments/20230629/7df69854/attachment-0001.htm>
More information about the dri-devel
mailing list