[Intel-gfx] [RFC 1/2] drm/i915: Refactor PAT/object cache handling

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Jul 14 10:08:18 UTC 2023


On 14/07/2023 06:36, Yang, Fei wrote:
> [snip]
>> @@ -326,10 +330,10 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
>>                goto out;
>>        }
>>
>> -     if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
>> -         i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
>> +     if (i915_gem_object_has_cache_mode(obj, I915_CACHE_MODE_WB) &&
>> +         i915_gem_object_has_cache_flag(obj, I915_CACHE_FLAG_COH1W) == 1)
> 
> Need to check L3 flag as well? The original code has L3_LLC.
> if (i915_gem_object_has_cache_mode(obj, I915_CACHE_MODE_WB) &&
>      (i915_gem_object_has_cache_flag(obj, I915_CACHE_FLAG_COH1W) == 1 ||
>       i915_gem_object_has_cache_flag(obj, I915_CACHE_FLAG_L3) == 1))

Don't think so, because I have:

   [2] = __I915_CACHE(WB, BIT(I915_CACHE_FLAG_COH1W) | BIT(I915_CACHE_FLAG_L3)), \

So L3 and COH1W are both set for the equivalent of old I915_CACHE_L3_LLC, meaning I915_CACHING_CACHED will be returned for either - same as old code.

> 
>>                args->caching = I915_CACHING_CACHED;
> 
> [snip]
>> +int i915_cache_init(struct drm_i915_private *i915)
>> +{
>> +     struct drm_printer p = drm_info_printer(i915->drm.dev);
>> +     char buf[I915_CACHE_NAME_LEN];
>> +     int ret;
>> +
>> +     i915->cache = HAS_LLC(i915) ? I915_CACHE_CACHED : I915_CACHE_NONE;
>> +     i915_cache_print(buf, sizeof(buf), "", i915->cache);
>> +     drm_printf(&p, "Coherent cache mode %s", buf);
>> +
>> +     ret = i915_cache_find_pat(i915, I915_CACHE_NONE);
>> +     if (ret < 0)
>> +             return -ENODEV;
>> +     drm_info(&i915->drm, "Using PAT index %u for uncached access\n", ret);
>> +     i915->pat_uc = ret;
>> +
>> +     ret = i915_cache_find_pat(i915, I915_CACHE_CACHED);
>> +     if (ret < 0)
>> +             return -ENODEV;
>> +     drm_info(&i915->drm, "Using PAT index %u for write-back access\n", ret);
>> +     i915->pat_wb = ret;
> 
> I think i915->pat_wt is needed as well, because display driver has code like this,
> HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE
> 
> How did you handle functions like initial_plane_vma() and i915_gem_object_pin_to_display_plane()?

They have:

	i915_gem_object_set_cache_coherency(obj, HAS_WT(i915) ?
					    I915_CACHE_WT : I915_CACHE_NONE);

Where HAS_WT is equal to HAS_EDRAM. So it is not always WT. I don't have the history/background of that but that is how it is.

So I could add i915->pat_wt but a) I don't think it is right for display (display would need more like i915->display_pat), and b) there would be no call sites for it.

i915->display_pat (or display_cache actually) would be similar to how I added i915->cache, not be best name I know, but it is replacing many occurrences of:

   cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
   i915_gem_object_set_cache_coherency(obj, cache_level)

I am unsure what would be the proper name for i915->cache, which would make it correctly self-documenting?

> 
>> +     return 0;
>> +}
> 
> [snip]
>> -#define TGL_CACHELEVEL \
>> -     .cachelevel_to_pat = { \
>> -             [I915_CACHE_NONE]   = 3, \
>> -             [I915_CACHE_LLC]    = 0, \
>> -             [I915_CACHE_L3_LLC] = 0, \
>> -             [I915_CACHE_WT]     = 2, \
>> +/*
>> + * TODO/QQQ
>> + *
>> + * PAT 0 - is it 1-way or 2-way?
> 
> 1-way
> 
>> + */
>> +#define GEN12_CACHE_MODES \
>> +     .cache_modes = { \
>> +             [0] = _I915_CACHE(WB, COH1W), \
>> +             [1] = I915_CACHE(WC), \
>> +             [2] = I915_CACHE(WT), \
>> +             [3] = I915_CACHE(UC), \
>>        }
> 
> [snip]
>> -#define PVC_CACHELEVEL \
>> -     .cachelevel_to_pat = { \
>> -             [I915_CACHE_NONE]   = 0, \
>> -             [I915_CACHE_LLC]    = 3, \
>> -             [I915_CACHE_L3_LLC] = 3, \
>> -             [I915_CACHE_WT]     = 2, \
>> +/*
>> + * TODO/QQQ
>> + *
>> + * PAT 3 - is it 1-way or 2-way?
> 
> 1-way
> 
> PVC access is always coherent, it should have 1-way for [5] and [7] as well.

Thanks x2!

Regards,

Tvrtko

> 
>> + */
>> +#define PVC_CACHE_MODES \
>> +     .cache_modes = { \
>> +             [0] = I915_CACHE(UC), \
>> +             [1] = I915_CACHE(WC), \
>> +             [2] = I915_CACHE(WT), \
>> +             [3] = _I915_CACHE(WB, COH1W), \
>> +             [4] = _I915_CACHE(WT, CLOS1), \
>> +             [5] = _I915_CACHE(WB, CLOS1), \
>> +             [6] = _I915_CACHE(WT, CLOS2), \
>> +             [7] = _I915_CACHE(WB, CLOS2), \
>>        }


More information about the Intel-gfx mailing list