[Intel-gfx] [PATCH v4 2/3] drm/i915: use pat_index instead of cache_level
Yang, Fei
fei.yang at intel.com
Wed May 3 20:39:52 UTC 2023
[...]
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
>> index 8c70a0ec7d2f..27c948350b5b 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
>> @@ -54,6 +54,25 @@ unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
>> return INTEL_INFO(i915)->cachelevel_to_pat[level];
>> }
>>
>> +bool i915_gem_object_has_cache_level(const struct drm_i915_gem_object *obj,
>> + enum i915_cache_level lvl)
>> +{
>> + /*
>> + * cache_level == I915_CACHE_INVAL indicates the UMD's have set the
>> + * caching policy through pat_index, in which case the KMD should
>> + * leave the coherency to be managed by user space, simply return
>> + * true here.
>> + */
>> + if (obj->cache_level == I915_CACHE_INVAL)
>> + return true;
>> +
>> + /*
>> + * Otherwise the pat_index should have been converted from cache_level
>> + * so that the following comparison is valid.
>> + */
>> + return obj->pat_index == i915_gem_get_pat_index(obj_to_i915(obj), lvl);
>> +}
>> +
>> struct drm_i915_gem_object *i915_gem_object_alloc(void)
>> {
>> struct drm_i915_gem_object *obj;
>> @@ -133,7 +152,7 @@ void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
>> {
>> struct drm_i915_private *i915 = to_i915(obj->base.dev);
>>
>> - obj->cache_level = cache_level;
>> + obj->pat_index = i915_gem_get_pat_index(i915, cache_level);
>
> obj->cache_level is only ever set to "invalid" from the set pat
> extension? Doesn't that make it a boolean so there is no need for three
> bits to hold the enum, just the "pat has been externally set" bit really?
Will update.
>>
>> if (cache_level != I915_CACHE_NONE)
>> obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
[...]
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 41389a32e998..9a4922da3a71 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -139,21 +139,56 @@ static const char *stringify_vma_type(const struct i915_vma *vma)
>> return "ppgtt";
>> }
>>
>> -static const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
>> -{
>> - switch (type) {
>> - case I915_CACHE_NONE: return " uncached";
>> - case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
>> - case I915_CACHE_L3_LLC: return " L3+LLC";
>> - case I915_CACHE_WT: return " WT";
>> - default: return "";
>> +static const char *i915_cache_level_str(struct drm_i915_gem_object *obj)
>> +{
>> + struct drm_i915_private *i915 = obj_to_i915(obj);
>> +
>> + if (IS_METEORLAKE(i915)) {
>> + switch (obj->pat_index) {
>> + case 0: return " WB";
>> + case 1: return " WT";
>> + case 2: return " UC";
>> + case 3: return " WB (1-Way Coh)";
>> + case 4: return " WB (2-Way Coh)";
>> + default: return " not defined";
>> + }
>> + } else if (IS_PONTEVECCHIO(i915)) {
>> + switch (obj->pat_index) {
>> + case 0: return " UC";
>> + case 1: return " WC";
>> + case 2: return " WT";
>> + case 3: return " WB";
>> + case 4: return " WT (CLOS1)";
>> + case 5: return " WB (CLOS1)";
>> + case 6: return " WT (CLOS2)";
>> + case 7: return " WT (CLOS2)";
>> + default: return " not defined";
>> + }
>> + } else if (GRAPHICS_VER(i915) >= 12) {
>> + switch (obj->pat_index) {
>> + case 0: return " WB";
>> + case 1: return " WC";
>> + case 2: return " WT";
>> + case 3: return " UC";
>> + default: return " not defined";
>> + }
>> + } else {
>> + if (i915_gem_object_has_cache_level(obj, I915_CACHE_NONE))
>> + return " uncached";
>
> This will print uncached for all legacy platforms if set pat extension
> has been used, regardless of the index set.
Will update. Should just use obj->pat_index here.
> Are we okay with that? I find it questionable and would say no. It
> diverges from >= 12 and so is confusing.
>
>> + else if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC))
>> + return HAS_LLC(i915) ? " LLC" : " snooped";
>> + else if (i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
>> + return " L3+LLC";
>> + else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
>> + return " WT";
>> + else
>> + return " not defined";
>
> Another thing is why use different names for caching modes between
> "legacy" and the rest?
For new platforms the string matches bspec. For legacy platforms I think it's
still better to inherit the strings, no surprises here. What do you think?
-Fei
> Does this block aligns better with bspec names ie. there is no notion of
> UC/WB/WT there?
>
> Regards,
>
> Tvrtko
More information about the Intel-gfx
mailing list