[Intel-xe] [PATCH] drm/xe/ppgtt: Add support for correct PAT encoding in Page Table Entry (PTE) format

Mon Jul 24 14:11:56 UTC 2023


On 7/21/2023 11:42 PM, Lucas De Marchi wrote:
> On Fri, Jul 21, 2023 at 10:21:31PM +0530, Vodapalli, Ravi Kumar wrote:
>>
>>
>> On 7/21/2023 9:43 PM, Lucas De Marchi wrote:
>>> On Thu, Jul 20, 2023 at 07:48:33PM +0530, Vodapalli, Ravi Kumar wrote:
>>>>
>>>>
>>>> On 7/17/2023 6:53 PM, Lucas De Marchi wrote:
>>>>> On Wed, Jul 12, 2023 at 02:52:37PM +0000, Matthew Brost wrote:
>>>>>> On Wed, Jul 12, 2023 at 06:26:40PM +0530, Ravi Kumar Vodapalli 
>>>>>> wrote:
>>>>>>> Different platforms has different PAT encoding in PTE format, add
>>>>>>> correct PAT encoding for pre-Xe2 platforms (XELP, XEHPC, XELPG).
>>>>>>>
>>>>>>> Bspec: 45101, 71582
>>>>>>> Signed-off-by: Ravi Kumar Vodapalli 
>>>>>>> <ravi.kumar.vodapalli at intel.com>
>>>>>>> ---
>>>>>>>  drivers/gpu/drm/xe/xe_pt.c       | 110 
>>>>>>> +++++++++++++++++++++++++++----
>>>>>>>  drivers/gpu/drm/xe/xe_pte.h      |  31 +++++++++
>>>>>>>  drivers/gpu/drm/xe/xe_vm_types.h |   1 +
>>>>>>>  3 files changed, 129 insertions(+), 13 deletions(-)
>>>>>>>  create mode 100644 drivers/gpu/drm/xe/xe_pte.h
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/xe/xe_pt.c 
>>>>>>> b/drivers/gpu/drm/xe/xe_pt.c
>>>>>>> index 00855681c0d5..e7f8bd4dd518 100644
>>>>>>> --- a/drivers/gpu/drm/xe/xe_pt.c
>>>>>>> +++ b/drivers/gpu/drm/xe/xe_pt.c
>>>>>>> @@ -16,6 +16,7 @@
>>>>>>>  #include "xe_trace.h"
>>>>>>>  #include "xe_ttm_stolen_mgr.h"
>>>>>>>  #include "xe_vm.h"
>>>>>>> +#include "xe_pte.h"
>>>>>>>
>>>>>>>  struct xe_pt_dir {
>>>>>>>      struct xe_pt pt;
>>>>>>> @@ -111,19 +112,7 @@ static u64 __pte_encode(u64 pte, enum 
>>>>>>> xe_cache_level cache,
>>>>>>>      if (unlikely(vma && xe_vma_is_null(vma)))
>>>>>>>          pte |= XE_PTE_NULL;
>>>>>>>
>>>>>>> -    /* FIXME: I don't think the PPAT handling is correct for 
>>>>>>> MTL */
>>>>>>> -
>>>>>>> -    switch (cache) {
>>>>>>> -    case XE_CACHE_NONE:
>>>>>>> -        pte |= PPAT_UNCACHED;
>>>>>>> -        break;
>>>>>>> -    case XE_CACHE_WT:
>>>>>>> -        pte |= PPAT_DISPLAY_ELLC;
>>>>>>> -        break;
>>>>>>> -    default:
>>>>>>> -        pte |= PPAT_CACHED;
>>>>>>> -        break;
>>>>>>> -    }
>>>>>>> +    pte = xe_vma_vm(vma)->pte_encode(pte, cache);
>>>>>>>
>>>>>>>      if (pt_level == 1)
>>>>>>>          pte |= XE_PDE_PS_2M;
>>>>>>> @@ -187,6 +176,94 @@ static u64 __xe_pt_empty_pte(struct xe_tile 
>>>>>>> *tile, struct xe_vm *vm,
>>>>>>>      }
>>>>>>>  }
>>>>>>>
>>>>>>> +static u64 xelp_pte_encode_pat(u8 pat_index)
>>>>>>> +{
>>>>>>> +    u64 pte_pat = 0;
>>>>>>> +
>>>>>>> +    if (pat_index & BIT(0))
>>>>>>> +        pte_pat |= BIT(3);
>>>>>>> +
>>>>>>> +    if (pat_index & BIT(1))
>>>>>>> +        pte_pat |= BIT(4);
>>>>>>> +
>>>>>>> +    if (pat_index & BIT(2))
>>>>>>> +        pte_pat |= BIT(7);
>>>>>>> +
>>>>>>> +    return pte_pat;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static u64 __xelp_pte_encode(u64 pte, enum xe_cache_level cache)
>>>>>>> +{
>>>>>>> +    pte &= ~XELP_PAT_MASK;
>>>>>>> +
>>>>>>> +    switch (cache) {
>>>>>>> +    case XE_CACHE_NONE:
>>>>>>> +        pte |= xelp_pte_encode_pat(XELP_PAT_UNCACHE);
>>>>>>> +        break;
>>>>>>> +    case XE_CACHE_WT:
>>>>>>> +        pte |= xelp_pte_encode_pat(XELP_PAT_WT_CACHE);
>>>>>>> +        break;
>>>>>>> +    case XE_CACHE_WB:
>>>>>>> +        pte |= xelp_pte_encode_pat(XELP_PAT_WB_CACHE);
>>>>>>> +        break;
>>>>>>> +    default:
>>>>>>> +        /* pte |= PPAT_CACHED; */
>>>>>>> +        break;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return pte;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static u64 __xehpc_pte_encode(u64 pte, enum xe_cache_level cache)
>>>>>>> +{
>>>>>>> +    pte &= ~XELP_PAT_MASK;
>>>>>>> +
>>>>>>> +    /* Initially assumed as CLOS0.
>>>>>>> +     * To Do: CLOS1/CLOS2 has to be set depending on requirement
>>>>>>> +     */
>>>>>>> +    switch (cache) {
>>>>>>> +    case XE_CACHE_NONE:
>>>>>>> +        pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_UNCACHE);
>>>>>>> +        break;
>>>>>>> +    case XE_CACHE_WT:
>>>>>>> +        pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_WT_CACHE);
>>>>>>> +        break;
>>>>>>> +    case XE_CACHE_WB:
>>>>>>> +        pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_WB_CACHE);
>>>>>>> +        break;
>>>>>>> +    default:
>>>>>>> +        /* pte |= PPAT_CACHED; */
>>>>>>> +        break;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return pte;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static u64 __xelpg_pte_encode(u64 pte, enum xe_cache_level cache)
>>>>>>> +{
>>>>>>> +    pte &= ~XELP_PAT_MASK;
>>>>>>> +
>>>>>>> +    /* Initially assumed as Non cohorent mode, correct values
>>>>>>> +     * has to be set depending on requirement.
>>>>>>> +     */
>>>>>>> +    switch (cache) {
>>>>>>> +    case XE_CACHE_NONE:
>>>>>>> +        pte |= xelp_pte_encode_pat(XELPG_PAT_UNCACHE);
>>>>>>> +        break;
>>>>>>> +    case XE_CACHE_WT:
>>>>>>> +        pte |= xelp_pte_encode_pat(XELPG_PAT_WT_CACHE);
>>>>>>> +        break;
>>>>>>> +    case XE_CACHE_WB:
>>>>>>> +        pte |= xelp_pte_encode_pat(XELPG_PAT_WB_CACHE);
>>>>>>> +        break;
>>>>>>> +    default:
>>>>>>> +        /* pte |= PPAT_CACHED; */
>>>>>>> +        break;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return pte;
>>>>>>> +}
>>>>>>> +
>>>>>>>  /**
>>>>>>>   * xe_pt_create() - Create a page-table.
>>>>>>>   * @vm: The vm to create for.
>>>>>>> @@ -216,6 +293,13 @@ struct xe_pt *xe_pt_create(struct xe_vm 
>>>>>>> *vm, struct xe_tile *tile,
>>>>>>>      if (!pt)
>>>>>>>          return ERR_PTR(-ENOMEM);
>>>>>>>
>>>>>>> +    if (GRAPHICS_VERx100(vm->xe) >= 1270)
>>>>>>> +        vm->pte_encode = __xelpg_pte_encode;
>>>>>>> +    else if (GRAPHICS_VERx100(vm->xe) >= 1260)
>>>>>>> +        vm->pte_encode = __xehpc_pte_encode;
>>>>>>> +    else
>>>>>>> +        vm->pte_encode = __xelp_pte_encode;
>>>>>>> +
>>>>>>>      bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
>>>>>>>                    ttm_bo_type_kernel,
>>>>>>>                    XE_BO_CREATE_VRAM_IF_DGFX(tile) |
>>>>>>> diff --git a/drivers/gpu/drm/xe/xe_pte.h 
>>>>>>> b/drivers/gpu/drm/xe/xe_pte.h
>>>>>>> new file mode 100644
>>>>>>> index 000000000000..8b3b5b471a99
>>>>>>> --- /dev/null
>>>>>>> +++ b/drivers/gpu/drm/xe/xe_pte.h
>>>>>>
>>>>>> s/xe_pte.h/xe_pte_pat.h
>>>>>>
>>>>>> Just my option prefer this.
>>>>>
>>>>> dunno, but since we have different pte formats for ggtt and ppgtt, 
>>>>> IMO
>>>>> it would add to the confusion.
>>>>>
>>>>> Maybe this belongs in xe_pat.h since it's xe_pat.c that programs the
>>>>> tables giving the meaning spelled out in the macros here for each 
>>>>> index.
>>>>> The rest of the code then need to set the bits in the PTE to 
>>>>> reference
>>>>> those indexes, according to the PTE format.
>>>>>
>>>>>>
>>>>>>> @@ -0,0 +1,31 @@
>>>>>>> +/* SPDX-License-Identifier: MIT */
>>>>>>> +/*
>>>>>>> + * Copyright © 2021 Intel Corporation
>>>>>>> + */
>>>>>>> +
>>>>>>> +#ifndef _XE_PTE_H_
>>>>>>> +#define _XE_PTE_H_
>>>>>>> +
>>>>>>> +#define XELP_PAT_MASK                  BIT_ULL(7) | BIT_ULL(4) 
>>>>>>> | BIT_ULL(3)
>>>>>>> +#define XELP_PAT_WB_CACHE              0
>>>>>>> +#define XELP_PAT_WC_CACHE              1
>>>>>>> +#define XELP_PAT_WT_CACHE              2
>>>>>>> +#define XELP_PAT_UNCACHE               3
>>>>>
>>>>> example... this duplicates what we have in xe_pat.c (comment added 
>>>>> here
>>>>> for clarity)
>>>>>
>>>>> static const u32 tgl_pat_table[] = {
>>>>>         [0] = TGL_PAT_WB,
>>>>>         [1] = TGL_PAT_WC,
>>>>>         [2] = TGL_PAT_WT,
>>>>>         [3] = TGL_PAT_UC,
>>>>>     // .... those are programmed, but not expected to be used
>>>>>         [4] = TGL_PAT_WB,
>>>>>         [5] = TGL_PAT_WB,
>>>>>         [6] = TGL_PAT_WB,
>>>>>         [7] = TGL_PAT_WB,
>>>>> };
>>>>>
>>>>
>>>> TGL_PAT_WB maps to PAT table register bit mapping, we cannot 
>>>> directly use tgl_pat_table to get PAT index rather we
>>>> need to implement code to retrieve the index like below instead we 
>>>> can use macro directly.
>>>>
>>>> xelp_pte_encode_pat(u8 cache_type) //example pass TGL_PAT_WB
>>>> {
>>>>     u8 index;
>>>>     for (i=0, i < 32, i++)
>>>>         if (tgl_pat_table[i] == cache_type)
>>>
>>> no, we don't want the caller to pass the platform-specific value.
>>> It should pass a platfrom-agnostic value and xe_pat_get_index()
>>> translates that to the platform-specific index. There are just 2 
>>> entries
>>> really used by the kernel: uc and wb. Then the xe_pat.c returns what is
>>> the entry to be used in those cases.
>>>
>>> Lucas De Marchi
>>>
>>
>> I have discussed Regarding encoding the index into pat pte with 
>> Matt,Roper
>> He said instead of writing common function for both ppgtt and ggtt, 
>> the best approach would be as below so that in future adding more 
>> support will be easier
>>
>> static u64 xelp_pte_encode_pat(u8 pat_index)
>> {
>>     u64 pte_pat = 0;
>>
>>     if (pat_index & BIT(0))
>>         pte_pat |= BIT(3);
>>
>>     if (pat_index & BIT(1))
>>         pte_pat |= BIT(4);
>>
>>     if (pat_index & BIT(2))
>>         pte_pat |= BIT(7);
>>
>>     return pte_pat;
>> }
>
> that is fine. My point is that selecting the *index* and the encoding
> are 2 different things. Here it's only doing a index -> pte, which is
> fine: there will be one for ppgtt and another for ggtt. However gettting
> the platform-specific index is shared. Check again the snippet I shared
> previously:
>
>     // this is valid for both ppgtt and ggtt
>     pat = xe_pat_get_index(xe, cache);
>

     For a cache type, index differs from platform to platform, we will 
need to add more if conditions for adding a new platform like below sudo 
code, we need to re think on it

     xe_pat_get_index(xe, cache)
{
     if (GRAPHICS_VERx100(xe) >= 1270)
     {
                 switch (cache) {
                 case XE_CACHE_NONE:
                         return 3;
                 case XE_CACHE_WT:
                         return 2;
                 case XE_CACHE_WB:
                         return 0;
                 }
       }
       else if (GRAPHICS_VERx100(xe) >= 1260)
      {
                 switch (cache) {
                 case XE_CACHE_NONE:
                         return 0;
                 case XE_CACHE_WT:
                         return 2;
                 case XE_CACHE_WB:
                         return 3;
                 }
         }
         else if (new platform)
         {
         }
     }


>     // this changes between ggtt / ppgtt
>     pte |= vm->pte_encode_pat(xe, pat);
>
>
> pte_encode_pat() is the hook set during init to xelp_pte_encode_pat(),
> for this platform.  The index  however is obtained in the call above.
>
> Lucas De Marchi