[Intel-xe] [PATCH] drm/xe/ppgtt: Add support for correct PAT encoding in Page Table Entry (PTE) format

Fri Jul 21 16:51:31 UTC 2023


On 7/21/2023 9:43 PM, Lucas De Marchi wrote:
> On Thu, Jul 20, 2023 at 07:48:33PM +0530, Vodapalli, Ravi Kumar wrote:
>>
>>
>> On 7/17/2023 6:53 PM, Lucas De Marchi wrote:
>>> On Wed, Jul 12, 2023 at 02:52:37PM +0000, Matthew Brost wrote:
>>>> On Wed, Jul 12, 2023 at 06:26:40PM +0530, Ravi Kumar Vodapalli wrote:
>>>>> Different platforms has different PAT encoding in PTE format, add
>>>>> correct PAT encoding for pre-Xe2 platforms (XELP, XEHPC, XELPG).
>>>>>
>>>>> Bspec: 45101, 71582
>>>>> Signed-off-by: Ravi Kumar Vodapalli <ravi.kumar.vodapalli at intel.com>
>>>>> ---
>>>>>  drivers/gpu/drm/xe/xe_pt.c       | 110 
>>>>> +++++++++++++++++++++++++++----
>>>>>  drivers/gpu/drm/xe/xe_pte.h      |  31 +++++++++
>>>>>  drivers/gpu/drm/xe/xe_vm_types.h |   1 +
>>>>>  3 files changed, 129 insertions(+), 13 deletions(-)
>>>>>  create mode 100644 drivers/gpu/drm/xe/xe_pte.h
>>>>>
>>>>> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
>>>>> index 00855681c0d5..e7f8bd4dd518 100644
>>>>> --- a/drivers/gpu/drm/xe/xe_pt.c
>>>>> +++ b/drivers/gpu/drm/xe/xe_pt.c
>>>>> @@ -16,6 +16,7 @@
>>>>>  #include "xe_trace.h"
>>>>>  #include "xe_ttm_stolen_mgr.h"
>>>>>  #include "xe_vm.h"
>>>>> +#include "xe_pte.h"
>>>>>
>>>>>  struct xe_pt_dir {
>>>>>      struct xe_pt pt;
>>>>> @@ -111,19 +112,7 @@ static u64 __pte_encode(u64 pte, enum 
>>>>> xe_cache_level cache,
>>>>>      if (unlikely(vma && xe_vma_is_null(vma)))
>>>>>          pte |= XE_PTE_NULL;
>>>>>
>>>>> -    /* FIXME: I don't think the PPAT handling is correct for MTL */
>>>>> -
>>>>> -    switch (cache) {
>>>>> -    case XE_CACHE_NONE:
>>>>> -        pte |= PPAT_UNCACHED;
>>>>> -        break;
>>>>> -    case XE_CACHE_WT:
>>>>> -        pte |= PPAT_DISPLAY_ELLC;
>>>>> -        break;
>>>>> -    default:
>>>>> -        pte |= PPAT_CACHED;
>>>>> -        break;
>>>>> -    }
>>>>> +    pte = xe_vma_vm(vma)->pte_encode(pte, cache);
>>>>>
>>>>>      if (pt_level == 1)
>>>>>          pte |= XE_PDE_PS_2M;
>>>>> @@ -187,6 +176,94 @@ static u64 __xe_pt_empty_pte(struct xe_tile 
>>>>> *tile, struct xe_vm *vm,
>>>>>      }
>>>>>  }
>>>>>
>>>>> +static u64 xelp_pte_encode_pat(u8 pat_index)
>>>>> +{
>>>>> +    u64 pte_pat = 0;
>>>>> +
>>>>> +    if (pat_index & BIT(0))
>>>>> +        pte_pat |= BIT(3);
>>>>> +
>>>>> +    if (pat_index & BIT(1))
>>>>> +        pte_pat |= BIT(4);
>>>>> +
>>>>> +    if (pat_index & BIT(2))
>>>>> +        pte_pat |= BIT(7);
>>>>> +
>>>>> +    return pte_pat;
>>>>> +}
>>>>> +
>>>>> +static u64 __xelp_pte_encode(u64 pte, enum xe_cache_level cache)
>>>>> +{
>>>>> +    pte &= ~XELP_PAT_MASK;
>>>>> +
>>>>> +    switch (cache) {
>>>>> +    case XE_CACHE_NONE:
>>>>> +        pte |= xelp_pte_encode_pat(XELP_PAT_UNCACHE);
>>>>> +        break;
>>>>> +    case XE_CACHE_WT:
>>>>> +        pte |= xelp_pte_encode_pat(XELP_PAT_WT_CACHE);
>>>>> +        break;
>>>>> +    case XE_CACHE_WB:
>>>>> +        pte |= xelp_pte_encode_pat(XELP_PAT_WB_CACHE);
>>>>> +        break;
>>>>> +    default:
>>>>> +        /* pte |= PPAT_CACHED; */
>>>>> +        break;
>>>>> +    }
>>>>> +
>>>>> +    return pte;
>>>>> +}
>>>>> +
>>>>> +static u64 __xehpc_pte_encode(u64 pte, enum xe_cache_level cache)
>>>>> +{
>>>>> +    pte &= ~XELP_PAT_MASK;
>>>>> +
>>>>> +    /* Initially assumed as CLOS0.
>>>>> +     * To Do: CLOS1/CLOS2 has to be set depending on requirement
>>>>> +     */
>>>>> +    switch (cache) {
>>>>> +    case XE_CACHE_NONE:
>>>>> +        pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_UNCACHE);
>>>>> +        break;
>>>>> +    case XE_CACHE_WT:
>>>>> +        pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_WT_CACHE);
>>>>> +        break;
>>>>> +    case XE_CACHE_WB:
>>>>> +        pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_WB_CACHE);
>>>>> +        break;
>>>>> +    default:
>>>>> +        /* pte |= PPAT_CACHED; */
>>>>> +        break;
>>>>> +    }
>>>>> +
>>>>> +    return pte;
>>>>> +}
>>>>> +
>>>>> +static u64 __xelpg_pte_encode(u64 pte, enum xe_cache_level cache)
>>>>> +{
>>>>> +    pte &= ~XELP_PAT_MASK;
>>>>> +
>>>>> +    /* Initially assumed as Non cohorent mode, correct values
>>>>> +     * has to be set depending on requirement.
>>>>> +     */
>>>>> +    switch (cache) {
>>>>> +    case XE_CACHE_NONE:
>>>>> +        pte |= xelp_pte_encode_pat(XELPG_PAT_UNCACHE);
>>>>> +        break;
>>>>> +    case XE_CACHE_WT:
>>>>> +        pte |= xelp_pte_encode_pat(XELPG_PAT_WT_CACHE);
>>>>> +        break;
>>>>> +    case XE_CACHE_WB:
>>>>> +        pte |= xelp_pte_encode_pat(XELPG_PAT_WB_CACHE);
>>>>> +        break;
>>>>> +    default:
>>>>> +        /* pte |= PPAT_CACHED; */
>>>>> +        break;
>>>>> +    }
>>>>> +
>>>>> +    return pte;
>>>>> +}
>>>>> +
>>>>>  /**
>>>>>   * xe_pt_create() - Create a page-table.
>>>>>   * @vm: The vm to create for.
>>>>> @@ -216,6 +293,13 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, 
>>>>> struct xe_tile *tile,
>>>>>      if (!pt)
>>>>>          return ERR_PTR(-ENOMEM);
>>>>>
>>>>> +    if (GRAPHICS_VERx100(vm->xe) >= 1270)
>>>>> +        vm->pte_encode = __xelpg_pte_encode;
>>>>> +    else if (GRAPHICS_VERx100(vm->xe) >= 1260)
>>>>> +        vm->pte_encode = __xehpc_pte_encode;
>>>>> +    else
>>>>> +        vm->pte_encode = __xelp_pte_encode;
>>>>> +
>>>>>      bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
>>>>>                    ttm_bo_type_kernel,
>>>>>                    XE_BO_CREATE_VRAM_IF_DGFX(tile) |
>>>>> diff --git a/drivers/gpu/drm/xe/xe_pte.h 
>>>>> b/drivers/gpu/drm/xe/xe_pte.h
>>>>> new file mode 100644
>>>>> index 000000000000..8b3b5b471a99
>>>>> --- /dev/null
>>>>> +++ b/drivers/gpu/drm/xe/xe_pte.h
>>>>
>>>> s/xe_pte.h/xe_pte_pat.h
>>>>
>>>> Just my option prefer this.
>>>
>>> dunno, but since we have different pte formats for ggtt and ppgtt, IMO
>>> it would add to the confusion.
>>>
>>> Maybe this belongs in xe_pat.h since it's xe_pat.c that programs the
>>> tables giving the meaning spelled out in the macros here for each 
>>> index.
>>> The rest of the code then need to set the bits in the PTE to reference
>>> those indexes, according to the PTE format.
>>>
>>>>
>>>>> @@ -0,0 +1,31 @@
>>>>> +/* SPDX-License-Identifier: MIT */
>>>>> +/*
>>>>> + * Copyright © 2021 Intel Corporation
>>>>> + */
>>>>> +
>>>>> +#ifndef _XE_PTE_H_
>>>>> +#define _XE_PTE_H_
>>>>> +
>>>>> +#define XELP_PAT_MASK                  BIT_ULL(7) | BIT_ULL(4) | 
>>>>> BIT_ULL(3)
>>>>> +#define XELP_PAT_WB_CACHE              0
>>>>> +#define XELP_PAT_WC_CACHE              1
>>>>> +#define XELP_PAT_WT_CACHE              2
>>>>> +#define XELP_PAT_UNCACHE               3
>>>
>>> example... this duplicates what we have in xe_pat.c (comment added here
>>> for clarity)
>>>
>>> static const u32 tgl_pat_table[] = {
>>>         [0] = TGL_PAT_WB,
>>>         [1] = TGL_PAT_WC,
>>>         [2] = TGL_PAT_WT,
>>>         [3] = TGL_PAT_UC,
>>>     // .... those are programmed, but not expected to be used
>>>         [4] = TGL_PAT_WB,
>>>         [5] = TGL_PAT_WB,
>>>         [6] = TGL_PAT_WB,
>>>         [7] = TGL_PAT_WB,
>>> };
>>>
>>
>> TGL_PAT_WB maps to PAT table register bit mapping, we cannot directly 
>> use tgl_pat_table to get PAT index rather we
>> need to implement code to retrieve the index like below instead we 
>> can use macro directly.
>>
>> xelp_pte_encode_pat(u8 cache_type) //example pass TGL_PAT_WB
>> {
>>     u8 index;
>>     for (i=0, i < 32, i++)
>>         if (tgl_pat_table[i] == cache_type)
>
> no, we don't want the caller to pass the platform-specific value.
> It should pass a platfrom-agnostic value and xe_pat_get_index()
> translates that to the platform-specific index. There are just 2 entries
> really used by the kernel: uc and wb. Then the xe_pat.c returns what is
> the entry to be used in those cases.
>
> Lucas De Marchi
>

I have discussed Regarding encoding the index into pat pte with Matt,Roper
He said instead of writing common function for both ppgtt and ggtt, the 
best approach would be as below so that in future adding more support 
will be easier

static u64 xelp_pte_encode_pat(u8 pat_index)
{
     u64 pte_pat = 0;

     if (pat_index & BIT(0))
         pte_pat |= BIT(3);

     if (pat_index & BIT(1))
         pte_pat |= BIT(4);

     if (pat_index & BIT(2))
         pte_pat |= BIT(7);

     return pte_pat;
}

Thanks,
Ravi Kumar V

>>             index = i;
>> }
>>
>>
>>>
>>> IMO the abstration could be something like
>>>
>>>     // this is valid for both ppgtt and ggtt
>>>     pat = xe_pat_get_index(xe, cache);
>>>
>>>     // this changes between ggtt / ppgtt
>>>     pte |= vm->pte_encode_pat(xe, pat);
>>>
>>
>> I have one approach like below example where we can use same function 
>> for both ggtt and ppgtt
>>
>> static u64 pte_encode_pat(enum gtt_type, u8 pat_index)
>> {
>>     u64 pte_pat = 0;
>>
>>     if (gtt_type == PPGTT)
>>     {
>>         if (pat_index & BIT(0))
>>             pte_pat |= BIT(3);
>>
>>         if (pat_index & BIT(1))
>>             pte_pat |= BIT(4);
>>
>>         if (pat_index & BIT(2))
>>             pte_pat |= BIT(7);
>>      }
>>     else if(gtt_type = GGTT)
>>     {
>>         GGTT type PAT PTE encoding code here.
>>     }
>>         return pte_pat;
>> }
>>
>>
>> vm->pte_encode_pat(u64 pte, enum gtt_type, xe_cache_level cache)
>> {
>>     switch (cache) {
>>            case XE_CACHE_NONE:
>>             pte |= pte_encode_pat(gtt_type, XELP_PAT_UNCACHE);
>>             break;
>> }
>>
>> what's your opinion.
>>
>> Thanks,
>> Ravi Kumar V
>>>
>>> so the PAT indexes are handled by xe_pat.[ch]. The pte *encoding*
>>> is then handled differently by ggtt and ppgtt. Thoughts?
>>>
>>> Lucas De Marchi
>>>
>>>
>>>>> +
>>>>> +#define XEHPC_PAT_CLOS0_UNCACHE        0
>>>>> +#define XEHPC_PAT_CLOS0_WC_CACHE       1
>>>>> +#define XEHPC_PAT_CLOS0_WT_CACHE       2
>>>>> +#define XEHPC_PAT_CLOS0_WB_CACHE       3
>>>>> +#define XEHPC_PAT_CLOS1_WT_CACHE       4
>>>>> +#define XEHPC_PAT_CLOS1_WB_CACHE       5
>>>>> +#define XEHPC_PAT_CLOS2_WT_CACHE       6
>>>>> +#define XEHPC_PAT_CLOS2_WB_CACHE       7
>>>>> +
>>>>> +#define XELPG_PAT_WB_CACHE             0
>>>>> +#define XELPG_PAT_WT_CACHE             1
>>>>> +#define XELPG_PAT_UNCACHE              2
>>>>> +#define XELPG_PAT_1_WAY_WB_CACHE       3
>>>>> +#define XELPG_PAT_2_WAY_WB_CACHE       4
>>>>> +
>>>>> +#endif
>>>>> +
>>>>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h 
>>>>> b/drivers/gpu/drm/xe/xe_vm_types.h
>>>>> index edb3c99a9c81..9e1b33c17d05 100644
>>>>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>>>>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>>>>> @@ -312,6 +312,7 @@ struct xe_vm {
>>>>>
>>>>>      /** @batch_invalidate_tlb: Always invalidate TLB before batch 
>>>>> start */
>>>>>      bool batch_invalidate_tlb;
>>>>> +    u64 (*pte_encode)(u64 pte, enum xe_cache_level cache);
>>>>
>>>> Add kernel doc.
>>>>
>>>> s/pte_encode/pte_pat_encode
>>>>
>>>> Again just my opinion. Let see if anyone else has an opinion on the 
>>>> naming.
>>>>
>>>> Other than these NITs it LGTM. Didn't read to the bspec to check the
>>>> actually bit setting, but can do that ahead of the next rev. 
>>>> Speaking of
>>>> that, do you have a bspec link so I don't have to search through it?
>>>>
>>>> Matt
>>>>
>>>>>  };
>>>>>
>>>>>  /** struct xe_vma_op_map - VMA map operation */
>>>>> -- 
>>>>> 2.25.1
>>>>>
>>