[Intel-xe] [PATCH] drm/xe/ppgtt: Add support for correct PAT encoding in Page Table Entry (PTE) format
Vodapalli, Ravi Kumar
ravi.kumar.vodapalli at intel.com
Thu Jul 20 14:18:33 UTC 2023
On 7/17/2023 6:53 PM, Lucas De Marchi wrote:
> On Wed, Jul 12, 2023 at 02:52:37PM +0000, Matthew Brost wrote:
>> On Wed, Jul 12, 2023 at 06:26:40PM +0530, Ravi Kumar Vodapalli wrote:
>>> Different platforms have different PAT encodings in the PTE format; add the
>>> correct PAT encoding for pre-Xe2 platforms (XELP, XEHPC, XELPG).
>>>
>>> Bspec: 45101, 71582
>>> Signed-off-by: Ravi Kumar Vodapalli <ravi.kumar.vodapalli at intel.com>
>>> ---
>>> drivers/gpu/drm/xe/xe_pt.c | 110 +++++++++++++++++++++++++++----
>>> drivers/gpu/drm/xe/xe_pte.h | 31 +++++++++
>>> drivers/gpu/drm/xe/xe_vm_types.h | 1 +
>>> 3 files changed, 129 insertions(+), 13 deletions(-)
>>> create mode 100644 drivers/gpu/drm/xe/xe_pte.h
>>>
>>> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
>>> index 00855681c0d5..e7f8bd4dd518 100644
>>> --- a/drivers/gpu/drm/xe/xe_pt.c
>>> +++ b/drivers/gpu/drm/xe/xe_pt.c
>>> @@ -16,6 +16,7 @@
>>> #include "xe_trace.h"
>>> #include "xe_ttm_stolen_mgr.h"
>>> #include "xe_vm.h"
>>> +#include "xe_pte.h"
>>>
>>> struct xe_pt_dir {
>>> struct xe_pt pt;
>>> @@ -111,19 +112,7 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
>>> if (unlikely(vma && xe_vma_is_null(vma)))
>>> pte |= XE_PTE_NULL;
>>>
>>> - /* FIXME: I don't think the PPAT handling is correct for MTL */
>>> -
>>> - switch (cache) {
>>> - case XE_CACHE_NONE:
>>> - pte |= PPAT_UNCACHED;
>>> - break;
>>> - case XE_CACHE_WT:
>>> - pte |= PPAT_DISPLAY_ELLC;
>>> - break;
>>> - default:
>>> - pte |= PPAT_CACHED;
>>> - break;
>>> - }
>>> + pte = xe_vma_vm(vma)->pte_encode(pte, cache);
>>>
>>> if (pt_level == 1)
>>> pte |= XE_PDE_PS_2M;
>>> @@ -187,6 +176,94 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
>>> }
>>> }
>>>
>>> +static u64 xelp_pte_encode_pat(u8 pat_index)
>>> +{
>>> + u64 pte_pat = 0;
>>> +
>>> + if (pat_index & BIT(0))
>>> + pte_pat |= BIT(3);
>>> +
>>> + if (pat_index & BIT(1))
>>> + pte_pat |= BIT(4);
>>> +
>>> + if (pat_index & BIT(2))
>>> + pte_pat |= BIT(7);
>>> +
>>> + return pte_pat;
>>> +}
>>> +
>>> +static u64 __xelp_pte_encode(u64 pte, enum xe_cache_level cache)
>>> +{
>>> + pte &= ~XELP_PAT_MASK;
>>> +
>>> + switch (cache) {
>>> + case XE_CACHE_NONE:
>>> + pte |= xelp_pte_encode_pat(XELP_PAT_UNCACHE);
>>> + break;
>>> + case XE_CACHE_WT:
>>> + pte |= xelp_pte_encode_pat(XELP_PAT_WT_CACHE);
>>> + break;
>>> + case XE_CACHE_WB:
>>> + pte |= xelp_pte_encode_pat(XELP_PAT_WB_CACHE);
>>> + break;
>>> + default:
>>> + /* pte |= PPAT_CACHED; */
>>> + break;
>>> + }
>>> +
>>> + return pte;
>>> +}
>>> +
>>> +static u64 __xehpc_pte_encode(u64 pte, enum xe_cache_level cache)
>>> +{
>>> + pte &= ~XELP_PAT_MASK;
>>> +
>>> + /* Initially assumed as CLOS0.
>>> + * To Do: CLOS1/CLOS2 has to be set depending on requirement
>>> + */
>>> + switch (cache) {
>>> + case XE_CACHE_NONE:
>>> + pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_UNCACHE);
>>> + break;
>>> + case XE_CACHE_WT:
>>> + pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_WT_CACHE);
>>> + break;
>>> + case XE_CACHE_WB:
>>> + pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_WB_CACHE);
>>> + break;
>>> + default:
>>> + /* pte |= PPAT_CACHED; */
>>> + break;
>>> + }
>>> +
>>> + return pte;
>>> +}
>>> +
>>> +static u64 __xelpg_pte_encode(u64 pte, enum xe_cache_level cache)
>>> +{
>>> + pte &= ~XELP_PAT_MASK;
>>> +
>>> + /* Initially assumed as non-coherent mode, correct values
>>> + have to be set depending on requirement.
>>> + */
>>> + switch (cache) {
>>> + case XE_CACHE_NONE:
>>> + pte |= xelp_pte_encode_pat(XELPG_PAT_UNCACHE);
>>> + break;
>>> + case XE_CACHE_WT:
>>> + pte |= xelp_pte_encode_pat(XELPG_PAT_WT_CACHE);
>>> + break;
>>> + case XE_CACHE_WB:
>>> + pte |= xelp_pte_encode_pat(XELPG_PAT_WB_CACHE);
>>> + break;
>>> + default:
>>> + /* pte |= PPAT_CACHED; */
>>> + break;
>>> + }
>>> +
>>> + return pte;
>>> +}
>>> +
>>> /**
>>> * xe_pt_create() - Create a page-table.
>>> * @vm: The vm to create for.
>>> @@ -216,6 +293,13 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
>>> if (!pt)
>>> return ERR_PTR(-ENOMEM);
>>>
>>> + if (GRAPHICS_VERx100(vm->xe) >= 1270)
>>> + vm->pte_encode = __xelpg_pte_encode;
>>> + else if (GRAPHICS_VERx100(vm->xe) >= 1260)
>>> + vm->pte_encode = __xehpc_pte_encode;
>>> + else
>>> + vm->pte_encode = __xelp_pte_encode;
>>> +
>>> bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
>>> ttm_bo_type_kernel,
>>> XE_BO_CREATE_VRAM_IF_DGFX(tile) |
>>> diff --git a/drivers/gpu/drm/xe/xe_pte.h b/drivers/gpu/drm/xe/xe_pte.h
>>> new file mode 100644
>>> index 000000000000..8b3b5b471a99
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/xe/xe_pte.h
>>
>> s/xe_pte.h/xe_pte_pat.h
>>
>> Just my opinion, I prefer this.
>
> dunno, but since we have different pte formats for ggtt and ppgtt, IMO
> it would add to the confusion.
>
> Maybe this belongs in xe_pat.h since it's xe_pat.c that programs the
> tables giving the meaning spelled out in the macros here for each index.
> The rest of the code then needs to set the bits in the PTE to reference
> those indexes, according to the PTE format.
>
>>
>>> @@ -0,0 +1,31 @@
>>> +/* SPDX-License-Identifier: MIT */
>>> +/*
>>> + * Copyright © 2021 Intel Corporation
>>> + */
>>> +
>>> +#ifndef _XE_PTE_H_
>>> +#define _XE_PTE_H_
>>> +
>>> +#define XELP_PAT_MASK BIT_ULL(7) | BIT_ULL(4) | BIT_ULL(3)
>>> +#define XELP_PAT_WB_CACHE 0
>>> +#define XELP_PAT_WC_CACHE 1
>>> +#define XELP_PAT_WT_CACHE 2
>>> +#define XELP_PAT_UNCACHE 3
>
> example... this duplicates what we have in xe_pat.c (comment added here
> for clarity)
>
> static const u32 tgl_pat_table[] = {
> [0] = TGL_PAT_WB,
> [1] = TGL_PAT_WC,
> [2] = TGL_PAT_WT,
> [3] = TGL_PAT_UC,
> // .... those are programmed, but not expected to be used
> [4] = TGL_PAT_WB,
> [5] = TGL_PAT_WB,
> [6] = TGL_PAT_WB,
> [7] = TGL_PAT_WB,
> };
>
TGL_PAT_WB maps to the PAT table register bit encoding, so we cannot use
tgl_pat_table directly to get the PAT index; we would instead need code like
the below to retrieve the index, whereas with the macros we can use the index
directly:

static u8 xelp_pte_encode_pat(u8 cache_type)	/* example: pass TGL_PAT_WB */
{
	int i;

	for (i = 0; i < ARRAY_SIZE(tgl_pat_table); i++)
		if (tgl_pat_table[i] == cache_type)
			return i;

	return 0;
}
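For comparison, with the index macros the patch's xelp_pte_encode_pat() stays a
direct spread of the index bits into the PTE; a small worked example, using only
what is already in the patch:

	/*
	 * XELP_PAT_UNCACHE is index 3 (0b011): index bit 0 lands in PTE bit 3
	 * and index bit 1 in PTE bit 4, so the PTE ends up with bits 3 and 4 set.
	 */
	pte |= xelp_pte_encode_pat(XELP_PAT_UNCACHE);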
>
> IMO the abstraction could be something like
>
> // this is valid for both ppgtt and ggtt
> pat = xe_pat_get_index(xe, cache);
>
> // this changes between ggtt / ppgtt
> pte |= vm->pte_encode_pat(xe, pat);
>
I have one approach, like the example below, where we can use the same function
for both GGTT and PPGTT:

static u64 pte_encode_pat(enum gtt_type type, u8 pat_index)
{
	u64 pte_pat = 0;

	if (type == PPGTT) {
		if (pat_index & BIT(0))
			pte_pat |= BIT(3);
		if (pat_index & BIT(1))
			pte_pat |= BIT(4);
		if (pat_index & BIT(2))
			pte_pat |= BIT(7);
	} else if (type == GGTT) {
		/* GGTT PAT PTE encoding goes here */
	}

	return pte_pat;
}

/* assigned to vm->pte_encode_pat */
static u64 pte_encode(u64 pte, enum gtt_type type, enum xe_cache_level cache)
{
	switch (cache) {
	case XE_CACHE_NONE:
		pte |= pte_encode_pat(type, XELP_PAT_UNCACHE);
		break;
	/* ... other cache levels as in the patch ... */
	}

	return pte;
}
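For example (only a sketch of how the callers might look, not part of the patch;
the gtt_type values and the GGTT call site are assumptions from the sketch above),
both paths would funnel through the same helper:

	/* PPGTT: __pte_encode() goes through the per-VM callback */
	pte = vm->pte_encode_pat(pte, PPGTT, cache);

	/* GGTT: its own encode helper calls the shared helper directly */
	pte |= pte_encode_pat(GGTT, XELP_PAT_UNCACHE);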
What's your opinion?
Thanks,
Ravi Kumar V
>
> so the PAT indexes are handled by xe_pat.[ch]. The pte *encoding*
> is then handled differently by ggtt and ppgtt. Thoughts?
>
> Lucas De Marchi
>
>
>>> +
>>> +#define XEHPC_PAT_CLOS0_UNCACHE 0
>>> +#define XEHPC_PAT_CLOS0_WC_CACHE 1
>>> +#define XEHPC_PAT_CLOS0_WT_CACHE 2
>>> +#define XEHPC_PAT_CLOS0_WB_CACHE 3
>>> +#define XEHPC_PAT_CLOS1_WT_CACHE 4
>>> +#define XEHPC_PAT_CLOS1_WB_CACHE 5
>>> +#define XEHPC_PAT_CLOS2_WT_CACHE 6
>>> +#define XEHPC_PAT_CLOS2_WB_CACHE 7
>>> +
>>> +#define XELPG_PAT_WB_CACHE 0
>>> +#define XELPG_PAT_WT_CACHE 1
>>> +#define XELPG_PAT_UNCACHE 2
>>> +#define XELPG_PAT_1_WAY_WB_CACHE 3
>>> +#define XELPG_PAT_2_WAY_WB_CACHE 4
>>> +
>>> +#endif
>>> +
>>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
>>> index edb3c99a9c81..9e1b33c17d05 100644
>>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>>> @@ -312,6 +312,7 @@ struct xe_vm {
>>>
>>> /** @batch_invalidate_tlb: Always invalidate TLB before batch start */
>>> bool batch_invalidate_tlb;
>>> + u64 (*pte_encode)(u64 pte, enum xe_cache_level cache);
>>
>> Add kernel doc.
>>
>> s/pte_encode/pte_pat_encode
>>
>> Again just my opinion. Let see if anyone else has an opinion on the
>> naming.
>>
>> Other than these NITs it LGTM. Didn't read the bspec to check the
>> actual bit settings, but can do that ahead of the next rev. Speaking of
>> that, do you have a bspec link so I don't have to search through it?
>>
>> Matt
>>
>>> };
>>>
>>> /** struct xe_vma_op_map - VMA map operation */
>>> --
>>> 2.25.1
>>>