[Intel-xe] [PATCH] drm/xe/ppgtt: Add support for correct PAT encoding in Page Table Entry (PTE) format
Lucas De Marchi
lucas.demarchi at intel.com
Fri Jul 21 16:13:56 UTC 2023
On Thu, Jul 20, 2023 at 07:48:33PM +0530, Vodapalli, Ravi Kumar wrote:
>
>
>On 7/17/2023 6:53 PM, Lucas De Marchi wrote:
>>On Wed, Jul 12, 2023 at 02:52:37PM +0000, Matthew Brost wrote:
>>>On Wed, Jul 12, 2023 at 06:26:40PM +0530, Ravi Kumar Vodapalli wrote:
>>>>Different platforms has different PAT encoding in PTE format, add
>>>>correct PAT encoding for pre-Xe2 platforms (XELP, XEHPC, XELPG).
>>>>
>>>>Bspec: 45101, 71582
>>>>Signed-off-by: Ravi Kumar Vodapalli <ravi.kumar.vodapalli at intel.com>
>>>>---
>>>> drivers/gpu/drm/xe/xe_pt.c | 110 +++++++++++++++++++++++++++----
>>>> drivers/gpu/drm/xe/xe_pte.h | 31 +++++++++
>>>> drivers/gpu/drm/xe/xe_vm_types.h | 1 +
>>>> 3 files changed, 129 insertions(+), 13 deletions(-)
>>>> create mode 100644 drivers/gpu/drm/xe/xe_pte.h
>>>>
>>>>diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
>>>>index 00855681c0d5..e7f8bd4dd518 100644
>>>>--- a/drivers/gpu/drm/xe/xe_pt.c
>>>>+++ b/drivers/gpu/drm/xe/xe_pt.c
>>>>@@ -16,6 +16,7 @@
>>>> #include "xe_trace.h"
>>>> #include "xe_ttm_stolen_mgr.h"
>>>> #include "xe_vm.h"
>>>>+#include "xe_pte.h"
>>>>
>>>> struct xe_pt_dir {
>>>> struct xe_pt pt;
>>>>@@ -111,19 +112,7 @@ static u64 __pte_encode(u64 pte, enum
>>>>xe_cache_level cache,
>>>> if (unlikely(vma && xe_vma_is_null(vma)))
>>>> pte |= XE_PTE_NULL;
>>>>
>>>>- /* FIXME: I don't think the PPAT handling is correct for MTL */
>>>>-
>>>>- switch (cache) {
>>>>- case XE_CACHE_NONE:
>>>>- pte |= PPAT_UNCACHED;
>>>>- break;
>>>>- case XE_CACHE_WT:
>>>>- pte |= PPAT_DISPLAY_ELLC;
>>>>- break;
>>>>- default:
>>>>- pte |= PPAT_CACHED;
>>>>- break;
>>>>- }
>>>>+ pte = xe_vma_vm(vma)->pte_encode(pte, cache);
>>>>
>>>> if (pt_level == 1)
>>>> pte |= XE_PDE_PS_2M;
>>>>@@ -187,6 +176,94 @@ static u64 __xe_pt_empty_pte(struct xe_tile
>>>>*tile, struct xe_vm *vm,
>>>> }
>>>> }
>>>>
>>>>+static u64 xelp_pte_encode_pat(u8 pat_index)
>>>>+{
>>>>+ u64 pte_pat = 0;
>>>>+
>>>>+ if (pat_index & BIT(0))
>>>>+ pte_pat |= BIT(3);
>>>>+
>>>>+ if (pat_index & BIT(1))
>>>>+ pte_pat |= BIT(4);
>>>>+
>>>>+ if (pat_index & BIT(2))
>>>>+ pte_pat |= BIT(7);
>>>>+
>>>>+ return pte_pat;
>>>>+}
>>>>+
>>>>+static u64 __xelp_pte_encode(u64 pte, enum xe_cache_level cache)
>>>>+{
>>>>+ pte &= ~XELP_PAT_MASK;
>>>>+
>>>>+ switch (cache) {
>>>>+ case XE_CACHE_NONE:
>>>>+ pte |= xelp_pte_encode_pat(XELP_PAT_UNCACHE);
>>>>+ break;
>>>>+ case XE_CACHE_WT:
>>>>+ pte |= xelp_pte_encode_pat(XELP_PAT_WT_CACHE);
>>>>+ break;
>>>>+ case XE_CACHE_WB:
>>>>+ pte |= xelp_pte_encode_pat(XELP_PAT_WB_CACHE);
>>>>+ break;
>>>>+ default:
>>>>+ /* pte |= PPAT_CACHED; */
>>>>+ break;
>>>>+ }
>>>>+
>>>>+ return pte;
>>>>+}
>>>>+
>>>>+static u64 __xehpc_pte_encode(u64 pte, enum xe_cache_level cache)
>>>>+{
>>>>+ pte &= ~XELP_PAT_MASK;
>>>>+
>>>>+ /* Initially assumed as CLOS0.
>>>>+ * To Do: CLOS1/CLOS2 has to be set depending on requirement
>>>>+ */
>>>>+ switch (cache) {
>>>>+ case XE_CACHE_NONE:
>>>>+ pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_UNCACHE);
>>>>+ break;
>>>>+ case XE_CACHE_WT:
>>>>+ pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_WT_CACHE);
>>>>+ break;
>>>>+ case XE_CACHE_WB:
>>>>+ pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_WB_CACHE);
>>>>+ break;
>>>>+ default:
>>>>+ /* pte |= PPAT_CACHED; */
>>>>+ break;
>>>>+ }
>>>>+
>>>>+ return pte;
>>>>+}
>>>>+
>>>>+static u64 __xelpg_pte_encode(u64 pte, enum xe_cache_level cache)
>>>>+{
>>>>+ pte &= ~XELP_PAT_MASK;
>>>>+
>>>>+ /* Initially assumed as Non cohorent mode, correct values
>>>>+ * has to be set depending on requirement.
>>>>+ */
>>>>+ switch (cache) {
>>>>+ case XE_CACHE_NONE:
>>>>+ pte |= xelp_pte_encode_pat(XELPG_PAT_UNCACHE);
>>>>+ break;
>>>>+ case XE_CACHE_WT:
>>>>+ pte |= xelp_pte_encode_pat(XELPG_PAT_WT_CACHE);
>>>>+ break;
>>>>+ case XE_CACHE_WB:
>>>>+ pte |= xelp_pte_encode_pat(XELPG_PAT_WB_CACHE);
>>>>+ break;
>>>>+ default:
>>>>+ /* pte |= PPAT_CACHED; */
>>>>+ break;
>>>>+ }
>>>>+
>>>>+ return pte;
>>>>+}
>>>>+
>>>> /**
>>>> * xe_pt_create() - Create a page-table.
>>>> * @vm: The vm to create for.
>>>>@@ -216,6 +293,13 @@ struct xe_pt *xe_pt_create(struct xe_vm
>>>>*vm, struct xe_tile *tile,
>>>> if (!pt)
>>>> return ERR_PTR(-ENOMEM);
>>>>
>>>>+ if (GRAPHICS_VERx100(vm->xe) >= 1270)
>>>>+ vm->pte_encode = __xelpg_pte_encode;
>>>>+ else if (GRAPHICS_VERx100(vm->xe) >= 1260)
>>>>+ vm->pte_encode = __xehpc_pte_encode;
>>>>+ else
>>>>+ vm->pte_encode = __xelp_pte_encode;
>>>>+
>>>> bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
>>>> ttm_bo_type_kernel,
>>>> XE_BO_CREATE_VRAM_IF_DGFX(tile) |
>>>>diff --git a/drivers/gpu/drm/xe/xe_pte.h b/drivers/gpu/drm/xe/xe_pte.h
>>>>new file mode 100644
>>>>index 000000000000..8b3b5b471a99
>>>>--- /dev/null
>>>>+++ b/drivers/gpu/drm/xe/xe_pte.h
>>>
>>>s/xe_pte.h/xe_pte_pat.h
>>>
>>>Just my opinion, I prefer this.
>>
>>dunno, but since we have different pte formats for ggtt and ppgtt, IMO
>>it would add to the confusion.
>>
>>Maybe this belongs in xe_pat.h since it's xe_pat.c that programs the
>>tables giving the meaning spelled out in the macros here for each index.
>>The rest of the code then need to set the bits in the PTE to reference
>>those indexes, according to the PTE format.
>>
>>>
>>>>@@ -0,0 +1,31 @@
>>>>+/* SPDX-License-Identifier: MIT */
>>>>+/*
>>>>+ * Copyright © 2021 Intel Corporation
>>>>+ */
>>>>+
>>>>+#ifndef _XE_PTE_H_
>>>>+#define _XE_PTE_H_
>>>>+
>>>>+#define XELP_PAT_MASK BIT_ULL(7) | BIT_ULL(4)
>>>>| BIT_ULL(3)
>>>>+#define XELP_PAT_WB_CACHE 0
>>>>+#define XELP_PAT_WC_CACHE 1
>>>>+#define XELP_PAT_WT_CACHE 2
>>>>+#define XELP_PAT_UNCACHE 3
>>
>>example... this duplicates what we have in xe_pat.c (comment added here
>>for clarity)
>>
>>static const u32 tgl_pat_table[] = {
>> [0] = TGL_PAT_WB,
>> [1] = TGL_PAT_WC,
>> [2] = TGL_PAT_WT,
>> [3] = TGL_PAT_UC,
>> // .... those are programmed, but not expected to be used
>> [4] = TGL_PAT_WB,
>> [5] = TGL_PAT_WB,
>> [6] = TGL_PAT_WB,
>> [7] = TGL_PAT_WB,
>>};
>>
>
>TGL_PAT_WB maps to PAT table register bit mapping, we cannot directly
>use tgl_pat_table to get PAT index rather we
>need to implement code to retrieve the index like below instead we can
>use macro directly.
>
>xelp_pte_encode_pat(u8 cache_type) //example pass TGL_PAT_WB
>{
> u8 index;
> for (i=0, i < 32, i++)
> if (tgl_pat_table[i] == cache_type)
no, we don't want the caller to pass the platform-specific value.
It should pass a platform-agnostic value and xe_pat_get_index()
translates that to the platform-specific index. There are just 2 entries
really used by the kernel: uc and wb. Then the xe_pat.c returns what is
the entry to be used in those cases.
Lucas De Marchi
> index = i;
>}
>
>
>>
>>IMO the abstraction could be something like
>>
>> // this is valid for both ppgtt and ggtt
>> pat = xe_pat_get_index(xe, cache);
>>
>> // this changes between ggtt / ppgtt
>> pte |= vm->pte_encode_pat(xe, pat);
>>
>
>I have one approach like below example where we can use same function
>for both ggtt and ppgtt
>
>static u64 pte_encode_pat(enum gtt_type, u8 pat_index)
>{
> u64 pte_pat = 0;
>
> if (gtt_type == PPGTT)
> {
> if (pat_index & BIT(0))
> pte_pat |= BIT(3);
>
> if (pat_index & BIT(1))
> pte_pat |= BIT(4);
>
> if (pat_index & BIT(2))
> pte_pat |= BIT(7);
> }
> else if(gtt_type = GGTT)
> {
> GGTT type PAT PTE encoding code here.
> }
> return pte_pat;
>}
>
>
>vm->pte_encode_pat(u64 pte, enum gtt_type, xe_cache_level cache)
>{
> switch (cache) {
> case XE_CACHE_NONE:
> pte |= pte_encode_pat(gtt_type, XELP_PAT_UNCACHE);
> break;
>}
>
>what's your opinion.
>
>Thanks,
>Ravi Kumar V
>>
>>so the PAT indexes are handled by xe_pat.[ch]. The pte *encoding*
>>is then handled differently by ggtt and ppgtt. Thoughts?
>>
>>Lucas De Marchi
>>
>>
>>>>+
>>>>+#define XEHPC_PAT_CLOS0_UNCACHE 0
>>>>+#define XEHPC_PAT_CLOS0_WC_CACHE 1
>>>>+#define XEHPC_PAT_CLOS0_WT_CACHE 2
>>>>+#define XEHPC_PAT_CLOS0_WB_CACHE 3
>>>>+#define XEHPC_PAT_CLOS1_WT_CACHE 4
>>>>+#define XEHPC_PAT_CLOS1_WB_CACHE 5
>>>>+#define XEHPC_PAT_CLOS2_WT_CACHE 6
>>>>+#define XEHPC_PAT_CLOS2_WB_CACHE 7
>>>>+
>>>>+#define XELPG_PAT_WB_CACHE 0
>>>>+#define XELPG_PAT_WT_CACHE 1
>>>>+#define XELPG_PAT_UNCACHE 2
>>>>+#define XELPG_PAT_1_WAY_WB_CACHE 3
>>>>+#define XELPG_PAT_2_WAY_WB_CACHE 4
>>>>+
>>>>+#endif
>>>>+
>>>>diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
>>>>b/drivers/gpu/drm/xe/xe_vm_types.h
>>>>index edb3c99a9c81..9e1b33c17d05 100644
>>>>--- a/drivers/gpu/drm/xe/xe_vm_types.h
>>>>+++ b/drivers/gpu/drm/xe/xe_vm_types.h
>>>>@@ -312,6 +312,7 @@ struct xe_vm {
>>>>
>>>> /** @batch_invalidate_tlb: Always invalidate TLB before
>>>>batch start */
>>>> bool batch_invalidate_tlb;
>>>>+ u64 (*pte_encode)(u64 pte, enum xe_cache_level cache);
>>>
>>>Add kernel doc.
>>>
>>>s/pte_encode/pte_pat_encode
>>>
>>>Again just my opinion. Let see if anyone else has an opinion on
>>>the naming.
>>>
>>>Other than these NITs it LGTM. Didn't read to the bspec to check the
>>>actually bit setting, but can do that ahead of the next rev. Speaking of
>>>that, do you have a bspec link so I don't have to search through it?
>>>
>>>Matt
>>>
>>>> };
>>>>
>>>> /** struct xe_vma_op_map - VMA map operation */
>>>>--
>>>>2.25.1
>>>>
>
More information about the Intel-xe
mailing list