[Intel-xe] [PATCH] drm/xe/ppgtt: Add support for correct PAT encoding in Page Table Entry (PTE) format

Lucas De Marchi lucas.demarchi at intel.com
Fri Jul 21 18:12:14 UTC 2023


On Fri, Jul 21, 2023 at 10:21:31PM +0530, Vodapalli, Ravi Kumar wrote:
>
>
>On 7/21/2023 9:43 PM, Lucas De Marchi wrote:
>>On Thu, Jul 20, 2023 at 07:48:33PM +0530, Vodapalli, Ravi Kumar wrote:
>>>
>>>
>>>On 7/17/2023 6:53 PM, Lucas De Marchi wrote:
>>>>On Wed, Jul 12, 2023 at 02:52:37PM +0000, Matthew Brost wrote:
>>>>>On Wed, Jul 12, 2023 at 06:26:40PM +0530, Ravi Kumar Vodapalli wrote:
>>>>>>Different platforms has different PAT encoding in PTE format, add
>>>>>>correct PAT encoding for pre-Xe2 platforms (XELP, XEHPC, XELPG).
>>>>>>
>>>>>>Bspec: 45101, 71582
>>>>>>Signed-off-by: Ravi Kumar Vodapalli <ravi.kumar.vodapalli at intel.com>
>>>>>>---
>>>>>> drivers/gpu/drm/xe/xe_pt.c       | 110 
>>>>>>+++++++++++++++++++++++++++----
>>>>>> drivers/gpu/drm/xe/xe_pte.h      |  31 +++++++++
>>>>>> drivers/gpu/drm/xe/xe_vm_types.h |   1 +
>>>>>> 3 files changed, 129 insertions(+), 13 deletions(-)
>>>>>> create mode 100644 drivers/gpu/drm/xe/xe_pte.h
>>>>>>
>>>>>>diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
>>>>>>index 00855681c0d5..e7f8bd4dd518 100644
>>>>>>--- a/drivers/gpu/drm/xe/xe_pt.c
>>>>>>+++ b/drivers/gpu/drm/xe/xe_pt.c
>>>>>>@@ -16,6 +16,7 @@
>>>>>> #include "xe_trace.h"
>>>>>> #include "xe_ttm_stolen_mgr.h"
>>>>>> #include "xe_vm.h"
>>>>>>+#include "xe_pte.h"
>>>>>>
>>>>>> struct xe_pt_dir {
>>>>>>     struct xe_pt pt;
>>>>>>@@ -111,19 +112,7 @@ static u64 __pte_encode(u64 pte, enum 
>>>>>>xe_cache_level cache,
>>>>>>     if (unlikely(vma && xe_vma_is_null(vma)))
>>>>>>         pte |= XE_PTE_NULL;
>>>>>>
>>>>>>-    /* FIXME: I don't think the PPAT handling is correct for MTL */
>>>>>>-
>>>>>>-    switch (cache) {
>>>>>>-    case XE_CACHE_NONE:
>>>>>>-        pte |= PPAT_UNCACHED;
>>>>>>-        break;
>>>>>>-    case XE_CACHE_WT:
>>>>>>-        pte |= PPAT_DISPLAY_ELLC;
>>>>>>-        break;
>>>>>>-    default:
>>>>>>-        pte |= PPAT_CACHED;
>>>>>>-        break;
>>>>>>-    }
>>>>>>+    pte = xe_vma_vm(vma)->pte_encode(pte, cache);
>>>>>>
>>>>>>     if (pt_level == 1)
>>>>>>         pte |= XE_PDE_PS_2M;
>>>>>>@@ -187,6 +176,94 @@ static u64 __xe_pt_empty_pte(struct 
>>>>>>xe_tile *tile, struct xe_vm *vm,
>>>>>>     }
>>>>>> }
>>>>>>
>>>>>>+static u64 xelp_pte_encode_pat(u8 pat_index)
>>>>>>+{
>>>>>>+    u64 pte_pat = 0;
>>>>>>+
>>>>>>+    if (pat_index & BIT(0))
>>>>>>+        pte_pat |= BIT(3);
>>>>>>+
>>>>>>+    if (pat_index & BIT(1))
>>>>>>+        pte_pat |= BIT(4);
>>>>>>+
>>>>>>+    if (pat_index & BIT(2))
>>>>>>+        pte_pat |= BIT(7);
>>>>>>+
>>>>>>+    return pte_pat;
>>>>>>+}
>>>>>>+
>>>>>>+static u64 __xelp_pte_encode(u64 pte, enum xe_cache_level cache)
>>>>>>+{
>>>>>>+    pte &= ~XELP_PAT_MASK;
>>>>>>+
>>>>>>+    switch (cache) {
>>>>>>+    case XE_CACHE_NONE:
>>>>>>+        pte |= xelp_pte_encode_pat(XELP_PAT_UNCACHE);
>>>>>>+        break;
>>>>>>+    case XE_CACHE_WT:
>>>>>>+        pte |= xelp_pte_encode_pat(XELP_PAT_WT_CACHE);
>>>>>>+        break;
>>>>>>+    case XE_CACHE_WB:
>>>>>>+        pte |= xelp_pte_encode_pat(XELP_PAT_WB_CACHE);
>>>>>>+        break;
>>>>>>+    default:
>>>>>>+        /* pte |= PPAT_CACHED; */
>>>>>>+        break;
>>>>>>+    }
>>>>>>+
>>>>>>+    return pte;
>>>>>>+}
>>>>>>+
>>>>>>+static u64 __xehpc_pte_encode(u64 pte, enum xe_cache_level cache)
>>>>>>+{
>>>>>>+    pte &= ~XELP_PAT_MASK;
>>>>>>+
>>>>>>+    /* Initially assumed as CLOS0.
>>>>>>+     * To Do: CLOS1/CLOS2 has to be set depending on requirement
>>>>>>+     */
>>>>>>+    switch (cache) {
>>>>>>+    case XE_CACHE_NONE:
>>>>>>+        pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_UNCACHE);
>>>>>>+        break;
>>>>>>+    case XE_CACHE_WT:
>>>>>>+        pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_WT_CACHE);
>>>>>>+        break;
>>>>>>+    case XE_CACHE_WB:
>>>>>>+        pte |= xelp_pte_encode_pat(XEHPC_PAT_CLOS0_WB_CACHE);
>>>>>>+        break;
>>>>>>+    default:
>>>>>>+        /* pte |= PPAT_CACHED; */
>>>>>>+        break;
>>>>>>+    }
>>>>>>+
>>>>>>+    return pte;
>>>>>>+}
>>>>>>+
>>>>>>+static u64 __xelpg_pte_encode(u64 pte, enum xe_cache_level cache)
>>>>>>+{
>>>>>>+    pte &= ~XELP_PAT_MASK;
>>>>>>+
>>>>>>+    /* Initially assumed as Non cohorent mode, correct values
>>>>>>+     * has to be set depending on requirement.
>>>>>>+     */
>>>>>>+    switch (cache) {
>>>>>>+    case XE_CACHE_NONE:
>>>>>>+        pte |= xelp_pte_encode_pat(XELPG_PAT_UNCACHE);
>>>>>>+        break;
>>>>>>+    case XE_CACHE_WT:
>>>>>>+        pte |= xelp_pte_encode_pat(XELPG_PAT_WT_CACHE);
>>>>>>+        break;
>>>>>>+    case XE_CACHE_WB:
>>>>>>+        pte |= xelp_pte_encode_pat(XELPG_PAT_WB_CACHE);
>>>>>>+        break;
>>>>>>+    default:
>>>>>>+        /* pte |= PPAT_CACHED; */
>>>>>>+        break;
>>>>>>+    }
>>>>>>+
>>>>>>+    return pte;
>>>>>>+}
>>>>>>+
>>>>>> /**
>>>>>>  * xe_pt_create() - Create a page-table.
>>>>>>  * @vm: The vm to create for.
>>>>>>@@ -216,6 +293,13 @@ struct xe_pt *xe_pt_create(struct xe_vm 
>>>>>>*vm, struct xe_tile *tile,
>>>>>>     if (!pt)
>>>>>>         return ERR_PTR(-ENOMEM);
>>>>>>
>>>>>>+    if (GRAPHICS_VERx100(vm->xe) >= 1270)
>>>>>>+        vm->pte_encode = __xelpg_pte_encode;
>>>>>>+    else if (GRAPHICS_VERx100(vm->xe) >= 1260)
>>>>>>+        vm->pte_encode = __xehpc_pte_encode;
>>>>>>+    else
>>>>>>+        vm->pte_encode = __xelp_pte_encode;
>>>>>>+
>>>>>>     bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
>>>>>>                   ttm_bo_type_kernel,
>>>>>>                   XE_BO_CREATE_VRAM_IF_DGFX(tile) |
>>>>>>diff --git a/drivers/gpu/drm/xe/xe_pte.h 
>>>>>>b/drivers/gpu/drm/xe/xe_pte.h
>>>>>>new file mode 100644
>>>>>>index 000000000000..8b3b5b471a99
>>>>>>--- /dev/null
>>>>>>+++ b/drivers/gpu/drm/xe/xe_pte.h
>>>>>
>>>>>s/xe_pte.h/xe_pte_pat.h
>>>>>
>>>>>Just my option prefer this.
>>>>
>>>>dunno, but since we have different pte formats for ggtt and ppgtt, IMO
>>>>it would add to the confusion.
>>>>
>>>>Maybe this belongs in xe_pat.h since it's xe_pat.c that programs the
>>>>tables giving the meaning spelled out in the macros here for 
>>>>each index.
>>>>The rest of the code then need to set the bits in the PTE to reference
>>>>those indexes, according to the PTE format.
>>>>
>>>>>
>>>>>>@@ -0,0 +1,31 @@
>>>>>>+/* SPDX-License-Identifier: MIT */
>>>>>>+/*
>>>>>>+ * Copyright © 2021 Intel Corporation
>>>>>>+ */
>>>>>>+
>>>>>>+#ifndef _XE_PTE_H_
>>>>>>+#define _XE_PTE_H_
>>>>>>+
>>>>>>+#define XELP_PAT_MASK                  BIT_ULL(7) | 
>>>>>>BIT_ULL(4) | BIT_ULL(3)
>>>>>>+#define XELP_PAT_WB_CACHE              0
>>>>>>+#define XELP_PAT_WC_CACHE              1
>>>>>>+#define XELP_PAT_WT_CACHE              2
>>>>>>+#define XELP_PAT_UNCACHE               3
>>>>
>>>>example... this duplicates what we have in xe_pat.c (comment added here
>>>>for clarity)
>>>>
>>>>static const u32 tgl_pat_table[] = {
>>>>        [0] = TGL_PAT_WB,
>>>>        [1] = TGL_PAT_WC,
>>>>        [2] = TGL_PAT_WT,
>>>>        [3] = TGL_PAT_UC,
>>>>    // .... those are programmed, but not expected to be used
>>>>        [4] = TGL_PAT_WB,
>>>>        [5] = TGL_PAT_WB,
>>>>        [6] = TGL_PAT_WB,
>>>>        [7] = TGL_PAT_WB,
>>>>};
>>>>
>>>
>>>TGL_PAT_WB maps to PAT table register bit mapping, we cannot 
>>>directly use tgl_pat_table to get PAT index rather we
>>>need to implement code to retrieve the index like below instead we 
>>>can use macro directly.
>>>
>>>xelp_pte_encode_pat(u8 cache_type) //example pass TGL_PAT_WB
>>>{
>>>    u8 index;
>>>    for (i=0, i < 32, i++)
>>>        if (tgl_pat_table[i] == cache_type)
>>
>>no, we don't want the caller to pass the platform-specific value.
>>It should pass a platfrom-agnostic value and xe_pat_get_index()
>>translates that to the platform-specific index. There are just 2 entries
>>really used by the kernel: uc and wb. Then the xe_pat.c returns what is
>>the entry to be used in those cases.
>>
>>Lucas De Marchi
>>
>
>I have discussed Regarding encoding the index into pat pte with Matt,Roper
>He said instead of writing common function for both ppgtt and ggtt, 
>the best approach would be as below so that in future adding more 
>support will be easier
>
>static u64 xelp_pte_encode_pat(u8 pat_index)
>{
>    u64 pte_pat = 0;
>
>    if (pat_index & BIT(0))
>        pte_pat |= BIT(3);
>
>    if (pat_index & BIT(1))
>        pte_pat |= BIT(4);
>
>    if (pat_index & BIT(2))
>        pte_pat |= BIT(7);
>
>    return pte_pat;
>}

that is fine. My point is that selecting the *index* and the encoding
are 2 different things. Here it's only doing a index -> pte, which is
fine: there will be one for ppgtt and another for ggtt. However gettting
the platform-specific index is shared. Check again the snippet I shared
previously:

	// this is valid for both ppgtt and ggtt
	pat = xe_pat_get_index(xe, cache);

	// this changes between ggtt / ppgtt
	pte |= vm->pte_encode_pat(xe, pat);


pte_encode_pat() is the hook set during init to xelp_pte_encode_pat(),
for this platform.  The index  however is obtained in the call above.

Lucas De Marchi


More information about the Intel-xe mailing list