[Intel-xe] [PATCH] drm/xe/ppgtt: Add support for correct PAT encoding in PTE and PDE
Rodrigo Vivi
rodrigo.vivi at intel.com
Fri Jul 28 18:29:44 UTC 2023
On Wed, Jul 26, 2023 at 07:27:01PM +0000, Matthew Brost wrote:
> On Wed, Jul 26, 2023 at 06:19:16PM +0530, Ravi Kumar Vodapalli wrote:
> > Different platforms has different PAT encoding in PTE and PDE format, add
> > correct PAT encoding for pre-Xe2 platforms (XELP, XEHPC, XELPG).
> >
> > Bspec: 45101, 71582
> > Signed-off-by: Ravi Kumar Vodapalli <ravi.kumar.vodapalli at intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_pt.c | 119 ++++++++++++++++++++++++++-----
> > drivers/gpu/drm/xe/xe_pt.h | 2 +
> > drivers/gpu/drm/xe/xe_vm_types.h | 2 +
> > 3 files changed, 106 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> > index d4660520ac2c..09dafb0e47ea 100644
> > --- a/drivers/gpu/drm/xe/xe_pt.c
> > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > @@ -16,6 +16,7 @@
> > #include "xe_trace.h"
> > #include "xe_ttm_stolen_mgr.h"
> > #include "xe_vm.h"
> > +#include "xe_pt.h"
> >
> > struct xe_pt_dir {
> > struct xe_pt pt;
> > @@ -62,6 +63,8 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
> > {
> > u64 pde;
> > bool is_vram;
> > + u8 pat_index = 0;
> > + struct xe_vm *vm = bo->vm;
> >
> > pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE, &is_vram);
> > pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
> > @@ -70,10 +73,9 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
> >
> > /* FIXME: I don't think the PPAT handling is correct for MTL */
>
> Stale comment, this should be correct after this series.
>
> >
> > - if (level != XE_CACHE_NONE)
> > - pde |= PPAT_CACHED_PDE;
> > - else
> > - pde |= PPAT_UNCACHED;
> > +
> > + pat_index = xe_pat_get_index(xe_bo_device(bo), level);
> > + pde = vm->ppgtt_pde_encode(pde, pat_index);
> >
> > return pde;
> > }
> > @@ -103,6 +105,9 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
> > static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
> > struct xe_vma *vma, u32 pt_level)
> > {
> > + struct xe_vm *vm = xe_vma_vm(vma);
> > + u8 pat_index = 0;
> > +
> > pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
> >
> > if (unlikely(vma && xe_vma_read_only(vma)))
> > @@ -111,19 +116,8 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
> > if (unlikely(vma && xe_vma_is_null(vma)))
> > pte |= XE_PTE_NULL;
> >
> > - /* FIXME: I don't think the PPAT handling is correct for MTL */
> > -
> > - switch (cache) {
> > - case XE_CACHE_NONE:
> > - pte |= PPAT_UNCACHED;
> > - break;
> > - case XE_CACHE_WT:
> > - pte |= PPAT_DISPLAY_ELLC;
> > - break;
> > - default:
> > - pte |= PPAT_CACHED;
> > - break;
> > - }
> > + pat_index = xe_pat_get_index(vm->xe, cache);
> > + pte = vm->ppgtt_pte_encode(pte, pat_index);
> >
> > if (pt_level == 1)
> > pte |= XE_PDE_PS_2M;
> > @@ -187,6 +181,91 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
> > }
> > }
> >
> > +static u64 __xelp_ppgtt_pde_encode_pat(u64 pte_pat, u8 pat_index)
> > +{
> > +
> > + pte_pat &= ~(BIT(12) | BIT(4) | BIT(3));
>
> Defines rather than just doing BITs.
besides that, this combination is the pde_pat, not the pte.
which btw, I was trying to clarify some of the old confusions with
this patch: https://patchwork.freedesktop.org/series/121478/
whatever we do there we need to clarify the pat_index construction
and get rid of the encode |= 0 (PPAT_CACHED_PDE)
Feel free to get parts of my patch and use on this patch of yours here.
Thanks,
Rodrigo.
>
> > +
> > + if (pat_index & BIT(0))
> > + pte_pat |= BIT(3);
> > +
> > + if (pat_index & BIT(1))
> > + pte_pat |= BIT(4);
> > +
> > + if (pat_index & BIT(2))
> > + pte_pat |= BIT(12);
> > +
> > + return pte_pat;
> > +}
> > +
> > +static u64 __xelp_ppgtt_pte_encode_pat(u64 pte_pat, u8 pat_index)
> > +{
> > +
> > + pte_pat &= ~(BIT(7) | BIT(4) | BIT(3));
> > +
> > + if (pat_index & BIT(0))
> > + pte_pat |= BIT(3);
> > +
> > + if (pat_index & BIT(1))
> > + pte_pat |= BIT(4);
> > +
> > + if (pat_index & BIT(2))
> > + pte_pat |= BIT(7);
> > +
> > + return pte_pat;
> > +}
> > +
> > +u8 xe_pat_get_index(struct xe_device *xe, enum xe_cache_level cache)
>
> This doesn't need to be export, i.e. make this static.
>
> Also in general this is a really goofy use of vfuncs vs. if else. The
> vfuncs are only set to 1 function and this function has 3 branches. Very
> confusing.
>
> It would make more sense to have a vfunc for getting the pat index with
> different implementations based on graphics version. Then non-vfuncs to
> the pat in the PTE and PDE.
>
> > +{
> > + u8 pat_index;
> > +
> > + if (GRAPHICS_VERx100(xe) >= 1270) {
> > + switch (cache) {
> > + case XE_CACHE_NONE:
> > + pat_index = 2;
> > + break;
> > + case XE_CACHE_WT:
> > + pat_index = 1;
> > + break;
> > + case XE_CACHE_WB:
> > + default:
> > + pat_index = 0;
> > + break;
> > + }
> > + }
> > + else if (GRAPHICS_VERx100(xe) >= 1260) {
> > + switch (cache) {
> > + case XE_CACHE_NONE:
> > + pat_index = 0;
> > + break;
> > + case XE_CACHE_WT:
> > + pat_index = 2;
> > + break;
> > + case XE_CACHE_WB:
> > + default:
> > + pat_index = 3;
> > + break;
> > + }
> > + }
> > + else
> > + {
> > + switch (cache) {
> > + case XE_CACHE_NONE:
> > + pat_index = 3;
> > + break;
> > + case XE_CACHE_WT:
> > + pat_index = 2;
> > + break;
> > + case XE_CACHE_WB:
> > + default:
> > + pat_index = 0;
> > + break;
> > + }
> > + }
> > +
> > + return pat_index;
> > +}
> > +
> > /**
> > * xe_pt_create() - Create a page-table.
> > * @vm: The vm to create for.
> > @@ -216,6 +295,12 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
> > if (!pt)
> > return ERR_PTR(-ENOMEM);
> >
> > + /* __xelp_pte_encode applies to xelp, xehpc and xelpg platform*/
> > + vm->ppgtt_pte_encode = __xelp_ppgtt_pte_encode_pat;
> > +
> > + /* __xelp_pde_encode applies to xelp, xehpc and xelpg platform*/
> > + vm->ppgtt_pde_encode = __xelp_ppgtt_pde_encode_pat;
> > +
> > bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
> > ttm_bo_type_kernel,
> > XE_BO_CREATE_VRAM_IF_DGFX(tile) |
> > diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
> > index aaf4b7b851e2..0ed69abf8202 100644
> > --- a/drivers/gpu/drm/xe/xe_pt.h
> > +++ b/drivers/gpu/drm/xe/xe_pt.h
> > @@ -51,4 +51,6 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
> > u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
> > u64 offset, enum xe_cache_level cache,
> > u32 pt_level);
> > +
> > +u8 xe_pat_get_index(struct xe_device *xe, enum xe_cache_level cache);
> > #endif
> > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> > index 261a4b0e8570..fc976e207462 100644
> > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > @@ -336,6 +336,8 @@ struct xe_vm {
> >
> > /** @batch_invalidate_tlb: Always invalidate TLB before batch start */
> > bool batch_invalidate_tlb;
> > + u64 (*ppgtt_pte_encode)(u64 pte_pat, u8 pat_index);
> > + u64 (*ppgtt_pde_encode)(u64 pte_pat, u8 pat_index);
>
> Kernel doc and for vfuncs make this a const struct of ops.
>
> An example of what I'm refering to is 'struct xe_engine_ops'.
>
> Matt
>
> > };
> >
> > /** struct xe_vma_op_map - VMA map operation */
> > --
> > 2.25.1
> >
More information about the Intel-xe
mailing list