[PATCH] drm/xe: rework PDE PAT index selection

Summers, Stuart stuart.summers at intel.com
Fri Aug 1 19:31:23 UTC 2025


On Fri, 2025-08-01 at 16:21 +0100, Matthew Auld wrote:
> For non-leaf paging structures we end up selecting a random index
> between [0, 3], depending on the first user if the page-table is
> shared, since non-leaf structures only have two bits in the HW for
> encoding the PAT index, and here we are just passing along the full
> user provided index, which can be as large as ~31 on xe2+. The user
> provided index is meant for the leaf node, which maps the actual BO
> pages and where we have more PAT bits, and not for the non-leaf nodes,
> which only map other paging structures and so only need a minimal PAT
> index range. Also, the chosen index might need to account for how the
> driver mapped the paging structures on the host side, like wc vs wb,
> which is separate from the user provided index.
> 
> With that, move the PDE PAT index selection under driver control. For
> now just use a coherent index on platforms where page-tables are
> cached on the host side, and an incoherent index otherwise. Using a
> coherent index could potentially be expensive, and would be overkill
> if we know the page-table is always uncached on the host side.
> 
> BSpec: 59510
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Stuart Summers <stuart.summers at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>

Hey Matt,

Do we have any known failures related to this? Or what is the
motivation for this change generally?
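
Just to check my understanding of the problem: since a PDE only has
two PAT bits, the encode helper can only honor the low two bits of
whatever index userspace handed in. Roughly like this (a sketch, not
the exact driver code, and the bit positions are from memory):

	static u64 pde_pat_bits(u16 pat_index)
	{
		u64 pde = 0;

		if (pat_index & BIT(0))		/* XE_PPGTT_PTE_PAT0 */
			pde |= BIT_ULL(3);
		if (pat_index & BIT(1))		/* XE_PPGTT_PTE_PAT1 */
			pde |= BIT_ULL(4);

		/* e.g. a user index of 9 (0b1001) silently lands on index 1 */
		return pde;
	}

So any index above 3 aliases onto [0, 3], and which one a shared
structure ends up with depends on whoever populated it first.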

I see that on the LRC side we always set WB, even on discrete. Why not
use that here too rather than forcing UC?
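
I.e. something along these lines (hypothetical, just to illustrate the
alternative I have in mind):

	/* Always pick the coherent WB index, mirroring the LRC setup,
	 * instead of switching on the host-side caching mode. */
	pat_index = xe->pat.idx[XE_CACHE_WB];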

The patch does what you're describing; I'm just not sure why we need
this change now, when to my knowledge things have been working for the
user-defined PAT cases.

Thanks,
Stuart

> ---
>  drivers/gpu/drm/xe/xe_migrate.c  | 10 ++++------
>  drivers/gpu/drm/xe/xe_pt.c       |  4 ++--
>  drivers/gpu/drm/xe/xe_pt_types.h |  3 +--
>  drivers/gpu/drm/xe/xe_vm.c       | 15 +++++++++++----
>  4 files changed, 18 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 3a276e2348a2..43599d4419a7 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -151,8 +151,7 @@ static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm,
>         for (pos = dpa_base; pos < vram_limit;
>              pos += SZ_1G, ofs += 8) {
>                 if (pos + SZ_1G >= vram_limit) {
> -                       entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs,
> -                                                         pat_index);
> +                       entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs);
>                         xe_map_wr(xe, &bo->vmap, ofs, u64, entry);
>  
>                         flags = vm->pt_ops->pte_encode_addr(xe, 0,
> @@ -206,7 +205,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
>  
>         /* PT30 & PT31 reserved for 2M identity map */
>         pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE;
> -       entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index);
> +       entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs);
>         xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
>  
>         map_ofs = (num_entries - num_setup) * XE_PAGE_SIZE;
> @@ -274,15 +273,14 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
>                         flags = XE_PDE_64K;
>  
>                 entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) *
> -                                                 XE_PAGE_SIZE, pat_index);
> +                                                 XE_PAGE_SIZE);
>                 xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
>                           entry | flags);
>         }
>  
>         /* Write PDE's that point to our BO. */
>         for (i = 0; i < map_ofs / PAGE_SIZE; i++) {
> -               entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE,
> -                                                 pat_index);
> +               entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE);
>  
>                 xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
>                           (i + 1) * 8, u64, entry);
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 330cc0f54a3f..f3a39e734a90 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -69,7 +69,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
>  
>         if (level > MAX_HUGEPTE_LEVEL)
>                 return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
> -                                                0, pat_index);
> +                                                0);
>  
>         return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) |
>                 XE_PTE_NULL;
> @@ -616,7 +616,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
>                         xe_child->is_compact = true;
>                 }
>  
> -               pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags;
> +               pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0) | flags;
>                 ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
>                                          pte);
>         }
> diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
> index 69eab6f37cfe..17cdd7c7e9f5 100644
> --- a/drivers/gpu/drm/xe/xe_pt_types.h
> +++ b/drivers/gpu/drm/xe/xe_pt_types.h
> @@ -45,8 +45,7 @@ struct xe_pt_ops {
>         u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr,
>                                u16 pat_index,
>                                u32 pt_level, bool devmem, u64 flags);
> -       u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
> -                            u16 pat_index);
> +       u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset);
>  };
>  
>  struct xe_pt_entry {
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 432ea325677d..42e3b1b8be5b 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -1512,13 +1512,21 @@ static u64 pte_encode_ps(u32 pt_level)
>         return 0;
>  }
>  
> -static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
> -                             const u16 pat_index)
> +static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
>  {
> +       struct xe_device *xe = xe_bo_device(bo);
> +       u16 pat_index;
>         u64 pde;
>  
>         pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
>         pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
> +
> +       if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
> +               pat_index = xe->pat.idx[XE_CACHE_WB];
> +       else
> +               pat_index = xe->pat.idx[XE_CACHE_NONE];
> +
> +       xe_assert(xe, pat_index <= 3);
>         pde |= pde_encode_pat_index(pat_index);
>  
>         return pde;
> @@ -2024,8 +2032,7 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
>  
>  u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
>  {
> -       return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
> -                                        tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
> +       return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
>  }
>  
>  static struct xe_exec_queue *


