[Intel-xe] [PATCH v3 03/11] drm/xe: Use vfunc for pte/pde ppgtt encoding
Welty, Brian
brian.welty at intel.com
Thu Sep 28 22:39:55 UTC 2023
On 9/27/2023 12:38 PM, Lucas De Marchi wrote:
> Move the function to encode pte/pde to be vfuncs inside struct xe_vm.
> This will allow to easily extend to platforms that don't have a
> compatible encoding.
>
> v2: Fix kunit build
>
> Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
> drivers/gpu/drm/xe/tests/xe_migrate.c | 2 +-
> drivers/gpu/drm/xe/xe_migrate.c | 18 ++--
> drivers/gpu/drm/xe/xe_pt.c | 125 +++-----------------------
> drivers/gpu/drm/xe/xe_pt.h | 6 --
> drivers/gpu/drm/xe/xe_pt_types.h | 14 +++
> drivers/gpu/drm/xe/xe_vm.c | 93 ++++++++++++++++++-
> drivers/gpu/drm/xe/xe_vm_types.h | 2 +
> 7 files changed, 128 insertions(+), 132 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
> index f58cd1da1a34..6906ff9d9c31 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
> @@ -301,7 +301,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
> /* First part of the test, are we updating our pagetable bo with a new entry? */
> xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
> 0xdeaddeadbeefbeef);
> - expected = xe_pte_encode(pt, 0, XE_CACHE_WB, 0);
> + expected = m->q->vm->pt_ops->pte_encode_bo(pt, 0, XE_CACHE_WB, 0);
> if (m->q->vm->flags & XE_VM_FLAG_64K)
> expected |= XE_PTE_PS64;
> if (xe_bo_is_vram(pt))
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 9438f609d18b..aa0396330903 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -189,14 +189,15 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
> return ret;
> }
>
> - entry = xe_pde_encode(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB);
> + entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB);
> xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
>
> map_ofs = (num_entries - num_level) * XE_PAGE_SIZE;
>
> /* Map the entire BO in our level 0 pt */
> for (i = 0, level = 0; i < num_entries; level++) {
> - entry = xe_pte_encode(bo, i * XE_PAGE_SIZE, XE_CACHE_WB, 0);
> + entry = vm->pt_ops->pte_encode_bo(bo, i * XE_PAGE_SIZE,
> + XE_CACHE_WB, 0);
>
> xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
>
> @@ -214,7 +215,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
> for (i = 0; i < batch->size;
> i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
> XE_PAGE_SIZE) {
> - entry = xe_pte_encode(batch, i, XE_CACHE_WB, 0);
> + entry = vm->pt_ops->pte_encode_bo(batch, i,
> + XE_CACHE_WB, 0);
>
> xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
> entry);
> @@ -238,16 +240,16 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
> if (vm->flags & XE_VM_FLAG_64K && level == 1)
> flags = XE_PDE_64K;
>
> - entry = xe_pde_encode(bo, map_ofs + (level - 1) *
> - XE_PAGE_SIZE, XE_CACHE_WB);
> + entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
> + XE_PAGE_SIZE, XE_CACHE_WB);
> xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
> entry | flags);
> }
>
> /* Write PDE's that point to our BO. */
> for (i = 0; i < num_entries - num_level; i++) {
> - entry = xe_pde_encode(bo, i * XE_PAGE_SIZE,
> - XE_CACHE_WB);
> + entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
> + XE_CACHE_WB);
>
> xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
> (i + 1) * 8, u64, entry);
> @@ -1255,7 +1257,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
>
> xe_tile_assert(tile, pt_bo->size == SZ_4K);
>
> - addr = xe_pte_encode(pt_bo, 0, XE_CACHE_WB, 0);
> + addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, XE_CACHE_WB, 0);
> bb->cs[bb->len++] = lower_32_bits(addr);
> bb->cs[bb->len++] = upper_32_bits(addr);
> }
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 0b8a45609e83..4d4c6a4c305e 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -47,109 +47,6 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
> return container_of(pt_dir->dir.entries[index], struct xe_pt, base);
> }
>
> -static u64 pde_encode_cache(enum xe_cache_level cache)
> -{
> - /* FIXME: I don't think the PPAT handling is correct for MTL */
> -
> - if (cache != XE_CACHE_NONE)
> - return PPAT_CACHED_PDE;
> -
> - return PPAT_UNCACHED;
> -}
> -
> -static u64 pte_encode_cache(enum xe_cache_level cache)
> -{
> - /* FIXME: I don't think the PPAT handling is correct for MTL */
> - switch (cache) {
> - case XE_CACHE_NONE:
> - return PPAT_UNCACHED;
> - case XE_CACHE_WT:
> - return PPAT_DISPLAY_ELLC;
> - default:
> - return PPAT_CACHED;
> - }
> -}
> -
> -static u64 pte_encode_ps(u32 pt_level)
> -{
> - /* XXX: Does hw support 1 GiB pages? */
> - XE_WARN_ON(pt_level > 2);
> -
> - if (pt_level == 1)
> - return XE_PDE_PS_2M;
> - else if (pt_level == 2)
> - return XE_PDPE_PS_1G;
> -
> - return 0;
> -}
> -
> -/**
> - * xe_pde_encode() - Encode a page-table directory entry pointing to
> - * another page-table.
> - * @bo: The page-table bo of the page-table to point to.
> - * @bo_offset: Offset in the page-table bo to point to.
> - * @cache: The cache level indicating the caching of @bo.
> - *
> - * TODO: Rename.
> - *
> - * Return: An encoded page directory entry. No errors.
> - */
> -u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
> - const enum xe_cache_level cache)
> -{
> - u64 pde;
> -
> - pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
> - pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
> - pde |= pde_encode_cache(cache);
> -
> - return pde;
> -}
> -
> -/**
> - * xe_pte_encode() - Encode a page-table entry pointing to memory.
> - * @bo: The BO representing the memory to point to.
> - * @bo_offset: The offset into @bo.
> - * @cache: The cache level indicating
> - * @pt_level: The page-table level of the page-table into which the entry
> - * is to be inserted.
> - *
> - * Return: An encoded page-table entry. No errors.
> - */
> -u64 xe_pte_encode(struct xe_bo *bo, u64 bo_offset, enum xe_cache_level cache,
> - u32 pt_level)
> -{
> - u64 pte;
> -
> - pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
> - pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
> - pte |= pte_encode_cache(cache);
> - pte |= pte_encode_ps(pt_level);
> -
> - if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
> - pte |= XE_PPGTT_PTE_DM;
> -
> - return pte;
> -}
> -
> -/* Like xe_pte_encode(), but with a vma and a partially-encoded pte */
> -static u64 __vma_pte_encode(u64 pte, struct xe_vma *vma,
> - enum xe_cache_level cache, u32 pt_level)
> -{
> - pte |= XE_PAGE_PRESENT;
> -
> - if (likely(!xe_vma_read_only(vma)))
> - pte |= XE_PAGE_RW;
> -
> - pte |= pte_encode_cache(cache);
> - pte |= pte_encode_ps(pt_level);
> -
> - if (unlikely(xe_vma_is_null(vma)))
> - pte |= XE_PTE_NULL;
> -
> - return pte;
> -}
> -
> static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
> unsigned int level)
> {
> @@ -158,15 +55,11 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
> if (!vm->scratch_bo[id])
> return 0;
>
> - if (level == 0) {
> - u64 empty = xe_pte_encode(vm->scratch_bo[id], 0,
> - XE_CACHE_WB, 0);
> + if (level > 0)
> + return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
> + 0, XE_CACHE_WB);
>
> - return empty;
> - } else {
> - return xe_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0,
> - XE_CACHE_WB);
> - }
> + return vm->pt_ops->pte_encode_bo(vm->scratch_bo[id], 0, XE_CACHE_WB, 0);
> }
>
> /**
> @@ -618,6 +511,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
> struct xe_pt_stage_bind_walk *xe_walk =
> container_of(walk, typeof(*xe_walk), base);
> struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
> + struct xe_vm *vm = xe_walk->vm;
> struct xe_pt *xe_child;
> bool covers;
> int ret = 0;
> @@ -630,9 +524,9 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
>
> XE_WARN_ON(xe_walk->va_curs_start != addr);
>
> - pte = __vma_pte_encode(is_null ? 0 :
> - xe_res_dma(curs) + xe_walk->dma_offset,
> - xe_walk->vma, xe_walk->cache, level);
> + pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
> + xe_res_dma(curs) + xe_walk->dma_offset,
> + xe_walk->vma, xe_walk->cache, level);
> pte |= xe_walk->default_pte;
>
> /*
> @@ -697,7 +591,8 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
> xe_child->is_compact = true;
> }
>
> - pte = xe_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags;
> + pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0,
> + xe_walk->cache) | flags;
> ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
> pte);
> }
> diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
> index 01be7ab08f87..d5460e58dbbf 100644
> --- a/drivers/gpu/drm/xe/xe_pt.h
> +++ b/drivers/gpu/drm/xe/xe_pt.h
> @@ -45,10 +45,4 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
>
> bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
>
> -u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
> - const enum xe_cache_level level);
> -
> -u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache,
> - u32 pt_level);
> -
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
> index 2ed64c0a4485..c58f6926fabf 100644
> --- a/drivers/gpu/drm/xe/xe_pt_types.h
> +++ b/drivers/gpu/drm/xe/xe_pt_types.h
> @@ -6,8 +6,13 @@
> #ifndef _XE_PT_TYPES_H_
> #define _XE_PT_TYPES_H_
>
> +#include <linux/types.h>
> +
> #include "xe_pt_walk.h"
>
> +struct xe_bo;
> +struct xe_vma;
> +
> enum xe_cache_level {
> XE_CACHE_NONE,
> XE_CACHE_WT,
> @@ -29,6 +34,15 @@ struct xe_pt {
> #endif
> };
>
> +struct xe_pt_ops {
> + u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset,
> + enum xe_cache_level cache, u32 pt_level);
> + u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma,
> + enum xe_cache_level cache, u32 pt_level);
> + u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
> + const enum xe_cache_level cache);
> +};
> +
> struct xe_pt_entry {
> struct xe_pt *pt;
> u64 pte;
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 861d050871bb..2e1b4d46d9ea 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -1191,6 +1191,93 @@ static struct drm_gpuva_fn_ops gpuva_ops = {
> .op_alloc = xe_vm_op_alloc,
> };
>
> +static u64 pde_encode_cache(enum xe_cache_level cache)
> +{
> + /* FIXME: I don't think the PPAT handling is correct for MTL */
> +
> + if (cache != XE_CACHE_NONE)
> + return PPAT_CACHED_PDE;
> +
> + return PPAT_UNCACHED;
> +}
> +
> +static u64 pte_encode_cache(enum xe_cache_level cache)
> +{
> + /* FIXME: I don't think the PPAT handling is correct for MTL */
> + switch (cache) {
> + case XE_CACHE_NONE:
> + return PPAT_UNCACHED;
> + case XE_CACHE_WT:
> + return PPAT_DISPLAY_ELLC;
> + default:
> + return PPAT_CACHED;
> + }
> +}
> +
> +static u64 pte_encode_ps(u32 pt_level)
> +{
> + /* XXX: Does hw support 1 GiB pages? */
> + XE_WARN_ON(pt_level > 2);
> +
> + if (pt_level == 1)
> + return XE_PDE_PS_2M;
> + else if (pt_level == 2)
> + return XE_PDPE_PS_1G;
> +
> + return 0;
> +}
> +
> +static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
> + const enum xe_cache_level cache)
> +{
> + u64 pde;
> +
> + pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
> + pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
> + pde |= pde_encode_cache(cache);
> +
> + return pde;
> +}
> +
> +static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
> + enum xe_cache_level cache, u32 pt_level)
> +{
> + u64 pte;
> +
> + pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
> + pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
> + pte |= pte_encode_cache(cache);
> + pte |= pte_encode_ps(pt_level);
> +
> + if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
> + pte |= XE_PPGTT_PTE_DM;
> +
> + return pte;
> +}
> +
> +static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
> + enum xe_cache_level cache, u32 pt_level)
> +{
> + pte |= XE_PAGE_PRESENT;
> +
> + if (likely(!xe_vma_read_only(vma)))
> + pte |= XE_PAGE_RW;
> +
> + pte |= pte_encode_cache(cache);
> + pte |= pte_encode_ps(pt_level);
> +
> + if (unlikely(xe_vma_is_null(vma)))
> + pte |= XE_PTE_NULL;
> +
> + return pte;
> +}
> +
> +static const struct xe_pt_ops xelp_pt_ops = {
> + .pte_encode_bo = xelp_pte_encode_bo,
Do we need a .pte_encode_bo op at all?
In the next patch you add a .pte_encode_addr op.
It looks like the callers of the pte_encode_bo op could instead go through
a small wrapper that calls the pte_encode_addr op,
such as:
ops.pte_encode_addr(xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE),
cache,
xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo),
pt_level, flags);
Just an idea, in case we want to consolidate the ops here.
-Brian
> + .pte_encode_vma = xelp_pte_encode_vma,
> + .pde_encode_bo = xelp_pde_encode_bo,
> +};
> +
> static void xe_vma_op_work_func(struct work_struct *w);
> static void vm_destroy_work_func(struct work_struct *w);
>
> @@ -1239,6 +1326,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>
> INIT_LIST_HEAD(&vm->extobj.list);
>
> + vm->pt_ops = &xelp_pt_ops;
> +
> if (!(flags & XE_VM_FLAG_MIGRATION))
> xe_device_mem_access_get(xe);
>
> @@ -1574,8 +1663,8 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
>
> u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
> {
> - return xe_pde_encode(vm->pt_root[tile->id]->bo, 0,
> - XE_CACHE_WB);
> + return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
> + XE_CACHE_WB);
> }
>
> static struct dma_fence *
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index af2ba4acf1f9..1c5553b842d7 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -249,6 +249,8 @@ struct xe_vm {
> bool munmap_rebind_inflight;
> } async_ops;
>
> + const struct xe_pt_ops *pt_ops;
> +
> /** @userptr: user pointer state */
> struct {
> /**
More information about the Intel-xe
mailing list