[Intel-xe] [PATCH v3 03/11] drm/xe: Use vfunc for pte/pde ppgtt encoding

Thu Sep 28 22:39:55 UTC 2023

On 9/27/2023 12:38 PM, Lucas De Marchi wrote:
> Move the function to encode pte/pde to be vfuncs inside struct xe_vm.
> This will allow to easily extend to platforms that don't have a
> compatible encoding.
> 
> v2: Fix kunit build
> 
> Reviewed-by: Matt Roper <matthew.d.roper at intel.com>
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
>   drivers/gpu/drm/xe/tests/xe_migrate.c |   2 +-
>   drivers/gpu/drm/xe/xe_migrate.c       |  18 ++--
>   drivers/gpu/drm/xe/xe_pt.c            | 125 +++-----------------------
>   drivers/gpu/drm/xe/xe_pt.h            |   6 --
>   drivers/gpu/drm/xe/xe_pt_types.h      |  14 +++
>   drivers/gpu/drm/xe/xe_vm.c            |  93 ++++++++++++++++++-
>   drivers/gpu/drm/xe/xe_vm_types.h      |   2 +
>   7 files changed, 128 insertions(+), 132 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
> index f58cd1da1a34..6906ff9d9c31 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
> @@ -301,7 +301,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
>   	/* First part of the test, are we updating our pagetable bo with a new entry? */
>   	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
>   		  0xdeaddeadbeefbeef);
> -	expected = xe_pte_encode(pt, 0, XE_CACHE_WB, 0);
> +	expected = m->q->vm->pt_ops->pte_encode_bo(pt, 0, XE_CACHE_WB, 0);
>   	if (m->q->vm->flags & XE_VM_FLAG_64K)
>   		expected |= XE_PTE_PS64;
>   	if (xe_bo_is_vram(pt))
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 9438f609d18b..aa0396330903 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -189,14 +189,15 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
>   		return ret;
>   	}
>   
> -	entry = xe_pde_encode(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB);
> +	entry = vm->pt_ops->pde_encode_bo(bo, bo->size - XE_PAGE_SIZE, XE_CACHE_WB);
>   	xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
>   
>   	map_ofs = (num_entries - num_level) * XE_PAGE_SIZE;
>   
>   	/* Map the entire BO in our level 0 pt */
>   	for (i = 0, level = 0; i < num_entries; level++) {
> -		entry = xe_pte_encode(bo, i * XE_PAGE_SIZE, XE_CACHE_WB, 0);
> +		entry = vm->pt_ops->pte_encode_bo(bo, i * XE_PAGE_SIZE,
> +						  XE_CACHE_WB, 0);
>   
>   		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);
>   
> @@ -214,7 +215,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
>   		for (i = 0; i < batch->size;
>   		     i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
>   		     XE_PAGE_SIZE) {
> -			entry = xe_pte_encode(batch, i, XE_CACHE_WB, 0);
> +			entry = vm->pt_ops->pte_encode_bo(batch, i,
> +							  XE_CACHE_WB, 0);
>   
>   			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
>   				  entry);
> @@ -238,16 +240,16 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
>   		if (vm->flags & XE_VM_FLAG_64K && level == 1)
>   			flags = XE_PDE_64K;
>   
> -		entry = xe_pde_encode(bo, map_ofs + (level - 1) *
> -					XE_PAGE_SIZE, XE_CACHE_WB);
> +		entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
> +						  XE_PAGE_SIZE, XE_CACHE_WB);
>   		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
>   			  entry | flags);
>   	}
>   
>   	/* Write PDE's that point to our BO. */
>   	for (i = 0; i < num_entries - num_level; i++) {
> -		entry = xe_pde_encode(bo, i * XE_PAGE_SIZE,
> -				      XE_CACHE_WB);
> +		entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
> +						  XE_CACHE_WB);
>   
>   		xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
>   			  (i + 1) * 8, u64, entry);
> @@ -1255,7 +1257,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
>   
>   			xe_tile_assert(tile, pt_bo->size == SZ_4K);
>   
> -			addr = xe_pte_encode(pt_bo, 0, XE_CACHE_WB, 0);
> +			addr = vm->pt_ops->pte_encode_bo(pt_bo, 0, XE_CACHE_WB, 0);
>   			bb->cs[bb->len++] = lower_32_bits(addr);
>   			bb->cs[bb->len++] = upper_32_bits(addr);
>   		}
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 0b8a45609e83..4d4c6a4c305e 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -47,109 +47,6 @@ static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index)
>   	return container_of(pt_dir->dir.entries[index], struct xe_pt, base);
>   }
>   
> -static u64 pde_encode_cache(enum xe_cache_level cache)
> -{
> -	/* FIXME: I don't think the PPAT handling is correct for MTL */
> -
> -	if (cache != XE_CACHE_NONE)
> -		return PPAT_CACHED_PDE;
> -
> -	return PPAT_UNCACHED;
> -}
> -
> -static u64 pte_encode_cache(enum xe_cache_level cache)
> -{
> -	/* FIXME: I don't think the PPAT handling is correct for MTL */
> -	switch (cache) {
> -	case XE_CACHE_NONE:
> -		return PPAT_UNCACHED;
> -	case XE_CACHE_WT:
> -		return PPAT_DISPLAY_ELLC;
> -	default:
> -		return PPAT_CACHED;
> -	}
> -}
> -
> -static u64 pte_encode_ps(u32 pt_level)
> -{
> -	/* XXX: Does hw support 1 GiB pages? */
> -	XE_WARN_ON(pt_level > 2);
> -
> -	if (pt_level == 1)
> -		return XE_PDE_PS_2M;
> -	else if (pt_level == 2)
> -		return XE_PDPE_PS_1G;
> -
> -	return 0;
> -}
> -
> -/**
> - * xe_pde_encode() - Encode a page-table directory entry pointing to
> - * another page-table.
> - * @bo: The page-table bo of the page-table to point to.
> - * @bo_offset: Offset in the page-table bo to point to.
> - * @cache: The cache level indicating the caching of @bo.
> - *
> - * TODO: Rename.
> - *
> - * Return: An encoded page directory entry. No errors.
> - */
> -u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
> -		  const enum xe_cache_level cache)
> -{
> -	u64 pde;
> -
> -	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
> -	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
> -	pde |= pde_encode_cache(cache);
> -
> -	return pde;
> -}
> -
> -/**
> - * xe_pte_encode() - Encode a page-table entry pointing to memory.
> - * @bo: The BO representing the memory to point to.
> - * @bo_offset: The offset into @bo.
> - * @cache: The cache level indicating
> - * @pt_level: The page-table level of the page-table into which the entry
> - * is to be inserted.
> - *
> - * Return: An encoded page-table entry. No errors.
> - */
> -u64 xe_pte_encode(struct xe_bo *bo, u64 bo_offset, enum xe_cache_level cache,
> -		  u32 pt_level)
> -{
> -	u64 pte;
> -
> -	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
> -	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
> -	pte |= pte_encode_cache(cache);
> -	pte |= pte_encode_ps(pt_level);
> -
> -	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
> -		pte |= XE_PPGTT_PTE_DM;
> -
> -	return pte;
> -}
> -
> -/* Like xe_pte_encode(), but with a vma and a partially-encoded pte */
> -static u64 __vma_pte_encode(u64 pte, struct xe_vma *vma,
> -			    enum xe_cache_level cache, u32 pt_level)
> -{
> -	pte |= XE_PAGE_PRESENT;
> -
> -	if (likely(!xe_vma_read_only(vma)))
> -		pte |= XE_PAGE_RW;
> -
> -	pte |= pte_encode_cache(cache);
> -	pte |= pte_encode_ps(pt_level);
> -
> -	if (unlikely(xe_vma_is_null(vma)))
> -		pte |= XE_PTE_NULL;
> -
> -	return pte;
> -}
> -
>   static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
>   			     unsigned int level)
>   {
> @@ -158,15 +55,11 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
>   	if (!vm->scratch_bo[id])
>   		return 0;
>   
> -	if (level == 0) {
> -		u64 empty = xe_pte_encode(vm->scratch_bo[id], 0,
> -					  XE_CACHE_WB, 0);
> +	if (level > 0)
> +		return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
> +						 0, XE_CACHE_WB);
>   
> -		return empty;
> -	} else {
> -		return xe_pde_encode(vm->scratch_pt[id][level - 1]->bo, 0,
> -				     XE_CACHE_WB);
> -	}
> +	return vm->pt_ops->pte_encode_bo(vm->scratch_bo[id], 0, XE_CACHE_WB, 0);
>   }
>   
>   /**
> @@ -618,6 +511,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
>   	struct xe_pt_stage_bind_walk *xe_walk =
>   		container_of(walk, typeof(*xe_walk), base);
>   	struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
> +	struct xe_vm *vm = xe_walk->vm;
>   	struct xe_pt *xe_child;
>   	bool covers;
>   	int ret = 0;
> @@ -630,9 +524,9 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
>   
>   		XE_WARN_ON(xe_walk->va_curs_start != addr);
>   
> -		pte = __vma_pte_encode(is_null ? 0 :
> -				       xe_res_dma(curs) + xe_walk->dma_offset,
> -				       xe_walk->vma, xe_walk->cache, level);
> +		pte = vm->pt_ops->pte_encode_vma(is_null ? 0 :
> +						 xe_res_dma(curs) + xe_walk->dma_offset,
> +						 xe_walk->vma, xe_walk->cache, level);
>   		pte |= xe_walk->default_pte;
>   
>   		/*
> @@ -697,7 +591,8 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
>   			xe_child->is_compact = true;
>   		}
>   
> -		pte = xe_pde_encode(xe_child->bo, 0, xe_walk->cache) | flags;
> +		pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0,
> +						xe_walk->cache) | flags;
>   		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
>   					 pte);
>   	}
> diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
> index 01be7ab08f87..d5460e58dbbf 100644
> --- a/drivers/gpu/drm/xe/xe_pt.h
> +++ b/drivers/gpu/drm/xe/xe_pt.h
> @@ -45,10 +45,4 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
>   
>   bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
>   
> -u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
> -		  const enum xe_cache_level level);
> -
> -u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache,
> -		  u32 pt_level);
> -
>   #endif
> diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
> index 2ed64c0a4485..c58f6926fabf 100644
> --- a/drivers/gpu/drm/xe/xe_pt_types.h
> +++ b/drivers/gpu/drm/xe/xe_pt_types.h
> @@ -6,8 +6,13 @@
>   #ifndef _XE_PT_TYPES_H_
>   #define _XE_PT_TYPES_H_
>   
> +#include <linux/types.h>
> +
>   #include "xe_pt_walk.h"
>   
> +struct xe_bo;
> +struct xe_vma;
> +
>   enum xe_cache_level {
>   	XE_CACHE_NONE,
>   	XE_CACHE_WT,
> @@ -29,6 +34,15 @@ struct xe_pt {
>   #endif
>   };
>   
> +struct xe_pt_ops {
> +	u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset,
> +			     enum xe_cache_level cache, u32 pt_level);
> +	u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma,
> +			      enum xe_cache_level cache, u32 pt_level);
> +	u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
> +			     const enum xe_cache_level cache);
> +};
> +
>   struct xe_pt_entry {
>   	struct xe_pt *pt;
>   	u64 pte;
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 861d050871bb..2e1b4d46d9ea 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -1191,6 +1191,93 @@ static struct drm_gpuva_fn_ops gpuva_ops = {
>   	.op_alloc = xe_vm_op_alloc,
>   };
>   
> +static u64 pde_encode_cache(enum xe_cache_level cache)
> +{
> +	/* FIXME: I don't think the PPAT handling is correct for MTL */
> +
> +	if (cache != XE_CACHE_NONE)
> +		return PPAT_CACHED_PDE;
> +
> +	return PPAT_UNCACHED;
> +}
> +
> +static u64 pte_encode_cache(enum xe_cache_level cache)
> +{
> +	/* FIXME: I don't think the PPAT handling is correct for MTL */
> +	switch (cache) {
> +	case XE_CACHE_NONE:
> +		return PPAT_UNCACHED;
> +	case XE_CACHE_WT:
> +		return PPAT_DISPLAY_ELLC;
> +	default:
> +		return PPAT_CACHED;
> +	}
> +}
> +
> +static u64 pte_encode_ps(u32 pt_level)
> +{
> +	/* XXX: Does hw support 1 GiB pages? */
> +	XE_WARN_ON(pt_level > 2);
> +
> +	if (pt_level == 1)
> +		return XE_PDE_PS_2M;
> +	else if (pt_level == 2)
> +		return XE_PDPE_PS_1G;
> +
> +	return 0;
> +}
> +
> +static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
> +			      const enum xe_cache_level cache)
> +{
> +	u64 pde;
> +
> +	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
> +	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
> +	pde |= pde_encode_cache(cache);
> +
> +	return pde;
> +}
> +
> +static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
> +			      enum xe_cache_level cache, u32 pt_level)
> +{
> +	u64 pte;
> +
> +	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
> +	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
> +	pte |= pte_encode_cache(cache);
> +	pte |= pte_encode_ps(pt_level);
> +
> +	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
> +		pte |= XE_PPGTT_PTE_DM;
> +
> +	return pte;
> +}
> +
> +static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
> +			       enum xe_cache_level cache, u32 pt_level)
> +{
> +	pte |= XE_PAGE_PRESENT;
> +
> +	if (likely(!xe_vma_read_only(vma)))
> +		pte |= XE_PAGE_RW;
> +
> +	pte |= pte_encode_cache(cache);
> +	pte |= pte_encode_ps(pt_level);
> +
> +	if (unlikely(xe_vma_is_null(vma)))
> +		pte |= XE_PTE_NULL;
> +
> +	return pte;
> +}
> +
> +static const struct xe_pt_ops xelp_pt_ops = {
> +	.pte_encode_bo = xelp_pte_encode_bo,

Do we need a .pte_encode_bo op?

In the next patch you add a .pte_encode_addr op.
It looks like the callers of the pte_encode_bo op could instead use a
wrapper that calls the pte_encode_addr op?

such as:

  ops.pte_encode_addr(xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE),
			cache,
			xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo),
			pt_level, flags);

Just an idea, in case we want to reduce the number of ops here.

-Brian

> +	.pte_encode_vma = xelp_pte_encode_vma,
> +	.pde_encode_bo = xelp_pde_encode_bo,
> +};
> +
>   static void xe_vma_op_work_func(struct work_struct *w);
>   static void vm_destroy_work_func(struct work_struct *w);
>   
> @@ -1239,6 +1326,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>   
>   	INIT_LIST_HEAD(&vm->extobj.list);
>   
> +	vm->pt_ops = &xelp_pt_ops;
> +
>   	if (!(flags & XE_VM_FLAG_MIGRATION))
>   		xe_device_mem_access_get(xe);
>   
> @@ -1574,8 +1663,8 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
>   
>   u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
>   {
> -	return xe_pde_encode(vm->pt_root[tile->id]->bo, 0,
> -			     XE_CACHE_WB);
> +	return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
> +					 XE_CACHE_WB);
>   }
>   
>   static struct dma_fence *
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index af2ba4acf1f9..1c5553b842d7 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -249,6 +249,8 @@ struct xe_vm {
>   		bool munmap_rebind_inflight;
>   	} async_ops;
>   
> +	const struct xe_pt_ops *pt_ops;
> +
>   	/** @userptr: user pointer state */
>   	struct {
>   		/**