[Intel-xe] [PATCH 7/9] drm/xe: Use pat_index to encode pte
Lucas De Marchi
lucas.demarchi at intel.com
Tue Sep 26 14:59:57 UTC 2023
On Mon, Sep 25, 2023 at 04:27:20PM -0700, Matt Roper wrote:
>On Mon, Sep 25, 2023 at 03:10:47PM -0700, Lucas De Marchi wrote:
>> Change the xelp_pte_encode() function to use the platform-dependent
>> pat_index. The same function can be used for all platforms since they
>> all encode the pat_index bits at the same positions in the pte layout.
>> Platforms that lack the most significant bit (PAT3) are fine as long
>> as the pat_index lookup never returns a bogus index that would set it.
>>
>> To maintain consistency, also add xe argument to pde_encode_cache().
>> This will be needed for Xe2 later.
>>
>> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_bo.h | 11 ++++---
>> drivers/gpu/drm/xe/xe_migrate.c | 6 ++--
>> drivers/gpu/drm/xe/xe_pt_types.h | 4 ++-
>> drivers/gpu/drm/xe/xe_vm.c | 50 +++++++++++++++++++-------------
>> 4 files changed, 44 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
>> index e3c90d45e723..885412581158 100644
>> --- a/drivers/gpu/drm/xe/xe_bo.h
>> +++ b/drivers/gpu/drm/xe/xe_bo.h
>> @@ -39,10 +39,13 @@
>> #define XE_BO_INTERNAL_TEST BIT(30)
>> #define XE_BO_INTERNAL_64K BIT(31)
>>
>> -#define PPAT_UNCACHED GENMASK_ULL(4, 3)
>> -#define PPAT_CACHED_PDE 0
>> -#define PPAT_CACHED BIT_ULL(7)
>> -#define PPAT_DISPLAY_ELLC BIT_ULL(4)
>> +#define XE_PPGTT_PDE_UNCACHED GENMASK_ULL(4, 3)
>> +#define XE_PPGTT_PDE_CACHED 0
>
>Shouldn't these two be going away? Indices 0 and 3 are only correct for
>certain platforms. The PDE uses the same PAT encoding as the PTE, as
>documented in the bspec's PTF_COMMON_BITS (55730) and referenced from
>pages 62361, 55731, etc.
I was postponing this change until it's needed for Xe2, but it seems
this is already not compatible with (at least) MTL, and what we
currently have in xe is broken.
I will take a look at including this in v2. Thanks.
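
Roughly, I'm thinking of something along these lines (untested sketch,
not the final patch: it mirrors pte_encode_cache() on the assumption
that the PDE carries the PAT bits at the same low positions per
PTF_COMMON_BITS; how many PAT bits the PDE actually exposes on each
platform still needs to be checked against the bspec):

static u64 pde_encode_cache(struct xe_device *xe, enum xe_cache_level cache)
{
        /*
         * Per PTF_COMMON_BITS (bspec 55730) the PDE uses the same PAT
         * encoding as the PTE, so derive the bits from the per-platform
         * pat_index instead of hardcoding indices 0 and 3.
         */
        u32 pat_index = xe->pat.idx[cache];
        u64 pde = 0;

        if (pat_index & BIT(0))
                pde |= XE_PPGTT_PTE_PAT0;

        if (pat_index & BIT(1))
                pde |= XE_PPGTT_PTE_PAT1;

        if (pat_index & BIT(2))
                pde |= XE_PPGTT_PTE_PAT2;

        return pde;
}

That would also let XE_PPGTT_PDE_UNCACHED/CACHED go away entirely.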
Lucas De Marchi
>
>> +
>> +#define XELPG_PPGTT_PTE_PAT3 BIT_ULL(62)
>> +#define XE_PPGTT_PTE_PAT2 BIT_ULL(7)
>> +#define XE_PPGTT_PTE_PAT1 BIT_ULL(4)
>> +#define XE_PPGTT_PTE_PAT0 BIT_ULL(3)
>>
>> #define XE_PTE_SHIFT 12
>> #define XE_PAGE_SIZE (1 << XE_PTE_SHIFT)
>> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
>> index f22356d69ae3..11dc3b0e0350 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.c
>> +++ b/drivers/gpu/drm/xe/xe_migrate.c
>> @@ -261,7 +261,8 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
>>
>> level = 2;
>> ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
>> - flags = vm->pt_ops->pte_encode_addr(0, XE_CACHE_WB, level, true, 0);
>> + flags = vm->pt_ops->pte_encode_addr(xe, 0, XE_CACHE_WB, level,
>> + true, 0);
>>
>> /*
>> * Use 1GB pages, it shouldn't matter the physical amount of
>> @@ -498,7 +499,8 @@ static void emit_pte(struct xe_migrate *m,
>> devmem = true;
>> }
>>
>> - addr |= m->q->vm->pt_ops->pte_encode_addr(addr, XE_CACHE_WB,
>> + addr |= m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
>> + addr, XE_CACHE_WB,
>> 0, devmem, flags);
>> bb->cs[bb->len++] = lower_32_bits(addr);
>> bb->cs[bb->len++] = upper_32_bits(addr);
>> diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
>> index bf5000499251..bd6645295fe6 100644
>> --- a/drivers/gpu/drm/xe/xe_pt_types.h
>> +++ b/drivers/gpu/drm/xe/xe_pt_types.h
>> @@ -11,6 +11,7 @@
>> #include "xe_pt_walk.h"
>>
>> struct xe_bo;
>> +struct xe_device;
>> struct xe_vma;
>>
>> enum xe_cache_level {
>> @@ -40,7 +41,8 @@ struct xe_pt_ops {
>> enum xe_cache_level cache, u32 pt_level);
>> u64 (*pte_encode_vma)(u64 pte, struct xe_vma *vma,
>> enum xe_cache_level cache, u32 pt_level);
>> - u64 (*pte_encode_addr)(u64 addr, enum xe_cache_level cache,
>> + u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr,
>> + enum xe_cache_level cache,
>> u32 pt_level, bool devmem, u64 flags);
>> u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
>> const enum xe_cache_level cache);
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index 23452b98d853..ab9cf1de566a 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -1191,27 +1191,32 @@ static struct drm_gpuva_fn_ops gpuva_ops = {
>> .op_alloc = xe_vm_op_alloc,
>> };
>>
>> -static u64 pde_encode_cache(enum xe_cache_level cache)
>> +static u64 pde_encode_cache(struct xe_device *xe, enum xe_cache_level cache)
>> {
>> - /* FIXME: I don't think the PPAT handling is correct for MTL */
>> -
>> if (cache != XE_CACHE_NONE)
>> - return PPAT_CACHED_PDE;
>> + return XE_PPGTT_PDE_CACHED;
>>
>> - return PPAT_UNCACHED;
>> + return XE_PPGTT_PDE_UNCACHED;
>
>As noted above, we're getting different behavior from these on different
>platforms. We should be encoding the same way we do for the PTE.
>
>
>Matt
>
>> }
>>
>> -static u64 pte_encode_cache(enum xe_cache_level cache)
>> +static u64 pte_encode_cache(struct xe_device *xe, enum xe_cache_level cache)
>> {
>> - /* FIXME: I don't think the PPAT handling is correct for MTL */
>> - switch (cache) {
>> - case XE_CACHE_NONE:
>> - return PPAT_UNCACHED;
>> - case XE_CACHE_WT:
>> - return PPAT_DISPLAY_ELLC;
>> - default:
>> - return PPAT_CACHED;
>> - }
>> + u32 pat_index = xe->pat.idx[cache];
>> + u64 pte = 0;
>> +
>> + if (pat_index & BIT(0))
>> + pte |= XE_PPGTT_PTE_PAT0;
>> +
>> + if (pat_index & BIT(1))
>> + pte |= XE_PPGTT_PTE_PAT1;
>> +
>> + if (pat_index & BIT(2))
>> + pte |= XE_PPGTT_PTE_PAT2;
>> +
>> + if (pat_index & BIT(3))
>> + pte |= XELPG_PPGTT_PTE_PAT3;
>> +
>> + return pte;
>> }
>>
>> static u64 pte_encode_ps(u32 pt_level)
>> @@ -1229,11 +1234,12 @@ static u64 pte_encode_ps(u32 pt_level)
>> static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
>> const enum xe_cache_level cache)
>> {
>> + struct xe_device *xe = xe_bo_device(bo);
>> u64 pde;
>>
>> pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
>> pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
>> - pde |= pde_encode_cache(cache);
>> + pde |= pde_encode_cache(xe, cache);
>>
>> return pde;
>> }
>> @@ -1241,11 +1247,12 @@ static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
>> static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
>> enum xe_cache_level cache, u32 pt_level)
>> {
>> + struct xe_device *xe = xe_bo_device(bo);
>> u64 pte;
>>
>> pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
>> pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
>> - pte |= pte_encode_cache(cache);
>> + pte |= pte_encode_cache(xe, cache);
>> pte |= pte_encode_ps(pt_level);
>>
>> if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
>> @@ -1257,12 +1264,14 @@ static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
>> static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
>> enum xe_cache_level cache, u32 pt_level)
>> {
>> + struct xe_device *xe = xe_vma_vm(vma)->xe;
>> +
>> pte |= XE_PAGE_PRESENT;
>>
>> if (likely(!xe_vma_read_only(vma)))
>> pte |= XE_PAGE_RW;
>>
>> - pte |= pte_encode_cache(cache);
>> + pte |= pte_encode_cache(xe, cache);
>> pte |= pte_encode_ps(pt_level);
>>
>> if (unlikely(xe_vma_is_null(vma)))
>> @@ -1271,7 +1280,8 @@ static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma,
>> return pte;
>> }
>>
>> -static u64 xelp_pte_encode_addr(u64 addr, enum xe_cache_level cache,
>> +static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr,
>> + enum xe_cache_level cache,
>> u32 pt_level, bool devmem, u64 flags)
>> {
>> u64 pte;
>> @@ -1281,7 +1291,7 @@ static u64 xelp_pte_encode_addr(u64 addr, enum xe_cache_level cache,
>>
>> pte = addr;
>> pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
>> - pte |= pte_encode_cache(cache);
>> + pte |= pte_encode_cache(xe, cache);
>> pte |= pte_encode_ps(pt_level);
>>
>> if (devmem)
>> --
>> 2.40.1
>>
>
>--
>Matt Roper
>Graphics Software Engineer
>Linux GPU Platform Enablement
>Intel Corporation