[Intel-xe] [PATCH] drm/xe/ppgtt: Add support for correct PAT encoding in PTE and PDE

Vodapalli, Ravi Kumar ravi.kumar.vodapalli at intel.com
Mon Jul 31 16:05:30 UTC 2023



On 7/27/2023 12:57 AM, Matthew Brost wrote:
> On Wed, Jul 26, 2023 at 06:19:16PM +0530, Ravi Kumar Vodapalli wrote:
>> Different platforms has different PAT encoding in PTE and PDE format, add
>> correct PAT encoding for pre-Xe2 platforms (XELP, XEHPC, XELPG).
>>
>> Bspec: 45101, 71582
>> Signed-off-by: Ravi Kumar Vodapalli <ravi.kumar.vodapalli at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_pt.c       | 119 ++++++++++++++++++++++++++-----
>>   drivers/gpu/drm/xe/xe_pt.h       |   2 +
>>   drivers/gpu/drm/xe/xe_vm_types.h |   2 +
>>   3 files changed, 106 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
>> index d4660520ac2c..09dafb0e47ea 100644
>> --- a/drivers/gpu/drm/xe/xe_pt.c
>> +++ b/drivers/gpu/drm/xe/xe_pt.c
>> @@ -16,6 +16,7 @@
>>   #include "xe_trace.h"
>>   #include "xe_ttm_stolen_mgr.h"
>>   #include "xe_vm.h"
>> +#include "xe_pt.h"
>>   
>>   struct xe_pt_dir {
>>   	struct xe_pt pt;
>> @@ -62,6 +63,8 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
>>   {
>>   	u64 pde;
>>   	bool is_vram;
>> +	u8 pat_index = 0;
>> +	struct xe_vm *vm = bo->vm;
>>   
>>   	pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE, &is_vram);
>>   	pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
>> @@ -70,10 +73,9 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
>>   
>>   	/* FIXME: I don't think the PPAT handling is correct for MTL */
> Stale comment, this should be correct after this series.
>
>>   
>> -	if (level != XE_CACHE_NONE)
>> -		pde |= PPAT_CACHED_PDE;
>> -	else
>> -		pde |= PPAT_UNCACHED;
>> +
>> +	pat_index = xe_pat_get_index(xe_bo_device(bo), level);
>> +	pde = vm->ppgtt_pde_encode(pde, pat_index);
>>   
>>   	return pde;
>>   }
>> @@ -103,6 +105,9 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
>>   static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
>>   			struct xe_vma *vma, u32 pt_level)
>>   {
>> +	struct xe_vm *vm = xe_vma_vm(vma);
>> +	u8 pat_index = 0;
>> +
>>   	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;
>>   
>>   	if (unlikely(vma && xe_vma_read_only(vma)))
>> @@ -111,19 +116,8 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
>>   	if (unlikely(vma && xe_vma_is_null(vma)))
>>   		pte |= XE_PTE_NULL;
>>   
>> -	/* FIXME: I don't think the PPAT handling is correct for MTL */
>> -
>> -	switch (cache) {
>> -	case XE_CACHE_NONE:
>> -		pte |= PPAT_UNCACHED;
>> -		break;
>> -	case XE_CACHE_WT:
>> -		pte |= PPAT_DISPLAY_ELLC;
>> -		break;
>> -	default:
>> -		pte |= PPAT_CACHED;
>> -		break;
>> -	}
>> +	pat_index = xe_pat_get_index(vm->xe, cache);
>> +	pte = vm->ppgtt_pte_encode(pte, pat_index);
>>   
>>   	if (pt_level == 1)
>>   		pte |= XE_PDE_PS_2M;
>> @@ -187,6 +181,91 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
>>   	}
>>   }
>>   
>> +static u64 __xelp_ppgtt_pde_encode_pat(u64 pte_pat, u8 pat_index)
>> +{
>> +
>> +	pte_pat &= ~(BIT(12) | BIT(4) | BIT(3));
> Defines rather than just doing BITs.
>
>> +
>> +	if (pat_index & BIT(0))
>> +		pte_pat |= BIT(3);
>> +
>> +	if (pat_index & BIT(1))
>> +		pte_pat |= BIT(4);
>> +
>> +	if (pat_index & BIT(2))
>> +		pte_pat |= BIT(12);
>> +
>> +	return pte_pat;
>> +}
>> +
>> +static u64 __xelp_ppgtt_pte_encode_pat(u64 pte_pat, u8 pat_index)
>> +{
>> +
>> +	pte_pat &= ~(BIT(7) | BIT(4) | BIT(3));
>> +
>> +	if (pat_index & BIT(0))
>> +		pte_pat |= BIT(3);
>> +
>> +	if (pat_index & BIT(1))
>> +		pte_pat |= BIT(4);
>> +
>> +	if (pat_index & BIT(2))
>> +		pte_pat |= BIT(7);
>> +
>> +	return pte_pat;
>> +}
>> +
>> +u8 xe_pat_get_index(struct xe_device *xe, enum xe_cache_level cache)
> This doesn't need to be export, i.e. make this static.
>
> Also in general this is a really goofy use of vfuncs vs. if else. The
> vfuncs are only set to 1 function and this function has 3 branches. Very
> confusing.
>
> It would make more sense to have a vfunc for getting the pat index with
> different implementations based on graphics version. Then non-vfuncs to
> the pat in the PTE and PDE.

As per Lucas comments there should be only one common function to get 
pat index
if we implement to get pat index in one function then there would be if 
else, or we can
do one thing place platform specific get index in separate functions and 
call them from
xe_pat_get_index() again if else cannot be avoided
other possibility is implement vfunc for every platform and initialize 
that in some function
and call it when encode is needed.
Lucas has to comment which would be better approach

>> +{
>> +	u8 pat_index;
>> +
>> +	if (GRAPHICS_VERx100(xe) >= 1270) {
>> +		switch (cache) {
>> +		case XE_CACHE_NONE:
>> +			pat_index = 2;
>> +		break;
>> +		case XE_CACHE_WT:
>> +			pat_index = 1;
>> +		break;
>> +		case XE_CACHE_WB:
>> +		default:
>> +			pat_index = 0;
>> +		break;
>> +		}
>> +	}
>> +	else if (GRAPHICS_VERx100(xe) >= 1260) {
>> +		switch (cache) {
>> +		case XE_CACHE_NONE:
>> +			pat_index = 0;
>> +		break;
>> +		case XE_CACHE_WT:
>> +			pat_index = 2;
>> +		break;
>> +		case XE_CACHE_WB:
>> +		default:
>> +			pat_index = 3;
>> +		break;
>> +		}
>> +	}
>> +	else
>> +	{
>> +		switch (cache) {
>> +		case XE_CACHE_NONE:
>> +			pat_index = 3;
>> +		break;
>> +		case XE_CACHE_WT:
>> +			pat_index = 2;
>> +		break;
>> +		case XE_CACHE_WB:
>> +		default:
>> +			pat_index = 0;
>> +		break;
>> +		}
>> +	}
>> +
>> +	return pat_index;
>> +}
>> +
>>   /**
>>    * xe_pt_create() - Create a page-table.
>>    * @vm: The vm to create for.
>> @@ -216,6 +295,12 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
>>   	if (!pt)
>>   		return ERR_PTR(-ENOMEM);
>>   
>> +	/* __xelp_pte_encode applies to xelp, xehpc and xelpg platform*/
>> +	vm->ppgtt_pte_encode = __xelp_ppgtt_pte_encode_pat;
>> +
>> +	/* __xelp_pde_encode applies to xelp, xehpc and xelpg platform*/
>> +	vm->ppgtt_pde_encode = __xelp_ppgtt_pde_encode_pat;
>> +
>>   	bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
>>   				  ttm_bo_type_kernel,
>>   				  XE_BO_CREATE_VRAM_IF_DGFX(tile) |
>> diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
>> index aaf4b7b851e2..0ed69abf8202 100644
>> --- a/drivers/gpu/drm/xe/xe_pt.h
>> +++ b/drivers/gpu/drm/xe/xe_pt.h
>> @@ -51,4 +51,6 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,
>>   u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
>>   		  u64 offset, enum xe_cache_level cache,
>>   		  u32 pt_level);
>> +
>> +u8 xe_pat_get_index(struct xe_device *xe, enum xe_cache_level cache);
>>   #endif
>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
>> index 261a4b0e8570..fc976e207462 100644
>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>> @@ -336,6 +336,8 @@ struct xe_vm {
>>   
>>   	/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
>>   	bool batch_invalidate_tlb;
>> +	u64 (*ppgtt_pte_encode)(u64 pte_pat, u8 pat_index);
>> +	u64 (*ppgtt_pde_encode)(u64 pte_pat, u8 pat_index);
> Kernel doc and for vfuncs make this a const struct of ops.
>
> An example of what I'm refering to is 'struct xe_engine_ops'.
>
> Matt
>
>>   };
>>   
>>   /** struct xe_vma_op_map - VMA map operation */
>> -- 
>> 2.25.1
>>



More information about the Intel-xe mailing list