[Intel-xe] [PATCH v2 06/10] drm/xe: Set PTE_DM bit for stolen on MTL

Wed Jul 26 16:58:14 UTC 2023

On Wed, Jul 26, 2023 at 09:07:04AM -0700, Lucas De Marchi wrote:
> Integrated platforms with graphics version 12.70 and beyond should set
> the PTE_DM bit (named PTE_LM before this patch) in the PTE when it's
> stolen memory. Add a new function, xe_bo_is_stolen_devmem(), and use it
> when encoding the PTE.
> 
> In some places in the spec the PTE bit is called "Local Memory",
> abbreviated as LM, and in others it's called "Device Memory" (DM). Since
> we moved away from "Local Memory" and preferred the "vram" terminology,
> also rename the macros as DM to follow the name of the new function.
> 
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_bo.c      | 15 +++++++++++++++
>  drivers/gpu/drm/xe/xe_bo.h      |  5 +++--
>  drivers/gpu/drm/xe/xe_ggtt.c    |  4 ++--
>  drivers/gpu/drm/xe/xe_migrate.c |  4 ++--
>  drivers/gpu/drm/xe/xe_pt.c      | 13 +++++++------
>  5 files changed, 29 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 29813271cc4c..a34a9248ed5c 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -80,6 +80,21 @@ bool xe_bo_is_stolen(struct xe_bo *bo)
>  	return bo->ttm.resource->mem_type == XE_PL_STOLEN;
>  }
>  
> +/**
> + * xe_bo_is_stolen_devmem - check if BO is of stolen type accessed via PCI BAR
> + * @bo: The BO
> + *
> + * The stolen memory is accessed through the PCI BAR for both DGFX and some
> + * integrated platforms that have a dedicated bit in the PTE for devmem (DM).
> + *
> + * Returns: true if it's stolen memory accessed via PCI BAR, false otherwise.
> + */
> +bool xe_bo_is_stolen_devmem(struct xe_bo *bo)
> +{
> +	return xe_bo_is_stolen(bo) &&
> +		GRAPHICS_VERx100(xe_bo_device(bo)) >= 1270;
> +}
> +
>  static bool xe_bo_is_user(struct xe_bo *bo)
>  {
>  	return bo->flags & XE_BO_CREATE_USER_BIT;
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 894ea0deb34b..b29750a47d23 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -58,9 +58,9 @@
>  #define XE_PDPE_PS_1G			BIT_ULL(7)
>  #define XE_PDE_IPS_64K			BIT_ULL(11)
>  
> -#define XE_GGTT_PTE_LM			BIT_ULL(1)
> +#define XE_GGTT_PTE_DM			BIT_ULL(1)
>  #define XE_USM_PPGTT_PTE_AE		BIT_ULL(10)
> -#define XE_PPGTT_PTE_LM			BIT_ULL(11)
> +#define XE_PPGTT_PTE_DM			BIT_ULL(11)
>  #define XE_PDE_64K			BIT_ULL(6)
>  #define XE_PTE_PS64			BIT_ULL(8)
>  #define XE_PTE_NULL			BIT_ULL(9)
> @@ -230,6 +230,7 @@ void xe_bo_vunmap(struct xe_bo *bo);
>  bool mem_type_is_vram(u32 mem_type);
>  bool xe_bo_is_vram(struct xe_bo *bo);
>  bool xe_bo_is_stolen(struct xe_bo *bo);
> +bool xe_bo_is_stolen_devmem(struct xe_bo *bo);
>  uint64_t vram_region_gpu_offset(struct ttm_resource *res);
>  
>  bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
> diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
> index f57dd8703d93..9b6bff724613 100644
> --- a/drivers/gpu/drm/xe/xe_ggtt.c
> +++ b/drivers/gpu/drm/xe/xe_ggtt.c
> @@ -35,8 +35,8 @@ u64 xe_ggtt_pte_encode(struct xe_bo *bo, u64 bo_offset)
>  	pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
>  	pte |= XE_PAGE_PRESENT;
>  
> -	if (xe_bo_is_vram(bo))
> -		pte |= XE_GGTT_PTE_LM;
> +	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
> +		pte |= XE_GGTT_PTE_DM;
>  
>  	/* FIXME: vfunc + pass in caching rules */
>  	if (xe->info.platform == XE_METEORLAKE) {
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 2a4b22c3a024..aad76a6a8094 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -258,7 +258,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
>  		level = 2;
>  		ofs = map_ofs + XE_PAGE_SIZE * level + 256 * 8;
>  		flags = XE_PAGE_RW | XE_PAGE_PRESENT | PPAT_CACHED |
> -			XE_PPGTT_PTE_LM | XE_PDPE_PS_1G;
> +			XE_PPGTT_PTE_DM | XE_PDPE_PS_1G;
>  
>  		/*
>  		 * Use 1GB pages, it shouldn't matter the physical amount of
> @@ -463,7 +463,7 @@ static void emit_pte(struct xe_migrate *m,
>  				}
>  
>  				addr += vram_region_gpu_offset(bo->ttm.resource);
> -				addr |= XE_PPGTT_PTE_LM;
> +				addr |= XE_PPGTT_PTE_DM;
>  			}
>  			addr |= PPAT_CACHED | XE_PAGE_PRESENT | XE_PAGE_RW;
>  			bb->cs[bb->len++] = lower_32_bits(addr);
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 38ccc96c4584..debfe6860c28 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -127,8 +127,8 @@ u64 xe_pte_encode(struct xe_bo *bo, u64 offset, enum xe_cache_level cache,
>  	u64 pte;
>  
>  	pte = xe_bo_addr(bo, offset, XE_PAGE_SIZE);
> -	if (xe_bo_is_vram(bo))
> -		pte |= XE_PPGTT_PTE_LM;
> +	if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
> +		pte |= XE_PPGTT_PTE_DM;

Can stolen memory ever get bound into a PPGTT?  Either way, the
condition definitely doesn't hurt anything.

Reviewed-by: Matt Roper <matthew.d.roper at intel.com>

>  
>  	return __pte_encode(pte, cache, NULL, pt_level);
>  }
> @@ -714,7 +714,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
>  		 struct xe_vm_pgtable_update *entries, u32 *num_entries)
>  {
>  	struct xe_bo *bo = xe_vma_bo(vma);
> -	bool is_vram = !xe_vma_is_userptr(vma) && bo && xe_bo_is_vram(bo);
> +	bool is_devmem = !xe_vma_is_userptr(vma) && bo &&
> +		(xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo));
>  	struct xe_res_cursor curs;
>  	struct xe_pt_stage_bind_walk xe_walk = {
>  		.base = {
> @@ -728,13 +729,13 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
>  		.va_curs_start = xe_vma_start(vma),
>  		.vma = vma,
>  		.wupd.entries = entries,
> -		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_vram,
> +		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem,
>  	};
>  	struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
>  	int ret;
>  
> -	if (is_vram) {
> -		xe_walk.default_pte = XE_PPGTT_PTE_LM;
> +	if (is_devmem) {
> +		xe_walk.default_pte = XE_PPGTT_PTE_DM;
>  		if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
>  			xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
>  		xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
> -- 
> 2.40.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation