[Intel-xe] [PATCH 1/3] drm/xe/bo: support tiered vram allocation for small-bar

Souza, Jose jose.souza at intel.com
Wed Mar 22 17:01:50 UTC 2023


On Wed, 2023-03-22 at 14:19 +0000, Matthew Auld wrote:
> Add the new flag XE_BO_NEEDS_CPU_ACCESS, to force allocating in the
> mappable part of vram. If no flag is specified we do a topdown
> allocation, to limit the chances of stealing the precious mappable
> part when we don't need it. If this is a full-bar system, then this
> all gets nooped.
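
The resulting placement rule, condensed from the hunks below:

	if (io_size < gt->mem.vram.size) {
		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS)
			place.lpfn = io_size >> PAGE_SHIFT; /* mappable window only */
		else
			place.flags |= TTM_PL_FLAG_TOPDOWN; /* keep clear of it */
	}

On full-bar parts io_size == vram.size, so neither branch fires and
placement is unchanged.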
> 
> For kernel users, it looks like xe_bo_create_pin_map() is the central
> place which users should call if they want CPU access to the object, so
> add the flag there.
> 
> We still need to plumb this through for userspace allocations. Also it
> looks like page-tables are using pin_map(), which is less than ideal. If
> we can already use the GPU to do page-table management, then maybe we
> should just force that for small-bar.
> 
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> Reviewed-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> ---
>  drivers/gpu/drm/xe/tests/xe_migrate.c |  3 +-
>  drivers/gpu/drm/xe/xe_bo.c            | 83 ++++++++++++++++++---------
>  drivers/gpu/drm/xe/xe_bo.h            |  1 +
>  drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |  4 ++
>  4 files changed, 62 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
> index 17829f878757..de101c3a6406 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
> @@ -108,7 +108,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
>  	struct xe_bo *sysmem = xe_bo_create_locked(xe, m->gt, NULL,
>  						   bo->size,
>  						   ttm_bo_type_kernel,
> -						   XE_BO_CREATE_SYSTEM_BIT);
> +						   XE_BO_CREATE_SYSTEM_BIT |
> +						   XE_BO_NEEDS_CPU_ACCESS);
>  	if (IS_ERR(sysmem)) {
>  		KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n",
>  			   str, PTR_ERR(sysmem));
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index ab9cd9286f77..86908d87fb99 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -96,22 +96,30 @@ static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
>  static void try_add_vram0(struct xe_device *xe, struct xe_bo *bo,
>  			  struct ttm_place *places, u32 bo_flags, u32 *c)
>  {
> -	struct xe_gt *gt;
> -
>  	if (bo_flags & XE_BO_CREATE_VRAM0_BIT) {
> +		struct ttm_place place = {};

nit: mem_type could go straight into the initializer:

struct ttm_place place = {
	.mem_type = XE_PL_VRAM0,
};

> +		struct xe_gt *gt;
> +		u64 io_size;
> +
>  		gt = mem_type_to_gt(xe, XE_PL_VRAM0);
> +		io_size = gt->mem.vram.io_size;
>  		XE_BUG_ON(!gt->mem.vram.size);
>  
> -		places[*c] = (struct ttm_place) {
> -			.mem_type = XE_PL_VRAM0,
> -			/*
> -			 * For eviction / restore on suspend / resume objects
> -			 * pinned in VRAM must be contiguous
> -			 */
> -			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
> -					     XE_BO_CREATE_GGTT_BIT) ?
> -				TTM_PL_FLAG_CONTIGUOUS : 0,
> -		};
> +		place.mem_type = XE_PL_VRAM0;
> +
> +		if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
> +				XE_BO_CREATE_GGTT_BIT))
> +			place.flags |= TTM_PL_FLAG_CONTIGUOUS;
> +
> +		if (io_size < gt->mem.vram.size) {
> +			if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
> +				place.fpfn = 0;
> +				place.lpfn = io_size >> PAGE_SHIFT;
> +			} else {
> +				place.flags |= TTM_PL_FLAG_TOPDOWN;
> +			}
> +		}
> +		places[*c] = place;
>  		*c += 1;
>  
>  		if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
> @@ -122,22 +130,30 @@ static void try_add_vram0(struct xe_device *xe, struct xe_bo *bo,
>  static void try_add_vram1(struct xe_device *xe, struct xe_bo *bo,
>  			  struct ttm_place *places, u32 bo_flags, u32 *c)

try_add_vram0() and try_add_vram1() could share code.
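
E.g. an untested sketch, parameterized on mem_type (with the trailing
preferred_mem_type update left in the wrappers):

static void add_vram(struct xe_device *xe, struct xe_bo *bo,
		     struct ttm_place *places, u32 bo_flags,
		     u32 mem_type, u32 *c)
{
	struct xe_gt *gt = mem_type_to_gt(xe, mem_type);
	u64 io_size = gt->mem.vram.io_size;
	struct ttm_place place = { .mem_type = mem_type };

	XE_BUG_ON(!gt->mem.vram.size);

	/*
	 * For eviction / restore on suspend / resume objects pinned in
	 * VRAM must be contiguous.
	 */
	if (bo_flags & (XE_BO_CREATE_PINNED_BIT | XE_BO_CREATE_GGTT_BIT))
		place.flags |= TTM_PL_FLAG_CONTIGUOUS;

	if (io_size < gt->mem.vram.size) {
		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
			place.fpfn = 0;
			place.lpfn = io_size >> PAGE_SHIFT;
		} else {
			place.flags |= TTM_PL_FLAG_TOPDOWN;
		}
	}

	places[*c] = place;
	*c += 1;
}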

>  {
> -	struct xe_gt *gt;
> -
>  	if (bo_flags & XE_BO_CREATE_VRAM1_BIT) {
> +		struct ttm_place place = {};
> +		struct xe_gt *gt;
> +		u64 io_size;
> +
>  		gt = mem_type_to_gt(xe, XE_PL_VRAM1);
> +		io_size = gt->mem.vram.io_size;
>  		XE_BUG_ON(!gt->mem.vram.size);
>  
> -		places[*c] = (struct ttm_place) {
> -			.mem_type = XE_PL_VRAM1,
> -			/*
> -			 * For eviction / restore on suspend / resume objects
> -			 * pinned in VRAM must be contiguous
> -			 */
> -			.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
> -					     XE_BO_CREATE_GGTT_BIT) ?
> -				TTM_PL_FLAG_CONTIGUOUS : 0,
> -		};
> +		place.mem_type = XE_PL_VRAM1;
> +
> +		if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
> +				XE_BO_CREATE_GGTT_BIT))
> +			place.flags |= TTM_PL_FLAG_CONTIGUOUS;
> +
> +		if (io_size < gt->mem.vram.size) {
> +			if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
> +				place.fpfn = 0;
> +				place.lpfn = io_size >> PAGE_SHIFT;
> +			} else {
> +				place.flags |= TTM_PL_FLAG_TOPDOWN;
> +			}
> +		}
> +		places[*c] = place;
>  		*c += 1;
>  
>  		if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
> @@ -367,15 +383,22 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
>  				 struct ttm_resource *mem)
>  {
>  	struct xe_device *xe = ttm_to_xe_device(bdev);
> -	struct xe_gt *gt;
>  
>  	switch (mem->mem_type) {
>  	case XE_PL_SYSTEM:
>  	case XE_PL_TT:
>  		return 0;
>  	case XE_PL_VRAM0:
> -	case XE_PL_VRAM1:
> +	case XE_PL_VRAM1: {
> +		struct xe_ttm_vram_mgr_resource *vres =
> +			to_xe_ttm_vram_mgr_resource(mem);
> +		struct xe_gt *gt;
> +
> +		if (vres->used_visible_size < mem->size)
> +			return -EINVAL;
> +
>  		gt = mem_type_to_gt(xe, mem->mem_type);
> +
>  		mem->bus.offset = mem->start << PAGE_SHIFT;
>  
>  		if (gt->mem.vram.mapping &&
> @@ -390,7 +413,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
>  		mem->bus.caching = ttm_write_combined;
>  #endif
>  		return 0;
> -	case XE_PL_STOLEN:
> +	} case XE_PL_STOLEN:
>  		return xe_ttm_stolen_io_mem_reserve(xe, mem);
>  	default:
>  		return -EINVAL;
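
Spelling the new check out with hypothetical numbers:

	/*
	 * e.g. 16G of vram behind a 256M BAR: a TOPDOWN bo typically
	 * lands above the 256M line, so vres->used_visible_size ends up
	 * smaller than mem->size and we now bail out instead of handing
	 * back a bus offset the CPU could never reach.
	 */
	if (vres->used_visible_size < mem->size)
		return -EINVAL;
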
> @@ -1170,7 +1193,8 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt,
>  	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
>  		flags |= XE_BO_CREATE_GGTT_BIT;
>  
> -	bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type, flags);
> +	bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type,
> +				       flags | XE_BO_NEEDS_CPU_ACCESS);
>  	if (IS_ERR(bo))
>  		return bo;
>  
> @@ -1468,6 +1492,9 @@ int xe_bo_vmap(struct xe_bo *bo)
>  
>  	xe_bo_assert_held(bo);
>  
> +	if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
> +		return -EINVAL;
> +
>  	if (!iosys_map_is_null(&bo->vmap))
>  		return 0;
>  
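
So any kernel user that wants xe_bo_vmap() now has to opt in at create
time. A minimal sketch (error handling elided, signatures as used in
the test above):

	bo = xe_bo_create_locked(xe, gt, NULL, size, ttm_bo_type_kernel,
				 XE_BO_CREATE_VRAM0_BIT);
	err = xe_bo_vmap(bo);		/* now fails with -EINVAL */

	bo = xe_bo_create_locked(xe, gt, NULL, size, ttm_bo_type_kernel,
				 XE_BO_CREATE_VRAM0_BIT |
				 XE_BO_NEEDS_CPU_ACCESS);
	err = xe_bo_vmap(bo);		/* OK, placed in the mappable window */
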
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index f841e74cd417..58c063230f41 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -28,6 +28,7 @@
>  #define XE_BO_DEFER_BACKING		BIT(8)
>  #define XE_BO_SCANOUT_BIT		BIT(9)
>  #define XE_BO_FIXED_PLACEMENT_BIT	BIT(10)
> +#define XE_BO_NEEDS_CPU_ACCESS		BIT(11)
>  /* this one is trigger internally only */
>  #define XE_BO_INTERNAL_TEST		BIT(30)
>  #define XE_BO_INTERNAL_64K		BIT(31)
> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> index 73836b9b7fed..cf081e4aedf6 100644
> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> @@ -373,12 +373,16 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
>  			      enum dma_data_direction dir,
>  			      struct sg_table **sgt)
>  {
> +	struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res);
>  	struct xe_gt *gt = xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0);
>  	struct xe_res_cursor cursor;
>  	struct scatterlist *sg;
>  	int num_entries = 0;
>  	int i, r;
>  
> +	if (vres->used_visible_size < res->size)
> +		return -EOPNOTSUPP;
> +
>  	*sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
>  	if (!*sgt)
>  		return -ENOMEM;
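
The -EOPNOTSUPP here also looks right: an sgt of dma addresses can only
describe blocks inside the CPU-visible window, so a partially
non-visible resource can't be exported. Might be worth a one-line
comment saying so.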


