[Intel-xe] [PATCH v2 4/6] drm/xe/bo: support tiered vram allocation for small-bar

Gwan-gyeong Mun gwan-gyeong.mun at intel.com
Sat Mar 25 23:30:41 UTC 2023


Looks good to me.

Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>

On 3/23/23 1:59 PM, Matthew Auld wrote:
> Add the new flag XE_BO_NEEDS_CPU_ACCESS, to force allocating in the
> mappable part of vram. If no flag is specified we do a topdown
> allocation, to limit the chances of stealing the precious mappable part,
> if we don't need it. If this is a full-bar system, then this all gets
> nooped.
> 
> For kernel users, it looks like xe_bo_create_pin_map() is the central
> place which users should call if they want CPU access to the object, so
> add the flag there.
> 
> We still need to plumb this through for userspace allocations. Also it
> looks like page-tables are using pin_map(), which is less than ideal. If
> we can already use the GPU to do page-table management, then maybe we
> should just force that for small-bar.
> 
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> Reviewed-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> ---
>   drivers/gpu/drm/xe/tests/xe_migrate.c |  3 +-
>   drivers/gpu/drm/xe/xe_bo.c            | 48 +++++++++++++++++++--------
>   drivers/gpu/drm/xe/xe_bo.h            |  1 +
>   drivers/gpu/drm/xe/xe_ttm_vram_mgr.c  |  4 +++
>   4 files changed, 41 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
> index 17829f878757..de101c3a6406 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
> @@ -108,7 +108,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
>   	struct xe_bo *sysmem = xe_bo_create_locked(xe, m->gt, NULL,
>   						   bo->size,
>   						   ttm_bo_type_kernel,
> -						   XE_BO_CREATE_SYSTEM_BIT);
> +						   XE_BO_CREATE_SYSTEM_BIT |
> +						   XE_BO_NEEDS_CPU_ACCESS);
>   	if (IS_ERR(sysmem)) {
>   		KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n",
>   			   str, PTR_ERR(sysmem));
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 1c8e0fbaf1df..de57ccc5b57c 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -96,20 +96,29 @@ static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
>   static void add_vram(struct xe_device *xe, struct xe_bo *bo,
>   		     struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
>   {
> +	struct ttm_place place = { .mem_type = mem_type };
>   	struct xe_gt *gt = mem_type_to_gt(xe, mem_type);
> +	u64 io_size = gt->mem.vram.io_size;
>   
>   	XE_BUG_ON(!gt->mem.vram.size);
>   
> -	places[*c] = (struct ttm_place) {
> -		.mem_type = mem_type,
> -		/*
> -		 * For eviction / restore on suspend / resume objects
> -		 * pinned in VRAM must be contiguous
> -		 */
> -		.flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
> -				     XE_BO_CREATE_GGTT_BIT) ?
> -			TTM_PL_FLAG_CONTIGUOUS : 0,
> -	};
> +	/*
> +	 * For eviction / restore on suspend / resume objects
> +	 * pinned in VRAM must be contiguous
> +	 */
> +	if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
> +			XE_BO_CREATE_GGTT_BIT))
> +		place.flags |= TTM_PL_FLAG_CONTIGUOUS;
> +
> +	if (io_size < gt->mem.vram.size) {
> +		if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
> +			place.fpfn = 0;
> +			place.lpfn = io_size >> PAGE_SHIFT;
> +		} else {
> +			place.flags |= TTM_PL_FLAG_TOPDOWN;
> +		}
> +	}
> +	places[*c] = place;
>   	*c += 1;
>   
>   	if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
> @@ -343,15 +352,22 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
>   				 struct ttm_resource *mem)
>   {
>   	struct xe_device *xe = ttm_to_xe_device(bdev);
> -	struct xe_gt *gt;
>   
>   	switch (mem->mem_type) {
>   	case XE_PL_SYSTEM:
>   	case XE_PL_TT:
>   		return 0;
>   	case XE_PL_VRAM0:
> -	case XE_PL_VRAM1:
> +	case XE_PL_VRAM1: {
> +		struct xe_ttm_vram_mgr_resource *vres =
> +			to_xe_ttm_vram_mgr_resource(mem);
> +		struct xe_gt *gt;
> +
> +		if (vres->used_visible_size < mem->size)
> +			return -EINVAL;
> +
>   		gt = mem_type_to_gt(xe, mem->mem_type);
> +
>   		mem->bus.offset = mem->start << PAGE_SHIFT;
>   
>   		if (gt->mem.vram.mapping &&
> @@ -366,7 +382,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
>   		mem->bus.caching = ttm_write_combined;
>   #endif
>   		return 0;
> -	case XE_PL_STOLEN:
> +	} case XE_PL_STOLEN:
>   		return xe_ttm_stolen_io_mem_reserve(xe, mem);
>   	default:
>   		return -EINVAL;
> @@ -1144,7 +1160,8 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt,
>   	    xe_ttm_stolen_cpu_access_needs_ggtt(xe))
>   		flags |= XE_BO_CREATE_GGTT_BIT;
>   
> -	bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type, flags);
> +	bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type,
> +				       flags | XE_BO_NEEDS_CPU_ACCESS);
>   	if (IS_ERR(bo))
>   		return bo;
>   
> @@ -1442,6 +1459,9 @@ int xe_bo_vmap(struct xe_bo *bo)
>   
>   	xe_bo_assert_held(bo);
>   
> +	if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
> +		return -EINVAL;
> +
>   	if (!iosys_map_is_null(&bo->vmap))
>   		return 0;
>   
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 3c3a09f47fb4..24c45bfa998f 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -30,6 +30,7 @@
>   #define XE_BO_DEFER_BACKING		BIT(8)
>   #define XE_BO_SCANOUT_BIT		BIT(9)
>   #define XE_BO_FIXED_PLACEMENT_BIT	BIT(10)
> +#define XE_BO_NEEDS_CPU_ACCESS		BIT(11)
>   /* this one is trigger internally only */
>   #define XE_BO_INTERNAL_TEST		BIT(30)
>   #define XE_BO_INTERNAL_64K		BIT(31)
> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> index 73836b9b7fed..cf081e4aedf6 100644
> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> @@ -373,12 +373,16 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
>   			      enum dma_data_direction dir,
>   			      struct sg_table **sgt)
>   {
> +	struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res);
>   	struct xe_gt *gt = xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0);
>   	struct xe_res_cursor cursor;
>   	struct scatterlist *sg;
>   	int num_entries = 0;
>   	int i, r;
>   
> +	if (vres->used_visible_size < res->size)
> +		return -EOPNOTSUPP;
> +
>   	*sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
>   	if (!*sgt)
>   		return -ENOMEM;


More information about the Intel-xe mailing list