[Intel-xe] [PATCH v2 4/6] drm/xe/bo: support tiered vram allocation for small-bar
Gwan-gyeong Mun
gwan-gyeong.mun at intel.com
Sat Mar 25 23:30:41 UTC 2023
Looks good to me.
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
On 3/23/23 1:59 PM, Matthew Auld wrote:
> Add the new flag XE_BO_NEEDS_CPU_ACCESS, to force allocating in the
> mappable part of vram. If no flag is specified we do a topdown
> allocation, to limit the chances of stealing the precious mappable part,
> if we don't need it. If this is a full-bar system, then this all gets
> nooped.
>
> For kernel users, it looks like xe_bo_create_pin_map() is the central
> place which users should call if they want CPU access to the object, so
> add the flag there.
>
> We still need to plumb this through for userspace allocations. Also it
> looks like page-tables are using pin_map(), which is less than ideal. If
> we can already use the GPU to do page-table management, then maybe we
> should just force that for small-bar.
>
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> Reviewed-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> ---
> drivers/gpu/drm/xe/tests/xe_migrate.c | 3 +-
> drivers/gpu/drm/xe/xe_bo.c | 48 +++++++++++++++++++--------
> drivers/gpu/drm/xe/xe_bo.h | 1 +
> drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 4 +++
> 4 files changed, 41 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
> index 17829f878757..de101c3a6406 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
> @@ -108,7 +108,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
> struct xe_bo *sysmem = xe_bo_create_locked(xe, m->gt, NULL,
> bo->size,
> ttm_bo_type_kernel,
> - XE_BO_CREATE_SYSTEM_BIT);
> + XE_BO_CREATE_SYSTEM_BIT |
> + XE_BO_NEEDS_CPU_ACCESS);
> if (IS_ERR(sysmem)) {
> KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n",
> str, PTR_ERR(sysmem));
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 1c8e0fbaf1df..de57ccc5b57c 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -96,20 +96,29 @@ static void try_add_system(struct xe_bo *bo, struct ttm_place *places,
> static void add_vram(struct xe_device *xe, struct xe_bo *bo,
> struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
> {
> + struct ttm_place place = { .mem_type = mem_type };
> struct xe_gt *gt = mem_type_to_gt(xe, mem_type);
> + u64 io_size = gt->mem.vram.io_size;
>
> XE_BUG_ON(!gt->mem.vram.size);
>
> - places[*c] = (struct ttm_place) {
> - .mem_type = mem_type,
> - /*
> - * For eviction / restore on suspend / resume objects
> - * pinned in VRAM must be contiguous
> - */
> - .flags = bo_flags & (XE_BO_CREATE_PINNED_BIT |
> - XE_BO_CREATE_GGTT_BIT) ?
> - TTM_PL_FLAG_CONTIGUOUS : 0,
> - };
> + /*
> + * For eviction / restore on suspend / resume objects
> + * pinned in VRAM must be contiguous
> + */
> + if (bo_flags & (XE_BO_CREATE_PINNED_BIT |
> + XE_BO_CREATE_GGTT_BIT))
> + place.flags |= TTM_PL_FLAG_CONTIGUOUS;
> +
> + if (io_size < gt->mem.vram.size) {
> + if (bo_flags & XE_BO_NEEDS_CPU_ACCESS) {
> + place.fpfn = 0;
> + place.lpfn = io_size >> PAGE_SHIFT;
> + } else {
> + place.flags |= TTM_PL_FLAG_TOPDOWN;
> + }
> + }
> + places[*c] = place;
> *c += 1;
>
> if (bo->props.preferred_mem_type == XE_BO_PROPS_INVALID)
> @@ -343,15 +352,22 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
> struct ttm_resource *mem)
> {
> struct xe_device *xe = ttm_to_xe_device(bdev);
> - struct xe_gt *gt;
>
> switch (mem->mem_type) {
> case XE_PL_SYSTEM:
> case XE_PL_TT:
> return 0;
> case XE_PL_VRAM0:
> - case XE_PL_VRAM1:
> + case XE_PL_VRAM1: {
> + struct xe_ttm_vram_mgr_resource *vres =
> + to_xe_ttm_vram_mgr_resource(mem);
> + struct xe_gt *gt;
> +
> + if (vres->used_visible_size < mem->size)
> + return -EINVAL;
> +
> gt = mem_type_to_gt(xe, mem->mem_type);
> +
> mem->bus.offset = mem->start << PAGE_SHIFT;
>
> if (gt->mem.vram.mapping &&
> @@ -366,7 +382,7 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
> mem->bus.caching = ttm_write_combined;
> #endif
> return 0;
> - case XE_PL_STOLEN:
> + } case XE_PL_STOLEN:
> return xe_ttm_stolen_io_mem_reserve(xe, mem);
> default:
> return -EINVAL;
> @@ -1144,7 +1160,8 @@ struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_gt *gt,
> xe_ttm_stolen_cpu_access_needs_ggtt(xe))
> flags |= XE_BO_CREATE_GGTT_BIT;
>
> - bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type, flags);
> + bo = xe_bo_create_locked_range(xe, gt, vm, size, start, end, type,
> + flags | XE_BO_NEEDS_CPU_ACCESS);
> if (IS_ERR(bo))
> return bo;
>
> @@ -1442,6 +1459,9 @@ int xe_bo_vmap(struct xe_bo *bo)
>
> xe_bo_assert_held(bo);
>
> + if (!(bo->flags & XE_BO_NEEDS_CPU_ACCESS))
> + return -EINVAL;
> +
> if (!iosys_map_is_null(&bo->vmap))
> return 0;
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 3c3a09f47fb4..24c45bfa998f 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -30,6 +30,7 @@
> #define XE_BO_DEFER_BACKING BIT(8)
> #define XE_BO_SCANOUT_BIT BIT(9)
> #define XE_BO_FIXED_PLACEMENT_BIT BIT(10)
> +#define XE_BO_NEEDS_CPU_ACCESS BIT(11)
> /* this one is trigger internally only */
> #define XE_BO_INTERNAL_TEST BIT(30)
> #define XE_BO_INTERNAL_64K BIT(31)
> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> index 73836b9b7fed..cf081e4aedf6 100644
> --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
> @@ -373,12 +373,16 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
> enum dma_data_direction dir,
> struct sg_table **sgt)
> {
> + struct xe_ttm_vram_mgr_resource *vres = to_xe_ttm_vram_mgr_resource(res);
> struct xe_gt *gt = xe_device_get_gt(xe, res->mem_type - XE_PL_VRAM0);
> struct xe_res_cursor cursor;
> struct scatterlist *sg;
> int num_entries = 0;
> int i, r;
>
> + if (vres->used_visible_size < res->size)
> + return -EOPNOTSUPP;
> +
> *sgt = kmalloc(sizeof(**sgt), GFP_KERNEL);
> if (!*sgt)
> return -ENOMEM;
More information about the Intel-xe
mailing list