[PATCH v1 1/4] drm/xe: Add initial support for separate kernel VRAM region on the tile
Matthew Auld
matthew.auld at intel.com
Wed Aug 13 16:53:53 UTC 2025
On 18/07/2025 09:17, Piórkowski, Piotr wrote:
> From: Piotr Piórkowski <piotr.piorkowski at intel.com>
>
> So far, kernel and userspace allocations have shared the same VRAM region.
> However, in some scenarios, it may be necessary to reserve a separate
> VRAM area exclusively for kernel allocations.
> Let's add preliminary support for such a configuration.
>
> Signed-off-by: Piotr Piórkowski <piotr.piorkowski at intel.com>
> ---
> drivers/gpu/drm/xe/xe_bo.c | 87 ++++++++++++++++++++--------
> drivers/gpu/drm/xe/xe_bo.h | 6 +-
> drivers/gpu/drm/xe/xe_device_types.h | 10 +++-
> drivers/gpu/drm/xe/xe_tile.c | 8 +++
> drivers/gpu/drm/xe/xe_tile.h | 5 ++
> drivers/gpu/drm/xe/xe_vram.c | 6 +-
> 6 files changed, 94 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 00ce067d5fd3..12e899726534 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -33,6 +33,7 @@
> #include "xe_pxp.h"
> #include "xe_res_cursor.h"
> #include "xe_shrinker.h"
> +#include "xe_tile.h"
> #include "xe_trace_bo.h"
> #include "xe_ttm_stolen_mgr.h"
> #include "xe_vm.h"
> @@ -208,6 +209,27 @@ static bool force_contiguous(u32 bo_flags)
> bo_flags & XE_BO_FLAG_PINNED;
> }
>
> +static u8 vram_bo_flag_to_tile_id(struct xe_device *xe, u32 vram_bo_flag)
> +{
> + xe_assert(xe, vram_bo_flag & XE_BO_FLAG_VRAM_MASK);
> + xe_assert(xe, (vram_bo_flag & (vram_bo_flag - 1)) == 0);
> +
> + return __ffs(vram_bo_flag >> (__ffs(XE_BO_FLAG_VRAM0) - 1)) - 1;
> +}
> +
> +static u32 bo_vram_flags_to_vram_placement(struct xe_device *xe, u32 bo_flags, u32 vram_flag,
Do you need bo_flags here? It doesn't appear to be used in this helper.
> + enum ttm_bo_type type)
> +{
> + u8 tile_id = vram_bo_flag_to_tile_id(xe, vram_flag);
> +
> + xe_assert(xe, tile_id < xe->info.tile_count);
> +
> + if (type == ttm_bo_type_kernel)
> + return xe->tiles[tile_id].mem.kernel_vram->placement;
> + else
> + return xe->tiles[tile_id].mem.vram->placement;
> +}
> +
> static void add_vram(struct xe_device *xe, struct xe_bo *bo,
> struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
> {
> @@ -240,12 +262,17 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
> }
>
> static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
> - u32 bo_flags, u32 *c)
> + u32 bo_flags, enum ttm_bo_type type, u32 *c)
> {
> - if (bo_flags & XE_BO_FLAG_VRAM0)
> - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
> - if (bo_flags & XE_BO_FLAG_VRAM1)
> - add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
> + u32 vram_flag;
> +
> + for_each_bo_flag_vram(vram_flag) {
> + if (bo_flags & vram_flag) {
Would it be more natural to fold this check into the macro somehow? For example:

    for_each_set_bo_vram_flag(vram_flag, bo_flags)
            bo_vram_flags_to_vram_placement(xe, vram_flag, type);
> + u32 pl = bo_vram_flags_to_vram_placement(xe, bo_flags, vram_flag, type);
> +
> + add_vram(xe, bo, bo->placements, bo_flags, pl, c);
> + }
> + }
> }
>
> static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
> @@ -264,11 +291,11 @@ static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
> }
>
> static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
> - u32 bo_flags)
> + u32 bo_flags, enum ttm_bo_type type)
> {
> u32 c = 0;
>
> - try_add_vram(xe, bo, bo_flags, &c);
> + try_add_vram(xe, bo, bo_flags, type, &c);
> try_add_system(xe, bo, bo_flags, &c);
> try_add_stolen(xe, bo, bo_flags, &c);
>
> @@ -284,10 +311,10 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
> }
>
> int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
> - u32 bo_flags)
> + u32 bo_flags, enum ttm_bo_type type)
> {
> xe_bo_assert_held(bo);
> - return __xe_bo_placement_for_flags(xe, bo, bo_flags);
> + return __xe_bo_placement_for_flags(xe, bo, bo_flags, type);
> }
>
> static void xe_evict_flags(struct ttm_buffer_object *tbo,
> @@ -1895,7 +1922,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> }
>
> if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
> - err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
> + err = __xe_bo_placement_for_flags(xe, bo, bo->flags, type);
> if (WARN_ON(err)) {
> xe_ttm_bo_destroy(&bo->ttm);
> return ERR_PTR(err);
> @@ -1953,34 +1980,33 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
> }
>
> static int __xe_bo_fixed_placement(struct xe_device *xe,
> - struct xe_bo *bo,
> + struct xe_bo *bo, enum ttm_bo_type type,
> u32 flags,
> u64 start, u64 end, u64 size)
> {
> struct ttm_place *place = bo->placements;
> + u32 vram_flag, vram_stolen_flags;
>
> if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
> return -EINVAL;
>
> + vram_flag = flags & XE_BO_FLAG_VRAM_MASK;
> + vram_stolen_flags = (flags & (XE_BO_FLAG_STOLEN)) | vram_flag;
> +
> + /* check if more than one VRAM/STOLEN flag is set */
> + if ((vram_stolen_flags & (vram_stolen_flags - 1)) != 0)
Would hweight32(vram_stolen_flags) > 1 be more readable here?
> + return -EINVAL;
> +
> place->flags = TTM_PL_FLAG_CONTIGUOUS;
> place->fpfn = start >> PAGE_SHIFT;
> place->lpfn = end >> PAGE_SHIFT;
>
> - switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
> - case XE_BO_FLAG_VRAM0:
> - place->mem_type = XE_PL_VRAM0;
> - break;
> - case XE_BO_FLAG_VRAM1:
> - place->mem_type = XE_PL_VRAM1;
> - break;
> - case XE_BO_FLAG_STOLEN:
> + if (flags & XE_BO_FLAG_STOLEN)
> place->mem_type = XE_PL_STOLEN;
> - break;
> -
> - default:
> - /* 0 or multiple of the above set */
> + else if (vram_flag)
> + place->mem_type = bo_vram_flags_to_vram_placement(xe, flags, vram_flag, type);
> + else
> return -EINVAL;
> - }
>
> bo->placement = (struct ttm_placement) {
> .num_placement = 1,
> @@ -2003,13 +2029,24 @@ __xe_bo_create_locked(struct xe_device *xe,
> if (vm)
> xe_vm_assert_held(vm);
>
> + /*
> + * In the case of kernel allocations, if the tile has dedicated kernel
> + * VRAM region, and tile->id does not match tile->vram_id, it means
> + * that we are using unified VRAM and we need to fix the VRAM BO flags.
> + */
> + if (tile && type == ttm_bo_type_kernel && xe_tile_has_separate_kernel_vram(tile) &&
> + (flags & XE_BO_FLAG_VRAM_MASK) && tile->mem.vram->id != tile->mem.kernel_vram->id) {
> + flags &= ~XE_BO_FLAG_VRAM_MASK;
> + flags |= (XE_BO_FLAG_VRAM0 << tile->mem.kernel_vram->id);
> + }
> +
> if (start || end != ~0ULL) {
> bo = xe_bo_alloc();
> if (IS_ERR(bo))
> return bo;
>
> flags |= XE_BO_FLAG_FIXED_PLACEMENT;
> - err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
> + err = __xe_bo_fixed_placement(xe, bo, type, flags, start, end, size);
> if (err) {
> xe_bo_free(bo);
> return ERR_PTR(err);
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 8cce413b5235..dde8e0274ff2 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -61,6 +61,10 @@
> #define XE_BO_FLAG_GGTTx(tile) \
> (XE_BO_FLAG_GGTT0 << (tile)->id)
>
> +#define for_each_bo_flag_vram(bit__) \
> + for (unsigned int __bit_tmp = BIT(0); __bit_tmp <= XE_BO_FLAG_VRAM_MASK; __bit_tmp <<= 1) \
> + for_each_if(((bit__) = __bit_tmp) & XE_BO_FLAG_VRAM_MASK)
Should this be exported or can it be moved to .c?
> +
> #define XE_PTE_SHIFT 12
> #define XE_PAGE_SIZE (1 << XE_PTE_SHIFT)
> #define XE_PTE_MASK (XE_PAGE_SIZE - 1)
> @@ -127,7 +131,7 @@ struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_til
> int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src);
>
> int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
> - u32 bo_flags);
> + u32 bo_flags, enum ttm_bo_type type);
>
> static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo)
> {
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index eb6105523f23..3a417305c1b8 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -155,7 +155,15 @@ struct xe_tile {
> /** @mem: memory management info for tile */
> struct {
> /**
> - * @mem.vram: VRAM info for tile.
> + * @mem.kernel_vram: kernel-dedicated VRAM info for tile.
> + *
> + * Although VRAM is associated with a specific tile, it can
> + * still be accessed by all tiles' GTs.
> + */
> + struct xe_vram_region *kernel_vram;
> +
> + /**
> + * @mem.vram: general purpose VRAM info for tile.
> *
> * Although VRAM is associated with a specific tile, it can
> * still be accessed by all tiles' GTs.
> diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
> index 0be0a5c57ef4..a14f549effdd 100644
> --- a/drivers/gpu/drm/xe/xe_tile.c
> +++ b/drivers/gpu/drm/xe/xe_tile.c
> @@ -124,6 +124,14 @@ int xe_tile_alloc_vram(struct xe_tile *tile)
> return PTR_ERR(vram);
> tile->mem.vram = vram;
>
> + /*
> + * If kernel_vram is not already allocated,
> + * it means that the tile has a common VRAM region for
> + * kernel and user space.
> + */
> + if (!tile->mem.kernel_vram)
> + tile->mem.kernel_vram = tile->mem.vram;
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h
> index dceb6297aa01..5d834378b354 100644
> --- a/drivers/gpu/drm/xe/xe_tile.h
> +++ b/drivers/gpu/drm/xe/xe_tile.h
> @@ -23,4 +23,9 @@ static inline bool xe_tile_is_root(struct xe_tile *tile)
> return tile->id == 0;
> }
>
> +static inline bool xe_tile_has_separate_kernel_vram(const struct xe_tile *tile)
> +{
> + return tile->mem.vram != tile->mem.kernel_vram;
> +}
> +
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
> index b44ebf50fedb..7adfccf68e4c 100644
> --- a/drivers/gpu/drm/xe/xe_vram.c
> +++ b/drivers/gpu/drm/xe/xe_vram.c
> @@ -13,6 +13,7 @@
> #include "regs/xe_gt_regs.h"
> #include "regs/xe_regs.h"
> #include "xe_assert.h"
> +#include "xe_bo.h"
> #include "xe_device.h"
> #include "xe_force_wake.h"
> #include "xe_gt_mcr.h"
> @@ -283,8 +284,11 @@ static void vram_fini(void *arg)
>
> xe->mem.vram->mapping = NULL;
>
> - for_each_tile(tile, xe, id)
> + for_each_tile(tile, xe, id) {
> tile->mem.vram->mapping = NULL;
> + if (tile->mem.kernel_vram)
> + tile->mem.kernel_vram->mapping = NULL;
> + }
> }
>
> struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement)
More information about the Intel-xe
mailing list