[PATCH 06/15] drm/xe: Convert xe_bo_create_user() for exhaustive eviction
Matthew Brost
matthew.brost at intel.com
Thu Aug 14 02:23:49 UTC 2025
On Wed, Aug 13, 2025 at 12:51:12PM +0200, Thomas Hellström wrote:
> Use the xe_validation_guard() to convert xe_bo_create_user()
> for exhaustive eviction.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost at intel.com>
> ---
> drivers/gpu/drm/xe/tests/xe_bo.c | 16 ++--
> drivers/gpu/drm/xe/tests/xe_dma_buf.c | 4 +-
> drivers/gpu/drm/xe/tests/xe_migrate.c | 12 +--
> drivers/gpu/drm/xe/xe_bo.c | 116 +++++++++++++++++---------
> drivers/gpu/drm/xe/xe_bo.h | 9 +-
> drivers/gpu/drm/xe/xe_device.c | 2 +
> drivers/gpu/drm/xe/xe_device_types.h | 3 +
> drivers/gpu/drm/xe/xe_vm.c | 14 ++++
> drivers/gpu/drm/xe/xe_vm.h | 2 +
> 9 files changed, 116 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
> index 06ceba6c3c25..42f914692a02 100644
> --- a/drivers/gpu/drm/xe/tests/xe_bo.c
> +++ b/drivers/gpu/drm/xe/tests/xe_bo.c
> @@ -139,8 +139,8 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
> else
> kunit_info(test, "Testing system memory\n");
>
> - bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
> - bo_flags);
> + bo = xe_bo_create_user(xe, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
> + bo_flags, exec);
> if (IS_ERR(bo)) {
> KUNIT_FAIL(test, "Failed to create bo.\n");
> return;
> @@ -220,18 +220,18 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
>
> for (i = 0; i < 2; ++i) {
> xe_vm_lock(vm, false);
> - bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
> + bo = xe_bo_create_user(xe, vm, 0x10000,
> DRM_XE_GEM_CPU_CACHING_WC,
> - bo_flags);
> + bo_flags, exec);
> xe_vm_unlock(vm);
> if (IS_ERR(bo)) {
> KUNIT_FAIL(test, "bo create err=%pe\n", bo);
> break;
> }
>
> - external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
> + external = xe_bo_create_user(xe, NULL, 0x10000,
> DRM_XE_GEM_CPU_CACHING_WC,
> - bo_flags);
> + bo_flags, NULL);
> if (IS_ERR(external)) {
> KUNIT_FAIL(test, "external bo create err=%pe\n", external);
> goto cleanup_bo;
> @@ -497,9 +497,9 @@ static int shrink_test_run_device(struct xe_device *xe)
> INIT_LIST_HEAD(&link->link);
>
> /* We can create bos using WC caching here. But it is slower. */
> - bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
> + bo = xe_bo_create_user(xe, NULL, XE_BO_SHRINK_SIZE,
> DRM_XE_GEM_CPU_CACHING_WB,
> - XE_BO_FLAG_SYSTEM);
> + XE_BO_FLAG_SYSTEM, NULL);
> if (IS_ERR(bo)) {
> if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
> bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
> diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
> index 965dd3280468..8126b35f4aeb 100644
> --- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
> +++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
> @@ -122,8 +122,8 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
> size = SZ_64K;
>
> kunit_info(test, "running %s\n", __func__);
> - bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
> - params->mem_mask);
> + bo = xe_bo_create_user(xe, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
> + params->mem_mask, NULL);
> if (IS_ERR(bo)) {
> KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
> PTR_ERR(bo));
> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
> index dfb445d09759..afa794e56065 100644
> --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
> @@ -642,11 +642,11 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
> struct drm_exec *exec;
> long ret;
>
> - sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
> + sys_bo = xe_bo_create_user(xe, NULL, SZ_4M,
> DRM_XE_GEM_CPU_CACHING_WC,
> XE_BO_FLAG_SYSTEM |
> XE_BO_FLAG_NEEDS_CPU_ACCESS |
> - XE_BO_FLAG_PINNED);
> + XE_BO_FLAG_PINNED, NULL);
>
> if (IS_ERR(sys_bo)) {
> KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
> @@ -669,10 +669,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
> }
> xe_bo_unlock(sys_bo);
>
> - ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
> + ccs_bo = xe_bo_create_user(xe, NULL, SZ_4M,
> DRM_XE_GEM_CPU_CACHING_WC,
> bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS |
> - XE_BO_FLAG_PINNED);
> + XE_BO_FLAG_PINNED, NULL);
>
> if (IS_ERR(ccs_bo)) {
> KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
> @@ -694,10 +694,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
> }
> xe_bo_unlock(ccs_bo);
>
> - vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
> + vram_bo = xe_bo_create_user(xe, NULL, SZ_4M,
> DRM_XE_GEM_CPU_CACHING_WC,
> bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS |
> - XE_BO_FLAG_PINNED);
> + XE_BO_FLAG_PINNED, NULL);
> if (IS_ERR(vram_bo)) {
> KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
> PTR_ERR(vram_bo));
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index e71addf51ed0..5e40b6cb8d2a 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -2185,30 +2185,66 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
> flags, 0, exec);
> }
>
> -struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
> - struct xe_vm *vm, size_t size,
> - u16 cpu_caching,
> - u32 flags)
> -{
> - struct drm_exec *exec = vm ? xe_vm_validation_exec(vm) : XE_VALIDATION_UNIMPLEMENTED;
> - struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
> - cpu_caching, ttm_bo_type_device,
> - flags | XE_BO_FLAG_USER, 0, exec);
> - if (!IS_ERR(bo))
> - xe_bo_unlock_vm_held(bo);
> +static struct xe_bo *xe_bo_create_novm(struct xe_device *xe, struct xe_tile *tile,
> + size_t size, u16 cpu_caching,
> + enum ttm_bo_type type, u32 flags,
> + u64 alignment, bool intr)
> +{
> + u32 drm_exec_flags = intr ? DRM_EXEC_INTERRUPTIBLE_WAIT : 0;
> + struct xe_validation_ctx ctx;
> + struct drm_exec exec;
> + struct xe_bo *bo;
> + int ret = 0;
>
> - return bo;
> + xe_validation_guard(&ctx, &xe->val, &exec, drm_exec_flags, ret, false) {
> + bo = __xe_bo_create_locked(xe, tile, NULL, size, 0, ~0ULL,
> + cpu_caching, type, flags, alignment, &exec);
> + drm_exec_retry_on_contention(&exec);
> + if (IS_ERR(bo)) {
> + ret = PTR_ERR(bo);
> + xe_validation_retry_on_oom(&ctx, &ret);
> + } else {
> + xe_bo_unlock(bo);
> + }
> + }
> +
> + return ret ? ERR_PTR(ret) : bo;
> }
>
> -struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
> - struct xe_vm *vm, size_t size,
> - enum ttm_bo_type type, u32 flags)
> +/**
> + * xe_bo_create_user() - Create a user BO
> + * @xe: The xe device.
> + * @vm: The local vm or NULL for external objects.
> + * @size: The storage size to use for the bo.
> + * @cpu_caching: The caching mode to be used for system backing store.
> + * @flags: XE_BO_FLAG_ flags.
> + * @exec: The drm_exec transaction to use for exhaustive eviction, or NULL
> + * if such a transaction should be initiated by the call.
> + *
> + * Create a bo on behalf of user-space.
> + *
> + * Return: The buffer object on success. Negative error pointer on failure.
> + */
> +struct xe_bo *xe_bo_create_user(struct xe_device *xe,
> + struct xe_vm *vm, size_t size,
> + u16 cpu_caching,
> + u32 flags, struct drm_exec *exec)
> {
> - struct drm_exec *exec = vm ? xe_vm_validation_exec(vm) : XE_VALIDATION_UNIMPLEMENTED;
> - struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags, exec);
> + struct xe_bo *bo;
> +
> + flags |= XE_BO_FLAG_USER;
>
> - if (!IS_ERR(bo))
> - xe_bo_unlock_vm_held(bo);
> + if (vm || exec) {
> + xe_assert(xe, exec);
> + bo = __xe_bo_create_locked(xe, NULL, vm, size, 0, ~0ULL,
> + cpu_caching, ttm_bo_type_device,
> + flags, 0, exec);
> + if (!IS_ERR(bo))
> + xe_bo_unlock_vm_held(bo);
> + } else {
> + bo = xe_bo_create_novm(xe, NULL, size, cpu_caching,
> + ttm_bo_type_device, flags, 0, true);
> + }
>
> return bo;
> }
> @@ -2757,8 +2793,9 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
> struct xe_device *xe = to_xe_device(dev);
> struct xe_file *xef = to_xe_file(file);
> struct drm_xe_gem_create *args = data;
> + struct xe_validation_ctx ctx;
> + struct drm_exec exec;
> struct xe_vm *vm = NULL;
> - ktime_t end = 0;
> struct xe_bo *bo;
> unsigned int bo_flags;
> u32 handle;
> @@ -2832,25 +2869,26 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
> return -ENOENT;
> }
>
> -retry:
> - if (vm) {
> - err = xe_vm_lock(vm, true);
> - if (err)
> - goto out_vm;
> + err = 0;
> + xe_validation_guard(&ctx, &xe->val, &exec,
> + DRM_EXEC_INTERRUPTIBLE_WAIT, err, false) {
> + if (vm) {
> + err = xe_vm_drm_exec_lock(vm, &exec);
> + drm_exec_retry_on_contention(&exec);
> + if (err)
> + break;
> + }
> + bo = xe_bo_create_user(xe, vm, args->size, args->cpu_caching,
> + bo_flags, &exec);
> + drm_exec_retry_on_contention(&exec);
> + if (IS_ERR(bo)) {
> + err = PTR_ERR(bo);
> + xe_validation_retry_on_oom(&ctx, &err);
> + break;
> + }
> }
> -
> - bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
> - bo_flags);
> -
> - if (vm)
> - xe_vm_unlock(vm);
> -
> - if (IS_ERR(bo)) {
> - err = PTR_ERR(bo);
> - if (xe_vm_validate_should_retry(NULL, err, &end))
> - goto retry;
> + if (err)
> goto out_vm;
> - }
>
> if (args->extensions) {
> err = gem_create_user_extensions(xe, bo, args->extensions, 0);
> @@ -3223,11 +3261,11 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
> args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
> page_size);
>
> - bo = xe_bo_create_user(xe, NULL, NULL, args->size,
> + bo = xe_bo_create_user(xe, NULL, args->size,
> DRM_XE_GEM_CPU_CACHING_WC,
> XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
> XE_BO_FLAG_SCANOUT |
> - XE_BO_FLAG_NEEDS_CPU_ACCESS);
> + XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL);
> if (IS_ERR(bo))
> return PTR_ERR(bo);
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index b1b6cb622d71..c6bb90ca5c2e 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -104,13 +104,8 @@ struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
> struct xe_vm *vm, size_t size,
> enum ttm_bo_type type, u32 flags,
> struct drm_exec *exec);
> -struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
> - struct xe_vm *vm, size_t size,
> - enum ttm_bo_type type, u32 flags);
> -struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
> - struct xe_vm *vm, size_t size,
> - u16 cpu_caching,
> - u32 flags);
> +struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_vm *vm, size_t size,
> + u16 cpu_caching, u32 flags, struct drm_exec *exec);
> struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
> struct xe_vm *vm, size_t size,
> enum ttm_bo_type type, u32 flags);
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 3e0402dff423..6b152aa89dbb 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -452,6 +452,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
> if (err)
> goto err;
>
> + xe_validation_device_init(&xe->val);
> +
> init_waitqueue_head(&xe->ufence_wq);
>
> init_rwsem(&xe->usm.lock);
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 01e8fa0d2f9f..a4eb32bac151 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -26,6 +26,7 @@
> #include "xe_sriov_vf_ccs_types.h"
> #include "xe_step_types.h"
> #include "xe_survivability_mode_types.h"
> +#include "xe_validation.h"
>
> #if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
> #define TEST_VM_OPS_ERROR
> @@ -575,6 +576,8 @@ struct xe_device {
> */
> atomic64_t global_total_pages;
> #endif
> + /** @val: The domain for exhaustive eviction, which is currently per device. */
> + struct xe_validation_device val;
>
> /* private: */
>
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 600aaadb4bee..1c2d9d9065c6 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -47,6 +47,20 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
> return vm->gpuvm.r_obj;
> }
>
> +/**
> + * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
> + * @vm: The vm whose resv is to be locked.
> + * @exec: The drm_exec transaction.
> + *
> + * Helper to lock the vm's resv as part of a drm_exec transaction.
> + *
> + * Return: %0 on success. See drm_exec_lock_obj() for error codes.
> + */
> +int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
> +{
> + return drm_exec_lock_obj(exec, xe_vm_obj(vm));
> +}
> +
> /**
> * xe_vma_userptr_check_repin() - Advisory check for repin needed
> * @uvma: The userptr vma
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index 4ba26eed7e96..3b6e7234dac4 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -292,6 +292,8 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked);
> */
> #define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
>
> +int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec);
> +
> #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
> #define vm_dbg drm_dbg
> #else
> --
> 2.50.1
>
More information about the Intel-xe mailing list