[PATCH v2 14/32] drm/xe/svm: Implement prefetch support for SVM ranges
Matthew Brost
matthew.brost at intel.com
Thu Apr 24 23:48:35 UTC 2025
On Mon, Apr 07, 2025 at 03:47:01PM +0530, Himal Prasad Ghimiray wrote:
> This commit adds prefetch support for SVM ranges, utilizing the existing
> vm_bind ioctl functionality.
>
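
As an aside, to make the uAPI side of this concrete: no new uAPI is
added here, this is the existing vm_bind ioctl issuing a prefetch op on
a range previously bound with DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR.
Roughly (untested sketch, error and sync handling omitted):

#include <sys/ioctl.h>
#include <drm/xe_drm.h>

/* Prefetch an SVM (CPU address mirror) range into the given memory
 * region instance via the vm_bind ioctl.
 */
static int xe_svm_prefetch(int fd, __u32 vm_id, __u64 addr, __u64 range,
			   __u32 mem_region_instance)
{
	struct drm_xe_vm_bind bind = {
		.vm_id = vm_id,
		.num_binds = 1,
		.bind = {
			.addr = addr,
			.range = range,
			.op = DRM_XE_VM_BIND_OP_PREFETCH,
			.prefetch_mem_region_instance = mem_region_instance,
		},
	};

	return ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
}

(a real UMD would use drmIoctl() from libdrm to handle EINTR restarts)
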
> v2: rebase
>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> ---
> drivers/gpu/drm/xe/xe_pt.c | 61 +++++++++---
> drivers/gpu/drm/xe/xe_vm.c | 185 +++++++++++++++++++++++++++++++++++--
> 2 files changed, 222 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index de4e3edda758..59dc065fae93 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -1458,7 +1458,8 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
> struct xe_vm *vm = pt_update->vops->vm;
> struct xe_vma_ops *vops = pt_update->vops;
> struct xe_vma_op *op;
> - int err;
> + int ranges_count;
> + int err, i;
>
> err = xe_pt_pre_commit(pt_update);
> if (err)
> @@ -1467,20 +1468,33 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
> xe_svm_notifier_lock(vm);
>
> list_for_each_entry(op, &vops->list, link) {
> - struct xe_svm_range *range = op->map_range.range;
> + struct xe_svm_range *range = NULL;
>
> if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
> continue;
>
> - xe_svm_range_debug(range, "PRE-COMMIT");
> -
> - xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
> - xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
> + if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
> + xe_assert(vm->xe,
> + xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va)));
> + ranges_count = op->prefetch_range.ranges_count;
> + } else {
> + xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
> + xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
> + ranges_count = 1;
> + }
>
> - if (!xe_svm_range_pages_valid(range)) {
> - xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
> - xe_svm_notifier_unlock(vm);
> - return -EAGAIN;
> + for (i = 0; i < ranges_count; i++) {
> + if (op->base.op == DRM_GPUVA_OP_PREFETCH)
> + range = xa_load(&op->prefetch_range.range, i);
> + else
> + range = op->map_range.range;
> + xe_svm_range_debug(range, "PRE-COMMIT");
> +
> + if (!xe_svm_range_pages_valid(range)) {
> + xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
> + xe_svm_notifier_unlock(vm);
> + return -EAGAIN;
> + }
> }
> }
>
> @@ -2065,11 +2079,21 @@ static int op_prepare(struct xe_vm *vm,
> {
> struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
>
> - if (xe_vma_is_cpu_addr_mirror(vma))
> - break;
> + if (xe_vma_is_cpu_addr_mirror(vma)) {
> + struct xe_svm_range *range;
> + int i;
>
> - err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
> - pt_update_ops->wait_vm_kernel = true;
> + for (i = 0; i < op->prefetch_range.ranges_count; i++) {
> + range = xa_load(&op->prefetch_range.range, i);
> + err = bind_range_prepare(vm, tile, pt_update_ops,
> + vma, range);
> + if (err)
> + return err;
> + }
> + } else {
> + err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
> + pt_update_ops->wait_vm_kernel = true;
> + }
> break;
> }
> case DRM_GPUVA_OP_DRIVER:
> @@ -2273,9 +2297,16 @@ static void op_commit(struct xe_vm *vm,
> {
> struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
>
> - if (!xe_vma_is_cpu_addr_mirror(vma))
> + if (xe_vma_is_cpu_addr_mirror(vma)) {
> + for (int i = 0 ; i < op->prefetch_range.ranges_count; i++) {
> + struct xe_svm_range *range = xa_load(&op->prefetch_range.range, i);
> +
> + range_present_and_invalidated_tile(vm, range, tile->id);
> + }
> + } else {
> bind_op_commit(vm, tile, pt_update_ops, vma, fence,
> fence2, false);
> + }
> break;
> }
> case DRM_GPUVA_OP_DRIVER:
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 57af2c37f927..ffd7ad664921 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -798,10 +798,36 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
> }
> ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
>
> +static void clean_svm_prefetch_op(struct xe_vma_op *op)
> +{
> + struct xe_vma *vma;
> +
> + vma = gpuva_to_vma(op->base.prefetch.va);
> +
> + if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) {
> + xa_destroy(&op->prefetch_range.range);
> + op->prefetch_range.ranges_count = 0;
> + }
> +}
> +
> +static void clean_svm_prefetch_in_vma_ops(struct xe_vma_ops *vops)
> +{
> + struct xe_vma_op *op;
> +
> + if (!(vops->flags & XE_VMA_OPS_HAS_SVM_PREFETCH))
> + return;
> +
> + list_for_each_entry(op, &vops->list, link) {
> + clean_svm_prefetch_op(op);
> + }
> +}
> +
> static void xe_vma_ops_fini(struct xe_vma_ops *vops)
> {
> int i;
>
> + clean_svm_prefetch_in_vma_ops(vops);
> +
> for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
> kfree(vops->pt_update_ops[i].ops);
> }
> @@ -2248,13 +2274,25 @@ static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
> return true;
> }
>
> +static void clean_svm_prefetch_in_gpuva_ops(struct drm_gpuva_ops *ops)
> +{
> + struct drm_gpuva_op *__op;
> +
> + drm_gpuva_for_each_op(__op, ops) {
> + struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> +
> + clean_svm_prefetch_op(op);
> + }
> +}
> +
> /*
> * Create operations list from IOCTL arguments, setup operations fields so parse
> * and commit steps are decoupled from IOCTL arguments. This step can fail.
> */
> static struct drm_gpuva_ops *
> -vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
> - u64 bo_offset_or_userptr, u64 addr, u64 range,
> +vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
> + struct xe_bo *bo, u64 bo_offset_or_userptr,
> + u64 addr, u64 range,
> u32 operation, u32 flags,
> u32 prefetch_region, u16 pat_index)
> {
> @@ -2262,6 +2300,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
> struct drm_gpuva_ops *ops;
> struct drm_gpuva_op *__op;
> struct drm_gpuvm_bo *vm_bo;
> + u64 range_end = addr + range;
> int err;
>
> lockdep_assert_held_write(&vm->lock);
> @@ -2323,14 +2362,61 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
> op->map.invalidate_on_bind =
> __xe_vm_needs_clear_scratch_pages(vm, flags);
> } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
> - op->prefetch.region = prefetch_region;
> - }
> + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> +
> + if (!xe_vma_is_cpu_addr_mirror(vma)) {
> + op->prefetch.region = prefetch_region;
> + break;
> + }
>
> + struct drm_gpusvm_ctx ctx = {
> + .read_only = xe_vma_read_only(vma),
> + .devmem_possible = IS_DGFX(vm->xe) &&
> + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
> + .check_pages_threshold = IS_DGFX(vm->xe) &&
> + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ?
> + SZ_64K : 0,
> + };
> +
> + op->prefetch_range.region = prefetch_region;
> + struct xe_svm_range *svm_range;
> + int i = 0;
> +
> + xa_init(&op->prefetch_range.range);
> + op->prefetch_range.ranges_count = 0;
> +alloc_next_range:
> + svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
> +
> + if (PTR_ERR(svm_range) == -ENOENT)
> + break;
I missed this in the previous review. -ENOENT means a CPU VMA does not
exist. I think it is a fairly reasonable use case for a UMD to issue a
prefetch to a sparsely populated CPU VMA range, so I don't think
breaking here is correct; rather, we should goto alloc_next_range after
adjusting to the next address. This gets tricky, as we likely don't
want to iterate 4k at a time... Maybe we add a GPU SVM support function
which wraps a CPU VMA lookup (find_vma, I think) to find the next CPU
VMA and return its starting address; if that address is within the
prefetch range, we continue the walk.
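
Rough sketch of what I have in mind (completely untested, helper name
made up) - a GPU SVM support function wrapping find_vma(), e.g. in
drm_gpusvm.c:

/*
 * Return the start of the next CPU VMA at or after @start, or U64_MAX
 * if there is none.
 */
u64 drm_gpusvm_find_vma_start(struct drm_gpusvm *gpusvm, u64 start)
{
	struct mm_struct *mm = gpusvm->mm;
	struct vm_area_struct *vas;
	u64 addr = U64_MAX;

	if (!mmget_not_zero(mm))
		return addr;

	mmap_read_lock(mm);
	/* find_vma() returns the first VMA with vm_end > start */
	vas = find_vma(mm, start);
	if (vas)
		addr = max_t(u64, start, vas->vm_start);
	mmap_read_unlock(mm);
	mmput(mm);

	return addr;
}

Then in vm_bind_ioctl_ops_create() something like:

	if (PTR_ERR(svm_range) == -ENOENT) {
		u64 ret = drm_gpusvm_find_vma_start(&vm->svm.gpusvm, addr);

		/* No CPU VMA left within the prefetch range / GPU VMA */
		if (ret >= min_t(u64, range_end, xe_vma_end(vma)))
			break;

		addr = ret;
		goto alloc_next_range;
	}

The lookup is inherently racy against a concurrent munmap, but I think
that is fine here since the ranges are revalidated under the notifier
lock in xe_pt_svm_pre_commit() and we return -EAGAIN there anyway.
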
Matt
> +
> + if (IS_ERR(svm_range)) {
> + err = PTR_ERR(svm_range);
> + goto unwind_prefetch_ops;
> + }
> +
> + xa_store(&op->prefetch_range.range, i, svm_range, GFP_KERNEL);
> + op->prefetch_range.ranges_count++;
> + vops->flags |= XE_VMA_OPS_HAS_SVM_PREFETCH;
> +
> + if (range_end > xe_svm_range_end(svm_range) &&
> + xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
> + i++;
> + addr = xe_svm_range_end(svm_range);
> + goto alloc_next_range;
> + }
> + }
> print_op(vm->xe, __op);
> }
>
> return ops;
> +
> +unwind_prefetch_ops:
> + clean_svm_prefetch_in_gpuva_ops(ops);
> + drm_gpuva_ops_free(&vm->gpuvm, ops);
> + return ERR_PTR(err);
> }
> +
> ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
>
> static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
> @@ -2645,8 +2731,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
> return err;
> }
>
> - if (!xe_vma_is_cpu_addr_mirror(vma))
> + if (xe_vma_is_cpu_addr_mirror(vma))
> + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
> + op->prefetch_range.ranges_count);
> + else
> xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
> +
> break;
> default:
> drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> @@ -2772,6 +2862,58 @@ static int check_ufence(struct xe_vma *vma)
> return 0;
> }
>
> +static int prefetch_ranges_lock_and_prep(struct xe_vm *vm,
> + struct xe_vma_op *op)
> +{
> + int err = 0;
> +
> + if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
> + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> + struct drm_gpusvm_ctx ctx = {
> + .read_only = xe_vma_read_only(vma),
> + .devmem_possible = IS_DGFX(vm->xe) &&
> + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
> + .check_pages_threshold = IS_DGFX(vm->xe) &&
> + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ?
> + SZ_64K : 0,
> + };
> + struct xe_svm_range *svm_range;
> + struct xe_tile *tile;
> + u32 region;
> + int i;
> +
> + if (!xe_vma_is_cpu_addr_mirror(vma))
> + return 0;
> +
> + region = op->prefetch_range.region;
> +
> + /* TODO: Threading the migration */
> + for (i = 0; i < op->prefetch_range.ranges_count; i++) {
> + svm_range = xa_load(&op->prefetch_range.range, i);
> + if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
> + tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
> + err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx);
> + if (err) {
> + drm_err(&vm->xe->drm, "VRAM allocation failed, can be retried from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
> + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
> + return -ENODATA;
> + }
> + }
> +
> + err = xe_svm_range_get_pages(vm, svm_range, &ctx);
> + if (err) {
> + if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
> + err = -ENODATA;
> +
> + drm_err(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
> + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
> + return err;
> + }
> + }
> + }
> + return err;
> +}
> +
> static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
> struct xe_vma_op *op)
> {
> @@ -2809,7 +2951,12 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
> case DRM_GPUVA_OP_PREFETCH:
> {
> struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> - u32 region = op->prefetch.region;
> + u32 region;
> +
> + if (xe_vma_is_cpu_addr_mirror(vma))
> + region = op->prefetch_range.region;
> + else
> + region = op->prefetch.region;
>
> xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
>
> @@ -2828,6 +2975,23 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
> return err;
> }
>
> +static int xe_vma_ops_execute_ready(struct xe_vm *vm, struct xe_vma_ops *vops)
> +{
> + struct xe_vma_op *op;
> + int err;
> +
> + if (!(vops->flags & XE_VMA_OPS_HAS_SVM_PREFETCH))
> + return 0;
> +
> + list_for_each_entry(op, &vops->list, link) {
> + err = prefetch_ranges_lock_and_prep(vm, op);
> + if (err)
> + return err;
> + }
> +
> + return 0;
> +}
> +
> static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
> struct xe_vm *vm,
> struct xe_vma_ops *vops)
> @@ -2850,7 +3014,6 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
> vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK)
> return -ENOSPC;
> #endif
> -
> return 0;
> }
>
> @@ -3492,7 +3655,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
> u16 pat_index = bind_ops[i].pat_index;
>
> - ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
> + ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
> addr, range, op, flags,
> prefetch_region, pat_index);
> if (IS_ERR(ops[i])) {
> @@ -3525,6 +3688,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> if (err)
> goto unwind_ops;
>
> + err = xe_vma_ops_execute_ready(vm, &vops);
> + if (err)
> + goto unwind_ops;
> +
> fence = vm_bind_ioctl_ops_execute(vm, &vops);
> if (IS_ERR(fence))
> err = PTR_ERR(fence);
> @@ -3594,7 +3761,7 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
>
> xe_vma_ops_init(&vops, vm, q, NULL, 0);
>
> - ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
> + ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, bo->size,
> DRM_XE_VM_BIND_OP_MAP, 0, 0,
> vm->xe->pat.idx[cache_lvl]);
> if (IS_ERR(ops)) {
> --
> 2.34.1
>