[PATCH v4 17/18] drm/xe/svm: Implement prefetch support for SVM ranges
Matthew Brost
matthew.brost at intel.com
Tue Apr 29 06:53:51 UTC 2025
On Tue, Apr 29, 2025 at 12:05:59PM +0530, Ghimiray, Himal Prasad wrote:
>
>
> On 29-04-2025 03:18, Matthew Brost wrote:
> > On Mon, Apr 28, 2025 at 04:02:23PM +0530, Himal Prasad Ghimiray wrote:
> > > This commit adds prefetch support for SVM ranges, utilizing the
> > > existing ioctl vm_bind functionality to achieve this.
> > >
> > > v2: rebase
> > >
> > > v3:
> > > - use xa_for_each() instead of manual loop
> > > - check range is valid and in preferred location before adding to
> > > xarray
> > > - Fix naming conventions
> > > - Fix return condition as -ENODATA instead of -EAGAIN (Matthew Brost)
> > > - Handle sparsely populated cpu vma range (Matthew Brost)
> > >
> > > v4:
> > > - fix end address to find next cpu vma in case of -ENOENT
> > >
> > > Cc: Matthew Brost <matthew.brost at intel.com>
> > > Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> > > ---
> > > drivers/gpu/drm/xe/xe_pt.c | 58 ++++++++---
> > > drivers/gpu/drm/xe/xe_vm.c | 208 +++++++++++++++++++++++++++++++++++--
> > > 2 files changed, 245 insertions(+), 21 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> > > index de4e3edda758..f3a99ee4f733 100644
> > > --- a/drivers/gpu/drm/xe/xe_pt.c
> > > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > > @@ -1458,6 +1458,7 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
> > > struct xe_vm *vm = pt_update->vops->vm;
> > > struct xe_vma_ops *vops = pt_update->vops;
> > > struct xe_vma_op *op;
> > > + unsigned long i;
> > > int err;
> > > err = xe_pt_pre_commit(pt_update);
> > > @@ -1467,20 +1468,35 @@ static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
> > > xe_svm_notifier_lock(vm);
> > > list_for_each_entry(op, &vops->list, link) {
> > > - struct xe_svm_range *range = op->map_range.range;
> > > + struct xe_svm_range *range = NULL;
> > > if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
> > > continue;
> > > - xe_svm_range_debug(range, "PRE-COMMIT");
> > > + if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
> > > + xe_assert(vm->xe,
> > > + xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va)));
> > > + xa_for_each(&op->prefetch_range.range, i, range) {
> > > + xe_svm_range_debug(range, "PRE-COMMIT");
> > > - xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
> > > - xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
> > > + if (!xe_svm_range_pages_valid(range)) {
> > > + xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
> > > + xe_svm_notifier_unlock(vm);
> > > + return -ENODATA;
> > > + }
> > > + }
> > > + } else {
> > > + xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
> > > + xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
> > > + range = op->map_range.range;
> > > - if (!xe_svm_range_pages_valid(range)) {
> > > - xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
> > > - xe_svm_notifier_unlock(vm);
> > > - return -EAGAIN;
> > > + xe_svm_range_debug(range, "PRE-COMMIT");
> > > +
> > > + if (!xe_svm_range_pages_valid(range)) {
> > > + xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
> > > + xe_svm_notifier_unlock(vm);
> > > + return -EAGAIN;
> > > + }
> > > }
> > > }
> > > @@ -2065,11 +2081,20 @@ static int op_prepare(struct xe_vm *vm,
> > > {
> > > struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > > - if (xe_vma_is_cpu_addr_mirror(vma))
> > > - break;
> > > + if (xe_vma_is_cpu_addr_mirror(vma)) {
> > > + struct xe_svm_range *range;
> > > + unsigned long i;
> > > - err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
> > > - pt_update_ops->wait_vm_kernel = true;
> > > + xa_for_each(&op->prefetch_range.range, i, range) {
> > > + err = bind_range_prepare(vm, tile, pt_update_ops,
> > > + vma, range);
> > > + if (err)
> > > + return err;
> > > + }
> > > + } else {
> > > + err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
> > > + pt_update_ops->wait_vm_kernel = true;
> > > + }
> > > break;
> > > }
> > > case DRM_GPUVA_OP_DRIVER:
> > > @@ -2273,9 +2298,16 @@ static void op_commit(struct xe_vm *vm,
> > > {
> > > struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > > - if (!xe_vma_is_cpu_addr_mirror(vma))
> > > + if (xe_vma_is_cpu_addr_mirror(vma)) {
> > > + struct xe_svm_range *range = NULL;
> > > + unsigned long i;
> > > +
> > > + xa_for_each(&op->prefetch_range.range, i, range)
> > > + range_present_and_invalidated_tile(vm, range, tile->id);
> > > + } else {
> > > bind_op_commit(vm, tile, pt_update_ops, vma, fence,
> > > fence2, false);
> > > + }
> > > break;
> > > }
> > > case DRM_GPUVA_OP_DRIVER:
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > index 555ed10dac85..14c01e155cb7 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -798,10 +798,33 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
> > > }
> > > ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
> > > +static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
> > > +{
> > > + struct xe_vma *vma;
> > > +
> > > + vma = gpuva_to_vma(op->base.prefetch.va);
> > > +
> > > + if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
> > > + xa_destroy(&op->prefetch_range.range);
> > > +}
> > > +
> > > +static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
> > > +{
> > > + struct xe_vma_op *op;
> > > +
> > > + if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
> > > + return;
> > > +
> > > + list_for_each_entry(op, &vops->list, link)
> > > + xe_vma_svm_prefetch_op_fini(op);
> > > +}
> > > +
> > > static void xe_vma_ops_fini(struct xe_vma_ops *vops)
> > > {
> > > int i;
> > > + xe_vma_svm_prefetch_ops_fini(vops);
> > > +
> > > for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
> > > kfree(vops->pt_update_ops[i].ops);
> > > }
> > > @@ -2248,13 +2271,40 @@ static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
> > > return true;
> > > }
> > > +static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
> > > +{
> > > + struct drm_gpuva_op *__op;
> > > +
> > > + drm_gpuva_for_each_op(__op, ops) {
> > > + struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> > > +
> > > + xe_vma_svm_prefetch_op_fini(op);
> > > + }
> > > +}
> > > +
> > > +static u64 find_cpu_vma_start_in_range(struct mm_struct *mm, u64 start, u64 end)
> >
> > Ideally we should move this to GPU SVM as we shouldn't be looking CPU
> > VMAs in driver code rather the common DRM code.
> >
> > With that, then a xe_svm layer wrapper too.
>
> Sure will do that.
>
> >
> > > +{
> > > + struct vm_area_struct *vma;
> > > + u64 addr = 0;
> > > +
> > > + mmap_read_lock(mm);
> > > +
> > > + vma = find_vma_intersection(mm, start, end);
> > > + if (vma)
> > > + addr = max(start, vma->vm_start);
> > > +
> > > + mmap_read_unlock(mm);
> > > + return addr;
> > > +}
> > > +
> > > /*
> > > * Create operations list from IOCTL arguments, setup operations fields so parse
> > > * and commit steps are decoupled from IOCTL arguments. This step can fail.
> > > */
> > > static struct drm_gpuva_ops *
> > > -vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
> > > - u64 bo_offset_or_userptr, u64 addr, u64 range,
> > > +vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
> > > + struct xe_bo *bo, u64 bo_offset_or_userptr,
> > > + u64 addr, u64 range,
> > > u32 operation, u32 flags,
> > > u32 prefetch_region, u16 pat_index)
> > > {
> > > @@ -2262,6 +2312,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
> > > struct drm_gpuva_ops *ops;
> > > struct drm_gpuva_op *__op;
> > > struct drm_gpuvm_bo *vm_bo;
> > > + u64 range_end = addr + range;
> > > int err;
> > > lockdep_assert_held_write(&vm->lock);
> > > @@ -2323,14 +2374,79 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
> > > op->map.invalidate_on_bind =
> > > __xe_vm_needs_clear_scratch_pages(vm, flags);
> > > } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
> > > - op->prefetch.region = prefetch_region;
> > > - }
> > > + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > > +
> > > + if (!xe_vma_is_cpu_addr_mirror(vma)) {
> > > + op->prefetch.region = prefetch_region;
> > > + break;
> > > + }
> > > + struct drm_gpusvm_ctx ctx = {
> > > + .read_only = xe_vma_read_only(vma),
> > > + .devmem_possible = IS_DGFX(vm->xe) &&
> > > + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
> > > + };
> > > +
> >
> > Does the compiler not complain for this declaration after an if
> > statement. I though C rules were all declartions before logic.
>
> AFAIK that changed with kernel moving to c11 standard from c89.
> For readibility we still prefer all declaration before logic. But from
> compilation perspective it should be safe. Will fall back to declarations
> before logic it causes no harm.
>
>
> >
> > > + struct xe_svm_range *svm_range;
> > > + struct xe_tile *tile;
> > > + u8 id, tile_mask = 0;
> > > + u32 i;
> > > +
> > > + for_each_tile(tile, vm->xe, id)
> > > + tile_mask |= 0x1 << id;
> > > +
> > > + xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
> > > + op->prefetch_range.region = prefetch_region;
> > > + op->prefetch_range.ranges_count = 0;
> > > +alloc_next_range:
> > > + svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
> > > +
> > > + if (PTR_ERR(svm_range) == -ENOENT) {
> > > + addr = find_cpu_vma_start_in_range(vm->svm.gpusvm.mm,
> > > + max(addr, xe_vma_start(vma)),
> > > + min(range_end, xe_vma_end(vma)));
> > > + if (addr)
> > > + goto alloc_next_range;
> >
> > Won't this cause a problem in xe_svm_range_end(svm_range) below as
> > svm_range is PTR_ERR?
>
> goto is to alloc_next_range not to check_next_range, there is no
> xe_svm_range_end() here.
>
Yep, I misread the code here, mistake. Look fine as is.
Matt
> >
> > > + else
> > > + continue;
> >
> > s/continue/goto print_op_label/
>
> Sure
>
> >
> > > + }
> > > +
> > > + if (IS_ERR(svm_range)) {
> > > + err = PTR_ERR(svm_range);
> > > + goto unwind_prefetch_ops;
> > > + }
> > > +
> > > + if (xe_svm_range_validate_and_evict(vm, svm_range,
> > > + tile_mask, !!prefetch_region))
> > > + goto check_next_range;
> > > +
> > > + err = xa_alloc(&op->prefetch_range.range,
> > > + &i, svm_range, xa_limit_32b,
> > > + GFP_KERNEL);
> > > +
> > > + if (err)
> > > + goto unwind_prefetch_ops;
> > > +
> > > + op->prefetch_range.ranges_count++;
> > > + vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
> > > +check_next_range:
> > > + if (range_end > xe_svm_range_end(svm_range) &&
> > > + xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
> > > + addr = xe_svm_range_end(svm_range);
> > > + goto alloc_next_range;
> > > + }
> > > + }
> >
> > print_op_label:
>
> Sure.
>
> >
> > > print_op(vm->xe, __op);
> > > }
> > > return ops;
> > > +
> > > +unwind_prefetch_ops:
> > > + xe_svm_prefetch_gpuva_ops_fini(ops);
> > > + drm_gpuva_ops_free(&vm->gpuvm, ops);
> > > + return ERR_PTR(err);
> > > }
> > > +
> > > ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
> > > static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
> > > @@ -2645,8 +2761,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
> > > return err;
> > > }
> > > - if (!xe_vma_is_cpu_addr_mirror(vma))
> > > + if (xe_vma_is_cpu_addr_mirror(vma))
> > > + xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
> > > + op->prefetch_range.ranges_count);
> > > + else
> > > xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
> > > +
> > > break;
> > > default:
> > > drm_warn(&vm->xe->drm, "NOT POSSIBLE");
> > > @@ -2772,6 +2892,50 @@ static int check_ufence(struct xe_vma *vma)
> > > return 0;
> > > }
> > > +static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> > > +{
> > > + struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > > + int err = 0;
> > > +
> > > + struct xe_svm_range *svm_range;
> > > + struct xe_tile *tile;
> > > + unsigned long i;
> > > + u32 region;
> > > +
> > > + if (!xe_vma_is_cpu_addr_mirror(vma))
> > > + return 0;
> > > + region = op->prefetch_range.region;
> > > + struct drm_gpusvm_ctx ctx = {
> > > + .read_only = xe_vma_read_only(vma),
> > > + .devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
> > > + .check_pages_threshold = IS_DGFX(vm->xe) &&
> > > + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
> > > + .devmem_only = region && IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
> > > + };
> >
> >
> > Same question here about a declartion after logic, pretty sure some
> > compilers will complain.
>
> Will address it in next version.
>
> >
> > Matt
> >
> > > + /* TODO: Threading the migration */
> > > + xa_for_each(&op->prefetch_range.range, i, svm_range) {
> > > + if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
> > > + tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
> > > + err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx);
> > > + if (err) {
> > > + drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
> > > + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
> > > + return -ENODATA;
> > > + }
> > > + }
> > > +
> > > + err = xe_svm_range_get_pages(vm, svm_range, &ctx);
> > > + if (err) {
> > > + if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
> > > + err = -ENODATA;
> > > + drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
> > > + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
> > > + return err;
> > > + }
> > > + }
> > > + return err;
> > > +}
> > > +
> > > static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
> > > struct xe_vma_op *op)
> > > {
> > > @@ -2809,7 +2973,12 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
> > > case DRM_GPUVA_OP_PREFETCH:
> > > {
> > > struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
> > > - u32 region = op->prefetch.region;
> > > + u32 region;
> > > +
> > > + if (xe_vma_is_cpu_addr_mirror(vma))
> > > + region = op->prefetch_range.region;
> > > + else
> > > + region = op->prefetch.region;
> > > xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
> > > @@ -2828,6 +2997,25 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
> > > return err;
> > > }
> > > +static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
> > > +{
> > > + struct xe_vma_op *op;
> > > + int err;
> > > +
> > > + if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
> > > + return 0;
> > > +
> > > + list_for_each_entry(op, &vops->list, link) {
> > > + if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
> > > + err = prefetch_ranges(vm, op);
> > > + if (err)
> > > + return err;
> > > + }
> > > + }
> > > +
> > > + return 0;
> > > +}
> > > +
> > > static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
> > > struct xe_vm *vm,
> > > struct xe_vma_ops *vops)
> > > @@ -3478,7 +3666,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > > u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
> > > u16 pat_index = bind_ops[i].pat_index;
> > > - ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
> > > + ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
> > > addr, range, op, flags,
> > > prefetch_region, pat_index);
> > > if (IS_ERR(ops[i])) {
> > > @@ -3511,6 +3699,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > > if (err)
> > > goto unwind_ops;
> > > + err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
> > > + if (err)
> > > + goto unwind_ops;
> > > +
> > > fence = vm_bind_ioctl_ops_execute(vm, &vops);
> > > if (IS_ERR(fence))
> > > err = PTR_ERR(fence);
> > > @@ -3580,7 +3772,7 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
> > > xe_vma_ops_init(&vops, vm, q, NULL, 0);
> > > - ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
> > > + ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, bo->size,
> > > DRM_XE_VM_BIND_OP_MAP, 0, 0,
> > > vm->xe->pat.idx[cache_lvl]);
> > > if (IS_ERR(ops)) {
> > > --
> > > 2.34.1
> > >
>
More information about the Intel-xe
mailing list