[PATCH v5 23/25] drm/xe: Reset VMA attributes to default in SVM garbage collector
Matthew Brost
matthew.brost at intel.com
Wed Aug 6 04:06:55 UTC 2025
On Wed, Jul 30, 2025 at 06:30:48PM +0530, Himal Prasad Ghimiray wrote:
> Restore default memory attributes for VMAs during garbage collection
> if they were modified by madvise. Reuse existing VMA if fully overlapping;
> otherwise, allocate a new mirror VMA.
>
> v2 (Matthew Brost)
> - Add helper for vma split
> - Add retry to get updated vma
>
> Suggested-by: Matthew Brost <matthew.brost at intel.com>
> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> ---
> drivers/gpu/drm/xe/xe_svm.c | 114 +++++++++++++++++++++-----
> drivers/gpu/drm/xe/xe_vm.c | 155 ++++++++++++++++++++++++++----------
> drivers/gpu/drm/xe/xe_vm.h | 2 +
> 3 files changed, 206 insertions(+), 65 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index aef76e08b460..9b3a3f61758c 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -253,9 +253,55 @@ static int __xe_svm_garbage_collector(struct xe_vm *vm,
> return 0;
> }
>
> +static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 range_end)
> +{
> + struct xe_vma *vma;
> + struct xe_vma_mem_attr default_attr = {
> + .preferred_loc = {
> + .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
> + .migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
> + },
> + .atomic_access = DRM_XE_ATOMIC_UNDEFINED,
> + };
> + int err = 0;
> +
> + vma = xe_vm_find_vma_by_addr(vm, range_start);
> + if (!vma)
> + return -EINVAL;
> +
> + if (xe_vma_has_default_mem_attrs(vma))
> + return 0;
> +
> + vm_dbg(&vm->xe->drm, "Existing VMA start=0x%016llx, vma_end=0x%016llx",
> + xe_vma_start(vma), xe_vma_end(vma));
> +
> + if (xe_vma_start(vma) == range_start && xe_vma_end(vma) == range_end) {
> + default_attr.pat_index = vma->attr.default_pat_index;
> + default_attr.default_pat_index = vma->attr.default_pat_index;
> + vma->attr = default_attr;
> + } else {
> + vm_dbg(&vm->xe->drm, "Split VMA start=0x%016llx, vma_end=0x%016llx",
> + range_start, range_end);
> + err = xe_vm_alloc_cpu_addr_mirror_vma(vm, range_start, range_end - range_start);
> + if (err) {
> + drm_warn(&vm->xe->drm, "VMA SPLIT failed: %pe\n", ERR_PTR(err));
> + xe_vm_kill(vm, true);
> + return err;
> + }
> + }
> +
> + /*
> + * When called from xe_svm_handle_pagefault the original VMA might have
> + * changed; signal this so the caller looks it up again.
> + */
> + return -EAGAIN;
> +}
> +
> static int xe_svm_garbage_collector(struct xe_vm *vm)
> {
> struct xe_svm_range *range;
> + u64 range_start;
> + u64 range_end;
> int err;
>
> lockdep_assert_held_write(&vm->lock);
> @@ -271,6 +317,9 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
> if (!range)
> break;
>
> + range_start = xe_svm_range_start(range);
> + range_end = xe_svm_range_end(range);
> +
> list_del(&range->garbage_collector_link);
> spin_unlock(&vm->svm.garbage_collector.lock);
>
> @@ -283,6 +332,10 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
> return err;
> }
>
> + err = xe_svm_range_set_default_attr(vm, range_start, range_end);
> + if (err)
> + return err;
You don't want to return on -EAGAIN here; rather, collect it, continue,
and return -EAGAIN once the garbage collector list is empty. There is no
need to continuously look up the VMA in xe_svm_handle_pagefault
(__xe_svm_handle_pagefault in the next rev); that only needs to be done
once.
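
Roughly something like this (sketch only, untested; the loop shape and
the garbage collector list head name are what I recall from the driver,
not taken from this hunk):

static int xe_svm_garbage_collector(struct xe_vm *vm)
{
	struct xe_svm_range *range;
	u64 range_start, range_end;
	bool vma_reset = false;
	int err;

	lockdep_assert_held_write(&vm->lock);

	spin_lock(&vm->svm.garbage_collector.lock);
	for (;;) {
		range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
						 typeof(*range),
						 garbage_collector_link);
		if (!range)
			break;

		range_start = xe_svm_range_start(range);
		range_end = xe_svm_range_end(range);

		list_del(&range->garbage_collector_link);
		spin_unlock(&vm->svm.garbage_collector.lock);

		err = __xe_svm_garbage_collector(vm, range);
		if (err) {
			/* existing error handling stays as-is */
			return err;
		}

		err = xe_svm_range_set_default_attr(vm, range_start,
						    range_end);
		if (err == -EAGAIN) {
			/* VMA was reset/split; note it and keep draining */
			vma_reset = true;
		} else if (err) {
			return err;
		}

		spin_lock(&vm->svm.garbage_collector.lock);
	}
	spin_unlock(&vm->svm.garbage_collector.lock);

	/* Tell the caller to look the VMA up again, exactly once */
	return vma_reset ? -EAGAIN : 0;
}
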
> +
> spin_lock(&vm->svm.garbage_collector.lock);
> }
> spin_unlock(&vm->svm.garbage_collector.lock);
> @@ -793,40 +846,59 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> struct xe_gt *gt, u64 fault_addr,
> bool atomic)
> {
> - int need_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic);
> -
> - if (need_vram < 0)
> - return need_vram;
> -
> - struct drm_gpusvm_ctx ctx = {
> - .read_only = xe_vma_read_only(vma),
> - .devmem_possible = IS_DGFX(vm->xe) &&
> - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
> - .check_pages_threshold = IS_DGFX(vm->xe) &&
> - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0,
> - .devmem_only = need_vram && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
> - .timeslice_ms = atomic && IS_DGFX(vm->xe) &&
> - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ?
> - vm->xe->atomic_svm_timeslice_ms : 0,
> - };
> + struct drm_gpusvm_ctx ctx = { };
> + struct drm_pagemap *dpagemap;
> struct xe_svm_range *range;
> struct dma_fence *fence;
> - struct drm_pagemap *dpagemap;
> struct xe_tile *tile = gt_to_tile(gt);
> - int migrate_try_count = ctx.devmem_only ? 3 : 1;
> + bool vma_updated = false;
> + int need_vram;
> + int migrate_try_count;
> ktime_t end = 0;
> int err;
>
> - lockdep_assert_held_write(&vm->lock);
> +find_vma:
> + if (vma_updated) {
> + vma = xe_vm_find_vma_by_addr(vm, fault_addr);
> + if (!vma)
> + return -EINVAL;
> + }
> +
> xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
> + vma_updated = false;
> +
> + need_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic);
> + if (need_vram < 0)
> + return need_vram;
This is a bit ugly. I think if you split this into __xe_svm_handle_pagefault
and xe_svm_handle_pagefault as in [1], it can be handled more cleanly (i.e.
the drm_gpusvm_ctx setup stays static).

If xe_svm_garbage_collector returns -EAGAIN in __xe_svm_handle_pagefault,
kick it up to xe_svm_handle_pagefault, catch -EAGAIN there, relook up the
VMA, and call __xe_svm_handle_pagefault again. I think that would look
quite a bit better.
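
i.e., roughly (sketch only; the inner body is elided here, it keeps your
existing flow and simply propagates -EAGAIN from the garbage collector):

static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
				     struct xe_gt *gt, u64 fault_addr,
				     bool atomic)
{
	/*
	 * Existing body: static drm_gpusvm_ctx setup, garbage collector,
	 * range find/insert, migrate and bind. -EAGAIN from
	 * xe_svm_garbage_collector() is simply returned to the caller.
	 */
	return 0;
}

int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
			    struct xe_gt *gt, u64 fault_addr,
			    bool atomic)
{
	int err;

	err = __xe_svm_handle_pagefault(vm, vma, gt, fault_addr, atomic);
	if (err == -EAGAIN) {
		/*
		 * Garbage collection reset or split the VMA; look it up
		 * once and retry.
		 */
		vma = xe_vm_find_vma_by_addr(vm, fault_addr);
		if (!vma)
			return -EINVAL;

		err = __xe_svm_handle_pagefault(vm, vma, gt, fault_addr,
						atomic);
	}

	return err;
}
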
Matt
[1] https://patchwork.freedesktop.org/patch/666222/?series=149550&rev=5#comment_1222471
> +
> + ctx.read_only = xe_vma_read_only(vma);
> + ctx.devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
> + ctx.check_pages_threshold = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ?
> + SZ_64K : 0;
> + ctx.devmem_only = need_vram && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
> + ctx.timeslice_ms = atomic && IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ?
> + vm->xe->atomic_svm_timeslice_ms : 0;
>
> + migrate_try_count = ctx.devmem_only ? 3 : 1;
> +
> + lockdep_assert_held_write(&vm->lock);
> xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);
>
> retry:
> /* Always process UNMAPs first so view SVM ranges is current */
> err = xe_svm_garbage_collector(vm);
> - if (err)
> - return err;
> + if (err) {
> + if (err == -EAGAIN) {
> + /*
> + * VMA might have changed due to garbage
> + * collection; retry lookup
> + */
> + vma_updated = true;
> + goto find_vma;
> + } else {
> + return err;
> + }
> + }
>
> range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);
>
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 5ee38e9cf6c6..e77c04f92d0b 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -4263,36 +4263,24 @@ int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool i
> }
> }
>
> -/**
> - * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops
> - * @vm: Pointer to the xe_vm structure
> - * @start: Starting input address
> - * @range: Size of the input range
> - *
> - * This function splits existing vma to create new vma for user provided input range
> - *
> - * Return: 0 if success
> - */
> -int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
> +static int xe_vm_alloc_vma(struct xe_vm *vm, struct drm_gpuva_op_map *map_req)
> {
> - struct drm_gpuva_op_map map_req = {
> - .va.addr = start,
> - .va.range = range,
> - .flags = DRM_GPUVM_SM_MAP_OPS_FLAG_SPLIT_MADVISE,
> - };
> -
> struct xe_vma_ops vops;
> struct drm_gpuva_ops *ops = NULL;
> struct drm_gpuva_op *__op;
> bool is_cpu_addr_mirror = false;
> bool remap_op = false;
> + bool is_madvise = (map_req->flags & DRM_GPUVM_SM_MAP_OPS_FLAG_SPLIT_MADVISE);
> struct xe_vma_mem_attr tmp_attr;
> + u16 default_pat;
> int err;
>
> lockdep_assert_held_write(&vm->lock);
>
> - vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
> - ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
> + vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
> + map_req->va.addr, map_req->va.range);
> +
> + ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);
> if (IS_ERR(ops))
> return PTR_ERR(ops);
>
> @@ -4303,33 +4291,56 @@ int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
>
> drm_gpuva_for_each_op(__op, ops) {
> struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
> + struct xe_vma *vma = NULL;
>
> - if (__op->op == DRM_GPUVA_OP_REMAP) {
> - xe_assert(vm->xe, !remap_op);
> - remap_op = true;
> + if (!is_madvise) {
> + if (__op->op == DRM_GPUVA_OP_UNMAP) {
> + vma = gpuva_to_vma(op->base.unmap.va);
> + XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
> + default_pat = vma->attr.default_pat_index;
> + }
>
> - if (xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.remap.unmap->va)))
> - is_cpu_addr_mirror = true;
> - else
> - is_cpu_addr_mirror = false;
> - }
> + if (__op->op == DRM_GPUVA_OP_REMAP) {
> + vma = gpuva_to_vma(op->base.remap.unmap->va);
> + default_pat = vma->attr.default_pat_index;
> + }
>
> - if (__op->op == DRM_GPUVA_OP_MAP) {
> - xe_assert(vm->xe, remap_op);
> - remap_op = false;
> + if (__op->op == DRM_GPUVA_OP_MAP) {
> + op->map.is_cpu_addr_mirror = true;
> + op->map.pat_index = default_pat;
> + }
> + } else {
> + if (__op->op == DRM_GPUVA_OP_REMAP) {
> + vma = gpuva_to_vma(op->base.remap.unmap->va);
> + xe_assert(vm->xe, !remap_op);
> + remap_op = true;
>
> - /* In case of madvise ops DRM_GPUVA_OP_MAP is always after
> - * DRM_GPUVA_OP_REMAP, so ensure we assign op->map.is_cpu_addr_mirror true
> - * if REMAP is for xe_vma_is_cpu_addr_mirror vma
> - */
> - op->map.is_cpu_addr_mirror = is_cpu_addr_mirror;
> - }
> + if (xe_vma_is_cpu_addr_mirror(vma))
> + is_cpu_addr_mirror = true;
> + else
> + is_cpu_addr_mirror = false;
> + }
>
> + if (__op->op == DRM_GPUVA_OP_MAP) {
> + xe_assert(vm->xe, remap_op);
> + remap_op = false;
> + /*
> + * In case of madvise ops DRM_GPUVA_OP_MAP is
> + * always after DRM_GPUVA_OP_REMAP, so ensure
> + * we assign op->map.is_cpu_addr_mirror true
> + * if REMAP is for xe_vma_is_cpu_addr_mirror vma
> + */
> + op->map.is_cpu_addr_mirror = is_cpu_addr_mirror;
> + }
> + }
> print_op(vm->xe, __op);
> }
>
> xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
> - vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
> +
> + if (is_madvise)
> + vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
> +
> err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
> if (err)
> goto unwind_ops;
> @@ -4341,15 +4352,20 @@ int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
> struct xe_vma *vma;
>
> if (__op->op == DRM_GPUVA_OP_UNMAP) {
> - /* There should be no unmap */
> - XE_WARN_ON("UNEXPECTED UNMAP");
> - xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), NULL);
> + vma = gpuva_to_vma(op->base.unmap.va);
> + /* There should be no unmap for madvise */
> + if (is_madvise)
> + XE_WARN_ON("UNEXPECTED UNMAP");
> +
> + xe_vma_destroy(vma, NULL);
> } else if (__op->op == DRM_GPUVA_OP_REMAP) {
> vma = gpuva_to_vma(op->base.remap.unmap->va);
> - /* Store attributes for REMAP UNMAPPED VMA, so they can be assigned
> - * to newly MAP created vma.
> + /* For madvise ops, store the attributes of the VMA unmapped by REMAP
> + * so they can be assigned to the newly created MAP vma.
> */
> - tmp_attr = vma->attr;
> + if (is_madvise)
> + tmp_attr = vma->attr;
> +
> xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
> } else if (__op->op == DRM_GPUVA_OP_MAP) {
> vma = op->map.vma;
> @@ -4357,7 +4373,8 @@ int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
> * Therefore temp_attr will always have sane values, making it safe to
> * copy them to new vma.
> */
> - vma->attr = tmp_attr;
> + if (is_madvise)
> + vma->attr = tmp_attr;
> }
> }
>
> @@ -4371,3 +4388,53 @@ int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
> drm_gpuva_ops_free(&vm->gpuvm, ops);
> return err;
> }
> +
> +/**
> + * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops
> + * @vm: Pointer to the xe_vm structure
> + * @start: Starting input address
> + * @range: Size of the input range
> + *
> + * This function splits existing vma to create new vma for user provided input range
> + *
> + * Return: 0 if success
> + */
> +int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
> +{
> + struct drm_gpuva_op_map map_req = {
> + .va.addr = start,
> + .va.range = range,
> + .flags = DRM_GPUVM_SM_MAP_OPS_FLAG_SPLIT_MADVISE,
> + };
> +
> + lockdep_assert_held_write(&vm->lock);
> +
> + vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
> +
> + return xe_vm_alloc_vma(vm, &map_req);
> +}
> +
> +/**
> + * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
> + * @vm: Pointer to the xe_vm structure
> + * @start: Starting input address
> + * @range: Size of the input range
> + *
> + * This function splits/merges existing vma to create new vma for user provided input range
> + *
> + * Return: 0 if success
> + */
> +int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
> +{
> + struct drm_gpuva_op_map map_req = {
> + .va.addr = start,
> + .va.range = range,
> + };
> +
> + lockdep_assert_held_write(&vm->lock);
> +
> + vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
> + start, range);
> +
> + return xe_vm_alloc_vma(vm, &map_req);
> +}
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index f735d994806d..6538cddf158b 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -177,6 +177,8 @@ int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool i
>
> int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t addr, uint64_t size);
>
> +int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t addr, uint64_t size);
> +
> /**
> * to_userptr_vma() - Return a pointer to an embedding userptr vma
> * @vma: Pointer to the embedded struct xe_vma
> --
> 2.34.1
>