[PATCH v5 23/25] drm/xe: Reset VMA attributes to default in SVM garbage collector

Ghimiray, Himal Prasad himal.prasad.ghimiray at intel.com
Wed Aug 6 05:32:12 UTC 2025



On 06-08-2025 09:36, Matthew Brost wrote:
> On Wed, Jul 30, 2025 at 06:30:48PM +0530, Himal Prasad Ghimiray wrote:
>> Restore default memory attributes for VMAs during garbage collection
>> if they were modified by madvise. Reuse the existing VMA if it fully
>> overlaps; otherwise, allocate a new mirror VMA.
>>
>> v2 (Matthew Brost)
>> - Add helper for vma split
>> - Add retry to get updated vma
>>
>> Suggested-by: Matthew Brost <matthew.brost at intel.com>
>> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_svm.c | 114 +++++++++++++++++++++-----
>>   drivers/gpu/drm/xe/xe_vm.c  | 155 ++++++++++++++++++++++++++----------
>>   drivers/gpu/drm/xe/xe_vm.h  |   2 +
>>   3 files changed, 206 insertions(+), 65 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
>> index aef76e08b460..9b3a3f61758c 100644
>> --- a/drivers/gpu/drm/xe/xe_svm.c
>> +++ b/drivers/gpu/drm/xe/xe_svm.c
>> @@ -253,9 +253,55 @@ static int __xe_svm_garbage_collector(struct xe_vm *vm,
>>   	return 0;
>>   }
>>   
>> +static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 range_end)
>> +{
>> +	struct xe_vma *vma;
>> +	struct xe_vma_mem_attr default_attr = {
>> +		.preferred_loc = {
>> +			.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
>> +			.migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
>> +		},
>> +		.atomic_access = DRM_XE_ATOMIC_UNDEFINED,
>> +	};
>> +	int err = 0;
>> +
>> +	vma = xe_vm_find_vma_by_addr(vm, range_start);
>> +	if (!vma)
>> +		return -EINVAL;
>> +
>> +	if (xe_vma_has_default_mem_attrs(vma))
>> +		return 0;
>> +
>> +	vm_dbg(&vm->xe->drm, "Existing VMA start=0x%016llx, vma_end=0x%016llx",
>> +	       xe_vma_start(vma), xe_vma_end(vma));
>> +
>> +	if (xe_vma_start(vma) == range_start && xe_vma_end(vma) == range_end) {
>> +		default_attr.pat_index = vma->attr.default_pat_index;
>> +		default_attr.default_pat_index = vma->attr.default_pat_index;
>> +		vma->attr = default_attr;
>> +	} else {
>> +		vm_dbg(&vm->xe->drm, "Split VMA start=0x%016llx, vma_end=0x%016llx",
>> +		       range_start, range_end);
>> +		err = xe_vm_alloc_cpu_addr_mirror_vma(vm, range_start, range_end - range_start);
>> +		if (err) {
>> +			drm_warn(&vm->xe->drm, "VMA SPLIT failed: %pe\n", ERR_PTR(err));
>> +			xe_vm_kill(vm, true);
>> +			return err;
>> +		}
>> +	}
>> +
>> +	/*
>> +	 * On a call from xe_svm_handle_pagefault the original VMA might have
>> +	 * changed; signal this so the VMA is looked up again.
>> +	 */
>> +	return -EAGAIN;
>> +}
>> +
>>   static int xe_svm_garbage_collector(struct xe_vm *vm)
>>   {
>>   	struct xe_svm_range *range;
>> +	u64 range_start;
>> +	u64 range_end;
>>   	int err;
>>   
>>   	lockdep_assert_held_write(&vm->lock);
>> @@ -271,6 +317,9 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
>>   		if (!range)
>>   			break;
>>   
>> +		range_start = xe_svm_range_start(range);
>> +		range_end = xe_svm_range_end(range);
>> +
>>   		list_del(&range->garbage_collector_link);
>>   		spin_unlock(&vm->svm.garbage_collector.lock);
>>   
>> @@ -283,6 +332,10 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
>>   			return err;
>>   		}
>>   
>> +		err = xe_svm_range_set_default_attr(vm, range_start, range_end);
>> +		if (err)
>> +			return err;
> 
> You don't want to return on -EAGAIN here; rather, collect it, continue,
> and return -EAGAIN once the garbage collector list is empty. No need to
> continuously look up the VMA in xe_svm_handle_pagefault (in the next rev,
> __xe_svm_handle_pagefault); this only needs to be done once.

True, makes sense.
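
Something like this, perhaps (rough, untested sketch against this
revision; the elided parts stay as in the patch above, and the exact
structure may still change in v6):

	bool vma_changed = false;

	spin_lock(&vm->svm.garbage_collector.lock);
	for (;;) {
		/* Pop the next range and run __xe_svm_garbage_collector()
		 * exactly as above, dropping the lock around the call.
		 */

		err = xe_svm_range_set_default_attr(vm, range_start, range_end);
		if (err == -EAGAIN) {
			/* VMA layout changed; keep draining the list and
			 * report the retry to the caller only once.
			 */
			vma_changed = true;
		} else if (err) {
			return err;
		}

		spin_lock(&vm->svm.garbage_collector.lock);
	}
	spin_unlock(&vm->svm.garbage_collector.lock);

	return vma_changed ? -EAGAIN : 0;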
> 
>> +
>>   		spin_lock(&vm->svm.garbage_collector.lock);
>>   	}
>>   	spin_unlock(&vm->svm.garbage_collector.lock);
>> @@ -793,40 +846,59 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
>>   			    struct xe_gt *gt, u64 fault_addr,
>>   			    bool atomic)
>>   {
>> -	int need_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic);
>> -
>> -	if (need_vram < 0)
>> -		return need_vram;
>> -
>> -	struct drm_gpusvm_ctx ctx = {
>> -		.read_only = xe_vma_read_only(vma),
>> -		.devmem_possible = IS_DGFX(vm->xe) &&
>> -			IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
>> -		.check_pages_threshold = IS_DGFX(vm->xe) &&
>> -			IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0,
>> -		.devmem_only = need_vram && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
>> -		.timeslice_ms = atomic && IS_DGFX(vm->xe) &&
>> -			IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ?
>> -			vm->xe->atomic_svm_timeslice_ms : 0,
>> -	};
>> +	struct drm_gpusvm_ctx ctx = { };
>> +	struct drm_pagemap *dpagemap;
>>   	struct xe_svm_range *range;
>>   	struct dma_fence *fence;
>> -	struct drm_pagemap *dpagemap;
>>   	struct xe_tile *tile = gt_to_tile(gt);
>> -	int migrate_try_count = ctx.devmem_only ? 3 : 1;
>> +	bool vma_updated = false;
>> +	int need_vram;
>> +	int migrate_try_count;
>>   	ktime_t end = 0;
>>   	int err;
>>   
>> -	lockdep_assert_held_write(&vm->lock);
>> +find_vma:
>> +	if (vma_updated) {
>> +		vma = xe_vm_find_vma_by_addr(vm, fault_addr);
>> +		if (!vma)
>> +			return -EINVAL;
>> +	}
>> +
>>   	xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
>> +	vma_updated = false;
>> +
>> +	need_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic);
>> +	if (need_vram < 0)
>> +		return need_vram;
> 
> This is a bit ugly. I think if you have __xe_svm_handle_pagefault and
> xe_svm_handle_pagefault as here [1], this can be handled more cleanly
> (i.e. still a static setup of drm_gpusvm_ctx).
> 
> If xe_svm_garbage_collector returns -EAGAIN in __xe_svm_handle_pagefault,
> kick it up to xe_svm_handle_pagefault, catch -EAGAIN there, re-look up the
> VMA, and call __xe_svm_handle_pagefault again. I think that would look
> quite a bit better.

Agreed. Will update in next version.
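
Likely along these lines (untested sketch; the __xe_svm_handle_pagefault
signature is assumed from the split in [1]):

int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
			    struct xe_gt *gt, u64 fault_addr,
			    bool atomic)
{
	int err;

retry:
	/* __xe_svm_handle_pagefault() per [1]; signature assumed here */
	err = __xe_svm_handle_pagefault(vm, vma, gt, fault_addr, atomic);
	if (err == -EAGAIN) {
		/*
		 * The garbage collector changed the VMA layout; look the
		 * VMA up once and retry with a freshly built ctx.
		 */
		vma = xe_vm_find_vma_by_addr(vm, fault_addr);
		if (!vma)
			return -EINVAL;
		goto retry;
	}

	return err;
}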

Thanks
> Matt
> 
> [1] https://patchwork.freedesktop.org/patch/666222/?series=149550&rev=5#comment_1222471
> 
>> +
>> +	ctx.read_only = xe_vma_read_only(vma);
>> +	ctx.devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
>> +	ctx.check_pages_threshold = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ?
>> +				    SZ_64K : 0;
>> +	ctx.devmem_only = need_vram && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
>> +	ctx.timeslice_ms = atomic && IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ?
>> +			   vm->xe->atomic_svm_timeslice_ms : 0;
>>   
>> +	migrate_try_count = ctx.devmem_only ? 3 : 1;
>> +
>> +	lockdep_assert_held_write(&vm->lock);
>>   	xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);
>>   
>>   retry:
>>   	/* Always process UNMAPs first so our view of SVM ranges is current */
>>   	err = xe_svm_garbage_collector(vm);
>> -	if (err)
>> -		return err;
>> +	if (err) {
>> +		if (err == -EAGAIN) {
>> +			/*
>> +			 * VMA might have changed due to garbage
>> +			 * collection; retry lookup
>> +			 */
>> +			vma_updated = true;
>> +			goto find_vma;
>> +		} else {
>> +			return err;
>> +		}
>> +	}
>>   
>>   	range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);
>>   
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index 5ee38e9cf6c6..e77c04f92d0b 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -4263,36 +4263,24 @@ int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool i
>>   	}
>>   }
>>   
>> -/**
>> - * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops
>> - * @vm: Pointer to the xe_vm structure
>> - * @start: Starting input address
>> - * @range: Size of the input range
>> - *
>> - * This function splits existing vma to create new vma for user provided input range
>> - *
>> - *  Return: 0 if success
>> - */
>> -int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
>> +static int xe_vm_alloc_vma(struct xe_vm *vm, struct drm_gpuva_op_map *map_req)
>>   {
>> -	struct drm_gpuva_op_map map_req = {
>> -		.va.addr = start,
>> -		.va.range = range,
>> -		.flags = DRM_GPUVM_SM_MAP_OPS_FLAG_SPLIT_MADVISE,
>> -	};
>> -
>>   	struct xe_vma_ops vops;
>>   	struct drm_gpuva_ops *ops = NULL;
>>   	struct drm_gpuva_op *__op;
>>   	bool is_cpu_addr_mirror = false;
>>   	bool remap_op = false;
>> +	bool is_madvise = (map_req->flags & DRM_GPUVM_SM_MAP_OPS_FLAG_SPLIT_MADVISE);
>>   	struct xe_vma_mem_attr tmp_attr;
>> +	u16 default_pat;
>>   	int err;
>>   
>>   	lockdep_assert_held_write(&vm->lock);
>>   
>> -	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
>> -	ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
>> +	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
>> +	       map_req->va.addr, map_req->va.range);
>> +
>> +	ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);
>>   	if (IS_ERR(ops))
>>   		return PTR_ERR(ops);
>>   
>> @@ -4303,33 +4291,56 @@ int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
>>   
>>   	drm_gpuva_for_each_op(__op, ops) {
>>   		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
>> +		struct xe_vma *vma = NULL;
>>   
>> -		if (__op->op == DRM_GPUVA_OP_REMAP) {
>> -			xe_assert(vm->xe, !remap_op);
>> -			remap_op = true;
>> +		if (!is_madvise) {
>> +			if (__op->op == DRM_GPUVA_OP_UNMAP) {
>> +				vma = gpuva_to_vma(op->base.unmap.va);
>> +				XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
>> +				default_pat = vma->attr.default_pat_index;
>> +			}
>>   
>> -			if (xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.remap.unmap->va)))
>> -				is_cpu_addr_mirror = true;
>> -			else
>> -				is_cpu_addr_mirror = false;
>> -		}
>> +			if (__op->op == DRM_GPUVA_OP_REMAP) {
>> +				vma = gpuva_to_vma(op->base.remap.unmap->va);
>> +				default_pat = vma->attr.default_pat_index;
>> +			}
>>   
>> -		if (__op->op == DRM_GPUVA_OP_MAP) {
>> -			xe_assert(vm->xe, remap_op);
>> -			remap_op = false;
>> +			if (__op->op == DRM_GPUVA_OP_MAP) {
>> +				op->map.is_cpu_addr_mirror = true;
>> +				op->map.pat_index = default_pat;
>> +			}
>> +		} else {
>> +			if (__op->op == DRM_GPUVA_OP_REMAP) {
>> +				vma = gpuva_to_vma(op->base.remap.unmap->va);
>> +				xe_assert(vm->xe, !remap_op);
>> +				remap_op = true;
>>   
>> -			/* In case of madvise ops DRM_GPUVA_OP_MAP is always after
>> -			 * DRM_GPUVA_OP_REMAP, so ensure we assign op->map.is_cpu_addr_mirror true
>> -			 * if REMAP is for xe_vma_is_cpu_addr_mirror vma
>> -			 */
>> -			op->map.is_cpu_addr_mirror = is_cpu_addr_mirror;
>> -		}
>> +				if (xe_vma_is_cpu_addr_mirror(vma))
>> +					is_cpu_addr_mirror = true;
>> +				else
>> +					is_cpu_addr_mirror = false;
>> +			}
>>   
>> +			if (__op->op == DRM_GPUVA_OP_MAP) {
>> +				xe_assert(vm->xe, remap_op);
>> +				remap_op = false;
>> +				/*
>> +				 * For madvise ops, DRM_GPUVA_OP_MAP always
>> +				 * follows DRM_GPUVA_OP_REMAP, so ensure
>> +				 * op->map.is_cpu_addr_mirror is true if the
>> +				 * REMAP is for an xe_vma_is_cpu_addr_mirror vma
>> +				 */
>> +				op->map.is_cpu_addr_mirror = is_cpu_addr_mirror;
>> +			}
>> +		}
>>   		print_op(vm->xe, __op);
>>   	}
>>   
>>   	xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
>> -	vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
>> +
>> +	if (is_madvise)
>> +		vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
>> +
>>   	err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
>>   	if (err)
>>   		goto unwind_ops;
>> @@ -4341,15 +4352,20 @@ int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
>>   		struct xe_vma *vma;
>>   
>>   		if (__op->op == DRM_GPUVA_OP_UNMAP) {
>> -			/* There should be no unmap */
>> -			XE_WARN_ON("UNEXPECTED UNMAP");
>> -			xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), NULL);
>> +			vma = gpuva_to_vma(op->base.unmap.va);
>> +			/* There should be no unmap for madvise */
>> +			if (is_madvise)
>> +				XE_WARN_ON("UNEXPECTED UNMAP");
>> +
>> +			xe_vma_destroy(vma, NULL);
>>   		} else if (__op->op == DRM_GPUVA_OP_REMAP) {
>>   			vma = gpuva_to_vma(op->base.remap.unmap->va);
>> -			/* Store attributes for REMAP UNMAPPED VMA, so they can be assigned
>> -			 * to newly MAP created vma.
>> +			/* For madvise ops, store attributes of the REMAP-unmapped
>> +			 * VMA so they can be assigned to the newly created MAP vma.
>>   			 */
>> -			tmp_attr = vma->attr;
>> +			if (is_madvise)
>> +				tmp_attr = vma->attr;
>> +
>>   			xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
>>   		} else if (__op->op == DRM_GPUVA_OP_MAP) {
>>   			vma = op->map.vma;
>> @@ -4357,7 +4373,8 @@ int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
>>   			 * Therefore temp_attr will always have sane values, making it safe to
>>   			 * copy them to new vma.
>>   			 */
>> -			vma->attr = tmp_attr;
>> +			if (is_madvise)
>> +				vma->attr = tmp_attr;
>>   		}
>>   	}
>>   
>> @@ -4371,3 +4388,53 @@ int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
>>   	drm_gpuva_ops_free(&vm->gpuvm, ops);
>>   	return err;
>>   }
>> +
>> +/**
>> + * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops
>> + * @vm: Pointer to the xe_vm structure
>> + * @start: Starting input address
>> + * @range: Size of the input range
>> + *
>> + * This function splits the existing vma to create a new vma for the user-provided input range
>> + *
>> + * Return: 0 on success
>> + */
>> +int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
>> +{
>> +	struct drm_gpuva_op_map map_req = {
>> +		.va.addr = start,
>> +		.va.range = range,
>> +		.flags = DRM_GPUVM_SM_MAP_OPS_FLAG_SPLIT_MADVISE,
>> +	};
>> +
>> +	lockdep_assert_held_write(&vm->lock);
>> +
>> +	vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
>> +
>> +	return xe_vm_alloc_vma(vm, &map_req);
>> +}
>> +
>> +/**
>> + * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
>> + * @vm: Pointer to the xe_vm structure
>> + * @start: Starting input address
>> + * @range: Size of the input range
>> + *
>> + * This function splits/merges the existing vma to create a new vma for the user-provided input range
>> + *
>> + * Return: 0 on success
>> + */
>> +int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
>> +{
>> +	struct drm_gpuva_op_map map_req = {
>> +		.va.addr = start,
>> +		.va.range = range,
>> +	};
>> +
>> +	lockdep_assert_held_write(&vm->lock);
>> +
>> +	vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
>> +	       start, range);
>> +
>> +	return xe_vm_alloc_vma(vm, &map_req);
>> +}
>> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
>> index f735d994806d..6538cddf158b 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.h
>> +++ b/drivers/gpu/drm/xe/xe_vm.h
>> @@ -177,6 +177,8 @@ int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool i
>>   
>>   int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t addr, uint64_t size);
>>   
>> +int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t addr, uint64_t size);
>> +
>>   /**
>>    * to_userptr_vma() - Return a pointer to an embedding userptr vma
>>    * @vma: Pointer to the embedded struct xe_vma
>> -- 
>> 2.34.1
>>


