[PATCH v2 07/16] drm/xe: Convert SVM validation for exhaustive eviction

Matthew Brost matthew.brost at intel.com
Fri Aug 22 19:13:27 UTC 2025


On Fri, Aug 22, 2025 at 11:40:21AM +0200, Thomas Hellström wrote:
> Convert SVM validation to support exhaustive eviction,
> using xe_validation_guard().
> 
> v2:
> - Also wrap xe_vm_range_rebind() (Matt Brost)
> - Adapt to argument changes of xe_validation_guard().
> 
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

Reviewed-by: Matthew Brost <matthew.brost at intel.com>

> ---
>  drivers/gpu/drm/xe/xe_svm.c | 99 +++++++++++++++++++------------------
>  1 file changed, 51 insertions(+), 48 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 39e3aa6df25a..667ca1f7cc29 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -699,51 +699,48 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
>  	struct xe_device *xe = vr->xe;
>  	struct device *dev = xe->drm.dev;
>  	struct drm_buddy_block *block;
> +	struct xe_validation_ctx vctx;
>  	struct list_head *blocks;
> -	struct drm_exec *exec;
> +	struct drm_exec exec;
>  	struct xe_bo *bo;
> -	ktime_t time_end = 0;
> -	int err, idx;
> +	int err = 0, idx;
>  
>  	if (!drm_dev_enter(&xe->drm, &idx))
>  		return -ENODEV;
>  
>  	xe_pm_runtime_get(xe);
> -	exec = XE_VALIDATION_UNIMPLEMENTED;
> -
> - retry:
> -	bo = xe_bo_create_locked(vr->xe, NULL, NULL, end - start,
> -				 ttm_bo_type_device,
> -				 (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) |
> -				 XE_BO_FLAG_CPU_ADDR_MIRROR, exec);
> -	if (IS_ERR(bo)) {
> -		err = PTR_ERR(bo);
> -		if (xe_vm_validate_should_retry(NULL, err, &time_end))
> -			goto retry;
> -		goto out_pm_put;
> -	}
>  
> -	drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
> -				&dpagemap_devmem_ops, dpagemap, end - start);
> -
> -	blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
> -	list_for_each_entry(block, blocks, link)
> -		block->private = vr;
> +	xe_validation_guard(&vctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
> +		bo = xe_bo_create_locked(xe, NULL, NULL, end - start,
> +					 ttm_bo_type_device,
> +					 (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) |
> +					 XE_BO_FLAG_CPU_ADDR_MIRROR, &exec);
> +		drm_exec_retry_on_contention(&exec);
> +		if (IS_ERR(bo)) {
> +			err = PTR_ERR(bo);
> +			xe_validation_retry_on_oom(&vctx, &err);
> +			break;
> +		}
>  
> -	xe_bo_get(bo);
> +		drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
> +					&dpagemap_devmem_ops, dpagemap, end - start);
>  
> -	/* Ensure the device has a pm ref while there are device pages active. */
> -	xe_pm_runtime_get_noresume(xe);
> -	err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
> -					    start, end, timeslice_ms,
> -					    xe_svm_devm_owner(xe));
> -	if (err)
> -		xe_svm_devmem_release(&bo->devmem_allocation);
> +		blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
> +		list_for_each_entry(block, blocks, link)
> +			block->private = vr;
>  
> -	xe_bo_unlock(bo);
> -	xe_bo_put(bo);
> +		xe_bo_get(bo);
>  
> -out_pm_put:
> +		/* Ensure the device has a pm ref while there are device pages active. */
> +		xe_pm_runtime_get_noresume(xe);
> +		err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
> +						    start, end, timeslice_ms,
> +						    xe_svm_devm_owner(xe));
> +		if (err)
> +			xe_svm_devmem_release(&bo->devmem_allocation);
> +		xe_bo_unlock(bo);
> +		xe_bo_put(bo);
> +	}
>  	xe_pm_runtime_put(xe);
>  	drm_dev_exit(idx);
>  
> @@ -820,11 +817,12 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
>  			IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ?
>  			vm->xe->atomic_svm_timeslice_ms : 0,
>  	};
> +	struct xe_validation_ctx vctx;
> +	struct drm_exec exec;
>  	struct xe_svm_range *range;
>  	struct dma_fence *fence;
>  	struct xe_tile *tile = gt_to_tile(gt);
>  	int migrate_try_count = ctx.devmem_only ? 3 : 1;
> -	ktime_t end = 0;
>  	int err;
>  
>  	lockdep_assert_held_write(&vm->lock);
> @@ -894,27 +892,32 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
>  
>  	range_debug(range, "PAGE FAULT - BIND");
>  
> -retry_bind:
> -	xe_vm_lock(vm, false);
> -	fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
> -	if (IS_ERR(fence)) {
> -		xe_vm_unlock(vm);
> -		err = PTR_ERR(fence);
> -		if (err == -EAGAIN) {
> -			ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
> -			range_debug(range, "PAGE FAULT - RETRY BIND");
> -			goto retry;
> +	xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) {
> +		err = xe_vm_drm_exec_lock(vm, &exec);
> +		drm_exec_retry_on_contention(&exec);
> +
> +		xe_vm_set_validation_exec(vm, &exec);
> +		fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
> +		xe_vm_set_validation_exec(vm, NULL);
> +		if (IS_ERR(fence)) {
> +			drm_exec_retry_on_contention(&exec);
> +			err = PTR_ERR(fence);
> +			xe_validation_retry_on_oom(&vctx, &err);
>  		}
> -		if (xe_vm_validate_should_retry(NULL, err, &end))
> -			goto retry_bind;
> -		goto err_out;
>  	}
> -	xe_vm_unlock(vm);
> +	if (err)
> +		goto err_out;
>  
>  	dma_fence_wait(fence, false);
>  	dma_fence_put(fence);
> +	return 0;
>  
>  err_out:
> +	if (err == -EAGAIN) {
> +		ctx.timeslice_ms <<= 1;	/* Double timeslice if we have to retry */
> +		range_debug(range, "PAGE FAULT - RETRY BIND");
> +		goto retry;
> +	}
>  
>  	return err;
>  }
> -- 
> 2.50.1
> 


More information about the Intel-xe mailing list