[PATCH 07/15] drm/xe: Convert SVM validation for exhaustive eviction

Thomas Hellström thomas.hellstrom at linux.intel.com
Thu Aug 14 12:24:27 UTC 2025


On Wed, 2025-08-13 at 08:32 -0700, Matthew Brost wrote:
> On Wed, Aug 13, 2025 at 12:51:13PM +0200, Thomas Hellström wrote:
> > Convert SVM validation to support exhaustive eviction,
> > using xe_validation_guard().
> > 
> 
> Do we not need a validation guard + xe_vm_set_validation_exec around
> xe_vm_range_rebind, given that on the first fault of a range we can
> allocate PTs?

Yes, you're right. I see this comment in a later patch as well.
The reason the asserts didn't trigger here is that we were leaking an
xe_vm_set_validation(). I'm fixing that up in v2 and adding more
asserts so that this is much less likely to happen.
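
For v2 the rebind would then sit inside a guard, roughly like below.
This is an untested sketch from memory; the exact argument lists of
xe_validation_guard(), xe_vm_range_rebind() and friends may well end up
different, and vm, vma, range and tile_mask just stand for whatever the
caller already has at hand:

	struct xe_validation_ctx vctx;
	struct drm_exec exec;
	struct dma_fence *fence;
	int err = 0;

	xe_validation_guard(&vctx, &xe->val, &exec, 0, err, false) {
		/* Lock the VM object so PT allocation can validate against it. */
		err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;

		/* Make the exec visible to the PT allocation path. */
		xe_vm_set_validation_exec(vm, &exec);
		fence = xe_vm_range_rebind(vm, vma, range, tile_mask);
		xe_vm_set_validation_exec(vm, NULL);
		if (IS_ERR(fence)) {
			err = PTR_ERR(fence);
			xe_validation_retry_on_oom(&vctx, &err);
			break;
		}
		dma_fence_put(fence);
	}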

Ideally we'd want to pass the drm_exec (perhaps as part of a pt_details
struct) all the way down to the PT allocation. But when I tried that
during development, the headers became quite bloated.
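
To make that concrete, the idea is basically a small carrier struct
(illustrative only, not code from this series):

	/*
	 * Illustrative only: bundle the drm_exec (and any future
	 * per-update state) so it can be threaded down to PT
	 * allocation without adding an argument to every function
	 * in the chain.
	 */
	struct xe_pt_details {
		struct drm_exec *exec;
	};

Each function in the PT update chain would then take a pointer to that
rather than a growing argument list.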

/Thomas


> 
> Matt
> 
> > Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_svm.c | 63 ++++++++++++++++++-------------------
> >  1 file changed, 30 insertions(+), 33 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> > index 39e3aa6df25a..ba85665d85d4 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.c
> > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > @@ -699,51 +699,48 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
> >  	struct xe_device *xe = vr->xe;
> >  	struct device *dev = xe->drm.dev;
> >  	struct drm_buddy_block *block;
> > +	struct xe_validation_ctx vctx;
> >  	struct list_head *blocks;
> > -	struct drm_exec *exec;
> > +	struct drm_exec exec;
> >  	struct xe_bo *bo;
> > -	ktime_t time_end = 0;
> > -	int err, idx;
> > +	int err = 0, idx;
> >  
> >  	if (!drm_dev_enter(&xe->drm, &idx))
> >  		return -ENODEV;
> >  
> >  	xe_pm_runtime_get(xe);
> > -	exec = XE_VALIDATION_UNIMPLEMENTED;
> > -
> > - retry:
> > -	bo = xe_bo_create_locked(vr->xe, NULL, NULL, end - start,
> > -				 ttm_bo_type_device,
> > -				 (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) |
> > -				 XE_BO_FLAG_CPU_ADDR_MIRROR, exec);
> > -	if (IS_ERR(bo)) {
> > -		err = PTR_ERR(bo);
> > -		if (xe_vm_validate_should_retry(NULL, err, &time_end))
> > -			goto retry;
> > -		goto out_pm_put;
> > -	}
> >  
> > -	drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
> > -				&dpagemap_devmem_ops, dpagemap, end - start);
> > -
> > -	blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
> > -	list_for_each_entry(block, blocks, link)
> > -		block->private = vr;
> > +	xe_validation_guard(&vctx, &xe->val, &exec, 0, err, false) {
> > +		bo = xe_bo_create_locked(xe, NULL, NULL, end - start,
> > +					 ttm_bo_type_device,
> > +					 (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) |
> > +					 XE_BO_FLAG_CPU_ADDR_MIRROR, &exec);
> > +		drm_exec_retry_on_contention(&exec);
> > +		if (IS_ERR(bo)) {
> > +			err = PTR_ERR(bo);
> > +			xe_validation_retry_on_oom(&vctx, &err);
> > +			break;
> > +		}
> >  
> > -	xe_bo_get(bo);
> > +		drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
> > +					&dpagemap_devmem_ops, dpagemap, end - start);
> >  
> > -	/* Ensure the device has a pm ref while there are device pages active. */
> > -	xe_pm_runtime_get_noresume(xe);
> > -	err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
> > -					    start, end, timeslice_ms,
> > -					    xe_svm_devm_owner(xe));
> > -	if (err)
> > -		xe_svm_devmem_release(&bo->devmem_allocation);
> > +		blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
> > +		list_for_each_entry(block, blocks, link)
> > +			block->private = vr;
> >  
> > -	xe_bo_unlock(bo);
> > -	xe_bo_put(bo);
> > +		xe_bo_get(bo);
> >  
> > -out_pm_put:
> > +		/* Ensure the device has a pm ref while there are device pages active. */
> > +		xe_pm_runtime_get_noresume(xe);
> > +		err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
> > +						    start, end, timeslice_ms,
> > +						    xe_svm_devm_owner(xe));
> > +		if (err)
> > +			xe_svm_devmem_release(&bo->devmem_allocation);
> > +		xe_bo_unlock(bo);
> > +		xe_bo_put(bo);
> > +	}
> >  	xe_pm_runtime_put(xe);
> >  	drm_dev_exit(idx);
> >  
> > -- 
> > 2.50.1
> > 


