[PATCH] drm/xe: Misc refine for svm
Lin, Shuicheng
shuicheng.lin at intel.com
Mon Jul 21 15:27:56 UTC 2025
On Fri, July 18, 2025 2:39 PM Matthew Brost wrote:
> On Fri, Jul 18, 2025 at 09:24:10PM +0000, Shuicheng Lin wrote:
> > In xe_svm_handle_pagefault(), migrate_try_count is set to 3
> > when ctx.devmem_only is true. If ctx.devmem_only is false, the try
> > count is 1, which means no retry. Remove the "!ctx.devmem_only" check
> > from the retry path.
> >
> > The other changes below should have no functional effect:
> > 1. Correct the typo of "operation" in the range_debug() macro.
> > 2. Combine the 2 spin_lock() calls in xe_svm_garbage_collector() into 1.
> > 3. Remove the preferred_region_is_vram check in
> > xe_svm_range_needs_migrate_to_vram() as it is already checked at the
> > beginning of this function.
> > 4. Combine the repeated devmem_possible checks in xe_svm_handle_pagefault().
> > 5. Combine the 2 xe_vm_unlock() calls in xe_svm_handle_pagefault() into 1.
> >
> > Cc: Matthew Brost <matthew.brost at intel.com>
> > Signed-off-by: Shuicheng Lin <shuicheng.lin at intel.com>
> > ---
> > drivers/gpu/drm/xe/xe_svm.c | 34 ++++++++++++++--------------------
> > 1 file changed, 14 insertions(+), 20 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> > index 10c8a1bcb86e..2c47bbe7d4b4 100644
> > --- a/drivers/gpu/drm/xe/xe_svm.c
> > +++ b/drivers/gpu/drm/xe/xe_svm.c
> > @@ -50,11 +50,11 @@ static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
> > return gpusvm_to_vm(r->gpusvm);
> > }
> >
> > -#define range_debug(r__, operaton__) \
> > +#define range_debug(r__, operation__) \
> > vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \
> > "%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \
> > "start=0x%014lx, end=0x%014lx, size=%lu", \
> > - (operaton__), range_to_vm(&(r__)->base)->usm.asid, \
> > + (operation__), range_to_vm(&(r__)->base)->usm.asid, \
>
> Looks good.
>
> > (r__)->base.gpusvm, \
> > xe_svm_range_in_vram((r__)) ? 1 : 0, \
> > xe_svm_range_has_vram_binding((r__)) ? 1 : 0, \
> > @@ -263,8 +263,8 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
> > if (xe_vm_is_closed_or_banned(vm))
> > return -ENOENT;
> >
> > - spin_lock(&vm->svm.garbage_collector.lock);
> > for (;;) {
> > + spin_lock(&vm->svm.garbage_collector.lock);
>
> Also looks good.
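For reference, a simplified sketch of the resulting lock pattern in the
garbage collector loop (the per-range processing and its error handling
are elided; this is not the exact Xe code):

	for (;;) {
		spin_lock(&vm->svm.garbage_collector.lock);
		range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
						 typeof(*range),
						 garbage_collector_link);
		if (!range)
			break;	/* leave the loop with the lock held */

		spin_unlock(&vm->svm.garbage_collector.lock);
		/* process the range without holding the spinlock */
	}
	spin_unlock(&vm->svm.garbage_collector.lock);	/* pairs with the break above */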
>
> > range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
> > typeof(*range),
> > garbage_collector_link);
> > @@ -282,8 +282,6 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
> > xe_vm_kill(vm, true);
> > return err;
> > }
> > -
> > - spin_lock(&vm->svm.garbage_collector.lock);
> > }
> > spin_unlock(&vm->svm.garbage_collector.lock);
> >
> > @@ -763,12 +761,12 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm
> >
> > xe_assert(vm->xe, IS_DGFX(vm->xe));
> >
> > - if (preferred_region_is_vram && xe_svm_range_in_vram(range)) {
> > + if (xe_svm_range_in_vram(range)) {
> > drm_info(&vm->xe->drm, "Range is already in VRAM\n");
> > return false;
> > }
> >
> > -	if (preferred_region_is_vram && range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
> > + if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
>
> Also looks good.
>
> > drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
> > return false;
> > }
> > @@ -793,16 +791,14 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> > struct xe_gt *gt, u64 fault_addr,
> > bool atomic)
> > {
> > + int devmem_possible = IS_DGFX(vm->xe) &&
> > + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
> > struct drm_gpusvm_ctx ctx = {
> > .read_only = xe_vma_read_only(vma),
> > - .devmem_possible = IS_DGFX(vm->xe) &&
> > - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
> > - .check_pages_threshold = IS_DGFX(vm->xe) &&
> > - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0,
> > - .devmem_only = atomic && IS_DGFX(vm->xe) &&
> > - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP),
> > - .timeslice_ms = atomic && IS_DGFX(vm->xe) &&
> > - IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ?
> > + .devmem_possible = devmem_possible,
> > + .check_pages_threshold = devmem_possible ? SZ_64K : 0,
> > + .devmem_only = atomic && devmem_possible,
> > + .timeslice_ms = atomic && devmem_possible ?
>
> Also good.
>
> > vm->xe->atomic_svm_timeslice_ms : 0,
> > };
> > struct xe_svm_range *range;
> > @@ -841,7 +837,7 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> > err = xe_svm_alloc_vram(tile, range, &ctx);
> > ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
> > if (err) {
> > - if (migrate_try_count || !ctx.devmem_only) {
> > + if (migrate_try_count) {
>
> This is a behavior change. Instead of retrying non-devmem-only faults in SRAM,
> we'd abort, which is not the desired behavior. Run xe_exec_system_allocator
> on BMG and I suspect it will quickly fail.
You are right. xe_exec_system_allocator will fail with this change. Let me revert it.
Thanks.
Shuicheng
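To make the behavior difference concrete, here is a rough sketch of the
retry decision (simplified and hedged: it assumes the single try of a
non-devmem_only fault has already been consumed by the failed VRAM
migration attempt, and the label names are only illustrative):

	if (err) {
		if (migrate_try_count || !ctx.devmem_only) {
			/*
			 * devmem_only fault: the count started at 3 and is
			 * still nonzero here, so retry the migration.
			 * non-devmem_only fault: the count started at 1 and
			 * is already 0, so only the "!ctx.devmem_only" term
			 * keeps us on the fallback path that retries the
			 * fault from system memory.
			 */
			goto retry;
		} else {
			/* devmem_only and out of tries: give up */
			goto err_out;
		}
	}

Dropping "!ctx.devmem_only" therefore turns the SRAM fallback for
non-devmem_only faults into an abort, which is the failure Matt points
out above.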
>
> > drm_dbg(&vm->xe->drm,
> > "VRAM allocation failed, falling back to
> retrying fault, asid=%u, errno=%pe\n",
> > vm->usm.asid, ERR_PTR(err));
> > @@ -860,7 +856,7 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> > /* Corner where CPU mappings have changed */
> > if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
> > ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
> > - if (migrate_try_count > 0 || !ctx.devmem_only) {
> > + if (migrate_try_count > 0) {
>
> Same thing here.
>
> > drm_dbg(&vm->xe->drm,
> > "Get pages failed, falling back to retrying,
> asid=%u, gpusvm=%p, errno=%pe\n",
> > vm->usm.asid, &vm->svm.gpusvm,
> ERR_PTR(err)); @@ -882,8 +878,8 @@
> > int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> > retry_bind:
> > xe_vm_lock(vm, false);
> > fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
> > + xe_vm_unlock(vm);
>
> Also good.
>
> Matt
>
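As a side note, a simplified sketch of the new flow for this hunk (not
the exact code; the -EAGAIN retry and the error path are collapsed into
a plain return here): the VM lock is only needed around the rebind
itself, and the fence pointer (or ERR_PTR) is already captured when the
lock is dropped, so the error check and the fence wait can run unlocked.

	xe_vm_lock(vm, false);
	fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
	xe_vm_unlock(vm);
	if (IS_ERR(fence))
		return PTR_ERR(fence);	/* real code retries on -EAGAIN */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);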
> > if (IS_ERR(fence)) {
> > - xe_vm_unlock(vm);
> > err = PTR_ERR(fence);
> > if (err == -EAGAIN) {
> > ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
> > @@ -894,13 +890,11 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> > goto retry_bind;
> > goto err_out;
> > }
> > - xe_vm_unlock(vm);
> >
> > dma_fence_wait(fence, false);
> > dma_fence_put(fence);
> >
> > err_out:
> > -
> > return err;
> > }
> >
> > --
> > 2.49.0
> >