[PATCH v3 12/19] drm/xe/svm: Add SVM ranges migration policy on atomic access
Matthew Brost
matthew.brost at intel.com
Thu May 29 23:27:09 UTC 2025
On Tue, May 27, 2025 at 10:09:56PM +0530, Himal Prasad Ghimiray wrote:
> If the platform does not support atomic access on system memory, and the
> ranges are in system memory, but the user requires atomic accesses on
> the VMA, then migrate the ranges to VRAM. Apply this policy for prefetch
> operations as well.
>
> v2
> - Drop unnecessary vm_dbg
>
> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> ---
> drivers/gpu/drm/xe/xe_pt.c | 9 +++++--
> drivers/gpu/drm/xe/xe_svm.c | 4 +++-
> drivers/gpu/drm/xe/xe_vm.c | 38 ++++++++++++++++++++++++++++--
> drivers/gpu/drm/xe/xe_vm.h | 2 ++
> drivers/gpu/drm/xe/xe_vm_madvise.c | 10 +++++++-
> 5 files changed, 57 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index 39bc1964089e..ad17ded0ecaa 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -645,13 +645,18 @@ static bool xe_atomic_for_vram(struct xe_vm *vm)
> return true;
> }
>
> -static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo)
> +static bool xe_atomic_for_system(struct xe_vm *vm,
> + struct xe_bo *bo,
> + struct xe_vma *vma)
You can get the BO from the VMA, so I'd drop the BO argument.
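Something like this (untested sketch), with the call site in
xe_pt_stage_bind() then passing just (vm, vma):

static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_vma *vma)
{
        struct xe_device *xe = vm->xe;
        struct xe_bo *bo = xe_vma_bo(vma);      /* NULL when the VMA has no BO */

        if (!xe->info.has_device_atomics_on_smem)
                return false;
        ...
}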
> {
> struct xe_device *xe = vm->xe;
>
> if (!xe->info.has_device_atomics_on_smem)
> return false;
>
> + if (vma->attr.atomic_access == DRM_XE_VMA_ATOMIC_DEVICE)
> + return true;
> +
> /*
> * If a SMEM+LMEM allocation is backed by SMEM, a device
> * atomics will cause a gpu page fault and which then
> @@ -745,7 +750,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
>
> if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
> xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0;
> - xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ?
> + xe_walk.default_system_pte = xe_atomic_for_system(vm, bo, vma) ?
> XE_USM_PPGTT_PTE_AE : 0;
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index 5691bb9dbf26..743bb1f7d39c 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -771,6 +771,8 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm
> struct xe_vm *vm = range_to_vm(&range->base);
> u64 range_size = xe_svm_range_size(range);
>
> + preferred_region_is_vram |= xe_vma_need_vram_migrate_for_atomic(vm->xe, vma);
> +
I'm not sure about this. Shouldn't we just set preferred_region_is_vram
at the caller (preferred_vram || atomic fault) in the fault handler?
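Something along these lines at the call site (sketch only - preferred_vram
stands in for whatever the fault handler already computes there):

        bool need_vram = preferred_vram ||
                         xe_vma_need_vram_migrate_for_atomic(vm->xe, vma);

        if (xe_svm_range_needs_migrate_to_vram(range, vma, need_vram)) {
                ...
        }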
> if (!range->base.flags.migrate_devmem || !preferred_region_is_vram)
> return false;
>
> @@ -812,7 +814,7 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
> .check_pages_threshold = IS_DGFX(vm->xe) &&
> IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
> - .devmem_only = atomic && IS_DGFX(vm->xe) &&
> + .devmem_only = atomic && xe_vma_need_vram_migrate_for_atomic(vm->xe, vma) &&
> IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
> .timeslice_ms = atomic && IS_DGFX(vm->xe) &&
> IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ?
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 8208409485f6..e5fc2c2be8b2 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -2930,13 +2930,22 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
> ctx.read_only = xe_vma_read_only(vma);
> ctx.devmem_possible = devmem_possible;
> ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
> + ctx.devmem_only = xe_vma_need_vram_migrate_for_atomic(vm->xe, vma) &&
> + IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR);
I still wouldn't set devmem_only for prefetch as I don't think we should
fail the prefetch unless we absolutely have to. A fault will still fix
up atomic accesses to ranges that are in system memory if needed.
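i.e. just keep this as it is today:

        ctx.read_only = xe_vma_read_only(vma);
        ctx.devmem_possible = devmem_possible;
        ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
        /* No devmem_only for prefetch - it is best effort, a later atomic
         * fault will still migrate the range if it really has to. */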
>
> /* TODO: Threading the migration */
> xa_for_each(&op->prefetch_range.range, i, svm_range) {
> - if (!region)
> + bool needs_vram = xe_svm_range_needs_migrate_to_vram(svm_range, vma, region);
> +
> + if (!needs_vram) {
> xe_svm_range_migrate_to_smem(vm, svm_range);
> + } else if (needs_vram) {
> + /* If migration is mandated by atomic attributes
> + * in vma and prefetch region is smem force prefetch
> + * in vram of root tile.
> + */
> + region = region ? region : 1;
>
I don't think this logic needs to change until preferred location is
implemented. I don't think the atomic mode has any bearing on prefetch.
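i.e. keep the existing flow (from the context above):

        xa_for_each(&op->prefetch_range.range, i, svm_range) {
                if (!region)
                        xe_svm_range_migrate_to_smem(vm, svm_range);

                if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
                        tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
                        err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx);
                        ...
                }
        }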
> - if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
> tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
> err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx);
> if (err) {
> @@ -4178,6 +4187,31 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
> kvfree(snap);
> }
>
> +/**
> + * xe_vma_need_vram_migrate_for_atomic - Check if VMA needs VRAM migration for atomic operations
> + * @xe: Pointer to the XE device structure
> + * @vma: Pointer to the virtual memory area (VMA) structure
> + *
> + * This function determines whether the given VMA needs to be migrated to
> + * VRAM in order to do atomic GPU operation.
> + *
> + * Return: true if migration to VRAM is required, false otherwise.
> + */
> +bool xe_vma_need_vram_migrate_for_atomic(struct xe_device *xe, struct xe_vma *vma)
> +{
> + /* Note: The checks implemented here are platform-specific. For instance,
> + * on a device supporting CXL atomics, these would ideally work universally
> + * without additional handling.
> + */
> + if (!IS_DGFX(xe) || vma->attr.atomic_access == DRM_XE_VMA_ATOMIC_UNDEFINED ||
I think DRM_XE_VMA_ATOMIC_UNDEFINED is the same as GLOBAL, right? Isn't
that the default? Or is GLOBAL the default? We have been told that
whatever the default is just has to work for SVM, so maybe set it to
GLOBAL by default? (Rough sketch after the end of the function below.)
> + vma->attr.atomic_access == DRM_XE_VMA_ATOMIC_CPU ||
> + (xe->info.has_device_atomics_on_smem &&
> + vma->attr.atomic_access == DRM_XE_VMA_ATOMIC_DEVICE))
> + return false;
> +
> + return true;
> +}
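On the UNDEFINED vs GLOBAL question above: if UNDEFINED really is just
GLOBAL, a switch would make the intended mapping explicit. Rough sketch
only - note it makes UNDEFINED behave like GLOBAL, which is exactly the
open question, and I may be wrong about what GLOBAL should do here:

        if (!IS_DGFX(xe))
                return false;

        switch (vma->attr.atomic_access) {
        case DRM_XE_VMA_ATOMIC_CPU:
                return false;
        case DRM_XE_VMA_ATOMIC_DEVICE:
                return !xe->info.has_device_atomics_on_smem;
        case DRM_XE_VMA_ATOMIC_UNDEFINED:       /* assuming this aliases GLOBAL */
        case DRM_XE_VMA_ATOMIC_GLOBAL:
        default:
                return true;
        }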
> +
> /**
> * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops
> * @vm: Pointer to the xe_vm structure
> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> index 8151b1b01a13..edd6ffd7c3ac 100644
> --- a/drivers/gpu/drm/xe/xe_vm.h
> +++ b/drivers/gpu/drm/xe/xe_vm.h
> @@ -171,6 +171,8 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma)
>
> struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr);
>
> +bool xe_vma_need_vram_migrate_for_atomic(struct xe_device *xe, struct xe_vma *vma);
> +
> int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t addr, uint64_t size);
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
> index f7edefe5f6cf..084719660401 100644
> --- a/drivers/gpu/drm/xe/xe_vm_madvise.c
> +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
> @@ -69,7 +69,15 @@ static int madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
> struct xe_vma **vmas, int num_vmas,
> struct drm_xe_madvise_ops ops)
> {
> - /* Implementation pending */
> + int i;
> +
> + xe_assert(vm->xe, ops.type == DRM_XE_VMA_ATTR_ATOMIC);
> + xe_assert(vm->xe, ops.atomic.val > DRM_XE_VMA_ATOMIC_UNDEFINED &&
>= DRM_XE_VMA_ATOMIC_UNDEFINED, right?
Also sanitize this input before it gets here, as discussed in patches 19
and 10.
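e.g. (sketch - the sanitization could live wherever the madvise args are
first validated):

        xe_assert(vm->xe, ops.atomic.val >= DRM_XE_VMA_ATOMIC_UNDEFINED &&
                  ops.atomic.val <= DRM_XE_VMA_ATOMIC_CPU);

with the ioctl path rejecting bad input up front, something like:

        if (XE_IOCTL_DBG(xe, ops.atomic.val > DRM_XE_VMA_ATOMIC_CPU))
                return -EINVAL;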
Matt
> + ops.atomic.val <= DRM_XE_VMA_ATOMIC_CPU);
> +
> + for (i = 0; i < num_vmas; i++)
> + vmas[i]->attr.atomic_access = ops.atomic.val;
> + /*TODO: handle bo backed vmas */
> return 0;
> }
>
> --
> 2.34.1
>