[PATCH v4 18/20] drm/xe/bo: Update atomic_access attribute on madvise

Matthew Brost matthew.brost at intel.com
Mon Jun 23 16:19:18 UTC 2025


On Fri, Jun 13, 2025 at 06:25:56PM +0530, Himal Prasad Ghimiray wrote:
> Update the bo_atomic_access based on user-provided input and determine
> the migration to smem during a CPU fault
> 
> v2 (Matthew Brost)
> - Avoid cpu unmapping if bo is already in smem
> - check atomics on smem too for ioctl
> - Add comments
> 
> v3
> - Avoid migration in prefetch
> 
> Cc: Matthew Brost <matthew.brost at intel.com>
> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_bo.c           | 21 ++++++++--
>  drivers/gpu/drm/xe/xe_gt_pagefault.c |  2 +-
>  drivers/gpu/drm/xe/xe_vm.c           |  5 ++-
>  drivers/gpu/drm/xe/xe_vm_madvise.c   | 61 +++++++++++++++++++++++++++-
>  4 files changed, 82 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 4e39188a021a..82b33dfc5515 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -1667,6 +1667,12 @@ static void xe_gem_object_close(struct drm_gem_object *obj,
>  	}
>  }
>  
> +static bool should_migrate_to_smem(struct xe_bo *bo)
> +{

Again, maybe add a quick platform-specific comment here so we remember to
revisit this on other platforms.
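
Something along these lines is what I have in mind (just a sketch of the
wording, untested):

	static bool should_migrate_to_smem(struct xe_bo *bo)
	{
		/*
		 * NOTE: Forcing migration to SMEM on CPU fault for CPU /
		 * GLOBAL atomic access is a platform-specific policy; a
		 * device supporting e.g. CXL atomics wouldn't need this.
		 * Revisit per platform.
		 */
		return bo->attr.atomic_access == DRM_XE_VMA_ATOMIC_GLOBAL ||
		       bo->attr.atomic_access == DRM_XE_VMA_ATOMIC_CPU;
	}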

> +	return bo->attr.atomic_access == DRM_XE_VMA_ATOMIC_GLOBAL ||
> +	       bo->attr.atomic_access == DRM_XE_VMA_ATOMIC_CPU;
> +}
> +
>  static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>  {
>  	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
> @@ -1675,7 +1681,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>  	struct xe_bo *bo = ttm_to_xe_bo(tbo);
>  	bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
>  	vm_fault_t ret;
> -	int idx;
> +	int idx, r = 0;
>  
>  	if (needs_rpm)
>  		xe_pm_runtime_get(xe);
> @@ -1687,8 +1693,17 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>  	if (drm_dev_enter(ddev, &idx)) {
>  		trace_xe_bo_cpu_fault(bo);
>  
> -		ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
> -					       TTM_BO_VM_NUM_PREFAULT);
> +		if (should_migrate_to_smem(bo)) {

Maybe assert here that the BO has a system placement, to ensure our
sanitization of the madvise IOCTL is correct.
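
Something like this (untested sketch; assuming checking XE_BO_FLAG_SYSTEM is
the right way to express "has a system placement" here):

	if (should_migrate_to_smem(bo)) {
		/* Madvise IOCTL sanitization should guarantee a system placement */
		xe_assert(xe, bo->flags & XE_BO_FLAG_SYSTEM);

		r = xe_bo_migrate(bo, XE_PL_TT);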

> +			r = xe_bo_migrate(bo, XE_PL_TT);
> +			if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
> +				ret = VM_FAULT_NOPAGE;
> +			else if (r)
> +				ret = VM_FAULT_SIGBUS;
> +		}
> +		if (!ret)
> +			ret = ttm_bo_vm_fault_reserved(vmf,
> +						       vmf->vma->vm_page_prot,
> +						       TTM_BO_VM_NUM_PREFAULT);
>  		drm_dev_exit(idx);
>  	} else {
>  		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
> diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> index e2d975b2fddb..1335c5fc0e10 100644
> --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
> @@ -89,7 +89,7 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
>  	if (err)
>  		return err;
>  
> -	if (atomic && IS_DGFX(vm->xe)) {
> +	if (xe_vma_need_vram_for_atomic(vm->xe, vma, atomic)) {
>  		if (xe_vma_is_userptr(vma)) {
>  			err = -EACCES;
>  			return err;
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 0dd9f9e11030..14e554f3f4d5 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -4200,6 +4200,9 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
>   */
>  bool xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
>  {
> +	u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
> +					     vma->attr.atomic_access;
> +
>  	if (!IS_DGFX(xe))
>  		return false;
>  
> @@ -4207,7 +4210,7 @@ bool xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool
>  	 * on a device supporting CXL atomics, these would ideally work universally
>  	 * without additional handling.
>  	 */
> -	switch (vma->attr.atomic_access) {
> +	switch (atomic_access) {
>  	case DRM_XE_VMA_ATOMIC_DEVICE:
>  		return !xe->info.has_device_atomics_on_smem;
>  
> diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
> index 973627edb23c..5b96c8fc73a5 100644
> --- a/drivers/gpu/drm/xe/xe_vm_madvise.c
> +++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
> @@ -102,14 +102,28 @@ static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
>  			   struct xe_vma **vmas, int num_vmas,
>  			   struct drm_xe_madvise *op)
>  {
> +	struct xe_bo *bo;
>  	int i;
>  
>  	xe_assert(vm->xe, op->type == DRM_XE_VMA_ATTR_ATOMIC);
>  	xe_assert(vm->xe, op->atomic.val <= DRM_XE_VMA_ATOMIC_CPU);
>  
> -	for (i = 0; i < num_vmas; i++)
> +	for (i = 0; i < num_vmas; i++) {
>  		vmas[i]->attr.atomic_access = op->atomic.val;
> -	/*TODO: handle bo backed vmas */
> +
> +		bo = xe_vma_bo(vmas[i]);
> +		if (!bo)
> +			continue;
> +
> +		xe_bo_assert_held(bo);
> +		bo->attr.atomic_access = op->atomic.val;
> +
> +		/* Invalidate cpu page table, so bo can migrate to smem in next access */
> +		if (xe_bo_is_vram(bo) &&
> +		    (bo->attr.atomic_access == DRM_XE_VMA_ATOMIC_CPU ||
> +		     bo->attr.atomic_access == DRM_XE_VMA_ATOMIC_GLOBAL))
> +			ttm_bo_unmap_virtual(&bo->ttm);
> +	}
>  }
>  
>  static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
> @@ -231,6 +245,41 @@ static int drm_xe_madvise_args_are_sane(struct xe_device *xe, const struct drm_x
>  	return 0;
>  }
>  
> +static int check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
> +				  int num_vmas, u32 atomic_val)
> +{
> +	struct xe_device *xe = vm->xe;
> +	struct xe_bo *bo;
> +	int i;
> +
> +	for (i = 0; i < num_vmas; i++) {
> +		bo = xe_vma_bo(vmas[i]);
> +		if (!bo)
> +			continue;
> +
> +		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_VMA_ATOMIC_CPU &&
> +				 !(bo->flags & XE_BO_FLAG_SYSTEM)))
> +			return -EINVAL;
> +
> +		/* NOTE: The following atomic checks are platform-specific. For example,
> +		 * if a device supports CXL atomics, these may not be necessary or
> +		 * may behave differently.
> +		 */

/*
 * NOTE:

I believe the above is the preferred style, and IIRC checkpatch will complain
about the way you have it.

Also, this comment could be at the top of the function, or at least above the
first check.
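
Written out in full, i.e. the same text as your comment, just reflowed into
that style and hoisted above the first XE_IOCTL_DBG check:

	/*
	 * NOTE: The following atomic checks are platform-specific. For
	 * example, if a device supports CXL atomics, these may not be
	 * necessary or may behave differently.
	 */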

Nits aside, patch LGTM.

Matt

> +		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_VMA_ATOMIC_DEVICE &&
> +				 !(bo->flags & XE_BO_FLAG_VRAM0) &&
> +				 !(bo->flags & XE_BO_FLAG_VRAM1) &&
> +				 !(bo->flags & XE_BO_FLAG_SYSTEM &&
> +				   xe->info.has_device_atomics_on_smem)))
> +			return -EINVAL;
> +
> +		if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_VMA_ATOMIC_GLOBAL &&
> +				 (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
> +				  (!(bo->flags & XE_BO_FLAG_VRAM0) &&
> +				   !(bo->flags & XE_BO_FLAG_VRAM1)))))
> +			return -EINVAL;
> +	}
> +	return 0;
> +}
>  /**
>   * xe_vm_madvise_ioctl - Handle MADVise ioctl for a VM
>   * @dev: DRM device pointer
> @@ -276,6 +325,14 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
>  		goto unlock_vm;
>  
>  	if (madvise_range.has_bo_vmas) {
> +		if (args->type == DRM_XE_VMA_ATTR_ATOMIC) {
> +			err = check_bo_args_are_sane(vm, madvise_range.vmas,
> +						     madvise_range.num_vmas,
> +						     args->atomic.val);
> +			if (err)
> +				goto unlock_vm;
> +		}
> +
>  		drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
>  		drm_exec_until_all_locked(&exec) {
>  			for (int i = 0; i < madvise_range.num_vmas; i++) {
> -- 
> 2.34.1
> 
