[PATCH 3/3] drm/xe/uapi: Introduce VMA bind flag for device atomics

Matthew Brost matthew.brost at intel.com
Wed Apr 10 17:35:14 UTC 2024


On Wed, Apr 10, 2024 at 07:03:08PM +0200, Nirmoy Das wrote:
> Adds a new VMA bind flag to enable device atomics on SMEM only buffers.
> 
> Given that simultaneous usage of device atomics and CPU atomics on
> the same SMEM buffer is not guaranteed to function without migration,
> and UMD expects no migration for SMEM-only buffer objects, so this provide
> a way to set device atomics when UMD is certain to use the buffer only
> for device atomics.
> 

For new uAPI we will need a UMD PR using it and provide a link to the PR
in the commit message.

> Signed-off-by: Nirmoy Das <nirmoy.das at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_vm.c       | 27 +++++++++++++++++++++++++--
>  drivers/gpu/drm/xe/xe_vm_types.h |  2 ++
>  include/uapi/drm/xe_drm.h        |  9 +++++----
>  3 files changed, 32 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 8f3474c5f480..530b4bbc186c 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -851,6 +851,7 @@ static void xe_vma_free(struct xe_vma *vma)
>  #define VMA_CREATE_FLAG_READ_ONLY	BIT(0)
>  #define VMA_CREATE_FLAG_IS_NULL		BIT(1)
>  #define VMA_CREATE_FLAG_DUMPABLE	BIT(2)
> +#define VMA_CREATE_FLAG_DEVICE_ATOMICS	BIT(3)
>  
>  static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>  				    struct xe_bo *bo,
> @@ -864,6 +865,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>  	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
>  	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
>  	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
> +	bool enable_atomics = (flags & VMA_CREATE_FLAG_IS_NULL);

s/flags & VMA_CREATE_FLAG_IS_NULL/flags & VMA_CREATE_FLAG_DEVICE_ATOMICS/

>  
>  	xe_assert(vm->xe, start < end);
>  	xe_assert(vm->xe, end < vm->size);
> @@ -912,7 +914,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>  		xe_bo_assert_held(bo);
>  
>  		if (GRAPHICS_VER(vm->xe) >= 20 || xe_bo_is_vram(bo) ||
> -		    !IS_DGFX(vm->xe))
> +		    !IS_DGFX(vm->xe) || enable_atomics)
>  			vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
>  
>  		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
> @@ -2174,6 +2176,18 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
>  	       operation, (ULL)addr, (ULL)range,
>  	       (ULL)bo_offset_or_userptr);
>  
> +	if (bo && (flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS) &&
> +	    (vm->xe->info.platform == XE_PVC && !xe_bo_is_vram(bo))) {
> +		drm_warn(&vm->xe->drm, "Setting device atomics on SMEM is not supported for this platform");
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	if (bo && (flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS) &&
> +	    !xe_bo_has_single_placement(bo))
> +		drm_warn(&vm->xe->drm, "DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS can be only set if the BO has single placement");
> +		return ERR_PTR(-EINVAL);
> +	}
> +

A few things here.

- These check should go in xe_vm_bind_ioctl() in the loop that looks up
  the BOs and parses them. Probably move implementation to a helper(s)
  too as it is already a pretty big ugly loop in xe_vm_bind_ioctl and
  adding more logic will make it uglier. Search for
  drm_gem_object_lookup in xe_vm_bind_ioctl() for the loop I am refering
  to.

- Rather than drm_warns just use XE_IOCTL_DBG for these checks, that is
  the style or input to IOCTLs when we return -EINVAL in Xe.

- Are these checks valid? At one point on PVC I had to code to do
  atomics between CPU and GPU by migrating the BO back and forth on page
  faults. I think the i915 does this too? Are we abandoning that idea?

- Lastly if these check are valid, rather than a platform check in the
  code I'd rather see a bit in intel_device_info.

>  	switch (operation) {
>  	case DRM_XE_VM_BIND_OP_MAP:
>  	case DRM_XE_VM_BIND_OP_MAP_USERPTR:
> @@ -2216,6 +2230,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
>  		if (__op->op == DRM_GPUVA_OP_MAP) {
>  			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
>  			op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
> +			op->map.enable_device_atomics = flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS;
>  			op->map.pat_index = pat_index;
>  		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
>  			op->prefetch.region = prefetch_region;
> @@ -2412,6 +2427,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>  				VMA_CREATE_FLAG_IS_NULL : 0;
>  			flags |= op->map.dumpable ?
>  				VMA_CREATE_FLAG_DUMPABLE : 0;
> +			flags |= op->map.enable_device_atomics ?
> +				VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
>  
>  			vma = new_vma(vm, &op->base.map, op->map.pat_index,
>  				      flags);
> @@ -2439,6 +2456,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>  				flags |= op->base.remap.unmap->va->flags &
>  					XE_VMA_DUMPABLE ?
>  					VMA_CREATE_FLAG_DUMPABLE : 0;
> +				flags |= op->base.remap.unmap->va->flags ?
> +					VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
>  
>  				vma = new_vma(vm, op->base.remap.prev,
>  					      old->pat_index, flags);
> @@ -2476,6 +2495,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>  				flags |= op->base.remap.unmap->va->flags &
>  					XE_VMA_DUMPABLE ?
>  					VMA_CREATE_FLAG_DUMPABLE : 0;
> +				flags |= op->base.remap.unmap->va->flags ?
> +					VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
> +

Extra newline.

>  
>  				vma = new_vma(vm, op->base.remap.next,
>  					      old->pat_index, flags);
> @@ -2831,7 +2853,8 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>  	(DRM_XE_VM_BIND_FLAG_READONLY | \
>  	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
>  	 DRM_XE_VM_BIND_FLAG_NULL | \
> -	 DRM_XE_VM_BIND_FLAG_DUMPABLE)
> +	 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
> +	 DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS)
>  #define XE_64K_PAGE_MASK 0xffffull
>  #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
>  
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index badf3945083d..7b9c68909c78 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -282,6 +282,8 @@ struct xe_vma_op_map {
>  	bool dumpable;
>  	/** @pat_index: The pat index to use for this operation. */
>  	u16 pat_index;
> +	/** @enable_device_atomics: Whether the VMA will allow device atomics */
> +	bool enable_device_atomics;

Put this next to the other bools in xe_vma_op_map.

>  };
>  
>  /** struct xe_vma_op_remap - VMA remap operation */
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 1446c3bae515..bffe8b1c040c 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -969,10 +969,11 @@ struct drm_xe_vm_bind_op {
>  	/** @op: Bind operation to perform */
>  	__u32 op;
>  
> -#define DRM_XE_VM_BIND_FLAG_READONLY	(1 << 0)
> -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(1 << 1)
> -#define DRM_XE_VM_BIND_FLAG_NULL	(1 << 2)
> -#define DRM_XE_VM_BIND_FLAG_DUMPABLE	(1 << 3)
> +#define DRM_XE_VM_BIND_FLAG_READONLY		(1 << 0)
> +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE		(1 << 1)
> +#define DRM_XE_VM_BIND_FLAG_NULL		(1 << 2)
> +#define DRM_XE_VM_BIND_FLAG_DUMPABLE		(1 << 3)
> +#define DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS	(1 << 4)

Kernel doc for new flag.

Matt

>  	/** @flags: Bind flags */
>  	__u32 flags;
>  
> -- 
> 2.42.0
> 


More information about the Intel-xe mailing list