[PATCH v3 6/7] drm/xe/uapi: Introduce VMA bind flag for device atomics

Lionel Landwerlin lionel.g.landwerlin at intel.com
Fri Apr 19 07:16:54 UTC 2024


On 15/04/2024 17:52, Nirmoy Das wrote:
> Adds a new VMA bind flag to enable device atomics on SMEM only buffers.
>
> Given that simultaneous usage of device atomics and CPU atomics on
> the same SMEM buffer is not guaranteed to function without migration,
> and UMD expects no migration for SMEM-only buffer objects, so this provides
> a way to set device atomics when UMD is certain to use the buffer only
> for device atomics.
>
> Signed-off-by: Nirmoy Das<nirmoy.das at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_vm.c       | 28 ++++++++++++++++++++++++----
>   drivers/gpu/drm/xe/xe_vm_types.h |  2 ++
>   include/uapi/drm/xe_drm.h        | 17 +++++++++++++----
>   3 files changed, 39 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 8380f1d23074..b0907a7bb88b 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -753,6 +753,7 @@ static void xe_vma_free(struct xe_vma *vma)
>   #define VMA_CREATE_FLAG_READ_ONLY	BIT(0)
>   #define VMA_CREATE_FLAG_IS_NULL		BIT(1)
>   #define VMA_CREATE_FLAG_DUMPABLE	BIT(2)
> +#define VMA_CREATE_FLAG_DEVICE_ATOMICS	BIT(3)
>   
>   static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>   				    struct xe_bo *bo,
> @@ -766,6 +767,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>   	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
>   	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
>   	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
> +	bool enable_atomics = (flags & VMA_CREATE_FLAG_DEVICE_ATOMICS);
>   
>   	xe_assert(vm->xe, start < end);
>   	xe_assert(vm->xe, end < vm->size);
> @@ -814,7 +816,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>   		xe_bo_assert_held(bo);
>   
>   		if (vm->xe->info.has_atomic_enable_pte_bit &&
> -		    (xe_bo_is_vram(bo) || !IS_DGFX(vm->xe)))
> +		    (xe_bo_is_vram(bo) || !IS_DGFX(vm->xe) || enable_atomics))
>   			vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
>   
>   		vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
> @@ -2116,6 +2118,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
>   		if (__op->op == DRM_GPUVA_OP_MAP) {
>   			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
>   			op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
> +			op->map.enable_device_atomics = flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS;
>   			op->map.pat_index = pat_index;
>   		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
>   			op->prefetch.region = prefetch_region;
> @@ -2312,6 +2315,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>   				VMA_CREATE_FLAG_IS_NULL : 0;
>   			flags |= op->map.dumpable ?
>   				VMA_CREATE_FLAG_DUMPABLE : 0;
> +			flags |= op->map.enable_device_atomics ?
> +				VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
>   
>   			vma = new_vma(vm, &op->base.map, op->map.pat_index,
>   				      flags);
> @@ -2339,6 +2344,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>   				flags |= op->base.remap.unmap->va->flags &
>   					XE_VMA_DUMPABLE ?
>   					VMA_CREATE_FLAG_DUMPABLE : 0;
> +				flags |= op->base.remap.unmap->va->flags ?
> +					VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
>   
>   				vma = new_vma(vm, op->base.remap.prev,
>   					      old->pat_index, flags);
> @@ -2376,6 +2383,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>   				flags |= op->base.remap.unmap->va->flags &
>   					XE_VMA_DUMPABLE ?
>   					VMA_CREATE_FLAG_DUMPABLE : 0;
> +				flags |= op->base.remap.unmap->va->flags ?
> +					VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
>   
>   				vma = new_vma(vm, op->base.remap.next,
>   					      old->pat_index, flags);
> @@ -2731,7 +2740,8 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>   	(DRM_XE_VM_BIND_FLAG_READONLY | \
>   	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
>   	 DRM_XE_VM_BIND_FLAG_NULL | \
> -	 DRM_XE_VM_BIND_FLAG_DUMPABLE)
> +	 DRM_XE_VM_BIND_FLAG_DUMPABLE | \
> +	 DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS)
>   #define XE_64K_PAGE_MASK 0xffffull
>   #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
>   
> @@ -2874,7 +2884,7 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
>   
>   static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
>   					u64 addr, u64 range, u64 obj_offset,
> -					u16 pat_index)
> +					u16 pat_index, u32 flags)
>   {
>   	u16 coh_mode;
>   
> @@ -2909,6 +2919,15 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
>   		return  -EINVAL;
>   	}
>   
> +	if (XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS) &&
> +			 (!xe->info.has_device_atomics_on_smem &&
> +			  !xe_bo_is_vram(bo))))
> +		return -EINVAL;


Is the check correct?

I'm not sure what you're trying to forbid here.


I would have guessed :


  (flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS) &&
  (!xe->info.has_device_atomics_on_smem || !xe_bo_is_vram(bo))


> +
> +	if (XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS) &&
> +			 !xe_bo_has_single_placement(bo)))
> +		return -EINVAL;
> +
>   	return 0;
>   }
>   
> @@ -3007,7 +3026,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   		bos[i] = gem_to_xe_bo(gem_obj);
>   
>   		err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
> -						   obj_offset, pat_index);
> +						   obj_offset, pat_index,
> +						   bind_ops[i].flags);
>   		if (err)
>   			goto put_obj;
>   	}
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index badf3945083d..5a20bd80c456 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -276,6 +276,8 @@ struct xe_vm {
>   struct xe_vma_op_map {
>   	/** @vma: VMA to map */
>   	struct xe_vma *vma;
> +	/** @enable_device_atomics: Whether the VMA will allow device atomics */
> +	bool enable_device_atomics;
>   	/** @is_null: is NULL binding */
>   	bool is_null;
>   	/** @dumpable: whether BO is dumped on GPU hang */
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 1446c3bae515..ca4447e10ac9 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -883,6 +883,14 @@ struct drm_xe_vm_destroy {
>    *    will only be valid for DRM_XE_VM_BIND_OP_MAP operations, the BO
>    *    handle MBZ, and the BO offset MBZ. This flag is intended to
>    *    implement VK sparse bindings.
> + *  - %DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS - When this flag is set for
> + *    a VA range, all the corresponding PTEs will have atomic access bit
> + *    set. This will allow device atomics operation for that VA range.
> + *    This flag only works for single placement buffer objects and
> + *    mainly for SMEM only buffer objects where CPU atomics can be performed
> + *    by an application and so KMD cannot set device atomics on such buffers
> + *    by default. This flag has no effect on LMEM only placed buffers as atomic
> + *    access bit is always set for LMEM backed PTEs.


Maybe we should be more explicit about when this flag is allowed:

  - error if DRM_XE_QUERY_CONFIG_FLAG_HAS_DEV_ATOMIC_ON_SMEM is not reported

  - error on multi-region BOs

  - ignored for LMEM-only BOs

-Lionel


>    */
>   struct drm_xe_vm_bind_op {
>   	/** @extensions: Pointer to the first extension struct, if any */
> @@ -969,10 +977,11 @@ struct drm_xe_vm_bind_op {
>   	/** @op: Bind operation to perform */
>   	__u32 op;
>   
> -#define DRM_XE_VM_BIND_FLAG_READONLY	(1 << 0)
> -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(1 << 1)
> -#define DRM_XE_VM_BIND_FLAG_NULL	(1 << 2)
> -#define DRM_XE_VM_BIND_FLAG_DUMPABLE	(1 << 3)
> +#define DRM_XE_VM_BIND_FLAG_READONLY		(1 << 0)
> +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE		(1 << 1)
> +#define DRM_XE_VM_BIND_FLAG_NULL		(1 << 2)
> +#define DRM_XE_VM_BIND_FLAG_DUMPABLE		(1 << 3)
> +#define DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS	(1 << 4)
>   	/** @flags: Bind flags */
>   	__u32 flags;
>   

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/intel-xe/attachments/20240419/a3671127/attachment-0001.htm>


More information about the Intel-xe mailing list