[PATCH v3 6/7] drm/xe/uapi: Introduce VMA bind flag for device atomics
Nirmoy Das
nirmoy.das at linux.intel.com
Mon Apr 22 08:39:04 UTC 2024
Hi Lionel,
On 4/19/2024 9:16 AM, Lionel Landwerlin wrote:
> On 15/04/2024 17:52, Nirmoy Das wrote:
>> Adds a new VMA bind flag to enable device atomics on SMEM only buffers.
>>
>> Given that simultaneous usage of device atomics and CPU atomics on
>> the same SMEM buffer is not guaranteed to function without migration,
>> and UMD expects no migration for SMEM-only buffer objects, so this provides
>> a way to set device atomics when UMD is certain to use the buffer only
>> for device atomics.
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_vm.c | 28 ++++++++++++++++++++++++----
>> drivers/gpu/drm/xe/xe_vm_types.h | 2 ++
>> include/uapi/drm/xe_drm.h | 17 +++++++++++++----
>> 3 files changed, 39 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index 8380f1d23074..b0907a7bb88b 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -753,6 +753,7 @@ static void xe_vma_free(struct xe_vma *vma)
>> #define VMA_CREATE_FLAG_READ_ONLY BIT(0)
>> #define VMA_CREATE_FLAG_IS_NULL BIT(1)
>> #define VMA_CREATE_FLAG_DUMPABLE BIT(2)
>> +#define VMA_CREATE_FLAG_DEVICE_ATOMICS BIT(3)
>>
>> static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>> struct xe_bo *bo,
>> @@ -766,6 +767,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>> bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
>> bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
>> bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
>> + bool enable_atomics = (flags & VMA_CREATE_FLAG_DEVICE_ATOMICS);
>>
>> xe_assert(vm->xe, start < end);
>> xe_assert(vm->xe, end < vm->size);
>> @@ -814,7 +816,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>> xe_bo_assert_held(bo);
>>
>> if (vm->xe->info.has_atomic_enable_pte_bit &&
>> - (xe_bo_is_vram(bo) || !IS_DGFX(vm->xe)))
>> + (xe_bo_is_vram(bo) || !IS_DGFX(vm->xe) || enable_atomics))
>> vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
>>
>> vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
>> @@ -2116,6 +2118,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
>> if (__op->op == DRM_GPUVA_OP_MAP) {
>> op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
>> op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
>> + op->map.enable_device_atomics = flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS;
>> op->map.pat_index = pat_index;
>> } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
>> op->prefetch.region = prefetch_region;
>> @@ -2312,6 +2315,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>> VMA_CREATE_FLAG_IS_NULL : 0;
>> flags |= op->map.dumpable ?
>> VMA_CREATE_FLAG_DUMPABLE : 0;
>> + flags |= op->map.enable_device_atomics ?
>> + VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
>>
>> vma = new_vma(vm, &op->base.map, op->map.pat_index,
>> flags);
>> @@ -2339,6 +2344,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>> flags |= op->base.remap.unmap->va->flags &
>> XE_VMA_DUMPABLE ?
>> VMA_CREATE_FLAG_DUMPABLE : 0;
>> + flags |= op->base.remap.unmap->va->flags ?
>> + VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
>>
>> vma = new_vma(vm, op->base.remap.prev,
>> old->pat_index, flags);
>> @@ -2376,6 +2383,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>> flags |= op->base.remap.unmap->va->flags &
>> XE_VMA_DUMPABLE ?
>> VMA_CREATE_FLAG_DUMPABLE : 0;
>> + flags |= op->base.remap.unmap->va->flags ?
>> + VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
>>
>> vma = new_vma(vm, op->base.remap.next,
>> old->pat_index, flags);
>> @@ -2731,7 +2740,8 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>> (DRM_XE_VM_BIND_FLAG_READONLY | \
>> DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
>> DRM_XE_VM_BIND_FLAG_NULL | \
>> - DRM_XE_VM_BIND_FLAG_DUMPABLE)
>> + DRM_XE_VM_BIND_FLAG_DUMPABLE | \
>> + DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS)
>> #define XE_64K_PAGE_MASK 0xffffull
>> #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
>>
>> @@ -2874,7 +2884,7 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
>>
>> static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
>> u64 addr, u64 range, u64 obj_offset,
>> - u16 pat_index)
>> + u16 pat_index, u32 flags)
>> {
>> u16 coh_mode;
>>
>> @@ -2909,6 +2919,15 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
>> return -EINVAL;
>> }
>>
>> + if (XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS) &&
>> + (!xe->info.has_device_atomics_on_smem &&
>> + !xe_bo_is_vram(bo))))
>> + return -EINVAL;
>
>
> Is the check correct?
>
> I'm not sure what you're trying to forbid here.
>
I should have added a comment. I will add more details in my next revision.
I wanted to forbid setting this flag on platforms that don't support
device atomics on SMEM-only BOs, like PVC.
>
> I would have guessed :
>
>
> (flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS) &&
> (!xe->info.has_device_atomics_on_smem || !xe_bo_is_vram(bo))
>
>
>> +
>> + if (XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS) &&
>> + !xe_bo_has_single_placement(bo)))
>> + return -EINVAL;
>> +
>> return 0;
>> }
>>
>> @@ -3007,7 +3026,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>> bos[i] = gem_to_xe_bo(gem_obj);
>>
>> err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range,
>> - obj_offset, pat_index);
>> + obj_offset, pat_index,
>> + bind_ops[i].flags);
>> if (err)
>> goto put_obj;
>> }
>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
>> index badf3945083d..5a20bd80c456 100644
>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>> @@ -276,6 +276,8 @@ struct xe_vm {
>> struct xe_vma_op_map {
>> /** @vma: VMA to map */
>> struct xe_vma *vma;
>> + /** @enable_device_atomics: Whether the VMA will allow device atomics */
>> + bool enable_device_atomics;
>> /** @is_null: is NULL binding */
>> bool is_null;
>> /** @dumpable: whether BO is dumped on GPU hang */
>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>> index 1446c3bae515..ca4447e10ac9 100644
>> --- a/include/uapi/drm/xe_drm.h
>> +++ b/include/uapi/drm/xe_drm.h
>> @@ -883,6 +883,14 @@ struct drm_xe_vm_destroy {
>> * will only be valid for DRM_XE_VM_BIND_OP_MAP operations, the BO
>> * handle MBZ, and the BO offset MBZ. This flag is intended to
>> * implement VK sparse bindings.
>> + * - %DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS - When this flag is set for
>> + * a VA range, all the corresponding PTEs will have atomic access bit
>> + * set. This will allow device atomics operation for that VA range.
>> + * This flag only works for single placement buffer objects and
>> + * mainly for SMEM only buffer objects where CPU atomics can be performed
>> + * by an application and so KMD cannot set device atomics on such buffers
>> + * by default. This flag has no effect on LMEM only placed buffers as atomic
>> + * access bit is always set for LMEM backed PTEs.
>
>
> Maybe we should be more explicit about when this flag is allowed :
>
> - error if DRM_XE_QUERY_CONFIG_FLAG_HAS_DEV_ATOMIC_ON_SMEM is not
> reported
>
> - error on multi region BOs
>
> - ignored for LMEM only BOs
>
Agreed, the comment probably is not very clear. I will add more explicit
requirements for this flag by expanding this comment.
Regards,
Nirmoy
> -Lionel
>
>
>> */
>> struct drm_xe_vm_bind_op {
>> /** @extensions: Pointer to the first extension struct, if any */
>> @@ -969,10 +977,11 @@ struct drm_xe_vm_bind_op {
>> /** @op: Bind operation to perform */
>> __u32 op;
>>
>> -#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0)
>> -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1)
>> -#define DRM_XE_VM_BIND_FLAG_NULL (1 << 2)
>> -#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3)
>> +#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0)
>> +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1)
>> +#define DRM_XE_VM_BIND_FLAG_NULL (1 << 2)
>> +#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3)
>> +#define DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS (1 << 4)
>> /** @flags: Bind flags */
>> __u32 flags;
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/intel-xe/attachments/20240422/7b136c7b/attachment.htm>
More information about the Intel-xe
mailing list