[PATCH 3/3] drm/xe/uapi: Introduce VMA bind flag for device atomics

Nirmoy Das nirmoy.das at intel.com
Thu Apr 11 13:42:01 UTC 2024


Hi Lionel,

On 4/11/2024 3:14 PM, Lionel Landwerlin wrote:
> On 10/04/2024 20:03, Nirmoy Das wrote:
>> Adds a new VMA bind flag to enable device atomics on SMEM only buffers.
>>
>> Given that simultaneous usage of device atomics and CPU atomics on
>> the same SMEM buffer is not guaranteed to function without migration,
>> and UMD expects no migration for SMEM-only buffer objects, this
>> provides a way to set device atomics when the UMD is certain to use
>> the buffer only for device atomics.
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_vm.c       | 27 +++++++++++++++++++++++++--
>>   drivers/gpu/drm/xe/xe_vm_types.h |  2 ++
>>   include/uapi/drm/xe_drm.h        |  9 +++++----
>>   3 files changed, 32 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index 8f3474c5f480..530b4bbc186c 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -851,6 +851,7 @@ static void xe_vma_free(struct xe_vma *vma)
>>   #define VMA_CREATE_FLAG_READ_ONLY    BIT(0)
>>   #define VMA_CREATE_FLAG_IS_NULL        BIT(1)
>>   #define VMA_CREATE_FLAG_DUMPABLE    BIT(2)
>> +#define VMA_CREATE_FLAG_DEVICE_ATOMICS    BIT(3)
>>     static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>>                       struct xe_bo *bo,
>> @@ -864,6 +865,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>>       bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
>>       bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
>>       bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
>> +    bool enable_atomics = (flags & VMA_CREATE_FLAG_DEVICE_ATOMICS);
>>         xe_assert(vm->xe, start < end);
>>       xe_assert(vm->xe, end < vm->size);
>> @@ -912,7 +914,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>>           xe_bo_assert_held(bo);
>>             if (GRAPHICS_VER(vm->xe) >= 20 || xe_bo_is_vram(bo) ||
>> -            !IS_DGFX(vm->xe))
>> +            !IS_DGFX(vm->xe) || enable_atomics)
>>               vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
>>             vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base);
>> @@ -2174,6 +2176,18 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
>>              operation, (ULL)addr, (ULL)range,
>>              (ULL)bo_offset_or_userptr);
>> +    if (bo && (flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS) &&
>> +        (vm->xe->info.platform == XE_PVC && !xe_bo_is_vram(bo))) {
>> +        drm_warn(&vm->xe->drm, "Setting device atomics on SMEM is not supported for this platform");
>> +        return ERR_PTR(-EINVAL);
>> +    }
>> +
>> +    if (bo && (flags & DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS) &&
>> +        !xe_bo_has_single_placement(bo)) {
>> +        drm_warn(&vm->xe->drm, "DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS can only be set if the BO has a single placement");
>> +        return ERR_PTR(-EINVAL);
>> +    }
>> +
>>       switch (operation) {
>>       case DRM_XE_VM_BIND_OP_MAP:
>>       case DRM_XE_VM_BIND_OP_MAP_USERPTR:
>> @@ -2216,6 +2230,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
>>           if (__op->op == DRM_GPUVA_OP_MAP) {
>>               op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
>>               op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
>> +            op->map.enable_device_atomics = flags &
>> +                DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS;
>>               op->map.pat_index = pat_index;
>>           } else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
>>               op->prefetch.region = prefetch_region;
>> @@ -2412,6 +2427,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>>                   VMA_CREATE_FLAG_IS_NULL : 0;
>>               flags |= op->map.dumpable ?
>>                   VMA_CREATE_FLAG_DUMPABLE : 0;
>> +            flags |= op->map.enable_device_atomics ?
>> +                VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
>>                 vma = new_vma(vm, &op->base.map, op->map.pat_index,
>>                         flags);
>> @@ -2439,6 +2456,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>>                   flags |= op->base.remap.unmap->va->flags &
>>                       XE_VMA_DUMPABLE ?
>>                       VMA_CREATE_FLAG_DUMPABLE : 0;
>> +                flags |= op->base.remap.unmap->va->flags &
>> +                    XE_VMA_ATOMIC_PTE_BIT ?
>> +                    VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
>>                     vma = new_vma(vm, op->base.remap.prev,
>>                             old->pat_index, flags);
>> @@ -2476,6 +2495,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
>>                   flags |= op->base.remap.unmap->va->flags &
>>                       XE_VMA_DUMPABLE ?
>>                       VMA_CREATE_FLAG_DUMPABLE : 0;
>> +                flags |= op->base.remap.unmap->va->flags &
>> +                    XE_VMA_ATOMIC_PTE_BIT ?
>> +                    VMA_CREATE_FLAG_DEVICE_ATOMICS : 0;
>> +
>>                     vma = new_vma(vm, op->base.remap.next,
>>                             old->pat_index, flags);
>> @@ -2831,7 +2853,8 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>>       (DRM_XE_VM_BIND_FLAG_READONLY | \
>>        DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
>>        DRM_XE_VM_BIND_FLAG_NULL | \
>> -     DRM_XE_VM_BIND_FLAG_DUMPABLE)
>> +     DRM_XE_VM_BIND_FLAG_DUMPABLE | \
>> +     DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS)
>>   #define XE_64K_PAGE_MASK 0xffffull
>>   #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
>> index badf3945083d..7b9c68909c78 100644
>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>> @@ -282,6 +282,8 @@ struct xe_vma_op_map {
>>       bool dumpable;
>>       /** @pat_index: The pat index to use for this operation. */
>>       u16 pat_index;
>> +    /** @enable_device_atomics: Whether the VMA will allow device atomics */
>> +    bool enable_device_atomics;
>>   };
>>     /** struct xe_vma_op_remap - VMA remap operation */
>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>> index 1446c3bae515..bffe8b1c040c 100644
>> --- a/include/uapi/drm/xe_drm.h
>> +++ b/include/uapi/drm/xe_drm.h
>> @@ -969,10 +969,11 @@ struct drm_xe_vm_bind_op {
>>       /** @op: Bind operation to perform */
>>       __u32 op;
>>   -#define DRM_XE_VM_BIND_FLAG_READONLY    (1 << 0)
>> -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE    (1 << 1)
>> -#define DRM_XE_VM_BIND_FLAG_NULL    (1 << 2)
>> -#define DRM_XE_VM_BIND_FLAG_DUMPABLE    (1 << 3)
>> +#define DRM_XE_VM_BIND_FLAG_READONLY        (1 << 0)
>> +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE        (1 << 1)
>> +#define DRM_XE_VM_BIND_FLAG_NULL        (1 << 2)
>> +#define DRM_XE_VM_BIND_FLAG_DUMPABLE        (1 << 3)
>
>
> We should probably document that it's an error to add this flag if the 
> BO doesn't have a single memory region.

It is an error if this flag is set on a buffer that doesn't have a 
single memory region, as atomics on an SMEM+LMEM buffer will be handled 
with migration.

Let me know if I got that wrong.
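
To illustrate (a hypothetical usage sketch, not part of this patch: 
field names follow the drm_xe_vm_bind/drm_xe_vm_bind_op uapi structs, 
and fd/vm_id/bo_handle/bo_size/gpu_va are assumed to be set up 
elsewhere), a UMD would opt in like this:

	struct drm_xe_vm_bind bind = {
		.vm_id = vm_id,
		.num_binds = 1,
		.bind = {
			.obj = bo_handle,  /* BO with one (system) placement */
			.obj_offset = 0,
			.range = bo_size,  /* page-aligned mapping size */
			.addr = gpu_va,    /* GPU VA to map at */
			.op = DRM_XE_VM_BIND_OP_MAP,
			.flags = DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS,
		},
	};

	/* Rejected with -EINVAL if the BO has multiple placements, or
	 * on platforms (e.g. PVC) without device atomics on SMEM. */
	int ret = ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);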

>
> Are we supposed to just try to set that flag, or is there going to 
> be a query?
>
> A query might make sense since it's going to be rejected on some 
> platforms.

Can we use the rejection as a query? I am using it that way in an IGT 
test: https://patchwork.freedesktop.org/patch/588759/?series=132289&rev=1
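
To sketch what I mean by rejection-as-query (hypothetical helper, not 
taken from the IGT test; assumes an SMEM-only single-placement BO and a 
scratch GPU VA range to probe with):

	#include <stdbool.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/xe_drm.h>	/* or the local uapi copy */

	static bool smem_device_atomics_supported(int fd, uint32_t vm_id,
						  uint32_t smem_bo,
						  uint64_t size,
						  uint64_t probe_va)
	{
		struct drm_xe_vm_bind bind = {
			.vm_id = vm_id,
			.num_binds = 1,
			.bind = {
				.obj = smem_bo,
				.range = size,
				.addr = probe_va,
				.op = DRM_XE_VM_BIND_OP_MAP,
				.flags = DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS,
			},
		};

		/* Kernel rejects the combination -> no device atomics
		 * on SMEM for this platform. */
		if (ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind))
			return false;

		/* Supported: tear the probe mapping back down. */
		bind.bind.obj = 0;	/* must be zero for unmap */
		bind.bind.flags = 0;
		bind.bind.op = DRM_XE_VM_BIND_OP_UNMAP;
		ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
		return true;
	}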


Regards,

Nirmoy

>
>
> -Lionel
>
>
>> +#define DRM_XE_VM_BIND_FLAG_DEVICE_ATOMICS    (1 << 4)
>>       /** @flags: Bind flags */
>>       __u32 flags;
>
>

