[PATCH v2] drm/xe/uapi: Uniform async vs sync handling
Thomas Hellström
thomas.hellstrom at linux.intel.com
Wed Dec 13 18:09:29 UTC 2023
On 12/13/23 19:05, Matthew Brost wrote:
> On Wed, Dec 13, 2023 at 11:24:12AM +0100, Thomas Hellström wrote:
>> On Tue, 2023-12-12 at 20:35 -0800, Matthew Brost wrote:
>>> Remove the concept of async vs sync VM bind queues and instead make
>>> async vs sync a per-IOCTL choice. Since this is per IOCTL, a single
>>> IOCTL-level flag makes more sense than a per-VM-bind-op flag. Add
>>> DRM_XE_SYNCS_FLAG_WAIT_FOR_OP, an input sync flag, to support this.
>>> Support the new flag for the VM bind IOCTL only, with a path to
>>> support it for execs too.
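>>>
>>> E.g. a synchronous bind with the new flag would then look roughly
>>> like this from user-space (illustrative sketch only):
>>>
>>> 	__u64 cookie = 0;	/* MBZ on the first call */
>>> 	struct drm_xe_vm_bind bind = { .vm_id = vm_id, /* ... */ };
>>>
>>> 	bind.syncs.flags = DRM_XE_SYNCS_FLAG_WAIT_FOR_OP;
>>> 	bind.syncs.num_syncs = 0;	/* MBZ with WAIT_FOR_OP */
>>> 	bind.syncs.cookie = (uintptr_t)&cookie;
>>> 	ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);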
>>>
>>> v2: Add cookie, move sync wait outside of any locks.
>>>
>>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>>> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>>> Cc: Francois Dugast <francois.dugast at intel.com>
>>> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
>> Some minor things below,
>> But I don't see where we read the cookie and bypass the submission to
>> just wait for the last fence where *cookie != 0?
> See other email to Jose and you.
>
> I missed that part and can fix in next rev if we agree to the uAPI.
>
> Matt
You have an ack from me for that, although I do think it's awkward (the
most awkward thing being the two different paths).
BTW, isn't it only two IOCTL calls when using async + user-fence, with
the second one only needed if the user-fence hasn't already signaled on
return?
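
I.e. something like (pseudo-code in the spirit of the sketch quoted
below; helper names are made up):

	vm_bind(async, out_sync = user_fence);	/* first IOCTL */
	if (!user_fence_signaled(user_fence))
		wait_user_fence(user_fence);	/* second IOCTL, only if needed */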
/Thomas
>> Like:
>>
>> vm_bind()
>> {
>> 	commit();
>> 	wait_for_last_fence();	/* Returns -ERESTARTSYS */
>> 	set_cookie(1);
>> 	return or restart;
>> }
>>
>> vm_bind()
>> {
>> 	if (cookie() == 1) {
>> 		wait_for_last_fence();
>> 		return;
>> 	} else {
>> 		...
>>
>>
>>
>>
>> /Thomas
>>
>>
>>> ---
>>> drivers/gpu/drm/xe/xe_exec.c | 12 ++-
>>> drivers/gpu/drm/xe/xe_exec_queue.c | 7 +-
>>> drivers/gpu/drm/xe/xe_exec_queue_types.h | 2 -
>>> drivers/gpu/drm/xe/xe_vm.c | 116 +++++++++++------------
>>> drivers/gpu/drm/xe/xe_vm_types.h | 13 ++-
>>> include/uapi/drm/xe_drm.h | 64 ++++++++-----
>>> 6 files changed, 109 insertions(+), 105 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/xe/xe_exec.c
>>> b/drivers/gpu/drm/xe/xe_exec.c
>>> index ba92e5619da3..8a1530dab65f 100644
>>> --- a/drivers/gpu/drm/xe/xe_exec.c
>>> +++ b/drivers/gpu/drm/xe/xe_exec.c
>>> @@ -104,7 +104,7 @@ int xe_exec_ioctl(struct drm_device *dev, void
>>> *data, struct drm_file *file)
>>> struct xe_device *xe = to_xe_device(dev);
>>> struct xe_file *xef = to_xe_file(file);
>>> struct drm_xe_exec *args = data;
>>> - struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs);
>>> + struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs.syncs);
>>> u64 __user *addresses_user = u64_to_user_ptr(args->address);
>>> struct xe_exec_queue *q;
>>> struct xe_sync_entry *syncs = NULL;
>>> @@ -120,7 +120,9 @@ int xe_exec_ioctl(struct drm_device *dev, void
>>> *data, struct drm_file *file)
>>> int err = 0;
>>>
>>> if (XE_IOCTL_DBG(xe, args->extensions) ||
>>> - XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
>>> + XE_IOCTL_DBG(xe, args->pad || args->pad2[0] || args->pad2[1] || args->pad2[2]) ||
>>> + XE_IOCTL_DBG(xe, args->syncs.flags) ||
>>> + XE_IOCTL_DBG(xe, args->syncs.cookie) ||
>>> XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
>>> return -EINVAL;
>>>
>>> @@ -140,8 +142,8 @@ int xe_exec_ioctl(struct drm_device *dev, void
>>> *data, struct drm_file *file)
>>> goto err_exec_queue;
>>> }
>>>
>>> - if (args->num_syncs) {
>>> - syncs = kcalloc(args->num_syncs, sizeof(*syncs),
>>> GFP_KERNEL);
>>> + if (args->syncs.num_syncs) {
>>> + syncs = kcalloc(args->syncs.num_syncs,
>>> sizeof(*syncs), GFP_KERNEL);
>>> if (!syncs) {
>>> err = -ENOMEM;
>>> goto err_exec_queue;
>>> @@ -150,7 +152,7 @@ int xe_exec_ioctl(struct drm_device *dev, void
>>> *data, struct drm_file *file)
>>>
>>> vm = q->vm;
>>>
>>> - for (i = 0; i < args->num_syncs; i++) {
>>> + for (i = 0; i < args->syncs.num_syncs; i++) {
>>> err = xe_sync_entry_parse(xe, xef,
>>> &syncs[num_syncs++],
>>> &syncs_user[i],
>>> SYNC_PARSE_FLAG_EXEC |
>>> (xe_vm_in_lr_mode(vm) ?
>>> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c
>>> b/drivers/gpu/drm/xe/xe_exec_queue.c
>>> index eeb9605dd45f..a25d67971fdd 100644
>>> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
>>> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
>>> @@ -625,10 +625,7 @@ int xe_exec_queue_create_ioctl(struct drm_device
>>> *dev, void *data,
>>> if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
>>> return -EINVAL;
>>>
>>> - if (eci[0].engine_class >= DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC)
>>> {
>>> - bool sync = eci[0].engine_class ==
>>> - DRM_XE_ENGINE_CLASS_VM_BIND_SYNC;
>>> -
>>> + if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
>>> for_each_gt(gt, xe, id) {
>>> struct xe_exec_queue *new;
>>>
>>> @@ -654,8 +651,6 @@ int xe_exec_queue_create_ioctl(struct drm_device
>>> *dev, void *data,
>>> args->width, hwe,
>>>
>>> EXEC_QUEUE_FLAG_PERSISTENT |
>>> EXEC_QUEUE_FLAG_VM
>>> |
>>> - (sync ? 0 :
>>> -
>>> EXEC_QUEUE_FLAG_VM_ASYNC) |
>>> (id ?
>>>
>>> EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
>>> 0));
>>> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h
>>> b/drivers/gpu/drm/xe/xe_exec_queue_types.h
>>> index c7aefa1c8c31..0f7f6cded4a3 100644
>>> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
>>> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
>>> @@ -84,8 +84,6 @@ struct xe_exec_queue {
>>> #define EXEC_QUEUE_FLAG_VM BIT(4)
>>> /* child of VM queue for multi-tile VM jobs */
>>> #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD BIT(5)
>>> -/* VM jobs for this queue are asynchronous */
>>> -#define EXEC_QUEUE_FLAG_VM_ASYNC BIT(6)
>>>
>>> /**
>>> * @flags: flags for this exec queue, should statically setup
>>> aside from ban
>>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>>> index 2f3df9ee67c9..ab38685d2daf 100644
>>> --- a/drivers/gpu/drm/xe/xe_vm.c
>>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>>> @@ -1343,9 +1343,7 @@ struct xe_vm *xe_vm_create(struct xe_device
>>> *xe, u32 flags)
>>> struct xe_gt *gt = tile->primary_gt;
>>> struct xe_vm *migrate_vm;
>>> struct xe_exec_queue *q;
>>> - u32 create_flags = EXEC_QUEUE_FLAG_VM |
>>> - ((flags & XE_VM_FLAG_ASYNC_DEFAULT) ?
>>> - EXEC_QUEUE_FLAG_VM_ASYNC : 0);
>>> + u32 create_flags = EXEC_QUEUE_FLAG_VM;
>>>
>>> if (!vm->pt_root[id])
>>> continue;
>>> @@ -1712,12 +1710,6 @@ xe_vm_bind_vma(struct xe_vma *vma, struct
>>> xe_exec_queue *q,
>>> return ERR_PTR(err);
>>> }
>>>
>>> -static bool xe_vm_sync_mode(struct xe_vm *vm, struct xe_exec_queue
>>> *q)
>>> -{
>>> - return q ? !(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC) :
>>> - !(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT);
>>> -}
>>> -
>>> static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
>>> struct xe_exec_queue *q, struct xe_sync_entry
>>> *syncs,
>>> u32 num_syncs, bool immediate, bool first_op,
>>> @@ -1747,8 +1739,6 @@ static int __xe_vm_bind(struct xe_vm *vm,
>>> struct xe_vma *vma,
>>>
>>> if (last_op)
>>> xe_exec_queue_last_fence_set(wait_exec_queue, vm,
>>> fence);
>>> - if (last_op && xe_vm_sync_mode(vm, q))
>>> - dma_fence_wait(fence, true);
>>> dma_fence_put(fence);
>>>
>>> return 0;
>>> @@ -1791,8 +1781,6 @@ static int xe_vm_unbind(struct xe_vm *vm,
>>> struct xe_vma *vma,
>>> xe_vma_destroy(vma, fence);
>>> if (last_op)
>>> xe_exec_queue_last_fence_set(wait_exec_queue, vm,
>>> fence);
>>> - if (last_op && xe_vm_sync_mode(vm, q))
>>> - dma_fence_wait(fence, true);
>>> dma_fence_put(fence);
>>>
>>> return 0;
>>> @@ -1800,7 +1788,6 @@ static int xe_vm_unbind(struct xe_vm *vm,
>>> struct xe_vma *vma,
>>>
>>> #define ALL_DRM_XE_VM_CREATE_FLAGS
>>> (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
>>> DRM_XE_VM_CREATE_FLAG_LR_MODE | \
>>> -
>>> DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT | \
>>> DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
>>>
>>> int xe_vm_create_ioctl(struct drm_device *dev, void *data,
>>> @@ -1854,8 +1841,6 @@ int xe_vm_create_ioctl(struct drm_device *dev,
>>> void *data,
>>> flags |= XE_VM_FLAG_SCRATCH_PAGE;
>>> if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
>>> flags |= XE_VM_FLAG_LR_MODE;
>>> - if (args->flags & DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT)
>>> - flags |= XE_VM_FLAG_ASYNC_DEFAULT;
>>> if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
>>> flags |= XE_VM_FLAG_FAULT_MODE;
>>>
>>> @@ -2263,8 +2248,7 @@ static int xe_vma_op_commit(struct xe_vm *vm,
>>> struct xe_vma_op *op)
>>> static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct
>>> xe_exec_queue *q,
>>> struct drm_gpuva_ops *ops,
>>> struct xe_sync_entry *syncs, u32
>>> num_syncs,
>>> - struct list_head *ops_list, bool
>>> last,
>>> - bool async)
>>> + struct list_head *ops_list, bool
>>> last)
>>> {
>>> struct xe_vma_op *last_op = NULL;
>>> struct drm_gpuva_op *__op;
>>> @@ -2696,16 +2680,16 @@ static int vm_bind_ioctl_ops_execute(struct
>>> xe_vm *vm,
>>>
>>> #ifdef TEST_VM_ASYNC_OPS_ERROR
>>> #define SUPPORTED_FLAGS \
>>> - (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_ASYNC | \
>>> - DRM_XE_VM_BIND_FLAG_READONLY | DRM_XE_VM_BIND_FLAG_IMMEDIATE
>>> | \
>>> - DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
>>> + (FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \
>>> + DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL |
>>> 0xffff)
>>> #else
>>> #define SUPPORTED_FLAGS \
>>> - (DRM_XE_VM_BIND_FLAG_ASYNC | DRM_XE_VM_BIND_FLAG_READONLY | \
>>> + (DRM_XE_VM_BIND_FLAG_READONLY | \
>>> DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \
>>> 0xffff)
>>> #endif
>>> #define XE_64K_PAGE_MASK 0xffffull
>>> +#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
>>>
>>> #define MAX_BINDS 512 /* FIXME: Picking random upper limit
>>> */
>>>
>>> @@ -2717,7 +2701,7 @@ static int vm_bind_ioctl_check_args(struct
>>> xe_device *xe,
>>> int err;
>>> int i;
>>>
>>> - if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
>>> + if (XE_IOCTL_DBG(xe, args->pad) ||
>>> XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
>>> return -EINVAL;
>>>
>>> @@ -2745,6 +2729,14 @@ static int vm_bind_ioctl_check_args(struct
>>> xe_device *xe,
>>> *bind_ops = &args->bind;
>>> }
>>>
>>> + *async = !(args->syncs.flags &
>>> DRM_XE_SYNCS_FLAG_WAIT_FOR_OP);
>>> +
>>> + if (XE_IOCTL_DBG(xe, args->syncs.flags &
>>> ~ALL_DRM_XE_SYNCS_FLAGS) ||
>>> + XE_IOCTL_DBG(xe, !*async && args->syncs.num_syncs)) {
>>> + err = -EINVAL;
>>> + goto free_bind_ops;
>>> + }
>>> +
>>> for (i = 0; i < args->num_binds; ++i) {
>>> u64 range = (*bind_ops)[i].range;
>>> u64 addr = (*bind_ops)[i].addr;
>>> @@ -2775,18 +2767,6 @@ static int vm_bind_ioctl_check_args(struct
>>> xe_device *xe,
>>> goto free_bind_ops;
>>> }
>>>
>>> - if (i == 0) {
>>> - *async = !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC);
>>> - if (XE_IOCTL_DBG(xe, !*async && args->num_syncs)) {
>>> - err = -EINVAL;
>>> - goto free_bind_ops;
>>> - }
>>> - } else if (XE_IOCTL_DBG(xe, *async != !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC))) {
>>> - err = -EINVAL;
>>> - goto free_bind_ops;
>>> - }
>>> -
>>> if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH)
>>> ||
>>> XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
>>> XE_IOCTL_DBG(xe, obj && is_null) ||
>>> @@ -2854,12 +2834,25 @@ static int vm_bind_ioctl_signal_fences(struct
>>> xe_vm *vm,
>>>
>>> xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
>>> fence);
>>> + dma_fence_put(fence);
>>>
>>> - if (xe_vm_sync_mode(vm, q)) {
>>> - long timeout = dma_fence_wait(fence, true);
>>> + return err;
>>> +}
>>>
>>> - if (timeout < 0)
>>> - err = -EINTR;
>>> +static int vm_bind_ioctl_sync_wait(struct xe_vm *vm,
>>> + struct dma_fence *fence,
>>> + u64 __user *cookie)
>>> +{
>>> + long timeout;
>> Perhaps use err? It can only ever be <= 0, so timeout is misleading.
>>
>>> + int err = 0;
>>> +
>>> + timeout = dma_fence_wait(fence, true);
>>> + if (timeout < 0) {
>>> + u64 value = 1;
>>> +
>>> + err = -ERESTARTSYS;
>> Just forward the return value from dma_fence_wait().
>>
>>> + if (copy_to_user(cookie, &value, sizeof(value)))
>> put_user()
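>>
>> I.e. the wait path could become roughly (untested sketch):
>>
>> 	long err = dma_fence_wait(fence, true);
>>
>> 	if (err && put_user(1, cookie))
>> 		err = -EFAULT;
>>
>> 	dma_fence_put(fence);
>> 	return err;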
>>
>>> + err = -EFAULT;
>>> }
>>>
>>> dma_fence_put(fence);
>>> @@ -2875,6 +2868,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
>>> void *data, struct drm_file *file)
>>> struct drm_xe_sync __user *syncs_user;
>>> struct xe_bo **bos = NULL;
>>> struct drm_gpuva_ops **ops = NULL;
>>> + struct dma_fence *fence = NULL;
>>> struct xe_vm *vm;
>>> struct xe_exec_queue *q = NULL;
>>> u32 num_syncs;
>>> @@ -2889,7 +2883,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
>>> void *data, struct drm_file *file)
>>> if (err)
>>> return err;
>>>
>>> - if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
>>> + if (XE_IOCTL_DBG(xe, args->pad) ||
>>> XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
>>> return -EINVAL;
>>>
>>> @@ -2904,12 +2898,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
>>> void *data, struct drm_file *file)
>>> err = -EINVAL;
>>> goto put_exec_queue;
>>> }
>>> -
>>> - if (XE_IOCTL_DBG(xe, args->num_binds && async !=
>>> - !!(q->flags &
>>> EXEC_QUEUE_FLAG_VM_ASYNC))) {
>>> - err = -EINVAL;
>>> - goto put_exec_queue;
>>> - }
>>> }
>>>
>>> vm = xe_vm_lookup(xef, args->vm_id);
>>> @@ -2918,14 +2906,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
>>> void *data, struct drm_file *file)
>>> goto put_exec_queue;
>>> }
>>>
>>> - if (!args->exec_queue_id) {
>>> - if (XE_IOCTL_DBG(xe, args->num_binds && async !=
>>> - !!(vm->flags &
>>> XE_VM_FLAG_ASYNC_DEFAULT))) {
>>> - err = -EINVAL;
>>> - goto put_vm;
>>> - }
>>> - }
>>> -
>>> err = down_write_killable(&vm->lock);
>>> if (err)
>>> goto put_vm;
>>> @@ -3015,16 +2995,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
>>> void *data, struct drm_file *file)
>>> }
>>> }
>>>
>>> - if (args->num_syncs) {
>>> - syncs = kcalloc(args->num_syncs, sizeof(*syncs),
>>> GFP_KERNEL);
>>> + if (args->syncs.num_syncs) {
>>> + syncs = kcalloc(args->syncs.num_syncs,
>>> sizeof(*syncs), GFP_KERNEL);
>>> if (!syncs) {
>>> err = -ENOMEM;
>>> goto put_obj;
>>> }
>>> }
>>>
>>> - syncs_user = u64_to_user_ptr(args->syncs);
>>> - for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++)
>>> {
>>> + syncs_user = u64_to_user_ptr(args->syncs.syncs);
>>> + for (num_syncs = 0; num_syncs < args->syncs.num_syncs;
>>> num_syncs++) {
>>> err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
>>> &syncs_user[num_syncs],
>>> (xe_vm_in_lr_mode(vm) ?
>>> @@ -3060,8 +3040,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
>>> void *data, struct drm_file *file)
>>>
>>> err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs,
>>> num_syncs,
>>> &ops_list,
>>> - i == args->num_binds -
>>> 1,
>>> - async);
>>> + i == args->num_binds -
>>> 1);
>>> if (err)
>>> goto unwind_ops;
>>> }
>>> @@ -3077,7 +3056,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
>>> void *data, struct drm_file *file)
>>> xe_exec_queue_get(q);
>>>
>>> err = vm_bind_ioctl_ops_execute(vm, &ops_list);
>>> -
>>> + if (!err && !async) {
>>> + fence =
>>> xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm);
>>> + dma_fence_get(fence);
>>> + }
>>> up_write(&vm->lock);
>>>
>>> if (q)
>>> @@ -3092,13 +3074,19 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
>>> void *data, struct drm_file *file)
>>> if (args->num_binds > 1)
>>> kfree(bind_ops);
>>>
>>> - return err;
>>> + return fence ? vm_bind_ioctl_sync_wait(vm, fence,
>>> u64_to_user_ptr(args->syncs.cookie)) :
>>> + err;
>>>
>>> unwind_ops:
>>> vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
>>> free_syncs:
>>> - if (err == -ENODATA)
>>> + if (err == -ENODATA) {
>>> err = vm_bind_ioctl_signal_fences(vm, q, syncs,
>>> num_syncs);
>>> + if (!async) {
>>> + fence =
>>> xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm);
>>> + dma_fence_get(fence);
>>> + }
>>> + }
>>> while (num_syncs--)
>>> xe_sync_entry_cleanup(&syncs[num_syncs]);
>>>
>>> @@ -3108,6 +3096,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev,
>>> void *data, struct drm_file *file)
>>> xe_bo_put(bos[i]);
>>> release_vm_lock:
>>> up_write(&vm->lock);
>>> + if (fence)
>>> + err = vm_bind_ioctl_sync_wait(vm, fence,
>>> u64_to_user_ptr(args->syncs.cookie));
>>> put_vm:
>>> xe_vm_put(vm);
>>> put_exec_queue:
>>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h
>>> b/drivers/gpu/drm/xe/xe_vm_types.h
>>> index 2e023596cb15..63e8a50b88e9 100644
>>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>>> @@ -138,13 +138,12 @@ struct xe_vm {
>>> */
>>> #define XE_VM_FLAG_64K BIT(0)
>>> #define XE_VM_FLAG_LR_MODE BIT(1)
>>> -#define XE_VM_FLAG_ASYNC_DEFAULT BIT(2)
>>> -#define XE_VM_FLAG_MIGRATION BIT(3)
>>> -#define XE_VM_FLAG_SCRATCH_PAGE BIT(4)
>>> -#define XE_VM_FLAG_FAULT_MODE BIT(5)
>>> -#define XE_VM_FLAG_BANNED BIT(6)
>>> -#define XE_VM_FLAG_TILE_ID(flags) FIELD_GET(GENMASK(8, 7),
>>> flags)
>>> -#define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(8, 7),
>>> (tile)->id)
>>> +#define XE_VM_FLAG_MIGRATION BIT(2)
>>> +#define XE_VM_FLAG_SCRATCH_PAGE BIT(3)
>>> +#define XE_VM_FLAG_FAULT_MODE BIT(4)
>>> +#define XE_VM_FLAG_BANNED BIT(5)
>>> +#define XE_VM_FLAG_TILE_ID(flags) FIELD_GET(GENMASK(7, 6),
>>> flags)
>>> +#define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(7, 6),
>>> (tile)->id)
>>> unsigned long flags;
>>>
>>> /** @composite_fence_ctx: context composite fence */
>>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>>> index 0895e4d2a981..d72e4441cc80 100644
>>> --- a/include/uapi/drm/xe_drm.h
>>> +++ b/include/uapi/drm/xe_drm.h
>>> @@ -140,8 +140,7 @@ struct drm_xe_engine_class_instance {
>>> * Kernel only classes (not actual hardware engine class).
>>> Used for
>>> * creating ordered queues of VM bind operations.
>>> */
>>> -#define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC 5
>>> -#define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC 6
>>> +#define DRM_XE_ENGINE_CLASS_VM_BIND 5
>>> /** @engine_class: engine class id */
>>> __u16 engine_class;
>>> /** @engine_instance: engine instance id */
>>> @@ -661,7 +660,6 @@ struct drm_xe_vm_create {
>>> * still enable recoverable pagefaults if supported by the
>>> device.
>>> */
>>> #define DRM_XE_VM_CREATE_FLAG_LR_MODE (1 << 1)
>>> -#define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT (1 << 2)
>>> /*
>>> * DRM_XE_VM_CREATE_FLAG_FAULT_MODE requires also
>>> * DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to be
>>> allocated
>>> @@ -669,7 +667,7 @@ struct drm_xe_vm_create {
>>> * The xe driver internally uses recoverable pagefaults to
>>> implement
>>> * this.
>>> */
>>> -#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (1 << 3)
>>> +#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (1 << 2)
>>> /** @flags: Flags */
>>> __u32 flags;
>>>
>>> @@ -777,12 +775,11 @@ struct drm_xe_vm_bind_op {
>>> __u32 op;
>>>
>>> #define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0)
>>> -#define DRM_XE_VM_BIND_FLAG_ASYNC (1 << 1)
>>> /*
>>> * Valid on a faulting VM only, do the MAP operation
>>> immediately rather
>>> * than deferring the MAP to the page fault handler.
>>> */
>>> -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 2)
>>> +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1)
>>> /*
>>> * When the NULL flag is set, the page tables are setup with
>>> a special
>>> * bit which indicates writes are dropped and all reads
>>> return zero. In
>>> @@ -790,7 +787,7 @@ struct drm_xe_vm_bind_op {
>>> * operations, the BO handle MBZ, and the BO offset MBZ. This
>>> flag is
>>> * intended to implement VK sparse bindings.
>>> */
>>> -#define DRM_XE_VM_BIND_FLAG_NULL (1 << 3)
>>> +#define DRM_XE_VM_BIND_FLAG_NULL (1 << 2)
>>> /** @flags: Bind flags */
>>> __u32 flags;
>>>
>>> @@ -808,6 +805,35 @@ struct drm_xe_vm_bind_op {
>>> __u64 reserved[3];
>>> };
>>>
>>> +/**
>>> + * struct drm_xe_syncs - In / out syncs for IOCTLs.
>>> + */
>>> +struct drm_xe_syncs {
>>> + /** @num_syncs: amount of syncs to wait on */
>>> + __u32 num_syncs;
>>> +
>>> + /*
>>> + * Block in IOCTL until operation complete, num_syncs MBZ if
>>> set.
>>> + */
>>> +#define DRM_XE_SYNCS_FLAG_WAIT_FOR_OP (1 << 0)
>>> + /** @flags: Sync flags */
>>> + __u32 flags;
>>> +
>>> + /**
>>> + * @cookie: pointer which is written with a non-zero value
>>> if IOCTL
>>> + * operation has been committed but the wait for completion
>>> + * (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP is set) is interrupted.
>>> Value only
>>> + * valid when IOCTL returns -EINTR or -ERESTARTSYS.
>> MBZ on initial call. Should be opaque to user-space.
>>
>> Could we add that the preferred method for production code is to use
>> async vm_binds and, if necessary, wait on an out-fence, preferably a
>> user-fence?
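>>
>> E.g. the interrupted-wait case would then look roughly like this from
>> user-space (sketch only):
>>
>> 	__u64 cookie = 0;	/* MBZ on the first call */
>>
>> 	bind.syncs.cookie = (uintptr_t)&cookie;
>> 	while (ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind) == -1 &&
>> 	       errno == EINTR)
>> 		;	/* on restart the kernel sees cookie != 0 and only waits */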
>>
>>
>>> + */
>>> + __u64 cookie;
>>> +
>>> + /** @syncs: pointer to struct drm_xe_sync array */
>>> + __u64 syncs;
>>> +
>>> + /** @reserved: Reserved */
>>> + __u64 reserved[2];
>>> +};
>>> +
>>> struct drm_xe_vm_bind {
>>> /** @extensions: Pointer to the first extension struct, if
>>> any */
>>> __u64 extensions;
>>> @@ -839,14 +865,8 @@ struct drm_xe_vm_bind {
>>> __u64 vector_of_binds;
>>> };
>>>
>>> - /** @pad: MBZ */
>>> - __u32 pad2;
>>> -
>>> - /** @num_syncs: amount of syncs to wait on */
>>> - __u32 num_syncs;
>>> -
>>> - /** @syncs: pointer to struct drm_xe_sync array */
>>> - __u64 syncs;
>>> + /** @syncs: syncs for bind */
>>> + struct drm_xe_syncs syncs;
>>>
>>> /** @reserved: Reserved */
>>> __u64 reserved[2];
>>> @@ -975,14 +995,14 @@ struct drm_xe_exec {
>>> /** @extensions: Pointer to the first extension struct, if
>>> any */
>>> __u64 extensions;
>>>
>>> + /** @pad: MBZ */
>>> + __u32 pad;
>>> +
>>> /** @exec_queue_id: Exec queue ID for the batch buffer */
>>> __u32 exec_queue_id;
>>>
>>> - /** @num_syncs: Amount of struct drm_xe_sync in array. */
>>> - __u32 num_syncs;
>>> -
>>> - /** @syncs: Pointer to struct drm_xe_sync array. */
>>> - __u64 syncs;
>>> + /** @syncs: syncs for exec */
>>> + struct drm_xe_syncs syncs;
>>>
>>> /**
>>> * @address: address of batch buffer if num_batch_buffer == 1
>>> or an
>>> @@ -996,8 +1016,8 @@ struct drm_xe_exec {
>>> */
>>> __u16 num_batch_buffer;
>>>
>>> - /** @pad: MBZ */
>>> - __u16 pad[3];
>>> + /** @pad2: MBZ */
>>> + __u16 pad2[3];
>>>
>>> /** @reserved: Reserved */
>>> __u64 reserved[2];