[PATCH v3] drm/xe/uapi: Uniform async vs sync handling

Matthew Brost matthew.brost at intel.com
Thu Dec 14 00:26:42 UTC 2023


On Wed, Dec 13, 2023 at 03:49:53PM -0700, Souza, Jose wrote:
> On Wed, 2023-12-13 at 14:29 -0800, Matthew Brost wrote:
> > Remove concept of async vs sync VM bind queues, rather make async vs
> > sync a per IOCTL choice. Since this is per IOCTL, it makes sense to have
> > a singular flag IOCTL rather than per VM bind op flag too. Add
> > DRM_XE_SYNCS_FLAG_WAIT_FOR_OP which is an input sync flag to support
> > this. Support this new flag for the VM bind IOCTL only, with a path
> > to support this for execs too.
> > 
> > v2: Add cookie, move sync wait outside of any locks.
> > v3: Skip bind operations if *cookie non-zero, check reserved bits zero
> > 
> > Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> > Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > Cc: Francois Dugast <francois.dugast at intel.com>
> > Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_exec.c             |  13 ++-
> >  drivers/gpu/drm/xe/xe_exec_queue.c       |   7 +-
> >  drivers/gpu/drm/xe/xe_exec_queue_types.h |   2 -
> >  drivers/gpu/drm/xe/xe_vm.c               | 140 ++++++++++++-----------
> >  drivers/gpu/drm/xe/xe_vm_types.h         |  13 +--
> >  include/uapi/drm/xe_drm.h                |  65 +++++++----
> >  6 files changed, 131 insertions(+), 109 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> > index ba92e5619da3..af2b676c8eef 100644
> > --- a/drivers/gpu/drm/xe/xe_exec.c
> > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > @@ -104,7 +104,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  	struct xe_device *xe = to_xe_device(dev);
> >  	struct xe_file *xef = to_xe_file(file);
> >  	struct drm_xe_exec *args = data;
> > -	struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs);
> > +	struct drm_xe_sync __user *syncs_user = u64_to_user_ptr(args->syncs.syncs);
> >  	u64 __user *addresses_user = u64_to_user_ptr(args->address);
> >  	struct xe_exec_queue *q;
> >  	struct xe_sync_entry *syncs = NULL;
> > @@ -120,7 +120,10 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  	int err = 0;
> >  
> >  	if (XE_IOCTL_DBG(xe, args->extensions) ||
> > -	    XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
> > +	    XE_IOCTL_DBG(xe, args->pad || args->pad2[0] || args->pad2[1] || args->pad2[2]) ||
> > +	    XE_IOCTL_DBG(xe, args->syncs.flags) ||
> > +	    XE_IOCTL_DBG(xe, args->syncs.cookie) ||
> > +	    XE_IOCTL_DBG(xe, args->syncs.reserved[0] || args->syncs.reserved[1]) ||
> >  	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
> >  		return -EINVAL;
> >  
> > @@ -140,8 +143,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  		goto err_exec_queue;
> >  	}
> >  
> > -	if (args->num_syncs) {
> > -		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
> > +	if (args->syncs.num_syncs) {
> > +		syncs = kcalloc(args->syncs.num_syncs, sizeof(*syncs), GFP_KERNEL);
> >  		if (!syncs) {
> >  			err = -ENOMEM;
> >  			goto err_exec_queue;
> > @@ -150,7 +153,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  
> >  	vm = q->vm;
> >  
> > -	for (i = 0; i < args->num_syncs; i++) {
> > +	for (i = 0; i < args->syncs.num_syncs; i++) {
> >  		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs++],
> >  					  &syncs_user[i], SYNC_PARSE_FLAG_EXEC |
> >  					  (xe_vm_in_lr_mode(vm) ?
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> > index eeb9605dd45f..a25d67971fdd 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> > @@ -625,10 +625,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> >  	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
> >  		return -EINVAL;
> >  
> > -	if (eci[0].engine_class >= DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC) {
> > -		bool sync = eci[0].engine_class ==
> > -			DRM_XE_ENGINE_CLASS_VM_BIND_SYNC;
> > -
> > +	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
> >  		for_each_gt(gt, xe, id) {
> >  			struct xe_exec_queue *new;
> >  
> > @@ -654,8 +651,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> >  						   args->width, hwe,
> >  						   EXEC_QUEUE_FLAG_PERSISTENT |
> >  						   EXEC_QUEUE_FLAG_VM |
> > -						   (sync ? 0 :
> > -						    EXEC_QUEUE_FLAG_VM_ASYNC) |
> >  						   (id ?
> >  						    EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
> >  						    0));
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > index c7aefa1c8c31..0f7f6cded4a3 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > @@ -84,8 +84,6 @@ struct xe_exec_queue {
> >  #define EXEC_QUEUE_FLAG_VM			BIT(4)
> >  /* child of VM queue for multi-tile VM jobs */
> >  #define EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD	BIT(5)
> > -/* VM jobs for this queue are asynchronous */
> > -#define EXEC_QUEUE_FLAG_VM_ASYNC		BIT(6)
> >  
> >  	/**
> >  	 * @flags: flags for this exec queue, should statically setup aside from ban
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 2f3df9ee67c9..8407c4d54a45 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -1343,9 +1343,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> >  			struct xe_gt *gt = tile->primary_gt;
> >  			struct xe_vm *migrate_vm;
> >  			struct xe_exec_queue *q;
> > -			u32 create_flags = EXEC_QUEUE_FLAG_VM |
> > -				((flags & XE_VM_FLAG_ASYNC_DEFAULT) ?
> > -				EXEC_QUEUE_FLAG_VM_ASYNC : 0);
> > +			u32 create_flags = EXEC_QUEUE_FLAG_VM;
> >  
> >  			if (!vm->pt_root[id])
> >  				continue;
> > @@ -1712,12 +1710,6 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
> >  	return ERR_PTR(err);
> >  }
> >  
> > -static bool xe_vm_sync_mode(struct xe_vm *vm, struct xe_exec_queue *q)
> > -{
> > -	return q ? !(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC) :
> > -		!(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT);
> > -}
> > -
> >  static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
> >  			struct xe_exec_queue *q, struct xe_sync_entry *syncs,
> >  			u32 num_syncs, bool immediate, bool first_op,
> > @@ -1747,8 +1739,6 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
> >  
> >  	if (last_op)
> >  		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
> > -	if (last_op && xe_vm_sync_mode(vm, q))
> > -		dma_fence_wait(fence, true);
> >  	dma_fence_put(fence);
> >  
> >  	return 0;
> > @@ -1791,8 +1781,6 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
> >  	xe_vma_destroy(vma, fence);
> >  	if (last_op)
> >  		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
> > -	if (last_op && xe_vm_sync_mode(vm, q))
> > -		dma_fence_wait(fence, true);
> >  	dma_fence_put(fence);
> >  
> >  	return 0;
> > @@ -1800,7 +1788,6 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
> >  
> >  #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \
> >  				    DRM_XE_VM_CREATE_FLAG_LR_MODE | \
> > -				    DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT | \
> >  				    DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
> >  
> >  int xe_vm_create_ioctl(struct drm_device *dev, void *data,
> > @@ -1854,8 +1841,6 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
> >  		flags |= XE_VM_FLAG_SCRATCH_PAGE;
> >  	if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE)
> >  		flags |= XE_VM_FLAG_LR_MODE;
> > -	if (args->flags & DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT)
> > -		flags |= XE_VM_FLAG_ASYNC_DEFAULT;
> >  	if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
> >  		flags |= XE_VM_FLAG_FAULT_MODE;
> >  
> > @@ -2263,8 +2248,7 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
> >  static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
> >  				   struct drm_gpuva_ops *ops,
> >  				   struct xe_sync_entry *syncs, u32 num_syncs,
> > -				   struct list_head *ops_list, bool last,
> > -				   bool async)
> > +				   struct list_head *ops_list, bool last)
> >  {
> >  	struct xe_vma_op *last_op = NULL;
> >  	struct drm_gpuva_op *__op;
> > @@ -2696,28 +2680,28 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> >  
> >  #ifdef TEST_VM_ASYNC_OPS_ERROR
> >  #define SUPPORTED_FLAGS	\
> > -	(FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_ASYNC | \
> > -	 DRM_XE_VM_BIND_FLAG_READONLY | DRM_XE_VM_BIND_FLAG_IMMEDIATE | \
> > -	 DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
> > +	(FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \
> > +	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
> >  #else
> >  #define SUPPORTED_FLAGS	\
> > -	(DRM_XE_VM_BIND_FLAG_ASYNC | DRM_XE_VM_BIND_FLAG_READONLY | \
> > +	(DRM_XE_VM_BIND_FLAG_READONLY | \
> >  	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \
> >  	 0xffff)
> >  #endif
> >  #define XE_64K_PAGE_MASK 0xffffull
> > +#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
> >  
> >  #define MAX_BINDS	512	/* FIXME: Picking random upper limit */
> >  
> >  static int vm_bind_ioctl_check_args(struct xe_device *xe,
> >  				    struct drm_xe_vm_bind *args,
> >  				    struct drm_xe_vm_bind_op **bind_ops,
> > -				    bool *async)
> > +				    bool *async, bool *skip_ops)
> >  {
> >  	int err;
> >  	int i;
> >  
> > -	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
> > +	if (XE_IOCTL_DBG(xe, args->pad) ||
> >  	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
> >  		return -EINVAL;
> >  
> > @@ -2745,6 +2729,30 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
> >  		*bind_ops = &args->bind;
> >  	}
> >  
> > +	*async = !(args->syncs.flags & DRM_XE_SYNCS_FLAG_WAIT_FOR_OP);
> > +	*skip_ops = false;
> > +
> > +	if (XE_IOCTL_DBG(xe, args->syncs.flags & ~ALL_DRM_XE_SYNCS_FLAGS) ||
> > +	    XE_IOCTL_DBG(xe, *async && args->syncs.cookie) ||
> > +	    XE_IOCTL_DBG(xe, args->syncs.reserved[0] || args->syncs.reserved[1]) ||
> > +	    XE_IOCTL_DBG(xe, !*async && args->syncs.num_syncs)) {
> > +		err = -EINVAL;
> > +		goto free_bind_ops;
> > +	}
> > +
> > +	if (!*async) {
> > +		u64 __user *cookie = u64_to_user_ptr(args->syncs.cookie);
> > +		u64 value;
> > +
> > +		err = __copy_from_user(&value, cookie, sizeof(u64));
> > +		if (XE_IOCTL_DBG(xe, err)) {
> > +			err = -EFAULT;
> > +			goto free_bind_ops;
> > +		}
> > +		if (value)
> > +			*skip_ops = true;
> > +	}
> > +
> >  	for (i = 0; i < args->num_binds; ++i) {
> >  		u64 range = (*bind_ops)[i].range;
> >  		u64 addr = (*bind_ops)[i].addr;
> > @@ -2775,18 +2783,6 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe,
> >  			goto free_bind_ops;
> >  		}
> >  
> > -		if (i == 0) {
> > -			*async = !!(flags & DRM_XE_VM_BIND_FLAG_ASYNC);
> > -			if (XE_IOCTL_DBG(xe, !*async && args->num_syncs)) {
> > -				err = -EINVAL;
> > -				goto free_bind_ops;
> > -			}
> > -		} else if (XE_IOCTL_DBG(xe, *async !=
> > -					!!(flags & DRM_XE_VM_BIND_FLAG_ASYNC))) {
> > -			err = -EINVAL;
> > -			goto free_bind_ops;
> > -		}
> > -
> >  		if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) ||
> >  		    XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) ||
> >  		    XE_IOCTL_DBG(xe, obj && is_null) ||
> > @@ -2854,12 +2850,25 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
> >  
> >  	xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
> >  				     fence);
> > +	dma_fence_put(fence);
> >  
> > -	if (xe_vm_sync_mode(vm, q)) {
> > -		long timeout = dma_fence_wait(fence, true);
> > +	return err;
> > +}
> > +
> > +static int vm_bind_ioctl_sync_wait(struct xe_vm *vm,
> > +				   struct dma_fence *fence,
> > +				   u64 __user *cookie)
> > +{
> > +	long timeout;
> > +	int err = 0;
> > +
> > +	timeout = dma_fence_wait(fence, true);
> > +	if (timeout < 0) {
> > +		u64 value = 1;
> >  
> > -		if (timeout < 0)
> > -			err = -EINTR;
> > +		err = -ERESTARTSYS;
> > +		if (copy_to_user(cookie, &value, sizeof(value)))
> > +			err = -EFAULT;
> >  	}
> >  
> >  	dma_fence_put(fence);
> > @@ -2875,21 +2884,22 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  	struct drm_xe_sync __user *syncs_user;
> >  	struct xe_bo **bos = NULL;
> >  	struct drm_gpuva_ops **ops = NULL;
> > +	struct dma_fence *fence = NULL;
> >  	struct xe_vm *vm;
> >  	struct xe_exec_queue *q = NULL;
> >  	u32 num_syncs;
> >  	struct xe_sync_entry *syncs = NULL;
> >  	struct drm_xe_vm_bind_op *bind_ops;
> >  	LIST_HEAD(ops_list);
> > -	bool async;
> > +	bool async, skip_ops;
> >  	int err;
> >  	int i;
> >  
> > -	err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async);
> > +	err = vm_bind_ioctl_check_args(xe, args, &bind_ops, &async, &skip_ops);
> >  	if (err)
> >  		return err;
> >  
> > -	if (XE_IOCTL_DBG(xe, args->pad || args->pad2) ||
> > +	if (XE_IOCTL_DBG(xe, args->pad) ||
> >  	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
> >  		return -EINVAL;
> >  
> > @@ -2904,12 +2914,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  			err = -EINVAL;
> >  			goto put_exec_queue;
> >  		}
> > -
> > -		if (XE_IOCTL_DBG(xe, args->num_binds && async !=
> > -				 !!(q->flags & EXEC_QUEUE_FLAG_VM_ASYNC))) {
> > -			err = -EINVAL;
> > -			goto put_exec_queue;
> > -		}
> >  	}
> >  
> >  	vm = xe_vm_lookup(xef, args->vm_id);
> > @@ -2918,14 +2922,6 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  		goto put_exec_queue;
> >  	}
> >  
> > -	if (!args->exec_queue_id) {
> > -		if (XE_IOCTL_DBG(xe, args->num_binds && async !=
> > -				 !!(vm->flags & XE_VM_FLAG_ASYNC_DEFAULT))) {
> > -			err = -EINVAL;
> > -			goto put_vm;
> > -		}
> > -	}
> > -
> >  	err = down_write_killable(&vm->lock);
> >  	if (err)
> >  		goto put_vm;
> > @@ -3015,16 +3011,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  		}
> >  	}
> >  
> > -	if (args->num_syncs) {
> > -		syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
> > +	if (args->syncs.num_syncs) {
> > +		syncs = kcalloc(args->syncs.num_syncs, sizeof(*syncs), GFP_KERNEL);
> >  		if (!syncs) {
> >  			err = -ENOMEM;
> >  			goto put_obj;
> >  		}
> >  	}
> >  
> > -	syncs_user = u64_to_user_ptr(args->syncs);
> > -	for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
> > +	syncs_user = u64_to_user_ptr(args->syncs.syncs);
> > +	for (num_syncs = 0; num_syncs < args->syncs.num_syncs; num_syncs++) {
> >  		err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
> >  					  &syncs_user[num_syncs],
> >  					  (xe_vm_in_lr_mode(vm) ?
> > @@ -3035,7 +3031,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  			goto free_syncs;
> >  	}
> >  
> > -	if (!args->num_binds) {
> > +	if (!args->num_binds || skip_ops) {
> >  		err = -ENODATA;
> >  		goto free_syncs;
> >  	}
> > @@ -3060,8 +3056,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  
> >  		err = vm_bind_ioctl_ops_parse(vm, q, ops[i], syncs, num_syncs,
> >  					      &ops_list,
> > -					      i == args->num_binds - 1,
> > -					      async);
> > +					      i == args->num_binds - 1);
> >  		if (err)
> >  			goto unwind_ops;
> >  	}
> > @@ -3077,7 +3072,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  		xe_exec_queue_get(q);
> >  
> >  	err = vm_bind_ioctl_ops_execute(vm, &ops_list);
> > -
> > +	if (!err && !async) {
> > +		fence = xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm);
> > +		dma_fence_get(fence);
> > +	}
> >  	up_write(&vm->lock);
> >  
> >  	if (q)
> > @@ -3092,13 +3090,19 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  	if (args->num_binds > 1)
> >  		kfree(bind_ops);
> >  
> > -	return err;
> > +	return fence ? vm_bind_ioctl_sync_wait(vm, fence, u64_to_user_ptr(args->syncs.cookie)) :
> > +		err;
> >  
> >  unwind_ops:
> >  	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
> >  free_syncs:
> > -	if (err == -ENODATA)
> > +	if (err == -ENODATA) {
> >  		err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs);
> > +		if (!async) {
> > +			fence = xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm);
> > +			dma_fence_get(fence);
> > +		}
> > +	}
> >  	while (num_syncs--)
> >  		xe_sync_entry_cleanup(&syncs[num_syncs]);
> >  
> > @@ -3108,6 +3112,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  		xe_bo_put(bos[i]);
> >  release_vm_lock:
> >  	up_write(&vm->lock);
> > +	if (fence)
> > +		err = vm_bind_ioctl_sync_wait(vm, fence, u64_to_user_ptr(args->syncs.cookie));
> >  put_vm:
> >  	xe_vm_put(vm);
> >  put_exec_queue:
> > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> > index 2e023596cb15..63e8a50b88e9 100644
> > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > @@ -138,13 +138,12 @@ struct xe_vm {
> >  	 */
> >  #define XE_VM_FLAG_64K			BIT(0)
> >  #define XE_VM_FLAG_LR_MODE		BIT(1)
> > -#define XE_VM_FLAG_ASYNC_DEFAULT	BIT(2)
> > -#define XE_VM_FLAG_MIGRATION		BIT(3)
> > -#define XE_VM_FLAG_SCRATCH_PAGE		BIT(4)
> > -#define XE_VM_FLAG_FAULT_MODE		BIT(5)
> > -#define XE_VM_FLAG_BANNED		BIT(6)
> > -#define XE_VM_FLAG_TILE_ID(flags)	FIELD_GET(GENMASK(8, 7), flags)
> > -#define XE_VM_FLAG_SET_TILE_ID(tile)	FIELD_PREP(GENMASK(8, 7), (tile)->id)
> > +#define XE_VM_FLAG_MIGRATION		BIT(2)
> > +#define XE_VM_FLAG_SCRATCH_PAGE		BIT(3)
> > +#define XE_VM_FLAG_FAULT_MODE		BIT(4)
> > +#define XE_VM_FLAG_BANNED		BIT(5)
> > +#define XE_VM_FLAG_TILE_ID(flags)	FIELD_GET(GENMASK(7, 6), flags)
> > +#define XE_VM_FLAG_SET_TILE_ID(tile)	FIELD_PREP(GENMASK(7, 6), (tile)->id)
> >  	unsigned long flags;
> >  
> >  	/** @composite_fence_ctx: context composite fence */
> > diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> > index 0895e4d2a981..8cbe7ca6fe57 100644
> > --- a/include/uapi/drm/xe_drm.h
> > +++ b/include/uapi/drm/xe_drm.h
> > @@ -140,8 +140,7 @@ struct drm_xe_engine_class_instance {
> >  	 * Kernel only classes (not actual hardware engine class). Used for
> >  	 * creating ordered queues of VM bind operations.
> >  	 */
> > -#define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC	5
> > -#define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC	6
> > +#define DRM_XE_ENGINE_CLASS_VM_BIND		5
> >  	/** @engine_class: engine class id */
> >  	__u16 engine_class;
> >  	/** @engine_instance: engine instance id */
> > @@ -661,7 +660,6 @@ struct drm_xe_vm_create {
> >  	 * still enable recoverable pagefaults if supported by the device.
> >  	 */
> >  #define DRM_XE_VM_CREATE_FLAG_LR_MODE	        (1 << 1)
> > -#define DRM_XE_VM_CREATE_FLAG_ASYNC_DEFAULT	(1 << 2)
> >  	/*
> >  	 * DRM_XE_VM_CREATE_FLAG_FAULT_MODE requires also
> >  	 * DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to be allocated
> > @@ -669,7 +667,7 @@ struct drm_xe_vm_create {
> >  	 * The xe driver internally uses recoverable pagefaults to implement
> >  	 * this.
> >  	 */
> > -#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE	(1 << 3)
> > +#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE	(1 << 2)
> >  	/** @flags: Flags */
> >  	__u32 flags;
> >  
> > @@ -777,12 +775,11 @@ struct drm_xe_vm_bind_op {
> >  	__u32 op;
> >  
> >  #define DRM_XE_VM_BIND_FLAG_READONLY	(1 << 0)
> > -#define DRM_XE_VM_BIND_FLAG_ASYNC	(1 << 1)
> >  	/*
> >  	 * Valid on a faulting VM only, do the MAP operation immediately rather
> >  	 * than deferring the MAP to the page fault handler.
> >  	 */
> > -#define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(1 << 2)
> > +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE	(1 << 1)
> >  	/*
> >  	 * When the NULL flag is set, the page tables are setup with a special
> >  	 * bit which indicates writes are dropped and all reads return zero.  In
> > @@ -790,7 +787,7 @@ struct drm_xe_vm_bind_op {
> >  	 * operations, the BO handle MBZ, and the BO offset MBZ. This flag is
> >  	 * intended to implement VK sparse bindings.
> >  	 */
> > -#define DRM_XE_VM_BIND_FLAG_NULL	(1 << 3)
> > +#define DRM_XE_VM_BIND_FLAG_NULL	(1 << 2)
> >  	/** @flags: Bind flags */
> >  	__u32 flags;
> >  
> > @@ -808,6 +805,36 @@ struct drm_xe_vm_bind_op {
> >  	__u64 reserved[3];
> >  };
> >  
> > +/**
> > + * struct drm_xe_syncs - In / out syncs for IOCTLs.
> > + */
> > +struct drm_xe_syncs {
> > +	/** @num_syncs: amount of syncs to wait on */
> > +	__u32 num_syncs;
> > +
> > +	/*
> > +	 * Block in IOCTL until operation complete, num_syncs MBZ if set.
> > +	 */
> > +#define DRM_XE_SYNCS_FLAG_WAIT_FOR_OP (1 << 0)
> > +	/** @flags: Sync flags */
> > +	__u32 flags;
> > +
> > +	/**
> > +	 * @cookie: pointer which is written with an non-zero value if IOCTL
> > +	 * operation has been committed but the wait for completion
> > +	 * (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP is set) is interrupted. Value of
> > +	 * *cookie MBZ for IOCTL operations to be executed, if non-zero
> > +	 * operations are skipped and blocks until exec queue for IOCTL is idle.
> > +	 */
> > +	__u64 cookie;
> 
> I just left a comment in v2.
> The pointer in the comment is misleading; it looks like the UMD needs to allocate memory and set it here.
> About the name, I think it should be named as reserved so UMDs don't touch it at all. In fact, I think UMDs should not even know that the reserved field
> is being used for something.
> 

The UMD does need to allocate memory, here is an IGT snippet:

 93 int  __xe_vm_bind(int fd, uint32_t vm, uint32_t exec_queue, uint32_t bo,
 94                   uint64_t offset, uint64_t addr, uint64_t size, uint32_t op,
 95                   uint32_t flags, struct drm_xe_sync *sync, uint32_t num_syncs,
 96                   uint32_t prefetch_region, uint8_t pat_index, uint64_t ext)
 97 {
 98         uint64_t cookie = 0;
 99         struct drm_xe_vm_bind bind = {
100                 .extensions = ext,
101                 .vm_id = vm,
102                 .num_binds = 1,
103                 .bind.obj = bo,
104                 .bind.obj_offset = offset,
105                 .bind.range = size,
106                 .bind.addr = addr,
107                 .bind.op = op,
108                 .bind.flags = flags,
109                 .bind.prefetch_mem_region_instance = prefetch_region,
110                 .syncs.num_syncs = num_syncs,
111                 .syncs.syncs = (uintptr_t)sync,
112                 .syncs.flags = num_syncs == 0 ?
113                         DRM_XE_SYNCS_FLAG_WAIT_FOR_OP : 0,
114                 .syncs.cookie = num_syncs == 0 ? (uintptr_t)&cookie : 0,
115                 .exec_queue_id = exec_queue,
116                 .bind.pat_index = (pat_index == DEFAULT_PAT_INDEX) ?
117                         intel_get_pat_idx_wb(fd) : pat_index,
118         };
119
120         if (igt_ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind))
121                 return -errno;
122
123         return 0;
124 }

The cookie is a user pointer. If this IOCTL is interrupted and then
restarted by the UMD, the value of the cookie would tell the KMD what to do
on the restart.

Does this make sense?

Matt

> > +
> > +	/** @syncs: pointer to struct drm_xe_sync array */
> > +	__u64 syncs;
> > +
> > +	/** @reserved: Reserved */
> > +	__u64 reserved[2];
> > +};
> > +
> >  struct drm_xe_vm_bind {
> >  	/** @extensions: Pointer to the first extension struct, if any */
> >  	__u64 extensions;
> > @@ -839,14 +866,8 @@ struct drm_xe_vm_bind {
> >  		__u64 vector_of_binds;
> >  	};
> >  
> > -	/** @pad: MBZ */
> > -	__u32 pad2;
> > -
> > -	/** @num_syncs: amount of syncs to wait on */
> > -	__u32 num_syncs;
> > -
> > -	/** @syncs: pointer to struct drm_xe_sync array */
> > -	__u64 syncs;
> > +	/** @syncs: syncs for bind */
> > +	struct drm_xe_syncs syncs;
> >  
> >  	/** @reserved: Reserved */
> >  	__u64 reserved[2];
> > @@ -975,14 +996,14 @@ struct drm_xe_exec {
> >  	/** @extensions: Pointer to the first extension struct, if any */
> >  	__u64 extensions;
> >  
> > +	/** @pad: MBZ */
> > +	__u32 pad;
> > +
> >  	/** @exec_queue_id: Exec queue ID for the batch buffer */
> >  	__u32 exec_queue_id;
> >  
> > -	/** @num_syncs: Amount of struct drm_xe_sync in array. */
> > -	__u32 num_syncs;
> > -
> > -	/** @syncs: Pointer to struct drm_xe_sync array. */
> > -	__u64 syncs;
> > +	/** @syncs: syncs for exec */
> > +	struct drm_xe_syncs syncs;
> 
> if not implemented should not be used
> 

The idea is 'struct drm_xe_syncs' is a unified interface between IOCTLs
that require syncs. Currently execs implement a subset of 'struct
drm_xe_syncs' features.

Matt

> >  
> >  	/**
> >  	 * @address: address of batch buffer if num_batch_buffer == 1 or an
> > @@ -996,8 +1017,8 @@ struct drm_xe_exec {
> >  	 */
> >  	__u16 num_batch_buffer;
> >  
> > -	/** @pad: MBZ */
> > -	__u16 pad[3];
> > +	/** @pad2: MBZ */
> > +	__u16 pad2[3];
> >  
> >  	/** @reserved: Reserved */
> >  	__u64 reserved[2];
> 


More information about the Intel-xe mailing list