[Intel-xe] [PATCH v2 38/50] drm/xe/uapi: Rename couple exec_queue items

Souza, Jose jose.souza at intel.com
Thu Nov 9 20:02:12 UTC 2023


On Thu, 2023-11-09 at 13:40 -0500, Rodrigo Vivi wrote:
> On Thu, Nov 09, 2023 at 05:14:17PM +0000, Souza, Jose wrote:
> > On Fri, 2023-11-03 at 14:34 +0000, Francois Dugast wrote:
> > > From: Rodrigo Vivi <rodrigo.vivi at intel.com>
> > > 
> > > 'Placement' is now used in many terms around the memory_region selection
> > > where the BO or the page table will live. Also, the job itself deserves
> > > a word of more action since it is dispatched to the engine.
> > 
> > num_dispositions is a bad name, placement is better in my opinion.
> > it says exactly what it does: on which hw engines the batch buffers can be placed.
> 
> Please take a look at patch 43 in this series, or at the new squashed
> version in the take2-v3 that Francois just sent.
> 
> [PATCH v2 43/50] squash! drm/xe/uapi: Rename couple exec_queue items
> 
> I don't like the 'placement' word exactly because it is not true that
> the batch buffer is 'placed' in the engine. The batch buffer is placed
> in memory and we now have 'placement' variables on memory regions.
> So, it is doubly confusing.
> After the BB is placed in memory, the instruction with the offset of
> that placement is sent to the engines so the EUs can find it.
> So, let's keep the 'placement' name for memory and use something else
> for the engine. Please take a look at the final patch and then we can
> continue the discussion there, trying to find a better naming.

num_eng_per_bb is also not a good name in my opinion.
I still believe placement is a better name.

The Xe KMD will create drm_xe_exec_queue_create.num_bb_per_exec contexts, and each context can be placed on any of the
drm_xe_exec_queue_create.num_placements hw engines.
At the exec uAPI, the scheduler will pick one of the allowed hw engines to execute a batch buffer and place the context to be executed on that hw
engine.
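
To make that concrete, here is a rough userspace-side sketch of how an exec queue with the proposed naming would be created. Illustrative only: it assumes the renamed xe_drm.h from this series (num_bb_per_exec / num_dispositions, exec_queue_id output field), a hypothetical pick_engine() helper, and it follows the n = j * num_bb_per_exec + i indexing used by calc_validate_logical_mask() in the quoted patch below rather than the formula in the uapi comment.

#include <stdint.h>
#include <sys/ioctl.h>
#include "xe_drm.h"

/* Hypothetical helper: returns the engine instance chosen for parallel
 * slot i and disposition/placement j. */
extern struct drm_xe_engine_class_instance pick_engine(int i, int j);

static int create_parallel_exec_queue(int fd, uint32_t vm_id,
				      uint16_t num_bb_per_exec,
				      uint16_t num_dispositions,
				      uint32_t *exec_queue_id)
{
	struct drm_xe_engine_class_instance eci[num_bb_per_exec * num_dispositions];
	struct drm_xe_exec_queue_create create = {
		.vm_id = vm_id,
		.num_bb_per_exec = num_bb_per_exec,
		.num_dispositions = num_dispositions,
		.instances = (uint64_t)(uintptr_t)eci,
	};
	int i, j;

	/* Same indexing as calc_validate_logical_mask():
	 * n = j * num_bb_per_exec + i */
	for (j = 0; j < num_dispositions; ++j)
		for (i = 0; i < num_bb_per_exec; ++i)
			eci[j * num_bb_per_exec + i] = pick_engine(i, j);

	if (ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create))
		return -1;

	*exec_queue_id = create.exec_queue_id;
	return 0;
}

Each later drm_xe_exec on that queue then has to pass exactly num_bb_per_exec batch buffer addresses, matching the q->num_bb_per_exec != args->num_batch_buffer check in the xe_exec.c hunk quoted below.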

> 
> Thanks,
> Rodrigo.
> 
> > 
> > > 
> > > 'width' is so generic and in graphics world can mean many other different
> > > things. Let's be more specific here on the intent of that.
> > 
> > This one sounds good.
> > 
> > > 
> > > Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> > > ---
> > >  drivers/gpu/drm/xe/xe_devcoredump.c      |  8 ++--
> > >  drivers/gpu/drm/xe/xe_exec.c             |  4 +-
> > >  drivers/gpu/drm/xe/xe_exec_queue.c       | 49 ++++++++++++------------
> > >  drivers/gpu/drm/xe/xe_exec_queue.h       |  4 +-
> > >  drivers/gpu/drm/xe/xe_exec_queue_types.h |  4 +-
> > >  drivers/gpu/drm/xe/xe_guc_submit.c       | 32 ++++++++--------
> > >  drivers/gpu/drm/xe/xe_ring_ops.c         |  8 ++--
> > >  drivers/gpu/drm/xe/xe_sched_job.c        | 10 ++---
> > >  drivers/gpu/drm/xe/xe_trace.h            |  8 ++--
> > >  include/uapi/drm/xe_drm.h                | 20 ++++++----
> > >  10 files changed, 77 insertions(+), 70 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
> > > index 68abc0b195be..b4e8de4903b9 100644
> > > --- a/drivers/gpu/drm/xe/xe_devcoredump.c
> > > +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
> > > @@ -130,7 +130,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
> > >  	struct xe_hw_engine *hwe;
> > >  	enum xe_hw_engine_id id;
> > >  	u32 adj_logical_mask = q->logical_mask;
> > > -	u32 width_mask = (0x1 << q->width) - 1;
> > > +	u32 num_bb_per_exec_mask = (0x1 << q->num_bb_per_exec) - 1;
> > >  	int i;
> > >  	bool cookie;
> > >  
> > > @@ -138,10 +138,10 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
> > >  	ss->boot_time = ktime_get_boottime();
> > >  
> > >  	cookie = dma_fence_begin_signalling();
> > > -	for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
> > > +	for (i = 0; q->num_bb_per_exec > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
> > >  		if (adj_logical_mask & BIT(i)) {
> > > -			adj_logical_mask |= width_mask << i;
> > > -			i += q->width;
> > > +			adj_logical_mask |= num_bb_per_exec_mask << i;
> > > +			i += q->num_bb_per_exec;
> > >  		} else {
> > >  			++i;
> > >  		}
> > > diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> > > index 28e84a0bbeb0..ca922635db89 100644
> > > --- a/drivers/gpu/drm/xe/xe_exec.c
> > > +++ b/drivers/gpu/drm/xe/xe_exec.c
> > > @@ -161,7 +161,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > >  	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
> > >  		return -EINVAL;
> > >  
> > > -	if (XE_IOCTL_DBG(xe, q->width != args->num_batch_buffer))
> > > +	if (XE_IOCTL_DBG(xe, q->num_bb_per_exec != args->num_batch_buffer))
> > >  		return -EINVAL;
> > >  
> > >  	if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) {
> > > @@ -189,7 +189,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > >  
> > >  	if (xe_exec_queue_is_parallel(q)) {
> > >  		err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
> > > -				       q->width);
> > > +				       q->num_bb_per_exec);
> > >  		if (err) {
> > >  			err = -EFAULT;
> > >  			goto err_syncs;
> > > diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> > > index 59e8d1ed34f7..849e463c4ed8 100644
> > > --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> > > +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> > > @@ -33,7 +33,8 @@ enum xe_exec_queue_sched_prop {
> > >  static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
> > >  						    struct xe_vm *vm,
> > >  						    u32 logical_mask,
> > > -						    u16 width, struct xe_hw_engine *hwe,
> > > +						    u16 num_bb_per_exec,
> > > +						    struct xe_hw_engine *hwe,
> > >  						    u32 flags)
> > >  {
> > >  	struct xe_exec_queue *q;
> > > @@ -44,7 +45,7 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
> > >  	/* only kernel queues can be permanent */
> > >  	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
> > >  
> > > -	q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
> > > +	q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * num_bb_per_exec, GFP_KERNEL);
> > >  	if (!q)
> > >  		return ERR_PTR(-ENOMEM);
> > >  
> > > @@ -55,7 +56,7 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
> > >  	if (vm)
> > >  		q->vm = xe_vm_get(vm);
> > >  	q->class = hwe->class;
> > > -	q->width = width;
> > > +	q->num_bb_per_exec = num_bb_per_exec;
> > >  	q->logical_mask = logical_mask;
> > >  	q->fence_irq = &gt->fence_irq[hwe->class];
> > >  	q->ring_ops = gt->ring_ops[hwe->class];
> > > @@ -77,7 +78,7 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
> > >  		q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
> > >  	}
> > >  
> > > -	for (i = 0; i < width; ++i) {
> > > +	for (i = 0; i < num_bb_per_exec; ++i) {
> > >  		err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K);
> > >  		if (err)
> > >  			goto err_lrc;
> > > @@ -108,7 +109,7 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
> > >  }
> > >  
> > >  struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
> > > -					   u32 logical_mask, u16 width,
> > > +					   u32 logical_mask, u16 num_bb_per_exec,
> > >  					   struct xe_hw_engine *hwe, u32 flags)
> > >  {
> > >  	struct xe_exec_queue *q;
> > > @@ -119,7 +120,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
> > >  		if (err)
> > >  			return ERR_PTR(err);
> > >  	}
> > > -	q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags);
> > > +	q = __xe_exec_queue_create(xe, vm, logical_mask, num_bb_per_exec, hwe, flags);
> > >  	if (vm)
> > >  		xe_vm_unlock(vm);
> > >  
> > > @@ -170,7 +171,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
> > >  {
> > >  	int i;
> > >  
> > > -	for (i = 0; i < q->width; ++i)
> > > +	for (i = 0; i < q->num_bb_per_exec; ++i)
> > >  		xe_lrc_finish(q->lrc + i);
> > >  	if (q->vm)
> > >  		xe_vm_put(q->vm);
> > > @@ -512,15 +513,15 @@ find_hw_engine(struct xe_device *xe,
> > >  
> > >  static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
> > >  					struct drm_xe_engine_class_instance *eci,
> > > -					u16 width, u16 num_placements)
> > > +					u16 num_bb_per_exec, u16 num_dispositions)
> > >  {
> > >  	struct xe_hw_engine *hwe;
> > >  	enum xe_hw_engine_id id;
> > >  	u32 logical_mask = 0;
> > >  
> > > -	if (XE_IOCTL_DBG(xe, width != 1))
> > > +	if (XE_IOCTL_DBG(xe, num_bb_per_exec != 1))
> > >  		return 0;
> > > -	if (XE_IOCTL_DBG(xe, num_placements != 1))
> > > +	if (XE_IOCTL_DBG(xe, num_dispositions != 1))
> > >  		return 0;
> > >  	if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
> > >  		return 0;
> > > @@ -541,9 +542,9 @@ static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
> > >  
> > >  static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
> > >  				      struct drm_xe_engine_class_instance *eci,
> > > -				      u16 width, u16 num_placements)
> > > +				      u16 num_bb_per_exec, u16 num_dispositions)
> > >  {
> > > -	int len = width * num_placements;
> > > +	int len = num_bb_per_exec * num_dispositions;
> > >  	int i, j, n;
> > >  	u16 class;
> > >  	u16 gt_id;
> > > @@ -553,13 +554,13 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
> > >  			 len > 1))
> > >  		return 0;
> > >  
> > > -	for (i = 0; i < width; ++i) {
> > > +	for (i = 0; i < num_bb_per_exec; ++i) {
> > >  		u32 current_mask = 0;
> > >  
> > > -		for (j = 0; j < num_placements; ++j) {
> > > +		for (j = 0; j < num_dispositions; ++j) {
> > >  			struct xe_hw_engine *hwe;
> > >  
> > > -			n = j * width + i;
> > > +			n = j * num_bb_per_exec + i;
> > >  
> > >  			hwe = find_hw_engine(xe, eci[n]);
> > >  			if (XE_IOCTL_DBG(xe, !hwe))
> > > @@ -575,7 +576,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
> > >  			class = eci[n].engine_class;
> > >  			gt_id = eci[n].gt_id;
> > >  
> > > -			if (width == 1 || !i)
> > > +			if (num_bb_per_exec == 1 || !i)
> > >  				return_mask |= BIT(eci[n].engine_instance);
> > >  			current_mask |= BIT(eci[n].engine_instance);
> > >  		}
> > > @@ -612,7 +613,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> > >  	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
> > >  		return -EINVAL;
> > >  
> > > -	len = args->width * args->num_placements;
> > > +	len = args->num_bb_per_exec * args->num_dispositions;
> > >  	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
> > >  		return -EINVAL;
> > >  
> > > @@ -637,8 +638,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> > >  
> > >  			eci[0].gt_id = gt->info.id;
> > >  			logical_mask = bind_exec_queue_logical_mask(xe, gt, eci,
> > > -								    args->width,
> > > -								    args->num_placements);
> > > +								    args->num_bb_per_exec,
> > > +								    args->num_dispositions);
> > >  			if (XE_IOCTL_DBG(xe, !logical_mask))
> > >  				return -EINVAL;
> > >  
> > > @@ -651,7 +652,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> > >  
> > >  			migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
> > >  			new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
> > > -						   args->width, hwe,
> > > +						   args->num_bb_per_exec, hwe,
> > >  						   EXEC_QUEUE_FLAG_PERSISTENT |
> > >  						   EXEC_QUEUE_FLAG_VM |
> > >  						   (sync ? 0 :
> > > @@ -678,8 +679,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> > >  	} else {
> > >  		gt = xe_device_get_gt(xe, eci[0].gt_id);
> > >  		logical_mask = calc_validate_logical_mask(xe, gt, eci,
> > > -							  args->width,
> > > -							  args->num_placements);
> > > +							  args->num_bb_per_exec,
> > > +							  args->num_dispositions);
> > >  		if (XE_IOCTL_DBG(xe, !logical_mask))
> > >  			return -EINVAL;
> > >  
> > > @@ -704,7 +705,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> > >  		}
> > >  
> > >  		q = xe_exec_queue_create(xe, vm, logical_mask,
> > > -					 args->width, hwe,
> > > +					 args->num_bb_per_exec, hwe,
> > >  					 xe_vm_no_dma_fences(vm) ? 0 :
> > >  					 EXEC_QUEUE_FLAG_PERSISTENT);
> > >  		up_read(&vm->lock);
> > > @@ -827,7 +828,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
> > >  	if (xe_exec_queue_is_parallel(q)) {
> > >  		int i;
> > >  
> > > -		for (i = 0; i < q->width; ++i) {
> > > +		for (i = 0; i < q->num_bb_per_exec; ++i) {
> > >  			if (xe_lrc_seqno(&q->lrc[i]) !=
> > >  			    q->lrc[i].fence_ctx.next_seqno - 1)
> > >  				return false;
> > > diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
> > > index 59a54bfb9a8c..6782f3ce9faf 100644
> > > --- a/drivers/gpu/drm/xe/xe_exec_queue.h
> > > +++ b/drivers/gpu/drm/xe/xe_exec_queue.h
> > > @@ -15,7 +15,7 @@ struct xe_device;
> > >  struct xe_file;
> > >  
> > >  struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
> > > -					   u32 logical_mask, u16 width,
> > > +					   u32 logical_mask, u16 num_bb_per_exec,
> > >  					   struct xe_hw_engine *hw_engine, u32 flags);
> > >  struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
> > >  						 struct xe_vm *vm,
> > > @@ -40,7 +40,7 @@ static inline void xe_exec_queue_put(struct xe_exec_queue *q)
> > >  
> > >  static inline bool xe_exec_queue_is_parallel(struct xe_exec_queue *q)
> > >  {
> > > -	return q->width > 1;
> > > +	return q->num_bb_per_exec > 1;
> > >  }
> > >  
> > >  bool xe_exec_queue_is_lr(struct xe_exec_queue *q);
> > > diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > > index ecd761177567..eb924a3e5d98 100644
> > > --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> > > @@ -47,8 +47,8 @@ struct xe_exec_queue {
> > >  	u32 logical_mask;
> > >  	/** @name: name of this exec queue */
> > >  	char name[MAX_FENCE_NAME_LEN];
> > > -	/** @width: width (number BB submitted per exec) of this exec queue */
> > > -	u16 width;
> > > +	/** @num_bb_per_exec: the width of this exec queue */
> > > +	u16 num_bb_per_exec;
> > >  	/** @fence_irq: fence IRQ used to signal job completion */
> > >  	struct xe_hw_fence_irq *fence_irq;
> > >  
> > > diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> > > index 870dc5c532fa..b5a41a772445 100644
> > > --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> > > +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> > > @@ -259,7 +259,7 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
> > >  	if (xe_exec_queue_is_parallel(q))
> > >  		bitmap_release_region(guc->submission_state.guc_ids_bitmap,
> > >  				      q->guc->id - GUC_ID_START_MLRC,
> > > -				      order_base_2(q->width));
> > > +				      order_base_2(q->num_bb_per_exec));
> > >  	else
> > >  		ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
> > >  }
> > > @@ -283,7 +283,7 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
> > >  		void *bitmap = guc->submission_state.guc_ids_bitmap;
> > >  
> > >  		ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
> > > -					      order_base_2(q->width));
> > > +					      order_base_2(q->num_bb_per_exec));
> > >  	} else {
> > >  		ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
> > >  				     GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
> > > @@ -295,7 +295,7 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
> > >  	if (xe_exec_queue_is_parallel(q))
> > >  		q->guc->id += GUC_ID_START_MLRC;
> > >  
> > > -	for (i = 0; i < q->width; ++i) {
> > > +	for (i = 0; i < q->num_bb_per_exec; ++i) {
> > >  		ptr = xa_store(&guc->submission_state.exec_queue_lookup,
> > >  			       q->guc->id + i, q, GFP_NOWAIT);
> > >  		if (IS_ERR(ptr)) {
> > > @@ -315,7 +315,7 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
> > >  static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
> > >  {
> > >  	mutex_lock(&guc->submission_state.lock);
> > > -	__release_guc_id(guc, q, q->width);
> > > +	__release_guc_id(guc, q, q->num_bb_per_exec);
> > >  	mutex_unlock(&guc->submission_state.lock);
> > >  }
> > >  
> > > @@ -426,11 +426,11 @@ static void __register_mlrc_engine(struct xe_guc *guc,
> > >  	action[len++] = info->wq_base_lo;
> > >  	action[len++] = info->wq_base_hi;
> > >  	action[len++] = info->wq_size;
> > > -	action[len++] = q->width;
> > > +	action[len++] = q->num_bb_per_exec;
> > >  	action[len++] = info->hwlrca_lo;
> > >  	action[len++] = info->hwlrca_hi;
> > >  
> > > -	for (i = 1; i < q->width; ++i) {
> > > +	for (i = 1; i < q->num_bb_per_exec; ++i) {
> > >  		struct xe_lrc *lrc = q->lrc + i;
> > >  
> > >  		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
> > > @@ -578,7 +578,7 @@ static void wq_item_append(struct xe_exec_queue *q)
> > >  	struct iosys_map map = xe_lrc_parallel_map(q->lrc);
> > >  #define WQ_HEADER_SIZE	4	/* Includes 1 LRC address too */
> > >  	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
> > > -	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
> > > +	u32 wqi_size = (q->num_bb_per_exec + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
> > >  	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
> > >  	int i = 0, j;
> > >  
> > > @@ -595,7 +595,7 @@ static void wq_item_append(struct xe_exec_queue *q)
> > >  	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
> > >  		FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64));
> > >  	wqi[i++] = 0;
> > > -	for (j = 1; j < q->width; ++j) {
> > > +	for (j = 1; j < q->num_bb_per_exec; ++j) {
> > >  		struct xe_lrc *lrc = q->lrc + j;
> > >  
> > >  		wqi[i++] = lrc->ring.tail / sizeof(u64);
> > > @@ -766,17 +766,17 @@ static void simple_error_capture(struct xe_exec_queue *q)
> > >  	struct xe_hw_engine *hwe;
> > >  	enum xe_hw_engine_id id;
> > >  	u32 adj_logical_mask = q->logical_mask;
> > > -	u32 width_mask = (0x1 << q->width) - 1;
> > > +	u32 width_mask = (0x1 << q->num_bb_per_exec) - 1;
> > >  	int i;
> > >  	bool cookie;
> > >  
> > >  	if (q->vm && !q->vm->error_capture.capture_once) {
> > >  		q->vm->error_capture.capture_once = true;
> > >  		cookie = dma_fence_begin_signalling();
> > > -		for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
> > > +		for (i = 0; q->num_bb_per_exec > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
> > >  			if (adj_logical_mask & BIT(i)) {
> > >  				adj_logical_mask |= width_mask << i;
> > > -				i += q->width;
> > > +				i += q->num_bb_per_exec;
> > >  			} else {
> > >  				++i;
> > >  			}
> > > @@ -1462,7 +1462,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q)
> > >  		int i;
> > >  
> > >  		trace_xe_exec_queue_resubmit(q);
> > > -		for (i = 0; i < q->width; ++i)
> > > +		for (i = 0; i < q->num_bb_per_exec; ++i)
> > >  			xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail);
> > >  		drm_sched_resubmit_jobs(sched);
> > >  	}
> > > @@ -1508,7 +1508,7 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
> > >  	}
> > >  
> > >  	xe_assert(xe, guc_id >= q->guc->id);
> > > -	xe_assert(xe, guc_id < (q->guc->id + q->width));
> > > +	xe_assert(xe, guc_id < (q->guc->id + q->num_bb_per_exec));
> > >  
> > >  	return q;
> > >  }
> > > @@ -1768,20 +1768,20 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
> > >  	memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
> > >  	snapshot->class = q->class;
> > >  	snapshot->logical_mask = q->logical_mask;
> > > -	snapshot->width = q->width;
> > > +	snapshot->width = q->num_bb_per_exec;
> > >  	snapshot->refcount = kref_read(&q->refcount);
> > >  	snapshot->sched_timeout = sched->timeout;
> > >  	snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
> > >  	snapshot->sched_props.preempt_timeout_us =
> > >  		q->sched_props.preempt_timeout_us;
> > >  
> > > -	snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot),
> > > +	snapshot->lrc = kmalloc_array(q->num_bb_per_exec, sizeof(struct lrc_snapshot),
> > >  				      GFP_ATOMIC);
> > >  
> > >  	if (!snapshot->lrc) {
> > >  		drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n");
> > >  	} else {
> > > -		for (i = 0; i < q->width; ++i) {
> > > +		for (i = 0; i < q->num_bb_per_exec; ++i) {
> > >  			struct xe_lrc *lrc = q->lrc + i;
> > >  
> > >  			snapshot->lrc[i].context_desc =
> > > diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> > > index 59e0aa2d6a4c..d3d671784e8e 100644
> > > --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> > > +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> > > @@ -383,7 +383,7 @@ static void emit_job_gen12_gsc(struct xe_sched_job *job)
> > >  {
> > >  	struct xe_gt *gt = job->q->gt;
> > >  
> > > -	xe_gt_assert(gt, job->q->width <= 1); /* no parallel submission for GSCCS */
> > > +	xe_gt_assert(gt, job->q->num_bb_per_exec <= 1); /* no parallel submission for GSCCS */
> > >  
> > >  	__emit_job_gen12_simple(job, job->q->lrc,
> > >  				job->batch_addr[0],
> > > @@ -400,7 +400,7 @@ static void emit_job_gen12_copy(struct xe_sched_job *job)
> > >  		return;
> > >  	}
> > >  
> > > -	for (i = 0; i < job->q->width; ++i)
> > > +	for (i = 0; i < job->q->num_bb_per_exec; ++i)
> > >  		__emit_job_gen12_simple(job, job->q->lrc + i,
> > >  				        job->batch_addr[i],
> > >  				        xe_sched_job_seqno(job));
> > > @@ -411,7 +411,7 @@ static void emit_job_gen12_video(struct xe_sched_job *job)
> > >  	int i;
> > >  
> > >  	/* FIXME: Not doing parallel handshake for now */
> > > -	for (i = 0; i < job->q->width; ++i)
> > > +	for (i = 0; i < job->q->num_bb_per_exec; ++i)
> > >  		__emit_job_gen12_video(job, job->q->lrc + i,
> > >  				       job->batch_addr[i],
> > >  				       xe_sched_job_seqno(job));
> > > @@ -421,7 +421,7 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job)
> > >  {
> > >  	int i;
> > >  
> > > -	for (i = 0; i < job->q->width; ++i)
> > > +	for (i = 0; i < job->q->num_bb_per_exec; ++i)
> > >  		__emit_job_gen12_render_compute(job, job->q->lrc + i,
> > >  						job->batch_addr[i],
> > >  						xe_sched_job_seqno(job));
> > > diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
> > > index adbd82f8744e..1884b6b6b398 100644
> > > --- a/drivers/gpu/drm/xe/xe_sched_job.c
> > > +++ b/drivers/gpu/drm/xe/xe_sched_job.c
> > > @@ -117,13 +117,13 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
> > >  	} else {
> > >  		struct dma_fence_array *cf;
> > >  
> > > -		fences = kmalloc_array(q->width, sizeof(*fences), GFP_KERNEL);
> > > +		fences = kmalloc_array(q->num_bb_per_exec, sizeof(*fences), GFP_KERNEL);
> > >  		if (!fences) {
> > >  			err = -ENOMEM;
> > >  			goto err_sched_job;
> > >  		}
> > >  
> > > -		for (j = 0; j < q->width; ++j) {
> > > +		for (j = 0; j < q->num_bb_per_exec; ++j) {
> > >  			fences[j] = xe_lrc_create_seqno_fence(q->lrc + j);
> > >  			if (IS_ERR(fences[j])) {
> > >  				err = PTR_ERR(fences[j]);
> > > @@ -131,7 +131,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
> > >  			}
> > >  		}
> > >  
> > > -		cf = dma_fence_array_create(q->width, fences,
> > > +		cf = dma_fence_array_create(q->num_bb_per_exec, fences,
> > >  					    q->parallel.composite_fence_ctx,
> > >  					    q->parallel.composite_fence_seqno++,
> > >  					    false);
> > > @@ -142,13 +142,13 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
> > >  		}
> > >  
> > >  		/* Sanity check */
> > > -		for (j = 0; j < q->width; ++j)
> > > +		for (j = 0; j < q->num_bb_per_exec; ++j)
> > >  			xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno);
> > >  
> > >  		job->fence = &cf->base;
> > >  	}
> > >  
> > > -	width = q->width;
> > > +	width = q->num_bb_per_exec;
> > >  	if (is_migration)
> > >  		width = 2;
> > >  
> > > diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
> > > index d55dd1521df3..dcf28aaeb78a 100644
> > > --- a/drivers/gpu/drm/xe/xe_trace.h
> > > +++ b/drivers/gpu/drm/xe/xe_trace.h
> > > @@ -112,7 +112,7 @@ DECLARE_EVENT_CLASS(xe_exec_queue,
> > >  			     __field(enum xe_engine_class, class)
> > >  			     __field(u32, logical_mask)
> > >  			     __field(u8, gt_id)
> > > -			     __field(u16, width)
> > > +			     __field(u16, num_bb_per_exec)
> > >  			     __field(u16, guc_id)
> > >  			     __field(u32, guc_state)
> > >  			     __field(u32, flags)
> > > @@ -122,15 +122,15 @@ DECLARE_EVENT_CLASS(xe_exec_queue,
> > >  			   __entry->class = q->class;
> > >  			   __entry->logical_mask = q->logical_mask;
> > >  			   __entry->gt_id = q->gt->info.id;
> > > -			   __entry->width = q->width;
> > > +			   __entry->num_bb_per_exec = q->num_bb_per_exec;
> > >  			   __entry->guc_id = q->guc->id;
> > >  			   __entry->guc_state = atomic_read(&q->guc->state);
> > >  			   __entry->flags = q->flags;
> > >  			   ),
> > >  
> > > -		    TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x",
> > > +		    TP_printk("%d:0x%x, gt=%d, num_bb_per_exec=%d, guc_id=%d, guc_state=0x%x, flags=0x%x",
> > >  			      __entry->class, __entry->logical_mask,
> > > -			      __entry->gt_id, __entry->width, __entry->guc_id,
> > > +			      __entry->gt_id, __entry->num_bb_per_exec, __entry->guc_id,
> > >  			      __entry->guc_state, __entry->flags)
> > >  );
> > >  
> > > diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> > > index 2d0fb4386a69..a6c70b8697c7 100644
> > > --- a/include/uapi/drm/xe_drm.h
> > > +++ b/include/uapi/drm/xe_drm.h
> > > @@ -1013,11 +1013,17 @@ struct drm_xe_exec_queue_create {
> > >  	/** @extensions: Pointer to the first extension struct, if any */
> > >  	__u64 extensions;
> > >  
> > > -	/** @width: submission width (number BB per exec) for this exec queue */
> > > -	__u16 width;
> > > +	/**
> > > +	 * @num_bb_per_exec: Indicates a submission width for this exec queue,
> > > +	 * for how many batch buffers can be submitted in parallel.
> > 
> > 'can' sounds like 'up to' here; I would change that to 'will'.
> > 
> > > +	 */
> > > +	__u16 num_bb_per_exec;
> > >  
> > > -	/** @num_placements: number of valid placements for this exec queue */
> > > -	__u16 num_placements;
> > > +	/**
> > > +	 * @num_dispositions: Indicates how the batch buffers will be
> > > +	 * distributed to the hardware engines listed on @instance.
> > > +	 */
> > > +	__u16 num_dispositions;
> > >  
> > >  	/** @vm_id: VM to use for this exec queue */
> > >  	__u32 vm_id;
> > > @@ -1032,8 +1038,8 @@ struct drm_xe_exec_queue_create {
> > >  	 * @instances: user pointer to a 2-d array of struct
> > >  	 * drm_xe_engine_class_instance
> > >  	 *
> > > -	 * length = width (i) * num_placements (j)
> > > -	 * index = j + i * width
> > > +	 * length = num_bb_per_exec (i) * num_dispositions (j)
> > > +	 * index = j + i * num_bb_per_exec
> > >  	 */
> > >  	__u64 instances;
> > >  
> > > @@ -1143,7 +1149,7 @@ struct drm_xe_exec {
> > >  
> > >  	/**
> > >  	 * @num_batch_buffer: number of batch buffer in this exec, must match
> > > -	 * the width of the engine
> > > +	 * the @num_bb_per_exec of the struct drm_xe_exec_queue_create
> > >  	 */
> > >  	__u16 num_batch_buffer;
> > >  
> > 


