[Intel-xe] [PATCH v2 38/50] drm/xe/uapi: Rename couple exec_queue items
Souza, Jose
jose.souza at intel.com
Thu Nov 9 17:14:17 UTC 2023
On Fri, 2023-11-03 at 14:34 +0000, Francois Dugast wrote:
> From: Rodrigo Vivi <rodrigo.vivi at intel.com>
>
> 'Placement' is now used in many terms around the memory_region selection
> where the BO or the page table will live. Also, the job itself deserves
> a more active word, since it is dispatched to the engine.
num_dispositions is a bad name; placement is better in my opinion.
It says exactly what it does: on which hw engines the batch buffers can be placed.
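To make that concrete, this is roughly how a load-balanced queue looks with
the new names (untested sketch against this series' xe_drm.h, assuming the
usual stdint/ioctl includes; fd and vm_id are placeholders): one batch
buffer per exec that the kernel can place on either VCS instance:

struct drm_xe_engine_class_instance eci[2] = {
	/* the two engines the single BB can be placed on */
	{ .engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
	  .engine_instance = 0, .gt_id = 0 },
	{ .engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
	  .engine_instance = 1, .gt_id = 0 },
};
struct drm_xe_exec_queue_create create = {
	.num_bb_per_exec = 1,	/* one BB submitted per exec ioctl */
	.num_dispositions = 2,	/* the BB can land on VCS0 or VCS1 */
	.vm_id = vm_id,		/* placeholder */
	.instances = (uintptr_t)eci,
};
ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);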
>
> 'width' is so generic and in the graphics world can mean many different
> things. Let's be more specific here about the intent.
This one sounds good.
>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> ---
> drivers/gpu/drm/xe/xe_devcoredump.c | 8 ++--
> drivers/gpu/drm/xe/xe_exec.c | 4 +-
> drivers/gpu/drm/xe/xe_exec_queue.c | 49 ++++++++++++------------
> drivers/gpu/drm/xe/xe_exec_queue.h | 4 +-
> drivers/gpu/drm/xe/xe_exec_queue_types.h | 4 +-
> drivers/gpu/drm/xe/xe_guc_submit.c | 32 ++++++++--------
> drivers/gpu/drm/xe/xe_ring_ops.c | 8 ++--
> drivers/gpu/drm/xe/xe_sched_job.c | 10 ++---
> drivers/gpu/drm/xe/xe_trace.h | 8 ++--
> include/uapi/drm/xe_drm.h | 20 ++++++----
> 10 files changed, 77 insertions(+), 70 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
> index 68abc0b195be..b4e8de4903b9 100644
> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
> @@ -130,7 +130,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
> struct xe_hw_engine *hwe;
> enum xe_hw_engine_id id;
> u32 adj_logical_mask = q->logical_mask;
> - u32 width_mask = (0x1 << q->width) - 1;
> + u32 num_bb_per_exec_mask = (0x1 << q->num_bb_per_exec) - 1;
> int i;
> bool cookie;
>
> @@ -138,10 +138,10 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
> ss->boot_time = ktime_get_boottime();
>
> cookie = dma_fence_begin_signalling();
> - for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
> + for (i = 0; q->num_bb_per_exec > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
> if (adj_logical_mask & BIT(i)) {
> - adj_logical_mask |= width_mask << i;
> - i += q->width;
> + adj_logical_mask |= num_bb_per_exec_mask << i;
> + i += q->num_bb_per_exec;
> } else {
> ++i;
> }
> diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
> index 28e84a0bbeb0..ca922635db89 100644
> --- a/drivers/gpu/drm/xe/xe_exec.c
> +++ b/drivers/gpu/drm/xe/xe_exec.c
> @@ -161,7 +161,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_VM))
> return -EINVAL;
>
> - if (XE_IOCTL_DBG(xe, q->width != args->num_batch_buffer))
> + if (XE_IOCTL_DBG(xe, q->num_bb_per_exec != args->num_batch_buffer))
> return -EINVAL;
>
> if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_BANNED)) {
> @@ -189,7 +189,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>
> if (xe_exec_queue_is_parallel(q)) {
> err = __copy_from_user(addresses, addresses_user, sizeof(u64) *
> - q->width);
> + q->num_bb_per_exec);
> if (err) {
> err = -EFAULT;
> goto err_syncs;
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index 59e8d1ed34f7..849e463c4ed8 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -33,7 +33,8 @@ enum xe_exec_queue_sched_prop {
> static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
> struct xe_vm *vm,
> u32 logical_mask,
> - u16 width, struct xe_hw_engine *hwe,
> + u16 num_bb_per_exec,
> + struct xe_hw_engine *hwe,
> u32 flags)
> {
> struct xe_exec_queue *q;
> @@ -44,7 +45,7 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
> /* only kernel queues can be permanent */
> XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
>
> - q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * width, GFP_KERNEL);
> + q = kzalloc(sizeof(*q) + sizeof(struct xe_lrc) * num_bb_per_exec, GFP_KERNEL);
> if (!q)
> return ERR_PTR(-ENOMEM);
>
> @@ -55,7 +56,7 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
> if (vm)
> q->vm = xe_vm_get(vm);
> q->class = hwe->class;
> - q->width = width;
> + q->num_bb_per_exec = num_bb_per_exec;
> q->logical_mask = logical_mask;
> q->fence_irq = &gt->fence_irq[hwe->class];
> q->ring_ops = gt->ring_ops[hwe->class];
> @@ -77,7 +78,7 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
> q->bind.fence_seqno = XE_FENCE_INITIAL_SEQNO;
> }
>
> - for (i = 0; i < width; ++i) {
> + for (i = 0; i < num_bb_per_exec; ++i) {
> err = xe_lrc_init(q->lrc + i, hwe, q, vm, SZ_16K);
> if (err)
> goto err_lrc;
> @@ -108,7 +109,7 @@ static struct xe_exec_queue *__xe_exec_queue_create(struct xe_device *xe,
> }
>
> struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
> - u32 logical_mask, u16 width,
> + u32 logical_mask, u16 num_bb_per_exec,
> struct xe_hw_engine *hwe, u32 flags)
> {
> struct xe_exec_queue *q;
> @@ -119,7 +120,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
> if (err)
> return ERR_PTR(err);
> }
> - q = __xe_exec_queue_create(xe, vm, logical_mask, width, hwe, flags);
> + q = __xe_exec_queue_create(xe, vm, logical_mask, num_bb_per_exec, hwe, flags);
> if (vm)
> xe_vm_unlock(vm);
>
> @@ -170,7 +171,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
> {
> int i;
>
> - for (i = 0; i < q->width; ++i)
> + for (i = 0; i < q->num_bb_per_exec; ++i)
> xe_lrc_finish(q->lrc + i);
> if (q->vm)
> xe_vm_put(q->vm);
> @@ -512,15 +513,15 @@ find_hw_engine(struct xe_device *xe,
>
> static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
> struct drm_xe_engine_class_instance *eci,
> - u16 width, u16 num_placements)
> + u16 num_bb_per_exec, u16 num_dispositions)
> {
> struct xe_hw_engine *hwe;
> enum xe_hw_engine_id id;
> u32 logical_mask = 0;
>
> - if (XE_IOCTL_DBG(xe, width != 1))
> + if (XE_IOCTL_DBG(xe, num_bb_per_exec != 1))
> return 0;
> - if (XE_IOCTL_DBG(xe, num_placements != 1))
> + if (XE_IOCTL_DBG(xe, num_dispositions != 1))
> return 0;
> if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
> return 0;
> @@ -541,9 +542,9 @@ static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
>
> static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
> struct drm_xe_engine_class_instance *eci,
> - u16 width, u16 num_placements)
> + u16 num_bb_per_exec, u16 num_dispositions)
> {
> - int len = width * num_placements;
> + int len = num_bb_per_exec * num_dispositions;
> int i, j, n;
> u16 class;
> u16 gt_id;
> @@ -553,13 +554,13 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
> len > 1))
> return 0;
>
> - for (i = 0; i < width; ++i) {
> + for (i = 0; i < num_bb_per_exec; ++i) {
> u32 current_mask = 0;
>
> - for (j = 0; j < num_placements; ++j) {
> + for (j = 0; j < num_dispositions; ++j) {
> struct xe_hw_engine *hwe;
>
> - n = j * width + i;
> + n = j * num_bb_per_exec + i;
>
> hwe = find_hw_engine(xe, eci[n]);
> if (XE_IOCTL_DBG(xe, !hwe))
> @@ -575,7 +576,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
> class = eci[n].engine_class;
> gt_id = eci[n].gt_id;
>
> - if (width == 1 || !i)
> + if (num_bb_per_exec == 1 || !i)
> return_mask |= BIT(eci[n].engine_instance);
> current_mask |= BIT(eci[n].engine_instance);
> }
> @@ -612,7 +613,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
> return -EINVAL;
>
> - len = args->width * args->num_placements;
> + len = args->num_bb_per_exec * args->num_dispositions;
> if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
> return -EINVAL;
>
> @@ -637,8 +638,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
>
> eci[0].gt_id = gt->info.id;
> logical_mask = bind_exec_queue_logical_mask(xe, gt, eci,
> - args->width,
> - args->num_placements);
> + args->num_bb_per_exec,
> + args->num_dispositions);
> if (XE_IOCTL_DBG(xe, !logical_mask))
> return -EINVAL;
>
> @@ -651,7 +652,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
>
> migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
> new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
> - args->width, hwe,
> + args->num_bb_per_exec, hwe,
> EXEC_QUEUE_FLAG_PERSISTENT |
> EXEC_QUEUE_FLAG_VM |
> (sync ? 0 :
> @@ -678,8 +679,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> } else {
> gt = xe_device_get_gt(xe, eci[0].gt_id);
> logical_mask = calc_validate_logical_mask(xe, gt, eci,
> - args->width,
> - args->num_placements);
> + args->num_bb_per_exec,
> + args->num_dispositions);
> if (XE_IOCTL_DBG(xe, !logical_mask))
> return -EINVAL;
>
> @@ -704,7 +705,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
> }
>
> q = xe_exec_queue_create(xe, vm, logical_mask,
> - args->width, hwe,
> + args->num_bb_per_exec, hwe,
> xe_vm_no_dma_fences(vm) ? 0 :
> EXEC_QUEUE_FLAG_PERSISTENT);
> up_read(&vm->lock);
> @@ -827,7 +828,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
> if (xe_exec_queue_is_parallel(q)) {
> int i;
>
> - for (i = 0; i < q->width; ++i) {
> + for (i = 0; i < q->num_bb_per_exec; ++i) {
> if (xe_lrc_seqno(&q->lrc[i]) !=
> q->lrc[i].fence_ctx.next_seqno - 1)
> return false;
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
> index 59a54bfb9a8c..6782f3ce9faf 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.h
> @@ -15,7 +15,7 @@ struct xe_device;
> struct xe_file;
>
> struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
> - u32 logical_mask, u16 width,
> + u32 logical_mask, u16 num_bb_per_exec,
> struct xe_hw_engine *hw_engine, u32 flags);
> struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
> struct xe_vm *vm,
> @@ -40,7 +40,7 @@ static inline void xe_exec_queue_put(struct xe_exec_queue *q)
>
> static inline bool xe_exec_queue_is_parallel(struct xe_exec_queue *q)
> {
> - return q->width > 1;
> + return q->num_bb_per_exec > 1;
> }
>
> bool xe_exec_queue_is_lr(struct xe_exec_queue *q);
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index ecd761177567..eb924a3e5d98 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -47,8 +47,8 @@ struct xe_exec_queue {
> u32 logical_mask;
> /** @name: name of this exec queue */
> char name[MAX_FENCE_NAME_LEN];
> - /** @width: width (number BB submitted per exec) of this exec queue */
> - u16 width;
> + /** @num_bb_per_exec: the width of this exec queue */
> + u16 num_bb_per_exec;
> /** @fence_irq: fence IRQ used to signal job completion */
> struct xe_hw_fence_irq *fence_irq;
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 870dc5c532fa..b5a41a772445 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -259,7 +259,7 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
> if (xe_exec_queue_is_parallel(q))
> bitmap_release_region(guc->submission_state.guc_ids_bitmap,
> q->guc->id - GUC_ID_START_MLRC,
> - order_base_2(q->width));
> + order_base_2(q->num_bb_per_exec));
> else
> ida_simple_remove(&guc->submission_state.guc_ids, q->guc->id);
> }
> @@ -283,7 +283,7 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
> void *bitmap = guc->submission_state.guc_ids_bitmap;
>
> ret = bitmap_find_free_region(bitmap, GUC_ID_NUMBER_MLRC,
> - order_base_2(q->width));
> + order_base_2(q->num_bb_per_exec));
> } else {
> ret = ida_simple_get(&guc->submission_state.guc_ids, 0,
> GUC_ID_NUMBER_SLRC, GFP_NOWAIT);
> @@ -295,7 +295,7 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
> if (xe_exec_queue_is_parallel(q))
> q->guc->id += GUC_ID_START_MLRC;
>
> - for (i = 0; i < q->width; ++i) {
> + for (i = 0; i < q->num_bb_per_exec; ++i) {
> ptr = xa_store(&guc->submission_state.exec_queue_lookup,
> q->guc->id + i, q, GFP_NOWAIT);
> if (IS_ERR(ptr)) {
> @@ -315,7 +315,7 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
> static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
> {
> mutex_lock(&guc->submission_state.lock);
> - __release_guc_id(guc, q, q->width);
> + __release_guc_id(guc, q, q->num_bb_per_exec);
> mutex_unlock(&guc->submission_state.lock);
> }
>
> @@ -426,11 +426,11 @@ static void __register_mlrc_engine(struct xe_guc *guc,
> action[len++] = info->wq_base_lo;
> action[len++] = info->wq_base_hi;
> action[len++] = info->wq_size;
> - action[len++] = q->width;
> + action[len++] = q->num_bb_per_exec;
> action[len++] = info->hwlrca_lo;
> action[len++] = info->hwlrca_hi;
>
> - for (i = 1; i < q->width; ++i) {
> + for (i = 1; i < q->num_bb_per_exec; ++i) {
> struct xe_lrc *lrc = q->lrc + i;
>
> action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
> @@ -578,7 +578,7 @@ static void wq_item_append(struct xe_exec_queue *q)
> struct iosys_map map = xe_lrc_parallel_map(q->lrc);
> #define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */
> u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
> - u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
> + u32 wqi_size = (q->num_bb_per_exec + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
> u32 len_dw = (wqi_size / sizeof(u32)) - 1;
> int i = 0, j;
>
> @@ -595,7 +595,7 @@ static void wq_item_append(struct xe_exec_queue *q)
> wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
> FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64));
> wqi[i++] = 0;
> - for (j = 1; j < q->width; ++j) {
> + for (j = 1; j < q->num_bb_per_exec; ++j) {
> struct xe_lrc *lrc = q->lrc + j;
>
> wqi[i++] = lrc->ring.tail / sizeof(u64);
> @@ -766,17 +766,17 @@ static void simple_error_capture(struct xe_exec_queue *q)
> struct xe_hw_engine *hwe;
> enum xe_hw_engine_id id;
> u32 adj_logical_mask = q->logical_mask;
> - u32 width_mask = (0x1 << q->width) - 1;
> + u32 width_mask = (0x1 << q->num_bb_per_exec) - 1;
> int i;
> bool cookie;
>
> if (q->vm && !q->vm->error_capture.capture_once) {
> q->vm->error_capture.capture_once = true;
> cookie = dma_fence_begin_signalling();
> - for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
> + for (i = 0; q->num_bb_per_exec > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
> if (adj_logical_mask & BIT(i)) {
> adj_logical_mask |= width_mask << i;
> - i += q->width;
> + i += q->num_bb_per_exec;
> } else {
> ++i;
> }
> @@ -1462,7 +1462,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q)
> int i;
>
> trace_xe_exec_queue_resubmit(q);
> - for (i = 0; i < q->width; ++i)
> + for (i = 0; i < q->num_bb_per_exec; ++i)
> xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail);
> drm_sched_resubmit_jobs(sched);
> }
> @@ -1508,7 +1508,7 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
> }
>
> xe_assert(xe, guc_id >= q->guc->id);
> - xe_assert(xe, guc_id < (q->guc->id + q->width));
> + xe_assert(xe, guc_id < (q->guc->id + q->num_bb_per_exec));
>
> return q;
> }
> @@ -1768,20 +1768,20 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
> memcpy(&snapshot->name, &q->name, sizeof(snapshot->name));
> snapshot->class = q->class;
> snapshot->logical_mask = q->logical_mask;
> - snapshot->width = q->width;
> + snapshot->width = q->num_bb_per_exec;
> snapshot->refcount = kref_read(&q->refcount);
> snapshot->sched_timeout = sched->timeout;
> snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us;
> snapshot->sched_props.preempt_timeout_us =
> q->sched_props.preempt_timeout_us;
>
> - snapshot->lrc = kmalloc_array(q->width, sizeof(struct lrc_snapshot),
> + snapshot->lrc = kmalloc_array(q->num_bb_per_exec, sizeof(struct lrc_snapshot),
> GFP_ATOMIC);
>
> if (!snapshot->lrc) {
> drm_err(&xe->drm, "Skipping GuC Engine LRC snapshot.\n");
> } else {
> - for (i = 0; i < q->width; ++i) {
> + for (i = 0; i < q->num_bb_per_exec; ++i) {
> struct xe_lrc *lrc = q->lrc + i;
>
> snapshot->lrc[i].context_desc =
> diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> index 59e0aa2d6a4c..d3d671784e8e 100644
> --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> @@ -383,7 +383,7 @@ static void emit_job_gen12_gsc(struct xe_sched_job *job)
> {
> struct xe_gt *gt = job->q->gt;
>
> - xe_gt_assert(gt, job->q->width <= 1); /* no parallel submission for GSCCS */
> + xe_gt_assert(gt, job->q->num_bb_per_exec <= 1); /* no parallel submission for GSCCS */
>
> __emit_job_gen12_simple(job, job->q->lrc,
> job->batch_addr[0],
> @@ -400,7 +400,7 @@ static void emit_job_gen12_copy(struct xe_sched_job *job)
> return;
> }
>
> - for (i = 0; i < job->q->width; ++i)
> + for (i = 0; i < job->q->num_bb_per_exec; ++i)
> __emit_job_gen12_simple(job, job->q->lrc + i,
> job->batch_addr[i],
> xe_sched_job_seqno(job));
> @@ -411,7 +411,7 @@ static void emit_job_gen12_video(struct xe_sched_job *job)
> int i;
>
> /* FIXME: Not doing parallel handshake for now */
> - for (i = 0; i < job->q->width; ++i)
> + for (i = 0; i < job->q->num_bb_per_exec; ++i)
> __emit_job_gen12_video(job, job->q->lrc + i,
> job->batch_addr[i],
> xe_sched_job_seqno(job));
> @@ -421,7 +421,7 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job)
> {
> int i;
>
> - for (i = 0; i < job->q->width; ++i)
> + for (i = 0; i < job->q->num_bb_per_exec; ++i)
> __emit_job_gen12_render_compute(job, job->q->lrc + i,
> job->batch_addr[i],
> xe_sched_job_seqno(job));
> diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
> index adbd82f8744e..1884b6b6b398 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job.c
> +++ b/drivers/gpu/drm/xe/xe_sched_job.c
> @@ -117,13 +117,13 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
> } else {
> struct dma_fence_array *cf;
>
> - fences = kmalloc_array(q->width, sizeof(*fences), GFP_KERNEL);
> + fences = kmalloc_array(q->num_bb_per_exec, sizeof(*fences), GFP_KERNEL);
> if (!fences) {
> err = -ENOMEM;
> goto err_sched_job;
> }
>
> - for (j = 0; j < q->width; ++j) {
> + for (j = 0; j < q->num_bb_per_exec; ++j) {
> fences[j] = xe_lrc_create_seqno_fence(q->lrc + j);
> if (IS_ERR(fences[j])) {
> err = PTR_ERR(fences[j]);
> @@ -131,7 +131,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
> }
> }
>
> - cf = dma_fence_array_create(q->width, fences,
> + cf = dma_fence_array_create(q->num_bb_per_exec, fences,
> q->parallel.composite_fence_ctx,
> q->parallel.composite_fence_seqno++,
> false);
> @@ -142,13 +142,13 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
> }
>
> /* Sanity check */
> - for (j = 0; j < q->width; ++j)
> + for (j = 0; j < q->num_bb_per_exec; ++j)
> xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno);
>
> job->fence = &cf->base;
> }
>
> - width = q->width;
> + width = q->num_bb_per_exec;
> if (is_migration)
> width = 2;
>
> diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
> index d55dd1521df3..dcf28aaeb78a 100644
> --- a/drivers/gpu/drm/xe/xe_trace.h
> +++ b/drivers/gpu/drm/xe/xe_trace.h
> @@ -112,7 +112,7 @@ DECLARE_EVENT_CLASS(xe_exec_queue,
> __field(enum xe_engine_class, class)
> __field(u32, logical_mask)
> __field(u8, gt_id)
> - __field(u16, width)
> + __field(u16, num_bb_per_exec)
> __field(u16, guc_id)
> __field(u32, guc_state)
> __field(u32, flags)
> @@ -122,15 +122,15 @@ DECLARE_EVENT_CLASS(xe_exec_queue,
> __entry->class = q->class;
> __entry->logical_mask = q->logical_mask;
> __entry->gt_id = q->gt->info.id;
> - __entry->width = q->width;
> + __entry->num_bb_per_exec = q->num_bb_per_exec;
> __entry->guc_id = q->guc->id;
> __entry->guc_state = atomic_read(&q->guc->state);
> __entry->flags = q->flags;
> ),
>
> - TP_printk("%d:0x%x, gt=%d, width=%d, guc_id=%d, guc_state=0x%x, flags=0x%x",
> + TP_printk("%d:0x%x, gt=%d, num_bb_per_exec=%d, guc_id=%d, guc_state=0x%x, flags=0x%x",
> __entry->class, __entry->logical_mask,
> - __entry->gt_id, __entry->width, __entry->guc_id,
> + __entry->gt_id, __entry->num_bb_per_exec, __entry->guc_id,
> __entry->guc_state, __entry->flags)
> );
>
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 2d0fb4386a69..a6c70b8697c7 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -1013,11 +1013,17 @@ struct drm_xe_exec_queue_create {
> /** @extensions: Pointer to the first extension struct, if any */
> __u64 extensions;
>
> - /** @width: submission width (number BB per exec) for this exec queue */
> - __u16 width;
> + /**
> + * @num_bb_per_exec: Indicates a submission width for this exec queue,
> + * for how many batch buffers can be submitted in parallel.
'can' sounds like 'up to' here; I would change that to 'will'.
> + */
> + __u16 num_bb_per_exec;
>
> - /** @num_placements: number of valid placements for this exec queue */
> - __u16 num_placements;
> + /**
> + * @num_dispositions: Indicates how the batch buffers will be
> + * distributed to the hardware engines listed on @instance.
> + */
> + __u16 num_dispositions;
>
> /** @vm_id: VM to use for this exec queue */
> __u32 vm_id;
> @@ -1032,8 +1038,8 @@ struct drm_xe_exec_queue_create {
> * @instances: user pointer to a 2-d array of struct
> * drm_xe_engine_class_instance
> *
> - * length = width (i) * num_placements (j)
> - * index = j + i * width
> + * length = num_bb_per_exec (i) * num_dispositions (j)
> + * index = j + i * num_bb_per_exec
> */
> __u64 instances;
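A worked example of this layout, since the formula is easy to trip over
(hypothetical engine numbers, not part of the patch; this follows
n = j * num_bb_per_exec + i as used by calc_validate_logical_mask() above):
with num_bb_per_exec = 2 and num_dispositions = 2,

/* eci[j * num_bb_per_exec + i]: BB slot i, disposition j */
struct drm_xe_engine_class_instance eci[4] = {
	/* disposition j = 0: BB0 -> VCS0, BB1 -> VCS1 */
	{ .engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE, .engine_instance = 0 },
	{ .engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE, .engine_instance = 1 },
	/* disposition j = 1: BB0 -> VCS2, BB1 -> VCS3 */
	{ .engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE, .engine_instance = 2 },
	{ .engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE, .engine_instance = 3 },
};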
>
> @@ -1143,7 +1149,7 @@ struct drm_xe_exec {
>
> /**
> * @num_batch_buffer: number of batch buffer in this exec, must match
> - * the width of the engine
> + * the @num_bb_per_exec of the struct drm_xe_exec_queue_create
> */
> __u16 num_batch_buffer;
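And the matching exec side for such a parallel queue, again as an untested
sketch (batch addresses and exec_queue_id are placeholders): num_batch_buffer
has to match the num_bb_per_exec the queue was created with, and @address
then points to an array:

__u64 addrs[2] = { bb0_addr, bb1_addr };	/* placeholder GPU addresses */
struct drm_xe_exec exec = {
	.exec_queue_id = exec_queue_id,	/* from drm_xe_exec_queue_create */
	.num_batch_buffer = 2,		/* == the queue's num_bb_per_exec */
	.address = (uintptr_t)addrs,	/* array, since num_batch_buffer > 1 */
};
ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);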
>