[Intel-xe] [PATCH v2 43/50] squash! drm/xe/uapi: Rename couple exec_queue items

Francois Dugast francois.dugast at intel.com
Fri Nov 3 14:34:49 UTC 2023


From: Rodrigo Vivi <rodrigo.vivi at intel.com>

(squash instead of the fixup so the commit message can be updated)

(new full commit message with new subject)

drm/xe/uapi: Exec queue documentation and variable renaming

Rename 'placement' to num_eng_per_bb and 'width' to num_bb_per_exec, and
add graphical documentation for them.

Let's make these names obvious and straightforward. Not only is it important
to have variable names that are clear and descriptive, but 'placement' is now
used in many places around memory_region selection (where the BO or the page
table will live), and 'width' is generic, with many other common meanings in
the graphics world.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
---
 drivers/gpu/drm/xe/xe_exec_queue.c | 16 +++----
 include/uapi/drm/xe_drm.h          | 70 ++++++++++++++++++++++++++++--
 2 files changed, 74 insertions(+), 12 deletions(-)
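
For reviewers, here is a minimal userspace sketch of how the renamed fields
would be filled for the virtual load balancing case documented below. The
engine class macro, header path, vm_id and sched_group_id values are
illustrative assumptions only, not something this patch defines:

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

/* Hypothetical example: one batch buffer per exec that may run on either
 * of two engine instances, letting driver/firmware/hardware pick one.
 */
static int create_balanced_exec_queue(int fd, uint32_t vm_id)
{
	/* num_bb_per_exec = 1 and num_eng_per_bb = 2, so @instances holds
	 * 1 * 2 = 2 entries: the candidate engines for BB0. Every entry
	 * must share the same sched_group_id.
	 */
	struct drm_xe_engine_class_instance eci[] = {
		{ .engine_class = DRM_XE_ENGINE_CLASS_COPY, /* assumed name */
		  .engine_instance = 0, .sched_group_id = 0 },
		{ .engine_class = DRM_XE_ENGINE_CLASS_COPY,
		  .engine_instance = 1, .sched_group_id = 0 },
	};
	struct drm_xe_exec_queue_create create = {
		.num_bb_per_exec = 1,
		.num_eng_per_bb = 2,
		.vm_id = vm_id,
		.instances = (uintptr_t)eci,
	};

	return ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
}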

diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 15fe709384e7..e30363bb5152 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -513,7 +513,7 @@ find_hw_engine(struct xe_device *xe,
 
 static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 					struct drm_xe_engine_class_instance *eci,
-					u16 num_bb_per_exec, u16 num_dispositions)
+					u16 num_bb_per_exec, u16 num_eng_per_bb)
 {
 	struct xe_hw_engine *hwe;
 	enum xe_hw_engine_id id;
@@ -521,7 +521,7 @@ static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 
 	if (XE_IOCTL_DBG(xe, num_bb_per_exec != 1))
 		return 0;
-	if (XE_IOCTL_DBG(xe, num_dispositions != 1))
+	if (XE_IOCTL_DBG(xe, num_eng_per_bb != 1))
 		return 0;
 	if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
 		return 0;
@@ -542,9 +542,9 @@ static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 
 static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 				      struct drm_xe_engine_class_instance *eci,
-				      u16 num_bb_per_exec, u16 num_dispositions)
+				      u16 num_bb_per_exec, u16 num_eng_per_bb)
 {
-	int len = num_bb_per_exec * num_dispositions;
+	int len = num_bb_per_exec * num_eng_per_bb;
 	int i, j, n;
 	u16 class;
 	u16 sched_group_id;
@@ -557,7 +557,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
 	for (i = 0; i < num_bb_per_exec; ++i) {
 		u32 current_mask = 0;
 
-		for (j = 0; j < num_dispositions; ++j) {
+		for (j = 0; j < num_eng_per_bb; ++j) {
 			struct xe_hw_engine *hwe;
 
 			n = j * num_bb_per_exec + i;
@@ -614,7 +614,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
 		return -EINVAL;
 
-	len = args->num_bb_per_exec * args->num_dispositions;
+	len = args->num_bb_per_exec * args->num_eng_per_bb;
 	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
 		return -EINVAL;
 
@@ -640,7 +640,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 			eci[0].sched_group_id = gt->info.id;
 			logical_mask = bind_exec_queue_logical_mask(xe, gt, eci,
 								    args->num_bb_per_exec,
-								    args->num_dispositions);
+								    args->num_eng_per_bb);
 			if (XE_IOCTL_DBG(xe, !logical_mask))
 				return -EINVAL;
 
@@ -681,7 +681,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
 		gt = xe_device_get_gt(xe, eci[0].sched_group_id);
 		logical_mask = calc_validate_logical_mask(xe, gt, eci,
 							  args->num_bb_per_exec,
-							  args->num_dispositions);
+							  args->num_eng_per_bb);
 		if (XE_IOCTL_DBG(xe, !logical_mask))
 			return -EINVAL;
 
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index c541efc80e5d..690d18e07650 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1012,6 +1012,68 @@ struct drm_xe_sync {
 	__u64 reserved[2];
 };
 
+/**
+ * DOC: Execution Queue
+ *
+ * The Execution Queue abstracts the Hardware Engine that is going to be used
+ * for the execution of the Batch Buffers in &DRM_IOCTL_XE_EXEC.
+ *
+ * In regular usage of this execution queue, only one hardware engine would be
+ * given as input in @instances below, and both @num_bb_per_exec and
+ * @num_eng_per_bb would be set to '1'.
+ *
+ * Regular execution example::
+ *
+ *                    ┌─────┐
+ *                    │ BB0 │
+ *                    └──┬──┘
+ *                       │     @num_bb_per_exec = 1
+ *                       │     @num_eng_per_bb = 1
+ *                       │     @instances = {Engine0}
+ *                       ▼
+ *                   ┌───────┐
+ *                   │Engine0│
+ *                   └───────┘
+ *
+ * However, this execution queue can also be used for parallel submission or
+ * for load balancing submission (a.k.a. virtual load balancing).
+ *
+ * In a parallel submission, different batch buffers will be simultaneously
+ * dispatched to different engines listed in @instances, in a 1-1 relationship.
+ *
+ * Parallel execution example::
+ *
+ *               ┌─────┐   ┌─────┐
+ *               │ BB0 │   │ BB1 │
+ *               └──┬──┘   └──┬──┘
+ *                  │         │     @num_bb_per_exec = 2
+ *                  │         │     @num_eng_per_bb = 1
+ *                  │         │     @instances = {Engine0, Engine1}
+ *                  ▼         ▼
+ *              ┌───────┐ ┌───────┐
+ *              │Engine0│ │Engine1│
+ *              └───────┘ └───────┘
+ *
+ * In a load balancing submission, each batch buffer is virtually dispatched
+ * to all of the listed engine @instances. The underlying driver, firmware, or
+ * hardware then selects the best available engine to actually run the job.
+ *
+ * Virtual Load Balancing example::
+ *
+ *                    ┌─────┐
+ *                    │ BB0 │
+ *                    └──┬──┘
+ *                       │      @num_bb_per_exec = 1
+ *                       │      @num_eng_per_bb = 2
+ *                       │      @instances = {Engine0, Engine1}
+ *                  ┌────┴────┐
+ *                  │         │
+ *                  ▼         ▼
+ *              ┌───────┐ ┌───────┐
+ *              │Engine0│ │Engine1│
+ *              └───────┘ └───────┘
+ */
+
 /**
  * struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE
  */
@@ -1027,10 +1089,10 @@ struct drm_xe_exec_queue_create {
 	__u16 num_bb_per_exec;
 
 	/**
-	 * @num_dispositions: Indicates how the batch buffers will be
-	 * distributed to the hardware engines listed on @instance.
+	 * @num_eng_per_bb: Indicates how many candidate engines are listed in
+	 * @instances for Xe to distribute the load across.
 	 */
-	__u16 num_dispositions;
+	__u16 num_eng_per_bb;
 
 	/** @vm_id: VM to use for this exec queue */
 	__u32 vm_id;
@@ -1047,7 +1109,7 @@ struct drm_xe_exec_queue_create {
 	 *
 	 * Every engine in the array needs to have the same @sched_group_id
 	 *
-	 * length = num_bb_per_exec (i) * num_dispositions (j)
+	 * length = num_bb_per_exec (i) * num_eng_per_bb (j)
 	 * index = j + i * num_bb_per_exec
 	 */
 	__u64 instances;
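
For the parallel case documented above (two batch buffers dispatched
together, one engine option each), the same ioctl would be filled roughly as
below; the engine class macro and vm_id are again only illustrative:

/* Same includes as the sketch before the diff: stdint.h, sys/ioctl.h
 * and drm/xe_drm.h.
 *
 * Hypothetical example: parallel submission, BB0 -> Engine0 and
 * BB1 -> Engine1 in a 1:1 relationship.
 * length = num_bb_per_exec (2) * num_eng_per_bb (1) = 2 entries, which
 * xe_exec_queue_create_ioctl() checks against XE_HW_ENGINE_MAX_INSTANCE.
 */
static int create_parallel_exec_queue(int fd, uint32_t vm_id)
{
	struct drm_xe_engine_class_instance eci[] = {
		{ .engine_class = DRM_XE_ENGINE_CLASS_COPY, /* assumed name */
		  .engine_instance = 0, .sched_group_id = 0 },
		{ .engine_class = DRM_XE_ENGINE_CLASS_COPY,
		  .engine_instance = 1, .sched_group_id = 0 },
	};
	struct drm_xe_exec_queue_create create = {
		.num_bb_per_exec = 2,	/* two BBs submitted per exec */
		.num_eng_per_bb = 1,	/* exactly one engine per BB */
		.vm_id = vm_id,
		.instances = (uintptr_t)eci,
	};

	return ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
}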
-- 
2.34.1


