[PATCH 2/2] drm/xe/lrc: Prepare WA BB setup for more users

Lucas De Marchi lucas.demarchi at intel.com
Wed Jun 11 20:18:42 UTC 2025


On Mon, Jun 09, 2025 at 01:01:49PM +0100, Tvrtko Ursulin wrote:
>
>On 04/06/2025 16:03, Lucas De Marchi wrote:
>>The post context restore (WA BB) is a mechanism in HW that may be used
>>for things other than the utilization setup. Create a new function
>>called setup_wa_bb() that wraps any function writing useful commands in
>>the buffer.
>>
>>Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>>---
>>  drivers/gpu/drm/xe/xe_lrc.c | 69 ++++++++++++++++++++++++++++++++++++---------
>>  1 file changed, 55 insertions(+), 14 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
>>index bf7c3981897de..529c6a972a55b 100644
>>--- a/drivers/gpu/drm/xe/xe_lrc.c
>>+++ b/drivers/gpu/drm/xe/xe_lrc.c
>>@@ -914,9 +914,8 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
>>  }
>>  /*
>>- * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active
>>- * context run ticks.
>>- * @lrc: Pointer to the lrc.
>>+ * wa_bb_setup_utilization() - Write commands to wa bb to assist
>>+ * in calculating active context run ticks.
>>   *
>>   * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the
>>   * context, but only gets updated when the context switches out. In order to
>>@@ -941,18 +940,13 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
>>   * store it in the PPHSWP.
>>   */
>>  #define CONTEXT_ACTIVE 1ULL
>>-static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
>>+static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
>>+				       u32 *batch, size_t max_len)
>>  {
>>-	u32 *cmd, *buf = NULL;
>>+	u32 *cmd = batch;
>>-	if (lrc->bb_per_ctx_bo->vmap.is_iomem) {
>>-		buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL);
>>-		if (!buf)
>>-			return -ENOMEM;
>>-		cmd = buf;
>>-	} else {
>>-		cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
>>-	}
>>+	if (xe_gt_WARN_ON(lrc->gt, max_len < 12))
>>+		return -ENOSPC;
>
>Hand counted check is a little bit error prone or could go out of sync 
>but I have no better suggestions apart to maybe removing it altogether 
>and relying on the main warning in setup_wa_bb() catching all errors 
>during patch development?

humn ... but the main warning relies on each individual function to
report the count. I don't think it's a good option to overflow the array
and then later warn about the overflow.

Here it was easy to just give a max. In other places where the size is
more dynamic (coming from configfs) I use the 2 steps approach: first
count the number of entries needed and second write them to the output.

>
>>  	*cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
>>  	*cmd++ = ENGINE_ID(0).addr;
>>@@ -971,6 +965,49 @@ static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
>>  		*cmd++ = upper_32_bits(CONTEXT_ACTIVE);
>>  	}
>>+	return cmd - batch;
>>+}
>>+
>>+struct wa_bb_setup {
>>+	ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
>>+			 u32 *batch, size_t max_size);
>>+};
>>+
>>+static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
>>+{
>>+	const size_t max_size = lrc->bb_per_ctx_bo->size;
>>+	static const struct wa_bb_setup funcs[] = {
>>+		{ .setup = wa_bb_setup_utilization },
>>+	};
>>+	ssize_t remain;
>>+	u32 *cmd, *buf = NULL;
>>+
>>+	if (lrc->bb_per_ctx_bo->vmap.is_iomem) {
>>+		buf = kmalloc(max_size, GFP_KERNEL);
>>+		if (!buf)
>>+			return -ENOMEM;
>>+		cmd = buf;
>>+	} else {
>>+		cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
>>+	}
>>+
>>+	remain = max_size / sizeof(*cmd);
>>+
>>+	for (size_t i = 0; i < ARRAY_SIZE(funcs); i++) {
>>+		ssize_t len = funcs[i].setup(lrc, hwe, cmd, remain);
>>+
>>+		remain -= len;
>>+
>>+		/*
>>+		 * There should always be at least 1 additional dword for
>>+		 * the end marker
>>+		 */
>>+		if (len < 0 || xe_gt_WARN_ON(lrc->gt, remain < 1))
>>+			goto fail;
>>+
>>+		cmd += len;
>>+	}
>>+
>>  	*cmd++ = MI_BATCH_BUFFER_END;
>>  	if (buf) {
>>@@ -983,6 +1020,10 @@ static int xe_lrc_setup_utilization(struct xe_lrc *lrc)
>>  			     xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
>>  	return 0;
>>+
>>+fail:
>>+	kfree(buf);
>>+	return -ENOSPC;
>>  }
>>  #define PVC_CTX_ASID		(0x2e + 1)
>>@@ -1139,7 +1180,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
>>  	map = __xe_lrc_start_seqno_map(lrc);
>>  	xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
>>-	err = xe_lrc_setup_utilization(lrc);
>>+	err = setup_wa_bb(lrc, hwe);
>>  	if (err)
>>  		goto err_lrc_finish;
>>
>
>LGTM, I did not spot any mistakes and I can easily rebase mine on top 
>of this. And follow the same pattern for indirect context.
>
>Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>

thanks
Lucas De Marchi

>
>Regards,
>
>Tvrtko
>
>


More information about the Intel-xe mailing list