[PATCH] drm/xe: Make WA BB part of LRC BO
Thomas Hellström
thomas.hellstrom at linux.intel.com
Thu Jun 19 15:18:12 UTC 2025
Hi, Matt.
On Wed, 2025-06-11 at 20:19 -0700, Matthew Brost wrote:
> No idea why, but without this GuC context switches randomly fail when
> running IGTs in a loop. Need to follow up why this fixes the
> aforementioned issue but can live with a stable driver for now.
>
> Fixes: 617d824c5323 ("drm/xe: Add WA BB to capture active context utilization")
> Cc: stable at vger.kernel.org
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
This patch has non-trivial conflicts on drm-xe-fixes. If it needs to be
backported, please help resolve the conflicts as follows:
./dim update-branches
./dim checkout drm-xe-fixes
./dim cherry-pick 3a1edef8f4b5
Fix up the conflicts and finalize with:
git cherry-pick --continue
And please send the resulting patch to me (including any tags that dim
added).
Thanks,
Thomas
> ---
> drivers/gpu/drm/xe/xe_lrc.c | 36 +++++++++++++++----------------
> drivers/gpu/drm/xe/xe_lrc_types.h | 3 ---
> 2 files changed, 18 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_lrc.c
> b/drivers/gpu/drm/xe/xe_lrc.c
> index 529c6a972a55..a875b93697a5 100644
> --- a/drivers/gpu/drm/xe/xe_lrc.c
> +++ b/drivers/gpu/drm/xe/xe_lrc.c
> @@ -40,6 +40,7 @@
>
> #define LRC_PPHWSP_SIZE SZ_4K
> #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
> +#define LRC_WA_BB_SIZE SZ_4K
>
> static struct xe_device *
> lrc_to_xe(struct xe_lrc *lrc)
> @@ -910,7 +911,6 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
> {
> xe_hw_fence_ctx_finish(&lrc->fence_ctx);
> xe_bo_unpin_map_no_vm(lrc->bo);
> - xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
> }
>
> /*
> @@ -973,22 +973,27 @@ struct wa_bb_setup {
> u32 *batch, size_t max_size);
> };
>
> +static size_t wa_bb_offset(struct xe_lrc *lrc)
> +{
> + return lrc->bo->size - LRC_WA_BB_SIZE;
> +}
> +
> static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
> {
> - const size_t max_size = lrc->bb_per_ctx_bo->size;
> + const size_t max_size = LRC_WA_BB_SIZE;
> static const struct wa_bb_setup funcs[] = {
> { .setup = wa_bb_setup_utilization },
> };
> ssize_t remain;
> u32 *cmd, *buf = NULL;
>
> - if (lrc->bb_per_ctx_bo->vmap.is_iomem) {
> + if (lrc->bo->vmap.is_iomem) {
> buf = kmalloc(max_size, GFP_KERNEL);
> if (!buf)
> return -ENOMEM;
> cmd = buf;
> } else {
> - cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
> + cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc);
> }
>
> remain = max_size / sizeof(*cmd);
> @@ -1011,13 +1016,14 @@ static int setup_wa_bb(struct xe_lrc *lrc,
> struct xe_hw_engine *hwe)
> *cmd++ = MI_BATCH_BUFFER_END;
>
> if (buf) {
> - xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc-
> >bb_per_ctx_bo->vmap, 0,
> - buf, (cmd - buf) * sizeof(*cmd));
> + xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap,
> + wa_bb_offset(lrc), buf,
> + (cmd - buf) * sizeof(*cmd));
> kfree(buf);
> }
>
> - xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
> - xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) |
> 1);
> + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
> xe_bo_ggtt_addr(lrc->bo) +
> + wa_bb_offset(lrc) + 1);
>
> return 0;
>
> @@ -1059,20 +1065,13 @@ static int xe_lrc_init(struct xe_lrc *lrc,
> struct xe_hw_engine *hwe,
> * FIXME: Perma-pinning LRC as we don't yet support moving
> GGTT address
> * via VM bind calls.
> */
> - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, lrc_size,
> + lrc->bo = xe_bo_create_pin_map(xe, tile, NULL,
> + lrc_size + LRC_WA_BB_SIZE,
> ttm_bo_type_kernel,
> bo_flags);
> if (IS_ERR(lrc->bo))
> return PTR_ERR(lrc->bo);
>
> - lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL,
> SZ_4K,
> -
> ttm_bo_type_kernel,
> - bo_flags);
> - if (IS_ERR(lrc->bb_per_ctx_bo)) {
> - err = PTR_ERR(lrc->bb_per_ctx_bo);
> - goto err_lrc_finish;
> - }
> -
> lrc->size = lrc_size;
> lrc->ring.size = ring_size;
> lrc->ring.tail = 0;
> @@ -1860,7 +1859,8 @@ struct xe_lrc_snapshot
> *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
> snapshot->seqno = xe_lrc_seqno(lrc);
> snapshot->lrc_bo = xe_bo_get(lrc->bo);
> snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
> - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
> + snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset -
> + LRC_WA_BB_SIZE;
> snapshot->lrc_snapshot = NULL;
> snapshot->ctx_timestamp =
> lower_32_bits(xe_lrc_ctx_timestamp(lrc));
> snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
> diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h
> b/drivers/gpu/drm/xe/xe_lrc_types.h
> index ae24cf6f8dd9..883e550a9423 100644
> --- a/drivers/gpu/drm/xe/xe_lrc_types.h
> +++ b/drivers/gpu/drm/xe/xe_lrc_types.h
> @@ -53,9 +53,6 @@ struct xe_lrc {
>
> /** @ctx_timestamp: readout value of CTX_TIMESTAMP on last
> update */
> u64 ctx_timestamp;
> -
> - /** @bb_per_ctx_bo: buffer object for per context batch wa
> buffer */
> - struct xe_bo *bb_per_ctx_bo;
> };
>
> struct xe_lrc_snapshot;
More information about the Intel-xe
mailing list