[PATCH v8 8/8] drm/xe/vf: Rebase exec queue parallel commands during migration recovery
Michał Winiarski
michal.winiarski at intel.com
Fri Aug 1 20:57:53 UTC 2025
On Fri, Aug 01, 2025 at 03:50:45AM +0200, Tomasz Lis wrote:
> Parallel exec queues have an additional command streamer buffer which holds
> GGTT references to data within the context state. These GGTT references
> have to be fixed up after VF migration.
>
> v2: Properly handle NOOP entries, verify that parsing succeeds
> v3: Improve error/warn logging, add propagation of errors,
> give names to magic offsets
>
> Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
> Cc: Michal Winiarski <michal.winiarski at intel.com>
> ---
> drivers/gpu/drm/xe/abi/guc_actions_abi.h | 8 ++++
> drivers/gpu/drm/xe/xe_guc_submit.c | 54 ++++++++++++++++++++++++
> 2 files changed, 62 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> index 81eb046aeebf..d8cf68a0516d 100644
> --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> @@ -193,6 +193,14 @@ enum xe_guc_register_context_multi_lrc_param_offsets {
> XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN = 11,
> };
>
> +enum xe_guc_context_wq_item_offsets {
> + XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN = 0,
> + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW,
> + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_2_GUCCTX_RINGTAIL_FREEZEPOCS,
> + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_3_WI_FENCE_ID,
> + XE_GUC_CONTEXT_WQ_EL_CHILD_LIST_DATA_4_RINGTAIL,
> +};
> +
> enum xe_guc_report_status {
> XE_GUC_REPORT_STATUS_UNKNOWN = 0x0,
> XE_GUC_REPORT_STATUS_ACKED = 0x1,
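
Side note for readers mapping the names to the wire format: each enum value
is the dword index within a single multi-LRC work queue item, so the layout
mirrors the writes in wq_item_append() below. A minimal sketch (illustrative
only - the wqi array and helper here stand in for the real WQ ring storage,
u32 being the usual linux/types.h type):

	/* Illustrative: dword layout of one multi-LRC work queue item. */
	static void sketch_wqi_layout(u32 *wqi, u32 type_len, u32 desc_low,
				      u32 gucid_tail, u32 fence_id)
	{
		wqi[XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN] = type_len;
		wqi[XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW] = desc_low;
		wqi[XE_GUC_CONTEXT_WQ_EL_INFO_DATA_2_GUCCTX_RINGTAIL_FREEZEPOCS] = gucid_tail;
		wqi[XE_GUC_CONTEXT_WQ_EL_INFO_DATA_3_WI_FENCE_ID] = fence_id;
		/* child ring tails follow from ..._CHILD_LIST_DATA_4_RINGTAIL */
	}
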
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index f59fecc58fa8..427694afe0cc 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -671,12 +671,18 @@ static void wq_item_append(struct xe_exec_queue *q)
> if (wq_wait_for_space(q, wqi_size))
> return;
>
> + xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN);
> wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
> FIELD_PREP(WQ_LEN_MASK, len_dw);
> + xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW);
> wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
> + xe_gt_assert(guc_to_gt(guc), i ==
> + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_2_GUCCTX_RINGTAIL_FREEZEPOCS);
> wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
> FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
> + xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_3_WI_FENCE_ID);
> wqi[i++] = 0;
> + xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_CHILD_LIST_DATA_4_RINGTAIL);
Mixed feelings on the asserts, but I understand that it's just to
"document" the offsets used in the rebase function.
Reviewed-by: Michał Winiarski <michal.winiarski at intel.com>
Thanks,
-Michał
> for (j = 1; j < q->width; ++j) {
> struct xe_lrc *lrc = q->lrc[j];
>
> @@ -697,6 +703,50 @@ static void wq_item_append(struct xe_exec_queue *q)
> parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
> }
>
> +static int wq_items_rebase(struct xe_exec_queue *q)
> +{
> + struct xe_guc *guc = exec_queue_to_guc(q);
> + struct xe_device *xe = guc_to_xe(guc);
> + struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
> + int i = q->guc->wqi_head;
> +
> + /* the ring starts after a header struct */
> + iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[0]));
> +
> + while ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) {
> + u32 len_dw, type, val;
> +
> + if (drm_WARN_ON_ONCE(&xe->drm, i < 0 || i > 2 * WQ_SIZE))
> + break;
> +
> + val = xe_map_rd_ring_u32(xe, &map, i / sizeof(u32) +
> + XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN,
> + WQ_SIZE / sizeof(u32));
> + len_dw = FIELD_GET(WQ_LEN_MASK, val);
> + type = FIELD_GET(WQ_TYPE_MASK, val);
> +
> + if (drm_WARN_ON_ONCE(&xe->drm, len_dw >= WQ_SIZE / sizeof(u32)))
> + break;
> +
> + if (type == WQ_TYPE_MULTI_LRC) {
> + val = xe_lrc_descriptor(q->lrc[0]);
> + xe_map_wr_ring_u32(xe, &map, i / sizeof(u32) +
> + XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW,
> + WQ_SIZE / sizeof(u32), val);
> + } else if (drm_WARN_ON_ONCE(&xe->drm, type != WQ_TYPE_NOOP)) {
> + break;
> + }
> +
> + i += (len_dw + 1) * sizeof(u32);
> + }
> +
> + if ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) {
> + xe_gt_err(q->gt, "Exec queue fixups incomplete - wqi parse failed\n");
> + return -EBADMSG;
> + }
> + return 0;
> +}
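
The parser leans on the ring-aware accessors here. Assuming
xe_map_rd_ring_u32()/xe_map_wr_ring_u32() just reduce the dword index modulo
the ring size before touching the map - which is what the names and the
WQ_SIZE / sizeof(u32) argument suggest - the access is equivalent to this
sketch (plain array standing in for the iosys_map):

	/* Sketch of the ring-aware read the parser relies on. */
	static u32 ring_rd_u32_sketch(const u32 *ring, u32 index,
				      u32 ring_size_dw)
	{
		return ring[index % ring_size_dw];
	}

so a header read at i / sizeof(u32) wraps cleanly even when the free-running
byte offset i runs past WQ_SIZE.
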
> +
> #define RESUME_PENDING ~0x0ull
> static void submit_exec_queue(struct xe_exec_queue *q)
> {
> @@ -2543,6 +2593,10 @@ int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
> err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
> if (err)
> break;
> + if (xe_exec_queue_is_parallel(q))
> + err = wq_items_rebase(q);
> + if (err)
> + break;
> }
> mutex_unlock(&guc->submission_state.lock);
>
> --
> 2.25.1
>