[PATCH v6 3/3] drm/xe/vf: Register CCS read/write contexts with Guc

Matthew Brost matthew.brost at intel.com
Mon Jun 9 16:28:56 UTC 2025


On Fri, Jun 06, 2025 at 06:15:58PM +0530, Satyanarayana K V P wrote:
> Register read/write contexts with the newly added flags with GuC and
> enable the contexts immediately after registration.
> Re-register the contexts with GuC when resuming from runtime suspend,
> as a soft reset is applied to GuC during xe_pm_runtime_resume().
> Make ring head == tail while unbinding the device to avoid issues with
> VF pause after the device is unbound.
> 
> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> ---
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> Cc: Tomasz Lis <tomasz.lis at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: Matthew Auld <matthew.auld at intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> 
> V5 -> V6:
> - None
> 
> V4 -> V5:
> - Fixed review comments (Matthew Brost).
> 
> V3 -> V4:
> - Fixed issues reported by patchworks.
> 
> V2 -> V3:
> - Made xe_migrate structure private as per review comments.
> - Created new xe_migrate functions to get lrc and exec_queue.
> 
> V1 -> V2:
> - Fixed review comments.
> ---
>  drivers/gpu/drm/xe/xe_guc_fwif.h     |  5 ++
>  drivers/gpu/drm/xe/xe_guc_submit.c   | 37 +++++++++++-
>  drivers/gpu/drm/xe/xe_guc_submit.h   |  1 +
>  drivers/gpu/drm/xe/xe_migrate.c      | 22 +++++++
>  drivers/gpu/drm/xe/xe_migrate.h      |  3 +
>  drivers/gpu/drm/xe/xe_pm.c           |  4 ++
>  drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 89 ++++++++++++++++++++++++++++
>  drivers/gpu/drm/xe/xe_sriov_vf_ccs.h |  1 +
>  8 files changed, 160 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
> index 6f57578b07cb..71a5208d0316 100644
> --- a/drivers/gpu/drm/xe/xe_guc_fwif.h
> +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
> @@ -45,6 +45,11 @@
>  #define GUC_MAX_ENGINE_CLASSES		16
>  #define GUC_MAX_INSTANCES_PER_CLASS	32
>  
> +#define GUC_CONTEXT_NORMAL			0
> +#define GUC_CONTEXT_COMPRESSION_SAVE		1
> +#define GUC_CONTEXT_COMPRESSION_RESTORE	2
> +#define GUC_CONTEXT_MAX_TYPES			(GUC_CONTEXT_COMPRESSION_RESTORE + 1)

s/GUC_CONTEXT_MAX_TYPES/GUC_CONTEXT_COUNT
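
i.e., keeping the existing defines and just renaming the last one,
something like (sketch):

#define GUC_CONTEXT_NORMAL			0
#define GUC_CONTEXT_COMPRESSION_SAVE		1
#define GUC_CONTEXT_COMPRESSION_RESTORE		2
#define GUC_CONTEXT_COUNT			(GUC_CONTEXT_COMPRESSION_RESTORE + 1)

'COUNT' reads as "number of context types", which matches how the value
is used in the bounds asserts below.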

> +
>  /* Helper for context registration H2G */
>  struct guc_ctxt_registration_info {
>  	u32 flags;
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 80f748baad3f..4e1423b0f07c 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -531,7 +531,7 @@ static void __register_exec_queue(struct xe_guc *guc,
>  	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
>  }
>  
> -static void register_exec_queue(struct xe_exec_queue *q)
> +static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
>  {
>  	struct xe_guc *guc = exec_queue_to_guc(q);
>  	struct xe_device *xe = guc_to_xe(guc);
> @@ -539,6 +539,7 @@ static void register_exec_queue(struct xe_exec_queue *q)
>  	struct guc_ctxt_registration_info info;
>  
>  	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
> +	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_MAX_TYPES);
>  
>  	memset(&info, 0, sizeof(info));
>  	info.context_idx = q->guc->id;
> @@ -548,6 +549,9 @@ static void register_exec_queue(struct xe_exec_queue *q)
>  	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
>  	info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
>  
> +	if (ctx_type != GUC_CONTEXT_NORMAL)
> +		info.flags |= BIT(ctx_type);
> +
>  	if (xe_exec_queue_is_parallel(q)) {
>  		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
>  		struct iosys_map map = xe_lrc_parallel_map(lrc);
> @@ -750,7 +754,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
>  
>  	if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
>  		if (!exec_queue_registered(q))
> -			register_exec_queue(q);
> +			register_exec_queue(q, GUC_CONTEXT_NORMAL);
>  		if (!lr)	/* LR jobs are emitted in the exec IOCTL */
>  			q->ring_ops->emit_job(job);
>  		submit_exec_queue(q);
> @@ -2347,6 +2351,35 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
>  	xe_guc_exec_queue_snapshot_free(snapshot);
>  }
>  
> +/**
> + * xe_guc_register_exec_queue - Register exec queue for a given context type.
> + * @q - Execution queue
> + * @ctx_type - Type of the context
> + *
> + * This function registers the execution queue with the guc. Special context
> + * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
> + * are only applicable for IGPU and in the VF.
> + * Submits the execution queue to GUC after registering it.
> + *
> + * Returns - Success or asserts.

It always returns 0, so maybe just convert to void return.
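
i.e., something like (sketch):

void xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	...
	register_exec_queue(q, ctx_type);
	enable_scheduling(q);
}

with register_save_restore_context() below then having no error to
propagate from this call either.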

> + */
> +int xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type)
> +{
> +	struct xe_guc *guc = exec_queue_to_guc(q);
> +	struct xe_device *xe = guc_to_xe(guc);
> +
> +	if (ctx_type != GUC_CONTEXT_NORMAL) {

I wouldn't expose this for GUC_CONTEXT_NORMAL as there is no current
user, and registering normal user contexts would likely mess with the
internal state machine / expose races.

If we want to expose this for normal queues, I think we'd need to think
it through more, so best to disallow this or assert the type is not
normal.
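
i.e., drop the GUC_CONTEXT_NORMAL branch and assert instead, something
like (sketch):

	xe_assert(xe, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
		      ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
	xe_assert(xe, IS_SRIOV_VF(xe) || !IS_DGFX(xe));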

> +		xe_assert(xe, IS_SRIOV_VF(xe) || !IS_DGFX(xe));
> +		xe_assert(xe, (ctx_type > GUC_CONTEXT_NORMAL &&
> +			       ctx_type < GUC_CONTEXT_MAX_TYPES));
> +	}
> +
> +	register_exec_queue(q, ctx_type);
> +	enable_scheduling(q);

I didn't see a reply to my question regarding whether enabling
scheduling is required here. Please follow up and answer.

> +
> +	return 0;
> +}
> +
>  /**
>   * xe_guc_submit_print - GuC Submit Print.
>   * @guc: GuC.
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
> index 9b71a986c6ca..f1a26d498339 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.h
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.h
> @@ -39,5 +39,6 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
>  void
>  xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot);
>  void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
> +int xe_guc_register_exec_queue(struct xe_exec_queue *q, int ctx_type);
>  
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index d7f3009260ee..dbc1bbfc944f 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -1038,6 +1038,28 @@ int xe_migrate_ccs_rw_copy(struct xe_migrate *m,
>  	return err;
>  }
>  
> +/**
> + * xe_get_migrate_lrc() - Get the LRC from migrate context.
> + * @migrate: Migrate context.
> + *
> + * Return: Pointer to LRC on success, error on failure
> + */
> +struct xe_lrc *xe_migrate_get_lrc(struct xe_migrate *migrate)
> +{
> +	return migrate->q->lrc[0];
> +}

s/xe_migrate_get_lrc/xe_migrate_lrc/

'get' generally implies a ref is taken.
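
For comparison, the existing 'get' helpers take a reference, roughly:

	struct xe_exec_queue *xe_exec_queue_get(struct xe_exec_queue *q)
	{
		kref_get(&q->refcount);
		return q;
	}

while this is a plain accessor. Also note the kernel-doc above says
xe_get_migrate_lrc() while the function is xe_migrate_get_lrc() - the
rename would be a good time to make those agree.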

> +
> +/**
> + * xe_get_migrate_exec_queue() - Get the execution queue from migrate context.
> + * @migrate: Migrate context.
> + *
> + * Return: Pointer to execution queue on success, error on failure
> + */
> +struct xe_exec_queue *xe_migrate_get_exec_queue(struct xe_migrate *migrate)
> +{
> +	return migrate->q;
> +}


s/xe_migrate_get_exec_queue/xe_migrate_exec_queue/

Maybe drop xe_tile_migrate_exec_queue too and use this new function.
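
i.e., assuming xe_tile_migrate_exec_queue() is just returning
tile->migrate->q today, its callers could do (sketch):

	struct xe_exec_queue *q = xe_migrate_exec_queue(tile->migrate);

and the tile-level wrapper goes away.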

> +
>  static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
>  				 u32 size, u32 pitch)
>  {
> diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
> index ab5ebb44d2c9..016747916c31 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.h
> +++ b/drivers/gpu/drm/xe/xe_migrate.h
> @@ -116,6 +116,9 @@ int xe_migrate_ccs_rw_copy(struct xe_migrate *m,
>  			   struct xe_bo *src_bo,
>  			   int read_write);
>  
> +struct xe_lrc *xe_migrate_get_lrc(struct xe_migrate *migrate);
> +struct xe_exec_queue *xe_migrate_get_exec_queue(struct xe_migrate *migrate);
> +
>  int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
>  			     unsigned long offset, void *buf, int len,
>  			     int write);
> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
> index f64ebd7b854a..a47539ce98eb 100644
> --- a/drivers/gpu/drm/xe/xe_pm.c
> +++ b/drivers/gpu/drm/xe/xe_pm.c
> @@ -22,6 +22,7 @@
>  #include "xe_irq.h"
>  #include "xe_pcode.h"
>  #include "xe_pxp.h"
> +#include "xe_sriov_vf_ccs.h"
>  #include "xe_trace.h"
>  #include "xe_wa.h"
>  
> @@ -546,6 +547,9 @@ int xe_pm_runtime_resume(struct xe_device *xe)
>  
>  	xe_pxp_pm_resume(xe->pxp);
>  
> +	if (IS_SRIOV_VF(xe))
> +		xe_sriov_vf_ccs_register_context(xe);
> +
>  out:
>  	xe_rpm_lockmap_release(xe);
>  	xe_pm_write_callback_task(xe, NULL);
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> index 4b5cfc0d421b..9afb652f6299 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> @@ -8,6 +8,9 @@
>  #include "xe_bb.h"
>  #include "xe_bo.h"
>  #include "xe_device.h"
> +#include "xe_exec_queue_types.h"
> +#include "xe_guc_submit.h"
> +#include "xe_lrc.h"
>  #include "xe_migrate.h"
>  #include "xe_sa.h"
>  #include "xe_sriov_printk.h"
> @@ -140,6 +143,82 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_tile_vf_ccs *ctx)
>  	return 0;
>  }
>  
> +static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx)
> +{
> +	struct xe_lrc *lrc = xe_migrate_get_lrc(ctx->migrate);
> +	u32 addr = ctx->mem.ccs_bb_pool->gpu_addr;
> +	u32 dw[10], i = 0;
> +
> +	dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE;

See my latest reply to patch #1 about a single BB vs. a list.

You are disabling preemption here, so if the save / restore queue is
timesliced, this will explode in the GuC (i.e. it will likely run longer
than the preemption timeout period and trigger an engine reset).

If the save / restore queue is not timesliced, then multiple BBs for BOs
are useless.

Please follow up on my comments in patch #1 and pick one of the options
laid out.

> +	dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3);
> +	dw[i++] = addr;
> +	dw[i++] = 0;
> +	dw[i++] = MI_NOOP;
> +
> +	xe_lrc_write_ring(lrc, dw, i * sizeof(u32));
> +}
> +
> +static int register_save_restore_context(struct xe_migrate *m, int ctx_id)
> +{
> +	int err = -EINVAL;
> +	int ctx_type;
> +
> +	switch (ctx_id) {
> +	case XE_SRIOV_VF_CCS_READ_CTX:
> +		ctx_type = GUC_CONTEXT_COMPRESSION_SAVE;
> +		break;
> +	case XE_SRIOV_VF_CCS_WRITE_CTX:
> +		ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE;
> +		break;
> +	default:
> +		return err;
> +	}
> +
> +	err = xe_guc_register_exec_queue(xe_migrate_get_exec_queue(m), ctx_type);
> +	return err;
> +}
> +
> +/**
> + * xe_sriov_vf_ccs_register_context - Register read/write contexts with guc.
> + * @xe: the &xe_device to register contexts on.
> + *
> + * This function registers read and write contexts with Guc. Re-registration
> + * is needed whenever resuming from pm runtime suspend.
> + *
> + * Return: 0 on success. Negative error code on failure.
> + */
> +int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
> +{
> +	struct xe_tile_vf_ccs *ctx;
> +	struct xe_tile *tile;
> +	int tile_id, ctx_id;
> +	int err;
> +
> +	if (!IS_VF_CCS_READY(xe))
> +		return 0;
> +
> +	for_each_tile(tile, xe, tile_id) {
> +		for_each_ccs_rw_ctx(ctx_id) {
> +			ctx = &tile->sriov.vf.ccs[ctx_id];
> +			err = register_save_restore_context(ctx->migrate, ctx_id);

if (err)
	return err;
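
As written, only the err from the last iteration is returned; bailing
out on the first failure, the loop would read (sketch):

	for_each_tile(tile, xe, tile_id) {
		for_each_ccs_rw_ctx(ctx_id) {
			ctx = &tile->sriov.vf.ccs[ctx_id];
			err = register_save_restore_context(ctx->migrate,
							    ctx_id);
			if (err)
				return err;
		}
	}

	return 0;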

Matt

> +		}
> +	}
> +
> +	return err;
> +}
> +
> +static void xe_sriov_vf_ccs_fini(void *arg)
> +{
> +	struct xe_tile_vf_ccs *ctx = arg;
> +	struct xe_lrc *lrc = xe_migrate_get_lrc(ctx->migrate);
> +
> +	/*
> +	 * Make TAIL = HEAD in the ring so that no issues are seen if Guc
> +	 * submits this context to HW on VF pause after unbinding device.
> +	 */
> +	xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc));
> +}
> +
>  /**
>   * xe_sriov_vf_ccs_init - Setup LRCA for save & restore.
>   * @xe: the &xe_device to start recovery on
> @@ -175,6 +254,16 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>  			err = alloc_bb_pool(tile, ctx);
>  			if (err)
>  				goto err_ret;
> +
> +			ccs_rw_update_ring(ctx);
> +
> +			err = register_save_restore_context(migrate, ctx_id);
> +			if (err)
> +				goto err_ret;
> +
> +			err = devm_add_action_or_reset(xe->drm.dev,
> +						       xe_sriov_vf_ccs_fini,
> +						       ctx);
>  		}
>  	}
>  
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
> index 5d5e4bd25904..1f1baf685fec 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
> @@ -12,5 +12,6 @@ struct xe_bo;
>  int xe_sriov_vf_ccs_init(struct xe_device *xe);
>  int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo);
>  int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo);
> +int xe_sriov_vf_ccs_register_context(struct xe_device *xe);
>  
>  #endif
> -- 
> 2.43.0
> 

