[PATCH v2 1/2] drm/xe/vf: Refactor CCS save/restore to use default migration context

Tue Aug 5 03:11:25 UTC 2025

On Fri, Aug 01, 2025 at 06:54:40PM +0530, Satyanarayana K V P wrote:
> Previously, CCS save/restore operations created separate migration
> contexts with new VM memory allocations, resulting in significant
> overhead.
> 
> This commit eliminates redundant context creation reusing the default
> migration context by registering new execution queues for CCS save and
> restore on the existing migrate VM.
> 
> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
> Suggested-by: Matthew Brost <matthew.brost at intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> 
> ---
> V1 -> V2:
> - Fix kernel-doc issues reported by patchworks.
> ---
>  drivers/gpu/drm/xe/xe_migrate.c            | 18 ++++----
>  drivers/gpu/drm/xe/xe_migrate.h            |  2 +-
>  drivers/gpu/drm/xe/xe_sriov_vf_ccs.c       | 49 ++++++++++++----------
>  drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h |  2 +
>  4 files changed, 39 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index 3a276e2348a2..7d3c7c4e2185 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -960,14 +960,14 @@ struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate)
>  	return migrate->q->lrc[0];
>  }
>  
> -static int emit_flush_invalidate(struct xe_migrate *m, u32 *dw, int i,
> +static int emit_flush_invalidate(struct xe_exec_queue *q, u32 *dw, int i,
>  				 u32 flags)
>  {
>  	dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
>  		  MI_FLUSH_IMM_DW | flags;
> -	dw[i++] = lower_32_bits(xe_lrc_start_seqno_ggtt_addr(xe_migrate_lrc(m))) |
> +	dw[i++] = lower_32_bits(xe_lrc_start_seqno_ggtt_addr(q->lrc[0])) |
>  		  MI_FLUSH_DW_USE_GTT;
> -	dw[i++] = upper_32_bits(xe_lrc_start_seqno_ggtt_addr(xe_migrate_lrc(m)));
> +	dw[i++] = upper_32_bits(xe_lrc_start_seqno_ggtt_addr(q->lrc[0]));
>  	dw[i++] = MI_NOOP;
>  	dw[i++] = MI_NOOP;
>  
> @@ -976,7 +976,8 @@ static int emit_flush_invalidate(struct xe_migrate *m, u32 *dw, int i,
>  
>  /**
>   * xe_migrate_ccs_rw_copy() - Copy content of TTM resources.
> - * @m: The migration context.
> + * @tile: Tile whose migration context to be used.
> + * @q : Execution to be used along with migration context.
>   * @src_bo: The buffer object @src is currently bound to.
>   * @read_write : Creates BB commands for CCS read/write.
>   *
> @@ -987,7 +988,7 @@ static int emit_flush_invalidate(struct xe_migrate *m, u32 *dw, int i,
>   *
>   * Return: 0 if successful, negative error code on failure.
>   */
> -int xe_migrate_ccs_rw_copy(struct xe_migrate *m,
> +int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  			   struct xe_bo *src_bo,
>  			   enum xe_sriov_vf_ccs_rw_ctxs read_write)
>  
> @@ -995,7 +996,8 @@ int xe_migrate_ccs_rw_copy(struct xe_migrate *m,
>  	bool src_is_pltt = read_write == XE_SRIOV_VF_CCS_READ_CTX;
>  	bool dst_is_pltt = read_write == XE_SRIOV_VF_CCS_WRITE_CTX;
>  	struct ttm_resource *src = src_bo->ttm.resource;
> -	struct xe_gt *gt = m->tile->primary_gt;
> +	struct xe_migrate *m = tile->migrate;
> +	struct xe_gt *gt = tile->primary_gt;
>  	u32 batch_size, batch_size_allocated;
>  	struct xe_device *xe = gt_to_xe(gt);
>  	struct xe_res_cursor src_it, ccs_it;
> @@ -1078,11 +1080,11 @@ int xe_migrate_ccs_rw_copy(struct xe_migrate *m,
>  
>  		emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
>  
> -		bb->len = emit_flush_invalidate(m, bb->cs, bb->len, flush_flags);
> +		bb->len = emit_flush_invalidate(q, bb->cs, bb->len, flush_flags);
>  		flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt,
>  						  src_L0_ofs, dst_is_pltt,
>  						  src_L0, ccs_ofs, true);
> -		bb->len = emit_flush_invalidate(m, bb->cs, bb->len, flush_flags);
> +		bb->len = emit_flush_invalidate(q, bb->cs, bb->len, flush_flags);
>  
>  		size -= src_L0;
>  	}
> diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
> index e81ea6b27fb5..9e20da6d58c2 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.h
> +++ b/drivers/gpu/drm/xe/xe_migrate.h
> @@ -124,7 +124,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>  				  struct ttm_resource *dst,
>  				  bool copy_only_ccs);
>  
> -int xe_migrate_ccs_rw_copy(struct xe_migrate *m,
> +int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
>  			   struct xe_bo *src_bo,
>  			   enum xe_sriov_vf_ccs_rw_ctxs read_write);
>  
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> index bf9fa1238462..616fd57794a6 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
> @@ -8,6 +8,7 @@
>  #include "xe_bb.h"
>  #include "xe_bo.h"
>  #include "xe_device.h"
> +#include "xe_exec_queue.h"
>  #include "xe_exec_queue_types.h"
>  #include "xe_guc_submit.h"
>  #include "xe_lrc.h"
> @@ -168,7 +169,7 @@ static int alloc_bb_pool(struct xe_tile *tile, struct xe_tile_vf_ccs *ctx)
>  
>  static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx)
>  {
> -	struct xe_lrc *lrc = xe_migrate_lrc(ctx->migrate);
> +	struct xe_lrc *lrc = ctx->q[ctx->ctx_id]->lrc[0];
>  	u64 addr = ctx->mem.ccs_bb_pool->gpu_addr;
>  	u32 dw[10], i = 0;
>  
> @@ -183,13 +184,12 @@ static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx)
>  	xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
>  }
>  
> -static int register_save_restore_context(struct xe_migrate *m,
> -					 enum xe_sriov_vf_ccs_rw_ctxs ctx_id)
> +static int register_save_restore_context(struct xe_tile_vf_ccs *ctx)
>  {
>  	int err = -EINVAL;
>  	int ctx_type;
>  
> -	switch (ctx_id) {
> +	switch (ctx->ctx_id) {
>  	case XE_SRIOV_VF_CCS_READ_CTX:
>  		ctx_type = GUC_CONTEXT_COMPRESSION_SAVE;
>  		break;
> @@ -200,7 +200,7 @@ static int register_save_restore_context(struct xe_migrate *m,
>  		return err;
>  	}
>  
> -	xe_guc_register_exec_queue(xe_migrate_exec_queue(m), ctx_type);
> +	xe_guc_register_exec_queue(ctx->q[ctx->ctx_id], ctx_type);
>  	return 0;
>  }
>  
> @@ -225,7 +225,7 @@ int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
>  
>  	for_each_ccs_rw_ctx(ctx_id) {
>  		ctx = &tile->sriov.vf.ccs[ctx_id];
> -		err = register_save_restore_context(ctx->migrate, ctx_id);
> +		err = register_save_restore_context(ctx);
>  		if (err)
>  			return err;
>  	}
> @@ -236,13 +236,14 @@ int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
>  static void xe_sriov_vf_ccs_fini(void *arg)
>  {
>  	struct xe_tile_vf_ccs *ctx = arg;
> -	struct xe_lrc *lrc = xe_migrate_lrc(ctx->migrate);
> +	struct xe_lrc *lrc = ctx->q[ctx->ctx_id]->lrc[0];
>  
>  	/*
>  	 * Make TAIL = HEAD in the ring so that no issues are seen if Guc
>  	 * submits this context to HW on VF pause after unbinding device.
>  	 */
>  	xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc));
> +	xe_exec_queue_put(ctx->q[ctx->ctx_id]);
>  }
>  
>  /**
> @@ -258,8 +259,9 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>  {
>  	struct xe_tile *tile = xe_device_get_root_tile(xe);
>  	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
> -	struct xe_migrate *migrate;
>  	struct xe_tile_vf_ccs *ctx;
> +	struct xe_exec_queue *q;
> +	u32 flags;
>  	int err;
>  
>  	xe_assert(xe, IS_SRIOV_VF(xe));
> @@ -270,27 +272,25 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>  		ctx = &tile->sriov.vf.ccs[ctx_id];
>  		ctx->ctx_id = ctx_id;
>  
> -		migrate = xe_migrate_alloc(tile);
> -		if (IS_ERR(migrate)) {
> -			err = PTR_ERR(migrate);
> +		flags = EXEC_QUEUE_FLAG_KERNEL |
> +			EXEC_QUEUE_FLAG_PERMANENT |
> +			EXEC_QUEUE_FLAG_MIGRATE;
> +		q = xe_exec_queue_create_bind(xe, tile, flags, 0);
> +		if (IS_ERR(q)) {
> +			err = PTR_ERR(q);
>  			goto err_ret;
>  		}
> -
> -		err = xe_migrate_init(migrate);
> -		if (err)
> -			goto err_ret;
> -
> -		ctx->migrate = migrate;
> +		ctx->q[ctx_id] = q;
>  
>  		err = alloc_bb_pool(tile, ctx);
>  		if (err)
> -			goto err_ret;
> +			goto err_free_queue;
>  
>  		ccs_rw_update_ring(ctx);
>  
> -		err = register_save_restore_context(ctx->migrate, ctx_id);
> +		err = register_save_restore_context(ctx);
>  		if (err)
> -			goto err_ret;
> +			goto err_free_queue;
>  
>  		err = devm_add_action_or_reset(xe->drm.dev,
>  					       xe_sriov_vf_ccs_fini,
> @@ -301,6 +301,9 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>  
>  	return 0;
>  
> +err_free_queue:
> +	xe_exec_queue_put(q);
> +
>  err_ret:
>  	return err;
>  }
> @@ -319,7 +322,7 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
>  {
>  	struct xe_device *xe = xe_bo_device(bo);
>  	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
> -	struct xe_migrate *migrate;
> +	struct xe_tile_vf_ccs *ctx;
>  	struct xe_tile *tile;
>  	struct xe_bb *bb;
>  	int err = 0;
> @@ -334,8 +337,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
>  		/* bb should be NULL here. Assert if not NULL */
>  		xe_assert(xe, !bb);
>  
> -		migrate = tile->sriov.vf.ccs[ctx_id].migrate;
> -		err = xe_migrate_ccs_rw_copy(migrate, bo, ctx_id);
> +		ctx = &tile->sriov.vf.ccs[ctx_id];
> +		err = xe_migrate_ccs_rw_copy(tile, ctx->q[ctx_id], bo, ctx_id);

This looks odd. You use ctx_id to resolve xe_tile_vf_ccs, and then also
use ctx_id again to resolve the queue within xe_tile_vf_ccs.

Don’t you just need a single queue pointer in xe_tile_vf_ccs, similar to
the existing migrate structure?

Everything else in this patch looks good.

Matt

>  	}
>  	return err;
>  }
> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
> index e240f3fd18af..465db42f2e66 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
> @@ -43,6 +43,8 @@ struct xe_tile_vf_ccs {
>  	enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
>  	/** @migrate: Migration helper for save/restore of CCS data */
>  	struct xe_migrate *migrate;
> +	/** @q: exec queues used for migration */
> +	struct xe_exec_queue *q[XE_SRIOV_VF_CCS_CTX_COUNT];
>  
>  	struct {
>  		/** @ccs_rw_bb_pool: Pool from which batch buffers are allocated. */
> -- 
> 2.43.0
>