[PATCH v3 1/2] drm/xe/vf: Refactor CCS save/restore to use default migration context
K V P, Satyanarayana
satyanarayana.k.v.p at intel.com
Thu Aug 7 11:15:00 UTC 2025
On 07-08-2025 00:20, John Harrison wrote:
> On 8/6/2025 1:29 AM, Satyanarayana K V P wrote:
>> Previously, CCS save/restore operations created separate migration
>> contexts with new VM memory allocations, resulting in significant
>> overhead.
>>
>> This commit eliminates redundant context creation reusing the default
>> migration context by registering new execution queues for CCS save and
>> restore on the existing migrate VM.
>>
>> Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
>> Suggested-by: Matthew Brost <matthew.brost at intel.com>
>> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
>>
>> ---
>> V2 -> V3:
>> - Fixed review comments (Matthew Brost).
>>
>> V1 -> V2:
>> - Fixed kernel-doc issues reported by patchworks.
>> ---
>> drivers/gpu/drm/xe/xe_migrate.c | 18 ++++----
>> drivers/gpu/drm/xe/xe_migrate.h | 2 +-
>> drivers/gpu/drm/xe/xe_pm.c | 3 ++
>> drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 49 ++++++++++++----------
>> drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h | 6 +--
>> 5 files changed, 43 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/
>> xe_migrate.c
>> index 3a276e2348a2..7d3c7c4e2185 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.c
>> +++ b/drivers/gpu/drm/xe/xe_migrate.c
>> @@ -960,14 +960,14 @@ struct xe_lrc *xe_migrate_lrc(struct xe_migrate
>> *migrate)
>> return migrate->q->lrc[0];
>> }
>> -static int emit_flush_invalidate(struct xe_migrate *m, u32 *dw, int i,
>> +static int emit_flush_invalidate(struct xe_exec_queue *q, u32 *dw,
>> int i,
>> u32 flags)
>> {
>> dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB |
>> MI_FLUSH_DW_OP_STOREDW |
>> MI_FLUSH_IMM_DW | flags;
>> - dw[i++] =
>> lower_32_bits(xe_lrc_start_seqno_ggtt_addr(xe_migrate_lrc(m))) |
>> + dw[i++] = lower_32_bits(xe_lrc_start_seqno_ggtt_addr(q->lrc[0])) |
>> MI_FLUSH_DW_USE_GTT;
>> - dw[i++] =
>> upper_32_bits(xe_lrc_start_seqno_ggtt_addr(xe_migrate_lrc(m)));
>> + dw[i++] = upper_32_bits(xe_lrc_start_seqno_ggtt_addr(q->lrc[0]));
> Is it worth keeping a helper function here to abstract out future
> changes? E.g. "xe_migrate_lrc(ctx) { return ctx->q->lrc[0] };". Using
> "q->lrc[0]" everywhere seems very like magic number usage. It is not
> obvious why that is correct given the very generic naming.
>
>
Fixed and sent new version.>> dw[i++] = MI_NOOP;
>> dw[i++] = MI_NOOP;
>> @@ -976,7 +976,8 @@ static int emit_flush_invalidate(struct xe_migrate
>> *m, u32 *dw, int i,
>> /**
>> * xe_migrate_ccs_rw_copy() - Copy content of TTM resources.
>> - * @m: The migration context.
>> + * @tile: Tile whose migration context to be used.
>> + * @q : Execution to be used along with migration context.
>> * @src_bo: The buffer object @src is currently bound to.
>> * @read_write : Creates BB commands for CCS read/write.
>> *
>> @@ -987,7 +988,7 @@ static int emit_flush_invalidate(struct xe_migrate
>> *m, u32 *dw, int i,
>> *
>> * Return: 0 if successful, negative error code on failure.
>> */
>> -int xe_migrate_ccs_rw_copy(struct xe_migrate *m,
>> +int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue
>> *q,
>> struct xe_bo *src_bo,
>> enum xe_sriov_vf_ccs_rw_ctxs read_write)
>> @@ -995,7 +996,8 @@ int xe_migrate_ccs_rw_copy(struct xe_migrate *m,
>> bool src_is_pltt = read_write == XE_SRIOV_VF_CCS_READ_CTX;
>> bool dst_is_pltt = read_write == XE_SRIOV_VF_CCS_WRITE_CTX;
>> struct ttm_resource *src = src_bo->ttm.resource;
>> - struct xe_gt *gt = m->tile->primary_gt;
>> + struct xe_migrate *m = tile->migrate;
>> + struct xe_gt *gt = tile->primary_gt;
>> u32 batch_size, batch_size_allocated;
>> struct xe_device *xe = gt_to_xe(gt);
>> struct xe_res_cursor src_it, ccs_it;
>> @@ -1078,11 +1080,11 @@ int xe_migrate_ccs_rw_copy(struct xe_migrate *m,
>> emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
>> - bb->len = emit_flush_invalidate(m, bb->cs, bb->len,
>> flush_flags);
>> + bb->len = emit_flush_invalidate(q, bb->cs, bb->len,
>> flush_flags);
>> flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
>> src_is_pltt,
>> src_L0_ofs, dst_is_pltt,
>> src_L0, ccs_ofs, true);
>> - bb->len = emit_flush_invalidate(m, bb->cs, bb->len,
>> flush_flags);
>> + bb->len = emit_flush_invalidate(q, bb->cs, bb->len,
>> flush_flags);
>> size -= src_L0;
>> }
>> diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/
>> xe_migrate.h
>> index e81ea6b27fb5..9e20da6d58c2 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.h
>> +++ b/drivers/gpu/drm/xe/xe_migrate.h
>> @@ -124,7 +124,7 @@ struct dma_fence *xe_migrate_copy(struct
>> xe_migrate *m,
>> struct ttm_resource *dst,
>> bool copy_only_ccs);
>> -int xe_migrate_ccs_rw_copy(struct xe_migrate *m,
>> +int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue
>> *q,
>> struct xe_bo *src_bo,
>> enum xe_sriov_vf_ccs_rw_ctxs read_write);
>> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
>> index 44aaf154ddf7..5e8126ca8e27 100644
>> --- a/drivers/gpu/drm/xe/xe_pm.c
>> +++ b/drivers/gpu/drm/xe/xe_pm.c
>> @@ -209,6 +209,9 @@ int xe_pm_resume(struct xe_device *xe)
>> xe_pxp_pm_resume(xe->pxp);
>> + if (IS_SRIOV_VF(xe))
>> + xe_sriov_vf_ccs_register_context(xe);
>> +
>> drm_dbg(&xe->drm, "Device resumed\n");
>> return 0;
>> err:
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/
>> xe/xe_sriov_vf_ccs.c
>> index f0ca2a9b2bb7..a87f39eae4dc 100644
>> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
>> @@ -8,6 +8,7 @@
>> #include "xe_bb.h"
>> #include "xe_bo.h"
>> #include "xe_device.h"
>> +#include "xe_exec_queue.h"
>> #include "xe_exec_queue_types.h"
>> #include "xe_guc_submit.h"
>> #include "xe_lrc.h"
>> @@ -168,7 +169,7 @@ static int alloc_bb_pool(struct xe_tile *tile,
>> struct xe_tile_vf_ccs *ctx)
>> static void ccs_rw_update_ring(struct xe_tile_vf_ccs *ctx)
>> {
>> - struct xe_lrc *lrc = xe_migrate_lrc(ctx->migrate);
>> + struct xe_lrc *lrc = ctx->q->lrc[0];
>> u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
>> u32 dw[10], i = 0;
>> @@ -183,13 +184,12 @@ static void ccs_rw_update_ring(struct
>> xe_tile_vf_ccs *ctx)
>> xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
>> }
>> -static int register_save_restore_context(struct xe_migrate *m,
>> - enum xe_sriov_vf_ccs_rw_ctxs ctx_id)
>> +static int register_save_restore_context(struct xe_tile_vf_ccs *ctx)
>> {
>> int err = -EINVAL;
>> int ctx_type;
>> - switch (ctx_id) {
>> + switch (ctx->ctx_id) {
>> case XE_SRIOV_VF_CCS_READ_CTX:
>> ctx_type = GUC_CONTEXT_COMPRESSION_SAVE;
>> break;
>> @@ -200,7 +200,7 @@ static int register_save_restore_context(struct
>> xe_migrate *m,
>> return err;
>> }
>> - xe_guc_register_exec_queue(xe_migrate_exec_queue(m), ctx_type);
>> + xe_guc_register_exec_queue(ctx->q, ctx_type);
>> return 0;
>> }
>> @@ -225,7 +225,7 @@ int xe_sriov_vf_ccs_register_context(struct
>> xe_device *xe)
>> for_each_ccs_rw_ctx(ctx_id) {
>> ctx = &tile->sriov.vf.ccs[ctx_id];
>> - err = register_save_restore_context(ctx->migrate, ctx_id);
>> + err = register_save_restore_context(ctx);
>> if (err)
>> return err;
>> }
>> @@ -236,13 +236,14 @@ int xe_sriov_vf_ccs_register_context(struct
>> xe_device *xe)
>> static void xe_sriov_vf_ccs_fini(void *arg)
>> {
>> struct xe_tile_vf_ccs *ctx = arg;
>> - struct xe_lrc *lrc = xe_migrate_lrc(ctx->migrate);
>> + struct xe_lrc *lrc = ctx->q->lrc[0];
>> /*
>> * Make TAIL = HEAD in the ring so that no issues are seen if Guc
>> * submits this context to HW on VF pause after unbinding device.
>> */
>> xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc));
>> + xe_exec_queue_put(ctx->q);
>> }
>> /**
>> @@ -258,8 +259,9 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>> {
>> struct xe_tile *tile = xe_device_get_root_tile(xe);
>> enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
>> - struct xe_migrate *migrate;
>> struct xe_tile_vf_ccs *ctx;
>> + struct xe_exec_queue *q;
>> + u32 flags;
>> int err;
>> xe_assert(xe, IS_SRIOV_VF(xe));
>> @@ -270,27 +272,25 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>> ctx = &tile->sriov.vf.ccs[ctx_id];
>> ctx->ctx_id = ctx_id;
>> - migrate = xe_migrate_alloc(tile);
>> - if (!migrate) {
>> - err = -ENOMEM;
>> + flags = EXEC_QUEUE_FLAG_KERNEL |
>> + EXEC_QUEUE_FLAG_PERMANENT |
>> + EXEC_QUEUE_FLAG_MIGRATE;
>> + q = xe_exec_queue_create_bind(xe, tile, flags, 0);
>> + if (IS_ERR(q)) {
>> + err = PTR_ERR(q);
>> goto err_ret;
>> }
>> -
>> - err = xe_migrate_init(migrate);
>> - if (err)
>> - goto err_ret;
>> -
>> - ctx->migrate = migrate;
>> + ctx->q = q;
>> err = alloc_bb_pool(tile, ctx);
>> if (err)
>> - goto err_ret;
>> + goto err_free_queue;
>> ccs_rw_update_ring(ctx);
>> - err = register_save_restore_context(ctx->migrate, ctx_id);
>> + err = register_save_restore_context(ctx);
>> if (err)
>> - goto err_ret;
>> + goto err_free_queue;
>> err = devm_add_action_or_reset(xe->drm.dev,
>> xe_sriov_vf_ccs_fini,
>> @@ -301,6 +301,9 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
>> return 0;
>> +err_free_queue:
>> + xe_exec_queue_put(q);
>> +
>> err_ret:
>> return err;
>> }
>> @@ -319,7 +322,7 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
>> {
>> struct xe_device *xe = xe_bo_device(bo);
>> enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
>> - struct xe_migrate *migrate;
>> + struct xe_tile_vf_ccs *ctx;
>> struct xe_tile *tile;
>> struct xe_bb *bb;
>> int err = 0;
>> @@ -334,8 +337,8 @@ int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
>> /* bb should be NULL here. Assert if not NULL */
>> xe_assert(xe, !bb);
>> - migrate = tile->sriov.vf.ccs[ctx_id].migrate;
>> - err = xe_migrate_ccs_rw_copy(migrate, bo, ctx_id);
>> + ctx = &tile->sriov.vf.ccs[ctx_id];
>> + err = xe_migrate_ccs_rw_copy(tile, ctx->q, bo, ctx_id);
>> }
>> return err;
>> }
>> diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/
>> drm/xe/xe_sriov_vf_ccs_types.h
>> index e240f3fd18af..1add0541aed8 100644
>> --- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
>> +++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
>> @@ -41,11 +41,11 @@ struct xe_sa_manager;
>> struct xe_tile_vf_ccs {
>> /** @id: Id to which context it belongs to */
>> enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
>> - /** @migrate: Migration helper for save/restore of CCS data */
>> - struct xe_migrate *migrate;
>> + /** @q: exec queues used for migration */
>> + struct xe_exec_queue *q;
> Should this be 'migrate_q' or some such to identify what its purpose is?
> Just calling it 'q' is very generic and open to use by other things
> (accidentally or deliberately).
>
> John.
Fixed and sent new version.>
>> struct {
>> - /** @ccs_rw_bb_pool: Pool from which batch buffers are
>> allocated. */
>> + /** @ccs_bb_pool: Pool from which batch buffers are
>> allocated. */
>> struct xe_sa_manager *ccs_bb_pool;
>> } mem;
>> };
>
More information about the Intel-xe
mailing list