[ONLY FOR INTERNAL REVIEW 1/3] drm/xe/vf: Create contexts for CCS read write

Satyanarayana K V P satyanarayana.k.v.p at intel.com
Fri May 16 11:48:30 UTC 2025


Create two LRCs to handle reading and writing of CCS metadata from the CCS
pool in the VM. The read context holds the GPU instructions to be executed at
save time, and the write context holds the GPU instructions to be executed at
restore time.

Allocate a batch buffer pool, using the suballocator, for both the read and
write contexts.

The migration framework is reused to create the LRCAs for read and write.

Signed-off-by: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
---
Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
Cc: Michał Winiarski <michal.winiarski at intel.com>
Cc: Tomasz Lis <tomasz.lis at intel.com>
Cc: Matthew Brost <matthew.brost at intel.com>
Cc: Matthew Auld <matthew.auld at intel.com>
---
 drivers/gpu/drm/xe/Makefile          |   3 +-
 drivers/gpu/drm/xe/xe_device.c       |   2 +
 drivers/gpu/drm/xe/xe_device_types.h |  15 +++
 drivers/gpu/drm/xe/xe_gt_debugfs.c   |  17 ++-
 drivers/gpu/drm/xe/xe_sriov.c        |  17 +++
 drivers/gpu/drm/xe/xe_sriov.h        |   1 +
 drivers/gpu/drm/xe/xe_sriov_vf_ccs.c | 156 +++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_sriov_vf_ccs.h |  13 +++
 8 files changed, 221 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
 create mode 100644 drivers/gpu/drm/xe/xe_sriov_vf_ccs.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index e4bf484d4121..ad2fb025463a 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -139,7 +139,8 @@ xe-y += \
 	xe_guc_relay.o \
 	xe_memirq.o \
 	xe_sriov.o \
-	xe_sriov_vf.o
+	xe_sriov_vf.o \
+	xe_sriov_vf_ccs.o
 
 xe-$(CONFIG_PCI_IOV) += \
 	xe_gt_sriov_pf.o \
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index d4b6e623aa48..c8cb25dbd0ac 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -929,6 +929,8 @@ int xe_device_probe(struct xe_device *xe)
 
 	xe_vsec_init(xe);
 
+	err = xe_sriov_late_init(xe);
+
 	return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
 
 err_unregister_display:
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 50b2bfa682ac..4f0b74bd0824 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -234,9 +234,24 @@ struct xe_tile {
 			/** @sriov.pf.lmtt: Local Memory Translation Table. */
 			struct xe_lmtt lmtt;
 		} pf;
+#define XE_CCS_READ_CTX	0
+#define XE_CCS_WRITE_CTX	1
+#define XE_CCS_RW_MAX_CTXS	(XE_CCS_WRITE_CTX + 1)
 		struct {
 			/** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */
 			struct xe_ggtt_node *ggtt_balloon[2];
+
+			struct xe_ccs_rw_ctx {
+				/** @migrate: Migration helper for save/restore of CCS data */
+				struct xe_migrate *migrate;
+
+				struct {
+					/** @ccs_rw_bb_pool: Pool from which batchbuffers
+					 * are allocated.
+					 */
+					struct xe_sa_manager *ccs_rw_bb_pool;
+				} mem;
+			} ccs_rw_ctx[XE_CCS_RW_MAX_CTXS];
 		} vf;
 	} sriov;
 
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 119a55bb7580..1ff54bdcfc3d 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -143,10 +143,23 @@ static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p)
 static int sa_info(struct xe_gt *gt, struct drm_printer *p)
 {
 	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_sa_manager *bb_pool;
 
 	xe_pm_runtime_get(gt_to_xe(gt));
-	drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p,
-				     tile->mem.kernel_bb_pool->gpu_addr);
+
+	drm_printf(p, "kernel_bb_pool info\n");
+	drm_printf(p, "-------------------------\n");
+	bb_pool = tile->mem.kernel_bb_pool;
+	drm_suballoc_dump_debug_info(&bb_pool->base, p, bb_pool->gpu_addr);
+
+	if (IS_SRIOV_VF(xe)) {
+		drm_printf(p, "\nccs_rw_bb_pool info\n");
+		drm_printf(p, "-------------------------\n");
+		bb_pool = tile->sriov.vf.ccs_rw_ctx[0].mem.ccs_rw_bb_pool;
+		drm_suballoc_dump_debug_info(&bb_pool->base, p, bb_pool->gpu_addr);
+	}
+
 	xe_pm_runtime_put(gt_to_xe(gt));
 
 	return 0;
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index a0eab44c0e76..fa6286e123f0 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -15,6 +15,7 @@
 #include "xe_sriov.h"
 #include "xe_sriov_pf.h"
 #include "xe_sriov_vf.h"
+#include "xe_sriov_vf_ccs.h"
 
 /**
  * xe_sriov_mode_to_string - Convert enum value to string.
@@ -157,3 +158,19 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size)
 		strscpy(buf, "PF", size);
 	return buf;
 }
+
+/**
+ * xe_sriov_late_init() - Run the late stage of SR-IOV initialization.
+ * @xe: the &xe_device to initialize
+ *
+ * When running as a VF this calls xe_sriov_vf_ccs_rw_init(); otherwise no-op.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_late_init(struct xe_device *xe)
+{
+	if (!IS_SRIOV_VF(xe))
+		return 0;
+
+	return xe_sriov_vf_ccs_rw_init(xe);
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h
index 688fbabf08f1..0e0c1abf2d14 100644
--- a/drivers/gpu/drm/xe/xe_sriov.h
+++ b/drivers/gpu/drm/xe/xe_sriov.h
@@ -18,6 +18,7 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t len);
 void xe_sriov_probe_early(struct xe_device *xe);
 void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p);
 int xe_sriov_init(struct xe_device *xe);
+int xe_sriov_late_init(struct xe_device *xe);
 
 static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe)
 {
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
new file mode 100644
index 000000000000..a8a21336dc12
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "instructions/xe_mi_commands.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_migrate.h"
+#include "xe_sa.h"
+#include "xe_sriov_printk.h"
+#include "xe_sriov_vf_ccs.h"
+
+/**
+ * DOC: VF save/restore of compression Meta Data.
+ *
+ * The VF KMD registers two special contexts/LRCAs.
+ *
+ * Save Context/LRCA: contains the necessary cmds + page table to save the meta
+ * data / compression control surface (aka CCS) to regular system memory in VM.
+ *
+ * Restore Context/LRCA: contains the necessary cmds + page table to restore the
+ * meta data / compression control surface (aka CCS) from regular system memory
+ * in the VM to the corresponding CCS pool.
+ *
+ * The diagram below explains the steps needed for VF save/restore of the
+ * compression meta data::
+ *
+ * CCS Save    CCS Restore          VF KMD                          Guc       BCS
+ *  LRCA        LRCA
+ *   |           |                     |                              |         |
+ *   |           |                     |                              |         |
+ *   |     Create Save LRCA            |                              |         |
+ *  [ ]<----------------------------- [ ]                             |         |
+ *   |           |                     |                              |         |
+ *   |           |                     |                              |         |
+ *   |           |                     |   Register LRCA with Guc     |         |
+ *   |           |                    [ ]--------------------------->[ ]        |
+ *   |           |                     |                              |         |
+ *   |           |                     |                              |         |
+ *   |           | Create restore LRCA |                              |         |
+ *   |          [ ]<------------------[ ]                             |         |
+ *   |           |                     |                              |         |
+ *   |           |                     |                              |         |
+ *   |           |                    [ ]-----------------------      |         |
+ *   |           |                    [ ]  Allocate main memory |     |         |
+ *   |           |                    [ ]  Allocate CCS memory  |     |         |
+ *   |           |                    [ ]<----------------------      |         |
+ *   |           |                     |                              |         |
+ *   |           |                     |                              |         |
+ *   | Update Main memory&CCS pages    |                              |         |
+ *   |   PPGTT + BB cmds to save       |                              |         |
+ *  [ ]<------------------------------[ ]                             |         |
+ *   |           |                     |                              |         |
+ *   |           |                     |                              |         |
+ *   |           | Update Main memory  |                              |         |
+ *   |           | & CCS pages PPGTT + |                              |         |
+ *   |           | BB cmds to restore  |                              |         |
+ *   |          [ ]<------------------[ ]                             |         |
+ *   |           |                     |                              |         |
+ *   |           |                     |                              |         |
+ *   |           |                   VF Pause                         |         |
+ *   |           |                     |                              |Schedule |
+ *   |           |                     |                              |CCS Save |
+ *   |           |                     |                              | LRCA    |
+ *   |           |                     |                             [ ]------>[ ]
+ *   |           |                     |                              |         |
+ *   |           |                     |                              |         |
+ *   |           |                   VF Restore                       |         |
+ *   |           |                     |                              |         |
+ *   |           |                     |                              |         |
+ *   |           |                    [ ]--------------               |         |
+ *   |           |                    [ ] Fix up GGTT  |              |         |
+ *   |           |                    [ ]<-------------               |         |
+ *   |           |                     |                              |         |
+ *   |           |                     |                              |         |
+ *   |           |                     |                              |Schedule |
+ *   |           |                     |                              |CCS      |
+ *   |           |                     |                              |Restore  |
+ *   |           |                     |                              |LRCA     |
+ *   |           |                     |                             [ ]------>[ ]
+ *   |           |                     |                              |         |
+ *   |           |                     |                              |         |
+ *
+ */
+
+#define for_each_ccs_rw_ctx(id__) \
+	for ((id__) = 0; (id__) < XE_CCS_RW_MAX_CTXS; (id__)++)
+
+#define MAX_CCS_RW_BB_POOL_SIZE	(SZ_4K * XE_PAGE_SIZE)	/* size of each ctx's BB pool */
+
+/*
+ * Create the batch buffer pool for @ctx: MI_NOOP filled, MI_BATCH_BUFFER_END last.
+ */
+static int alloc_bb_pool(struct xe_tile *tile, struct xe_ccs_rw_ctx *ctx)
+{
+	const int bb_end_offset = MAX_CCS_RW_BB_POOL_SIZE - sizeof(u32);
+	struct xe_device *xe = tile_to_xe(tile);
+	struct xe_sa_manager *pool;
+
+	pool = xe_sa_bo_manager_init(tile, MAX_CCS_RW_BB_POOL_SIZE, SZ_16);
+	if (IS_ERR(pool)) {
+		xe_sriov_err(xe, "Suballocator init failed\n");
+		return PTR_ERR(pool);
+	}
+
+	/* NOTE(review): byte-wise fill; presumably relies on MI_NOOP being 0. */
+	xe_map_memset(xe, &pool->bo->vmap, 0, MI_NOOP,
+		      MAX_CCS_RW_BB_POOL_SIZE);
+
+	xe_map_wr(xe, &pool->bo->vmap, bb_end_offset, u32, MI_BATCH_BUFFER_END);
+
+	ctx->mem.ccs_rw_bb_pool = pool;
+
+	return 0;
+}
+
+/**
+ * xe_sriov_vf_ccs_rw_init - Setup LRCA for save & restore.
+ * @xe: the &xe_device to initialize
+ *
+ * For each tile and each CCS context (read, write), calls xe_migrate_init()
+ * and allocates a batch buffer pool. No-op unless an SR-IOV VF without DGFX.
+ *
+ * This function shall be called only by VF.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_sriov_vf_ccs_rw_init(struct xe_device *xe)
+{
+	struct xe_tile *tile;
+	int tile_id, ctx_id;
+
+	if (!IS_SRIOV_VF(xe) || IS_DGFX(xe))
+		return 0;
+
+	for_each_tile(tile, xe, tile_id) {
+		for_each_ccs_rw_ctx(ctx_id) {
+			struct xe_ccs_rw_ctx *ctx;
+			struct xe_migrate *migrate;
+			int err;
+
+			ctx = &tile->sriov.vf.ccs_rw_ctx[ctx_id];
+			migrate = xe_migrate_init(tile);
+			if (IS_ERR(migrate))
+				return PTR_ERR(migrate);
+			ctx->migrate = migrate;
+
+			err = alloc_bb_pool(tile, ctx);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
new file mode 100644
index 000000000000..c371aabb4d21
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_VF_CCS_H_
+#define _XE_SRIOV_VF_CCS_H_
+
+struct xe_device;
+
+int xe_sriov_vf_ccs_rw_init(struct xe_device *xe);
+
+#endif
-- 
2.43.0



More information about the Intel-xe mailing list