[PATCH v2 4/4] drm/xe/pf: Allow to view and replace VF LMEM and CCS state over debugfs

Lukasz Laguna <lukasz.laguna@intel.com>
Thu Oct 31 15:17:25 UTC 2024


For feature enablement and testing purposes, allow the VF LMEM and CCS
state to be saved and replaced through debugfs blob files. The files are
writable only under the strict debug config (CONFIG_DRM_XE_DEBUG_SRIOV).

Signed-off-by: Lukasz Laguna <lukasz.laguna@intel.com>
---
 drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c   |  78 +++++++
 drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 205 ++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h |   9 +
 3 files changed, 292 insertions(+)
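
Replacing the state is the mirror image (the nodes are writable, 0600, only
with CONFIG_DRM_XE_DEBUG_SRIOV enabled). A hedged sketch, again with an
illustrative path and file name:

  /* Hypothetical example: write a previously saved blob back into the
   * VF1/GT0 lmem_state node. */
  #include <fcntl.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>

  int main(void)
  {
  	static char buf[1u << 20];
  	const char *src = "vf1_gt0_lmem_state.bin";
  	const char *dst = "/sys/kernel/debug/dri/0/gt0/vf1/lmem_state";
  	ssize_t n;
  	int in, out;

  	in = open(src, O_RDONLY);
  	out = open(dst, O_WRONLY);
  	if (in < 0 || out < 0) {
  		perror("open");
  		return EXIT_FAILURE;
  	}

  	/* Each write() lands at the node's current file position, i.e.
  	 * at the matching offset inside the VF LMEM allocation. */
  	while ((n = read(in, buf, sizeof(buf))) > 0) {
  		if (write(out, buf, n) != n) {
  			perror("write");
  			return EXIT_FAILURE;
  		}
  	}

  	close(in);
  	close(out);
  	return n < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
  }

The ccs_state node behaves the same way, except that its size corresponds
to xe_device_ccs_bytes() of the VF LMEM size (see pf_read_lmem_state()
below).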

diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
index 05df4ab3514b..001fc6b585ee 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -422,6 +422,76 @@ static const struct file_operations guc_state_ops = {
 	.llseek		= default_llseek,
 };
 
+/*
+ *      /sys/kernel/debug/dri/0/
+ *      ├── gt0
+ *      │   ├── vf1
+ *      │   │   ├── lmem_state
+ */
+static ssize_t lmem_state_read(struct file *file, char __user *buf,
+			       size_t count, loff_t *pos)
+{
+	struct dentry *dent = file_dentry(file);
+	struct dentry *parent = dent->d_parent;
+	struct xe_gt *gt = extract_gt(parent);
+	unsigned int vfid = extract_vfid(parent);
+
+	return xe_gt_sriov_pf_migration_read_lmem_state(gt, vfid, buf, count, pos);
+}
+
+static ssize_t lmem_state_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *pos)
+{
+	struct dentry *dent = file_dentry(file);
+	struct dentry *parent = dent->d_parent;
+	struct xe_gt *gt = extract_gt(parent);
+	unsigned int vfid = extract_vfid(parent);
+
+	return xe_gt_sriov_pf_migration_write_lmem_state(gt, vfid, buf, count, pos);
+}
+
+static const struct file_operations lmem_state_ops = {
+	.owner		= THIS_MODULE,
+	.read		= lmem_state_read,
+	.write		= lmem_state_write,
+	.llseek		= default_llseek,
+};
+
+/*
+ *      /sys/kernel/debug/dri/0/
+ *      ├── gt0
+ *      │   ├── vf1
+ *      │   │   ├── ccs_state
+ */
+static ssize_t ccs_state_read(struct file *file, char __user *buf,
+			      size_t count, loff_t *pos)
+{
+	struct dentry *dent = file_dentry(file);
+	struct dentry *parent = dent->d_parent;
+	struct xe_gt *gt = extract_gt(parent);
+	unsigned int vfid = extract_vfid(parent);
+
+	return xe_gt_sriov_pf_migration_read_ccs_state(gt, vfid, buf, count, pos);
+}
+
+static ssize_t ccs_state_write(struct file *file, const char __user *buf,
+			       size_t count, loff_t *pos)
+{
+	struct dentry *dent = file_dentry(file);
+	struct dentry *parent = dent->d_parent;
+	struct xe_gt *gt = extract_gt(parent);
+	unsigned int vfid = extract_vfid(parent);
+
+	return xe_gt_sriov_pf_migration_write_ccs_state(gt, vfid, buf, count, pos);
+}
+
+static const struct file_operations ccs_state_ops = {
+	.owner		= THIS_MODULE,
+	.read		= ccs_state_read,
+	.write		= ccs_state_write,
+	.llseek		= default_llseek,
+};
+
 /*
  *      /sys/kernel/debug/dri/0/
  *      ├── gt0
@@ -554,6 +624,14 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root)
 			debugfs_create_file("config_blob",
 					    IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400,
 					    vfdentry, NULL, &config_blob_ops);
+			if (IS_DGFX(xe)) {
+				debugfs_create_file("lmem_state",
+						    IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ?
+						    0600 : 0400, vfdentry, NULL, &lmem_state_ops);
+				debugfs_create_file("ccs_state",
+						    IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ?
+						    0600 : 0400, vfdentry, NULL, &ccs_state_ops);
+			}
 		}
 	}
 }
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
index eca01c96a348..8ff6b7eebb33 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -6,7 +6,9 @@
 #include <drm/drm_managed.h>
 
 #include "abi/guc_actions_sriov_abi.h"
+#include "instructions/xe_gpu_commands.h"
 #include "xe_bo.h"
+#include "xe_gt_sriov_pf_config.h"
 #include "xe_gt_sriov_pf_helpers.h"
 #include "xe_gt_sriov_pf_migration.h"
 #include "xe_gt_sriov_printk.h"
@@ -381,6 +383,209 @@ ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int
 
 	return ret;
 }
+
+static ssize_t pf_read_lmem_state(struct xe_gt *gt, unsigned int vfid, bool ccs,
+				  char __user *buf, size_t count, loff_t *pos)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	size_t lmem_size, chunk_size;
+	struct xe_bo *smem_bo;
+	struct dma_fence *fence;
+	loff_t smem_bo_pos = 0;
+	ssize_t ret;
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	xe_gt_assert(gt, vfid != PFID);
+	xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+	if (!pf_migration_supported(gt))
+		return -ENOPKG;
+
+	lmem_size = xe_gt_sriov_pf_config_get_lmem(gt, vfid);
+	if (!lmem_size)
+		return -ENODATA;
+
+	chunk_size = min(count, (ccs ? xe_device_ccs_bytes(xe, lmem_size) : lmem_size) - *pos);
+	if (!chunk_size)
+		return 0;
+
+	smem_bo = xe_bo_create_pin_map(xe, NULL, NULL, PAGE_ALIGN(chunk_size), ttm_bo_type_kernel,
+				      XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+				      XE_BO_FLAG_PINNED);
+	if (IS_ERR(smem_bo))
+		return PTR_ERR(smem_bo);
+
+	if (ccs)
+		fence = xe_gt_sriov_pf_migration_save_lmem(gt, vfid, NULL, 0, smem_bo, 0,
+							   *pos * NUM_BYTES_PER_CCS_BYTE(xe),
+							   chunk_size * NUM_BYTES_PER_CCS_BYTE(xe));
+	else
+		fence = xe_gt_sriov_pf_migration_save_lmem(gt, vfid, smem_bo, 0, NULL, 0, *pos,
+							   chunk_size);
+
+	ret = dma_fence_wait_timeout(fence, false, 5 * HZ);
+	dma_fence_put(fence);
+	if (!ret) {
+		ret = -ETIME;
+		goto err_smem_bo_put;
+	}
+
+	ret = simple_read_from_buffer(buf, chunk_size, &smem_bo_pos, smem_bo->vmap.vaddr,
+				      chunk_size);
+	if (ret > 0) {
+		if (ret != chunk_size)
+			xe_gt_sriov_dbg(gt, "Failed to copy the entire chunk (copied bytes: %zd, expected: %zu)\n",
+				ret, chunk_size);
+		*pos += ret;
+	}
+
+err_smem_bo_put:
+	xe_bo_unpin_map_no_vm(smem_bo);
+
+	return ret;
+}
+
+static ssize_t pf_write_lmem_state(struct xe_gt *gt, unsigned int vfid, bool ccs,
+				   const char __user *buf, size_t count, loff_t *pos)
+{
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_bo *smem_bo;
+	loff_t smem_bo_pos = 0;
+	struct dma_fence *fence;
+	ssize_t ret, err;
+
+	xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+	xe_gt_assert(gt, vfid != PFID);
+	xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+	if (!pf_migration_supported(gt))
+		return -ENOPKG;
+
+	if (!xe_gt_sriov_pf_config_get_lmem(gt, vfid))
+		return -ENOPKG;
+
+	smem_bo = xe_bo_create_pin_map(xe, NULL, NULL, PAGE_ALIGN(count), ttm_bo_type_kernel,
+				      XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+				      XE_BO_FLAG_PINNED);
+	if (IS_ERR(smem_bo))
+		return PTR_ERR(smem_bo);
+
+	ret = simple_write_to_buffer(smem_bo->vmap.vaddr, count, &smem_bo_pos, buf, count);
+	if (ret < 0)
+		goto err_smem_bo_put;
+	if (ret != count)
+		xe_gt_sriov_dbg(gt, "Failed to copy the entire chunk (copied bytes: %zd, expected: %zu)\n",
+				ret, count);
+
+	if (ccs)
+		fence = xe_gt_sriov_pf_migration_restore_lmem(gt, vfid, NULL, 0, smem_bo, 0,
+							      *pos * NUM_BYTES_PER_CCS_BYTE(xe),
+							      ret * NUM_BYTES_PER_CCS_BYTE(xe));
+	else
+		fence = xe_gt_sriov_pf_migration_restore_lmem(gt, vfid, smem_bo, 0, NULL, 0, *pos,
+							      ret);
+
+	err = dma_fence_wait_timeout(fence, false, 5 * HZ);
+	dma_fence_put(fence);
+	if (!err) {
+		ret = -ETIME;
+		goto err_smem_bo_put;
+	}
+
+	*pos += ret;
+
+err_smem_bo_put:
+	xe_bo_unpin_map_no_vm(smem_bo);
+
+	return ret;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_read_lmem_state() - Read a VF LMEM state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @buf: the user space buffer to read to
+ * @count: the maximum number of bytes to read
+ * @pos: the current position in the buffer
+ *
+ * This function is for PF only.
+ *
+ * This function reads up to @count bytes, starting at offset @pos, from the
+ * LMEM buffer object assigned to the VF into the user space address @buf.
+ *
+ * Return: the number of bytes read or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_migration_read_lmem_state(struct xe_gt *gt, unsigned int vfid,
+						 char __user *buf, size_t count, loff_t *pos)
+{
+	return pf_read_lmem_state(gt, vfid, false, buf, count, pos);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_write_lmem_state() - Write a VF LMEM state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @buf: the user space buffer with VF LMEM state
+ * @count: the maximum number of bytes to write
+ * @pos: the current position in the buffer
+ *
+ * This function is for PF only.
+ *
+ * This function reads @count bytes of the VF LMEM state from the user space
+ * buffer @buf and writes them, at offset @pos, into the LMEM buffer object
+ * assigned to the VF.
+ *
+ * Return: the number of bytes used or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_migration_write_lmem_state(struct xe_gt *gt, unsigned int vfid,
+						  const char __user *buf, size_t count,
+						  loff_t *pos)
+{
+	return pf_write_lmem_state(gt, vfid, false, buf, count, pos);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_read_ccs_state() - Read a VF CCS state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @buf: the user space buffer to read to
+ * @count: the maximum number of bytes to read
+ * @pos: the current position in the buffer
+ *
+ * This function is for PF only.
+ *
+ * This function reads up to @count bytes of the VF CCS data at offset @pos
+ * into the user space address starting at @buf.
+ *
+ * Return: the number of bytes read or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_migration_read_ccs_state(struct xe_gt *gt, unsigned int vfid,
+						char __user *buf, size_t count, loff_t *pos)
+{
+	return pf_read_lmem_state(gt, vfid, true, buf, count, pos);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_write_ccs_state() - Write a VF CCS state.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @buf: the user space buffer with VF CCS state
+ * @count: the maximum number of bytes to write
+ * @pos: the current position in the buffer
+ *
+ * This function is for PF only.
+ *
+ * This function reads @count bytes of the VF CCS state from the user space
+ * address @buf and writes them, at offset @pos, into the device memory where
+ * the VF CCS data is stored.
+ *
+ * Return: the number of bytes used or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_migration_write_ccs_state(struct xe_gt *gt, unsigned int vfid,
+						 const char __user *buf, size_t count, loff_t *pos)
+{
+	return pf_write_lmem_state(gt, vfid, true, buf, count, pos);
+}
 #endif /* CONFIG_DEBUG_FS */
 
 static struct dma_fence *pf_save_restore_lmem(struct xe_gt *gt, unsigned int vfid,
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
index a4301574d92c..da1b067baf56 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
@@ -28,6 +28,15 @@ ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int v
 						char __user *buf, size_t count, loff_t *pos);
 ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid,
 						 const char __user *buf, size_t count);
+ssize_t xe_gt_sriov_pf_migration_read_lmem_state(struct xe_gt *gt, unsigned int vfid,
+						 char __user *buf, size_t count, loff_t *pos);
+ssize_t xe_gt_sriov_pf_migration_write_lmem_state(struct xe_gt *gt, unsigned int vfid,
+						  const char __user *buf, size_t count,
+						  loff_t *pos);
+ssize_t xe_gt_sriov_pf_migration_read_ccs_state(struct xe_gt *gt, unsigned int vfid,
+						char __user *buf, size_t count, loff_t *pos);
+ssize_t xe_gt_sriov_pf_migration_write_ccs_state(struct xe_gt *gt, unsigned int vfid,
+						 const char __user *buf, size_t count, loff_t *pos);
 #endif
 
 #endif
-- 
2.40.0


