[Intel-gfx][RFC V2 6/7] drm/i915/gvt: Add support to new VFIO subregion VFIO_REGION_SUBTYPE_INTEL_IGD_DEVICE_STATE

Yulei Zhang yulei.zhang at intel.com
Mon Jul 31 07:18:33 UTC 2017


Add new VFIO subregion VFIO_REGION_SUBTYPE_INTEL_IGD_DEVICE_STATE support
in vGPU, through this new region it can fetch or store the status of mdev
vGPU device for migration.

Signed-off-by: Yulei Zhang <yulei.zhang at intel.com>
---
 drivers/gpu/drm/i915/gvt/gvt.h     |   1 +
 drivers/gpu/drm/i915/gvt/kvmgt.c   | 106 +++++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/gvt/migrate.h |  88 ++++++++++++++++++++++++++++++
 include/uapi/linux/vfio.h          |   7 +--
 4 files changed, 196 insertions(+), 6 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/gvt/migrate.h

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 23eeb7c..6393632 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -46,6 +46,7 @@
 #include "sched_policy.h"
 #include "render.h"
 #include "cmd_parser.h"
+#include "migrate.h"
 
 #define GVT_MAX_VGPU 8
 
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index d2b13ae..b962174 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -53,11 +53,21 @@ static const struct intel_gvt_ops *intel_gvt_ops;
 #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
 #define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
 
+struct vfio_region;
+struct intel_vgpu_regops {
+	size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
+			size_t count, loff_t *ppos, bool iswrite);
+	void (*release)(struct intel_vgpu *vgpu,
+			struct vfio_region *region);
+};
+
 struct vfio_region {
 	u32				type;
 	u32				subtype;
 	size_t				size;
 	u32				flags;
+	const struct intel_vgpu_regops	*ops;
+	void				*data;
 };
 
 struct kvmgt_pgfn {
@@ -426,6 +436,73 @@ static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
 	}
 }
 
+static size_t intel_vgpu_reg_rw_device_state(struct intel_vgpu *vgpu, char *buf,
+		size_t count, loff_t *ppos, bool iswrite)
+{
+
+}
+
+static void intel_vgpu_reg_release_device_state(struct intel_vgpu *vgpu,
+		struct vfio_region *region)
+{
+	vfree(region->data);
+}
+
+static const struct intel_vgpu_regops intel_vgpu_regops_device_state = {
+	.rw 	 = intel_vgpu_reg_rw_device_state,
+	.release = intel_vgpu_reg_release_device_state,
+};
+
+static int intel_vgpu_register_region(struct intel_vgpu *vgpu,
+		unsigned int type, unsigned int subtype,
+		const struct intel_vgpu_regops *ops,
+		size_t size, u32 flags, void *data)
+{
+	struct vfio_region *region;
+
+	region = krealloc(vgpu->vdev.region,
+			(vgpu->vdev.num_regions + 1) * sizeof(*region),
+			GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+
+	vgpu->vdev.region = region;
+	vgpu->vdev.region[vgpu->vdev.num_regions].type = type;
+	vgpu->vdev.region[vgpu->vdev.num_regions].subtype = subtype;
+	vgpu->vdev.region[vgpu->vdev.num_regions].ops = ops;
+	vgpu->vdev.region[vgpu->vdev.num_regions].size = size;
+	vgpu->vdev.region[vgpu->vdev.num_regions].flags = flags;
+	vgpu->vdev.region[vgpu->vdev.num_regions].data = data;
+	vgpu->vdev.num_regions++;
+
+	return 0;
+}
+
+static int kvmgt_init_migration(struct intel_vgpu *vgpu)
+{
+	void *base;
+	int ret;
+
+	base = vzalloc(MIGRATION_IMG_MAX_SIZE);
+	if (base == NULL) {
+		gvt_vgpu_err("Unable to allocate size: %ld\n",
+				MIGRATION_IMG_MAX_SIZE);
+		return -ENOMEM;
+	}
+
+	ret = intel_vgpu_register_region(vgpu,
+			PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+			VFIO_REGION_SUBTYPE_INTEL_IGD_DEVICE_STATE,
+			&intel_vgpu_regops_device_state, MIGRATION_IMG_MAX_SIZE,
+			VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE,
+			base);
+	if(ret)
+		vfree(base);
+
+	return ret;
+
+}
+
 static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
 {
 	struct intel_vgpu *vgpu = NULL;
@@ -546,6 +623,8 @@ static int intel_vgpu_open(struct mdev_device *mdev)
 	if (ret)
 		goto undo_group;
 
+	kvmgt_init_migration(vgpu);
+
 	intel_gvt_ops->vgpu_activate(vgpu);
 
 	atomic_set(&vgpu->vdev.released, 0);
@@ -566,6 +645,7 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
 {
 	struct kvmgt_guest_info *info;
 	int ret;
+	int i;
 
 	if (!handle_valid(vgpu->handle))
 		return;
@@ -575,6 +655,13 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
 
 	intel_gvt_ops->vgpu_deactivate(vgpu);
 
+	for (i = 0; i < vgpu->vdev.num_regions; i++)
+		vgpu->vdev.region[i].ops->release(vgpu, &vgpu->vdev.region[i]);
+
+	vgpu->vdev.num_regions = 0;
+	kfree(vgpu->vdev.region);
+	vgpu->vdev.region = NULL;
+
 	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
 					&vgpu->vdev.iommu_notifier);
 	WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);
@@ -642,7 +729,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
 	int ret = -EINVAL;
 
 
-	if (index >= VFIO_PCI_NUM_REGIONS) {
+	if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) {
 		gvt_vgpu_err("invalid index: %u\n", index);
 		return -EINVAL;
 	}
@@ -676,8 +763,11 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
 	case VFIO_PCI_BAR5_REGION_INDEX:
 	case VFIO_PCI_VGA_REGION_INDEX:
 	case VFIO_PCI_ROM_REGION_INDEX:
+		break;
 	default:
-		gvt_vgpu_err("unsupported region: %u\n", index);
+		index -= VFIO_PCI_NUM_REGIONS;
+		ret = vgpu->vdev.region[index].ops->rw(vgpu, buf,
+				count, ppos, is_write);
 	}
 
 	return ret == 0 ? count : ret;
@@ -688,6 +778,10 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
 {
 	unsigned int done = 0;
 	int ret;
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+
+	if (index >= VFIO_PCI_NUM_REGIONS)
+		return intel_vgpu_rw(mdev, (char *)buf, count, ppos, false);
 
 	while (count) {
 		size_t filled;
@@ -748,6 +842,10 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev,
 {
 	unsigned int done = 0;
 	int ret;
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+
+	if (index >= VFIO_PCI_NUM_REGIONS)
+		return intel_vgpu_rw(mdev, (char *)buf, count, ppos, true);
 
 	while (count) {
 		size_t filled;
@@ -940,7 +1038,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
 
 		info.flags = VFIO_DEVICE_FLAGS_PCI;
 		info.flags |= VFIO_DEVICE_FLAGS_RESET;
-		info.num_regions = VFIO_PCI_NUM_REGIONS;
+		info.num_regions = VFIO_PCI_NUM_REGIONS +
+				   vgpu->vdev.num_regions;
 		info.num_irqs = VFIO_PCI_NUM_IRQS;
 
 		return copy_to_user((void __user *)arg, &info, minsz) ?
@@ -1060,6 +1159,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
 		}
 
 		if (caps.size) {
+			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
 			if (info.argsz < sizeof(info) + caps.size) {
 				info.argsz = sizeof(info) + caps.size;
 				info.cap_offset = 0;
diff --git a/drivers/gpu/drm/i915/gvt/migrate.h b/drivers/gpu/drm/i915/gvt/migrate.h
new file mode 100644
index 0000000..737815f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/migrate.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __GVT_MIGRATE_H__
+#define __GVT_MIGRATE_H__
+
+/* Assume 9MB is eough to descript VM kernel state */
+#define MIGRATION_IMG_MAX_SIZE (9*1024UL*1024UL)
+#define GVT_MMIO_SIZE (2*1024UL*1024UL)
+#define GVT_MIGRATION_VERSION	0
+
+enum gvt_migration_type_t {
+	GVT_MIGRATION_NONE,
+	GVT_MIGRATION_HEAD,
+	GVT_MIGRATION_CFG_SPACE,
+	GVT_MIGRATION_VREG,
+	GVT_MIGRATION_SREG,
+	GVT_MIGRATION_GTT,
+	GVT_MIGRATION_PPGTT,
+	GVT_MIGRATION_WORKLOAD,
+};
+
+typedef struct gvt_ppgtt_entry_t {
+	int page_table_level;
+	u32 pdp[8];
+} gvt_ppgtt_entry_t;
+
+typedef struct gvt_pending_workload_t {
+	int ring_id;
+	struct intel_vgpu_elsp_dwords elsp_dwords;
+} gvt_pending_workload_t;
+
+typedef struct gvt_region_t {
+	enum gvt_migration_type_t type;
+	u32 size;		/* obj size of bytes to read/write */
+} gvt_region_t;
+
+typedef struct gvt_migration_obj_t {
+	void *img;
+	void *vgpu;
+	u32 offset;
+	gvt_region_t region;
+	/* operation func defines how data save-restore */
+	struct gvt_migration_operation_t *ops;
+	char *name;
+} gvt_migration_obj_t;
+
+typedef struct gvt_migration_operation_t {
+	/* called during pre-copy stage, VM is still alive */
+	int (*pre_copy)(const gvt_migration_obj_t *obj);
+	/* called before when VM was paused,
+	 * return bytes transferred
+	 */
+	int (*pre_save)(const gvt_migration_obj_t *obj);
+	/* called before load the state of device */
+	int (*pre_load)(const gvt_migration_obj_t *obj, u32 size);
+	/* called after load the state of device, VM already alive */
+	int (*post_load)(const gvt_migration_obj_t *obj, u32 size);
+} gvt_migration_operation_t;
+
+typedef struct gvt_image_header_t {
+	int version;
+	int data_size;
+	u64 crc_check;
+	u64 global_data[64];
+} gvt_image_header_t;
+
+#endif
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index ae46105..b2a1952 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -296,9 +296,10 @@ struct vfio_region_info_cap_type {
 #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
 
 /* 8086 Vendor sub-types */
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION		(1)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG		(2)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG		(3)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_DEVICE_STATE	(4)
 
 /**
  * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
-- 
2.7.4



More information about the intel-gvt-dev mailing list