[Intel-gfx][RFC V2 6/7] drm/i915/gvt: Add support to new VFIO subregion VFIO_REGION_SUBTYPE_INTEL_IGD_DEVICE_STATE
Yulei Zhang
yulei.zhang at intel.com
Mon Jul 31 07:18:33 UTC 2017
Add support for the new VFIO subregion VFIO_REGION_SUBTYPE_INTEL_IGD_DEVICE_STATE
in vGPU. Through this new region, the state of the mdev vGPU device can be
fetched or stored for migration.
Signed-off-by: Yulei Zhang <yulei.zhang at intel.com>
---
drivers/gpu/drm/i915/gvt/gvt.h | 1 +
drivers/gpu/drm/i915/gvt/kvmgt.c | 106 +++++++++++++++++++++++++++++++++++--
drivers/gpu/drm/i915/gvt/migrate.h | 88 ++++++++++++++++++++++++++++++
include/uapi/linux/vfio.h | 7 +--
4 files changed, 196 insertions(+), 6 deletions(-)
create mode 100644 drivers/gpu/drm/i915/gvt/migrate.h
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 23eeb7c..6393632 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -46,6 +46,7 @@
#include "sched_policy.h"
#include "render.h"
#include "cmd_parser.h"
+#include "migrate.h"
#define GVT_MAX_VGPU 8
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index d2b13ae..b962174 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -53,11 +53,21 @@ static const struct intel_gvt_ops *intel_gvt_ops;
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
+struct vfio_region;
+/*
+ * Callbacks attached to a device specific struct vfio_region.
+ *
+ * @rw:      called from the default case of intel_vgpu_rw() for region
+ *           indexes at or above VFIO_PCI_NUM_REGIONS, with the caller's
+ *           buffer, byte count, file position and direction flag.
+ * @release: called from __intel_vgpu_release() for every registered region
+ *           before the region array itself is freed.
+ *
+ * NOTE(review): @rw returns size_t, but intel_vgpu_rw() stores the result in
+ * an int and treats 0 as success; a signed return type would express error
+ * codes more clearly.
+ */
+struct intel_vgpu_regops {
+ size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
+ size_t count, loff_t *ppos, bool iswrite);
+ void (*release)(struct intel_vgpu *vgpu,
+ struct vfio_region *region);
+};
+
struct vfio_region {
u32 type;
u32 subtype;
size_t size;
u32 flags;
+ const struct intel_vgpu_regops *ops; /* rw/release callbacks for this region */
+ void *data; /* opaque pointer supplied at registration; the device state region stores its vzalloc'ed buffer here */
};
struct kvmgt_pgfn {
@@ -426,6 +436,73 @@ static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
}
}
+/*
+ * intel_vgpu_reg_rw_device_state - rw handler of the device state region
+ * @vgpu:    vGPU the access is directed at
+ * @buf:     caller supplied buffer
+ * @count:   number of bytes requested
+ * @ppos:    file position of the access
+ * @iswrite: true for a write, false for a read
+ *
+ * Placeholder: saving/restoring the device state is not implemented here.
+ * Fail the access explicitly instead of falling off the end of a non-void
+ * function, which is undefined behaviour once the caller uses the result.
+ *
+ * The errno travels through the size_t return type; intel_vgpu_rw() stores
+ * the result in an int, so it sees -EINVAL, the same value it returned for
+ * unsupported region indexes before device specific regions were added.
+ */
+static size_t intel_vgpu_reg_rw_device_state(struct intel_vgpu *vgpu, char *buf,
+		size_t count, loff_t *ppos, bool iswrite)
+{
+	return -EINVAL;
+}
+
+/*
+ * intel_vgpu_reg_release_device_state - release op of the device state region
+ * @vgpu:   vGPU owning the region (unused)
+ * @region: region whose buffer is released
+ *
+ * Frees the vmalloc'ed buffer stored in @region->data.
+ */
+static void intel_vgpu_reg_release_device_state(struct intel_vgpu *vgpu,
+		struct vfio_region *region)
+{
+	vfree(region->data);
+	/* Do not leave a dangling pointer behind in the region descriptor. */
+	region->data = NULL;
+}
+
+/*
+ * Region callbacks for the VFIO_REGION_SUBTYPE_INTEL_IGD_DEVICE_STATE region;
+ * handed to intel_vgpu_register_region() by kvmgt_init_migration().
+ */
+static const struct intel_vgpu_regops intel_vgpu_regops_device_state = {
+ .rw = intel_vgpu_reg_rw_device_state,
+ .release = intel_vgpu_reg_release_device_state,
+};
+
+/*
+ * intel_vgpu_register_region - append a device specific region to a vGPU
+ * @vgpu:    vGPU that receives the region
+ * @type:    region type
+ * @subtype: region subtype
+ * @ops:     callbacks used to access and release the region
+ * @size:    region size
+ * @flags:   region flags
+ * @data:    opaque pointer stored alongside the region
+ *
+ * Grows vgpu->vdev.region by one element and fills in the new last slot.
+ *
+ * Returns 0 on success, or -ENOMEM when the array cannot be grown; in that
+ * case vgpu->vdev.region and vgpu->vdev.num_regions are left unchanged.
+ */
+static int intel_vgpu_register_region(struct intel_vgpu *vgpu,
+		unsigned int type, unsigned int subtype,
+		const struct intel_vgpu_regops *ops,
+		size_t size, u32 flags, void *data)
+{
+	struct vfio_region *regions;
+	struct vfio_region *slot;
+
+	regions = krealloc(vgpu->vdev.region,
+			   (vgpu->vdev.num_regions + 1) * sizeof(*regions),
+			   GFP_KERNEL);
+	if (!regions)
+		return -ENOMEM;
+
+	/* Publish the (possibly moved) array before touching the new slot. */
+	vgpu->vdev.region = regions;
+
+	slot = &vgpu->vdev.region[vgpu->vdev.num_regions];
+	slot->type = type;
+	slot->subtype = subtype;
+	slot->ops = ops;
+	slot->size = size;
+	slot->flags = flags;
+	slot->data = data;
+
+	vgpu->vdev.num_regions++;
+
+	return 0;
+}
+
+/*
+ * kvmgt_init_migration - set up the device state region of a vGPU
+ * @vgpu: vGPU to add the region to
+ *
+ * Allocates a zeroed buffer of MIGRATION_IMG_MAX_SIZE bytes and registers it
+ * as a readable and writable Intel vendor region of subtype
+ * VFIO_REGION_SUBTYPE_INTEL_IGD_DEVICE_STATE. Once registered, the buffer is
+ * reachable through the region's data pointer and is freed by the region's
+ * release op; if registration fails it is freed here.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or the
+ * error returned by intel_vgpu_register_region().
+ */
+static int kvmgt_init_migration(struct intel_vgpu *vgpu)
+{
+	void *base;
+	int ret;
+
+	base = vzalloc(MIGRATION_IMG_MAX_SIZE);
+	if (!base) {
+		/* MIGRATION_IMG_MAX_SIZE is an unsigned long constant: %lu */
+		gvt_vgpu_err("Unable to allocate size: %lu\n",
+			     MIGRATION_IMG_MAX_SIZE);
+		return -ENOMEM;
+	}
+
+	ret = intel_vgpu_register_region(vgpu,
+			PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+			VFIO_REGION_SUBTYPE_INTEL_IGD_DEVICE_STATE,
+			&intel_vgpu_regops_device_state, MIGRATION_IMG_MAX_SIZE,
+			VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE,
+			base);
+	if (ret)
+		vfree(base);
+
+	return ret;
+}
+
static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
{
struct intel_vgpu *vgpu = NULL;
@@ -546,6 +623,8 @@ static int intel_vgpu_open(struct mdev_device *mdev)
if (ret)
goto undo_group;
+ kvmgt_init_migration(vgpu);
+
intel_gvt_ops->vgpu_activate(vgpu);
atomic_set(&vgpu->vdev.released, 0);
@@ -566,6 +645,7 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
struct kvmgt_guest_info *info;
int ret;
+ int i;
if (!handle_valid(vgpu->handle))
return;
@@ -575,6 +655,13 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
intel_gvt_ops->vgpu_deactivate(vgpu);
+ for (i = 0; i < vgpu->vdev.num_regions; i++)
+ vgpu->vdev.region[i].ops->release(vgpu, &vgpu->vdev.region[i]);
+
+ vgpu->vdev.num_regions = 0;
+ kfree(vgpu->vdev.region);
+ vgpu->vdev.region = NULL;
+
ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
&vgpu->vdev.iommu_notifier);
WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);
@@ -642,7 +729,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
int ret = -EINVAL;
- if (index >= VFIO_PCI_NUM_REGIONS) {
+ if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) {
gvt_vgpu_err("invalid index: %u\n", index);
return -EINVAL;
}
@@ -676,8 +763,11 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
case VFIO_PCI_BAR5_REGION_INDEX:
case VFIO_PCI_VGA_REGION_INDEX:
case VFIO_PCI_ROM_REGION_INDEX:
+ break;
default:
- gvt_vgpu_err("unsupported region: %u\n", index);
+ index -= VFIO_PCI_NUM_REGIONS;
+ ret = vgpu->vdev.region[index].ops->rw(vgpu, buf,
+ count, ppos, is_write);
}
return ret == 0 ? count : ret;
@@ -688,6 +778,10 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
{
unsigned int done = 0;
int ret;
+ unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+
+ if (index >= VFIO_PCI_NUM_REGIONS)
+ return intel_vgpu_rw(mdev, (char *)buf, count, ppos, false);
while (count) {
size_t filled;
@@ -748,6 +842,10 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev,
{
unsigned int done = 0;
int ret;
+ unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+
+ if (index >= VFIO_PCI_NUM_REGIONS)
+ return intel_vgpu_rw(mdev, (char *)buf, count, ppos, true);
while (count) {
size_t filled;
@@ -940,7 +1038,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
info.flags = VFIO_DEVICE_FLAGS_PCI;
info.flags |= VFIO_DEVICE_FLAGS_RESET;
- info.num_regions = VFIO_PCI_NUM_REGIONS;
+ info.num_regions = VFIO_PCI_NUM_REGIONS +
+ vgpu->vdev.num_regions;
info.num_irqs = VFIO_PCI_NUM_IRQS;
return copy_to_user((void __user *)arg, &info, minsz) ?
@@ -1060,6 +1159,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
}
if (caps.size) {
+ info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
if (info.argsz < sizeof(info) + caps.size) {
info.argsz = sizeof(info) + caps.size;
info.cap_offset = 0;
diff --git a/drivers/gpu/drm/i915/gvt/migrate.h b/drivers/gpu/drm/i915/gvt/migrate.h
new file mode 100644
index 0000000..737815f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/migrate.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __GVT_MIGRATE_H__
+#define __GVT_MIGRATE_H__
+
+/* Assume 9MB is enough to describe the VM kernel state */
+#define MIGRATION_IMG_MAX_SIZE (9*1024UL*1024UL)
+#define GVT_MMIO_SIZE (2*1024UL*1024UL)
+#define GVT_MIGRATION_VERSION 0
+
+/*
+ * Tag stored in gvt_region_t.type.
+ * NOTE(review): judging by the names, each value identifies one kind of vGPU
+ * state carried in the migration image - confirm against the save/restore
+ * code.
+ */
+enum gvt_migration_type_t {
+ GVT_MIGRATION_NONE,
+ GVT_MIGRATION_HEAD,
+ GVT_MIGRATION_CFG_SPACE,
+ GVT_MIGRATION_VREG,
+ GVT_MIGRATION_SREG,
+ GVT_MIGRATION_GTT,
+ GVT_MIGRATION_PPGTT,
+ GVT_MIGRATION_WORKLOAD,
+};
+
+/*
+ * NOTE(review): presumably one saved PPGTT root, i.e. its page table level
+ * plus up to eight 32-bit PDP values - confirm against the users of this type.
+ */
+typedef struct gvt_ppgtt_entry_t {
+ int page_table_level;
+ u32 pdp[8];
+} gvt_ppgtt_entry_t;
+
+/*
+ * NOTE(review): presumably describes one workload that was still pending when
+ * the state was saved, as a ring id plus the ELSP dwords - confirm against
+ * the users of this type.
+ */
+typedef struct gvt_pending_workload_t {
+ int ring_id;
+ struct intel_vgpu_elsp_dwords elsp_dwords;
+} gvt_pending_workload_t;
+
+/* Type tag and size in bytes of one migration object. */
+typedef struct gvt_region_t {
+ enum gvt_migration_type_t type;
+ u32 size; /* size of the object in bytes to read/write */
+} gvt_region_t;
+
+/*
+ * One migration object: a region descriptor together with the callbacks that
+ * save and restore it.
+ * NOTE(review): the meaning of img, vgpu and offset is not visible from this
+ * header; presumably the image buffer, the owning vGPU and the object's
+ * offset - confirm against the users of this type.
+ */
+typedef struct gvt_migration_obj_t {
+ void *img;
+ void *vgpu;
+ u32 offset;
+ gvt_region_t region;
+ /* callbacks defining how this object's data is saved and restored */
+ struct gvt_migration_operation_t *ops;
+ char *name;
+} gvt_migration_obj_t;
+
+/* Save/restore callbacks of a migration object. */
+typedef struct gvt_migration_operation_t {
+ /* called during the pre-copy stage, while the VM is still alive */
+ int (*pre_copy)(const gvt_migration_obj_t *obj);
+ /* called once the VM has been paused; returns the number of bytes
+ * transferred (NOTE(review): original wording "called before when VM
+ * was paused" - confirm the exact timing)
+ */
+ int (*pre_save)(const gvt_migration_obj_t *obj);
+ /* called before the state of the device is loaded */
+ int (*pre_load)(const gvt_migration_obj_t *obj, u32 size);
+ /* called after the state of the device is loaded; the VM is already alive */
+ int (*post_load)(const gvt_migration_obj_t *obj, u32 size);
+} gvt_migration_operation_t;
+
+/*
+ * Header of a migration image.
+ * NOTE(review): version is presumably compared against GVT_MIGRATION_VERSION
+ * and crc_check presumably covers data_size bytes of payload - neither is
+ * visible from this header; confirm against the save/restore code.
+ */
+typedef struct gvt_image_header_t {
+ int version;
+ int data_size;
+ u64 crc_check;
+ u64 global_data[64];
+} gvt_image_header_t;
+
+#endif
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index ae46105..b2a1952 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -296,9 +296,10 @@ struct vfio_region_info_cap_type {
#define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
/* 8086 Vendor sub-types */
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
-#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_DEVICE_STATE (4)
/**
* VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
--
2.7.4
More information about the intel-gvt-dev
mailing list