[PATCH 2/5] vfio/migration: support device of device config capability
Cornelia Huck
cohuck at redhat.com
Tue Feb 19 14:37:24 UTC 2019
On Tue, 19 Feb 2019 16:52:27 +0800
Yan Zhao <yan.y.zhao at intel.com> wrote:
> Device config is the default data that every device should have. so
> device config capability is by default on, no need to set.
>
> - Currently two type of resources are saved/loaded for device of device
> config capability:
> General PCI config data, and Device config data.
> They are copies as a whole when precopy is stopped.
>
> Migration setup flow:
> - Setup device state regions, check its device state version and capabilities.
> Mmap Device Config Region and Dirty Bitmap Region, if available.
> - If device state regions are failed to get setup, a migration blocker is
> registered instead.
> - Added SaveVMHandlers to register device state save/load handlers.
> - Register VM state change handler to set device's running/stop states.
> - On migration startup on source machine, set device's state to
> VFIO_DEVICE_STATE_LOGGING
>
> Signed-off-by: Yan Zhao <yan.y.zhao at intel.com>
> Signed-off-by: Yulei Zhang <yulei.zhang at intel.com>
> ---
> hw/vfio/Makefile.objs | 2 +-
> hw/vfio/migration.c | 633 ++++++++++++++++++++++++++++++++++++++++++
> hw/vfio/pci.c | 1 -
> hw/vfio/pci.h | 25 +-
> include/hw/vfio/vfio-common.h | 1 +
> 5 files changed, 659 insertions(+), 3 deletions(-)
> create mode 100644 hw/vfio/migration.c
>
> diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
> index 8b3f664..f32ff19 100644
> --- a/hw/vfio/Makefile.objs
> +++ b/hw/vfio/Makefile.objs
> @@ -1,6 +1,6 @@
> ifeq ($(CONFIG_LINUX), y)
> obj-$(CONFIG_SOFTMMU) += common.o
> -obj-$(CONFIG_PCI) += pci.o pci-quirks.o display.o
> +obj-$(CONFIG_PCI) += pci.o pci-quirks.o display.o migration.o
I think you want to split the migration code: The type-independent
code, and the pci-specific code.
> obj-$(CONFIG_VFIO_CCW) += ccw.o
> obj-$(CONFIG_SOFTMMU) += platform.o
> obj-$(CONFIG_VFIO_XGMAC) += calxeda-xgmac.o
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> new file mode 100644
> index 0000000..16d6395
> --- /dev/null
> +++ b/hw/vfio/migration.c
> @@ -0,0 +1,633 @@
> +#include "qemu/osdep.h"
> +
> +#include "hw/vfio/vfio-common.h"
> +#include "migration/blocker.h"
> +#include "migration/register.h"
> +#include "qapi/error.h"
> +#include "pci.h"
> +#include "sysemu/kvm.h"
> +#include "exec/ram_addr.h"
> +
> +#define VFIO_SAVE_FLAG_SETUP 0
> +#define VFIO_SAVE_FLAG_PCI 1
> +#define VFIO_SAVE_FLAG_DEVCONFIG 2
> +#define VFIO_SAVE_FLAG_DEVMEMORY 4
> +#define VFIO_SAVE_FLAG_CONTINUE 8
> +
> +static int vfio_device_state_region_setup(VFIOPCIDevice *vdev,
> + VFIORegion *region, uint32_t subtype, const char *name)
This function looks like it should be more generic and e.g. take a
VFIODevice instead of a VFIOPCIDevice as argument.
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + struct vfio_region_info *info;
> + int ret;
> +
> + ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_DEVICE_STATE,
> + subtype, &info);
> + if (ret) {
> + error_report("Failed to get info of region %s", name);
> + return ret;
> + }
> +
> + if (vfio_region_setup(OBJECT(vdev), vbasedev,
> + region, info->index, name)) {
> + error_report("Failed to setup migrtion region %s", name);
> + return ret;
> + }
> +
> + if (vfio_region_mmap(region)) {
> + error_report("Failed to mmap migrtion region %s", name);
> + }
> +
> + return 0;
> +}
> +
> +bool vfio_device_data_cap_system_memory(VFIOPCIDevice *vdev)
> +{
> + return !!(vdev->migration->data_caps & VFIO_DEVICE_DATA_CAP_SYSTEM_MEMORY);
> +}
> +
> +bool vfio_device_data_cap_device_memory(VFIOPCIDevice *vdev)
> +{
> + return !!(vdev->migration->data_caps & VFIO_DEVICE_DATA_CAP_DEVICE_MEMORY);
> +}
These two as well. The migration structure should probably hang off the
VFIODevice instead.
> +
> +static bool vfio_device_state_region_mmaped(VFIORegion *region)
> +{
> + bool mmaped = true;
> + if (region->nr_mmaps != 1 || region->mmaps[0].offset ||
> + (region->size != region->mmaps[0].size) ||
> + (region->mmaps[0].mmap == NULL)) {
> + mmaped = false;
> + }
> +
> + return mmaped;
> +}
s/mmaped/mmapped/ ?
> +
> +static int vfio_get_device_config_size(VFIOPCIDevice *vdev)
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIORegion *region_ctl =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL];
> + VFIORegion *region_config =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_DATA_CONFIG];
This looks like it should not depend on pci, either.
> + uint64_t len;
> + int sz;
> +
> + sz = sizeof(len);
> + if (pread(vbasedev->fd, &len, sz,
> + region_ctl->fd_offset +
> + offsetof(struct vfio_device_state_ctl, device_config.size))
> + != sz) {
> + error_report("vfio: Failed to get length of device config");
> + return -1;
> + }
> + if (len > region_config->size) {
> + error_report("vfio: Error device config length");
> + return -1;
> + }
> + vdev->migration->devconfig_size = len;
> +
> + return 0;
> +}
> +
> +static int vfio_set_device_config_size(VFIOPCIDevice *vdev, uint64_t size)
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIORegion *region_ctl =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL];
> + VFIORegion *region_config =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_DATA_CONFIG];
Ditto. Also for the functions below.
> + int sz;
> +
> + if (size > region_config->size) {
> + return -1;
> + }
> +
> + sz = sizeof(size);
> + if (pwrite(vbasedev->fd, &size, sz,
> + region_ctl->fd_offset +
> + offsetof(struct vfio_device_state_ctl, device_config.size))
> + != sz) {
> + error_report("vfio: Failed to set length of device config");
> + return -1;
> + }
> + vdev->migration->devconfig_size = size;
> + return 0;
> +}
> +
> +static int vfio_save_data_device_config(VFIOPCIDevice *vdev, QEMUFile *f)
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIORegion *region_ctl =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL];
> + VFIORegion *region_config =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_DATA_CONFIG];
> + void *dest;
> + uint32_t sz;
> + uint8_t *buf = NULL;
> + uint32_t action = VFIO_DEVICE_DATA_ACTION_GET_BUFFER;
> + uint64_t len = vdev->migration->devconfig_size;
> +
> + qemu_put_be64(f, len);
Why big endian? (Generally, do we need any endianness considerations?)
> +
> + sz = sizeof(action);
> + if (pwrite(vbasedev->fd, &action, sz,
> + region_ctl->fd_offset +
> + offsetof(struct vfio_device_state_ctl, device_config.action))
> + != sz) {
> + error_report("vfio: action failure for device config get buffer");
> + return -1;
> + }
Might make sense to wrap this into a set_action() helper that takes a
SET_BUFFER/GET_BUFFER argument.
> +
> + if (!vfio_device_state_region_mmaped(region_config)) {
> + buf = g_malloc(len);
> + if (buf == NULL) {
> + error_report("vfio: Failed to allocate memory for migrate");
> + return -1;
> + }
> + if (pread(vbasedev->fd, buf, len, region_config->fd_offset) != len) {
> + error_report("vfio: Failed read device config buffer");
> + return -1;
> + }
> + qemu_put_buffer(f, buf, len);
> + g_free(buf);
> + } else {
> + dest = region_config->mmaps[0].mmap;
> + qemu_put_buffer(f, dest, len);
> + }
> + return 0;
> +}
> +
> +static int vfio_load_data_device_config(VFIOPCIDevice *vdev,
> + QEMUFile *f, uint64_t len)
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIORegion *region_ctl =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL];
> + VFIORegion *region_config =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_DATA_CONFIG];
> + void *dest;
> + uint32_t sz;
> + uint8_t *buf = NULL;
> + uint32_t action = VFIO_DEVICE_DATA_ACTION_SET_BUFFER;
> +
> + vfio_set_device_config_size(vdev, len);
> +
> + if (!vfio_device_state_region_mmaped(region_config)) {
> + buf = g_malloc(len);
> + if (buf == NULL) {
> + error_report("vfio: Failed to allocate memory for migrate");
> + return -1;
> + }
> + qemu_get_buffer(f, buf, len);
> + if (pwrite(vbasedev->fd, buf, len,
> + region_config->fd_offset) != len) {
> + error_report("vfio: Failed to write devie config buffer");
> + return -1;
> + }
> + g_free(buf);
> + } else {
> + dest = region_config->mmaps[0].mmap;
> + qemu_get_buffer(f, dest, len);
> + }
> +
> + sz = sizeof(action);
> + if (pwrite(vbasedev->fd, &action, sz,
> + region_ctl->fd_offset +
> + offsetof(struct vfio_device_state_ctl, device_config.action))
> + != sz) {
> + error_report("vfio: action failure for device config set buffer");
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static int vfio_set_dirty_page_bitmap_chunk(VFIOPCIDevice *vdev,
> + uint64_t start_addr, uint64_t page_nr)
> +{
> +
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIORegion *region_ctl =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL];
> + VFIORegion *region_bitmap =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_DATA_BITMAP];
> + unsigned long bitmap_size =
> + BITS_TO_LONGS(page_nr) * sizeof(unsigned long);
> + uint32_t sz;
> +
> + struct {
> + __u64 start_addr;
> + __u64 page_nr;
> + } system_memory;
> + system_memory.start_addr = start_addr;
> + system_memory.page_nr = page_nr;
> + sz = sizeof(system_memory);
> + if (pwrite(vbasedev->fd, &system_memory, sz,
> + region_ctl->fd_offset +
> + offsetof(struct vfio_device_state_ctl, system_memory))
> + != sz) {
> + error_report("vfio: Failed to set system memory range for dirty pages");
> + return -1;
> + }
> +
> + if (!vfio_device_state_region_mmaped(region_bitmap)) {
> + void *bitmap = g_malloc0(bitmap_size);
> +
> + if (pread(vbasedev->fd, bitmap, bitmap_size,
> + region_bitmap->fd_offset) != bitmap_size) {
> + error_report("vfio: Failed to read dirty bitmap data");
> + return -1;
> + }
> +
> + cpu_physical_memory_set_dirty_lebitmap(bitmap, start_addr, page_nr);
> +
> + g_free(bitmap);
> + } else {
> + cpu_physical_memory_set_dirty_lebitmap(
> + region_bitmap->mmaps[0].mmap,
> + start_addr, page_nr);
> + }
> + return 0;
> +}
> +
> +int vfio_set_dirty_page_bitmap(VFIOPCIDevice *vdev,
> + uint64_t start_addr, uint64_t page_nr)
> +{
> + VFIORegion *region_bitmap =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_DATA_BITMAP];
> + unsigned long chunk_size = region_bitmap->size;
> + uint64_t chunk_pg_nr = (chunk_size / sizeof(unsigned long)) *
> + BITS_PER_LONG;
> +
> + uint64_t cnt_left;
> + int rc = 0;
> +
> + cnt_left = page_nr;
> +
> + while (cnt_left >= chunk_pg_nr) {
> + rc = vfio_set_dirty_page_bitmap_chunk(vdev, start_addr, chunk_pg_nr);
> + if (rc) {
> + goto exit;
> + }
> + cnt_left -= chunk_pg_nr;
> + start_addr += start_addr;
> + }
> + rc = vfio_set_dirty_page_bitmap_chunk(vdev, start_addr, cnt_left);
> +
> +exit:
> + return rc;
> +}
> +
> +static int vfio_set_device_state(VFIOPCIDevice *vdev,
> + uint32_t dev_state)
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIORegion *region =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL];
> + uint32_t sz = sizeof(dev_state);
> +
> + if (!vdev->migration) {
> + return -1;
> + }
> +
> + if (pwrite(vbasedev->fd, &dev_state, sz,
> + region->fd_offset +
> + offsetof(struct vfio_device_state_ctl, device_state))
> + != sz) {
> + error_report("vfio: Failed to set device state %d", dev_state);
Can the kernel reject this if a state transition is not allowed (or are
all transitions allowed?)
> + return -1;
> + }
> + vdev->migration->device_state = dev_state;
> + return 0;
> +}
> +
> +static int vfio_get_device_data_caps(VFIOPCIDevice *vdev)
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIORegion *region =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL];
> +
> + uint32_t caps;
> + uint32_t size = sizeof(caps);
> +
> + if (pread(vbasedev->fd, &caps, size,
> + region->fd_offset +
> + offsetof(struct vfio_device_state_ctl, caps))
> + != size) {
> + error_report("%s Failed to read data caps of device states",
> + vbasedev->name);
> + return -1;
> + }
> + vdev->migration->data_caps = caps;
> + return 0;
> +}
> +
> +
> +static int vfio_check_devstate_version(VFIOPCIDevice *vdev)
> +{
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIORegion *region =
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL];
> +
> + uint32_t version;
> + uint32_t size = sizeof(version);
> +
> + if (pread(vbasedev->fd, &version, size,
> + region->fd_offset +
> + offsetof(struct vfio_device_state_ctl, version))
> + != size) {
> + error_report("%s Failed to read version of device state interfaces",
> + vbasedev->name);
> + return -1;
> + }
> +
> + if (version != VFIO_DEVICE_STATE_INTERFACE_VERSION) {
> + error_report("%s migration version mismatch, right version is %d",
> + vbasedev->name, VFIO_DEVICE_STATE_INTERFACE_VERSION);
So, we require an exact match... or should we allow to extend the
interface in an backwards-compatible way, in which case we'd require
(QEMU interface version) <= (kernel interface version)?
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static void vfio_vm_change_state_handler(void *pv, int running, RunState state)
> +{
> + VFIOPCIDevice *vdev = pv;
> + uint32_t dev_state = vdev->migration->device_state;
> +
> + if (!running) {
> + dev_state |= VFIO_DEVICE_STATE_STOP;
> + } else {
> + dev_state &= ~VFIO_DEVICE_STATE_STOP;
> + }
> +
> + vfio_set_device_state(vdev, dev_state);
> +}
> +
> +static void vfio_save_live_pending(QEMUFile *f, void *opaque,
> + uint64_t max_size,
> + uint64_t *res_precopy_only,
> + uint64_t *res_compatible,
> + uint64_t *res_post_copy_only)
> +{
> + VFIOPCIDevice *vdev = opaque;
> +
> + if (!vfio_device_data_cap_device_memory(vdev)) {
> + return;
> + }
> +
> + return;
> +}
> +
> +static int vfio_save_iterate(QEMUFile *f, void *opaque)
> +{
> + VFIOPCIDevice *vdev = opaque;
> +
> + if (!vfio_device_data_cap_device_memory(vdev)) {
> + return 0;
> + }
> +
> + return 0;
> +}
These look a bit weird...
> +
> +static void vfio_pci_load_config(VFIOPCIDevice *vdev, QEMUFile *f)
> +{
> + PCIDevice *pdev = &vdev->pdev;
> + uint32_t ctl, msi_lo, msi_hi, msi_data, bar_cfg, i;
> + bool msi_64bit;
> +
> + /* retore pci bar configuration */
> + ctl = pci_default_read_config(pdev, PCI_COMMAND, 2);
> + vfio_pci_write_config(pdev, PCI_COMMAND,
> + ctl & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2);
> + for (i = 0; i < PCI_ROM_SLOT; i++) {
> + bar_cfg = qemu_get_be32(f);
> + vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar_cfg, 4);
> + }
> + vfio_pci_write_config(pdev, PCI_COMMAND,
> + ctl | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2);
> +
> + /* restore msi configuration */
> + ctl = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, 2);
> + msi_64bit = !!(ctl & PCI_MSI_FLAGS_64BIT);
> +
> + vfio_pci_write_config(&vdev->pdev,
> + pdev->msi_cap + PCI_MSI_FLAGS,
> + ctl & (!PCI_MSI_FLAGS_ENABLE), 2);
> +
> + msi_lo = qemu_get_be32(f);
> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, msi_lo, 4);
> +
> + if (msi_64bit) {
> + msi_hi = qemu_get_be32(f);
> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> + msi_hi, 4);
> + }
> + msi_data = qemu_get_be32(f);
> + vfio_pci_write_config(pdev,
> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
> + msi_data, 2);
> +
> + vfio_pci_write_config(&vdev->pdev, pdev->msi_cap + PCI_MSI_FLAGS,
> + ctl | PCI_MSI_FLAGS_ENABLE, 2);
> +
Ok, this function is indeed pci-specific and probably should be moved
to the vfio-pci code (other types could hook themselves up in the same
place, then).
> +}
> +
> +static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
> +{
> + VFIOPCIDevice *vdev = opaque;
> + int flag;
> + uint64_t len;
> + int ret = 0;
> +
> + if (version_id != VFIO_DEVICE_STATE_INTERFACE_VERSION) {
> + return -EINVAL;
> + }
> +
> + do {
> + flag = qemu_get_byte(f);
> +
> + switch (flag & ~VFIO_SAVE_FLAG_CONTINUE) {
> + case VFIO_SAVE_FLAG_SETUP:
> + break;
> + case VFIO_SAVE_FLAG_PCI:
> + vfio_pci_load_config(vdev, f);
> + break;
> + case VFIO_SAVE_FLAG_DEVCONFIG:
> + len = qemu_get_be64(f);
> + vfio_load_data_device_config(vdev, f, len);
> + break;
> + default:
> + ret = -EINVAL;
> + }
> + } while (flag & VFIO_SAVE_FLAG_CONTINUE);
> +
> + return ret;
> +}
> +
> +static void vfio_pci_save_config(VFIOPCIDevice *vdev, QEMUFile *f)
> +{
> + PCIDevice *pdev = &vdev->pdev;
> + uint32_t msi_cfg, msi_lo, msi_hi, msi_data, bar_cfg, i;
> + bool msi_64bit;
> +
> + for (i = 0; i < PCI_ROM_SLOT; i++) {
> + bar_cfg = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4);
> + qemu_put_be32(f, bar_cfg);
> + }
> +
> + msi_cfg = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, 2);
> + msi_64bit = !!(msi_cfg & PCI_MSI_FLAGS_64BIT);
> +
> + msi_lo = pci_default_read_config(pdev,
> + pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4);
> + qemu_put_be32(f, msi_lo);
> +
> + if (msi_64bit) {
> + msi_hi = pci_default_read_config(pdev,
> + pdev->msi_cap + PCI_MSI_ADDRESS_HI,
> + 4);
> + qemu_put_be32(f, msi_hi);
> + }
> +
> + msi_data = pci_default_read_config(pdev,
> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32),
> + 2);
> + qemu_put_be32(f, msi_data);
> +
> +}
> +
> +static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
> +{
> + VFIOPCIDevice *vdev = opaque;
> + int rc = 0;
> +
> + qemu_put_byte(f, VFIO_SAVE_FLAG_PCI | VFIO_SAVE_FLAG_CONTINUE);
> + vfio_pci_save_config(vdev, f);
> +
> + qemu_put_byte(f, VFIO_SAVE_FLAG_DEVCONFIG);
> + rc += vfio_get_device_config_size(vdev);
> + rc += vfio_save_data_device_config(vdev, f);
> +
> + return rc;
> +}
> +
> +static int vfio_save_setup(QEMUFile *f, void *opaque)
> +{
> + VFIOPCIDevice *vdev = opaque;
> + qemu_put_byte(f, VFIO_SAVE_FLAG_SETUP);
> +
> + vfio_set_device_state(vdev, VFIO_DEVICE_STATE_RUNNING |
> + VFIO_DEVICE_STATE_LOGGING);
> + return 0;
> +}
> +
> +static int vfio_load_setup(QEMUFile *f, void *opaque)
> +{
> + return 0;
> +}
> +
> +static void vfio_save_cleanup(void *opaque)
> +{
> + VFIOPCIDevice *vdev = opaque;
> + uint32_t dev_state = vdev->migration->device_state;
> +
> + dev_state &= ~VFIO_DEVICE_STATE_LOGGING;
> +
> + vfio_set_device_state(vdev, dev_state);
> +}
These look like they should be type-independent, again.
> +
> +static SaveVMHandlers savevm_vfio_handlers = {
> + .save_setup = vfio_save_setup,
> + .save_live_pending = vfio_save_live_pending,
> + .save_live_iterate = vfio_save_iterate,
> + .save_live_complete_precopy = vfio_save_complete_precopy,
> + .save_cleanup = vfio_save_cleanup,
> + .load_setup = vfio_load_setup,
> + .load_state = vfio_load_state,
> +};
> +
> +int vfio_migration_init(VFIOPCIDevice *vdev, Error **errp)
> +{
> + int ret;
> + Error *local_err = NULL;
> + vdev->migration = g_new0(VFIOMigration, 1);
> +
> + if (vfio_device_state_region_setup(vdev,
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_CTL],
> + VFIO_REGION_SUBTYPE_DEVICE_STATE_CTL,
> + "device-state-ctl")) {
> + goto error;
> + }
> +
> + if (vfio_check_devstate_version(vdev)) {
> + goto error;
> + }
> +
> + if (vfio_get_device_data_caps(vdev)) {
> + goto error;
> + }
> +
> + if (vfio_device_state_region_setup(vdev,
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_DATA_CONFIG],
> + VFIO_REGION_SUBTYPE_DEVICE_STATE_DATA_CONFIG,
> + "device-state-data-device-config")) {
> + goto error;
> + }
> +
> + if (vfio_device_data_cap_device_memory(vdev)) {
> + error_report("No suppport of data cap device memory Yet");
> + goto error;
> + }
> +
> + if (vfio_device_data_cap_system_memory(vdev) &&
> + vfio_device_state_region_setup(vdev,
> + &vdev->migration->region[VFIO_DEVSTATE_REGION_DATA_BITMAP],
> + VFIO_REGION_SUBTYPE_DEVICE_STATE_DATA_DIRTYBITMAP,
> + "device-state-data-dirtybitmap")) {
> + goto error;
> + }
> +
> + vdev->migration->device_state = VFIO_DEVICE_STATE_RUNNING;
> +
> + register_savevm_live(NULL, TYPE_VFIO_PCI, -1,
> + VFIO_DEVICE_STATE_INTERFACE_VERSION,
> + &savevm_vfio_handlers,
> + vdev);
> +
> + vdev->migration->vm_state =
> + qemu_add_vm_change_state_handler(vfio_vm_change_state_handler, vdev);
> +
> + return 0;
> +error:
> + error_setg(&vdev->migration_blocker,
> + "VFIO device doesn't support migration");
> + ret = migrate_add_blocker(vdev->migration_blocker, &local_err);
> + if (local_err) {
> + error_propagate(errp, local_err);
> + error_free(vdev->migration_blocker);
> + }
> +
> + g_free(vdev->migration);
> + vdev->migration = NULL;
> +
> + return ret;
> +}
> +
> +void vfio_migration_finalize(VFIOPCIDevice *vdev)
> +{
> + if (vdev->migration) {
> + int i;
> + qemu_del_vm_change_state_handler(vdev->migration->vm_state);
> + unregister_savevm(NULL, TYPE_VFIO_PCI, vdev);
> + for (i = 0; i < VFIO_DEVSTATE_REGION_NUM; i++) {
> + vfio_region_finalize(&vdev->migration->region[i]);
> + }
> + g_free(vdev->migration);
> + vdev->migration = NULL;
> + } else if (vdev->migration_blocker) {
> + migrate_del_blocker(vdev->migration_blocker);
> + error_free(vdev->migration_blocker);
> + }
> +}
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index c0cb1ec..b8e006b 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -37,7 +37,6 @@
>
> #define MSIX_CAP_LENGTH 12
>
> -#define TYPE_VFIO_PCI "vfio-pci"
Why do you need to move this? That looks like a sign that the layering
needs work.
> #define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
>
> static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index b1ae4c0..4b7b1bb 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -19,6 +19,7 @@
> #include "qemu/event_notifier.h"
> #include "qemu/queue.h"
> #include "qemu/timer.h"
> +#include "sysemu/sysemu.h"
>
> #define PCI_ANY_ID (~0)
>
> @@ -56,6 +57,21 @@ typedef struct VFIOBAR {
> QLIST_HEAD(, VFIOQuirk) quirks;
> } VFIOBAR;
>
> +enum {
> + VFIO_DEVSTATE_REGION_CTL = 0,
> + VFIO_DEVSTATE_REGION_DATA_CONFIG,
> + VFIO_DEVSTATE_REGION_DATA_DEVICE_MEMORY,
> + VFIO_DEVSTATE_REGION_DATA_BITMAP,
> + VFIO_DEVSTATE_REGION_NUM,
> +};
> +typedef struct VFIOMigration {
> + VFIORegion region[VFIO_DEVSTATE_REGION_NUM];
> + uint32_t data_caps;
> + uint32_t device_state;
> + uint64_t devconfig_size;
> + VMChangeStateEntry *vm_state;
> +} VFIOMigration;
> +
> typedef struct VFIOVGARegion {
> MemoryRegion mem;
> off_t offset;
> @@ -132,6 +148,8 @@ typedef struct VFIOPCIDevice {
> VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
> VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
> void *igd_opregion;
> + VFIOMigration *migration;
As said, it would probably be better to hang this off VFIODevice.
> + Error *migration_blocker;
> PCIHostDeviceAddress host;
> EventNotifier err_notifier;
> EventNotifier req_notifier;
> @@ -198,5 +216,10 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
> void vfio_display_reset(VFIOPCIDevice *vdev);
> int vfio_display_probe(VFIOPCIDevice *vdev, Error **errp);
> void vfio_display_finalize(VFIOPCIDevice *vdev);
> -
> +bool vfio_device_data_cap_system_memory(VFIOPCIDevice *vdev);
> +bool vfio_device_data_cap_device_memory(VFIOPCIDevice *vdev);
> +int vfio_set_dirty_page_bitmap(VFIOPCIDevice *vdev,
> + uint64_t start_addr, uint64_t page_nr);
> +int vfio_migration_init(VFIOPCIDevice *vdev, Error **errp);
> +void vfio_migration_finalize(VFIOPCIDevice *vdev);
And the interfaces should be in vfio-common.
> #endif /* HW_VFIO_VFIO_PCI_H */
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 1b434d0..ed43613 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -32,6 +32,7 @@
> #endif
>
> #define VFIO_MSG_PREFIX "vfio %s: "
> +#define TYPE_VFIO_PCI "vfio-pci"
>
> enum {
> VFIO_DEVICE_TYPE_PCI = 0,
More information about the intel-gvt-dev
mailing list