[Intel-gfx] [IGDVFIO] [PATCH 2/8] RFC and help completing: Intel IGD Direct Assignment with VFIO

Andrew Barnes umbramalison at gmail.com
Wed Sep 24 15:20:22 CEST 2014


hw/misc/vfio.c

This patch adds:
* memory mapping of the Intel OpRegion into the guest
* mirroring of BDSM reads to the guest's device 0 instead of the host's

patch
---------------------

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index e88b610..54e549b 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -5,6 +5,7 @@
  *
  * Authors:
  *  Alex Williamson <alex.williamson at redhat.com>
+ *  Andrew Barnes <andy at outsideglobe.com> IGD Support
  *
  * This work is licensed under the terms of the GNU GPL, version 2.  See
  * the COPYING file in the top-level directory.
@@ -56,6 +57,45 @@
 #define VFIO_ALLOW_KVM_MSI 1
 #define VFIO_ALLOW_KVM_MSIX 1

+/* A handy list of IGD device IDs */
+#define IS_IGD_HASWELL(id)            (id == 0x0402 \
+                                        || id == 0x0406 \
+                                        || id == 0x040a \
+                                        || id == 0x0412 \
+                                        || id == 0x0416 \
+                                        || id == 0x041a \
+                                        || id == 0x0a04 \
+                                        || id == 0x0a16 \
+                                        || id == 0x0a22 \
+                                        || id == 0x0a26 \
+                                        || id == 0x0a2a )
+#define IS_IGD_IVYBRIDGE(id)          (id == 0x0162 \
+                                        || id == 0x0166 \
+                                        || id == 0x016a \
+                                        || id == 0x0152 \
+                                        || id == 0x0156 \
+                                        || id == 0x015a )
+#define IS_IGD_SANDYBRIDGE(id)        (id == 0x0102 \
+                                        || id == 0x0106 \
+                                        || id == 0x0112 \
+                                        || id == 0x0116 \
+                                        || id == 0x0122 \
+                                        || id == 0x0126 \
+                                        || id ==0x010a )
+#define IS_IGD_IRONLAKE_CLARKDALE(id) (id == 0x0042 )
+#define IS_IGD_IRONLAKE_ARRANDALE(id) (id == 0x0046 )
+#define IS_IGD(id)                    (IS_IGD_IRONLAKE_CLARKDALE(id) \
+                                        || IS_IGD_IRONLAKE_ARRANDALE(id) \
+                                        || IS_IGD_SANDYBRIDGE(id) \
+                                        || IS_IGD_IVYBRIDGE(id) \
+                                        || IS_IGD_HASWELL(id) )
+#define IGD_BAR_MASK                  0xFFFFFFFFFFFF0000
+#define DMAR_OPERATION_TIMEOUT        ((s_time_t)((_ms) * 1000000ULL))
+
+#define PCI_CONFIG_INTEL_OPREGION       0xfc
+#define INTEL_OPREGION_PAGES            3
+#define INTEL_OPREGION_SIZE             INTEL_OPREGION_PAGES *
TARGET_PAGE_SIZE
+
 struct VFIODevice;

 typedef struct VFIOQuirk {
@@ -227,6 +267,8 @@ typedef struct VFIODevice {
     bool has_pm_reset;
     bool needs_reset;
     bool rom_read_failed;
+    MemoryRegion opregion; /* Intel opregion */
+    uint32_t host_opregion; /* Host address of opregion */
 } VFIODevice;

 typedef struct VFIOGroup {
@@ -283,6 +325,18 @@ static void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr,
                                   uint32_t val, int len);
 static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled);

+static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask);
+static void vfio_add_emulated_word(VFIODevice *vdev, int pos,
+                                   uint16_t val, uint16_t mask);
+static void vfio_set_long_bits(uint8_t *buf, uint32_t val, uint32_t mask);
+static void vfio_add_emulated_long(VFIODevice *vdev, int pos,
+                                   uint32_t val, uint32_t mask);
+static void vfio_add_emulated_rw_long(VFIODevice *vdev, int pos,
+                                   uint32_t val, uint32_t mask);
+static void vfio_map_igdopregion(VFIODevice *vdev, uint32_t
guest_opregion);
+
+static VFIODevice *igdvfio;
+
 /*
  * Common VFIO interrupt disable
  */
@@ -2324,27 +2378,46 @@ static uint32_t vfio_pci_read_config(PCIDevice
*pdev, uint32_t addr, int len)
     VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
     uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val;

-    memcpy(&emu_bits, vdev->emulated_config_bits + addr, len);
-    emu_bits = le32_to_cpu(emu_bits);
-
-    if (emu_bits) {
-        emu_val = pci_default_read_config(pdev, addr, len);
+    /* BDSM mirror - BDSM can be read at either 0xb0 device 0, or 0x5c
device 2.
+     * Redirect this mirror from host 0xb0 device 0 to guest 0xb0 device
0.*/
+    if (IS_IGD(pci_get_word(pdev->config + PCI_DEVICE_ID)) &&
ranges_overlap(addr,len,0x5c,4))
+    {
+        DPRINTF("%s Read Trapped (%04x:%02x:%02x.%x, @0x%x, 0x%x,
len=0x%x)\n", __func__,
+                    vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                                vdev->host.function, addr, val, len);
+        PCIBus *root = pci_find_primary_bus();
+        PCIDevice *q35 = pci_find_device(root,0,PCI_DEVFN(0, 0));
+        val = pci_default_read_config(q35, 0xb0, len);
     }

-    if (~emu_bits & (0xffffffffU >> (32 - len * 8))) {
-        ssize_t ret;
+    else
+    {
+        memcpy(&emu_bits, vdev->emulated_config_bits + addr, len);
+        emu_bits = le32_to_cpu(emu_bits);
+
+        if (emu_bits) {
+            emu_val = pci_default_read_config(pdev, addr, len);
+            DPRINTF("%s emulated read: %x \n",__func__,emu_val);

-        ret = pread(vdev->fd, &phys_val, len, vdev->config_offset + addr);
-        if (ret != len) {
-            error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m",
-                         __func__, vdev->host.domain, vdev->host.bus,
-                         vdev->host.slot, vdev->host.function, addr, len);
-            return -errno;
         }
-        phys_val = le32_to_cpu(phys_val);
-    }

-    val = (emu_val & emu_bits) | (phys_val & ~emu_bits);
+        if (~emu_bits & (0xffffffffU >> (32 - len * 8))) {
+            ssize_t ret;
+
+            ret = pread(vdev->fd, &phys_val, len, vdev->config_offset +
addr);
+            if (ret != len) {
+                error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed:
%m",
+                             __func__, vdev->host.domain, vdev->host.bus,
+                             vdev->host.slot, vdev->host.function, addr,
len);
+                return -errno;
+            }
+            phys_val = le32_to_cpu(phys_val);
+            DPRINTF("%s direct read: %x \n",__func__,phys_val);
+
+        }
+
+        val = (emu_val & emu_bits) | (phys_val & ~emu_bits);
+    }

     DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
             vdev->host.domain, vdev->host.bus, vdev->host.slot,
@@ -2363,12 +2436,30 @@ static void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr,
             vdev->host.domain, vdev->host.bus, vdev->host.slot,
             vdev->host.function, addr, val, len);

+    /* A write to OPREGION base address means that seabios has allocated a
new memory region for OPREGION
+     * in the guest. */
+    if (IS_IGD(pci_get_word(pdev->config + PCI_DEVICE_ID)) &&
ranges_overlap(addr,len,PCI_CONFIG_INTEL_OPREGION,4))
+    {
+        DPRINTF("%s Write Trapped (%04x:%02x:%02x.%x, @0x%x, 0x%x,
len=0x%x)\n", __func__,
+                    vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                                vdev->host.function, addr, val, len);
+        //val = (val & 0xfffff000) | (vdev->host_opregion & 0xfff);
+        vfio_map_igdopregion(vdev,val);
+        goto defaultwrite;
+    }
+
     /* Write everything to VFIO, let it filter out what we can't write */
-    if (pwrite(vdev->fd, &val_le, len, vdev->config_offset + addr) != len)
{
+    else if (pwrite(vdev->fd, &val_le, len, vdev->config_offset + addr) !=
len) {
         error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m",
                      __func__, vdev->host.domain, vdev->host.bus,
                      vdev->host.slot, vdev->host.function, addr, val, len);
     }
+    else
+    {
+        DPRINTF("%s Written to VFIO (%04x:%02x:%02x.%x, @0x%x, 0x%x,
len=0x%x)\n", __func__,
+                  vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                              vdev->host.function, addr, val, len);
+    }

     /* MSI/MSI-X Enabling/Disabling */
     if (pdev->cap_present & QEMU_PCI_CAP_MSI &&
@@ -2405,7 +2496,11 @@ static void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr,
         }
     } else {
         /* Write everything to QEMU to keep emulated bits correct */
-        pci_default_write_config(pdev, addr, val, len);
+defaultwrite:
+       pci_default_write_config(pdev, addr, val, len);
+       DPRINTF("%s Default Write (%04x:%02x:%02x.%x, @0x%x, 0x%x,
len=0x%x)\n", __func__,
+                   vdev->host.domain, vdev->host.bus, vdev->host.slot,
+                               vdev->host.function, addr, val, len);
     }
 }

@@ -3065,7 +3160,7 @@ static void vfio_set_word_bits(uint8_t *buf, uint16_t
val, uint16_t mask)
 {
     pci_set_word(buf, (pci_get_word(buf) & ~mask) | val);
 }
-
+/* helper functions make read-only emulated registers! */
 static void vfio_add_emulated_word(VFIODevice *vdev, int pos,
                                    uint16_t val, uint16_t mask)
 {
@@ -3087,6 +3182,62 @@ static void vfio_add_emulated_long(VFIODevice *vdev,
int pos,
     vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask);
 }

+static void vfio_add_emulated_rw_long(VFIODevice *vdev, int pos,
+                                   uint32_t val, uint32_t mask)
+{
+    vfio_set_long_bits(vdev->pdev.config + pos, val, mask);
+    vfio_set_long_bits(vdev->pdev.wmask + pos, mask, mask);
+    vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask);
+}
+
+/* Set up the mapping of the opregion ready for Seabios to allocate the
guest location */
+static void vfio_setup_igdopregion(VFIODevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    int fd;
+    char name[64];
+    void *map;
+
+    vdev->host_opregion =
vfio_pci_read_config(pdev,PCI_CONFIG_INTEL_OPREGION,4);
+
+    DPRINTF("%s Setup IGD OpRegion: %x\n",__func__,vdev->host_opregion);
+
+    snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x IGDOPREGION mmap",
+             vdev->host.domain, vdev->host.bus, vdev->host.slot,
+             vdev->host.function);
+
+    fd = open("/dev/mem", O_RDWR);
+    map =
mmap(NULL,INTEL_OPREGION_SIZE,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(vdev->host_opregion
& ~0xfff));
+    if (map == MAP_FAILED)
+    {
+        map = NULL;
+        DPRINTF("%s Map IGD OpRegion: MAP_FAILED\n",__func__);
+    }
+    memory_region_init_ram_ptr(&vdev->opregion, OBJECT(vdev), name,
INTEL_OPREGION_SIZE, map);
+}
+
+/* Seabios allocates the guest location for the opregion. This function
then memory maps
+ * host memory at that guest location */
+static void vfio_map_igdopregion(VFIODevice *vdev, uint32_t new_opregion)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    MemoryRegion *guest_memory = get_system_memory();
+    uint32_t current_opregion =
vfio_pci_read_config(pdev,PCI_CONFIG_INTEL_OPREGION,4);
+
+    if ( current_opregion != vdev->host_opregion )
+    {
+        // remap
+        DPRINTF("%s Delete IGD OpRegion: %x\n",__func__,(current_opregion
& ~0xfff));
+        memory_region_del_subregion(guest_memory, &vdev->opregion);
+    }
+
+    DPRINTF("%s Map IGD OpRegion: %x ->
%x\n",__func__,vdev->host_opregion,new_opregion);
+    memory_region_add_subregion(guest_memory, (new_opregion & ~0xfff),
&vdev->opregion);
+
+    DPRINTF("%s Adding 0xfc to emulated bits\n", __func__);
+    vfio_add_emulated_rw_long(vdev, PCI_CONFIG_INTEL_OPREGION,
new_opregion, 0xffffffff);
+}
+
 static int vfio_setup_pcie_cap(VFIODevice *vdev, int pos, uint8_t size)
 {
     uint16_t flags;
@@ -4028,7 +4179,8 @@ static int vfio_get_device(VFIOGroup *group, const
char *name, VFIODevice *vdev)
     ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
     if (ret) {
         /* This can fail for an old kernel or legacy PCI dev */
-        DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure: %m\n");
+        //DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure: %m\n");
+        DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure ret=%d\n", ret);
         ret = 0;
     } else if (irq_info.count == 1) {
         vdev->pci_aer = true;
@@ -4253,6 +4405,11 @@ static int vfio_initfn(PCIDevice *pdev)
     vdev->emulated_config_bits[PCI_HEADER_TYPE] =

 PCI_HEADER_TYPE_MULTI_FUNCTION;

+    if (IS_IGD(pci_get_word(pdev->config + PCI_DEVICE_ID)))
+    {
+        vfio_setup_igdopregion(vdev);
+    }
+
     /* Restore or clear multifunction, this is always controlled by QEMU */
     if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
         vdev->pdev.config[PCI_HEADER_TYPE] |=
PCI_HEADER_TYPE_MULTI_FUNCTION;
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20140924/2b65e360/attachment.html>


More information about the Intel-gfx mailing list