[Intel-gfx] [IGDVFIO] [PATCH 3/8] RFC and help completing: Intel IGD Direct Assignment with VFIO
Alex Williamson
alex.williamson at redhat.com
Wed Sep 24 21:47:06 CEST 2014
On Wed, 2014-09-24 at 14:20 +0100, Andrew Barnes wrote:
> hw/pci-host/q35.c
>
> this patch adds:
> * redirect some PCI config reads/writes to host
> * memory map BSDM, BGSM, TSEG
>
> patch
> ---------------------
>
> diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
> index a0a3068..05348ac 100644
> --- a/hw/pci-host/q35.c
> +++ b/hw/pci-host/q35.c
> @@ -6,6 +6,7 @@
> * Isaku Yamahata <yamahata at valinux co jp>
> * VA Linux Systems Japan K.K.
> * Copyright (C) 2012 Jason Baron <jbaron at redhat.com>
> + * 2014 Andrew barnes <andy at outsideglobe.com> IGD Support
> *
> * This is based on piix_pci.c, but heavily modified.
> *
> @@ -30,11 +31,26 @@
> #include "hw/hw.h"
> #include "hw/pci-host/q35.h"
> #include "qapi/visitor.h"
> +#include "hw/pci/pci.h"
> +#include <sys/mman.h> /* memory map functions */
> +
> +/* #define DEBUG_Q35 */
> +#ifdef DEBUG_Q35
> +# define Q35_DPRINTF(format, ...) printf("Q35: " format, ##
> __VA_ARGS__)
> +#else
> +# define Q35_DPRINTF(format, ...) do { } while (0)
> +#endif
> +
> +/* for intel-spec conforming config */
> +/* #define CORRECT_CONFIG */
>
> /****************************************************************************
> * Q35 host
> */
>
> +/* BEWARE: only set this if you are passing IGD through */
> +static bool IGD_PASSTHROUGH = true;
> +
> static void q35_host_realize(DeviceState *dev, Error **errp)
> {
> PCIHostState *pci = PCI_HOST_BRIDGE(dev);
> @@ -284,11 +300,193 @@ static void mch_set_smm(int smm, void *arg)
> memory_region_transaction_commit();
> }
>
> -static void mch_write_config(PCIDevice *d,
> +/* TODO: Move these variables/defines to be consistent with Q35 coding
> style */
> +MemoryRegion bdsm;
> +uint32_t bdsm_host;
> +MemoryRegion bgsm;
> +uint32_t bgsm_host;
> +MemoryRegion tseg;
> +uint32_t tseg_host;
> +#define MCH_CONFIG_BDSM 0xb0
> +#define MCH_CONFIG_BGSM 0xb4
> +#define MCH_CONFIG_TSEG 0xb8
> +#define MCH_CONFIG_GMCH 0x50
> +
> +/* Setup memory map for Stolen Memory */
> +static void mch_setup_bdsm(PCIDevice *d)
> +{
> + int fd;
> + char name[64];
> + void *map;
> + bdsm_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_BDSM,4);
> + int dsm_size;
> + uint16_t gmch_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_GMCH,2);
> + uint16_t gmch_dsm = (gmch_host & 0xF8) >> 3;
> + MCHPCIState *mch = MCH_PCI_DEVICE(d);
> +
> + Q35_DPRINTF("%s Setup BDSM: %x\n",__func__,bdsm_host);
> +
> + snprintf(name, sizeof(name), "MCH-BDSM-mmap");
> + fd = open("/dev/mem", O_RDWR);
> +
> + switch (gmch_dsm)
> + {
> + case 0x0: dsm_size = 0; break;
> + case 0x1: dsm_size = 32; break;
> + case 0x2: dsm_size = 64; break;
> + case 0x3: dsm_size = 96; break;
> + case 0x4: dsm_size = 128; break;
> + case 0x5: dsm_size = 160; break;
> + case 0x6: dsm_size = 192; break;
> + case 0x7: dsm_size = 224; break;
> + case 0x8: dsm_size = 256; break;
> + case 0x9: dsm_size = 288; break;
> + case 0xa: dsm_size = 320; break;
> + case 0xb: dsm_size = 352; break;
> + case 0xc: dsm_size = 384; break;
> + case 0xd: dsm_size = 416; break;
> + case 0xe: dsm_size = 448; break;
> + case 0xf: dsm_size = 480; break;
> + case 0x10: dsm_size = 512; break;
> + // default: /* panic */
Certainly do something other than pass a multiple of an uninitialized
variable through to mmap.
> + }
> +
> + dsm_size = dsm_size * 1024 * 1024;
> + map = mmap(NULL,dsm_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(bdsm_host
> & 0xFFF00000));
> +
> + if (map == MAP_FAILED)
> + {
> + map = NULL;
> + Q35_DPRINTF("%s Setup BDSM: MAP_FAILED\n",__func__);
> + }
> +
> + memory_region_init_ram_ptr(&bdsm, OBJECT(mch), name, dsm_size, map);
> +}
Presumably this is something else we'd need to provide through the vfio
device. So we're up to
1. opregion (3 pages... wasn't it 2 before?)
2. stolen memory (up to 512M)
> +
> +/* this function memory maps the region of host memory assigned to Stolen
> memory
> + * to the guest location provided by seabios. */
> +static void mch_map_bdsm(PCIDevice *d, uint32_t bdsm_new)
> +{
> + MemoryRegion *guest_memory = get_system_memory();
> + uint32_t bdsm_current = pci_default_read_config(d,MCH_CONFIG_BDSM,4);
> +
> + if ( bdsm_current != bdsm_host && bdsm_current != 0 )
> + {
> + // remap
> + Q35_DPRINTF("%s Delete BDSM mapping: %x\n",__func__,(bdsm_current
> & 0xFFF00000));
> + memory_region_del_subregion(guest_memory, &bdsm);
> + }
> + Q35_DPRINTF("%s Add BDSM mapping: %x ->
> %x\n",__func__,bdsm_host,bdsm_new);
> + memory_region_add_subregion(guest_memory, (bdsm_new & 0xFFF00000),
> &bdsm);
> +}
> +
> +/* Setup memory map for GTT Stolen Memory */
> +static void mch_setup_bgsm(PCIDevice *d)
> +{
> + int fd;
> + char name[64];
> + void *map;
> + bgsm_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_BGSM,4);
> + int gsm_size;
> + uint16_t gmch_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_GMCH,2);
> + uint16_t gmch_gsm = (gmch_host & 0x300) >> 8;
> + MCHPCIState *mch = MCH_PCI_DEVICE(d);
> +
> + Q35_DPRINTF("%s Setup BGSM: %x\n",__func__,bgsm_host);
> +
> + snprintf(name, sizeof(name), "MCH %04x:%02x:%02x.%x BGSM mmap",
> + 0, 0, 0, 0);
> +
> + fd = open("/dev/mem", O_RDWR);
> +
> + switch (gmch_gsm)
> + {
> + case 0x2: gsm_size = 2; break;
> + case 0x1: gsm_size = 1; break;
> + case 0x0: gsm_size = 0; break;
> + // default: /* panic */
> + }
> +
> + gsm_size = gsm_size * 1024 * 1024;
> +
> + map = mmap(NULL,gsm_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(bgsm_host
> & 0xFFF00000));
> + if (map == MAP_FAILED)
> + {
> + map = NULL;
> + Q35_DPRINTF("%s Setup BGSM: MAP_FAILED\n",__func__);
> + }
> +
> + memory_region_init_ram_ptr(&bgsm, OBJECT(mch), name, gsm_size, map);
> +}
And a third:
3. GTT stolen memory (up to 2M)
Somehow vfio needs to be in control of these mappings since they'll go
through a vfio device file descriptor.
> +
> +/* this function memory maps the region of host memory assigned to GTT
> Stolen memory
> + * to the guest location provided by seabios. */
> +static void mch_map_bgsm(PCIDevice *d, uint32_t bgsm_new)
> +{
> + MemoryRegion *guest_memory = get_system_memory();
> + uint32_t bgsm_current = pci_default_read_config(d,MCH_CONFIG_BGSM,4);
> +
> + if ( bgsm_current != bgsm_host && bgsm_current != 0)
> + {
> + // remap
> + Q35_DPRINTF("%s Delete BGSM mapping: %x\n",__func__,(bgsm_current
> & 0xFFF00000));
> + memory_region_del_subregion(guest_memory, &bgsm);
> + }
> + Q35_DPRINTF("%s Add BGSM mapping: %x ->
> %x\n",__func__,bgsm_host,bgsm_new);
> + memory_region_add_subregion(guest_memory, (bgsm_new & 0xFFF00000),
> &bgsm);
> +}
> +
> +/* Setup memory map for TSEG */
> +static void mch_setup_tseg(PCIDevice *d)
> +{
> + int fd;
> + char name[64];
> + void *map;
> + tseg_host =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),MCH_CONFIG_TSEG,4);
> + int tseg_size = 8 * 1024 * 1024;
> + MCHPCIState *mch = MCH_PCI_DEVICE(d);
> +
> + Q35_DPRINTF("%s Setup TSEG: %x\n",__func__,tseg_host);
> +
> + snprintf(name, sizeof(name), "MCH-TSEG-mmap");
> +
> + fd = open("/dev/mem", O_RDWR);
> +
> + map =
> mmap(NULL,tseg_size,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(tseg_host &
> 0xFFF00000));
> + if (map == MAP_FAILED)
> + {
> + map = NULL;
> + Q35_DPRINTF("%s Setup TSEG: MAP_FAILED\n",__func__);
> + }
> +
> + memory_region_init_ram_ptr(&tseg, OBJECT(mch), name, tseg_size, map);
> +}
Ugh, number four?!
4. tseg (8M)
> +
> +/* this function memory maps the region of host memory assigned to TSEG
> + * to the guest location provided by seabios. */
> +static void mch_map_tseg(PCIDevice *d, uint32_t tseg_new)
> +{
> + MemoryRegion *guest_memory = get_system_memory();
> + uint32_t tseg_current = pci_default_read_config(d,MCH_CONFIG_TSEG,4);
> +
> + if ( tseg_current != tseg_host && tseg_current != 0)
> + {
> + // remap
> + Q35_DPRINTF("%s Delete TSEG mapping: %x\n",__func__,(tseg_current
> & 0xFFF00000));
> + memory_region_del_subregion(guest_memory, &tseg);
> + }
> + Q35_DPRINTF("%s Add TSEG mapping: %x ->
> %x\n",__func__,tseg_host,tseg_new);
> + memory_region_add_subregion(guest_memory, (tseg_new & 0xFFF00000),
> &tseg);
> +}
> +
> +static void mch_write_config_default(PCIDevice *d,
> uint32_t address, uint32_t val, int len)
> {
> MCHPCIState *mch = MCH_PCI_DEVICE(d);
>
> + Q35_DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
> + 0000, 00, PCI_SLOT(d->devfn),PCI_FUNC(d->devfn), address,
> len, val);
> +
> /* XXX: implement SMRAM.D_LOCK */
> pci_default_write_config(d, address, val, len);
>
> @@ -308,6 +506,73 @@ static void mch_write_config(PCIDevice *d,
> }
> }
>
> +static void mch_write_config(PCIDevice *d,
> + uint32_t address, uint32_t val, int len)
> +{
> + Q35_DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
> + 0000, 00, PCI_SLOT(d->devfn),PCI_FUNC(d->devfn), address,
> len, val);
> +
> + /* PAVPC - Protected Audio Video Path Control */
> + if (IGD_PASSTHROUGH && ranges_overlap(address, len, 0x58, 4))
> + {
> +
> host_pci_write_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),address,len,val);
Direct access to a completely different host device than assigned.
Lovely. Somehow this likely needs to get bundled into vfio too unless
Intel modifies their driver to not need it.
> + }
> + /* BDSM - */
> + else if (IGD_PASSTHROUGH && ranges_overlap(address, len,
> MCH_CONFIG_BDSM, 4))
> + {
> + mch_map_bdsm(d,val);
> + mch_write_config_default(d,address,val,len);
> + }
> + /* BGSM - */
> + else if (IGD_PASSTHROUGH && ranges_overlap(address, len,
> MCH_CONFIG_BGSM, 4))
> + {
> + mch_map_bgsm(d,val);
> + mch_write_config_default(d,address,val,len);
> + }
> + /* TSEG - */
> + else if (IGD_PASSTHROUGH && ranges_overlap(address, len,
> MCH_CONFIG_TSEG, 4))
> + {
> + mch_map_tseg(d,val);
> + mch_write_config_default(d,address,val,len);
> + }
So the opregion is mapped by a config write on the IGD device itself and
the other 3 regions, that we know about so far, are mapped via writes to
the host bridge. This is pretty ridiculous, Intel.
> + else
> + {
> + mch_write_config_default(d,address,val,len);
> + }
> +}
> +
> +static uint32_t mch_read_config(PCIDevice *d,
> + uint32_t address, int len)
> +{
> + uint32_t val;
> +
> + if (IGD_PASSTHROUGH)
> + {
> + if (ranges_overlap(address, len, 0x2c, 2) || /* SVID - Subsystem
> Vendor Identification */
> + ranges_overlap(address, len, 0x2e, 2) || /* SID - Subsystem
> Identification */
> + ranges_overlap(address, len, 0x50, 2) || /* GMCH - SNB
> Graphics Control Register */
> + ranges_overlap(address, len, 0x58, 4)) /* PAVPC - Protected
> Audio Video Path Control */
> + {
> + val =
> host_pci_read_config(0,PCI_SLOT(d->devfn),PCI_FUNC(d->devfn),address,len);
> + }
> + else
> + {
> + goto defaultread;
> + }
> + }
> + else
> + {
> +defaultread:
> + val = pci_default_read_config(d,address,len);
> + }
> +
> + Q35_DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
> + 0000, 00, PCI_SLOT(d->devfn),PCI_FUNC(d->devfn), address,
> len, val);
> +
> + return val;
> +
> +}
> +
> static void mch_update(MCHPCIState *mch)
> {
> mch_update_pciexbar(mch);
> @@ -370,6 +635,69 @@ static int mch_init(PCIDevice *d)
> &mch->pam_regions[i+1], PAM_EXPAN_BASE + i *
> PAM_EXPAN_SIZE,
> PAM_EXPAN_SIZE);
> }
> +
> + mch_setup_bdsm(d);
> + mch_setup_bgsm(d);
> + mch_setup_tseg(d);
> +
> + /* Unsure if this is important. but the following is as per INTEL spec
> + * which otherwise is non-conforming. */
> +#ifdef CORRECT_CONFIG
> + /* PCICMD Register */
> + pci_set_word(d->wmask + D0F0_PCICMD,(D0F0_PCICMD_SERR |
> D0F0_PCICMD_PERR)); // set writable
> + pci_set_word(d->config + D0F0_PCICMD,(D0F0_PCICMD_MAE |
> D0F0_PCICMD_BME)); // set 1
> +
> + /* PCISTS Register */
> + pci_set_word(d->w1cmask + D0F0_PCISTS,(D0F0_PCISTS_DPE |
> D0F0_PCISTS_SSE | D0F0_PCISTS_RMAS |
> + D0F0_PCISTS_RTAS | D0F0_PCISTS_DPD));
> + pci_set_word(d->config + D0F0_PCISTS,(D0F0_PCISTS_CLIST |
> D0F0_PCISTS_FB2B));
> +
> + /* CC Register */
> + /* No RW Registers */
> + //pci_set_byte(d->config + D0F0_CC + D0F0_CC_BCC, 0x06); /* indicating
> a bridge device */
> +
> + /* HDR Register */
> + /* No RW Registers */
> + /* !nnz */
> +
> + /* SVID and SID need not be changed */
> +
> + /* PXPEPBAR - TODO? */
> + pci_set_quad(d->wmask + D0F0_PXPEPBAR, (D0F0_PXPEPBAR_PXPEPBAR |
> D0F0_PXPEPBAR_PXPEPBAREN)); // set writable
> +
> + /* MCHBAR - TODO? */
> + pci_set_quad(d->wmask + D0F0_MCHBAR, (D0F0_MCHBAR_MCHBAR |
> D0F0_MCHBAR_MCHBAREN)); // set writable
> +
> + /* GGC-GMCH */
> + /* RW Registers can be locked by GCCLCK - TODO, assumed RO */
> + //pci_set_word(d->config + D0F0_GGC, (data & (D0F0_GGC_VAMEN |
> D0F0_GGC_GGMS | D0F0_GGC_GMS | D0F0_GGC_GGCLCK)));
> +
> + /* DEVEN */
> + pci_set_long(d->wmask + D0F0_DEVEN, D0F0_DEVEN_D0EN);
> + pci_set_long(d->config + D0F0_DEVEN, D0F0_DEVEN_D2EN);
> +
> + /* DMIBAR */
> + pci_set_quad(d->wmask + D0F0_DMIBAR, (D0F0_DMIBAR_DMIBAR |
> D0F0_DMIBAR_DMIBAREN)); // set writable
> +
> + /* TOM - Top Of Memory Register */
> + pci_set_quad(d->wmask + D0F0_TOM, (D0F0_TOM_TOM | D0F0_TOM_LOCK ));
> +
> + /* TOUUD - Top Of Upper Usable DRAM Register */
> + pci_set_quad(d->wmask + D0F0_TOUUD, (D0F0_TOUUD_TOUUD |
> D0F0_TOUUD_LOCK ));
> +
> + /* BDSM - Base Data of Stolen Memory Register */
> + pci_set_long(d->wmask + D0F0_BDSM, (D0F0_BDSM_BDSM | D0F0_BDSM_LOCK));
> +
> + /* BGSM - Base of GTT Stolen Memory Register */
> + pci_set_long(d->wmask + D0F0_BGSM, (D0F0_BGSM_BGSM | D0F0_BGSM_LOCK));
> +
> + /* TSEG - G Memory Base Register */
> + pci_set_long(d->wmask + D0F0_TSEG, (D0F0_TSEG_TSEGMB |
> D0F0_TSEG_LOCK));
> +
> + /* TOLUD - Top of Low Usable DRAM */
> + pci_set_long(d->wmask + D0F0_TOLUD, (D0F0_TOLUD_TOLUD |
> D0F0_TOLUD_LOCK));
> +#endif
> +
A huge pile of migration breaks if nothing else.
> return 0;
> }
>
> @@ -390,6 +718,7 @@ static void mch_class_init(ObjectClass *klass, void
> *data)
>
> k->init = mch_init;
> k->config_write = mch_write_config;
> + k->config_read = mch_read_config;
> dc->reset = mch_reset;
> set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
> dc->desc = "Host bridge";
> @@ -403,6 +732,16 @@ static void mch_class_init(ObjectClass *klass, void
> *data)
> * host-facing part, which can't be device_add'ed, yet.
> */
> dc->cannot_instantiate_with_device_add_yet = true;
> +
> + /* XEN code suggests these values should match the host.
> + * However, it's not that simple because currently seabios expects
> + * either PII3X or Q35.
> + * Further more, I have only found reason for the LPC to match the host
> + * for the PCH identification. No mention about the host-bridge. */
> +
> +// k->vendor_id = host_pci_read_config(0,0,0,0x00,2);
> +// k->device_id = host_pci_read_config(0,0,0,0x02,2);
> +// k->revision = host_pci_read_config(0,0,0,0x08,1);
> }
>
> static const TypeInfo mch_info = {
More information about the Intel-gfx
mailing list